# robots.txt for https://rozz.certain.com
# GEO-optimized content subdomain for certain.com
# This file applies to: https://rozz.certain.com/* (NOT the main certain.com site)
#
# Format note: every directive below must stay on its own line. A "#" starts a
# comment that runs to the end of the line, so a directive that shares a line
# with a preceding comment is ignored by parsers.

# Allow all crawlers (content is public)
User-agent: *
Allow: /

# =============================================================================
# TRADITIONAL SEARCH ENGINES
# =============================================================================

User-agent: Googlebot
Allow: /

User-agent: Bingbot
Allow: /

User-agent: Slurp
Allow: /

User-agent: DuckDuckBot
Allow: /

User-agent: Yandex
Allow: /

# =============================================================================
# LLM CRAWLERS - Welcome!
# See https://rozz.certain.com/llms.txt and https://rozz.certain.com/llms-full.txt
# =============================================================================

# --- Anthropic (Claude) ---

# ClaudeBot: Training data collection
User-agent: ClaudeBot
Allow: /
Crawl-delay: 0

# Claude-SearchBot: Search index for Claude web search citations
User-agent: Claude-SearchBot
Allow: /
Crawl-delay: 0

# Claude-User: User-triggered URL fetches during conversations
User-agent: Claude-User
Allow: /

# Claude-Web: Legacy Anthropic crawler
User-agent: Claude-Web
Allow: /

# anthropic-ai: Generic Anthropic bot identifier
User-agent: anthropic-ai
Allow: /

# --- OpenAI (ChatGPT/SearchGPT) ---

# GPTBot: Training data collection
User-agent: GPTBot
Allow: /

# OAI-SearchBot: Search index for SearchGPT citations
User-agent: OAI-SearchBot
Allow: /
Crawl-delay: 0

# ChatGPT-User: User-triggered URL fetches during conversations
User-agent: ChatGPT-User
Allow: /

# --- Perplexity ---

# PerplexityBot: Training and search index for Perplexity citations
User-agent: PerplexityBot
Allow: /
Crawl-delay: 0

# Perplexity-User: User-triggered fetches
# "Perplexity-User" is the product token Perplexity documents; the unhyphenated
# "PerplexityUser" spelling is kept in the same group for backward compatibility.
User-agent: Perplexity-User
User-agent: PerplexityUser
Allow: /

# --- Google AI ---

# Google-Extended: Gemini/Bard training data
User-agent: Google-Extended
Allow: /

# --- Meta (Facebook/Instagram) ---

# Meta-ExternalAgent: Meta AI training and search
User-agent: Meta-ExternalAgent
Allow: /

# Meta-ExternalFetcher: Meta content fetching
User-agent: Meta-ExternalFetcher
Allow: /

# --- Other LLM Providers ---

# Amazonbot: Amazon Alexa/AI training
User-agent: Amazonbot
Allow: /

# YouBot: You.com AI search
User-agent: YouBot
Allow: /

# cohere-ai: Cohere AI training
User-agent: cohere-ai
Allow: /

# Bytespider: ByteDance/TikTok AI
User-agent: Bytespider
Allow: /

# CCBot: Common Crawl (used by many AI companies)
User-agent: CCBot
Allow: /

# =============================================================================
# DISCOVERY FILES & APIs
# =============================================================================

# Sitemap location
Sitemap: https://rozz.certain.com/sitemap.xml

# LLM discovery files (https://llmstxt.org/)
# https://rozz.certain.com/llms.txt - Concise content index with links
# https://rozz.certain.com/llms-full.txt - Complete Q&A content inline (recommended for LLMs)

# Structured API endpoints (JSON)
# https://rozz.certain.com/api/qna.json - All Q&As with answers and metadata
# https://rozz.certain.com/api/pages.json - All pages index
# https://rozz.certain.com/api/topics.json - Topic taxonomy
# https://rozz.certain.com/api/search.json - Lightweight search index

# Rate limiting recommendation
# Please limit requests to 10/second

# Contact
# Questions: support@rozz.site