# The Samosa Shop Robots.txt
# Last updated: 2025-11-20
#
# Format notes for maintainers:
#   * Every directive must sit on its own line. A "#" starts a comment that
#     runs to the end of the line, so directives that share a line with a
#     comment are ignored.
#   * A crawler obeys only the group that most specifically matches its
#     user-agent token. Rules in the "*" group are NOT inherited by a named
#     group, so any named group that should keep the shared restrictions
#     has to repeat them.

# Main directives for all search engine bots
User-agent: *
Allow: /
# Disallow admin and system directories
Disallow: /config/
Disallow: /search/
Disallow: /account/
Disallow: /api/
# More specific (longer) path than "Disallow: /api/", so it takes precedence
Allow: /api/ui-extensions/
Disallow: /static/
# Disallow query parameters
Disallow: /*?*author=*
Disallow: /*?*tag=*
Disallow: /*?*month=*
Disallow: /*?*view=*
Disallow: /*?*format=*
# Keep generic crawlers away from llms.txt. Disallow only blocks crawling;
# it is not a "noindex". The AI crawlers listed in the next group match
# their own group instead of this one and may still fetch the file.
Disallow: /llms.txt

# AI bot specific directives
# AI crawlers CAN access llms.txt and all content except the legal pages.
# This group replaces (does not extend) the "*" group for these agents.
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: CCBot
User-agent: anthropic-ai
User-agent: Claude-Web
User-agent: Google-Extended
User-agent: GoogleOther
User-agent: FacebookBot
User-agent: cohere-ai
User-agent: PerplexityBot
User-agent: Omgilibot
Allow: /
Allow: /llms.txt
Disallow: /privacy-policy.html
Disallow: /terms-of-use.html

# Google Ads Bot
User-agent: AdsBot-Google
User-agent: AdsBot-Google-Mobile
User-agent: AdsBot-Google-Mobile-Apps
Allow: /

# Bing Ads Bot
User-agent: adidxbot
Allow: /

# Sitemap location (applies to all crawlers; not tied to any group)
Sitemap: https://www.thesamosashop.online/sitemap.xml

# Crawl delay for heavy bots
# Crawl-delay is a non-standard extension; support varies by crawler.
# The shared path restrictions are repeated in each group because a named
# group replaces the "*" group for that crawler.
User-agent: Baiduspider
Crawl-delay: 10
Allow: /
Disallow: /config/
Disallow: /search/
Disallow: /account/
Disallow: /api/
Allow: /api/ui-extensions/
Disallow: /static/
Disallow: /*?*author=*
Disallow: /*?*tag=*
Disallow: /*?*month=*
Disallow: /*?*view=*
Disallow: /*?*format=*
Disallow: /llms.txt

User-agent: Yandex
Crawl-delay: 5
Allow: /
Disallow: /config/
Disallow: /search/
Disallow: /account/
Disallow: /api/
Allow: /api/ui-extensions/
Disallow: /static/
Disallow: /*?*author=*
Disallow: /*?*tag=*
Disallow: /*?*month=*
Disallow: /*?*view=*
Disallow: /*?*format=*
Disallow: /llms.txt

# Block aggressive crawlers
User-agent: AhrefsBot
Disallow: /

User-agent: SemrushBot
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: dotbot
Disallow: /