###
### This is a copy of main robots.txt to track in repository
###
#
# Format notes (Robots Exclusion Protocol, RFC 9309):
#   * One directive per line; "#" starts a comment that runs to end of line.
#   * Rule paths are matched as PREFIXES from the start of the URL path.
#     "*" matches any run of characters, "$" anchors the end of the URL.
#   * When several rules match, the longest (most specific) path wins;
#     on a tie, Allow wins.
#   * A crawler obeys only the single most specific User-agent group that
#     matches it; groups are not merged with the "*" group.
#   * No blank lines are used inside a group, because some older parsers
#     treat a blank line as the end of the group.

# ========== Default rules for all crawlers ==========
User-agent: *
# ---------- Core allowances (let Google fetch assets) ----------
Allow: /wp-content/uploads/          # images/files used by pages
Allow: /wp-includes/js/              # core JS needed for rendering
Allow: /wp-admin/admin-ajax.php      # AJAX endpoint used by front-end pages; longer than /wp-admin/, so it wins
# ---------- Block low-value/duplicate endpoints ----------
Disallow: /print/?*                  # print views (duplicate of articles)
Disallow: /feed/*                    # site-level RSS/Atom feeds (thin/duplicate)
Disallow: /*/feed/                   # per-post/per-archive feeds (the root rule above does not match these)
Disallow: /comment-page-*            # paginated comment URLs at the site root
Disallow: /*/comment-page-           # paginated comment URLs under a post, e.g. /<post>/comment-page-2/
Disallow: /?attachment_id=           # legacy attachment URLs
Disallow: /embed/                    # oEmbed endpoint at the site root
Disallow: /*/embed/                  # per-post oEmbed endpoints, e.g. /<post>/embed/
# ---------- Block admin/system noise (non-content) ----------
Disallow: /wp-admin/                 # admin area
Disallow: /wp-login.php              # login
Disallow: /wp-register.php           # registration
Disallow: /xmlrpc.php                # legacy XML-RPC endpoint
Disallow: /wp-content/cache/         # cache artifacts
Disallow: /cgi-bin/                  # server scripts
Disallow: /e/                        # custom utility paths
Disallow: /show-error-*              # error pages or debug views
Disallow: /wp-json/                  # REST API (block crawl if not public)
Disallow: /readme.html               # version disclosure
Disallow: /license.txt               # license file
Disallow: /trackback/                # legacy blog trackbacks at the site root
Disallow: /*/trackback/              # per-post trackback URLs, e.g. /<post>/trackback/

# ========== Bots with special allowances ==========
# NOTE: each group below REPLACES the "*" group for that bot, so none of the
# Disallow rules above apply to these crawlers; they may fetch every URL.

User-agent: Mediapartners-Google
Allow: /

User-agent: Adsbot-Google
Allow: /

User-agent: Googlebot-Image
Allow: /

User-agent: Googlebot-Mobile
Allow: /

User-agent: cXensebot
Allow: /

# ========== Sitemaps ==========
# Sitemap is a file-level directive; it is not tied to any User-agent group.
Sitemap: https://tricycle.org/sitemap_index.xml