# robots.txt for a WordPress site.
#
# Format reminder: one directive per line. A "#" starts a comment that runs to
# the end of the line, so directives must never share a line with a preceding
# comment. Groups are separated by blank lines and begin with "User-agent:".

# ---------------------------------------------------------------------------
# Default group: a user agent of "*" (asterisk) applies the rules beneath it
# to every crawler that does not have a more specific group further down.
# ---------------------------------------------------------------------------
User-agent: *
Disallow: /cgi-bin/
# Disallow the wp-admin and wp-includes directories, as these contain
# WordPress software application data.
Disallow: /wp-admin/
Disallow: /wp-includes/
Disallow: /wp-content/
# Categories are duplicate content; the crawlers will still find your posts.
Disallow: /category/
Disallow: /trackback/
Disallow: /author/
Disallow: /page/
Disallow: /tag/
Disallow: /search/
# Feeds are duplicate content, but don't worry: crawlers will find your feeds
# elsewhere anyway.
Disallow: /feed/
Disallow: /comments/
# Add another of these lines for each year in your archives.
Disallow: /2008/
# All files with a .php extension in the main directory should be disallowed
# except index.php. Find your main directory and simply disallow each PHP
# file specifically.
Disallow: /fantversion.php
Disallow: /wp-app.php
Disallow: /wp-atom.php
Disallow: /wp-feed.php
Disallow: /wp-blog-header.php
Disallow: /wp-comments-post.php
Disallow: /wp-commentsrss2.php
Disallow: /wp-config-sample.php
Disallow: /wp-config.php
Disallow: /wp-cron.php
Disallow: /wp-links-opml.php
Disallow: /wp-login.php
Disallow: /wp-mail.php
Disallow: /wp-pass.php
Disallow: /wp-rdf.php
Disallow: /wp-register.php
Disallow: /wp-rss.php
Disallow: /wp-rss2.php
Disallow: /wp-settings.php
Disallow: /wp-trackback.php
Disallow: /xmlrpc.php
# Readmes and licenses are useless to your rankings.
# (No trailing slash: these are files, and "/license.txt/" would only match
# paths underneath a directory of that name, never the file itself.)
Disallow: /license.txt
Disallow: /readme.html

# Banish this crawler: it is used by digg.com to make copies of your website
# before posting it on Digg, which sucks bandwidth, steals your traffic and
# takes away from your click-throughs.
User-agent: duggmirror
Disallow: /

# Disallow certain archivers such as the Internet Archive's Wayback Machine so
# that old versions of your site can no longer be accessed.
User-agent: ia_archiver
Disallow: /

# Disallow all files ending with these extensions in Google.
# NOTE(review): a crawler obeys only the most specific group that matches it,
# so Googlebot follows THIS group and ignores the "User-agent: *" rules above.
# If the directory rules are meant to apply to Googlebot too, they must be
# repeated here - confirm the intent.
# NOTE(review): blocking .css and .js can prevent Google from rendering pages
# properly - confirm these two rules are still wanted.
User-agent: Googlebot
Disallow: /*.js$
Disallow: /*.inc$
Disallow: /*.css$
Disallow: /*.gz$
Disallow: /*.wmv$
Disallow: /*.cgi$
Disallow: /*.xhtml$
Disallow: /*.tar$
Disallow: /*.tgz$

# Allow the Mediapartners Google bot to determine AdSense content.
User-agent: Mediapartners-Google
Allow: /

# Allow Google to crawl all pages to measure page quality for AdWords.
User-agent: Adsbot-Google
Allow: /

# Allow the Google image crawler to crawl all pages.
User-agent: Googlebot-Image
Allow: /

# Allow the Google bot to crawl all pages for its mobile index.
User-agent: Googlebot-Mobile
Allow: /