From 4fde7d158e66bf8a97579dc9e963dff01adea16e Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 21:39:42 +0100 Subject: [PATCH 01/27] Update spiders.txt Reference: http://www.useragentstring.com/pages/useragentstring.php I'm proposing this because the list of spiders is really outdated. --- catalog/includes/spiders.txt | 222 +++++++++++++++++++++++++++-------- 1 file changed, 171 insertions(+), 51 deletions(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index 11dd705dd..34c27ec36 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -1,54 +1,174 @@ $Id$ -almaden.ibm.com -appie 1.1 -architext -ask jeeves -asterias2.0 -augurfind -baiduspider -bannana_bot -bdcindexer -crawler -crawler@fast -docomo -fast-webcrawler -fluffy the spider -frooglebot -geobot -googlebot -gulliver -henrythemiragorobot +008 +ABACHOBot +Accoona-AI-Agent +AddSugarSpiderBot +AnyApexBot +Arachmo +B-l-i-t-z-B-O-T +Baiduspider +BecomeBot +BeslistBot +BillyBobBot +Bimbot +Bingbot +BlitzBOT +boitho.com-dc +boitho.com-robot +btbot +CatchBot +Cerberian Drtrs +Charlotte +ConveraCrawler +cosmos +Covario IDS +DataparkSearch +DiamondBot +Discobot +Dotbot +EARTHCOM.info +EmeraldShield.com WebBot +envolk[ITS]spider +EsperanzaBot +Exabot +FAST Enterprise Crawler +FAST-WebCrawler +FDSE robot +FindLinks +FurlBot +FyberSpider +g2crawler +Gaisbot +GalaxyBot +genieBot +Gigabot +Girafabot +Googlebot +Googlebot-Image +GurujiBot +HappyFunBot +hl_ftien_spider +Holmes +htdig +iaskspider ia_archiver -infoseek -kit_fireball -lachesis -lycos_spider -mantraagent -mercator -moget/1.0 -muscatferret -nationaldirectory-webspider -naverrobot -ncsa beta -netresearchserver -ng/1.0 -osis-project +iCCrawler +ichiro +igdeSpyder +IRLbot +IssueCrawler +Jaxified Bot +Jyxobot +KoepaBot +L.webis +LapozzBot +Larbin +LDSpider +LexxeBot +Linguee Bot +LinkWalker +lmspider +lwp-trivial +mabontland +magpie-crawler +Mediapartners-Google +MJ12bot +MLBot +Mnogosearch +mogimogi 
+MojeekBot +Moreoverbot +Morning Paper +msnbot +MSRBot +MVAClient +mxbot +NetResearchServer +NetSeer Crawler +NewsGator +NG-Search +nicebot +noxtrumbot +Nusearch Spider +NutchCVS +Nymesis +obot +oegp +omgilibot +OmniExplorer_Bot +OOZBOT +OrangeBot +Orbiter +PageBitesHyperBot +Peew polybot -pompos -scooter -seventwentyfour -sidewinder -sleek spider -slurp/si -slurp@inktomi.com -steeler/1.3 -szukacz -t-h-u-n-d-e-r-s-t-o-n-e -teoma -turnitinbot -ultraseek -vagabondo -voilabot -w3c_validator -zao/0 -zyborg/1.0 +Pompos +PostPost +Psbot +PycURL +Qseero +Radian6 +RAMPyBot +RufusBot +SandCrawler +SBIder +ScoutJet +Scrubby +SearchSight +Seekbot +semanticdiscovery +Sensis Web Crawler +SEOChat::Bot +SeznamBot +Shim-Crawler +ShopWiki +Shoula robot +silk +Sitebot +Snappy +sogou spider +Sosospider +Speedy Spider +Sqworm +StackRambler +suggybot +SurveyBot +SynooBot +Teoma +TerrawizBot +TheSuBot +Thumbnail.CZ robot +TinEye +truwoGPS +TurnitinBot +TweetedTimes Bot +TwengaBot +updated +Urlfilebot +Vagabondo +Vortex +voyager +VYU2 +webcollage +Websquash.com +wf84 +WoFindeIch Robot +WomlpeFactory +Xaldon_WebSpider +yacy +Yahoo! Slurp +Yahoo! 
Slurp China +YahooSeeker +YahooSeeker-Testing +YandexBot +YandexImages +YandexMetrika +Yasaklibot +Yeti +YodaoBot +yoogliFetchAgent +YoudaoBot +Zao +Zealbot +zspider +ZyBorg From 49ac9ae7b181cb6318af146fa53750394b05e015 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 21:42:46 +0100 Subject: [PATCH 02/27] change 008 spider to voltron --- catalog/includes/spiders.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index 34c27ec36..fad1644f9 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -1,5 +1,4 @@ $Id$ -008 ABACHOBot Accoona-AI-Agent AddSugarSpiderBot @@ -146,6 +145,7 @@ TwengaBot updated Urlfilebot Vagabondo +Voltron Vortex voyager VYU2 From 9200ef41ab5cc3a5f29235bbd297ffb8f37de9ba Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 21:49:42 +0100 Subject: [PATCH 03/27] Remove AnyApexBot as not exists anymore --- catalog/includes/spiders.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index fad1644f9..8092c82b8 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -2,7 +2,6 @@ $Id$ ABACHOBot Accoona-AI-Agent AddSugarSpiderBot -AnyApexBot Arachmo B-l-i-t-z-B-O-T Baiduspider From c6e4b5ded82bcf8683691e9a6443498f008974d0 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 21:52:11 +0100 Subject: [PATCH 04/27] Remove B-l-i-t-z-B-O-T as does not exists anymore --- catalog/includes/spiders.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index 8092c82b8..72b3bf87d 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -3,14 +3,12 @@ ABACHOBot Accoona-AI-Agent AddSugarSpiderBot Arachmo -B-l-i-t-z-B-O-T Baiduspider BecomeBot BeslistBot BillyBobBot Bimbot Bingbot -BlitzBOT boitho.com-dc boitho.com-robot btbot From 8d040f64bea9f2af889561ee5bba257e6293b1a9 Mon Sep 
17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 21:54:00 +0100 Subject: [PATCH 05/27] Remove BillyBobBot as it does not exist anymore --- catalog/includes/spiders.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index 72b3bf87d..35d62329f 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -6,7 +6,6 @@ Arachmo Baiduspider BecomeBot BeslistBot -BillyBobBot Bimbot Bingbot boitho.com-dc From a984faf2aa39551a879e013fa75305e7d30b1c91 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 21:56:36 +0100 Subject: [PATCH 06/27] Remove Boithobot as it does not work anymore --- catalog/includes/spiders.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index 35d62329f..82b61280e 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -8,8 +8,6 @@ BecomeBot BeslistBot Bimbot Bingbot -boitho.com-dc -boitho.com-robot btbot CatchBot Cerberian Drtrs From f0dda94bf0f2894406d1c67b14d3470234430b25 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 21:57:33 +0100 Subject: [PATCH 07/27] Remove btbot as it uses Google search results --- catalog/includes/spiders.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index 82b61280e..60b689d5b 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -8,7 +8,6 @@ BecomeBot BeslistBot Bimbot Bingbot -btbot CatchBot Cerberian Drtrs Charlotte From 8104436cbeeb15d4d287d08ae020c17c3ac1f617 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 21:58:48 +0100 Subject: [PATCH 08/27] Remove CatchBot as it does not exist anymore --- catalog/includes/spiders.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index 60b689d5b..8923b5292 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -8,7 +8,6 @@ 
BecomeBot BeslistBot Bimbot Bingbot -CatchBot Cerberian Drtrs Charlotte ConveraCrawler From 7bb8b734dd7f31adf1dd5acca118db6942d17300 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 21:59:40 +0100 Subject: [PATCH 09/27] Remove Cerberian Drtrs as it is a filtering tool --- catalog/includes/spiders.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index 8923b5292..a604f729c 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -8,7 +8,6 @@ BecomeBot BeslistBot Bimbot Bingbot -Cerberian Drtrs Charlotte ConveraCrawler cosmos From 3c4f48bf4b4732739d571b20da99ade3851c6599 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 22:00:37 +0100 Subject: [PATCH 10/27] Remove Charlotte as it use yahoo search results --- catalog/includes/spiders.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index a604f729c..dbbf5634b 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -8,7 +8,6 @@ BecomeBot BeslistBot Bimbot Bingbot -Charlotte ConveraCrawler cosmos Covario IDS From ee739dc353a62fe24badc882d57a3b668060c8b9 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 22:02:38 +0100 Subject: [PATCH 11/27] Remove ConveraCrawler cosmos spiders as index not html pages --- catalog/includes/spiders.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index dbbf5634b..d3c82a053 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -8,8 +8,6 @@ BecomeBot BeslistBot Bimbot Bingbot -ConveraCrawler -cosmos Covario IDS DataparkSearch DiamondBot From 87c7dcb4e0f24c55ff5056a7af0e60d4f4c16457 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 22:04:17 +0100 Subject: [PATCH 12/27] Remove Covario IDS as does not seems to work anymore --- catalog/includes/spiders.txt | 1 - 1 file changed, 1 deletion(-) diff --git 
a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index d3c82a053..83c41a751 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -8,7 +8,6 @@ BecomeBot BeslistBot Bimbot Bingbot -Covario IDS DataparkSearch DiamondBot Discobot From 1a6046a94707aba3be81b09fcbd0e87db2df6713 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 22:06:29 +0100 Subject: [PATCH 13/27] Remove DiamondBot as seems to not exists anymore --- catalog/includes/spiders.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index 83c41a751..77bc098a0 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -9,7 +9,6 @@ BeslistBot Bimbot Bingbot DataparkSearch -DiamondBot Discobot Dotbot EARTHCOM.info From 5102b2855eb7599c6afe7a75242acddaa67bdafd Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 22:07:55 +0100 Subject: [PATCH 14/27] Remove discobot as does not exists anymore --- catalog/includes/spiders.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index 77bc098a0..381868885 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -9,7 +9,6 @@ BeslistBot Bimbot Bingbot DataparkSearch -Discobot Dotbot EARTHCOM.info EmeraldShield.com WebBot From d75908a3e33abcab3928ef9616e2c4edce4d890f Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 22:09:40 +0100 Subject: [PATCH 15/27] Remove EARTHCOM.info as does not exists anymore --- catalog/includes/spiders.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index 381868885..4b00414c5 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -10,7 +10,6 @@ Bimbot Bingbot DataparkSearch Dotbot -EARTHCOM.info EmeraldShield.com WebBot envolk[ITS]spider EsperanzaBot From 48329889e637f045eb8dbad8fb8bf11cfb7a268a Mon Sep 17 00:00:00 2001 From: Luca Date: 
Tue, 9 Dec 2014 22:10:23 +0100 Subject: [PATCH 16/27] Remove EmeraldShield.com WebBot as it does not exist anymore --- catalog/includes/spiders.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index 4b00414c5..56c121072 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -10,7 +10,6 @@ Bimbot Bingbot DataparkSearch Dotbot -EmeraldShield.com WebBot envolk[ITS]spider EsperanzaBot Exabot From 10575a9eda5619a029897a97933525e8ff17be52 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 22:12:02 +0100 Subject: [PATCH 17/27] Remove EsperanzaBot as it does not exist anymore --- catalog/includes/spiders.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index 56c121072..816b14ad4 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -11,7 +11,6 @@ Bingbot DataparkSearch Dotbot envolk[ITS]spider -EsperanzaBot Exabot FAST Enterprise Crawler FAST-WebCrawler From 2963a62f893e497f365b1e6e8bc721eba2fcacb6 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 22:16:31 +0100 Subject: [PATCH 18/27] Remove FDSE robot as it is an internal search engine for sites --- catalog/includes/spiders.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index 816b14ad4..c0eadecda 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -14,7 +14,6 @@ envolk[ITS]spider Exabot FAST Enterprise Crawler FAST-WebCrawler -FDSE robot FindLinks FurlBot FyberSpider From 9dc86a04c0e47144b1fd4523b590653f5caeabcd Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 22:17:22 +0100 Subject: [PATCH 19/27] Remove FindLinks as it is currently not working --- catalog/includes/spiders.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index c0eadecda..263fa031d 100644 --- 
a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -14,7 +14,6 @@ envolk[ITS]spider Exabot FAST Enterprise Crawler FAST-WebCrawler -FindLinks FurlBot FyberSpider g2crawler From e753ed596798ef381ede502dbfd0afaa4eab9564 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 22:20:39 +0100 Subject: [PATCH 20/27] Remove g2crawler as it is not used anymore --- catalog/includes/spiders.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index 263fa031d..50553f80a 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -16,7 +16,6 @@ FAST Enterprise Crawler FAST-WebCrawler FurlBot FyberSpider -g2crawler Gaisbot GalaxyBot genieBot From 08b4feee7cd5d13e4fbb4161ecbb55b1a86277f7 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 22:22:50 +0100 Subject: [PATCH 21/27] remove Gaisbot as does not exists anymore --- catalog/includes/spiders.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index 50553f80a..effe241f6 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -16,7 +16,6 @@ FAST Enterprise Crawler FAST-WebCrawler FurlBot FyberSpider -Gaisbot GalaxyBot genieBot Gigabot From 8a9ee7e8757cd4d96b321bba0631118496c50a5d Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 22:23:47 +0100 Subject: [PATCH 22/27] Remove genieBot as does not exists anymore --- catalog/includes/spiders.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index effe241f6..7f088ec03 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -17,7 +17,6 @@ FAST-WebCrawler FurlBot FyberSpider GalaxyBot -genieBot Gigabot Girafabot Googlebot From c1c02a04ac791e4329c4d94fea6136ca08e0993b Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 22:26:43 +0100 Subject: [PATCH 23/27] Remove GurujiBot as does not exists anymore 
--- catalog/includes/spiders.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index 7f088ec03..9c86bcbb5 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -21,7 +21,6 @@ Gigabot Girafabot Googlebot Googlebot-Image -GurujiBot HappyFunBot hl_ftien_spider Holmes From bcf88cebf47f5ee4a99f5cd9b9d88133e6be5f34 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 22:27:25 +0100 Subject: [PATCH 24/27] remove HappyFunBot as does not exists anymore --- catalog/includes/spiders.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index 9c86bcbb5..a5fc57103 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -21,7 +21,6 @@ Gigabot Girafabot Googlebot Googlebot-Image -HappyFunBot hl_ftien_spider Holmes htdig From b6b6e073949f4322847cf720e0902b1bd9a41a99 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 23:18:11 +0100 Subject: [PATCH 25/27] Remove small and not existent spiders user agents --- catalog/includes/spiders.txt | 56 +----------------------------------- 1 file changed, 1 insertion(+), 55 deletions(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index a5fc57103..e778aa311 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -1,4 +1,5 @@ $Id$ +almaden.ibm.com ABACHOBot Accoona-AI-Agent AddSugarSpiderBot @@ -21,21 +22,12 @@ Gigabot Girafabot Googlebot Googlebot-Image -hl_ftien_spider Holmes -htdig -iaskspider ia_archiver -iCCrawler ichiro igdeSpyder IRLbot -IssueCrawler -Jaxified Bot -Jyxobot -KoepaBot L.webis -LapozzBot Larbin LDSpider LexxeBot @@ -47,15 +39,11 @@ mabontland magpie-crawler Mediapartners-Google MJ12bot -MLBot Mnogosearch -mogimogi MojeekBot Moreoverbot Morning Paper msnbot -MSRBot -MVAClient mxbot NetResearchServer NetSeer Crawler @@ -63,87 +51,45 @@ NewsGator NG-Search nicebot noxtrumbot -Nusearch Spider 
NutchCVS -Nymesis obot -oegp -omgilibot -OmniExplorer_Bot OOZBOT OrangeBot -Orbiter -PageBitesHyperBot -Peew polybot -Pompos -PostPost Psbot PycURL -Qseero -Radian6 -RAMPyBot -RufusBot -SandCrawler -SBIder ScoutJet Scrubby -SearchSight Seekbot -semanticdiscovery -Sensis Web Crawler SEOChat::Bot SeznamBot Shim-Crawler ShopWiki Shoula robot silk -Sitebot Snappy sogou spider Sosospider Speedy Spider -Sqworm StackRambler suggybot -SurveyBot SynooBot Teoma -TerrawizBot -TheSuBot Thumbnail.CZ robot TinEye -truwoGPS TurnitinBot -TweetedTimes Bot TwengaBot -updated Urlfilebot Vagabondo Voltron -Vortex -voyager -VYU2 -webcollage Websquash.com -wf84 -WoFindeIch Robot -WomlpeFactory -Xaldon_WebSpider yacy Yahoo! Slurp -Yahoo! Slurp China YahooSeeker -YahooSeeker-Testing YandexBot YandexImages -YandexMetrika -Yasaklibot Yeti YodaoBot -yoogliFetchAgent YoudaoBot -Zao Zealbot -zspider ZyBorg From a958130e5757d4e6326fffbb2fb73f670d20a9db Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 23:22:25 +0100 Subject: [PATCH 26/27] Convert all to lowercase --- catalog/includes/spiders.txt | 156 +++++++++++++++++------------------ 1 file changed, 78 insertions(+), 78 deletions(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index e778aa311..1cffc747f 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -1,95 +1,95 @@ -$Id$ +$id$ almaden.ibm.com -ABACHOBot -Accoona-AI-Agent -AddSugarSpiderBot -Arachmo -Baiduspider -BecomeBot -BeslistBot -Bimbot -Bingbot -DataparkSearch -Dotbot -envolk[ITS]spider -Exabot -FAST Enterprise Crawler -FAST-WebCrawler -FurlBot -FyberSpider -GalaxyBot -Gigabot -Girafabot -Googlebot -Googlebot-Image -Holmes +abachobot +accoona-ai-agent +addsugarspiderbot +arachmo +baiduspider +becomebot +beslistbot +bimbot +bingbot +dataparksearch +dotbot +envolk[its]spider +exabot +fast enterprise crawler +fast-webcrawler +furlbot +fyberspider +galaxybot +gigabot +girafabot +googlebot +googlebot-image +holmes 
ia_archiver ichiro -igdeSpyder -IRLbot -L.webis -Larbin -LDSpider -LexxeBot -Linguee Bot -LinkWalker +igdespyder +irlbot +l.webis +larbin +ldspider +lexxebot +linguee bot +linkwalker lmspider lwp-trivial mabontland magpie-crawler -Mediapartners-Google -MJ12bot -Mnogosearch -MojeekBot -Moreoverbot -Morning Paper +mediapartners-google +mj12bot +mnogosearch +mojeekbot +moreoverbot +morning paper msnbot mxbot -NetResearchServer -NetSeer Crawler -NewsGator -NG-Search +netresearchserver +netseer crawler +newsgator +ng-search nicebot noxtrumbot -NutchCVS +nutchcvs obot -OOZBOT -OrangeBot +oozbot +orangebot polybot -Psbot -PycURL -ScoutJet -Scrubby -Seekbot -SEOChat::Bot -SeznamBot -Shim-Crawler -ShopWiki -Shoula robot +psbot +pycurl +scoutjet +scrubby +seekbot +seochat::bot +seznambot +shim-crawler +shopwiki +shoula robot silk -Snappy +snappy sogou spider -Sosospider -Speedy Spider -StackRambler +sosospider +speedy spider +stackrambler suggybot -SynooBot -Teoma -Thumbnail.CZ robot -TinEye -TurnitinBot -TwengaBot -Urlfilebot -Vagabondo -Voltron -Websquash.com +synoobot +teoma +thumbnail.cz robot +tineye +turnitinbot +twengabot +urlfilebot +vagabondo +voltron +websquash.com yacy -Yahoo! Slurp -YahooSeeker -YandexBot -YandexImages -Yeti -YodaoBot -YoudaoBot -Zealbot -ZyBorg +yahoo! slurp +yahooseeker +yandexbot +yandeximages +yeti +yodaobot +youdaobot +zealbot +zyborg From a8097b4fc57c073b0feae29cf7173c592bdf9192 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 9 Dec 2014 23:22:44 +0100 Subject: [PATCH 27/27] Fix typo --- catalog/includes/spiders.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catalog/includes/spiders.txt b/catalog/includes/spiders.txt index 1cffc747f..7a20e2b41 100644 --- a/catalog/includes/spiders.txt +++ b/catalog/includes/spiders.txt @@ -1,4 +1,4 @@ -$id$ +$Id$ almaden.ibm.com abachobot accoona-ai-agent