~ubuntu-branches/ubuntu/gutsy/awstats/gutsy-updates

« back to all changes in this revision

Viewing changes to wwwroot/cgi-bin/lib/robots.pm

  • Committer: Bazaar Package Importer
  • Author(s): Charles Fry
  • Date: 2007-02-10 11:11:02 UTC
  • mfrom: (1.2.4 upstream)
  • Revision ID: james.westby@ubuntu.com-20070210111102-bbx43ay1krtxh1nq
Tags: 6.6+dfsg-1
New upstream release (Closes: #350987, #335865)

Show diffs side-by-side

added added

removed removed

Lines of Context:
3
3
# If you want to add robots to extend AWStats database detection capabilities,
4
4
# you must add an entry in RobotsSearchIDOrder_listx and RobotsHashIDLib.
5
5
#-------------------------------------------------------
6
 
# $Revision: 1.41 $ - $Author: eldy $ - $Date: 2005/12/17 12:37:01 $
 
6
# $Revision: 1.44 $ - $Author: eldy $ - $Date: 2006/07/17 23:50:54 $
7
7
 
8
8
# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html
9
9
#              added dipsie (not tested with real data).
79
79
#               added link for GigaBot
80
80
#               added link for MagpieRSS
81
81
#               added link for MSIECrawler
 
82
# 2005-12-21
 
83
#               added aipbot http://www.aipbot.com aipbot@aipbot.com [matthys70 users.sourceforge.net]
 
84
#               added Everest-Vulcan Inc./0.1 (R&D project; http://everest.vulcan.com/crawlerhelp)
 
85
#               added Fast-Search-Engine http://www.fast-search-engine.com/ [matthys70  users.sourceforge.net]
 
86
#               added g2Crawler (nobody@airmail.net) http://crawler.instantnetworks.net/
 
87
#               added Jakarta commons-httpclient http://jakarta.apache.org/commons/httpclient/ (hit robots.txt).  May be used as robot or browser - a site may want to remove this entry.
 
88
#               added OmniExplorer_Bot http://www.omni-explorer.com/ [matthys70 users.sourceforge.net]
 
89
#               added USTC-Semantic-Group ai.ustc.edu.cn/mas/en/research/index.php ?
 
90
# 2005-12-22
 
91
#               added EARTHCOM.info www.earthcom.info
 
92
#               added HTTrack off-line browser 'httrack','HTTrack', http://www.httrack.com/ [Moizes Gabor]
 
93
#               added KummHttp http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_g_l_301105_2\b [Moizes Gabor]
 
94
# 2006-01-01    
 
95
#               added Dulance http://www.dulance.com/bot.jsp
 
96
#               added MojeekBot http://www.mojeek.com/bot.html
 
97
#               added nicebot http://www.egghelp.org/setup.htm ?
 
98
#               added Snappy http://www.urltrends.com/faq.php 
 
99
#               added sohu agent
 
100
#               added TencentTraveler
 
101
#               added VORTEX http://marty.anstey.ca/robots/vortex/ [matthys70 users.sourceforge.net]
 
102
#               added zspider http://feedback.redkolibri.com/
 
103
# 2006-01-13
 
104
#               added boitho.com-dc http://www.boitho.com/dcbot.html
 
105
#               added IRLbot http://irl.cs.tamu.edu/crawler
 
106
#               added virus_detector virus_harvester@securecomputing.com
 
107
#               added Wavefire http://www.wavefire.com; info@wavefire.com
 
108
#               added WebFilter Robot
 
109
# 2006-01-24
 
110
#               added Shim-Crawler http://www.logos.ic.i.u-tokyo.ac.jp/crawler/; crawl@logos.ic.i.u-tokyo.ac.jp
 
111
#               added Exabot exabot.com
 
112
#               added LetsCrawl.com http://letscrawl.com
 
113
#               added ichiro http://help.goo.ne.jp/door/crawlerE.html
 
114
# 2006-01-27    additional 22 robots from a list provided by Moizes Gabor
 
115
#               added ALeadSoftbot      http://www.aleadsoft.com/bot.htm        
 
116
#               added CipinetBot        http://www.cipinet.com/bot.html 
 
117
#               added Cuasarbot http://www.cuasar.com/  
 
118
#               added Dumbot    http://www.dumbfind.com/        
 
119
#               added Extreme_Picture_Finder    http://www.exisoftware.com/     
 
120
#               added Fooky.com/ScorpionBot/ScoutOut    http://www.fooky.com/scorpionbots       
 
121
#               added IlTrovatore-Setaccio      http://www.iltrovatore.it/aiuto/motore_di_ricerca.html  bot@iltrovatore.it
 
122
#               added InsurancoBot      http://www.fastspywareremoval.com/      
 
123
#               added InternetArchive   http://lucene.apache.org/nutch/bot.html         nutch-agent@lucene.apache.org
 
124
#               added KazoomBot http://www.kazoom.ca/bot.html   kazoombot@kazoom.ca 
 
125
#               added Kurzor    http://www.easymail.hu/ cursor@easymail.hu
 
126
#               added NutchCVS  http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org
 
127
#               added NutchOSU-VLIB     http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org
 
128
#               added Orbiter   http://www.dailyorbit.com/bot.htm       
 
129
#               added PHP_version_tracker       http://www.nexen.net/phpversion/bot.php 
 
130
#               added SuperBot  http://www.sparkleware.com/superbot/    
 
131
#               added SynooBot  http://www.synoo.de/bot.html    webmaster@synoo.com
 
132
#               added TestBot   http://www.agbrain.com/ 
 
133
#               added TutorGigBot       http://www.tutorgig.info/       
 
134
#               added UP.Browser        http://developer.openwave.com/dvl/support/faqs/faq_mag_browser.htm      
 
135
#               added WebIndexer        mailto://webindexerv1@yahoo.com 
 
136
#               added WebMiner  http://64.124.122.252/feedback.html
 
137
# 2006-02-01    
 
138
#               added heritrix https://sourceforge.net/forum/message.php?msg_id=3550202
 
139
#               added Zeus Webster Pro https://sourceforge.net/forum/message.php?msg_id=3141164
 
140
#               additional robots from a list provided by Moizes Gabor [ mojzi -a-t- free mail hu ]
 
141
#               added Candlelight_Favorites_Inspector
 
142
#               added DomainChecker 
 
143
#               added EasyDL 
 
144
#               added FavOrg  
 
145
#               added Favorites_Sweeper
 
146
#               added Html_Link_Validator
 
147
#               added Internet_Ninja  
 
148
#               added JRTwine_Software_Check_Favorites_Utility
 
149
#               fixed Microsoft_URL_Control
 
150
#               added miniRank 
 
151
#               added Missigua_Locator
 
152
#               added NPBot 
 
153
#               added Ocelli 
 
154
#               added Onet.pl_SA 
 
155
#               added proodleBot 
 
156
#               added SearchGuild_DMOZ_Experiment 
 
157
#               added Susie 
 
158
#               added Website_Monitoring_Bot
 
159
#               added Xenu_Link_Sleuth
 
160
# 2006-05-15
 
161
#               added ASPseek http://www.aspseek.org/
 
162
#               added AdamM Bot http://home.blic.net/adamm/  
 
163
#               added archive.org_bot http://crawls.archive.org/collections/bncf/crawl.html
 
164
#               added arianna.libero.it (Italian Portal/search engine)
 
165
#               added Biz360 spider http://www.biz360.com
 
166
#               added BlogBridge Service http://www.blogbridge.com/
 
167
#               added BlogSearch http://www.icerocket.com/ 
 
168
#               added libcrawl
 
169
#               added edgeio-retriever http://www.edgeio.com
 
170
#               added FeedFlow http://feedflow.com/about
 
171
#               added Biblioteca Nazionale Centrale di Firenze (Italian National Archive) http://www.bncf.firenze.sbn.it/raccolta.txt
 
172
#               added Java catchall - used by many spam bots 
 
173
#               added lanshanbot http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_g_l_140406_1%5Cb
 
174
#               added msnbot-media http://search.msn.com/msnbot.htm
 
175
#               added MT::Telegraph::Agent
 
176
#               added Netluchs http://www.netluchs.de/ (German SE bot)
 
177
#               added oBot http://www.webmasterworld.com/forum11/1616.htm
 
178
#               added Onfolio http://www.onfolio.com/  (IE Toolbar plugin) - hit rss feeds.
 
179
#               added ping.blo.gs http://blo.gs/ping.php blog bot
 
180
#               added sogou spider http://corp.sohu.com/20051130/n240842344.shtml
 
181
#               added sogou test http://corp.sohu.com/20051130/n240842344.shtml
 
182
#               added Sphere Scout http://www.sphere.com/
 
183
#               added sproose crawler http://www.sproose.com/bot.html
 
184
#               added SyndicAPI http://syndicapi.com/bot.html
 
185
#               added Yahoo! Mindset http://mindset.research.yahoo.com/
 
186
#               added msrabot
 
187
#               added Vagabondo & Vagabondo-WAP http://www.wise-guys.nl/Contact/index.php?botselected=webagents&lang=uk
 
188
#               fixed Missigua Locator detection (Missigua_Locator -> Missigua Locator)
 
189
#               changed echo to echo! to avoid conflict with the bonecho (Firefox 2.0) browser.
 
190
#                       This requires you to reprocess historic logs if you want EchO! to be recognized for older reports.
 
191
# 2006-05-17
 
192
#               added Alpha Search Agent # 62.152.125.60 Eurologon Srl
 
193
#               added Krugle http://www.krugle.com/crawler/info.html the search engine for developers
 
194
#               added Octora Beta Bot http://www.octora.com/ # Blog and Rss Search Engine
 
195
#               added UbiCrawler http://law.dsi.unimi.it/ubicrawler/
 
196
#               added Yahoo! Slurp China http://misc.yahoo.com.cn/help.html
 
197
#                       You must reprocess old logs for the Yahoo! Slurp China bot to be detected in old reports
 
198
# 2006-05-20
 
199
#               added 1-More Scanner http://www.myzips.com/software/1-More-Scanner.phtml
 
200
#               added Accoona-AI-Agent http://www.accoona.com/
 
201
#               added ActiveBookmark http://www.libmaster.com/active_bookmark.php
 
202
#               added BIGLOTRON http://www.biglotron.com/robot.html
 
203
#               added Bookmark-Manager http://bkm.sourceforge.net/
 
204
#               added cbn00glebot 
 
205
#               added Cerberian Drtrs http://www.pgts.com.au/cgi-bin/psql?robot_info=25240
 
206
#               added CFNetwork http://www.cocoadev.com/index.pl?CFNetwork
 
207
#               added CheckWeb link validator http://p.duby.free.fr/chkweb.htm
 
208
#               added Computer and Automation Research Institute Crawler http://www.ilab.sztaki.hu/~stamas/publications/p184-benczur.html
 
209
#               added ConveraCrawler http://www.authoritativeweb.com/crawl/
 
210
#               added ConveraMultiMediaCrawler http://www.authoritativeweb.com/crawl/
 
211
#               added CSE HTML Validator Lite Online http://online.htmlvalidator.com/php/onlinevallite.php
 
212
#               added Cursor http://adcenter.hu/docs/en/bot.html 
 
213
#               added Custo http://www.netwu.com/custo/
 
214
#               added DataFountains/DMOZ Downloader http://infomine.ucr.edu/ 
 
215
#               added Deepindex http://www.deepindex.net/faq.php
 
216
#               added DNSGroup http://www.dnsgroup.com/
 
217
#               added DoCoMo http://www.nttdocomo.co.jp/
 
218
#               added dumm.de-Bot http://www.dumm.de/
 
219
#               added ETS v http://www.freetranslation.com/help/
 
220
#               added eventax http://www.eventax.de/
 
221
#               added FAST Enterprise Crawler * crawleradmin.t-info@telekom.de http://www.telekom.de/
 
222
#               added FAST Enterprise Crawler http://www.fast.no/
 
223
#               added FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de http://www.telekom.de/
 
224
#               added FeedValidator http://feedvalidator.org/
 
225
#               added FilmkameraBot http://www.filmkamera.at/bot.html
 
226
#               added Findexa Crawler http://www.findexa.no/gulesider/article26548.ece 
 
227
#               added Global Fetch http://www.wesonet.com/
 
228
#               added GOFORITBOT http://www.goforit.com/about/
 
229
#               added GoForIt.com http://www.goforit.com/about/
 
230
#               added GPU p2p crawler http://gpu.sourceforge.net/search_engine.php
 
231
#               added HooWWWer http://cosco.hiit.fi/search/hoowwwer/
 
232
#               added HPPrint 
 
233
#               added HTMLParser http://htmlparser.sourceforge.net/
 
234
#               added Hundesuche.com-Bot http://www.hundesuche.com/
 
235
#               added InfoBot http://www.infobot.org/
 
236
#               added InfociousBot http://corp.infocious.com/tech_crawler.php
 
237
#               added InternetSupervision http://internetsupervision.com/
 
238
#               added isearch2006 http://www.yahoo.com.cn/
 
239
#               added IUPUI_Research_Bot http://spamhuntress.com/2005/04/25/a-mail-harvester-visits/
 
240
#               added KalamBot http://64.124.122.251/feedback.html
 
241
#               added kamano.de NewsFeedVerzeichnis http://www.kamano.de/
 
242
#               added Kevin http://dznet.com/kevin/
 
243
#               added KnowItAll http://www.cs.washington.edu/research/knowitall/
 
244
#               added Knowledge.com http://www.knowledge.com/
 
245
#               added Kouaa Krawler http://www.kouaa.com/
 
246
#               added ksibot http://ego.ms.mff.cuni.cz/
 
247
#               added Link Valet Online http://www.htmlhelp.com/tools/valet/
 
248
#               added lwp-request http://search.cpan.org/~gaas/libwww-perl-5.69/bin/lwp-request
 
249
#               added lwp-trivial http://search.cpan.org/src/GAAS/libwww-perl-5.805/lib/LWP/Simple.pm
 
250
#               added MapoftheInternet.com http://MapoftheInternet.com/
 
251
#               added Matrix S.p.A. - FAST Enterprise Crawler http://tin.virgilio.it/
 
252
#               added Megite http://www.megite.com/
 
253
#               added Metaspinner http://index.meta-spinner.de/
 
254
#               added Mini-reptile 
 
255
#               added Misterbot http://www.misterbot.fr/
 
256
#               added Miva http://www.miva.com/
 
257
#               added Mizzu Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_m_141105_2\b 
 
258
#               added MSRBOT http://research.microsoft.com/research/sv/msrbot/
 
259
#               added MS SharePoint Portal Server - MS Search 4.0 Robot http://support.microsoft.com/default.aspx?scid=kb;en-us;284022
 
260
#               added Mydoyouhike http://www.doyouhike.net/my
 
261
#               added NASA Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_140506_2\b
 
262
#               added NetSprint http://www.netsprint.pl/serwis/
 
263
#               added NimbleCrawler http://www.healthline.com/
 
264
#               added OpenWebSpider http://www.openwebspider.org/
 
265
#               added Oracle Ultra Search http://www.oracle.com/technology/products/ultrasearch/index.html
 
266
#               added OSSProxy http://www.marketscore.com/FAQ.Aspx
 
267
#               added passwordmaker.org http://passwordmaker.org/
 
268
#               added PEAR HTTP Request class http://pear.php.net/
 
269
#               added PEERbot http://www.peerbot.com/
 
270
#               added PHP version tracker http://www.nexen.net/phpversion/bot.php
 
271
#               added PictureOfInternet http://malfunction.org/poi/
 
272
#               added plinki http://www.plinki.com/
 
273
#               added Port Huron Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_1133\b
 
274
#               added PostFavorites http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_1135\b 
 
275
#               added ProjectWF-java-test-crawler 
 
276
#               added PyQuery http://sourceforge.net/projects/pyquery/
 
277
#               added Schizozilla http://spamhuntress.com/2005/03/18/gizmo/ 
 
278
#               added Scumbot
 
279
#               added Sensis Web Crawler http://www.sensis.com.au/
 
280
#               added snap.com beta crawler http://www.snap.com/
 
281
#               added Steeler http://www.tkl.iis.u-tokyo.ac.jp/~crawler/ 
 
282
#               added STEROID  Download http://faqs.org.ru/progr/pascal/delphi_internet2.htm
 
283
#               added Suchfin-Bot http://www.suchfin.de/
 
284
#               added Sunrise http://www.sunrisexp.com/
 
285
#               added Tagyu Agent http://www.tagyu.com/
 
286
#               added Tcl http client package http://www.tcl.tk/man/tcl8.4/TclCmd/http.htm
 
287
#               added TeragramCrawlerSURF http://www.teragram.com/
 
288
#               added Test Crawler http://netp.ath.cx/
 
289
#               added UnChaos Bot Hybrid Web Search Engine http://www.unchaos.com/
 
290
#               added unido-bot http://www.unchina.org/unido/unido/our_projects/3_3.html
 
291
#               added UniversalFeedParser http://feedparser.org/ (seen from md301000.inktomisearch.com)
 
292
#               added updated http://www.updated.com/
 
293
#               added Vermut http://vermut.aol.com
 
294
#               added versus crawler from eda.baykan@epfl.ch http://www.epfl.ch/Eindex.html  
 
295
#               added Vespa Crawler (Yahoo Norway?) http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_t_z_030406_1%5Cb
 
296
#               added VSE http://www.vivisimo.com/
 
297
#               added webcrawl.net http://www.webcrawl.net/
 
298
#               added Web Downloader http://www.krasu.ru/soft/chuchelo/
 
299
#               added Webdup http://www.webdup.com/en/index.html
 
300
#               added Wells Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_t_z_1484\b 
 
301
#               added WordPress http://wordpress.org/
 
302
#               added wume crawler http://wume.cse.lehigh.edu/~xiq204/crawler/ 
 
303
#               added Xenu's Link Sleuth (with ')
 
304
#               added xirq http://www.xirq.com/
 
305
#               added yoogliFetchAgent http://www.yoogli.com/
 
306
#               added Z-Add Link Checker http://w3.z-add.co.uk/linkcheck/
 
307
#               -- fix - some robots were reported with _ where _ should have been a space.
 
308
#               changed Xenu Link Sleuth
 
309
#               changed microsoft\_url\_control -> microsoft\surl\scontrol
 
310
#               changed favorites\_sweeper -> favorites\ssweeper
 
311
#               -- updates
 
312
#               updated AskJeeves to Ask
82
313
 
 
314
# to do  MS Search 4.0 Robot
83
315
 
84
316
#package AWSROB;
85
317
 
113
345
'ferret',
114
346
'googlebot',
115
347
'gulliver',
 
348
'virus\_detector',              # Must be before harvest
116
349
'harvest',
117
350
'htdig',
118
351
'linkwalker',
122
355
'myweb',
123
356
'nomad',
124
357
'scooter',
 
358
'yahoo!\sslurp\schina', # Must come before singular slurp or yahoo
125
359
'slurp',
126
360
'^voyager\/',
127
361
'weblayers',
128
362
# Common robots (Not in robot file)
129
363
'antibot',
 
364
'bruinbot',
130
365
'digout4u',
131
 
'echo',
 
366
'echo!',
132
367
'fast\-webcrawler',
133
368
'ia_archiver\-web\.archive\.org', # Must be before ia_archiver to avoid confusion with alexa
134
369
'ia_archiver',
135
370
'jennybot',
136
371
'mercator',
137
372
'netcraft',
 
373
'msnbot\-media',
138
374
'msnbot',
139
375
'petersnews',
140
376
'unlost_web_crawler',
141
377
'voila',
142
378
'webbase',
 
379
'webcollage',
 
380
'cfetch',
143
381
'zyborg',       # Must be before wisenut 
144
382
'wisenutbot'
145
383
);
257
495
'katipo',
258
496
'kilroy',
259
497
'ko_yappo_robot',
 
498
'kummhttp',
260
499
'labelgrabber\.txt',
261
500
'larbin',
262
501
'legs',
350
589
'sqworm',
351
590
'ssearcher',
352
591
'suke',
 
592
'sunrise',
353
593
'suntek',
354
594
'sven',
355
595
'tach_bw',
 
596
'tagyu\sagent',
356
597
'tarantula',
357
598
'tarspider',
358
599
'techbot',
363
604
'tlspider',
364
605
'ucsd',
365
606
'udmsearch',
 
607
'universalfeedparser',
366
608
'urlck',
367
609
'valkyrie',
368
610
'verticrawl',
374
616
'w3m2',
375
617
'wallpaper',
376
618
'wanderer',
377
 
'wapspider',
 
619
'wapspIRLider',
378
620
'webbandit',
379
621
'webcatcher',
380
622
'webcopy',
399
641
'wmir',
400
642
'wolp',
401
643
'wombat',
 
644
'wordpress',
402
645
'worm',
403
646
'wwwc',
404
647
'wz101',
405
648
'xget',
406
649
# Other robots reported by users
 
650
'1\-more\sscanner',
 
651
'accoona\-ai\-agent',
 
652
'activebookmark',
 
653
'adamm\sbot',
407
654
'almaden',
 
655
'aipbot',
 
656
'aleadsoftbot',
 
657
'alpha\ssearch\sagent',
408
658
'aport',
 
659
'archive\.org_bot',
409
660
'argus',                # Must be before nutch
 
661
'arianna\.libero\.it',
 
662
'aspseek',
410
663
'asterias',
411
664
'awbot',
412
665
'baiduspider',
413
666
'becomebot',
414
667
'bender',
 
668
'biglotron',
 
669
'bittorrent\sbot',
 
670
'biz360\sspider',
 
671
'blogbridge\sservice',
415
672
'bloglines',
416
 
'blogpulse',
 
673
'blogpulse',#           added OpenWebSpider http://www.openwebspider.org/
 
674
#               added NimbleCrawler http://www.healthline.com/
 
675
#               added Mydoyouhike http://www.doyouhike.net/my
 
676
#               added PHP version tracker http://www.nexen.net/phpversion/bot.php
 
677
#               added kamano.de NewsFeedVerzeichnis http://www.kamano.de/
 
678
#               added yoogliFetchAgent http://www.yoogli.com/
 
679
#               added ETS v http://www.freetranslation.com/help/
 
680
#               added HPPrint 
 
681
#               added CFNetwork http://www.cocoadev.com/index.pl?CFNetwork
 
682
#               added STEROID  Download http://faqs.org.ru/progr/pascal/delphi_internet2.htm
 
683
#               added OSSProxy http://www.marketscore.com/FAQ.Aspx
 
684
#               added Oracle Ultra Search http://www.oracle.com/technology/products/ultrasearch/index.html
 
685
#               added Web Downloader http://www.krasu.ru/soft/chuchelo/
 
686
#               added lwp-request http://search.cpan.org/~gaas/libwww-perl-5.69/bin/lwp-request
 
687
#               added Sunrise http://www.sunrisexp.com/
 
688
#               added WordPress http://wordpress.org/
 
689
#               added Global Fetch http://www.wesonet.com/
 
690
#               added NASA Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_140506_2\b
 
691
#               added NetSprint http://www.netsprint.pl/serwis/
 
692
#               added Webdup http://www.webdup.com/en/index.html
 
693
#               added Megite http://www.megite.com/
 
694
#               added Mini-reptile 
 
695
#               added lwp-trivial http://search.cpan.org/src/GAAS/libwww-perl-5.805/lib/LWP/Simple.pm
 
696
#               added HTMLParser http://htmlparser.sourceforge.net/
 
697
 
 
698
'blogsearch',
417
699
'blogshares',
418
700
'blogslive',
419
701
'blogssay',
 
702
'bncf\.firenze\.sbn\.it\/raccolta\.txt',
420
703
'bobby',
 
704
'boitho\.com\-dc',
 
705
'bookmark\-manager',
421
706
'boris',
422
707
'bumblebee',
 
708
'candlelight\_favorites\_inspector',
 
709
'cbn00glebot',
 
710
'cerberian\sdrtrs',
 
711
'cfnetwork',
 
712
'cipinetbot',
 
713
'checkweb\slink\svalidator',
 
714
'commons\-httpclient',
 
715
'computer\sand\sautomation\sresearch\sinstitute\scrawler',
 
716
'converamultimediacrawler',
423
717
'converacrawler',
424
718
'cscrawler',
 
719
'cse\shtml\svalidator\slite\sonline',
 
720
'cuasarbot',
 
721
'cursor',
 
722
'custo',
 
723
'datafountains/dmoz\sdownloader',
425
724
'daviesbot',
426
725
'daypopbot',
 
726
'deepindex',
427
727
'dipsie\.bot',
 
728
'dnsgroup',
 
729
'docomo',
 
730
'domainchecker',
428
731
'domainsdb\.net',
 
732
'dulance',
 
733
'dumbot',
 
734
'dumm\.de\-bot',
 
735
'earthcom\.info',
 
736
'easydl',
 
737
'edgeio\-retriever',
 
738
'ets\sv',
429
739
'exactseek',
 
740
'extreme\_picture\_finder',
 
741
'eventax',
430
742
'everbeecrawler',
 
743
'everest\-vulcan',
431
744
'ezresult',
432
745
'enteprise',
 
746
'fast\senterprise\scrawler.*crawleradmin\.t\-info@telekom\.de',
 
747
'fast\senterprise\scrawler.*t\-info_bi_cluster\scrawleradmin\.t\-info@telekom\.de',
 
748
'matrix\ss\.p\.a\.\s\-\sfast\senterprise\scrawler', # must come before fast enterprise crawler
 
749
'fast\senterprise\scrawler',
 
750
'fast\-search\-engine',
 
751
'favorg',
 
752
'favorites\ssweeper',
433
753
'feedburner',
434
754
'feedfetcher\-google',
 
755
'feedflow',
435
756
'feedster',
 
757
'feedvalidator',
 
758
'filmkamerabot',
436
759
'findlinks',
 
760
'findexa\scrawler',
 
761
'fooky\.com\/ScorpionBot',
 
762
'g2crawler',
437
763
'gaisbot',
438
764
'geniebot',
439
765
'gigabot',
440
766
'girafabot',
 
767
'global\sfetch',
441
768
'gnodspider',
 
769
'goforit\.com',
 
770
'goforitbot',
442
771
'grub',
 
772
'gpu\sp2p\scrawler',
443
773
'henrythemiragorobot',
 
774
'heritrix',
444
775
'holmes',
 
776
'hoowwwer',
 
777
'hpprint',
 
778
'htmlparser',
 
779
'html\_link\_validator',
 
780
'httrack',
 
781
'hundesuche\.com\-bot',
 
782
'ichiro',
 
783
'iltrovatore\-setaccio',
 
784
'infobot',
 
785
'infociousbot',
445
786
'infomine',
 
787
'insurancobot',
 
788
'internet\_ninja',
 
789
'internetarchive',
446
790
'internetseer',
 
791
'internetsupervision',
 
792
'irlbot',
 
793
'isearch2006',
 
794
'iupui_research_bot',
 
795
'jrtwine\_software\_check\_favorites\_utility',
447
796
'justview',
 
797
'kalambot',
 
798
'kamano\.de\snewsfeedverzeichnis',
 
799
'kazoombot',
 
800
'kevin',
448
801
'keyoshid', # Must come before Y!J
449
802
'kinjabot',
450
803
'kinja\-imagebot',
 
804
'knowitall',
 
805
'knowledge\.com',
 
806
'kouaa\skrawler',
 
807
'krugle',
 
808
'ksibot',
 
809
'kurzor',
 
810
'lanshanbot',
 
811
'letscrawl\.com',
 
812
'libcrawl',
451
813
'linkbot',
 
814
'link\svalet\sonline',
452
815
'metager\-linkchecker', # Must be before linkchecker
453
816
'linkchecker',
454
817
'livejournal\.com',
455
818
'lmspider',
 
819
'lwp\-request',
 
820
'lwp\-trivial',
456
821
'magpierss',
 
822
'mapoftheinternet\.com',
457
823
'mediapartners\-google',
458
 
'microsoft_url_control',
 
824
'megite',
 
825
'metaspinner',
 
826
'microsoft\surl\scontrol',
 
827
'mini\-reptile',
 
828
'minirank',
 
829
'missigua\slocator',
 
830
'misterbot',
 
831
'miva',
 
832
'mizzu\slabs',
459
833
'mj12bot',
 
834
'mojeekbot',
 
835
'tencenttraveler', # Must be before msiecrawler
460
836
'msiecrawler',
 
837
'ms\ssearch\s4\.0\srobot',
 
838
'msrabot',
 
839
'msrbot',
 
840
'mt::telegraph::agent',
461
841
'nagios',
 
842
'nasa\ssearch',
 
843
'mydoyouhike',
 
844
'netluchs',
 
845
'netsprint',
462
846
'newsgatoronline',
 
847
'nicebot',
 
848
'nimblecrawler',
463
849
'noxtrumbot',
464
 
'nutch',
 
850
'npbot',
 
851
'nutchcvs',
 
852
'nutchosu\-vlib',
 
853
'nutch',  # Must come after other nutch versions
 
854
'ocelli',
 
855
'octora\sbeta\sbot',
 
856
'omniexplorer\_bot',
 
857
'onet\.pl\_sa',
 
858
'onfolio',
465
859
'opentaggerbot',
 
860
'openwebspider',
 
861
'oracle\sultra\ssearch',
 
862
'orbiter',
466
863
'outfoxbot',
 
864
'passwordmaker\.org',
 
865
'pear\shttp\srequest\sclass',
 
866
'peerbot',
467
867
'perman',
 
868
'php\_version\_tracker',
 
869
'php\sversion\stracker',
 
870
'pictureofinternet',
 
871
'ping\.blo\.gs',
 
872
'plinki',
468
873
'pluckfeedcrawler',
469
874
'pompos',
470
875
'popdexter',
 
876
'port\shuron\slabs',
 
877
'postfavorites',
 
878
'projectwf\-java\-test\-crawler',
 
879
'proodlebot',
 
880
'pyquery',
471
881
'rambler',
472
882
'redalert',
473
883
'rojo',
476
886
'rufusbot',
477
887
'sandcrawler',
478
888
'sbider',
 
889
'schizozilla',
 
890
'scumbot',
 
891
'searchguild\_dmoz\_experiment',
479
892
'seekbot',
 
893
'sensis\sweb\scrawler',
480
894
'seznambot',
 
895
'shim\-crawler',
481
896
'shoutcast',
482
897
'slysearch',
483
 
'sohu-search',
 
898
'snap\.com\sbeta\scrawler',
 
899
'sogou\sspider',
 
900
'sogou\stest',
 
901
'sohu\-search',
 
902
'sohu', # "sohu agent"
 
903
'snappy',
 
904
'sphere\sscout',
 
905
'sproose\scrawler',
 
906
'steeler',
 
907
'steroid\s\sdownload',
 
908
'suchfin\-bot',
 
909
'superbot',
484
910
'surveybot',
 
911
'susie',
485
912
'syndic8',
 
913
'syndicapi',
 
914
'synoobot',
 
915
'tcl\shttp\sclient\spackage',
486
916
'technoratibot',
 
917
'teragramcrawlersurf',
 
918
'test\scrawler',
 
919
'testbot',
487
920
't\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e',
488
921
'topicblogs',
489
922
'turnitinbot',
490
923
'turtlescanner',                # Must be before turtle
491
924
'turtle',
 
925
'tutorgigbot',
 
926
'ubicrawler',
492
927
'ultraseek',
 
928
'unchaos\sbot\shybrid\sweb\ssearch\sengine',
 
929
'unido\-bot',
 
930
'up\.browser',
 
931
'updated',
 
932
'ustc\-semantic\-group',
 
933
'vagabondo\-wap',
 
934
'vagabondo',
 
935
'vermut',
 
936
'versus\scrawler\sfrom\seda\.baykan@epfl\.ch',
 
937
'vespa\scrawler',
 
938
'vortex',
 
939
'vse',
493
940
'w3c\-checklink',
494
 
'w3c_css_validator_jfouffa',
 
941
'w3c\_css\_validator\_jfouffa',
495
942
'w3c_validator',
 
943
'wavefire',
496
944
'webclipping\.com',
497
945
'webcompass',
 
946
'webcrawl\.net',
 
947
'web\sdownloader',
 
948
'webdup',
 
949
'webfilter',
 
950
'webindexer',
 
951
'webminer',
 
952
'website\_monitoring\_bot',
498
953
'webvulncrawl',
 
954
'wells\ssearch',
499
955
'wonderer',
 
956
'wume\scrawler',
 
957
'wwweasel',
 
958
'xenu\'s\slink\ssleuth',
 
959
'xenu\slink\ssleuth',
 
960
'xirq',
500
961
'y!j', # Must come after keyoshid Y!J
501
962
'yacy',
502
963
'yahoo\-blogs',
505
966
'yahooseeker\-testing',
506
967
'yahooseeker',
507
968
'yahoo\-mmcrawler',
 
969
'yahoo!\smindset',
508
970
'yandex',
 
971
'yooglifetchagent',
 
972
'z\-add\slink\schecker',
509
973
'zealbot',
 
974
'zspider',
 
975
'zeus',
510
976
'ng\/1\.', # put at end to avoid false positive
511
 
'ng\/2\.' # put at end to avoid false positive
 
977
'ng\/2\.', # put at end to avoid false positive
 
978
'exabot',  # put at end to avoid false positive
 
979
'java'   # put at end to avoid false positive
512
980
);
513
981
@RobotsSearchIDOrder_listgen = (
514
982
# Generic robot
527
995
# Common robots (In robot file)
528
996
'appie','<a href="http://www.walhello.com/" title="Bot home page [new window]" target="_blank">Walhello appie</a>',
529
997
'architext','ArchitextSpider',
530
 
'jeeves','<a href="http://sp.ask.com/docs/about/tech_crawling.html" title="Bot home page [new window]" target="_blank">AskJeeves</a>',
 
998
'jeeves','<a href="http://sp.ask.com/docs/about/tech_crawling.html" title="Bot home page [new window]" target="_blank">Ask</a>',
531
999
'bjaaland','Bjaaland',
532
1000
'ferret','Wild Ferret Web Hopper #1, #2, #3',
533
1001
'googlebot','<a href="http://www.google.com/bot.html" title="Bot home page [new window]" target="_blank">Googlebot</a>',
534
1002
'gulliver','Northern Light Gulliver',
 
1003
'virus\_detector','<a href="http://www.securecomputing.com/" title="virus_harvester@securecomputing.com; Bot home page [new window]" target="_blank">virus_detector</a>',
535
1004
'harvest','Harvest',
536
1005
'htdig','ht://Dig',
537
1006
'linkwalker','LinkWalker',
541
1010
'myweb','Internet Shinchakubin',
542
1011
'nomad','Nomad',
543
1012
'scooter','Scooter',
 
1013
'yahoo!\sslurp\schina','<a href="http://misc.yahoo.com.cn/help.html" title="Bot home page [new window]" target="_blank">Yahoo! Slurp China</a>',
544
1014
'slurp','<a href="http://help.yahoo.com/help/us/ysearch/slurp/" title="Bot home page [new window]" target="_blank">Yahoo Slurp</a>',
545
1015
'^voyager\/','Voyager',
546
 
'weblayers','weblayers',
 
1016
'weblayers','Weblayers',
547
1017
# Common robots (Not in robot file)
548
1018
'antibot','Antibot',
 
1019
'bruinbot','<a href="http://web.archive.org/" title="BruinBot home page [new window]" target="_blank">The web archive</a>',
549
1020
'digout4u','Digout4u',
550
 
'echo','EchO!',
 
1021
'echo!','EchO!',
551
1022
'fast\-webcrawler','Fast-Webcrawler',
552
1023
'ia_archiver\-web\.archive\.org','<a href="http://web.archive.org/" title="Bot home page [new window]" target="_blank">The web archive (IA Archiver)</a>',
553
1024
'ia_archiver','<a href="http://www.alexa.com/" title="Bot home page [new window]" target="_blank">Alexa (IA Archiver)</a>',
554
1025
'jennybot','JennyBot',
555
1026
'mercator','Mercator',
 
1027
'msnbot\-media','<a href="http://search.msn.com/msnbot.htm" title="Bot home page [new window]" target="_blank">MSNBot-media</a>',
556
1028
'msnbot','<a href="http://search.msn.com/msnbot.htm" title="Bot home page [new window]" target="_blank">MSNBot</a>',
557
1029
'netcraft','<a href="http://www.netcraft.com/survey/" title="Bot home page [new window]" target="_blank">Netcraft</a>',
558
1030
'petersnews','Petersnews',
561
1033
'webbase', 'WebBase',
562
1034
'zyborg','<a href="http://www.WISEnutbot.com/" title="wn-14.zyborg@looksmart.net Bot home page [new window]" target="_blank">ZyBorg</a>',
563
1035
'wisenutbot','<a href="http://www.WISEnutbot.com/" title="Bot home page [new window]" target="_blank">WISENutbot</a>',
 
1036
'webcollage','<a href="http://www.jwz.org/webcollage/" title="WebCollage home page [new window]" target="_blank">WebCollage</a>',
 
1037
'cfetch','<a href="http://www.kosmix.com/crawler.html" title="kosmix home page [new window]" target="_blank">Cfetch</a>',
564
1038
# Less common robots (In robot file)
565
1039
'[^a]fish','Fish search',
566
1040
'abcdatos','ABCdatos BotLink',
674
1148
'katipo','Katipo',
675
1149
'kilroy','Kilroy',
676
1150
'ko_yappo_robot','KO_Yappo_Robot',
 
1151
'kummhttp','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_g_l_301105_2\b" title="Bot documentation page [new window]" target="_blank">KummHttp</a>',
677
1152
'labelgrabber\.txt','LabelGrabber',
678
1153
'larbin','<a href="http://para.inria.fr/~ailleret/larbin/index-eng.html" title="Bot home page [new window]" target="_blank">larbin</a>',
679
1154
'legs','legs',
683
1158
'logo_gif','logo.gif Crawler',
684
1159
'macworm','Mac WWWWorm',
685
1160
'lmspider','<a href="http://www.nuance.com/" title="Bot home page lmspider@scansoft.com [new window]" target="_blank">lmspider</a>',
 
1161
'lwp\-request','<a href="http://search.cpan.org/~gaas/libwww-perl-5.69/bin/lwp-request" title="lwp-request home page [new window]" target="_blank">lwp-request</a>',
 
1162
'lwp\-trivial','<a href="http://search.cpan.org/src/GAAS/libwww-perl-5.805/lib/LWP/Simple.pm" title="lwp-trivial home page [new window]" target="_blank">lwp-trivial</a>',
686
1163
'magpie','<a href="http://magpierss.sf.net/" title="Bot home page [new window]" target="_blank">MagpieRSS</a>',
687
1164
'marvin','marvin/infoseek',
688
1165
'mattie','Mattie',
768
1245
'ssearcher','Site Searcher',
769
1246
'sqworm','<a href="http://www.websense.com/" title="Bot home page (source: http://www.pgts.com.au/) [new window]" target="_blank">Sqworm</a>',
770
1247
'suke','Suke',
 
1248
'sunrise','<a href="http://www.sunrisexp.com/" title="Sunrise home page [new window]" target="_blank">Sunrise</a>',
771
1249
'suntek','suntek search engine',
772
1250
'sven','Sven',
773
1251
'tach_bw','TACH Black Widow',
 
1252
'tagyu\sagent','<a href="http://www.tagyu.com/" title="Bot home page [new window]" target="_blank">Tagyu Agent</a>',
774
1253
'tarantula','Tarantula',
775
1254
'tarspider','tarspider',
776
1255
'techbot','TechBOT',
781
1260
'tlspider','TLSpider',
782
1261
'ucsd','UCSD Crawl',
783
1262
'udmsearch','UdmSearch',
 
1263
'universalfeedparser','<a href="http://feedparser.org/" title="Bot home page [new window]" target="_blank">UniversalFeedParser</a>',
784
1264
'urlck','URL Check',
785
1265
'valkyrie','Valkyrie',
786
1266
'verticrawl','Verticrawl',
817
1297
'wmir','w3mir',
818
1298
'wolp','WebStolperer',
819
1299
'wombat','The Web Wombat',
 
1300
'wordpress','<a href="http://wordpress.org/" title="WordPress home page [new window]" target="_blank">WordPress</a>',
820
1301
'worm','The World Wide Web Worm',
821
1302
'wwwc','WWWC Ver 0.2.5',
822
1303
'wz101','WebZinger',
823
1304
'xget','XGET',
824
1305
# Other robots reported by users
 
1306
'1\-more\sscanner','<a href="http://www.myzips.com/software/1-More-Scanner.phtml" title="1-More Scanner home page [new window]" target="_blank">1-More Scanner</a>',
 
1307
'accoona\-ai\-agent','<a href="http://www.accoona.com/" title="Accoona-AI-Agent home page [new window]" target="_blank">Accoona-AI-Agent</a>',
 
1308
'activebookmark','<a href="http://www.libmaster.com/active_bookmark.php" title="ActiveBookmark home page [new window]" target="_blank">ActiveBookmark</a>',
 
1309
'adamm\sbot','<a href="http://home.blic.net/adamm/" title="Bot home page [new window]" target="_blank">AdamM Bot</a>',
825
1310
'almaden','<a href="http://www.almaden.ibm.com/cs/crawler" title="IBM Almaden Research Center WebFountain&trade; Bot home page [new window]" target="_blank">IBM Almaden</a> Research Center WebFountain&trade;',
 
1311
'aipbot','<a href="http://www.aipbot.com/" title="aipbot@aipbot.com Bot home page [new window]" target="_blank">aipbot</a>',  
 
1312
'aleadsoftbot','<a href="http://www.aleadsoft.com/bot.htm" title="ALeadSoftbot home page [new window]" target="_blank">ALeadSoftbot</a>',
 
1313
'alpha\ssearch\sagent','Alpha Search Agent',
826
1314
'aport', 'Aport',
 
1315
'archive\.org_bot','<a href="http://crawls.archive.org/collections/bncf/crawl.html" title="Bot home page [new window]" target="_blank">archive.org bot</a>',
827
1316
'argus','<a href="http://www.simpy.com/bot.html" title="feedback@simpy.com Bot home page [new window]" target="_blank">Argus</a>',
 
1317
'arianna\.libero\.it','<a href="http://arianna.libero.it/" title="Bot home page [new window]" target="_blank">arianna.libero.it</a>',
 
1318
'aspseek','<a href="http://www.aspseek.org/" title="Bot home page [new window]" target="_blank">ASPseek</a>',
828
1319
'asterias', 'Asterias',
829
1320
'awbot', 'AWBot',
830
1321
'baiduspider','<a href="http://www.baidu.com/search/spider.html" title="Bot home page [new window]" target="_blank">BaiDuSpider</a>',
831
1322
'becomebot', '<a href="http://www.become.com/site_owners.html" title="Bot home page [new window]" target="_blank">BecomeBot</a>',
832
1323
'bender','<a href="http://bender.ucr.edu/" title="Bot home page [new window]" target="_blank">bender</a> <a href="http://ivia.ucr.edu/manuals/NiFC/current/index.shtml" title="Bot home page [new window]" target="_blank">focused_crawler</a>',
 
1324
'biglotron','<a href="http://www.biglotron.com/robot.html" title="Bot home page [new window]" target="_blank">Biglotron</a>',
 
1325
'bittorrent\sbot','<a href="http://www.bittorrent.com/" title="Bot home page [new window]" target="_blank">BitTorrent Bot</a>',
 
1326
'biz360\sspider','<a href="http://www.biz360.com/" title="blogsmanager@biz360.com Bot home page [new window]" target="_blank">Biz360 spider</a>',
 
1327
'blogbridge\sservice','<a href="http://www.blogbridge.com/" title="Bot home page [new window]" target="_blank">BlogBridge Service</a>',
833
1328
'bloglines','<a href="http://www.bloglines.com/" title="Bot home page [new window]" target="_blank">Bloglines</a>',
834
1329
'blogpulse','<a href="http://www.intelliseek.com/" title="Bot home page [new window]" target="_blank">BlogPulse ISSpider intelliseek.com</a>',
 
1330
'blogsearch','<a href="http://www.icerocket.com/" title="Bot home page [new window]" target="_blank">BlogSearch</a>',
835
1331
'blogshares','<a href="http://blogshares.com/help.php?node=7" title="Bot home page [new window]" target="_blank">Blogshares Spiders</a>',
836
1332
'blogslive','<a href="http://www.blogslive.com/" title="info@blogslive.com Bot home page [new window]" target="_blank">Blogslive</a>',
837
1333
'blogssay','<a href="http://www.blogssay.com/" title="Bot home page [new window]" target="_blank">BlogsSay :: RSS Search Crawler</a>',
838
 
'bobby', 'Bobby', 
 
1334
'bncf\.firenze\.sbn\.it\/raccolta\.txt','<a href="http://www.bncf.firenze.sbn.it/raccolta.txt" title="Bot home page [new window]" target="_blank">Biblioteca Nazionale Centrale di Firenze</a>',
 
1335
'bobby', 'Bobby',
 
1336
'boitho\.com\-dc','<a href="http://www.boitho.com/dcbot.html" title="Bot home page [new window]" target="_blank">boitho.com-dc</a>',
 
1337
'bookmark\-manager','<a href="http://bkm.sourceforge.net/" title="Bookmark-Manager home page [new window]" target="_blank">Bookmark-Manager</a>',
839
1338
'boris', 'Boris',
840
1339
'bumblebee', 'Bumblebee (relevare.com)',
841
 
'converacrawler','<a href="http://www.authoritativeweb.com/crawl" title="Bot home page [new window]" target="_blank">ConveraCrawler</a>',
 
1340
'candlelight\_favorites\_inspector','<a href="http://www.candlelight.com/home.html" title="Candlelight_Favorites_Inspector  home page [new window]" target="_blank">Candlelight_Favorites_Inspector</a>',
 
1341
'cbn00glebot','cbn00glebot',
 
1342
'cerberian\sdrtrs','<a href="http://www.pgts.com.au/cgi-bin/psql?robot_info=25240" title="Bot home page [new window]" target="_blank">Cerberian Drtrs</a>',
 
1343
'cfnetwork','<a href="http://www.cocoadev.com/index.pl?CFNetwork" title="CFNetwork home page [new window]" target="_blank">CFNetwork</a>',
 
1344
'cipinetbot','<a href="http://www.cipinet.com/bot.html" title="CipinetBot home page [new window]" target="_blank">CipinetBot</a>',
 
1345
'checkweb\slink\svalidator','<a href="http://p.duby.free.fr/chkweb.htm" title="CheckWeb link validator home page [new window]" target="_blank">CheckWeb link validator</a>',
 
1346
'commons\-httpclient','<a href="http://jakarta.apache.org/commons/httpclient/" title="Bot home page [new window]" target="_blank">Jakarta commons-httpclient</a>', 
 
1347
'computer\sand\sautomation\sresearch\sinstitute\scrawler','<a href="http://www.ilab.sztaki.hu/~stamas/publications/p184-benczur.html" title="Computer and Automation Research Institute Crawler home page [new window]" target="_blank">Computer and Automation Research Institute Crawler</a>',
 
1348
'converamultimediacrawler','<a href="http://www.authoritativeweb.com/crawl/" title="ConveraMultiMediaCrawler home page [new window]" target="_blank">ConveraMultiMediaCrawler</a>',
 
1349
'converacrawler','<a href="http://www.authoritativeweb.com/crawl/" title="ConveraCrawler home page [new window]" target="_blank">ConveraCrawler</a>',
842
1350
'cscrawler','CsCrawler',
 
1351
'cse\shtml\svalidator\slite\sonline','<a href="http://online.htmlvalidator.com/php/onlinevallite.php" title="CSE HTML Validator Lite Online home page [new window]" target="_blank">CSE HTML Validator Lite Online</a>','cuasarbot','<a href="http://www.cuasar.com/" title="Cuasarbot home page [new window]" target="_blank">Cuasarbot</a>',
 
1352
'cursor','<a href="http://adcenter.hu/docs/en/bot.html " title="Cursor home page [new window]" target="_blank">Cursor</a>',
 
1353
'custo','<a href="http://www.netwu.com/custo/" title="Custo home page [new window]" target="_blank">Custo</a>',
 
1354
'datafountains/dmoz\sdownloader','<a href="http://infomine.ucr.edu/ " title="DataFountains/DMOZ Downloader home page [new window]" target="_blank">DataFountains/DMOZ Downloader</a>',
843
1355
'daviesbot', 'DaviesBot',
844
1356
'daypopbot', 'DayPop',
 
1357
'deepindex','<a href="http://www.deepindex.net/faq.php" title="Deepindex home page [new window]" target="_blank">Deepindex</a>',
845
1358
'dipsie\.bot','<a href="http://www.dipsie.com/bot/" title="Bot home page [new window]" target="_blank">Dipsie</a>',
 
1359
'dnsgroup','<a href="http://www.dnsgroup.com/" title="DNSGroup home page [new window]" target="_blank">DNSGroup</a>',
 
1360
'docomo','<a href="http://www.nttdocomo.co.jp/" title="DoCoMo home page [new window]" target="_blank">DoCoMo</a>',
 
1361
'domainchecker','<a href="http://net-promoter.com/" title="DomainChecker home page (not confirmed) [new window]" target="_blank">DomainChecker</a>',
846
1362
'domainsdb\.net','<a href="http://domainsdb.net/" title="Bot home page [new window]" target="_blank">DomainsDB.net</a>',
 
1363
'dulance','<a href="http://www.dulance.com/bot.jsp" title="Bot home page [new window]" target="_blank">Dulance</a>',
 
1364
'dumbot','<a href="http://www.dumbfind.com/" title="Dumbot home page [new window]" target="_blank">Dumbot</a>',
 
1365
'dumm\.de\-bot','<a href="http://www.dumm.de/" title="dumm.de-Bot home page [new window]" target="_blank">dumm.de-Bot</a>',
 
1366
'earthcom\.info','<a href="http://www.earthcom.info/" title="Bot home page [new window]" target="_blank">EARTHCOM.info</a>',
 
1367
'easydl','<a href="http://keywen.com/Encyclopedia/Bot/" title="EasyDL  home page [new window]" target="_blank">EasyDL</a>',
 
1368
'edgeio\-retriever','<a href="http://www.edgeio.com/" title="Bot home page [new window]" target="_blank">edgeio-retriever</a>',
 
1369
'ets\sv','<a href="http://www.freetranslation.com/help/" title="ETS home page [new window]" target="_blank">ETS</a> Enterprise Translation Server',
847
1370
'exactseek','ExactSeek Crawler',
 
1371
'extreme\_picture\_finder','<a href="http://www.exisoftware.com/" title="Extreme_Picture_Finder home page [new window]" target="_blank">Extreme_Picture_Finder</a>',
 
1372
'eventax','<a href="http://www.eventax.de/" title="eventax home page [new window]" target="_blank">eventax</a>',
848
1373
'everbeecrawler','EverbeeCrawler',
 
1374
'everest\-vulcan','<a href="http://everest.vulcan.com/crawlerhelp" title="Bot home page [new window]" target="_blank">Everest-Vulcan</a>',
849
1375
'ezresult', 'Ezresult',
850
1376
'enteprise','<a href="http://www.fastsearch.com/" title="Bot home page [new window]" target="_blank">Fast Enteprise Crawler</a>',
 
1377
'fast\-search\-engine','<a href="http://www.fast-search-engine.com/" title="Bot home page [new window]" target="_blank">Fast-Search-Engine</a> (not fastsearch.com)',
 
1378
'fast\senterprise\scrawler','<a href="http://www.fast.no/" title="FAST Enterprise Crawler home page [new window]" target="_blank">FAST Enterprise Crawler</a>',
 
1379
'fast\senterprise\scrawler.*scrawleradmin\.t\-info@telekom\.de','<a href="http://www.telekom.de/" title="FAST Enterprise Crawler * crawleradmin.t-info@telekom.de home page [new window]" target="_blank">FAST Enterprise Crawler * crawleradmin.t-info@telekom.de</a>',
 
1380
'matrix\ss\.p\.a\.\s\-\sfast\senterprise\scrawler','<a href="http://tin.virgilio.it/" title="Matrix S.p.A. - FAST Enterprise Crawler home page [new window]" target="_blank">Matrix S.p.A. - FAST Enterprise Crawler</a>',
 
1381
'fast\senterprise\scrawler.*t\-info_bi_cluster\scrawleradmin\.t\-info@telekom\.de','<a href="http://www.telekom.de/" title="FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de home page [new window]" target="_blank">FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de</a>',
 
1382
'favorg','<a href="http://www.pcmag.com/article2/0,4149,108438,00.asp" title="FavOrg home page [new window]" target="_blank">FavOrg</a>',
 
1383
'favorites\ssweeper','<a href="http://www.manitools.com/favsweep/" title="Favorites_Sweeper home page [new window]" target="_blank">Favorites Sweeper</a>',
851
1384
'feedburner', 'Feedburner',
852
1385
'feedfetcher\-google','<a href="http://www.google.com/feedfetcher.html" title="Bot home page [new window]" target="_blank">Feedfetcher-Google</a>',
853
 
'feedster', '<a href="http://www.feedster.com/" title="Bot home page [new window]" target="_blank">Feedster</a>',
854
 
'geniebot','<a href="http://www.genieknows.com/" title="Bot home page [new window]" target="_blank">geniebot</a>',
855
 
'findlinks','<a href="http://wortschatz.uni-leipzig.de/findlinks/" title="Bot home page [new window]" target="_blank">findlinks</a>',
 
1386
'feedflow','<a href="http://feedflow.com/about" title="Bot home page [new window]" target="_blank">FeedFlow</a>',
 
1387
'feedster','<a href="http://www.feedster.com/" title="Bot home page [new window]" target="_blank">Feedster</a>',
 
1388
'feedvalidator','<a href="http://feedvalidator.org/" title="FeedValidator home page [new window]" target="_blank">FeedValidator</a>',
 
1389
'filmkamerabot','<a href="http://www.filmkamera.at/bot.html" title="FilmkameraBot home page [new window]" target="_blank">FilmkameraBot</a>',
 
1390
'findexa\scrawler','<a href="http://www.findexa.no/gulesider/article26548.ece " title="Findexa Crawler home page [new window]" target="_blank">Findexa Crawler</a>',
 
1391
'geniebot','<a href="http://www.genieknows.com/" title="Bot home page [new window]" target="_blank">Geniebot</a>',
 
1392
'findlinks','<a href="http://wortschatz.uni-leipzig.de/findlinks/" title="Bot home page [new window]" target="_blank">Findlinks</a>',
 
1393
'fooky\.com\/ScorpionBot','<a href="http://www.fooky.com/scorpionbots" title="Fooky.com/ScorpionBot/ScoutOut home page [new window]" target="_blank">Fooky.com/ScorpionBot/ScoutOut</a>',
 
1394
'g2crawler','<a href="http://crawler.instantnetworks.net/" title="Bot home page (nobody@airmail.net) [new window]" target="_blank">G2Crawler</a>',
856
1395
'gaisbot','<a href="http://gais.cs.ccu.edu.tw/robot.php" title="Bot home page [new window]" target="_blank">Gaisbot</a>',
857
1396
'gigabot','<a href="http://www.gigablast.com/spider.html" title="Bot home page [new window]" target="_blank">GigaBot</a>',
858
1397
'girafabot','<a href="http://www.girafa.com/" title="Bot home page [new window]" target="_blank">Girafabot</a>',
 
1398
'global\sfetch','<a href="http://www.wesonet.com/" title="Global Fetch home page [new window]" target="_blank">Global Fetch</a>',
859
1399
'gnodspider','GNOD Spider',
 
1400
'goforit\.com','<a href="http://www.goforit.com/about/" title="GoForIt.com home page [new window]" target="_blank">GoForIt.com</a>',
 
1401
'goforitbot','<a href="http://www.goforit.com/about/" title="GOFORITBOT home page [new window]" target="_blank">GOFORITBOT</a>',
 
1402
'gpu\sp2p\scrawler','<a href="http://gpu.sourceforge.net/search_engine.php" title="Bot home page [new window]" target="_blank">GPU p2p crawler</a>',
860
1403
'grub','Grub.org',
861
 
'henrythemiragorobot', 'Mirago',
 
1404
'henrythemiragorobot', '<a href="http://www.miragorobot.com/scripts/mrinfo.asp" title="Bot home page [new window]" target="_blank">Mirago</a>',
 
1405
'heritrix','<a href="http://crawler.archive.org/" title="(used by a few different companies) Bot home page [new window]" target="_blank">Heritrix</a>',
862
1406
'holmes', 'Holmes',
 
1407
'hoowwwer','<a href="http://cosco.hiit.fi/search/hoowwwer/" title="HooWWWer home page [new window]" target="_blank">HooWWWer</a>',
 
1408
'hpprint','HPPrint',
 
1409
'htmlparser','<a href="http://htmlparser.sourceforge.net/" title="HTMLParser home page [new window]" target="_blank">HTMLParser</a>',
 
1410
'html\_link\_validator','<a href="http://www.lithopssoft.com/ " title="Html_Link_Validator home page [new window]" target="_blank">Html_Link_Validator</a>',
 
1411
'httrack','<a href="http://www.httrack.com/" title="Bot home page [new window]" target="_blank">HTTrack off-line browser</a>',
 
1412
'hundesuche\.com\-bot','<a href="http://www.hundesuche.com/" title="Hundesuche.com-Bot home page [new window]" target="_blank">Hundesuche.com-Bot</a>',
 
1413
'ichiro','<a href="http://help.goo.ne.jp/door/crawlerE.html" title="Bot home page [new window]" target="_blank">ichiro</a>',
 
1414
'iltrovatore\-setaccio','<a href="http://www.iltrovatore.it/aiuto/motore_di_ricerca.html" title="bot@iltrovatore.it IlTrovatore-Setaccio home page [new window]" target="_blank">IlTrovatore-Setaccio</a>',
 
1415
'infobot','<a href="http://www.infobot.org/" title="InfoBot home page [new window]" target="_blank">InfoBot</a>',
 
1416
'infociousbot','<a href="http://corp.infocious.com/tech_crawler.php" title="InfociousBot home page [new window]" target="_blank">InfociousBot</a>',
863
1417
'infomine','<a href="http://infomine.ucr.edu/useragents" title="Bot home page [new window]" target="_blank">INFOMINE VLCrawler</a>',
 
1418
'insurancobot','<a href="http://www.fastspywareremoval.com/" title="InsurancoBot home page [new window]" target="_blank">InsurancoBot</a>',
 
1419
'internet\_ninja','<a href="http://www.dti.ne.jp/  " title="Internet_Ninja home page [new window]" target="_blank">Internet_Ninja </a>',
 
1420
'internetarchive','<a href="http://lucene.apache.org/nutch/bot.html " title="InternetArchive home page [new window]" target="_blank">InternetArchive</a>',
864
1421
'internetseer', 'InternetSeer',
 
1422
'internetsupervision','<a href="http://internetsupervision.com/" title="InternetSupervision home page [new window]" target="_blank">InternetSupervision</a>',
 
1423
'irlbot','<a href="http://irl.cs.tamu.edu/crawler" title="Bot home page [new window]" target="_blank">IRLbot</a>',
 
1424
'isearch2006','<a href="http://www.yahoo.com.cn/" title="isearch2006 home page [new window]" target="_blank">isearch2006</a>',
 
1425
'iupui_research_bot','<a href="http://spamhuntress.com/2005/04/25/a-mail-harvester-visits/" title="IUPUI_Research_Bot home page [new window]" target="_blank">IUPUI_Research_Bot</a>',
 
1426
'jrtwine\_software\_check\_favorites\_utility','<a href="http://www.jrtwine.com/Products/CheckFavs/" title="JRTwine_Software_Check_Favorites_Utility  home page [new window]" target="_blank">JRTwine_Software_Check_Favorites_Utility</a>',
865
1427
'justview', 'JustView',
866
 
'keyoshid','<a href="http://www.yahoo.co.jp/" title="Bot home page [new window]" target="_blank">Yahoo! Japan keyoshid robot study</a>','kinjabot', 'Kinjabot',
 
1428
'kalambot','<a href="http://64.124.122.251/feedback.html" title="KalamBot home page [new window]" target="_blank">KalamBot</a>',
 
1429
'kamano\.de\snewsfeedverzeichnis','<a href="http://www.kamano.de/" title="kamano.de NewsFeedVerzeichnis home page [new window]" target="_blank">kamano.de NewsFeedVerzeichnis</a>',
 
1430
'kazoombot','<a href="http://www.kazoom.ca/bot.html" title="kazoombot@kazoom.ca KazoomBot home page [new window]" target="_blank">KazoomBot</a>',
 
1431
'kevin','<a href="http://dznet.com/kevin/" title="Kevin home page [new window]" target="_blank">Kevin</a>',
 
1432
'keyoshid','<a href="http://www.yahoo.co.jp/" title="Bot home page [new window]" target="_blank">Yahoo! Japan keyoshid robot study</a>',
 
1433
'kinjabot', 'Kinjabot',
867
1434
'kinja\-imagebot', 'Kinja Imagebot',
 
1435
'knowitall','<a href="http://www.cs.washington.edu/research/knowitall/" title="KnowItAll home page [new window]" target="_blank">KnowItAll</a>',
 
1436
'knowledge\.com','<a href="http://www.knowledge.com/" title="Knowledge.com home page [new window]" target="_blank">Knowledge.com</a>',
 
1437
'kouaa\skrawler','<a href="http://www.kouaa.com/" title="Kouaa Krawler home page [new window]" target="_blank">Kouaa Krawler</a>',
 
1438
'krugle','<a href="http://www.krugle.com/crawler/info.html" title="Bot home page [new window]" target="_blank">Krugle</a>',
 
1439
'ksibot','<a href="http://ego.ms.mff.cuni.cz/" title="Bot home page [new window]" target="_blank">ksibot</a>',
 
1440
'kurzor','<a href="http://www.easymail.hu/" title="cursor@easymail.hu Kurzor home page [new window]" target="_blank">Kurzor</a>',
 
1441
'lanshanbot','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=%5Cbid_g_l_140406_1%5Cb" title="Bot Information [new window]" target="_blank">lanshanbot</a>',
 
1442
'letscrawl\.com','<a href="http://letscrawl.com/" title="Bot home page [new window]" target="_blank">LetsCrawl.com</a>',
 
1443
'libcrawl','Crawl libcrawl',
 
1444
'link\svalet\sonline','<a href="http://www.htmlhelp.com/tools/valet/" title="Link Valet Online home page [new window]" target="_blank">Link Valet Online</a>',
868
1445
'linkbot','LinkBot',
869
1446
'linkchecker','<a href="http://linkchecker.sourceforge.net" title="Bot home page [new window]" target="_blank">LinkChecker</a>',
870
1447
'livejournal\.com', 'LiveJournal.com',
871
1448
'magpierss', 'MagpieRSS',
 
1449
'mapoftheinternet\.com','<a href="http://MapoftheInternet.com/" title="MapoftheInternet.com home page [new window]" target="_blank">MapoftheInternet.com</a>',
872
1450
'mediapartners\-google','<a href="https://adwords.google.com/" title="Bot home page [new window]" target="_blank">Google AdSense</a>',
 
1451
'megite','<a href="http://www.megite.com/" title="Megite home page [new window]" target="_blank">Megite</a>',
873
1452
'metager\-linkchecker','MetaGer LinkChecker',
874
 
'microsoft_url_control','Microsoft URL Control',
 
1453
'metaspinner','<a href="http://index.meta-spinner.de/" title="Metaspinner home page [new window]" target="_blank">Metaspinner</a>',
 
1454
'microsoft\surl\scontrol','<a href="http://www.webmasterworld.com/forum11/1005.htm" title="Microsoft URL Control  home page [new window]" target="_blank">Microsoft URL Control</a>',
 
1455
'minirank','<a href="http://minirank.com/" title="miniRank home page [new window]" target="_blank">miniRank</a>',
 
1456
'mini\-reptile','Mini-reptile',
 
1457
'missigua\slocator','<a href="http://www.webmasterworld.com/forum11/2690.htm" title="Missigua_Locator  home page [new window]" target="_blank">Missigua_Locator</a>',
 
1458
'misterbot','<a href="http://www.misterbot.fr/" title="Misterbot home page [new window]" target="_blank">Misterbot</a>',
 
1459
'miva','<a href="http://www.miva.com/" title="Miva home page [new window]" target="_blank">Miva</a>',
 
1460
'mizzu\slabs','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_m_141105_2\b " title="Mizzu Labs home page [new window]" target="_blank">Mizzu Labs</a>',
 
1461
'mj12bot','<a href="http://majestic12.co.uk/bot.php" title="Bot home page. [new window]" target="_blank">MJ12bot</a>',
 
1462
'mojeekbot','<a href="http://www.mojeek.com/bot.html" title="Bot home page. [new window]" target="_blank">MojeekBot</a>',
 
1463
'tencenttraveler','TencentTraveler',    # Must be before msiecrawler.
 
1464
'msiecrawler','<a href="http://msdn.microsoft.com/workshop/delivery/offline/linkrel.asp" title="Bot home page. [new window]" target="_blank">MSIECrawler</a>',
 
1465
'ms\ssearch\s4\.0\srobot','<a href="http://support.microsoft.com/default.aspx?scid=kb;en-us;284022" title="Bot home page. [new window]" target="_blank">MS SharePoint Portal Server - MS Search 4.0 Robot</a>',
 
1466
'msrabot','msrabot',
 
1467
'msrbot','<a href="http://research.microsoft.com/research/sv/msrbot/" title="MSRBOT home page [new window]" target="_blank">MSRBOT</a>',
 
1468
'mt::telegraph::agent','MT::Telegraph::Agent',
 
1469
'mydoyouhike','<a href="http://www.doyouhike.net/my" title="Mydoyouhike home page [new window]" target="_blank">Mydoyouhike</a>',
875
1470
'nagios','Nagios',
 
1471
'nasa\ssearch','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_n_s_140506_2\b" title="NASA Search home page [new window]" target="_blank">NASA Search</a>',
 
1472
'netluchs','<a href="http://www.netluchs.de/" title="Bot home page. [new window]" target="_blank">Netluchs</a>',
 
1473
'netsprint','<a href="http://www.netsprint.pl/serwis/" title="NetSprint home page [new window]" target="_blank">NetSprint</a>',
876
1474
'newsgatoronline', 'NewsGator Online',
 
1475
'nicebot','<a href="http://www.egghelp.org/setup.htm" title="Bot home page (there may be others) [new window]" target="_blank">nicebot</a>',
 
1476
'nimblecrawler','<a href="http://www.healthline.com/" title="NimbleCrawler home page [new window]" target="_blank">NimbleCrawler</a>',
877
1477
'noxtrumbot','<a href="http://www.noxtrum.com/" title="Bot home page [new window]" target="_blank">noxtrumbot</a>',
 
1478
'npbot','<a href="http://www.nameprotect.com/botinfo.html" title="NPBot home page [new window]" target="_blank">NPBot</a>',
 
1479
'nutchcvs','<a href="http://lucene.apache.org/nutch/bot.html" title="NutchCVS home page [new window]" target="_blank">NutchCVS</a>',
 
1480
'nutchosu\-vlib','<a href="http://lucene.apache.org/nutch/bot.html" title="NutchOSU-VLIB home page [new window]" target="_blank">NutchOSU-VLIB</a>',
878
1481
'nutch','<a href="http://lucene.apache.org/nutch/" title="Bot home page. Used by many, including Looksmart. [new window]" target="_blank">Nutch</a>',
879
 
'mj12bot','<a href="http://majestic12.co.uk/bot.php" title="Bot home page. [new window]" target="_blank">MJ12bot</a>',
880
 
'msiecrawler','<a href="http://msdn.microsoft.com/workshop/delivery/offline/linkrel.asp" title="Bot home page. [new window]" target="_blank">MSIECrawler</a>',
 
1482
'ocelli','<a href="http://www.globalspec.com/Ocelli/" title="Ocelli home page [new window]" target="_blank">Ocelli</a>',
 
1483
'octora\sbeta\sbot','<a href="http://www.octora.com/" title="Bot home page [new window]" target="_blank">Octora Beta Bot</a>',
 
1484
'omniexplorer\_bot','<a href="http://www.omni-explorer.com/" title="Bot home page. [new window]" target="_blank">OmniExplorer Bot</a>',
 
1485
'onet\.pl\_sa','<a href="http://szukaj.onet.pl/" title="Onet.pl_SA home page [new window]" target="_blank">Onet.pl_SA</a>',
 
1486
'onfolio','<a href="http://www.onfolio.com/" title="Bot home page [new window]">Onfolio</a>',
881
1487
'opentaggerbot','<a href="http://www.opentagger.com/opentaggerbot.htm" title="Bot home page [new window]">OpenTaggerBot</a>',
 
1488
'openwebspider','<a href="http://www.openwebspider.org/" title="OpenWebSpider home page [new window]" target="_blank">OpenWebSpider</a>',
 
1489
'oracle\sultra\ssearch','<a href="http://www.oracle.com/technology/products/ultrasearch/index.html" title="Oracle Ultra Search home page [new window]" target="_blank">Oracle Ultra Search</a>',
 
1490
'orbiter','<a href="http://www.dailyorbit.com/bot.htm" title="Orbiter home page [new window]" target="_blank">Orbiter</a>',
882
1491
'outfoxbot','<a href="mailto:outfox.agent@gmail.com?subject=Outfox Bot Information" title="Bot e-mail.">OutfoxBot</a>',
 
1492
'passwordmaker\.org','<a href="http://passwordmaker.org/" title="passwordmaker.org home page [new window]" target="_blank">passwordmaker.org</a>',
 
1493
'pear\shttp\srequest\sclass','<a href="http://pear.php.net/" title="PEAR HTTP Request class home page [new window]" target="_blank">PEAR HTTP Request class</a>',
 
1494
'peerbot','<a href="http://www.peerbot.com/" title="PEERbot home page [new window]" target="_blank">PEERbot</a>',
883
1495
'perman', 'Perman surfer',
 
1496
'php\_version\_tracker','<a href="http://www.nexen.net/phpversion/bot.php" title="PHP_version_tracker home page [new window]" target="_blank">PHP_version_tracker</a>',
 
1497
'php\sversion\stracker','<a href="http://www.nexen.net/phpversion/bot.php" title="PHP version tracker home page [new window]" target="_blank">PHP version tracker</a>',
 
1498
'pictureofinternet','<a href="http://malfunction.org/poi/" title="PictureOfInternet home page [new window]" target="_blank">PictureOfInternet</a>',
 
1499
'ping\.blo\.gs','<a href="http://blo.gs/ping.php" title="Bot home page. [new window]" target="_blank">ping.blo.gs</a>',
 
1500
'plinki','<a href="http://www.plinki.com/" title="plinki home page [new window]" target="_blank">plinki</a>',
884
1501
'pluckfeedcrawler','<a href="http://www.pluck.com/" title="Bot home page. [new window]" target="_blank">PluckFeedCrawler</a>',
885
 
'pompos','Pompos',
 
1502
'pompos','<a href="http://dir.com/pompos.html" title="Bot home page. [new window]" target="_blank">Pompos</a>',
886
1503
'popdexter','Popdexter',
 
1504
'port\shuron\slabs','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_n_s_1133\b" title="Port Huron Labs home page [new window]" target="_blank">Port Huron Labs</a>',
 
1505
'postfavorites','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_n_s_1135\b " title="PostFavorites home page [new window]" target="_blank">PostFavorites</a>',
 
1506
'projectwf\-java\-test\-crawler','ProjectWF-java-test-crawler',
 
1507
'proodlebot','<a href="http://www.proodle.com/" title="proodleBot home page [new window]" target="_blank">proodleBot</a>',
 
1508
'pyquery','<a href="http://sourceforge.net/projects/pyquery/" title="PyQuery home page [new window]" target="_blank">PyQuery</a>',
887
1509
'rambler','<a href="http://www.rambler.ru/doc/faq.shtml" title="Bot home page [new window]">StackRambler</a>',
888
1510
'redalert','Red Alert',
889
1511
'rojo','<a href="http://rojo.com/" title="Bot home page [new window]" target="_blank">RoJo</a> aggregator',
892
1514
'rufusbot','<a href="http://64.124.122.252.webaroo.com/feedback.html" title="Bot home page [new window]" target="_blank">RufusBot Rufus Web Miner</a>',
893
1515
'sandcrawler','<a href="http://www.microsoft.com/" title="Bot home page [new window]" target="_blank">SandCrawler (Microsoft)</a>',
894
1516
'sbider','<a href="http://www.sitesell.com/sbider.html" title="Bot home page [new window]" target="_blank">SBIder</a>',
 
1517
'schizozilla','<a href="http://spamhuntress.com/2005/03/18/gizmo/ " title="Schizozilla home page [new window]" target="_blank">Schizozilla</a>',
 
1518
'scumbot','Scumbot',
 
1519
'searchguild\_dmoz\_experiment','<a href="http://www.searchguild.com/" title="SearchGuild_DMOZ_Experiment  home page [new window]" target="_blank">SearchGuild_DMOZ_Experiment</a>',
895
1520
'seekbot','<a href="http://www.seekbot.net/bot.html" title="Bot home page [new window]">Seekbot</a>',
 
1521
'sensis\sweb\scrawler','<a href="http://www.sensis.com.au/" title="Sensis Web Crawler home page [new window]" target="_blank">Sensis Web Crawler</a>',
896
1522
'seznambot','<a href="http://fulltext.seznam.cz/" title="Bot home page [new window]" target="_blank">SeznamBot</a>',
 
1523
'shim\-crawler','<a href="http://www.logos.ic.i.u-tokyo.ac.jp/crawler/" title="crawl@logos.ic.i.u-tokyo.ac.jp Bot home page [new window]" target="_blank">Shim-Crawler</a>',
897
1524
'shoutcast','Shoutcast Directory Service',
898
1525
'slysearch','SlySearch',
899
 
'sohu-search','<a href="http://corp.sohu.com/" title="Bot home page [new window]" target="_blank">sohu-search</a>',
 
1526
'snap\.com\sbeta\scrawler','<a href="http://www.snap.com/" title="snap.com beta crawler home page [new window]" target="_blank">snap.com beta crawler</a>',
 
1527
'sogou\sspider','<a href="http://corp.sohu.com/20051130/n240842344.shtml" title="Bot home page [new window]" target="_blank">sogou spider</a>',
 
1528
'sogou\stest','<a href="http://corp.sohu.com/20051130/n240842344.shtml" title="Bot home page [new window]" target="_blank">sogou test</a>',
 
1529
'sohu\-search','<a href="http://corp.sohu.com/" title="Bot home page [new window]" target="_blank">sohu-search</a>',
 
1530
'sohu','<a href="http://corp.sohu.com/" title="Bot home page [new window]" target="_blank">sohu agent</a>', 
 
1531
'snappy','<a href="http://www.urltrends.com/faq.php" title="Bot home page [new window]" target="_blank">Snappy</a>',
 
1532
'sphere\sscout','<a href="http://www.sphere.com/" title="Bot home page [new window]" target="_blank">Sphere Scout</a>',
 
1533
'sproose\scrawler','<a href="http://www.sproose.com/bot.html" title="Bot home page [new window]" target="_blank">sproose crawler</a>',
 
1534
'steroid\s\sdownload','<a href="http://faqs.org.ru/progr/pascal/delphi_internet2.htm" title="STEROID  Download home page [new window]" target="_blank">STEROID  Download</a>',
 
1535
'steeler','<a href="http://www.tkl.iis.u-tokyo.ac.jp/~crawler/ " title="Steeler home page [new window]" target="_blank">Steeler</a>',
 
1536
'suchfin\-bot','<a href="http://www.suchfin.de/" title="Suchfin-Bot home page [new window]" target="_blank">Suchfin-Bot</a>',
 
1537
'superbot','<a href="http://www.sparkleware.com/superbot/" title="SuperBot home page [new window]" target="_blank">SuperBot</a>',
900
1538
'surveybot','SurveyBot',
 
1539
'susie','<a href="http://www.sync2it.com/bms/susie.php" title="Susie home page [new window]" target="_blank">Susie</a>',
901
1540
'syndic8','Syndic8',
 
1541
'syndicapi','<a href="http://syndicapi.com/bot.html" title="Bot home page [new window]" target="_blank">SyndicAPI</a>',
 
1542
'synoobot','<a href="http://www.synoo.de/bot.html" title="webmaster@synoo.com SynooBot home page [new window]" target="_blank">SynooBot</a>',
 
1543
'tcl\shttp\sclient\spackage','<a href="http://www.tcl.tk/man/tcl8.4/TclCmd/http.htm" title="Tcl http client package home page [new window]" target="_blank">Tcl http client package</a>',
902
1544
'technoratibot', 'Technoratibot',
 
1545
'teragramcrawlersurf','<a href="http://www.teragram.com/" title="TeragramCrawlerSURF home page [new window]" target="_blank">TeragramCrawlerSURF</a>',
 
1546
'test\scrawler','<a href="http://netp.ath.cx/" title="Test Crawler home page [new window]" target="_blank">Test Crawler</a>',
 
1547
'testbot','<a href="http://www.agbrain.com/" title="TestBot home page [new window]" target="_blank">TestBot</a>',
903
1548
't\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e','<a href="http://www.thunderstone.com/" title="Bot home page. Used by many. [new window]" target="_blank">T-H-U-N-D-E-R-S-T-O-N-E</a>',
904
1549
'topicblogs', '<a href="http://www.topicblogs.com/" title="Bot home page [new window]" target="_blank">topicblogs</a>',
905
1550
'turnitinbot','Turn It In',
906
1551
'turtle', 'Turtle',
907
1552
'turtlescanner', 'Turtle',
 
1553
'tutorgigbot','<a href="http://www.tutorgig.info/" title="TutorGigBot home page [new window]" target="_blank">TutorGigBot</a>',
 
1554
'ubicrawler','<a href="http://law.dsi.unimi.it/ubicrawler/" title="Bot home page [new window]" target="_blank">UbiCrawler</a>',
908
1555
'ultraseek', 'Ultraseek',
 
1556
'unchaos\sbot\shybrid\sweb\ssearch\sengine','<a href="http://www.unchaos.com/" title="UnChaos Bot Hybrid Web Search Engine home page [new window]" target="_blank">UnChaos Bot Hybrid Web Search Engine</a>',
 
1557
'unido\-bot','<a href="http://www.unchina.org/unido/unido/our_projects/3_3.html" title="unido-bot home page [new window]" target="_blank">unido-bot</a>',
 
1558
'up\.browser','<a href="http://developer.openwave.com/dvl/support/faqs/faq_mag_browser.htm" title="UP.Browser home page [new window]" target="_blank">UP.Browser</a>',
 
1559
'updated','<a href="http://www.updated.com/" title="updated home page [new window]" target="_blank">updated</a>',
 
1560
'ustc\-semantic\-group','<a href="http://ai.ustc.edu.cn/mas/en/research/index.php" title="Bot home page [new window]" target="_blank">USTC-Semantic-Group</a>',  
 
1561
'vagabondo\-wap','<a href="http://www.wise-guys.nl/Contact/index.php?botselected=webagents&amp;lang=uk" title="Bot home page [new window]" target="_blank">Vagabondo-WAP</a>',
 
1562
'vagabondo','<a href="http://www.wise-guys.nl/Contact/index.php?botselected=webagents&amp;lang=uk" title="Bot home page [new window]" target="_blank">Vagabondo</a>',
 
1563
'vermut','<a href="http://vermut.aol.com/" title="Bot home page [new window]" target="_blank">Vermut</a>',
 
1564
'versus\scrawler\sfrom\seda\.baykan@epfl\.ch','<a href="http://www.epfl.ch/Eindex.html  " title="versus crawler from eda.baykan@epfl.ch home page [new window]" target="_blank">versus crawler from eda.baykan@epfl.ch</a>',
 
1565
'vespa\scrawler','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_t_z_030406_1%5Cb" title="Bot home page [new window]" target="_blank">Vespa Crawler</a>',
 
1566
'vortex','<a href="http://marty.anstey.ca/projects/robots/vortex/" title="Bot home page [new window]" target="_blank">VORTEX</a>',
 
1567
'vse','<a href="http://www.vivisimo.com/" title="VSE home page [new window]" target="_blank">VSE</a>',
909
1568
'w3c\-checklink','<a href="http://validator.w3.org/checklink/" title="Bot home page [new window]" target="_blank">W3C Link Checker</a>',
910
 
'w3c_css_validator_jfouffa', '<a href="http://jigsaw.w3.org/css-validator/" title="Bot home page [new window]" target="_blank">W3C jigsaw CSS Validator</a>',
911
 
'w3c_validator', '<a href="http://validator.w3.org/" title="Bot home page [new window]" target="_blank">W3C Validator</a>',
 
1569
'w3c\_css\_validator\_jfouffa', '<a href="http://jigsaw.w3.org/css-validator/" title="Bot home page [new window]" target="_blank">W3C jigsaw CSS Validator</a>',
 
1570
'w3c_validator','<a href="http://validator.w3.org/" title="Bot home page [new window]" target="_blank">W3C Validator</a>',
 
1571
'wavefire','<a href="http://www.wavefire.com" title="info@wavefire.com; Bot home page [new window]" target="_blank">Wavefire</a>',
912
1572
'webclipping\.com', 'WebClipping.com',
913
1573
'webcompass', 'webcompass',
 
1574
'webcrawl\.net','<a href="http://www.webcrawl.net/" title="webcrawl.net home page [new window]" target="_blank">webcrawl.net</a>',
 
1575
'web\sdownloader','<a href="http://www.krasu.ru/soft/chuchelo/" title="Web Downloader home page [new window]" target="_blank">Web Downloader</a>',
 
1576
'webdup','<a href="http://www.webdup.com/en/index.html" title="Webdup home page [new window]" target="_blank">Webdup</a>',
 
1577
'webfilter','<a href="http://www.verso.com/enterprise/netspective/webfilter.asp" title="Bot home page [new window]" target="_blank">WebFilter</a>',
 
1578
'webindexer','<a href="mailto://webindexerv1@yahoo.com" title="WebIndexer home page [new window]" target="_blank">WebIndexer</a>',
 
1579
'webminer','<a href="http://64.124.122.252/feedback.html" title="WebMiner home page [new window]" target="_blank">WebMiner</a>',
 
1580
'website\_monitoring\_bot','<a href="http://InternetSupervision.com/UrlMonitor/3/" title="Website_Monitoring_Bot home page [new window]" target="_blank">Website_Monitoring_Bot</a>',
914
1581
'webvulncrawl', 'WebVulnCrawl',
 
1582
'wells\ssearch','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_t_z_1484\b " title="Wells Search home page [new window]" target="_blank">Wells Search</a>',
915
1583
'wonderer', 'Web Wombat Redback Spider',
 
1584
'wume\scrawler','<a href="http://wume.cse.lehigh.edu/~xiq204/crawler/ " title="wume crawler home page [new window]" target="_blank">wume crawler</a>',
 
1585
'wwweasel',,'<a href="http://wwweasel.de/" title="Website_Monitoring_Bot home page [new window]" target="_blank">WWWeasel</a>',
 
1586
'xenu\'s\slink\ssleuth','<a href="http://home.snafu.de/tilman/xenulink.html" title="Xenu Link Sleuth home page [new window]" target="_blank">Xenu Link Sleuth</a>',
 
1587
'xenu\slink\ssleuth','<a href="http://home.snafu.de/tilman/xenulink.html" title="Xenu Link Sleuth home page [new window]" target="_blank">Xenu Link Sleuth</a>',
 
1588
'xirq','<a href="http://www.xirq.com/" title="xirq home page [new window]" target="_blank">xirq</a>',
916
1589
'y!j', '<a href="http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html" title="Bot home page [new window]" target="_blank">Y!J Yahoo Japan</a>',
917
1590
'yacy','<a href="http://www.yacy.net/yacy" title="Bot home page [new window]" target="_blank">yacy</a>',
918
1591
'yahoo\-blogs','<a href="http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html" title="Bot home page [new window]" target="_blank">Yahoo-Blogs</a>',
921
1594
'yahooseeker\-testing', '<a href="http://search.yahoo.com/" title="Bot home page [new window]" target="_blank">YahooSeeker-Testing</a>',
922
1595
'yahooseeker', '<a href="http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html" title="Bot home page [new window]" target="_blank">YahooSeeker Yahoo! Blog crawler</a>',
923
1596
'yahoo\-mmcrawler', '<a href="mailto:mms-mmcrawler-support@yahoo-inc.com?subject=Yahoo-MMCrawler Information" title="E-mail Bot">Yahoo-MMCrawler</a>',
 
1597
'yahoo!\smindset','<a href="http://mindset.research.yahoo.com/" title="Bot home page [new window]">Yahoo! Mindset</a>',
924
1598
'yandex', 'Yandex bot',
 
1599
'yooglifetchagent','<a href="http://www.yoogli.com/" title="yoogliFetchAgent home page [new window]" target="_blank">yoogliFetchAgent</a>',
 
1600
'z\-add\slink\schecker','<a href="http://w3.z-add.co.uk/linkcheck/" title="Z-Add Link Checker home page [new window]" target="_blank">Z-Add Link Checker</a>',
925
1601
'zealbot','ZealBot',
 
1602
'zspider','<a href="http://feedback.redkolibri.com/" title="Bot home page [new window]" target="_blank">zspider</a>',
 
1603
'zeus','<a href="http://www.webmasterworld.com/forum11/1840.htm" title="Bot documentation [new window]" target="_blank">Zeus Webster Pro</a>',
926
1604
'ng\/1\.','<a href="http://www.exabot.com/" title="Bot home page [new window]" target="_blank">NG 1.x (Exalead)</a>', # put at end to avoid false positive
927
1605
'ng\/2\.','<a href="http://www.exabot.com/" title="Bot home page [new window]" target="_blank">NG 2.x (Exalead)</a>', # put at end to avoid false positive
 
1606
'exabot','<a href="http://www.exabot.com/" title="Bot home page [new window]" target="_blank">Exabot</a>', # put at end to avoid false positive
 
1607
'java','<a href="http://www.projecthoneypot.org/harvester_useragents.php" title="Bot home page [new window]" target="_blank">Java (Often spam bot)</a>', # put at end to avoid false positive
928
1608
# Generic root ID
929
1609
'robot', 'Unknown robot (identified by \'robot\')',
930
1610
'crawl', 'Unknown robot (identified by \'crawl\')',
946
1626
'scooter'=>'AltaVista',
947
1627
'wisenutbot'=>'Looksmart',
948
1628
'yahoo\-verticalcrawler'=>'Yahoo',
949
 
'zyborg'=>'Looksmart'
 
1629
'zyborg'=>'Looksmart',
 
1630
'cfetch'=>'Kosmix',
 
1631
'^voyager\/'=>'Kosmix'
950
1632
);
951
1633
 
952
1634
1;