3
3
# If you want to add robots to extend AWStats database detection capabilities,
4
4
# you must add an entry in RobotsSearchIDOrder_listx and RobotsHashIDLib.
5
5
#-------------------------------------------------------
6
# $Revision: 1.28 $ - $Author: eldy $ - $Date: 2004/01/07 05:43:31 $
6
# $Revision: 1.35 $ - $Author: eldy $ - $Date: 2004/09/13 17:47:27 $
12
12
# Robots list was found at http://www.robotstxt.org/wc/active/all.txt
13
13
# Other robots can be found at http://www.jafsoft.com/searchengines/webbots.html
14
14
# Rem: To avoid bad detection, some robot IDs were removed from this list:
15
# - Robots with ID of 2 letters only
15
# - Robots with ID of 3 letters only
16
16
# - Robot called 'webs' and 'tcl'
17
# Rem: Some robots mostly used for downloading are also removed: wget
17
18
# Rem: directhit changed into direct_hit (its real id)
18
19
# Rem: calif changed into calif[^r] to avoid confusion with the Tiscalifreenet browser
19
20
# Rem: fish changed into [^a]fish to avoid confusion with the Madsafish browser
20
21
# Rem: roadrunner changed into road_runner
21
22
# Rem: lycos changed to lycos_ to avoid confusion with lycos-online browser
23
# Rem: voyager changed into ^voyager\/ to avoid excluding the voyager and amigavoyager browsers
24
25
# RobotsSearchIDOrder
25
26
# It contains all matching criteria to search for in log fields. This list is
29
30
# Note: Robots IDs are in lower case, ' ' and '+' are changed into '_' and are quoted.
30
31
#-------------------------------------------------------
31
32
@RobotsSearchIDOrder_list1 = (
33
# Common robots (In robot file)
53
# Common robots (Not in robot file)
58
64
'unlost_web_crawler',
60
'^voyager\/', # Add ^ and \/ to avoid to exclude voyager and amigavoyager browser
65
69
@RobotsSearchIDOrder_list2 = (
70
# Less common robots (In robot file)
67
74
'ahoythehomepagefinder',
359
381
# List of robots names ('robot id','robot clear text')
360
382
#-------------------------------------------------------
361
383
%RobotsHashIDLib = (
384
# Common robots (In robot file)
385
'appie','Walhello appie',
386
'architext','ArchitextSpider',
387
'jeeves','AskJeeves',
388
'bjaaland','Bjaaland',
389
'ferret','Wild Ferret Web Hopper #1, #2, #3',
390
'googlebot','Googlebot',
391
'gulliver','Northern Light Gulliver',
394
'linkwalker','LinkWalker',
397
'muscatferret','Muscat Ferret',
398
'myweb','Internet Shinchakubin',
401
'slurp','Inktomi Slurp',
402
'^voyager\/','Voyager',
403
'weblayers','weblayers',
404
# Common robots (Not in robot file)
406
'digout4u','Digout4u',
408
'fast\-webcrawler','Fast-Webcrawler',
409
'ia_archiver','Alexa (IA Archiver)',
410
'jennybot','JennyBot',
411
'mercator','Mercator',
413
'netcraft','Netcraft',
414
'petersnews','Petersnews',
415
'unlost_web_crawler','Unlost Web Crawler',
417
'webbase', 'WebBase',
418
'wisenutbot','WISENutbot',
419
# Less common robots (In robot file)
420
'[^a]fish','Fish search',
421
'abcdatos','ABCdatos BotLink',
362
422
'acme\.spider','Acme.Spider',
363
423
'ahoythehomepagefinder','Ahoy! The Homepage Finder',
364
424
'alkaline','Alkaline',
365
'appie','Walhello appie',
366
426
'arachnophilia','Arachnophilia',
367
'architext','ArchitextSpider',
368
429
'aretha','Aretha',
369
430
'ariadne','ARIADNE',
373
434
'atomz','Atomz.com Search Robot',
374
435
'auresys','AURESYS',
375
436
'backrub','BackRub',
376
438
'bigbrother','Big Brother',
377
'bjaaland','Bjaaland',
378
439
'blackwidow','BlackWidow',
379
440
'blindekuh','Die Blinde Kuh',
380
441
'bloodhound','Bloodhound',
442
'borg\-bot','Borg-Bot',
381
443
'brightnet','bright.net caching robot',
382
444
'bspider','BSpider',
383
445
'cactvschemistryspider','CACTVS Chemistry Spider',
385
447
'cassandra','Cassandra',
386
448
'cgireader','Digimarc Marcspider/CGI',
387
449
'checkbot','Checkbot',
450
'christcrawler','ChristCrawler.com',
452
'cienciaficcion','cIeNcIaFiCcIoN.nEt',
390
453
'collective','Collective',
391
454
'combine','Combine System',
392
455
'conceptbot','Conceptbot',
396
459
'cruiser','Internet Cruiser Robot',
398
461
'cyberspyder','CyberSpyder Link Test',
462
'desertrealm','Desert Realm Spider',
399
463
'deweb','DeWeb(c) Katalog/Index',
400
464
'dienstspider','DienstSpider',
401
465
'digger','Digger',
402
466
'diibot','Digital Integrity Robot',
403
'directhit','Direct Hit Grabber',
467
'direct_hit','Direct Hit Grabber',
404
468
'dnabot','DNAbot',
405
469
'download_express','DownLoad Express',
406
470
'dragonbot','DragonBot',
407
471
'dwcp','DWCP (Dridus\' Web Cataloging Project)',
408
472
'e\-collector','e-collector',
409
473
'ebiness','EbiNess',
410
'eit','EIT Link Verifier Robot',
411
474
'elfinbot','ELFINBOT',
412
475
'emacs','Emacs-w3 Search Engine',
413
476
'emcspider','ananzi',
414
477
'esther','Esther',
415
478
'evliyacelebi','Evliya Celebi',
416
'nzexplorer','nzexplorer',
479
'fastcrawler','FastCrawler',
417
480
'fdse','Fluid Dynamics Search Engine robot',
418
481
'felix','Felix IDE',
419
'ferret','Wild Ferret Web Hopper #1, #2, #3',
420
482
'fetchrover','FetchRover',
422
484
'finnish','H�m�h�kki',
423
485
'fireball','KIT-Fireball',
424
'[^a]fish','Fish search',
425
486
'fouineur','Fouineur',
426
487
'francoroute','Robot Francoroute',
427
488
'freecrawl','Freecrawl',
432
493
'getbot','GetBot',
433
494
'geturl','GetURL',
435
'googlebot','Googlebot (Google)',
436
496
'grapnel','Grapnel/0.01 Experiment',
437
497
'griffon','Griffon',
438
498
'gromit','Gromit',
439
'gulliver','Northern Light Gulliver',
499
'gulperbot','Gulper Bot',
440
500
'hambot','HamBot',
442
501
'havindex','havIndex',
443
502
'hometown','Hometown Spider Pro',
445
503
'htmlgobble','HTMLgobble',
446
504
'hyperdecontextualizer','Hyper-Decontextualizer',
447
505
'iajabot','iajaBot',
448
'ibm','IBM_Planetwide',
449
506
'iconoclast','Popular Iconoclast',
451
508
'imagelock','Imagelock',
462
519
'javabee','JavaBee',
463
520
'jbot','JBot Java Web Robot',
464
521
'jcrawler','JCrawler',
466
522
'jobo','JoBo Java Web Robot',
468
524
'joebot','JoeBot',
469
525
'jubii','The Jubii Indexing Robot',
470
526
'jumpstation','JumpStation',
527
'kapsi','image.kapsi.net',
471
528
'katipo','Katipo',
472
'kdd','KDD-Explorer',
473
529
'kilroy','Kilroy',
474
530
'ko_yappo_robot','KO_Yappo_Robot',
475
531
'labelgrabber\.txt','LabelGrabber',
478
534
'linkidator','Link Validator',
479
535
'linkscan','LinkScan',
480
'linkwalker','LinkWalker',
481
536
'lockon','Lockon',
482
537
'logo_gif','logo.gif Crawler',
484
538
'macworm','Mac WWWWorm',
485
539
'magpie','Magpie',
486
540
'marvin','marvin/infoseek',
489
543
'merzscope','MerzScope',
490
544
'meshexplorer','NEC-MeshExplorer',
491
545
'mindcrawler','MindCrawler',
546
'mnogosearch','mnoGoSearch search engine software',
493
547
'momspider','MOMspider',
494
548
'monster','Monster',
496
'muscatferret','Muscat Ferret',
497
551
'mwdsearch','Mwd.Search',
498
'myweb','Internet Shinchakubin',
499
'nagios','Nagios monitoring checker',
552
'ndspider','NDSpider',
553
'nederland\.zoek','Nederland.zoek',
500
554
'netcarta','NetCarta WebMap Engine',
501
'netcraft','Netcraft Web Server Survey',
502
555
'netmechanic','NetMechanic',
503
556
'netscoop','NetScoop',
504
557
'newscan\-online','newscan-online',
505
558
'nhse','NHSE Web Forager',
507
559
'northstar','The NorthStar Robot',
560
'nzexplorer','nzexplorer',
561
'objectssearch','ObjectsSearch',
509
563
'octopus','HKU WWW Octopus',
510
564
'openfind','Openfind data gatherer',
517
571
'perignator','The Peregrinator',
518
572
'perlcrawler','PerlCrawler 1.0',
519
573
'phantom','Phantom',
520
575
'piltdownman','PiltdownMan',
521
576
'pimptrain','Pimptrain.com\'s robot',
522
577
'pioneer','Pioneer',
523
578
'pitkow','html_analyzer',
524
579
'pjspider','Portal Juice Spider',
525
'pka','PGP Key Agent',
526
580
'plumtreewebaccessor','PlumtreeWebAccessor',
528
582
'portalb','PortalB Spider',
529
'puu','GetterroboPlus Puu',
530
584
'python','The Python Robot',
531
585
'raven','Raven Search',
532
586
'rbse','RBSE Spider',
535
589
'road_runner','Road Runner: The ImageScape Robot',
536
590
'robbie','Robbie the Robot',
537
591
'robi','ComputingSite Robi/1.0',
592
'robocrawl','RoboCrawl Spider',
538
593
'robofox','RoboFox',
539
594
'robozilla','Robozilla',
540
595
'roverbot','Roverbot',
542
597
'safetynetrobot','SafetyNet Robot',
543
'scooter','Scooter (AltaVista)',
598
'search\-info','Sleek',
544
599
'search_au','Search.Aus-AU.COM',
545
600
'searchprocess','SearchProcess',
546
601
'senrigan','Senrigan',
551
606
'simbot','Simmany Robot Ver1.0',
552
607
'site\-valet','Site Valet',
553
'sitegrabber','Open Text Index Robot',
554
608
'sitetech','SiteTech-Rover',
609
'skymob','Skymob.com',
555
610
'slcrawler','SLCrawler',
556
'slurp','Inktomi Slurp',
557
611
'smartspider','Smart Spider',
558
612
'snooper','Snooper',
559
613
'solbot','Solbot',
561
614
'speedy','Speedy Spider',
562
615
'spider_monkey','spider_monkey',
563
616
'spiderbot','SpiderBot',
586
638
'verticrawl','Verticrawl',
587
639
'victoria','Victoria',
588
640
'visionsearch','vision-search',
589
'^voyager\/','Voyager',
641
'voidbot','void-bot',
591
643
'w3index','The NWI Robot',
593
'wallpaper','WallPaper',
645
'wallpaper','WallPaper (alias crawlpaper)',
594
646
'wanderer','the World Wide Web Wanderer',
595
647
'wapspider','w@pSpider by wap4.com',
596
648
'webbandit','WebBandit Web Spider',
597
649
'webcatcher','WebCatcher',
598
650
'webcopy','WebCopy',
599
'webfetcher','Webfetcher',
651
'webfetcher','webfetcher',
600
652
'webfoot','The Webfoot Robot',
601
'weblayers','Weblayers',
653
'webinator','Webinator',
602
654
'weblinker','WebLinker',
603
655
'webmirror','WebMirror',
604
656
'webmoose','The Web Moose',
632
682
'bumblebee', 'Bumblebee (relevare.com)',
633
683
'cscrawler','CsCrawler',
634
684
'daviesbot', 'DaviesBot',
635
'digout4u', 'Digout4u',
637
685
'exactseek','ExactSeek Crawler',
638
686
'ezresult', 'Ezresult',
639
'fast\-webcrawler', 'Fast-Webcrawler (AllTheWeb)',
640
687
'gigabot','GigaBot',
641
688
'gnodspider','GNOD Spider',
642
690
'henrythemiragorobot', 'Mirago',
643
'ia_archiver', 'Alexa (IA Archiver)',
644
692
'internetseer', 'InternetSeer',
645
'jennybot', 'JennyBot',
646
693
'justview', 'JustView',
647
694
'linkbot','LinkBot',
648
695
'linkchecker','LinkChecker',
696
'mediapartners\-google','Google AdSense',
649
697
'metager\-linkchecker','MetaGer LinkChecker',
650
698
'microsoft_url_control','Microsoft URL Control',
651
'mercator', 'Mercator',
652
700
'msiecrawler','MSIECrawler',
654
701
'perman', 'Perman surfer',
655
'petersnews', 'Petersnews',
656
702
'pompos','Pompos',
658
703
'rambler', 'StackRambler',
659
704
'redalert', 'Red Alert',
660
705
'shoutcast','Shoutcast Directory Service',
664
709
'turtle', 'Turtle',
665
710
'turtlescanner', 'Turtle',
666
711
'ultraseek', 'Ultraseek',
667
'unlost_web_crawler', 'Unlost Web Crawler',
669
'webbase', 'WebBase',
712
'webclipping\.com', 'WebClipping.com',
670
713
'webcompass', 'webcompass',
671
'webclipping\.com', 'WebClipping.com',
672
'wisenutbot','WISENutbot (Looksmart)',
714
'wonderer', 'Web Wombat Redback Spider',
715
'yahoo\-verticalcrawler', 'Yahoo Vertical Crawler',
673
716
'yandex', 'Yandex bot',
674
717
'zealbot','ZealBot',
675
'zyborg','Zyborg (Looksmart)',
677
720
# Generic root ID
678
721
'robot', 'Unknown robot (identified by \'robot\')',
731
# This list tries to tell which search engine a robot is used by
732
#-------------------------------------------------------------
733
%RobotsAffiliateLib = (
734
'fast\-webcrawler'=>'AllTheWeb',
735
'googlebot'=>'Google',
737
'scooter'=>'AltaVista',
738
'wisenutbot'=>'Looksmart',
739
'yahoo\-verticalcrawler'=>'Yahoo',
740
'zyborg'=>'Looksmart'