3
# Simple brute-force robots.txt validator
6
# Robots-rules object used for all later parse/allowed calls in this script;
# 'GrubNG 0.1' is the agent name handed to the WWW::RobotRules constructor.
my $rules = WWW::RobotRules->new('GrubNG 0.1');
8
use LWP::Simple qw($ua get);
14
# Open the output list of permitted URLs. The three-argument form keeps the
# mode separate from the file name, and the script stops immediately if the
# file cannot be created instead of silently discarding every later print.
open(URLSLIST, '>', 'allowed.txt')
    or die "Cannot open allowed.txt for writing: $!\n";
20
# Reject input that cannot be turned into a URI object and move on to the
# next one. An explicit block is used because `print "..." && next` parses as
# `print("..." && next)`: `next` would run before anything is printed.
unless ($u = URI->new($_)) {
    print "ILLEGAL: $_\n";
    next;
}
21
# Reject URLs whose host part is missing or implausibly short (3 characters
# or fewer). Not every URI class provides a host() method (for example
# scheme-less or mailto: input), so check for it first rather than dying.
# An explicit block is used because `print "..." && next` would execute
# `next` before the message is ever printed.
my $candidate_host = $u->can('host') ? $u->host : undef;
unless (defined $candidate_host && length($candidate_host) > 3) {
    print "ILLEGAL: $_\n";
    next;
}
24
# Build the "host:port" string for the current URL.
my $netloc_name = $u->host;
my $netloc_port = $u->port;
$host = $netloc_name . ":" . $netloc_port;
30
# Download robots.txt for the current host; get() yields undef on failure.
# $host carries the URL's own port, so the scheme must match it: asking for
# http://host:443/robots.txt on an https URL would always fail. Any scheme
# other than https keeps the previous plain-http behaviour.
my $robots_scheme = (($u->scheme || '') eq 'https') ? 'https' : 'http';
my $robots_txt = get($robots_scheme . "://" . $host . "/robots.txt");
31
# Feed the downloaded robots.txt into the rule set; skipped entirely when
# the download produced nothing.
if (defined $robots_txt) {
    my $robots_location = "http://" . $host . "/robots.txt";
    $rules->parse($robots_location, $robots_txt);
}
32
# Ask the rule set whether the current URL ($_) may be fetched; when it may,
# append the URL to the URLSLIST output handle, one URL per line.
if($rules->allowed($_))
34
print URLSLIST "$_\n";