~grubng-dev/grubng/tools-python

« back to all changes in this revision

Viewing changes to robots.py

  • Committer: thindil
  • Date: 2011-02-03 10:11:52 UTC
  • Revision ID: thindil2@gmail.com-20110203101152-xyurtqhhl7z3aaub
fixed bug with crash on invalid url

Show diffs side-by-side

added

removed

Lines of Context:
3
3
###########################################################################
4
4
# Convert XML sitemaps to plain text file and check robots.txt
5
5
#
6
 
# Copyright (C) 2010  Bartek thindil Jasicki
 
6
# Copyright (C) 2010,2011  Bartek thindil Jasicki
7
7
#
8
8
# This file is part of Grub.
9
9
#
21
21
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
22
###########################################################################
23
23
 
24
 
import os, gzip, robotparser
 
24
import os, gzip, robotparser, httplib
25
25
from xml.dom import minidom
26
26
 
27
27
directory = './sitemaps/'
60
60
                        robot.read()
61
61
                    except IOError:
62
62
                        continue
63
 
                    except InvalidURL:
 
63
                    except httplib.InvalidURL:
64
64
                        continue
65
65
                    oldroboturl = roboturl
66
66
                #if we can visit this link, add it to file