~kaboom/blam/trunk

« back to all changes in this revision

Viewing changes to src/http.py

Committer: Bastian Kennel
Date: 2009-06-28 16:52:33 UTC
Revision ID: bastian@simpsus-20090628165233-v38ysfvy3fo1ja5h

archive download type.
PHD Comics downloading.
Updated ROADMAP

@Wolfer: Plz read!!

files added:
data/comics_base.ini

files modified:
ROADMAP

data/comics.ini

src/blamobjects.py

src/blamstore.py

src/blamthreads.py

src/comicthread.py

src/config.py

src/http.py

src/treeviews.py

Show diffs side-by-side

added added

removed removed

src/http.py

#r = re.compile(regex, re.MULTILINE)

results = re.findall(regex, string, re.MULTILINE)

if results is not None:

print "Result of regex ", regex, ": ", results

#print "Result of regex ", regex, ": ", results

return results

else:

logger.debug('Regex ' + regex + ' not found.')

def download_html(url):

"""url: the url to download"""

print url

#print url

request = urllib2.Request(url)

request.add_header('Accept-encoding', 'gzip')

response = None

def download_image(url):

"""url: the image to download"""

print "Image: ",url

#print "Image: ",url

try:

request = urllib2.Request(url)

request.add_header('Referer', url)

logging.getLogger().error("Error while downloadind image: " + str(url) +" "+ str(e))

return None

return response

def download_image_from_URL(args):
    """Fetch a page, find an image URL in it with a regex, and download it.

    args: a (url, regex) pair -- the page to fetch and the pattern that is
    applied to its HTML through apply_regex.

    Returns the result of download_image for the last regex match, or None
    when any step raises (the error is logged on the root logger).
    """
    url, regex = args
    try:
        matches = apply_regex(download_html(url), regex)
        # pop() takes the last match; an empty or missing result raises
        # here and is reported by the handler below.
        return download_image(matches.pop())
    except Exception as e:
        logging.getLogger().error("Error while downloadind image from: " + str(url) +" "+ str(e))
        return None

def getNumberURLS(conf):
    """Run _getURLS on a comic configuration for the NUMBERREGEX key only.

    conf: the comic's configuration, passed through unchanged.
    Returns whatever _getURLS yields for that single key.
    """
    wanted = [NUMBERREGEX]
    return _getURLS(conf, wanted)

logger.debug('Comic ' + conf['name'] + ' does not have a baseURL.')

return {}

html = download_html(baseURL)

print "Base: ", baseURL

#print "Base: ", baseURL

regexes = {}

for ex in toApply:

current = conf[ex]

print "Regex: ", current

#print "Regex: ", current

if current:

regexes[ex] = apply_regex(html, current)

else:

logger.debug('Comic ' + conf['name'] + ' does not have a regex ' + ex)

return regexes

def getArchiveURLS(conf):
    """Run _getURLS on a comic configuration for the ARCHIVEITEM key only.

    conf: the comic's configuration, passed through unchanged.
    Returns whatever _getURLS yields for that single key.
    """
    wanted = [ARCHIVEITEM]
    return _getURLS(conf, wanted)

def getDateInPageURLS(conf):
    """Run _getURLS on a comic configuration for the date-in-page keys.

    conf: the comic's configuration, passed through unchanged.
    The keys requested, in order, are PREVEX, DATEREGEX and IMGEXP.
    Returns whatever _getURLS yields for those keys.
    """
    wanted = [PREVEX, DATEREGEX, IMGEXP]
    return _getURLS(conf, wanted)

Older »