~kaboom/blam/trunk

« back to all changes in this revision

Viewing changes to src/http.py

  • Committer: Bastian Kennel
  • Date: 2009-06-28 16:52:33 UTC
  • Revision ID: bastian@simpsus-20090628165233-v38ysfvy3fo1ja5h
archive download type.
PHD Comics downloading.
Updated ROADMAP

@Wolfer: Plz read!!

Show diffs side-by-side

added added

removed removed

Lines of Context:
14
14
    #r = re.compile(regex, re.MULTILINE)
15
15
    results = re.findall(regex, string, re.MULTILINE)
16
16
    if results is not None:
17
 
        print "Result of regex ", regex, ": ", results
 
17
        #print "Result of regex ", regex, ": ", results
18
18
        return results
19
19
    else:
20
20
        logger.debug('Regex ' + regex + ' not found.')
22
22
 
23
23
def download_html(url):
24
24
    """url: the url to download"""
25
 
    print url
 
25
    #print url
26
26
    request = urllib2.Request(url)
27
27
    request.add_header('Accept-encoding', 'gzip')
28
28
    response = None
41
41
 
42
42
def download_image(url):
43
43
    """url: the image to download"""
44
 
    print "Image: ",url
 
44
    #print "Image: ",url
45
45
    try:
46
46
        request = urllib2.Request(url)
47
47
        request.add_header('Referer', url)
50
50
        logging.getLogger().error("Error while downloadind image: " + str(url) +" "+ str(e))
51
51
        return None
52
52
    return response
 
53
    
 
54
def download_image_from_URL(args):
    """args: a (url, regex) pair, packed into a single argument.

    Downloads the HTML page at url, applies regex to it and downloads the
    image named by the last match.

    Returns whatever download_image() returns for that match, or None when
    the regex yields no match or any step raises.
    """
    url, regex = args
    try:
        matches = apply_regex(download_html(url), regex)
        if not matches:
            # Covers both an empty result list and a None result; report it
            # explicitly instead of the opaque "pop from empty list" /
            # AttributeError the broad handler below would otherwise log.
            logging.getLogger().error("No image URL matched on: " + str(url) + " (regex " + str(regex) + ")")
            return None
        # pop() takes the last match, as the original code did.
        return download_image(matches.pop())
    except Exception as e:  # "as" form is valid on Python 2.6+ and 3.x
        # Deliberate best-effort: log the failure and signal it with None.
        logging.getLogger().error("Error while downloadind image from: " + str(url) +" "+ str(e))
        return None
53
63
   
54
64
def getNumberURLS(conf):
    """conf: the comic configuration, handed unchanged to _getURLS.

    Asks _getURLS to evaluate only the NUMBERREGEX entry and returns its
    result as-is.
    """
    wanted = [NUMBERREGEX]
    return _getURLS(conf, wanted)
60
70
        logger.debug('Comic ' + conf['name'] + ' does not have a baseURL.')
61
71
        return {}
62
72
    html = download_html(baseURL)
63
 
    print "Base: ", baseURL
 
73
    #print "Base: ", baseURL
64
74
    regexes = {}
65
75
    for ex in toApply:
66
76
        current = conf[ex]
67
 
        print "Regex: ", current
 
77
        #print "Regex: ", current
68
78
        if current:
69
79
            regexes[ex] = apply_regex(html, current)
70
80
        else:
71
81
            logger.debug('Comic ' + conf['name'] + ' does not have a regex ' + ex)
72
82
    return regexes
73
83
    
 
84
def getArchiveURLS(conf):
    """conf: the comic configuration, handed unchanged to _getURLS.

    Asks _getURLS to evaluate only the ARCHIVEITEM entry and returns its
    result as-is.
    """
    wanted = [ARCHIVEITEM]
    return _getURLS(conf, wanted)
 
86
    
74
87
def getDateInPageURLS(conf):
    """conf: the comic configuration, handed unchanged to _getURLS.

    Asks _getURLS to evaluate the PREVEX, DATEREGEX and IMGEXP entries, in
    that order, and returns its result as-is.
    """
    wanted = [PREVEX, DATEREGEX, IMGEXP]
    return _getURLS(conf, wanted)
76
89