1
from re import compile, MULTILINE
3
from dosage.helpers import BasicScraper, regexNamer, bounceStarter
6
class ALessonIsLearned(BasicScraper):
    """Scraper settings for http://www.alessonislearned.com/.

    NOTE(review): attribute roles are inferred from their names and
    patterns; confirm against BasicScraper in dosage.helpers.
    """

    # Presumably the page showing the newest strip.
    latestUrl = 'http://www.alessonislearned.com/'
    # Page URL template; %s is filled with the index described by `help`.
    imageUrl = 'http://www.alessonislearned.com/lesson%s.html'
    # Captures the src of an <img> whose path starts with "cmx/".
    imageSearch = compile(r'<img src="(cmx/.+?)"')
    # Captures a single-quoted "index.php?comic=..." href that is followed
    # by the word "previous". The dot is escaped so only a literal "."
    # matches.
    prevSearch = compile(r"<a href='(index\.php\?comic=.+?)'.+?previous")
    help = 'Index format: nnn'
14
class ASofterWorld(BasicScraper):
    """Scraper settings for http://www.asofterworld.com/.

    NOTE(review): attribute roles are inferred from their names and
    patterns; confirm against BasicScraper in dosage.helpers.
    """

    # Presumably the page showing the newest strip.
    latestUrl = 'http://www.asofterworld.com/'
    # Page URL template; %s is filled with the index described by `help`.
    imageUrl = 'http://www.asofterworld.com/index.php?id=%s'
    # Captures an absolute image URL under /clean/. Host-name dots are
    # escaped so they only match a literal ".".
    imageSearch = compile(r'<img src="(http://www\.asofterworld\.com/clean/.*?)"')
    # Captures the quoted attribute value that closes a tag shortly before
    # the text "back".
    prevSearch = compile(r'"(.*?)">.*?back')
    help = 'Index format: n (unpadded)'
22
class AbleAndBaker(BasicScraper):
    """Scraper settings for http://www.jimburgessdesign.com/comics/.

    NOTE(review): attribute roles are inferred from their names and
    patterns; confirm against BasicScraper in dosage.helpers.
    """

    # Presumably the page showing the newest strip.
    latestUrl = 'http://www.jimburgessdesign.com/comics/index.php'
    # Page URL template; %s is filled with the index described by `help`.
    imageUrl = 'http://www.jimburgessdesign.com/comics/index.php?comic=%s'
    # Captures the src of an <img> whose path starts with "comics/".
    imageSearch = compile(r'<img src="(comics/.+?)"')
    # Captures an href ending in a digit that precedes "previous.gif" on
    # the same line. The dot in the file name is escaped so only a literal
    # "." matches.
    prevSearch = compile(r'<a href="(.+\d+?)".+previous\.gif')
    help = 'Index format: nnn'
30
class AbominableCharlesChristopher(BasicScraper):
    """Scraper settings for http://abominable.cc/.

    NOTE(review): attribute roles are inferred from their names and
    patterns; confirm against BasicScraper in dosage.helpers.
    """

    # Presumably the page showing the newest strip.
    latestUrl = 'http://abominable.cc/'
    # Page URL template; %s is filled with the index described by `help`.
    imageUrl = 'http://abominable.cc/%s'
    # Captures the "/comics/..." path that follows "cc" up to the closing
    # double quote.
    imageSearch = compile(r'cc(/comics/.+?)"')
    # Captures the path that follows "cc" in a quoted URL appearing before
    # the text "prev".
    prevSearch = compile(r'cc(/.+?)".+?prev')
    help = 'Index format: yyyy/mm/dd/comicname'
38
class AbstractGender(BasicScraper):
    """Scraper settings for http://www.abstractgender.com/.

    NOTE(review): attribute roles are inferred from their names and
    patterns; confirm against BasicScraper in dosage.helpers.
    """

    # Presumably the page showing the newest strip.
    latestUrl = 'http://www.abstractgender.com/'
    # Page URL template; %s is filled with the index described by `help`.
    imageUrl = 'http://www.abstractgender.com/?comic=%s'
    # Captures a "comics/<digits>.<ext>" image path from an <img> tag.
    imageSearch = compile(r'<img[^>]+src="(comics/\d+\.\w+)"')
    # Captures the "?comic=<digits>" href wrapping the image whose id is
    # "comic_menu_prev".
    prevSearch = compile(r'<a\W+href="(\?comic=\d+)"><img[^>]+id="comic_menu_prev"')
    help = 'Index format: n (unpadded)'
46
class AbsurdNotions(BasicScraper):
    """Scraper settings for http://www.absurdnotions.org/.

    NOTE(review): attribute roles are inferred from their names and
    patterns; confirm against BasicScraper in dosage.helpers.
    """

    # Presumably the page showing the newest strip.
    latestUrl = 'http://www.absurdnotions.org/'
    # Page URL template; %s is filled with the index described by `help`.
    imageUrl = 'http://www.absurdnotions.org/page%s.html'
    # Captures an image file named "an<digits>", one optional extra
    # character, then ".<ext>".
    imageSearch = compile(r'<IMG SRC="(an\d+.?\..+?)"')
    # Captures the "page<digits>.html" href wrapping the "nprev.gif" image.
    prevSearch = compile(r'<A.*?HREF="(page\d+\.html)"><img SRC="nprev\.gif"')
    help = 'Index format: n (unpadded)'
54
class AcademyVale(BasicScraper):
    """Scraper settings for http://imagerie.com/vale/.

    NOTE(review): attribute roles are inferred from their names and
    patterns; confirm against BasicScraper in dosage.helpers.
    """

    # Presumably the page showing the newest strip.
    latestUrl = 'http://imagerie.com/vale/'
    # Page URL template; %s is filled with the index described by `help`.
    imageUrl = 'http://imagerie.com/vale/avarch.cgi?%s'
    # Captures an image file named "avale<4 digits>-<2 digits>.<ext>".
    imageSearch = compile(r'<IMG.+?SRC="(avale\d{4}-\d{2}\..*?)"')
    # Captures the unquoted "avarch..." href wrapping the "AVNavBack.gif"
    # image. The dot in the file name is escaped so only a literal "."
    # matches.
    prevSearch = compile(r'HREF=(avarch.*?)><IMG SRC="AVNavBack\.gif"')
    help = 'Index format: nnn'
62
class AlienLovesPredator(BasicScraper):
    """Scraper settings for http://alienlovespredator.com/.

    NOTE(review): attribute roles are inferred from their names and
    patterns; confirm against BasicScraper in dosage.helpers.
    """

    # Presumably the page showing the newest strip.
    latestUrl = 'http://alienlovespredator.com/index.php'
    # Page URL template; %s is filled with the index described by `help`.
    imageUrl = 'http://alienlovespredator.com/index.php?id=%s'
    # Captures an absolute strip URL on planetavp.com; the match must be
    # followed by a width attribute.
    imageSearch = compile(r'<img src="(http://planetavp\.com/alienlovespredator/strips/.+?)" width')
    # Captures the "/index.php?id=<digits>" href that precedes
    # "nav_previous".
    prevSearch = compile(r'<a href="(/index\.php\?id=\d+?)".+?nav_previous')
    help = 'Index format: nnn'
70
class Altermeta(BasicScraper):
    """Scraper settings for http://www.altermeta.com/.

    NOTE(review): attribute roles are inferred from their names and
    patterns; confirm against BasicScraper in dosage.helpers.
    """

    # Presumably the page showing the newest strip.
    latestUrl = 'http://www.altermeta.com/'
    # Page URL template; %s is filled with the index described by `help`.
    imageUrl = 'http://www.altermeta.com/index.php?PS=viewComic.php&comic=%s'
    # Captures an image path of the form "comics/<digits>_<digits>.<ext>".
    imageSearch = compile(r'<img.+?src="(comics/\d+_\d+\..+?)"')
    # Captures the viewComic href of the link whose text starts with
    # "Previous".
    prevSearch = compile(r'<a href="(index\.php\?PS=viewComic\.php&comic=\d+)">Previous')
    help = 'Index format: n (unpadded)'
78
class Angels2200(BasicScraper):
    """Scraper settings for http://peter.haynes.iconz.co.nz/.

    NOTE(review): attribute roles are inferred from their names and
    patterns; confirm against BasicScraper in dosage.helpers.
    """

    # Presumably the page showing the newest strip.
    latestUrl = 'http://peter.haynes.iconz.co.nz/'
    # Page URL template; %s is filled with the index described by `help`.
    imageUrl = 'http://peter.haynes.iconz.co.nz/Default.asp?ID=%s'
    # Captures an "/archives/..." path up to the closing double quote.
    imageSearch = compile(r'(/archives/.+?)"')
    # Captures the "/Default.asp?ID=<digits>" link that precedes
    # "previous.jpg" on the same line. Dots in the file names are escaped
    # so only a literal "." matches.
    prevSearch = compile(r'(/Default\.asp\?ID=\d+?)".+previous\.jpg')
    help = 'Index format: nnn'
86
class AnimeArcadia(BasicScraper):
    """Scraper settings for http://www.animearcadia.com/.

    NOTE(review): attribute roles are inferred from their names and
    patterns; confirm against BasicScraper in dosage.helpers.
    """

    # Presumably the page showing the newest strip.
    latestUrl = 'http://www.animearcadia.com/index.php?p=comics'
    # Page URL template; %s is filled with the index described by `help`.
    imageUrl = 'http://www.animearcadia.com/index.php?p=comics&c=%s'
    # Captures a "/comics/..." path up to the closing double quote.
    imageSearch = compile(r'(/comics/.+?)"')
    # Captures the quoted comics link found between the texts "First" and
    # "Back". The dot in "index.php" is escaped so only a literal "."
    # matches.
    prevSearch = compile(r'First.+?"(index\.php\?p=comics&c=.+?)".+?Back')
    help = 'Index format: n (unpadded)'
94
class AppleGeeks(BasicScraper):
    """Scraper settings for http://www.applegeeks.com/.

    NOTE(review): attribute roles are inferred from their names and
    patterns; confirm against BasicScraper in dosage.helpers.
    """

    # Presumably the page showing the newest strip.
    latestUrl = 'http://www.applegeeks.com/'
    # Page URL template; %s is filled with the index described by `help`.
    imageUrl = 'http://www.applegeeks.com/comics/viewcomic.php?issue=%s'
    # Captures "issue<digits>.<ext>", optionally prefixed with "/comics/".
    imageSearch = compile(r'<img src="((?:/comics/)?issue\d+?\..+?)"')
    # Captures the href of the first link after the "Previous Comic"
    # caption. \s* already spans line breaks; MULTILINE only affects ^ and
    # $, which this pattern does not use, so the flag is kept unchanged.
    prevSearch = compile(r'<div class="caption">Previous Comic</div>\s*<p><a href="([^"]+)">', MULTILINE)
    help = 'Index format: n (unpadded)'
102
class AppleGeeksLite(BasicScraper):
    """Scraper settings for http://www.applegeeks.com/lite/.

    NOTE(review): attribute roles are inferred from their names and
    patterns; confirm against BasicScraper in dosage.helpers.
    """

    # Presumably the page showing the newest strip.
    latestUrl = 'http://www.applegeeks.com/lite/'
    # Page URL template; %s is filled with the index described by `help`.
    imageUrl = 'http://applegeeks.com/lite/index.php?aglitecomic=%s'
    # Captures the src of an <img> whose path starts with "strips/".
    imageSearch = compile(r'<img src="(strips/.+?)"')
    # Captures the "index.php?aglitecomic=..." href that precedes the text
    # "back". The dot in "index.php" is escaped so only a literal "."
    # matches.
    prevSearch = compile(r'<a href="(index\.php\?aglitecomic=.+?)".+?back')
    help = 'Index format: yyyy-mm-dd'
110
class AsIf(BasicScraper):
    """Scraper settings for http://www.asifcomic.com/.

    NOTE(review): attribute roles are inferred from their names and
    patterns; confirm against BasicScraper in dosage.helpers.
    """

    # NOTE(review): the start page is the site's 404.html; the reason is
    # not visible here - confirm it is intentional.
    latestUrl = 'http://asifcomic.com/404.html'
    # Page URL template; %s is filled with the index described by `help`.
    imageUrl = 'http://www.asifcomic.com/%s.html'
    # Captures "strips/<digits>.<ext>", including any leading "../"
    # segments.
    imageSearch = compile(r'<img src="((?:\.\./)*strips/\d+\..+?)"')
    # Captures the "<digits>.<ext>" target (leading "../" segments are
    # excluded from the group) of the link wrapping "previous.jpg".
    prevSearch = compile(r'<a href="(?:\.\./)*(\d+\..+?)"><img[^>]*src="(?:\.\./)*previous\.jpg"')
    help = 'Index format: nn(n) (padded up to a maximum of 2 characters)'
118
class Achewood(BasicScraper):
    """Scraper settings for http://www.achewood.com/.

    NOTE(review): attribute roles are inferred from their names and
    patterns; confirm against BasicScraper and regexNamer in
    dosage.helpers.
    """

    # Presumably the page showing the newest strip.
    latestUrl = 'http://www.achewood.com/'
    # Page URL template; %s is filled with the index described by `help`.
    imageUrl = 'http://www.achewood.com/index.php?date=%s'
    # Captures an absolute image URL on m.assetbar.com. Host-name dots are
    # escaped so they only match a literal ".".
    imageSearch = compile(r'<img src="(http://m\.assetbar\.com/achewood/autaux.+?)"')
    # Captures the "/index.php?date=<8 digits>" href of the link whose
    # text is "<-".
    prevSearch = compile(r'<a href="(/index\.php\?date=\d{8})"[^>]*><-</a>')
    help = 'Index format: mmddyyyy'
    # The pattern captures the 8 digits after a URL-encoded "date=".
    namer = regexNamer(compile(r'date%3D(\d{8})'))
127
class AstronomyPOTD(BasicScraper):
    """Scraper settings for http://antwrp.gsfc.nasa.gov/apod/.

    NOTE(review): attribute roles are inferred from their names and
    patterns; confirm against BasicScraper and bounceStarter in
    dosage.helpers.
    """

    # Built from the landing page and a pattern capturing the
    # "ap<6 digits>.html" href of the link whose text is ">".
    starter = bounceStarter('http://antwrp.gsfc.nasa.gov/apod/astropix.html', compile(r'<a href="(ap\d{6}\.html)">></a>'))
    # Page URL template; %s is filled with the index described by `help`.
    imageUrl = 'http://antwrp.gsfc.nasa.gov/apod/ap%s.html'
    # Captures an "image/<4 digits>/<name>.<ext>" link target.
    imageSearch = compile(r'<a href="(image/\d{4}/.+\..+?)">')
    # Captures the "ap<6 digits>.html" href of the link whose text is "<".
    prevSearch = compile(r'<a href="(ap\d{6}\.html)"><</a>')
    help = 'Index format: yymmdd'

    # NOTE(review): the first parameter is named `cls` but no @classmethod
    # decorator is present; confirm how BasicScraper invokes `namer`.
    def namer(cls, imageUrl, pageUrl):
        """Return "<page id>-<image name>" built from the two URLs.

        The page id is the last path segment of ``pageUrl`` up to its
        first dot, minus its first two characters. The image name is the
        last path segment of ``imageUrl`` up to its first dot.
        """
        pageStem = pageUrl.split('/')[-1].split('.')[0]
        imageStem = imageUrl.split('/')[-1].split('.')[0]
        return '%s-%s' % (pageStem[2:], imageStem)