1
from re import compile, IGNORECASE, sub
3
from dosage.helpers import BasicScraper
4
from dosage.util import fetchManyMatches, fetchUrl
7
# Scraper definition for comics published through uclick.com.  An individual
# comic is selected by the `shortName` value substituted into `baseUrl`.
class UClickScraper(BasicScraper):
10
# Index page that fetchSubmodules() scans for links to comic content pages.
homepage = 'http://content.uclick.com/a2z.html'
11
# URL template; its single %s is filled with a comic's short name.
baseUrl = 'http://www.uclick.com/client/zzz/%s/'
12
# Computed per instance: the instance's latestUrl with a literal '%s/'
# placeholder appended (presumably filled in later with a yyyy/mm/dd index,
# per `help` below -- confirm against BasicScraper).
imageUrl = property(lambda self: self.latestUrl + '%s/')
13
# Case-insensitive pattern capturing the src of a .gif <img>: either a
# site-relative /feature/NN/NN/NN/... path or an absolute
# http://images.ucomics.com/comics/<name>/NNNN/... URL.
imageSearch = compile(r'<img.+?src="(/feature/\d{2}/\d{2}/\d{2}/[^"]+\.gif|http://images\.ucomics\.com/comics/\w+/\d{4}/[^"]+\.gif)">', IGNORECASE)
14
# Case-insensitive pattern capturing the dated /client/zzz/<name>/NNNN/NN/NN/
# href of the link whose text starts with "Previous date".
prevSearch = compile(r'<a href="(/client/zzz/\w+/\d{4}/\d{2}/\d{2}/)">Previous date', IGNORECASE)
15
# Index-format hint string (presumably surfaced to users by the framework).
help = 'Index format: yyyy/mm/dd'
18
# NOTE(review): the `def` line that owns this return is not visible in this
# excerpt.  The expression formats baseUrl with the class's shortName.
return cls.baseUrl % (cls.shortName,)
21
# Scan cls.homepage for links to comic content pages and collect a
# (page stem, link text) pair for each one.  The statements that turn these
# pairs into the returned list follow further down in the file.
def fetchSubmodules(cls):
26
# Captures a whole <A HREF=".../content/<stem>.html">text</a> anchor.
# NOTE(review): the dots in "content.uclick.com" and ".html" are unescaped,
# so they match any character, not only a literal ".".
submoduleSearch = compile(r'(<A HREF="http://content.uclick.com/content/\w+.html">[^>]+?</a>)', IGNORECASE)
27
# Same anchor shape, but capturing the page stem (group 1) and the link
# text (group 2) separately.
partsMatch = compile(r'<A HREF="http://content.uclick.com/content/(\w+?).html">([^>]+?)</a>', IGNORECASE)
28
# One pattern is passed, and element [0] of the result is kept -- presumably
# the list of matches for that pattern; verify against fetchManyMatches.
matches = fetchManyMatches(cls.homepage, (submoduleSearch,))[0]
29
# List of (stem, link text) tuples.  Assumes every element of `matches` is
# the anchor text captured by submoduleSearch, so partsMatch.match() does
# not return None -- TODO confirm.
possibles = [partsMatch.match(match).groups() for match in matches]
31
def normalizeName(name):
    """Turn a display title into an alphanumeric CamelCase identifier.

    ``&Xacute;`` HTML entities are reduced to their base letter ``X``, the
    text is title-cased, and every non-alphanumeric character is dropped.
    """
    withoutAccents = sub(r'&(.)acute;', r'\1', name)
    kept = []
    for char in withoutAccents.title():
        if char.isalnum():
            kept.append(char)
    return ''.join(kept)
35
# Fetch the base page for `module` (baseUrl with `module` substituted) and
# search it with cls.imageSearch.  The result is only used as a truth value
# by the filter below.  `cls` is not a parameter here -- presumably the
# enclosing fetchSubmodules(cls) argument; confirm.
def fetchSubmodule(module):
37
return fetchUrl(cls.baseUrl % module, cls.imageSearch)
41
# Keep the normalized name of every (part, name) pair whose part is not in
# `exclusions` and for which fetchSubmodule(part) is truthy.  The `and`
# short-circuits, so excluded parts are never fetched.
# NOTE(review): `exclusions` is not defined anywhere in the visible lines.
return [normalizeName(name) for part, name in possibles if part not in exclusions and fetchSubmodule(part)]
45
# NOTE(review): the `def` line that owns this return is not visible in this
# excerpt; the later call `uclick(comics)` suggests it is `uclick(comics)`.
# Builds a dict mapping each name in `comics` to the result of
# UClickScraper.make('UClick/' + name, shortName=shortName).
# `iteritems()` exists only on Python 2 dicts.
return dict((name, UClickScraper.make('UClick/' + name, shortName=shortName))
46
for name, shortName in comics.iteritems())
53
# Name -> short-name table.  Presumably the entries of the `comics` dict
# consumed by `uclick(comics)`; the opening `comics = {` and closing `}`
# lines are not visible in this excerpt.  Each key is appended to 'UClick/'
# to name a scraper; each value is passed along as that scraper's shortName.
'AlcarazLaloSpanish': 'spla',
54
'AndersonNick': 'wpnan',
56
'AnimalCrackers': 'tmani',
64
'BallardStreet': 'crbal',
67
'BensonLisa': 'wplbe',
68
'BensonSteve': 'crsbe',
71
'BobTheSquirrel': 'bob',
75
'BottomLiners': 'tmbot',
76
'BoundAndGagged': 'tmbou',
78
'BrendaStarr': 'tmbre',
79
'BrewsterRockit': 'tmrkt',
80
'BroomHilda': 'tmbro',
81
'Candorville': 'cand',
82
'CarlsonStuart': 'sc',
83
'CatalinoKen': 'crkca',
85
'CathySpanish': 'spca',
88
'ClearBlueWater': 'cbw',
93
'ConradPaul': 'tmpco',
96
'DaviesMatt': 'tmmda',
98
'DeeringJohn': 'crjde',
100
'DinetteSetThe': 'crdin',
101
'DogEatDoug': 'crdog',
102
'DonWright': 'tmdow',
106
'ElderberriesThe': 'eld',
107
'FacesInTheNews': 'kw',
108
'FlightDeck': 'crfd',
109
'FloAndFriends': 'crflo',
110
'FlyingMccoysThe': 'fmc',
111
'ForBetterOrForWorse': 'fb',
112
'ForHeavenSSake': 'crfhs',
114
'FoxtrotSpanish': 'spft',
115
'FrankAndErnest': 'fa',
116
'FredBassetSpanish': 'spfba',
117
'FredBasset': 'tmfba',
118
'FrogApplause': 'frog',
119
'FuscoBrothersThe': 'fu',
121
'GarfieldSpanish': 'gh',
122
'GasolineAlley': 'tmgas',
123
'GaturroSpanish': 'spgat',
125
'GingerMeggs': 'gin',
126
'GingerMeggsSpanish': 'spgin',
127
'GirlsAndSports': 'crgis',
128
'GorrellBob': 'crbgo',
129
'HammondBruce': 'hb',
130
'HandelsmanWalt': 'tmwha',
131
'HeartOfTheCity': 'hc',
132
'Heathcliff': 'crhea',
133
'HeathcliffSpanish': 'crhes',
134
'HerbAndJamaal': 'crher',
136
'HorseyDavid': 'tmdho',
137
'Housebroken': 'tmhou',
138
'HubertAndAbby': 'haa',
141
'InTheBleachers': 'bl',
142
'JamesBondSpanish': 'spjb',
143
'JonesClay': 'crcjo',
144
'KallaugherKevin': 'cwkal',
145
'KChroniclesThe': 'kk',
146
'KelleySteve': 'crske',
149
'LibertyMeadows': 'crlib',
151
'LocherDick': 'tmdlo',
152
'LooseParts': 'tmloo',
155
'LuckovichMike': 'crmlu',
157
'MarletteDoug': 'tmdma',
159
'MeaningOfLilaThe': 'crlil',
160
'MeehanStreak': 'tmmee',
161
'MiddletonsThe': 'tmmid',
162
'MinimumSecurity': 'ms',
163
'ModestyBlaiseSpanish': 'spmb',
166
'MuttJeffSpanish': 'spmut',
167
'NaturalSelection': 'crns',
168
'NestHeads': 'cpnst',
171
'OhmanJack': 'tmjoh',
173
'OnAClaireDay': 'crocd',
174
'OneBigHappy': 'crobh',
175
'OtherCoastThe': 'crtoc',
176
'OutOfTheGenePool': 'wpgen',
178
'OverboardSpanish': 'spob',
179
'PepeSpanish': 'sppep',
184
'PoochCafeSpanish': 'sppoc',
186
'PowellDwane': 'crdpo',
188
'PricklyCity': 'prc',
189
'QuigmansThe': 'tmqui',
191
'RealLifeAdventures': 'rl',
192
'RedAndRover': 'wpred',
194
'ReynoldsUnwrapped': 'rw',
195
'RonaldinhoGaucho': 'ron',
196
'RonaldinhoGauchoSpanish': 'spron',
198
'SackSteve': 'tmssa',
200
'SargentBenSpanish': 'spbs',
202
'ShenemanDrew': 'tmdsh',
203
'Shoecabbage': 'shcab',
205
'SigmundSpanish': 'spsig',
207
'SmallWorld': 'small',
208
'SpaceIsThePlace': 'sitp',
209
'SpeedBump': 'crspe',
210
'StateOfTheUnion': 'crsou',
211
'StayskalWayne': 'tmwst',
213
'StrangeBrew': 'crstr',
214
'SummersDana': 'tmdsu',
215
'SuttonImpact': 'stn',
218
'TankMcnamara': 'tm',
219
'TeenageMutantNinjaTurtles': 'tmnt',
220
'TelnaesAnn': 'tmate',
221
'TheArgyleSweater': 'tas',
222
'ThePinkPanther': 'tmpnk',
223
'TheWizardOfId': 'crwiz',
224
'TheWizardOfIdSpanish': 'crwis',
226
'ThroughThickAndThin': 'cpthk',
227
'TinySepuku': 'tiny',
229
'TomTheDancingBug': 'td',
230
'TooMuchCoffeeMan': 'tmcm',
232
'TutelandiaSpanish': 'sptut',
233
'VarvelGary': 'crgva',
234
'WassermanDan': 'tmdwa',
235
'WatchYourHead': 'wpwyh',
238
'WinnieThePooh': 'crwin',
239
'WorkingItOut': 'crwio',
240
'YennySpanish': 'spyen',
243
'ZiggySpanish': 'spzi',
247
# Module-level side effect: merge the dict returned by uclick(comics) into
# this module's globals, so each key of that dict becomes a module-level name.
globals().update(uclick(comics))