202
class Downloader(object):
203
"""Episode downloader."""
204
class BaseDownloader(object):
205
"""Base episode downloader."""
205
207
def __init__(self, config):
206
208
self.config = config
211
"""Quit the download."""
212
return self._shutdown()
215
"""Cancel a download."""
216
return self._cancel()
218
def _setup_target(self, channel, section, title, extension):
219
"""Set up the target file to download."""
220
# build where to save it
221
downloaddir = self.config.get('downloaddir', '')
222
channel = platform.sanitize(channel)
223
section = platform.sanitize(section)
224
title = platform.sanitize(title)
225
fname = os.path.join(downloaddir, channel, section, title + extension)
227
# if the directory doesn't exist, create it
228
dirsecc = os.path.dirname(fname)
229
if not os.path.exists(dirsecc):
232
tempf = fname + str(time.time())
235
def download(self, channel, section, title, url, cb_progress):
    """Download an episode by delegating to this object's ``_download``.

    All the arguments are handed over untouched, in the same order, and
    whatever ``_download`` returns is returned to the caller.
    """
    result = self._download(channel, section, title, url, cb_progress)
    return result
240
class ConectarDownloader(BaseDownloader):
241
"""Episode downloader for Conectar site."""
243
def __init__(self, config):
244
super(ConectarDownloader, self).__init__(config)
207
245
self._prev_progress = None
208
246
self.browser_quit = set()
209
logger.info("Downloader inited")
210
247
self.cancelled = False
248
logger.info("Conectar downloader inited")
213
251
"""Quit the download."""
214
252
for bquit in self.browser_quit:
216
logger.info("Downloader shutdown finished")
254
logger.info("Conectar downloader shutdown finished")
219
257
"""Cancel a download."""
220
258
self.cancelled = True
259
logger.info("Conectar downloader cancelled")
222
261
@defer.inlineCallbacks
223
def download(self, canal, seccion, titulo, url, cb_progress):
262
def _download(self, canal, seccion, titulo, url, cb_progress):
224
263
"""Descarga una emisión a disco."""
225
264
self.cancelled = False
239
278
# build where to save it
240
downloaddir = self.config.get('downloaddir', '')
241
canal = platform.sanitize(canal)
242
seccion = platform.sanitize(seccion)
243
titulo = platform.sanitize(titulo)
244
fname = os.path.join(downloaddir, canal, seccion, titulo + u".avi")
246
# ver si esa seccion existe, sino crearla
247
dirsecc = os.path.dirname(fname)
248
if not os.path.exists(dirsecc):
251
# descargamos en un temporal
252
tempf = fname + str(time.time())
279
fname, tempf = self._setup_target(canal, seccion, titulo, u".avi")
253
280
logger.debug("Downloading to temporal file %r", tempf)
254
281
qoutput.put(tempf)
284
311
defer.returnValue(fname)
314
class GenericDownloader(BaseDownloader):
315
"""Episode downloader for a generic site that works with urllib2."""
318
'User-Agent': 'Mozilla/5.0',
322
def __init__(self, config):
    """Initialize the generic downloader with the given config."""
    super(GenericDownloader, self).__init__(config)

    # nothing is being downloaded yet, and no progress message was
    # reported so far
    self.downloader = None
    self._prev_progress = None

    logger.info("Generic downloader inited")
329
"""Quit the download."""
330
logger.info("Generic downloader shutdown finished")
333
"""Cancel a download."""
334
if self.downloader is not None:
335
self.downloader.cancel()
336
logger.info("Generic downloader cancelled")
338
def _parse_url(self, url):
    """Return the (hostname, port) pair for the given URL.

    If the URL has an explicit port, that one is used; otherwise the
    port is guessed from the scheme (80 for http, 443 for https).

    Raises ValueError if the URL has no explicit port and its scheme
    is neither http nor https.
    """
    default_ports = {'http': 80, 'https': 443}

    urlparts = urlparse.urlparse(url)
    if urlparts.port is not None:
        return urlparts.hostname, int(urlparts.port)

    # the second branch here used to test for 'http' again, so an https
    # URL without explicit port could never be matched
    # NOTE(review): the caller in this file connects using
    # reactor.connectTCP; confirm how TLS is handled for https URLs
    port = default_ports.get(urlparts.scheme)
    if port is None:
        raise ValueError("Unknown schema when guessing port: " +
                         repr(urlparts.scheme))
    return urlparts.hostname, port
353
@defer.inlineCallbacks
354
def _download(self, canal, seccion, titulo, url, cb_progress):
355
"""Download an episode to disk."""
357
logger.info("Download episode %r", url)
359
def report(dloaded, total):
360
"""Report download."""
361
size_mb = total // 1024 ** 2
362
perc = dloaded * 100.0 / total
363
m = "%.1f%% (de %d MB)" % (perc, size_mb)
364
if m != self._prev_progress:
366
self._prev_progress = m
368
class ReportingDownloader(HTTPDownloader):
369
"""Customize HTTPDownloader to also report and can be cancelled."""
370
def __init__(self, *args, **kwrgs):
371
self.content_length = None
373
self.connected_client = None
374
self.cancelled = False
375
HTTPDownloader.__init__(self, *args, **kwrgs)
377
def gotHeaders(self, headers):
379
clength = headers.get("content-length", [None])[0]
380
self.content_length = int(clength)
381
HTTPDownloader.gotHeaders(self, headers)
383
def pagePart(self, data):
384
"""Got part of content."""
385
self.downloaded += len(data)
386
report(self.downloaded, self.content_length)
387
HTTPDownloader.pagePart(self, data)
391
self.cancelled = True
392
if self.connected_client is not None:
393
self.connected_client.stopProducing()
395
def buildProtocol(self, addr):
396
"""Store the protocol built."""
397
p = HTTPDownloader.buildProtocol(self, addr)
398
self.connected_client = p
403
# build where to save it
404
fname, tempf = self._setup_target(canal, seccion, titulo, u".mp4")
405
logger.debug("Downloading to temporal file %r", tempf)
407
self.downloader = ReportingDownloader(url, tempf, headers=self.headers)
408
host, port = self._parse_url(url)
409
reactor.connectTCP(host, port, self.downloader)
411
yield self.downloader.deferred
412
except PartialDownloadError:
413
if self.downloader.cancelled:
418
# rename to final name and end
419
logger.info("Downloading done, renaming temp to %r", fname)
420
os.rename(tempf, fname)
421
defer.returnValue(fname)
424
# this is the entry point to get the downloaders for each type
426
None: ConectarDownloader,
427
'conectar': ConectarDownloader,
428
'generic': GenericDownloader,
287
432
if __name__ == "__main__":
288
433
h = logging.StreamHandler()
289
434
h.setLevel(logging.DEBUG)
297
442
test_config = dict(user="lxpdvtnvrqdoa@mailinator.com",
298
443
password="descargas", downloaddir='.')
300
url_episode = "http://conectate.gov.ar/educar-portal-video-web/module/"\
301
"detalleRecurso/DetalleRecurso.do?modulo=masVotados&"\
302
"recursoPadreId=50001&idRecurso=50004"
445
# url_episode = "http://conectate.gov.ar/educar-portal-video-web/module/"\
446
# "detalleRecurso/DetalleRecurso.do?modulo=masVotados&"\
447
# "recursoPadreId=50001&idRecurso=50004"
448
url_episode = "http://backend.bacua.gob.ar/video.php?v=_173fb17c"
304
450
@defer.inlineCallbacks
307
downloader = Downloader(test_config)
308
# reactor.callLater(10, downloader.cancel)
453
# downloader = ConectarDownloader(test_config)
454
downloader = GenericDownloader(test_config)
455
# reactor.callLater(5, downloader.cancel)
310
457
fname = yield downloader.download("test-ej-canal", "secc", "tit",
311
458
url_episode, show)