~facundo/encuentro/trunk

« back to all changes in this revision

Viewing changes to external/youtube-dl/youtube_dl/extractor/dreisat.py

  • Committer: Facundo Batista
  • Date: 2015-12-27 11:27:15 UTC
  • mto: This revision was merged to the branch mainline in revision 274.
  • Revision ID: facundo@taniquetil.com.ar-20151227112715-ztuasdhqm26hycug
Able to download TEDx.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
from __future__ import unicode_literals
 
2
 
 
3
import re
 
4
 
 
5
from .common import InfoExtractor
 
6
from ..utils import (
 
7
    ExtractorError,
 
8
    unified_strdate,
 
9
)
 
10
 
 
11
 
 
12
class DreiSatIE(InfoExtractor):
    """Extractor for videos of the 3sat Mediathek.

    The video id is taken from the ``obj`` query parameter of the page URL
    and used to fetch an XML "beitragsDetails" document, from which the
    title, description, uploader, air date, thumbnails and formats are read.
    """

    IE_NAME = '3sat'
    # Accepts both http and https page URLs (the scheme itself is optional).
    _VALID_URL = r'(?:https?://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
    _TESTS = [
        {
            'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
            'md5': 'be37228896d30a88f315b638900a026e',
            'info_dict': {
                'id': '45918',
                'ext': 'mp4',
                'title': 'Waidmannsheil',
                'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
                'uploader': '3sat',
                'upload_date': '20140913'
            }
        },
        {
            'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066',
            'only_matching': True,
        },
    ]

    @staticmethod
    def _child_text(parent, path):
        """Return the text of ``parent.find(path)``.

        Returns None when ``parent`` is None, when no element matches
        ``path``, or when the matched element has no text.
        """
        if parent is None:
            return None
        node = parent.find(path)
        if node is None:
            return None
        return node.text

    @classmethod
    def _child_int(cls, parent, path):
        """Return the text of ``parent.find(path)`` converted to int.

        Returns None when the element or its text is missing, or when the
        text is not a valid integer.
        """
        try:
            return int(cls._child_text(parent, path))
        except (TypeError, ValueError):
            return None

    def _real_extract(self, url):
        """Build the info dict for the video addressed by ``url``.

        Raises:
            ExtractorError: if the service reports a status other than
                ``ok`` (flagged as expected), or if the details document
                carries no title.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
        details_doc = self._download_xml(details_url, video_id, 'Downloading video details')

        # A missing status element is treated the same as 'ok'.
        status_code = details_doc.find('./status/statuscode')
        if status_code is not None and status_code.text != 'ok':
            code = status_code.text
            if code == 'notVisibleAnymore':
                message = 'Video %s is not available' % video_id
            else:
                message = '%s returned error: %s' % (self.IE_NAME, code)
            raise ExtractorError(message, expected=True)

        thumbnails = []
        for te in details_doc.findall('.//teaserimage'):
            if not te.text:
                # A thumbnail entry without a URL is of no use.
                continue
            thumbnail = {'url': te.text}
            # The 'key' attribute is read as '<width>x<height>'; the size is
            # simply omitted when the attribute is absent or malformed.
            size = re.match(r'^([0-9]+)x([0-9]+)$', te.attrib.get('key', ''))
            if size:
                thumbnail['width'] = int(size.group(1))
                thumbnail['height'] = int(size.group(2))
            thumbnails.append(thumbnail)

        information_el = details_doc.find('.//information')
        video_title = self._child_text(information_el, './title')
        if not video_title:
            raise ExtractorError('Unable to extract title for video %s' % video_id)
        video_description = self._child_text(information_el, './detail')

        details_el = details_doc.find('.//details')
        video_uploader = self._child_text(details_el, './channel')
        airtime = self._child_text(details_el, './airtime')
        upload_date = unified_strdate(airtime) if airtime else None

        formats = []
        for fe in details_doc.findall('.//formitaet'):
            format_url = self._child_text(fe, './url')
            # Entries without a URL cannot be downloaded. URLs on
            # metafilegenerator.de are skipped as in the original code
            # (presumably indirect metafile links -- TODO confirm).
            if not format_url or format_url.startswith('http://www.metafilegenerator.de/'):
                continue
            formats.append({
                'format_id': fe.attrib.get('basetype'),
                'width': self._child_int(fe, './width'),
                'height': self._child_int(fe, './height'),
                'url': format_url,
                'filesize': self._child_int(fe, './filesize'),
                # NOTE(review): key name kept from the original code; confirm
                # that downstream consumers recognize 'video_bitrate'.
                'video_bitrate': self._child_int(fe, './videoBitrate'),
            })

        self._sort_formats(formats)

        return {
            '_type': 'video',
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'description': video_description,
            'thumbnails': thumbnails,
            # The last listed thumbnail is used as the default one; None when
            # the document lists no usable thumbnail at all.
            'thumbnail': thumbnails[-1]['url'] if thumbnails else None,
            'uploader': video_uploader,
            'upload_date': upload_date,
        }