~facundo/encuentro/trunk

« back to all changes in this revision

Viewing changes to external/youtube-dl/youtube_dl/extractor/ultimedia.py

  • Committer: Facundo Batista
  • Date: 2015-12-27 11:27:15 UTC
  • mto: This revision was merged to the branch mainline in revision 274.
  • Revision ID: facundo@taniquetil.com.ar-20151227112715-ztuasdhqm26hycug
Able to download TEDx.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
# coding: utf-8
 
2
from __future__ import unicode_literals
 
3
 
 
4
import re
 
5
 
 
6
from .common import InfoExtractor
 
7
from ..compat import compat_urllib_parse_urlparse
 
8
from ..utils import (
 
9
    ExtractorError,
 
10
    qualities,
 
11
    unified_strdate,
 
12
    clean_html,
 
13
)
 
14
 
 
15
 
 
16
class UltimediaIE(InfoExtractor):
    """Information extractor for video pages on ultimedia.com.

    The video page embeds an iframe pointing at a ``/deliver/`` URL; that
    iframe page carries a ``jwplayer(...).setup({...})`` call whose JSON
    argument lists the playable media.
    """

    _VALID_URL = r'https?://(?:www\.)?ultimedia\.com/default/index/video[^/]+/id/(?P<id>[\d+a-z]+)'
    _TESTS = [{
        # news
        'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
        'md5': '276a0e49de58c7e85d32b057837952a2',
        'info_dict': {
            'id': 's8uk0r',
            'ext': 'mp4',
            'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
            'description': 'md5:3e5c8fd65791487333dda5db8aed32af',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20150317',
        },
    }, {
        # music
        'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
        'md5': '2ea3513813cf230605c7e2ffe7eca61c',
        'info_dict': {
            'id': 'xvpfp8',
            'ext': 'mp4',
            'title': "Two - C'est la vie (Clip)",
            'description': 'Two',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20150224',
        },
    }]

    def _real_extract(self, url):
        """Extract metadata and formats for the video at *url*.

        Returns either the result of ``self.url_result(..., 'Youtube')``
        when a player mode points at a youtube.com URL, or a dict with the
        keys ``id``, ``title``, ``description``, ``thumbnail``,
        ``upload_date`` and ``formats``.

        Raises ExtractorError (expected) when the iframe page reports that
        the video is currently not available.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The iframe src may be protocol-relative ("//..."); pass along the
        # scheme of the page URL so it can be completed.
        # NOTE(review): exact completion rules live in _proto_relative_url.
        deliver_url = self._proto_relative_url(self._search_regex(
            r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?ultimedia\.com/deliver/[^"]+)"',
            webpage, 'deliver URL'), compat_urllib_parse_urlparse(url).scheme + ':')

        deliver_page = self._download_webpage(
            deliver_url, video_id, 'Downloading iframe page')

        if '>This video is currently not available' in deliver_page:
            raise ExtractorError(
                'Video %s is currently not available' % video_id, expected=True)

        # The jwplayer setup argument is parsed as JSON.
        player = self._parse_json(
            self._search_regex(
                r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on",
                deliver_page, 'player'),
            video_id)

        # NOTE(review): presumably ranks 'html5' above 'flash' -- confirm
        # against utils.qualities.
        quality = qualities(['flash', 'html5'])
        formats = []
        # Tolerate a missing or null 'modes' list and a null 'config' entry
        # instead of failing with KeyError / AttributeError.
        for mode in player.get('modes') or []:
            video_url = (mode.get('config') or {}).get('file')
            if not video_url:
                continue
            # Hand YouTube-hosted media over to the 'Youtube' extractor.
            if re.match(r'https?://www\.youtube\.com/.+?', video_url):
                return self.url_result(video_url, 'Youtube')
            mode_type = mode.get('type')
            formats.append({
                'url': video_url,
                'format_id': mode_type,
                'quality': quality(mode_type),
            })
        self._sort_formats(formats)

        thumbnail = player.get('image')

        # Prefer the heading on the video page; fall back to the
        # ``nameVideo`` JavaScript variable of the iframe page.
        title = clean_html((
            self._html_search_regex(
                r'(?s)<div\s+id="catArticle">.+?</div>(.+?)</h1>',
                webpage, 'title', default=None) or
            self._search_regex(
                r"var\s+nameVideo\s*=\s*'([^']+)'",
                deliver_page, 'title')))

        # Description and upload date are looked up with fatal=False.
        description = clean_html(self._html_search_regex(
            r'(?s)<span>Description</span>(.+?)</p>', webpage,
            'description', fatal=False))

        upload_date = unified_strdate(self._search_regex(
            r'Ajouté le\s*<span>([^<]+)', webpage,
            'upload date', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'formats': formats,
        }