~facundo/encuentro/trunk

« back to all changes in this revision

Viewing changes to external/youtube-dl/youtube_dl/extractor/firsttv.py

  • Committer: Facundo Batista
  • Date: 2015-12-27 11:27:15 UTC
  • mto: This revision was merged to the branch mainline in revision 274.
  • Revision ID: facundo@taniquetil.com.ar-20151227112715-ztuasdhqm26hycug
Able to download TEDx.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
# encoding: utf-8
 
2
from __future__ import unicode_literals
 
3
 
 
4
from .common import InfoExtractor
 
5
from ..utils import int_or_none
 
6
 
 
7
 
 
8
class FirstTVIE(InfoExtractor):
    """Information extractor for video pages on 1tv.ru.

    The extractor downloads the page, pulls the media URL out of the
    embedded player configuration and collects the title, description,
    thumbnail, duration and like/dislike counters from the markup.
    """

    IE_NAME = '1tv'
    IE_DESC = 'Первый канал'
    # Accept both http and https, and stop the id at the first '/', '?' or
    # '#' so that query strings and fragments do not end up in the video id.
    _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>[^/?#]+)'

    _TESTS = [{
        'url': 'http://www.1tv.ru/videoarchive/73390',
        'md5': '777f525feeec4806130f4f764bc18a4f',
        'info_dict': {
            'id': '73390',
            'ext': 'mp4',
            'title': 'Олимпийские канатные дороги',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            # Raw string: '\.' is a regex escape, not a string escape.
            'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
            'duration': 149,
            'like_count': int,
            'dislike_count': int,
        },
        'skip': 'Only works from Russia',
    }, {
        'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930',
        'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
        'info_dict': {
            'id': '35930',
            'ext': 'mp4',
            'title': 'Наедине со всеми. Людмила Сенчина',
            'description': 'md5:89553aed1d641416001fe8d450f06cb9',
            'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
            'duration': 2694,
        },
        'skip': 'Only works from Russia',
    }]

    def _real_extract(self, url):
        """Return the info dict for the video page at *url*.

        The dict carries the keys 'id', 'url', 'thumbnail', 'title',
        'description', 'duration', 'like_count' and 'dislike_count'.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id, 'Downloading page')

        # The media URL is the 'file' entry that follows either the
        # jwplayer(...).setup({ call or the "var playlistObj =" assignment.
        video_url = self._html_search_regex(
            r'''(?s)(?:jwplayer\('flashvideoportal_1'\)\.setup\({|var\s+playlistObj\s*=).*?'file'\s*:\s*'([^']+)'.*?}\);''',
            webpage, 'video URL')

        # Try the heading link first, then the 'title' entry found in the
        # page source.
        title = self._html_search_regex(
            [r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
             r"'title'\s*:\s*'([^']+)'"], webpage, 'title')
        # Prefer the on-page description block; fall back to the
        # description meta tag when it is missing or empty.
        description = self._html_search_regex(
            r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>',
            webpage, 'description', default=None) or self._html_search_meta(
                'description', webpage, 'description')

        thumbnail = self._og_search_thumbnail(webpage)
        duration = self._og_search_property(
            'video:duration', webpage,
            'video duration', fatal=False)

        # The counters are searched with default=None, so a page without
        # them yields None instead of an extraction error.
        like_count = self._html_search_regex(
            r'title="Понравилось".*?/></label> \[(\d+)\]',
            webpage, 'like count', default=None)
        dislike_count = self._html_search_regex(
            r'title="Не понравилось".*?/></label> \[(\d+)\]',
            webpage, 'dislike count', default=None)

        return {
            'id': video_id,
            'url': video_url,
            'thumbnail': thumbnail,
            'title': title,
            'description': description,
            'duration': int_or_none(duration),
            'like_count': int_or_none(like_count),
            'dislike_count': int_or_none(dislike_count),
        }