~facundo/encuentro/trunk

« back to all changes in this revision

Viewing changes to external/youtube-dl/youtube_dl/extractor/playvid.py

  • Committer: Facundo Batista
  • Date: 2015-12-27 11:27:15 UTC
  • mto: This revision was merged to the branch mainline in revision 274.
  • Revision ID: facundo@taniquetil.com.ar-20151227112715-ztuasdhqm26hycug
Able to download TEDx.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
from __future__ import unicode_literals
 
2
 
 
3
import re
 
4
 
 
5
from .common import InfoExtractor
 
6
from ..compat import (
 
7
    compat_urllib_parse_unquote,
 
8
    compat_urllib_parse_unquote_plus,
 
9
)
 
10
from ..utils import (
 
11
    clean_html,
 
12
    ExtractorError,
 
13
)
 
14
 
 
15
 
 
16
class PlayvidIE(InfoExtractor):
    """Extractor for video pages matching ``_VALID_URL`` on www.playvid.com.

    Metadata (title, duration, thumbnail and the per-resolution media URLs)
    is read from the ``flashvars`` attribute embedded in the watch page.
    """

    _VALID_URL = r'https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
    _TEST = {
        'url': 'http://www.playvid.com/watch/RnmBNgtrrJu',
        'md5': 'ffa2f6b2119af359f544388d8c01eb6c',
        'info_dict': {
            'id': 'RnmBNgtrrJu',
            'ext': 'mp4',
            'title': 'md5:9256d01c6317e3f703848b5906880dc8',
            'duration': 82,
            'age_limit': 18,
        },
    }

    def _real_extract(self, url):
        """Download the watch page for *url* and build its info dict.

        Raises ExtractorError (flagged as expected) when the page contains
        a ``block-error`` element; the element's cleaned text is the message.

        Returns a dict with the keys ``id``, ``formats``, ``title``,
        ``thumbnail``, ``duration``, ``description`` (always None) and
        ``age_limit`` (always 18).
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The site reports problems inside a dedicated error block.
        error_match = re.search(
            r'<div class="block-error">\s*<div class="heading">\s*<div>(?P<msg>.+?)</div>\s*</div>',
            webpage)
        if error_match is not None:
            message = clean_html(error_match.group('msg'))
            raise ExtractorError(message, expected=True)

        # Most of the information is stored in the flashvars attribute.
        flashvars = self._html_search_regex(
            r'flashvars="(.+?)"', webpage, 'flashvars')

        title = None
        duration = None
        thumbnail = None
        formats = []

        for pair in compat_urllib_parse_unquote(flashvars).split(r'&'):
            var_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', pair)
            if var_match is None:
                # Not a video_vars[...] entry; nothing to read from it.
                continue
            key, val = var_match.groups()

            if key == 'title':
                title = compat_urllib_parse_unquote_plus(val)
            elif key == 'duration':
                try:
                    duration = int(val)
                except ValueError:
                    # A non-numeric duration is ignored; the field stays None.
                    pass
            elif key == 'big_thumb':
                thumbnail = val

            # The non-greedy capture above stops at the first "]=", so a
            # nested entry arrives here as e.g. "video_urls][480p".
            url_match = re.match(
                r'^video_urls\]\[(?P<resolution>[0-9]+)p', key)
            if url_match is not None:
                formats.append({
                    'height': int(url_match.group('resolution')),
                    'url': val,
                })
        self._sort_formats(formats)

        # The title should be in the flashvars; otherwise use the <title> tag.
        if title is None:
            title = self._html_search_regex(
                r'<title>(.*?)</title', webpage, 'title')

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'description': None,
            'age_limit': 18,
        }