~facundo/encuentro/trunk

« back to all changes in this revision

Viewing changes to external/youtube-dl/youtube_dl/extractor/snotr.py

  • Committer: Facundo Batista
  • Date: 2015-12-27 11:27:15 UTC
  • mto: This revision was merged to the branch mainline in revision 274.
  • Revision ID: facundo@taniquetil.com.ar-20151227112715-ztuasdhqm26hycug
Able to download TEDx.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
# coding: utf-8
 
2
from __future__ import unicode_literals
 
3
 
 
4
import re
 
5
 
 
6
from .common import InfoExtractor
 
7
from ..utils import (
 
8
    float_or_none,
 
9
    str_to_int,
 
10
    parse_duration,
 
11
)
 
12
 
 
13
 
 
14
class SnotrIE(InfoExtractor):
    """Extractor for video pages of the form snotr.com/video/<id>/<slug>."""

    # ``https?`` accepts both schemes. The earlier ``http?`` only made the
    # letter "p" optional, so https:// URLs were never matched.
    _VALID_URL = r'https?://(?:www\.)?snotr\.com/video/(?P<id>\d+)/([\w]+)'
    _TESTS = [{
        'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks',
        'info_dict': {
            'id': '13708',
            'ext': 'flv',
            'title': 'Drone flying through fireworks!',
            'duration': 247,
            'filesize_approx': 98566144,
            'description': 'A drone flying through Fourth of July Fireworks',
        }
    }, {
        'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10',
        'info_dict': {
            'id': '530',
            'ext': 'flv',
            'title': 'David Letteman - George W. Bush Top 10',
            'duration': 126,
            'filesize_approx': 8912896,
            'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for a single snotr.com video page.

        The numeric id is taken from the ``id`` group of ``_VALID_URL``; the
        page is then downloaded and scraped for title, description, view
        count, length and file size. The media URL is not read from the page
        but built from the id and a fixed CDN address.

        The three statistics are searched with ``fatal=False``, so they are
        presumably ``None`` when the markup is absent (depends on the
        ``_html_search_regex`` helper, which is defined elsewhere).
        """
        # re.match anchors at the start only, so a slug containing characters
        # outside [\w] (e.g. "-") still yields a match on its leading part.
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)

        description = self._og_search_description(webpage)
        video_url = "http://cdn.videos.snotr.com/%s.flv" % video_id

        view_count = str_to_int(self._html_search_regex(
            r'<p>\n<strong>Views:</strong>\n([\d,\.]+)</p>',
            webpage, 'view count', fatal=False))

        duration = parse_duration(self._html_search_regex(
            r'<p>\n<strong>Length:</strong>\n\s*([0-9:]+).*?</p>',
            webpage, 'duration', fatal=False))

        # The page states the size in megabytes; invscale multiplies the
        # parsed number by 1024 * 1024 (cf. the byte counts in _TESTS).
        filesize_approx = float_or_none(self._html_search_regex(
            r'<p>\n<strong>Filesize:</strong>\n\s*([0-9.]+)\s*megabyte</p>',
            webpage, 'filesize', fatal=False), invscale=1024 * 1024)

        return {
            'id': video_id,
            'description': description,
            'title': title,
            'url': video_url,
            'view_count': view_count,
            'duration': duration,
            'filesize_approx': filesize_approx,
        }