~facundo/encuentro/trunk

« back to all changes in this revision

Viewing changes to external/youtube-dl/youtube_dl/extractor/hellporno.py

  • Committer: Facundo Batista
  • Date: 2015-12-27 11:27:15 UTC
  • mto: This revision was merged to the branch mainline in revision 274.
  • Revision ID: facundo@taniquetil.com.ar-20151227112715-ztuasdhqm26hycug
Able to download TEDx.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
from __future__ import unicode_literals
 
2
 
 
3
import re
 
4
 
 
5
from .common import InfoExtractor
 
6
from ..utils import (
 
7
    js_to_json,
 
8
    remove_end,
 
9
)
 
10
 
 
11
 
 
12
class HellPornoIE(InfoExtractor):
    """Information extractor for video pages on hellporno.com.

    The video page embeds a JavaScript ``flashvars`` object; the video id,
    thumbnail, file extension and the available stream URLs are all read
    from that object.
    """

    _VALID_URL = r'https?://(?:www\.)?hellporno\.com/videos/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/',
        'md5': '1fee339c610d2049699ef2aa699439f1',
        'info_dict': {
            'id': '149116',
            'display_id': 'dixie-is-posing-with-naked-ass-very-erotic',
            'ext': 'mp4',
            'title': 'Dixie is posing with naked ass very erotic',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:https?://.*\.jpg$',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Downloads the page, takes the title from the ``<title>`` tag,
        parses the ``var flashvars = {...};`` object and creates one format
        entry for each of ``video_url`` / ``video_alt_url`` that is present.

        Returns a dict with the keys ``id``, ``display_id``, ``title``,
        ``thumbnail``, ``categories``, ``age_limit`` and ``formats``.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # NOTE(review): remove_end presumably strips the site suffix from
        # the page title -- confirm against ..utils.
        title = remove_end(self._html_search_regex(
            r'<title>([^<]+)</title>', webpage, 'title'), ' - Hell Porno')

        flashvars = self._parse_json(self._search_regex(
            r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'),
            display_id, transform_source=js_to_json)

        video_id = flashvars.get('video_id')
        thumbnail = flashvars.get('preview_url')
        # 'postfix' carries a leading dot (default '.mp4'); fall back to the
        # default as well when the key is present but null/empty, instead of
        # failing on None[1:] or producing an empty extension.
        ext = (flashvars.get('postfix') or '.mp4')[1:]

        formats = []
        for video_url_key in ['video_url', 'video_alt_url']:
            video_url = flashvars.get(video_url_key)
            if not video_url:
                continue
            # Label for this stream, read from '<key>_text'; may be absent.
            video_text = flashvars.get('%s_text' % video_url_key)
            fmt = {
                'url': video_url,
                'ext': ext,
                'format_id': video_text,
            }
            # Only try to parse a height when a label exists: re.search()
            # raises TypeError on None, which would abort the whole
            # extraction even though a usable URL was found.
            m = None
            if video_text:
                m = re.search(r'^(?P<height>\d+)[pP]', video_text)
            if m:
                fmt['height'] = int(m.group('height'))
            formats.append(fmt)
        self._sort_formats(formats)

        # Comma-separated 'keywords' meta tag; a missing or empty tag yields
        # an empty list rather than [''].
        keywords = self._html_search_meta(
            'keywords', webpage, 'categories', default='')
        categories = keywords.split(',') if keywords else []

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'thumbnail': thumbnail,
            'categories': categories,
            'age_limit': 18,
            'formats': formats,
        }