~facundo/encuentro/trunk

« back to all changes in this revision

Viewing changes to external/youtube-dl/youtube_dl/extractor/netzkino.py

  • Committer: Facundo Batista
  • Date: 2015-12-27 11:27:15 UTC
  • mto: This revision was merged to the branch mainline in revision 274.
  • Revision ID: facundo@taniquetil.com.ar-20151227112715-ztuasdhqm26hycug
Able to download TEDx.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
# coding: utf-8
 
2
from __future__ import unicode_literals
 
3
 
 
4
import re
 
5
 
 
6
from .common import InfoExtractor
 
7
from ..utils import (
 
8
    clean_html,
 
9
    int_or_none,
 
10
    js_to_json,
 
11
    parse_iso8601,
 
12
)
 
13
 
 
14
 
 
15
class NetzkinoIE(InfoExtractor):
    """Extractor for film pages on netzkino.de.

    URLs have the form ``netzkino.de/#!/<category>/<slug>``; both parts are
    captured by ``_VALID_URL`` and used to look the film up in the category
    listing returned by the JSON API.
    """

    _VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/(?P<category>[^/]+)/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://www.netzkino.de/#!/scifikino/rakete-zum-mond',
        'md5': '92a3f8b76f8d7220acce5377ea5d4873',
        'info_dict': {
            'id': 'rakete-zum-mond',
            'ext': 'mp4',
            'title': 'Rakete zum Mond (Endstation Mond, Destination Moon)',
            'comments': 'mincount:3',
            'description': 'md5:1eddeacc7e62d5a25a2d1a7290c64a28',
            'upload_date': '20120813',
            # Raw string: same value as before, but no invalid '\.' escape.
            'thumbnail': r're:https?://.*\.jpg$',
            'timestamp': 1344858571,
            'age_limit': 12,
        },
        'params': {
            'skip_download': 'Download only works from Germany',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the film addressed by *url*.

        Downloads the category listing from the API, picks the post whose
        slug equals the video id, then downloads the player script to obtain
        the per-protocol URL templates used to build the format list.

        Raises ExtractorError if the category listing contains no post with
        the requested slug.
        """
        # Imported locally so this block does not depend on the module-level
        # import list being changed.
        from ..utils import ExtractorError

        mobj = re.match(self._VALID_URL, url)
        category_id = mobj.group('category')
        video_id = mobj.group('id')

        api_url = 'http://api.netzkino.de.simplecache.net/capi-2.0a/categories/%s.json?d=www' % category_id
        api_info = self._download_json(api_url, video_id)

        # Use a default so a missing slug yields a proper extractor error
        # instead of leaking a bare StopIteration.
        info = next(
            (p for p in api_info['posts'] if p['slug'] == video_id), None)
        if info is None:
            raise ExtractorError(
                'Unable to find video %s in category %s'
                % (video_id, category_id))
        custom_fields = info['custom_fields']

        production_js = self._download_webpage(
            'http://www.netzkino.de/beta/dist/production.min.js', video_id,
            note='Downloading player code')
        avo_js = self._search_regex(
            r'var urlTemplate=(\{.*?"\})',
            production_js, 'URL templates')
        templates = self._parse_json(
            avo_js, video_id, transform_source=js_to_json)

        # File-name suffix appended to the filled-in template, per template key.
        suffix = {
            'hds': '.mp4/manifest.f4m',
            'hls': '.mp4/master.m3u8',
            'pmd': '.mp4',
        }
        film_fn = custom_fields['Streaming'][0]
        formats = [{
            'format_id': key,
            'ext': 'mp4',
            'url': tpl.replace('{}', film_fn) + suffix[key],
        } for key, tpl in templates.items()
            # Skip template keys we have no suffix for rather than crashing
            # with a KeyError.
            if key in suffix]
        self._sort_formats(formats)

        comments = [{
            'timestamp': parse_iso8601(c.get('date'), delimiter=' '),
            'id': c['id'],
            'author': c['name'],
            'html': c['content'],
            # A parent of 0 (or no parent at all) marks a top-level comment.
            'parent': 'root' if c.get('parent', 0) == 0 else c['parent'],
        } for c in info.get('comments', [])]

        # 'FSK' may be absent; fall back to None so int_or_none() receives
        # None instead of the lookup raising TypeError on None[0].
        fsk = custom_fields.get('FSK') or [None]

        return {
            'id': video_id,
            'formats': formats,
            'comments': comments,
            'title': info['title'],
            'age_limit': int_or_none(fsk[0]),
            'timestamp': parse_iso8601(info.get('date'), delimiter=' '),
            'description': clean_html(info.get('content')),
            'thumbnail': info.get('thumbnail'),
            'playlist_title': api_info.get('title'),
            'playlist_id': category_id,
        }