~facundo/encuentro/trunk

« back to all changes in this revision

Viewing changes to external/youtube-dl/youtube_dl/extractor/radiojavan.py

  • Committer: Facundo Batista
  • Date: 2015-12-27 11:27:15 UTC
  • mto: This revision was merged to the branch mainline in revision 274.
  • Revision ID: facundo@taniquetil.com.ar-20151227112715-ztuasdhqm26hycug
Able to download TEDx.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
from __future__ import unicode_literals
 
2
 
 
3
import re
 
4
 
 
5
from .common import InfoExtractor
 
6
from ..utils import(
 
7
    unified_strdate,
 
8
    str_to_int,
 
9
)
 
10
 
 
11
 
 
12
class RadioJavanIE(InfoExtractor):
    """Information extractor for video pages on radiojavan.com.

    Matches URLs of the form ``radiojavan.com/videos/video/<id>`` and builds
    the list of downloadable formats from the ``RJ.video<height>p``
    assignments embedded in the page source.
    """

    _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
    _TEST = {
        'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam',
        'md5': 'e85208ffa3ca8b83534fca9fe19af95b',
        'info_dict': {
            'id': 'chaartaar-ashoobam',
            'ext': 'mp4',
            'title': 'Chaartaar - Ashoobam',
            # Raw string: a plain literal would contain the invalid escape
            # sequence "\." (a warning on modern Python); the value itself
            # is unchanged.
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'upload_date': '20150215',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        }
    }

    def _real_extract(self, url):
        """Download the video page at *url* and return its info dict.

        The returned dict carries the keys ``id``, ``title``, ``thumbnail``,
        ``upload_date``, ``view_count``, ``like_count``, ``dislike_count``
        and ``formats``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # Every "RJ.video<height>p = '<path>'" assignment in the page yields
        # one format; an optional leading slash of the path is dropped by the
        # regex so it can be appended to the media base URL.
        formats = [{
            'url': 'https://media.rdjavan.com/media/music_video/%s' % video_path,
            'format_id': '%sp' % height,
            'height': int(height),
        } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)]
        # NOTE(review): _sort_formats is inherited from InfoExtractor;
        # presumably it orders the list in place by quality -- confirm.
        self._sort_formats(formats)

        title = self._og_search_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        # The remaining fields are looked up with fatal=False.
        # NOTE(review): presumably a missing field then yields None rather
        # than aborting, and unified_strdate/str_to_int accept None --
        # confirm against the helper implementations.
        upload_date = unified_strdate(self._search_regex(
            r'class="date_added">Date added: ([^<]+)<',
            webpage, 'upload date', fatal=False))

        view_count = str_to_int(self._search_regex(
            r'class="views">Plays: ([\d,]+)',
            webpage, 'view count', fatal=False))
        like_count = str_to_int(self._search_regex(
            r'class="rating">([\d,]+) likes',
            webpage, 'like count', fatal=False))
        dislike_count = str_to_int(self._search_regex(
            r'class="rating">([\d,]+) dislikes',
            webpage, 'dislike count', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'formats': formats,
        }