~facundo/encuentro/trunk

« back to all changes in this revision

Viewing changes to external/youtube-dl/youtube_dl/extractor/worldstarhiphop.py

  • Committer: Facundo Batista
  • Date: 2015-12-27 11:27:15 UTC
  • mto: This revision was merged to the branch mainline in revision 274.
  • Revision ID: facundo@taniquetil.com.ar-20151227112715-ztuasdhqm26hycug
Able to download TEDx.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
from __future__ import unicode_literals
 
2
 
 
3
import re
 
4
 
 
5
from .common import InfoExtractor
 
6
 
 
7
 
 
8
class WorldStarHipHopIE(InfoExtractor):
    """Extractor for video pages on worldstarhiphop.com and worldstarcandy.com.

    ``_VALID_URL`` accepts both the ``www`` and ``m`` hosts and both the
    ``videos`` and ``android`` paths; the value of the ``v`` query parameter
    is captured as the video id.
    """

    _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?v=(?P<id>.*)'
    _TESTS = [{
        # Desktop page.
        'url': 'http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO',
        'md5': '9d04de741161603bf7071bbf4e883186',
        'info_dict': {
            'id': 'wshh6a7q1ny0G34ZwuIO',
            'ext': 'mp4',
            'title': 'KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!'
        }
    }, {
        # Mobile ("android") page for the same video id.
        'url': 'http://m.worldstarhiphop.com/android/video.php?v=wshh6a7q1ny0G34ZwuIO',
        'md5': 'dc1c76c83ecc4190bb1eb143899b87d3',
        'info_dict': {
            'id': 'wshh6a7q1ny0G34ZwuIO',
            'ext': 'mp4',
            'title': 'KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!'
        }
    }]

    def _real_extract(self, url):
        """Extract the video described by the page at *url*.

        Returns one of:

        * the result of ``self.url_result`` targeting the ``Vevo`` extractor,
          when the page contains a ``videoId=...`` marker;
        * the result of ``self.url_result`` targeting the ``Youtube``
          extractor, when the media URL found contains ``youtube``;
        * otherwise a dict with the keys ``id``, ``url``, ``title`` and
          ``thumbnail`` (``thumbnail`` is ``None`` when no image link is
          found).
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Pages embedding a Vevo player are handed over to the Vevo extractor.
        # NOTE(review): in `&amp?` only the trailing `p` is optional, so the
        # pattern needs a literal `&am` after the id -- confirm whether
        # `&(?:amp;)?` was intended.
        vevo_match = re.search(r'videoId=(.*?)&amp?', webpage)
        if vevo_match is not None:
            vevo_url = 'vevo:%s' % vevo_match.group(1)
            return self.url_result(vevo_url, ie='Vevo')

        # Two page layouts are probed for the media URL, in this order.
        video_url_patterns = [
            r'so\.addVariable\("file","(.*?)"\)',
            r'<div class="artlist">\s*<a[^>]+href="([^"]+)">',
        ]
        video_url = self._search_regex(
            video_url_patterns, webpage, 'video URL')

        # A media URL mentioning youtube is delegated to the Youtube extractor.
        if 'youtube' in video_url:
            return self.url_result(video_url, ie='Youtube')

        title_patterns = [
            r'(?s)<div class="content-heading">\s*<h1>(.*?)</h1>',
            r'<span[^>]+class="tc-sp-pinned-title">(.*)</span>',
        ]
        video_title = self._html_search_regex(
            title_patterns, webpage, 'title')

        # The thumbnail is optional (default=None).
        thumbnail = self._html_search_regex(
            r'rel="image_src" href="(.*)" />', webpage, 'thumbnail',
            default=None)

        # Without a thumbnail the page is treated as a WSHH candy video and,
        # when a "candytitles" span is present, its text replaces the title.
        if not thumbnail:
            candy_match = re.search(r'candytitles.*>(.*)</span>', webpage)
            if candy_match is not None:
                video_title = candy_match.group(1)

        info = {
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'thumbnail': thumbnail,
        }
        return info