1
from __future__ import unicode_literals
5
from .common import InfoExtractor
14
class FourTubeIE(InfoExtractor):
16
# URL pattern: http or https, optional "www." prefix, host 4tube.com, path
# /videos/<digits>. The digit run is captured as the named group "id".
_VALID_URL = r'https?://(?:www\.)?4tube\.com/videos/(?P<id>\d+)'
19
'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
20
'md5': '6516c8ac63b03de06bc8eac14362db4f',
24
'title': 'Hot Babe Holly Michaels gets her ass stuffed by black',
25
'uploader': 'WCP Club',
26
'uploader_id': 'wcp-club',
27
'upload_date': '20131031',
28
'timestamp': 1383263892,
37
def _real_extract(self, url):
38
video_id = self._match_id(url)
39
webpage = self._download_webpage(url, video_id)
41
title = self._html_search_meta('name', webpage)
42
timestamp = parse_iso8601(self._html_search_meta(
43
'uploadDate', webpage))
44
thumbnail = self._html_search_meta('thumbnailUrl', webpage)
45
uploader_id = self._html_search_regex(
46
r'<a class="img-avatar" href="[^"]+/channels/([^/"]+)" title="Go to [^"]+ page">',
47
webpage, 'uploader id', fatal=False)
48
uploader = self._html_search_regex(
49
r'<a class="img-avatar" href="[^"]+/channels/[^/"]+" title="Go to ([^"]+) page">',
50
webpage, 'uploader', fatal=False)
52
categories_html = self._search_regex(
53
r'(?s)><i class="icon icon-tag"></i>\s*Categories / Tags\s*.*?<ul class="list">(.*?)</ul>',
54
webpage, 'categories', fatal=False)
58
c.strip() for c in re.findall(
59
r'(?s)<li><a.*?>(.*?)</a>', categories_html)]
61
view_count = str_to_int(self._search_regex(
62
r'<meta itemprop="interactionCount" content="UserPlays:([0-9,]+)">',
63
webpage, 'view count', fatal=False))
64
like_count = str_to_int(self._search_regex(
65
r'<meta itemprop="interactionCount" content="UserLikes:([0-9,]+)">',
66
webpage, 'like count', fatal=False))
67
duration = parse_duration(self._html_search_meta('duration', webpage))
69
media_id = self._search_regex(
70
r'<button[^>]+data-id=(["\'])(?P<id>\d+)\1[^>]+data-quality=', webpage,
71
'media id', default=None, group='id')
74
for _, quality in re.findall(r'<button[^>]+data-quality=(["\'])(.+?)\1', webpage)]
75
if not (media_id and sources):
76
player_js = self._download_webpage(
78
r'<script[^>]id=(["\'])playerembed\1[^>]+src=(["\'])(?P<url>.+?)\2',
79
webpage, 'player JS', group='url'),
80
video_id, 'Downloading player JS')
81
params_js = self._search_regex(
82
r'\$\.ajax\(url,\ opts\);\s*\}\s*\}\)\(([0-9,\[\] ]+)\)',
83
player_js, 'initialization parameters')
84
params = self._parse_json('[%s]' % params_js, video_id)
86
sources = ['%s' % p for p in params[2]]
88
token_url = 'http://tkn.4tube.com/{0}/desktop/{1}'.format(
89
media_id, '+'.join(sources))
91
b'Content-Type': b'application/x-www-form-urlencoded',
92
b'Origin': b'http://www.4tube.com',
94
token_req = sanitized_Request(token_url, b'{}', headers)
95
tokens = self._download_json(token_req, video_id)
97
'url': tokens[format]['token'],
98
'format_id': format + 'p',
99
'resolution': format + 'p',
100
'quality': int(format),
101
} for format in sources]
102
self._sort_formats(formats)
108
'categories': categories,
109
'thumbnail': thumbnail,
110
'uploader': uploader,
111
'uploader_id': uploader_id,
112
'timestamp': timestamp,
113
'like_count': like_count,
114
'view_count': view_count,
115
'duration': duration,