151
150
If not explicitly set, calculated from timestamp.
152
151
uploader_id: Nickname or id of the video uploader.
153
152
location: Physical location where the video was filmed.
154
subtitles: The subtitle file contents as a dictionary in the format
155
{language: subtitles}.
153
subtitles: The available subtitles as a dictionary in the format
154
{language: subformats}. "subformats" is a list sorted from
155
lower to higher preference, each element is a dictionary
156
with the "ext" entry and one of:
157
* "data": The subtitles file contents
158
* "url": A URL pointing to the subtitles file
159
automatic_captions: Like 'subtitles', used by the YoutubeIE for
160
automatically generated captions
156
161
duration: Length of the video in seconds, as an integer.
157
162
view_count: How many users have watched the video on the platform.
158
163
like_count: Number of positive ratings of the video
159
164
dislike_count: Number of negative ratings of the video
165
average_rating: Average rating given by users, the scale used depends on the webpage
160
166
comment_count: Number of comments on the video
161
167
comments: A list of comments, each with one or more of the following
162
168
properties (all but one of text or html optional):
265
271
def extract(self, url):
266
272
"""Extracts URL information and returns it as a list of dicts."""
268
return self._real_extract(url)
275
return self._real_extract(url)
276
except ExtractorError:
278
except compat_http_client.IncompleteRead as e:
279
raise ExtractorError('A network error has occured.', cause=e, expected=True)
280
except (KeyError, StopIteration) as e:
281
raise ExtractorError('An extractor error has occured.', cause=e)
270
283
def set_downloader(self, downloader):
271
284
"""Sets the downloader for this IE."""
384
397
if blocked_iframe:
385
398
msg += ' Visit %s for more details' % blocked_iframe
386
399
raise ExtractorError(msg, expected=True)
400
if '<title>The URL you requested has been blocked</title>' in content[:512]:
402
'Access to this webpage has been blocked by Indian censorship. '
403
'Use a VPN or proxy server (with --proxy) to route around it.')
404
block_msg = self._html_search_regex(
405
r'</h1><p>(.*?)</p>',
406
content, 'block message', default=None)
408
msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
409
raise ExtractorError(msg, expected=True)
657
680
return RATING_TABLE.get(rating.lower(), None)
682
def _family_friendly_search(self, html):
683
# See http://schema.org/VideoObject
684
family_friendly = self._html_search_meta('isFamilyFriendly', html)
686
if not family_friendly:
695
return RATING_TABLE.get(family_friendly.lower(), None)
659
697
def _twitter_search_player(self, html):
660
698
return self._html_search_meta('twitter:player', html,
661
699
'twitter card player')
706
744
f.get('language_preference') if f.get('language_preference') is not None else -1,
707
745
f.get('quality') if f.get('quality') is not None else -1,
708
746
f.get('tbr') if f.get('tbr') is not None else -1,
747
f.get('filesize') if f.get('filesize') is not None else -1,
709
748
f.get('vbr') if f.get('vbr') is not None else -1,
711
749
f.get('height') if f.get('height') is not None else -1,
712
750
f.get('width') if f.get('width') is not None else -1,
713
752
f.get('abr') if f.get('abr') is not None else -1,
714
753
audio_ext_preference,
715
754
f.get('fps') if f.get('fps') is not None else -1,
716
f.get('filesize') if f.get('filesize') is not None else -1,
717
755
f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
718
756
f.get('source_preference') if f.get('source_preference') is not None else -1,
719
757
f.get('format_id'),
731
769
def _is_valid_url(self, url, video_id, item='video'):
733
self._request_webpage(
734
HEADRequest(url), video_id,
735
'Checking %s URL' % item)
771
self._request_webpage(url, video_id, 'Checking %s URL' % item)
737
773
except ExtractorError as e:
738
774
if isinstance(e.cause, compat_HTTPError):
778
814
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
779
815
for i, media_el in enumerate(media_nodes):
780
816
if manifest_version == '2.0':
781
manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/'
782
+ (media_el.attrib.get('href') or media_el.attrib.get('url')))
817
manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' +
818
(media_el.attrib.get('href') or media_el.attrib.get('url')))
783
819
tbr = int_or_none(media_el.attrib.get('bitrate'))
785
821
'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
818
854
note='Downloading m3u8 information',
819
855
errnote='Failed to download m3u8 information')
821
858
kv_rex = re.compile(
822
859
r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
823
860
for line in m3u8_doc.splitlines():
828
865
if v.startswith('"'):
830
867
last_info[m.group('key')] = v
868
elif line.startswith('#EXT-X-MEDIA:'):
870
for m in kv_rex.finditer(line):
872
if v.startswith('"'):
874
last_media[m.group('key')] = v
831
875
elif line.startswith('#') or not line.strip():
856
900
width_str, height_str = resolution.split('x')
857
901
f['width'] = int(width_str)
858
902
f['height'] = int(height_str)
903
if last_media is not None:
904
f['m3u8_media'] = last_media
859
906
formats.append(f)
861
908
self._sort_formats(formats)
877
for video in smil.findall('./body/switch/video'):
878
src = video.get('src')
881
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
882
width = int_or_none(video.get('width'))
883
height = int_or_none(video.get('height'))
884
proto = video.get('proto')
887
if base.startswith('rtmp'):
889
elif base.startswith('http'):
891
ext = video.get('ext')
893
formats.extend(self._extract_m3u8_formats(src, video_id, ext))
894
elif proto == 'rtmp':
896
streamer = video.get('streamer') or base
901
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
924
if smil.findall('./body/seq/video'):
925
video = smil.findall('./body/seq/video')[0]
926
fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
929
for video in smil.findall('./body/switch/video'):
930
fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
906
933
self._sort_formats(formats)
937
def _parse_smil_video(self, video, video_id, base, rtmp_count):
938
src = video.get('src')
940
return ([], rtmp_count)
941
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
942
width = int_or_none(video.get('width'))
943
height = int_or_none(video.get('height'))
944
proto = video.get('proto')
947
if base.startswith('rtmp'):
949
elif base.startswith('http'):
951
ext = video.get('ext')
953
return (self._extract_m3u8_formats(src, video_id, ext), rtmp_count)
954
elif proto == 'rtmp':
956
streamer = video.get('streamer') or base
961
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
966
elif proto.startswith('http'):
910
975
def _live_title(self, name):
911
976
""" Generate the title for a live video """
912
977
now = datetime.datetime.now()
970
1035
any_restricted = any_restricted or is_restricted
971
1036
return not any_restricted
1038
def extract_subtitles(self, *args, **kwargs):
1039
if (self._downloader.params.get('writesubtitles', False) or
1040
self._downloader.params.get('listsubtitles')):
1041
return self._get_subtitles(*args, **kwargs)
1044
def _get_subtitles(self, *args, **kwargs):
1045
raise NotImplementedError("This method must be implemented by subclasses")
1047
def extract_automatic_captions(self, *args, **kwargs):
1048
if (self._downloader.params.get('writeautomaticsub', False) or
1049
self._downloader.params.get('listsubtitles')):
1050
return self._get_automatic_captions(*args, **kwargs)
1053
def _get_automatic_captions(self, *args, **kwargs):
1054
raise NotImplementedError("This method must be implemented by subclasses")
974
1057
class SearchInfoExtractor(InfoExtractor):