~ubuntu-branches/ubuntu/vivid/youtube-dl/vivid

« back to all changes in this revision

Viewing changes to youtube_dl/extractor/common.py

Committer: Package Import Robot
Author(s): Rogério Brito
Date: 2015-03-01 02:12:13 UTC
mfrom: (44.1.24 sid)
Revision ID: package-import@ubuntu.com-20150301021213-8w657cue71kp77sz

Tags: 2015.02.28-1

http://bugs.debian.org/778765

Imported Upstream version 2015.02.28. Closes: #778765.

files added:
youtube_dl/extractor/aftenposten.py

youtube_dl/extractor/airmozilla.py

youtube_dl/extractor/camdemy.py

youtube_dl/extractor/cbssports.py

youtube_dl/extractor/ccc.py

youtube_dl/extractor/chirbit.py

youtube_dl/extractor/embedly.py

youtube_dl/extractor/history.py

youtube_dl/extractor/imgur.py

youtube_dl/extractor/kaltura.py

youtube_dl/extractor/letv.py

youtube_dl/extractor/nationalgeographic.py

youtube_dl/extractor/odnoklassniki.py

youtube_dl/extractor/puls4.py

youtube_dl/extractor/r7.py

youtube_dl/extractor/sandia.py

youtube_dl/extractor/svtplay.py

youtube_dl/extractor/tv4.py

youtube_dl/extractor/yam.py

youtube_dl/extractor/zapiks.py

files removed:
youtube_dl/extractor/soulanime.py

youtube_dl/extractor/subtitles.py

files modified:
Makefile

README.md

README.txt

debian/changelog

devscripts/check-porn.py

docs/supportedsites.md

test/helper.py

test/parameters.json

test/swftests/ArrayAccess.swf

test/swftests/ClassCall.swf

test/swftests/ClassConstruction.swf

test/swftests/ConstArrayAccess.swf

test/swftests/ConstantInt.swf

test/swftests/DictCall.swf

test/swftests/EqualsOperator.swf

test/swftests/LocalVars.swf

test/swftests/MemberAssignment.swf

test/swftests/NeOperator.swf

test/swftests/PrivateCall.swf

test/swftests/PrivateVoidCall.swf

test/swftests/StaticAssignment.swf

test/swftests/StaticRetrieval.swf

test/swftests/StringBasics.swf

test/swftests/StringCharCodeAt.swf

test/swftests/StringConversion.swf

test/test_YoutubeDL.py

test/test_jsinterp.py

test/test_subtitles.py

test/test_swfinterp.py

test/test_utils.py

test/test_youtube_signature.py

youtube-dl

youtube-dl.1

youtube-dl.bash-completion

youtube-dl.fish

youtube-dl.zsh

youtube_dl/YoutubeDL.py

youtube_dl/__init__.py

youtube_dl/aes.py

youtube_dl/downloader/__init__.py

youtube_dl/downloader/common.py

youtube_dl/downloader/external.py

youtube_dl/downloader/f4m.py

youtube_dl/downloader/hls.py

youtube_dl/downloader/http.py

youtube_dl/downloader/rtmp.py

youtube_dl/extractor/__init__.py

youtube_dl/extractor/adobetv.py

youtube_dl/extractor/adultswim.py

youtube_dl/extractor/aparat.py

youtube_dl/extractor/appletrailers.py

youtube_dl/extractor/atresplayer.py

youtube_dl/extractor/bambuser.py

youtube_dl/extractor/bandcamp.py

youtube_dl/extractor/bbccouk.py

youtube_dl/extractor/beeg.py

youtube_dl/extractor/blinkx.py

youtube_dl/extractor/bliptv.py

youtube_dl/extractor/bloomberg.py

youtube_dl/extractor/brightcove.py

youtube_dl/extractor/buzzfeed.py

youtube_dl/extractor/canalplus.py

youtube_dl/extractor/cbs.py

youtube_dl/extractor/ceskatelevize.py

youtube_dl/extractor/comedycentral.py

youtube_dl/extractor/common.py

youtube_dl/extractor/commonmistakes.py

youtube_dl/extractor/crunchyroll.py

youtube_dl/extractor/dailymotion.py

youtube_dl/extractor/dctp.py

youtube_dl/extractor/defense.py

youtube_dl/extractor/dotsub.py

youtube_dl/extractor/drtuber.py

youtube_dl/extractor/drtv.py

youtube_dl/extractor/eporner.py

youtube_dl/extractor/escapist.py

youtube_dl/extractor/facebook.py

youtube_dl/extractor/firstpost.py

youtube_dl/extractor/firsttv.py

youtube_dl/extractor/fivemin.py

youtube_dl/extractor/gamekings.py

youtube_dl/extractor/gamestar.py

youtube_dl/extractor/gdcvault.py

youtube_dl/extractor/generic.py

youtube_dl/extractor/goshgay.py

youtube_dl/extractor/ign.py

youtube_dl/extractor/izlesene.py

youtube_dl/extractor/laola1tv.py

youtube_dl/extractor/livestream.py

youtube_dl/extractor/lynda.py

youtube_dl/extractor/mit.py

youtube_dl/extractor/mitele.py

youtube_dl/extractor/mpora.py

youtube_dl/extractor/mtv.py

youtube_dl/extractor/musicvault.py

youtube_dl/extractor/nbc.py

youtube_dl/extractor/netzkino.py

youtube_dl/extractor/npo.py

youtube_dl/extractor/nrk.py

youtube_dl/extractor/ntvru.py

youtube_dl/extractor/patreon.py

youtube_dl/extractor/pornhd.py

youtube_dl/extractor/pornhub.py

youtube_dl/extractor/radiode.py

youtube_dl/extractor/rai.py

youtube_dl/extractor/rtlnl.py

youtube_dl/extractor/rtlnow.py

youtube_dl/extractor/rtp.py

youtube_dl/extractor/rtve.py

youtube_dl/extractor/sockshare.py

youtube_dl/extractor/soundgasm.py

youtube_dl/extractor/streamcz.py

youtube_dl/extractor/sunporno.py

youtube_dl/extractor/teamcoco.py

youtube_dl/extractor/ted.py

youtube_dl/extractor/telecinco.py

youtube_dl/extractor/theonion.py

youtube_dl/extractor/theplatform.py

youtube_dl/extractor/trilulilu.py

youtube_dl/extractor/tvigle.py

youtube_dl/extractor/twitch.py

youtube_dl/extractor/videolecturesnet.py

youtube_dl/extractor/viki.py

youtube_dl/extractor/vimeo.py

youtube_dl/extractor/vk.py

youtube_dl/extractor/walla.py

youtube_dl/extractor/wdr.py

youtube_dl/extractor/webofstories.py

youtube_dl/extractor/wsj.py

youtube_dl/extractor/xtube.py

youtube_dl/extractor/yahoo.py

youtube_dl/extractor/youtube.py

youtube_dl/jsinterp.py

youtube_dl/options.py

youtube_dl/postprocessor/__init__.py

youtube_dl/postprocessor/ffmpeg.py

youtube_dl/utils.py

youtube_dl/version.py

Show diffs side-by-side

added added

removed removed

youtube_dl/extractor/common.py

compiled_regex_type,

ExtractorError,

float_or_none,

HEADRequest,

int_or_none,

RegexNotFoundError,

sanitize_filename,

151

150

If not explicitly set, calculated from timestamp.

152

151

uploader_id: Nickname or id of the video uploader.

153

152

location: Physical location where the video was filmed.

154

subtitles: The subtitle file contents as a dictionary in the format

155

{language: subtitles}.

153

subtitles: The available subtitles as a dictionary in the format

154

{language: subformats}. "subformats" is a list sorted from

155

lower to higher preference, each element is a dictionary

156

with the "ext" entry and one of:

157

* "data": The subtitles file contents

158

* "url": A URL pointing to the subtitles file

159

automatic_captions: Like 'subtitles', used by the YoutubeIE for

160

automatically generated captions

156

161

duration: Length of the video in seconds, as an integer.

157

162

view_count: How many users have watched the video on the platform.

158

163

like_count: Number of positive ratings of the video

159

164

dislike_count: Number of negative ratings of the video

165

average_rating: Average rating given by users, the scale used depends on the webpage

160

166

comment_count: Number of comments on the video

161

167

comments: A list of comments, each with one or more of the following

162

168

properties (all but one of text or html optional):

264

270

265

271

def extract(self, url):

266

272

"""Extracts URL information and returns it in list of dicts."""

267

self.initialize()

268

return self._real_extract(url)

273

try:

274

self.initialize()

275

return self._real_extract(url)

276

except ExtractorError:

277

raise

278

except compat_http_client.IncompleteRead as e:

279

raise ExtractorError('A network error has occured.', cause=e, expected=True)

280

except (KeyError, StopIteration) as e:

281

raise ExtractorError('An extractor error has occured.', cause=e)

269

282

270

283

def set_downloader(self, downloader):

271

284

"""Sets the downloader for this IE."""

384

397

if blocked_iframe:

385

398

msg += ' Visit %s for more details' % blocked_iframe

386

399

raise ExtractorError(msg, expected=True)

400

if '<title>The URL you requested has been blocked</title>' in content[:512]:

401

msg = (

402

'Access to this webpage has been blocked by Indian censorship. '

403

'Use a VPN or proxy server (with --proxy) to route around it.')

404

block_msg = self._html_search_regex(

405

r'</h1><p>(.*?)</p>',

406

content, 'block message', default=None)

407

if block_msg:

408

msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')

409

raise ExtractorError(msg, expected=True)

387

410

388

411

return content

389

412

507

530

if mobj:

508

531

break

509

532

510

if os.name != 'nt' and sys.stderr.isatty():

533

if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty():

511

534

_name = '\033[0;34m%s\033[0m' % name

512

535

else:

513

536

_name = name

656

679

}

657

680

return RATING_TABLE.get(rating.lower(), None)

658

681

682

def _family_friendly_search(self, html):

683

# See http://schema.org/VideoObject

684

family_friendly = self._html_search_meta('isFamilyFriendly', html)

685

686

if not family_friendly:

687

return None

688

689

RATING_TABLE = {

690

'1': 0,

691

'true': 0,

692

'0': 18,

693

'false': 18,

694

}

695

return RATING_TABLE.get(family_friendly.lower(), None)

696

659

697

def _twitter_search_player(self, html):

660

698

return self._html_search_meta('twitter:player', html,

661

699

'twitter card player')

706

744

f.get('language_preference') if f.get('language_preference') is not None else -1,

707

745

f.get('quality') if f.get('quality') is not None else -1,

708

746

f.get('tbr') if f.get('tbr') is not None else -1,

747

f.get('filesize') if f.get('filesize') is not None else -1,

709

748

f.get('vbr') if f.get('vbr') is not None else -1,

710

ext_preference,

711

749

f.get('height') if f.get('height') is not None else -1,

712

750

f.get('width') if f.get('width') is not None else -1,

751

ext_preference,

713

752

f.get('abr') if f.get('abr') is not None else -1,

714

753

audio_ext_preference,

715

754

f.get('fps') if f.get('fps') is not None else -1,

716

f.get('filesize') if f.get('filesize') is not None else -1,

717

755

f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,

718

756

f.get('source_preference') if f.get('source_preference') is not None else -1,

719

757

f.get('format_id'),

730

768

731

769

def _is_valid_url(self, url, video_id, item='video'):

732

770

try:

733

self._request_webpage(

734

HEADRequest(url), video_id,

735

'Checking %s URL' % item)

771

self._request_webpage(url, video_id, 'Checking %s URL' % item)

736

772

return True

737

773

except ExtractorError as e:

738

774

if isinstance(e.cause, compat_HTTPError):

778

814

media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')

779

815

for i, media_el in enumerate(media_nodes):

780

816

if manifest_version == '2.0':

781

manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/'

782

+ (media_el.attrib.get('href') or media_el.attrib.get('url')))

817

manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' +

818

(media_el.attrib.get('href') or media_el.attrib.get('url')))

783

819

tbr = int_or_none(media_el.attrib.get('bitrate'))

784

820

formats.append({

785

821

'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),

803

839

'url': m3u8_url,

804

840

'ext': ext,

805

841

'protocol': 'm3u8',

806

'preference': -1,

842

'preference': preference - 1 if preference else -1,

807

843

'resolution': 'multiple',

808

844

'format_note': 'Quality selection URL',

809

845

}]

818

854

note='Downloading m3u8 information',

819

855

errnote='Failed to download m3u8 information')

820

856

last_info = None

857

last_media = None

821

858

kv_rex = re.compile(

822

859

r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')

823

860

for line in m3u8_doc.splitlines():

828

865

if v.startswith('"'):

829

866

v = v[1:-1]

830

867

last_info[m.group('key')] = v

868

elif line.startswith('#EXT-X-MEDIA:'):

869

last_media = {}

870

for m in kv_rex.finditer(line):

871

v = m.group('val')

872

if v.startswith('"'):

873

v = v[1:-1]

874

last_media[m.group('key')] = v

831

875

elif line.startswith('#') or not line.strip():

832

876

continue

833

877

else:

856

900

width_str, height_str = resolution.split('x')

857

901

f['width'] = int(width_str)

858

902

f['height'] = int(height_str)

903

if last_media is not None:

904

f['m3u8_media'] = last_media

905

last_media = None

859

906

formats.append(f)

860

907

last_info = {}

861

908

self._sort_formats(formats)

874

921

875

922

formats = []

876

923

rtmp_count = 0

877

for video in smil.findall('./body/switch/video'):

878

src = video.get('src')

879

if not src:

880

continue

881

bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)

882

width = int_or_none(video.get('width'))

883

height = int_or_none(video.get('height'))

884

proto = video.get('proto')

885

if not proto:

886

if base:

887

if base.startswith('rtmp'):

888

proto = 'rtmp'

889

elif base.startswith('http'):

890

proto = 'http'

891

ext = video.get('ext')

892

if proto == 'm3u8':

893

formats.extend(self._extract_m3u8_formats(src, video_id, ext))

894

elif proto == 'rtmp':

895

rtmp_count += 1

896

streamer = video.get('streamer') or base

897

formats.append({

898

'url': streamer,

899

'play_path': src,

900

'ext': 'flv',

901

'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),

902

'tbr': bitrate,

903

'width': width,

904

'height': height,

905

})

924

if smil.findall('./body/seq/video'):

925

video = smil.findall('./body/seq/video')[0]

926

fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)

927

formats.extend(fmts)

928

else:

929

for video in smil.findall('./body/switch/video'):

930

fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)

931

formats.extend(fmts)

932

906

933

self._sort_formats(formats)

907

934

908

935

return formats

909

936

937

def _parse_smil_video(self, video, video_id, base, rtmp_count):

938

src = video.get('src')

939

if not src:

940

return ([], rtmp_count)

941

bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)

942

width = int_or_none(video.get('width'))

943

height = int_or_none(video.get('height'))

944

proto = video.get('proto')

945

if not proto:

946

if base:

947

if base.startswith('rtmp'):

948

proto = 'rtmp'

949

elif base.startswith('http'):

950

proto = 'http'

951

ext = video.get('ext')

952

if proto == 'm3u8':

953

return (self._extract_m3u8_formats(src, video_id, ext), rtmp_count)

954

elif proto == 'rtmp':

955

rtmp_count += 1

956

streamer = video.get('streamer') or base

957

return ([{

958

'url': streamer,

959

'play_path': src,

960

'ext': 'flv',

961

'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),

962

'tbr': bitrate,

963

'width': width,

964

'height': height,

965

}], rtmp_count)

966

elif proto.startswith('http'):

967

return ([{

968

'url': base + src,

969

'ext': ext or 'flv',

970

'tbr': bitrate,

971

'width': width,

972

'height': height,

973

}], rtmp_count)

974

910

975

def _live_title(self, name):

911

976

""" Generate the title for a live video """

912

977

now = datetime.datetime.now()

970

1035

any_restricted = any_restricted or is_restricted

971

1036

return not any_restricted

972

1037

1038

def extract_subtitles(self, *args, **kwargs):

1039

if (self._downloader.params.get('writesubtitles', False) or

1040

self._downloader.params.get('listsubtitles')):

1041

return self._get_subtitles(*args, **kwargs)

1042

return {}

1043

1044

def _get_subtitles(self, *args, **kwargs):

1045

raise NotImplementedError("This method must be implemented by subclasses")

1046

1047

def extract_automatic_captions(self, *args, **kwargs):

1048

if (self._downloader.params.get('writeautomaticsub', False) or

1049

self._downloader.params.get('listsubtitles')):

1050

return self._get_automatic_captions(*args, **kwargs)

1051

return {}

1052

1053

def _get_automatic_captions(self, *args, **kwargs):

1054

raise NotImplementedError("This method must be implemented by subclasses")

1055

973

1056

974

1057

class SearchInfoExtractor(InfoExtractor):

975

1058

"""

Older »