2
from __future__ import unicode_literals
6
from .common import InfoExtractor
7
from ..compat import compat_str
16
class BiliBiliIE(InfoExtractor):
17
# URL pattern for BiliBili video pages on either the .tv or the .com domain.
# Named groups:
#   id       - the digits following "av" in the path.
#   page_num - optional; the digits in a trailing "/index_<N>.html" part.
# The dot before "html" is escaped so it only matches a literal "."; left
# unescaped it would match any character (e.g. "/index_3xhtml").
_VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+)\.html)?'
20
'url': 'http://www.bilibili.tv/video/av1074402/',
21
'md5': '2c301e4dab317596e837c3e7633e7d86',
27
'upload_date': '20140420',
28
'thumbnail': 're:^https?://.+\.jpg',
29
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
30
'timestamp': 1397983878,
34
'url': 'http://www.bilibili.com/video/av1041170/',
37
'title': '【BD1080P】刀语【诸神&异域】',
38
'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~',
40
'timestamp': 1396501299,
45
def _real_extract(self, url):
46
mobj = re.match(self._VALID_URL, url)
47
video_id = mobj.group('id')
48
page_num = mobj.group('page_num') or '1'
50
view_data = self._download_json(
51
'http://api.bilibili.com/view?type=json&appkey=8e9fc618fbd41e28&id=%s&page=%s' % (video_id, page_num),
53
if 'error' in view_data:
54
raise ExtractorError('%s said: %s' % (self.IE_NAME, view_data['error']), expected=True)
56
cid = view_data['cid']
57
title = unescapeHTML(view_data['title'])
59
doc = self._download_xml(
60
'http://interface.bilibili.com/v_cdn_play?appkey=8e9fc618fbd41e28&cid=%s' % cid,
62
'Downloading page %s/%s' % (page_num, view_data['pages'])
65
if xpath_text(doc, './result') == 'error':
66
raise ExtractorError('%s said: %s' % (self.IE_NAME, xpath_text(doc, './message')), expected=True)
70
for durl in doc.findall('./durl'):
71
size = xpath_text(durl, ['./filesize', './size'])
73
'url': durl.find('./url').text,
74
'filesize': int_or_none(size),
77
backup_urls = durl.find('./backup_url')
78
if backup_urls is not None:
79
for backup_url in backup_urls.findall('./url'):
80
formats.append({'url': backup_url.text})
84
'id': '%s_part%s' % (cid, xpath_text(durl, './order')),
86
'duration': int_or_none(xpath_text(durl, './length'), 1000),
91
'id': compat_str(cid),
93
'description': view_data.get('description'),
94
'thumbnail': view_data.get('pic'),
95
'uploader': view_data.get('author'),
96
'timestamp': int_or_none(view_data.get('created')),
97
'view_count': int_or_none(view_data.get('play')),
98
'duration': int_or_none(xpath_text(doc, './timelength')),
101
if len(entries) == 1:
102
entries[0].update(info)
106
'_type': 'multi_video',