~facundo/encuentro/trunk

Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding

192

"""

193

for style in sub_root.findall('./styles/style'):

194

output += 'Style: ' + style.attrib["name"]

195

output += ',' + style.attrib["font_name"]

196

output += ',' + style.attrib["font_size"]

197

output += ',' + style.attrib["primary_colour"]

198

output += ',' + style.attrib["secondary_colour"]

199

output += ',' + style.attrib["outline_colour"]

200

output += ',' + style.attrib["back_colour"]

201

output += ',' + ass_bool(style.attrib["bold"])

202

output += ',' + ass_bool(style.attrib["italic"])

203

output += ',' + ass_bool(style.attrib["underline"])

204

output += ',' + ass_bool(style.attrib["strikeout"])

205

output += ',' + style.attrib["scale_x"]

206

output += ',' + style.attrib["scale_y"]

207

output += ',' + style.attrib["spacing"]

208

output += ',' + style.attrib["angle"]

209

output += ',' + style.attrib["border_style"]

210

output += ',' + style.attrib["outline"]

211

output += ',' + style.attrib["shadow"]

212

output += ',' + style.attrib["alignment"]

213

output += ',' + style.attrib["margin_l"]

214

output += ',' + style.attrib["margin_r"]

215

output += ',' + style.attrib["margin_v"]

216

output += ',' + style.attrib["encoding"]

217

output += '\n'

218

219

output += """

220

[Events]

221

Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text

222

"""

223

for event in sub_root.findall('./events/event'):

224

output += 'Dialogue: 0'

225

output += ',' + event.attrib["start"]

226

output += ',' + event.attrib["end"]

227

output += ',' + event.attrib["style"]

228

output += ',' + event.attrib["name"]

229

output += ',' + event.attrib["margin_l"]

230

output += ',' + event.attrib["margin_r"]

231

output += ',' + event.attrib["margin_v"]

232

output += ',' + event.attrib["effect"]

233

output += ',' + event.attrib["text"]

234

output += '\n'

235

236

return output

237

238

def _extract_subtitles(self, subtitle):

239

sub_root = compat_etree_fromstring(subtitle)

240

return [{

241

'ext': 'srt',

242

'data': self._convert_subtitles_to_srt(sub_root),

243

}, {

244

'ext': 'ass',

245

'data': self._convert_subtitles_to_ass(sub_root),

246

}]

247

248

def _get_subtitles(self, video_id, webpage):

249

subtitles = {}

250

for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage):

251

sub_page = self._download_webpage(

252

'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,

253

video_id, note='Downloading subtitles for ' + sub_name)

254

id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)

255

iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)

256

data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)

257

if not id or not iv or not data:

258

continue

259

subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')

260

lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)

261

if not lang_code:

262

continue

263

subtitles[lang_code] = self._extract_subtitles(subtitle)

264

return subtitles

265

266

def _real_extract(self, url):

267

mobj = re.match(self._VALID_URL, url)

268

video_id = mobj.group('video_id')

269

270

if mobj.group('prefix') == 'm':

271

mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage')

272

webpage_url = self._search_regex(r'<link rel="canonical" href="([^"]+)" />', mobile_webpage, 'webpage_url')

273

else:

274

webpage_url = 'http://www.' + mobj.group('url')

275

276

webpage = self._download_webpage(self._add_skip_wall(webpage_url), video_id, 'Downloading webpage')

277

note_m = self._html_search_regex(

278

r'<div class="showmedia-trailer-notice">(.+?)</div>',

279

webpage, 'trailer-notice', default='')

280

if note_m:

281

raise ExtractorError(note_m)

282

283

mobj = re.search(r'Page\.messaging_box_controller\.addItems$\[(?P<msg>{.+?})\]$', webpage)

284

if mobj:

285

msg = json.loads(mobj.group('msg'))

286

if msg.get('type') == 'error':

287

raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True)

288

289

if 'To view this, please log in to verify you are 18 or older.' in webpage:

290

self.raise_login_required()

291

292

video_title = self._html_search_regex(

293

r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',

294

webpage, 'video_title')

295

video_title = re.sub(r' {2,}', ' ', video_title)

296

video_description = self._html_search_regex(

297

r'<script[^>]*>\s*.+?\[media_id=%s\].+?"description"\s*:\s*"([^"]+)' % video_id,

298

webpage, 'description', default=None)

299

if video_description:

300

video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))

301

video_upload_date = self._html_search_regex(

302

[r'<div>Availability for free users:(.+?)</div>', r'<div>[^<>]+<span>\s*(.+?\d{4})\s*</span></div>'],

303

webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)

304

if video_upload_date:

305

video_upload_date = unified_strdate(video_upload_date)

306

video_uploader = self._html_search_regex(

307

r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', webpage,

308

'video_uploader', fatal=False)

309

310

playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))

311

playerdata_req = sanitized_Request(playerdata_url)

312

playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url})

313

playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')

314

playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info')

315

316

stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, 'stream_id')

317

video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)

318

319

formats = []

320

for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):

321

stream_quality, stream_format = self._FORMAT_IDS[fmt]

322

video_format = fmt + 'p'

323

streamdata_req = sanitized_Request(

324

'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'

325

% (stream_id, stream_format, stream_quality),

326

compat_urllib_parse.urlencode({'current_page': url}).encode('utf-8'))

327

streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')

328

streamdata = self._download_xml(

329

streamdata_req, video_id,

330

note='Downloading media info for %s' % video_format)

331

stream_info = streamdata.find('./{default}preload/stream_info')

332

video_url = stream_info.find('./host').text

333

video_play_path = stream_info.find('./file').text

334

metadata = stream_info.find('./metadata')

335

format_info = {

336

'format': video_format,

337

'format_id': video_format,

338

'height': int_or_none(xpath_text(metadata, './height')),

339

'width': int_or_none(xpath_text(metadata, './width')),

340

}

341

342

if '.fplive.net/' in video_url:

343

video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())

344

parsed_video_url = compat_urlparse.urlparse(video_url)

345

direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(

346

netloc='v.lvlt.crcdn.net',

347

path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_play_path.split(':')[-1])))

348

if self._is_valid_url(direct_video_url, video_id, video_format):

349

format_info.update({

350

'url': direct_video_url,

351

})

352

formats.append(format_info)

353

continue

354

355

format_info.update({

356

'url': video_url,

357

'play_path': video_play_path,

358

'ext': 'flv',

359

})

360

formats.append(format_info)

361

362

subtitles = self.extract_subtitles(video_id, webpage)

363

364

return {

365

'id': video_id,

366

'title': video_title,

367

'description': video_description,

368

'thumbnail': video_thumbnail,

369

'uploader': video_uploader,

370

'upload_date': video_upload_date,

371

'subtitles': subtitles,

372

'formats': formats,

373

}

374

375

376

class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):

377

IE_NAME = "crunchyroll:playlist"

378

379

380

_TESTS = [{

381

'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',

382

'info_dict': {

383

'id': 'a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',

384

'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi'

385

386

'playlist_count': 13,

387

}, {

388

# geo-restricted (US), 18+ maturity wall, non-premium available

389

'url': 'http://www.crunchyroll.com/cosplay-complex-ova',

390

'info_dict': {

391

'id': 'cosplay-complex-ova',

392

'title': 'Cosplay Complex OVA'

393

394

'playlist_count': 3,

395

'skip': 'Georestricted',

396

}, {

397

# geo-restricted (US), 18+ maturity wall, non-premium will be available since 2015.11.14

398

'url': 'http://www.crunchyroll.com/ladies-versus-butlers?skip_wall=1',

399

'only_matching': True,

400

}]

401

402

def _real_extract(self, url):

403

show_id = self._match_id(url)

404

405

webpage = self._download_webpage(self._add_skip_wall(url), show_id)

406

title = self._html_search_regex(

407

r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',

408

webpage, 'title')

409

episode_paths = re.findall(

410

r'(?s)<li id="showview_videos_media_[0-9]+"[^>]+>.*?<a href="([^"]+)"',

411

webpage)

412

entries = [

413

self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll')

414

for ep in episode_paths

415

]

416

entries.reverse()

417

418

return {

419

'_type': 'playlist',

420

'id': show_id,

421

'title': title,

422

'entries': entries,

423

}

Older »