19
class LyndaIE(SubtitlesInfoExtractor):
18
class LyndaIE(InfoExtractor):
21
20
IE_DESC = 'lynda.com videos'
22
_VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html'
21
_VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(\d+)'
23
22
_LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
24
23
_NETRC_MACHINE = 'lynda'
88
90
self._check_formats(formats, video_id)
89
91
self._sort_formats(formats)
91
if self._downloader.params.get('listsubtitles', False):
92
self._list_available_subtitles(video_id, page)
95
subtitles = self._fix_subtitles(self.extract_subtitles(video_id, page))
93
subtitles = self.extract_subtitles(video_id, page)
144
142
if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
145
143
raise ExtractorError('Unable to log in')
147
def _fix_subtitles(self, subtitles):
148
if subtitles is None:
149
return subtitles # subtitles not requested
152
for k, v in subtitles.items():
157
for pos in range(0, len(subs) - 1):
158
seq_current = subs[pos]
159
m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
160
if m_current is None:
162
seq_next = subs[pos + 1]
163
m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
166
appear_time = m_current.group('timecode')
167
disappear_time = m_next.group('timecode')
168
text = seq_current['Caption']
169
srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
171
fixed_subtitles[k] = srt
172
return fixed_subtitles
174
def _get_available_subtitles(self, video_id, webpage):
145
def _fix_subtitles(self, subs):
147
for pos in range(0, len(subs) - 1):
148
seq_current = subs[pos]
149
m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
150
if m_current is None:
152
seq_next = subs[pos + 1]
153
m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
156
appear_time = m_current.group('timecode')
157
disappear_time = m_next.group('timecode')
158
text = seq_current['Caption'].lstrip()
159
srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
163
def _get_subtitles(self, video_id, webpage):
175
164
url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
176
sub = self._download_webpage(url, None, False)
177
sub_json = json.loads(sub)
178
return {'en': url} if len(sub_json) > 0 else {}
165
subs = self._download_json(url, None, False)
167
return {'en': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]}
181
172
class LyndaCourseIE(InfoExtractor):