~ubuntu-branches/ubuntu/vivid/youtube-dl/vivid

« back to all changes in this revision

Viewing changes to youtube_dl/extractor/lynda.py

  • Committer: Package Import Robot
  • Author(s): Rogério Brito
  • Date: 2015-03-01 02:12:13 UTC
  • mfrom: (44.1.24 sid)
  • Revision ID: package-import@ubuntu.com-20150301021213-8w657cue71kp77sz
Tags: 2015.02.28-1
Imported Upstream version 2015.02.28. Closes: #778765.

Show diffs side-by-side

added added

removed removed

Lines of Context:
3
3
import re
4
4
import json
5
5
 
6
 
from .subtitles import SubtitlesInfoExtractor
7
6
from .common import InfoExtractor
8
7
from ..compat import (
9
8
    compat_str,
16
15
)
17
16
 
18
17
 
19
 
class LyndaIE(SubtitlesInfoExtractor):
 
18
class LyndaIE(InfoExtractor):
20
19
    IE_NAME = 'lynda'
21
20
    IE_DESC = 'lynda.com videos'
22
 
    _VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html'
 
21
    _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(\d+)'
23
22
    _LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
24
23
    _NETRC_MACHINE = 'lynda'
25
24
 
28
27
 
29
28
    ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
30
29
 
31
 
    _TEST = {
 
30
    _TESTS = [{
32
31
        'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
33
32
        'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
34
33
        'info_dict': {
37
36
            'title': 'Using the exercise files',
38
37
            'duration': 68
39
38
        }
40
 
    }
 
39
    }, {
 
40
        'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0',
 
41
        'only_matching': True,
 
42
    }]
41
43
 
42
44
    def _real_initialize(self):
43
45
        self._login()
88
90
        self._check_formats(formats, video_id)
89
91
        self._sort_formats(formats)
90
92
 
91
 
        if self._downloader.params.get('listsubtitles', False):
92
 
            self._list_available_subtitles(video_id, page)
93
 
            return
94
 
 
95
 
        subtitles = self._fix_subtitles(self.extract_subtitles(video_id, page))
 
93
        subtitles = self.extract_subtitles(video_id, page)
96
94
 
97
95
        return {
98
96
            'id': video_id,
144
142
        if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
145
143
            raise ExtractorError('Unable to log in')
146
144
 
147
 
    def _fix_subtitles(self, subtitles):
148
 
        if subtitles is None:
149
 
            return subtitles  # subtitles not requested
150
 
 
151
 
        fixed_subtitles = {}
152
 
        for k, v in subtitles.items():
153
 
            subs = json.loads(v)
154
 
            if len(subs) == 0:
155
 
                continue
156
 
            srt = ''
157
 
            for pos in range(0, len(subs) - 1):
158
 
                seq_current = subs[pos]
159
 
                m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
160
 
                if m_current is None:
161
 
                    continue
162
 
                seq_next = subs[pos + 1]
163
 
                m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
164
 
                if m_next is None:
165
 
                    continue
166
 
                appear_time = m_current.group('timecode')
167
 
                disappear_time = m_next.group('timecode')
168
 
                text = seq_current['Caption']
169
 
                srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
170
 
            if srt:
171
 
                fixed_subtitles[k] = srt
172
 
        return fixed_subtitles
173
 
 
174
 
    def _get_available_subtitles(self, video_id, webpage):
 
145
    def _fix_subtitles(self, subs):
 
146
        srt = ''
 
147
        for pos in range(0, len(subs) - 1):
 
148
            seq_current = subs[pos]
 
149
            m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
 
150
            if m_current is None:
 
151
                continue
 
152
            seq_next = subs[pos + 1]
 
153
            m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
 
154
            if m_next is None:
 
155
                continue
 
156
            appear_time = m_current.group('timecode')
 
157
            disappear_time = m_next.group('timecode')
 
158
            text = seq_current['Caption'].lstrip()
 
159
            srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
 
160
        if srt:
 
161
            return srt
 
162
 
 
163
    def _get_subtitles(self, video_id, webpage):
175
164
        url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
176
 
        sub = self._download_webpage(url, None, False)
177
 
        sub_json = json.loads(sub)
178
 
        return {'en': url} if len(sub_json) > 0 else {}
 
165
        subs = self._download_json(url, None, False)
 
166
        if subs:
 
167
            return {'en': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]}
 
168
        else:
 
169
            return {}
179
170
 
180
171
 
181
172
class LyndaCourseIE(InfoExtractor):