2
import re

from calibre.web.feeds.recipes import BasicNewsRecipe
5
class TNR(BasicNewsRecipe):
    """Calibre recipe that fetches the current issue of The New Republic.

    The recipe requires a subscription: ``get_browser`` logs in with the
    configured username/password, and ``parse_index`` scrapes the
    current-issue page for links to articles, rewriting each one to its
    printer-friendly ``/node/<id>/print`` URL.
    """

    title = 'The New Republic'
    __author__ = 'Krittika Goyal'

    description = '''The New Republic is a journal of opinion with an emphasis
on politics and domestic and international affairs. It carries feature
articles by staff and contributing editors. The second half of each issue
is devoted to book and the arts, theater, motion pictures, music and art.'''

    needs_subscription = True

    # Applied to the raw index page in parse_index() before it is parsed:
    # HTML comments and <script> elements are removed.
    preprocess_regexps = [
        (re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
        (re.compile(r'<script.*?</script>', re.DOTALL), lambda m: ''),
    ]

    def get_browser(self):
        """Return a browser that is logged in to newrepublic.com.

        Raises:
            ValueError: if no login form can be found on the login page, or
                if the response to the login attempt does not contain the
                ``SIGN OUT`` marker.
        """
        br = BasicNewsRecipe.get_browser(self)
        br.open('http://www.newrepublic.com/user')

        # A form must be selected before its fields can be assigned. The
        # login form is identified as the first form with a 'pass' control.
        # NOTE(review): verify this against the live login page.
        login_form = None
        control_names = ()
        for form in br.forms():
            names = [control.name for control in form.controls]
            if 'pass' in names:
                login_form, control_names = form, names
                break
        if login_form is None:
            raise ValueError('Failed to find the login form on tnr.com')
        br.form = login_form

        # The username field has been seen under two names; fill in only the
        # one this form actually has instead of assigning both blindly.
        user_field = 'user' if 'user' in control_names else 'name'
        br[user_field] = self.username
        br['pass'] = self.password

        self.log('Logging in...')
        raw = br.submit().read()
        # The response body is bytes on Python 3; normalise so the marker
        # check works regardless of what the browser hands back.
        if not isinstance(raw, bytes):
            raw = raw.encode('utf-8')
        if b'SIGN OUT' not in raw:
            raise ValueError('Failed to log in to tnr.com, check your username and password')
        self.log('Logged in successfully')
        # The framework uses the returned browser for all further downloads.
        return br

    def parse_index(self):
        """Build the article list for the current issue.

        Returns:
            A one-element list ``[(feed_title, articles)]`` where each
            article is a dict with ``title``, ``url``, ``description`` and
            ``date`` keys (the last two are always empty strings).
        """
        raw = self.index_to_soup('http://www.newrepublic.com/current-issue', raw=True)
        # raw = self.index_to_soup(open('/t/raw.html').read().decode('utf-8'), raw=True)
        # The patterns below are text patterns, so make sure we have text.
        if isinstance(raw, bytes):
            raw = raw.decode('utf-8', 'replace')
        for pat, sub in self.preprocess_regexps:
            raw = pat.sub(sub, raw)
        soup = self.index_to_soup(raw)
        feed_title = 'The New Republic Magazine Articles'

        articles = []
        for div in soup.findAll('div', attrs={'class':lambda x: x and 'field-item' in x.split()}):
            # First link in the item that is not an author link.
            a = div.find('a', href=True, attrs={'class':lambda x: x != 'author'})
            if a is None:
                continue
            art_title = self.tag_to_string(a)
            # The numeric node id embedded in the link selects the print view.
            num = re.search(r'/(\d+)/', a['href'])
            if num is None:
                continue
            url = 'http://www.newrepublic.com/node/%s/print' % num.group(1)
            self.log.info('\tFound article:', art_title, 'at', url)
            article = {'title':art_title, 'url':url, 'description':'', 'date':''}
            articles.append(article)

        return [(feed_title, articles)]
2
from calibre.web.feeds.recipes import BasicNewsRecipe
5
class TNR(BasicNewsRecipe):
    """Calibre recipe that fetches the current issue of The New Republic.

    The recipe requires a subscription: ``get_browser`` logs in with the
    configured username/password, and ``parse_index`` scrapes the
    current-issue page for links to articles, rewriting each one to its
    printer-friendly ``/node/<id>/print`` URL.
    """

    title = 'The New Republic'
    __author__ = 'Krittika Goyal'

    description = '''The New Republic is a journal of opinion with an emphasis
on politics and domestic and international affairs. It carries feature
articles by staff and contributing editors. The second half of each issue
is devoted to book and the arts, theater, motion pictures, music and art.'''

    needs_subscription = True

    # Applied to the raw index page in parse_index() before it is parsed:
    # HTML comments and <script> elements are removed.
    preprocess_regexps = [
        (re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
        (re.compile(r'<script.*?</script>', re.DOTALL), lambda m: ''),
    ]

    def get_browser(self):
        """Return a browser that is logged in to newrepublic.com.

        Raises:
            ValueError: if no login form can be found on the login page, or
                if the response to the login attempt does not contain the
                ``SIGN OUT`` marker.
        """
        br = BasicNewsRecipe.get_browser(self)
        br.open('http://www.newrepublic.com/user')

        # A form must be selected before its fields can be assigned. The
        # login form is identified as the first form with a 'pass' control.
        # NOTE(review): verify this against the live login page.
        login_form = None
        control_names = ()
        for form in br.forms():
            names = [control.name for control in form.controls]
            if 'pass' in names:
                login_form, control_names = form, names
                break
        if login_form is None:
            raise ValueError('Failed to find the login form on tnr.com')
        br.form = login_form

        # The username field has been seen under two names; fill in only the
        # one this form actually has instead of assigning both blindly.
        user_field = 'user' if 'user' in control_names else 'name'
        br[user_field] = self.username
        br['pass'] = self.password

        self.log('Logging in...')
        raw = br.submit().read()
        # The response body is bytes on Python 3; normalise so the marker
        # check works regardless of what the browser hands back.
        if not isinstance(raw, bytes):
            raw = raw.encode('utf-8')
        if b'SIGN OUT' not in raw:
            raise ValueError('Failed to log in to tnr.com, check your username and password')
        self.log('Logged in successfully')
        # The framework uses the returned browser for all further downloads.
        return br

    def parse_index(self):
        """Build the article list for the current issue.

        Returns:
            A one-element list ``[(feed_title, articles)]`` where each
            article is a dict with ``title``, ``url``, ``description`` and
            ``date`` keys (the last two are always empty strings).
        """
        raw = self.index_to_soup('http://www.newrepublic.com/current-issue', raw=True)
        # raw = self.index_to_soup(open('/t/raw.html').read().decode('utf-8'), raw=True)
        # The patterns below are text patterns, so make sure we have text.
        if isinstance(raw, bytes):
            raw = raw.decode('utf-8', 'replace')
        for pat, sub in self.preprocess_regexps:
            raw = pat.sub(sub, raw)
        soup = self.index_to_soup(raw)
        feed_title = 'The New Republic Magazine Articles'

        articles = []
        for div in soup.findAll('div', attrs={'class':lambda x: x and 'field-item' in x.split()}):
            # First link in the item that is not an author link.
            a = div.find('a', href=True, attrs={'class':lambda x: x != 'author'})
            if a is None:
                continue
            art_title = self.tag_to_string(a)
            # The numeric node id embedded in the link selects the print view.
            num = re.search(r'/(\d+)/', a['href'])
            if num is None:
                continue
            url = 'http://www.newrepublic.com/node/%s/print' % num.group(1)
            self.log.info('\tFound article:', art_title, 'at', url)
            article = {'title':art_title, 'url':url, 'description':'', 'date':''}
            articles.append(article)

        return [(feed_title, articles)]