## web2lrf profile to download articles from Barrons.com.
## Can download subscriber-only content if a username and
## password are supplied.
import re

from calibre.ebooks.lrf.web.profiles import DefaultProfile

class Barrons(DefaultProfile):
    """web2lrf profile that downloads articles from Barrons.com.

    Subscriber-only content can be fetched when a username and password
    are supplied; ``get_browser`` logs in with them before any page is
    retrieved.
    """

    max_articles_per_feed = 50
    needs_subscription = True
    timefmt = ' [%a, %b %d, %Y]'
    html_description = True
    no_stylesheets = False
    # Raw string so the ``\?`` reaches the regex engine as an escaped '?'
    # instead of relying on Python passing an unknown escape through.
    match_regexps = [r'http://online.barrons.com/.*?html\?mod=.*?|file:.*']
    html2lrf_options = ['--ignore-tables', '--base-font-size=10']

    ## Don't grab articles more than 7 days old
    # NOTE(review): the statement this comment introduced was missing from
    # the file; ``oldest_article`` is assumed to be the DefaultProfile
    # attribute that controls this -- confirm against the base class.
    oldest_article = 7

    # Each entry is (pattern, replacement callable); the patterns are
    # compiled case-insensitively with '.' matching newlines so that they
    # can span multi-line HTML.
    preprocess_regexps = [
        (re.compile(pattern, re.IGNORECASE | re.DOTALL), replacement)
        for pattern, replacement in [
            ## Remove anything before the body of the article.
            (r'<body.*?<!-- article start',
             lambda match: '<body><!-- article start'),

            ## Remove any insets from the body of the article.
            (r'<div id="inset".*?</div>.?</div>.?<p', lambda match: '<p'),

            ## Remove any reprint info from the body of the article.
            (r'<hr size.*?<p', lambda match: '<p'),

            ## Remove anything after the end of the article.
            (r'<!-- article end.*?</body>', lambda match: '</body>'),
        ]
    ]

    def get_browser(self):
        """Return a browser, logged in to Barrons.com if credentials are set.

        The login page is only opened, filled in and submitted when both
        ``self.username`` and ``self.password`` are not ``None``; otherwise
        the browser obtained from ``DefaultProfile`` is returned untouched.
        """
        # NOTE(review): called on the class without an instance, exactly as
        # in the original -- presumably a classmethod on DefaultProfile.
        br = DefaultProfile.get_browser()
        if self.username is not None and self.password is not None:
            br.open('http://commerce.barrons.com/auth/login')
            br.select_form(name='login_form')
            br['user'] = self.username
            br['password'] = self.password
            # Without submitting the form the credentials are never sent.
            br.submit()
        return br

    ## Use the print version of a page when available.
    def print_version(self, url):
        """Return ``url`` with '/article/' replaced by '/article_print/'."""
        return url.replace('/article/', '/article_print/')

    ## Comment out the feeds you don't want retrieved.
    ## Because these feeds are sorted alphabetically when converted to LRF,
    ## you may want to number them to put them in the order you desire.
    # NOTE(review): the feed list had no enclosing definition in the file;
    # ``get_feeds`` is assumed to be the DefaultProfile hook that supplies
    # (title, url) pairs -- confirm against the base class.
    def get_feeds(self):
        """Return the list of (title, RSS url) pairs to download."""
        return [
            ('This Week\'s Magazine', 'http://online.barrons.com/xml/rss/3_7510.xml'),
            ('Online Exclusives', 'http://online.barrons.com/xml/rss/3_7515.xml'),
            ('Companies', 'http://online.barrons.com/xml/rss/3_7516.xml'),
            ('Markets', 'http://online.barrons.com/xml/rss/3_7517.xml'),
            ('Technology', 'http://online.barrons.com/xml/rss/3_7518.xml'),
            ('Funds/Q&A', 'http://online.barrons.com/xml/rss/3_7519.xml'),
        ]

    ## NOT CURRENTLY WORKING
    ## Disabled debugging / logout statements, kept for reference only.
    # self.browser.set_debug_responses(True)
    # logger = logging.getLogger("mechanize")
    # logger.addHandler(logging.StreamHandler(sys.stdout))
    # logger.setLevel(logging.INFO)
    # res = self.browser.open('http://online.barrons.com/logout')
    # traceback.print_exc()