# Option assignments for the scraper defined in this file.
# NOTE(review): the bare integers interleaved with the assignments look like
# old/new line numbers left over from a diff rendering, not program logic --
# confirm and strip them when restoring the real source file.
13
16
# Boolean switch, enabled. Presumably tells the enclosing framework to strip
# scripts from fetched pages -- nothing in the visible code reads it; TODO confirm.
remove_javascript = True
14
17
# Boolean switch, enabled. Presumably drops feeds that end up with no
# articles -- not read by the visible code; TODO confirm.
remove_empty_feeds = True
15
18
# Absolute URL of a JPEG on a Flickr static host; presumably the cover image.
cover_url = 'http://farm5.static.flickr.com/4043/4471139063_4dafced67f_o.jpg'
19
# Site root. Later code in this file prefixes it to a relative href
# (self.INDEX + recipe.a['href']) to build an article URL.
INDEX = 'http://www.theydrawandcook.com'
16
20
# Numeric limit of 30. Presumably a per-feed article cap applied by the
# framework -- the visible loop does not enforce it itself; TODO confirm.
max_articles_per_feed = 30
18
22
# Attribute names ('style', 'font'); presumably removed from every tag of the
# downloaded HTML by the framework -- TODO confirm.
remove_attributes = ['style', 'font']
35
39
current_articles = []
36
40
soup = self.index_to_soup(url)
37
recipes = soup.findAll('div', attrs={'class': 'date-outer'})
41
featured_major_slider = soup.find(name='div', attrs={'id':'featured_major_slider'})
42
recipes = featured_major_slider.findAll('li', attrs={'data-id': re.compile(r'artwork_entry_\d+', re.DOTALL)})
38
43
for recipe in recipes:
39
title = recipe.h3.a.string
40
page_url = recipe.h3.a['href']
44
page_url = self.INDEX + recipe.a['href']
45
print 'page_url is: ', page_url
46
title = recipe.find('strong').string
47
print 'title is: ', title
41
48
current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':date})
42
49
return current_articles
45
keep_only_tags = [dict(name='h3', attrs={'class':'post-title entry-title'})
46
,dict(name='div', attrs={'class':'post-body entry-content'})
51
keep_only_tags = [dict(name='h1', attrs={'id':'page_title'})
52
,dict(name='section', attrs={'id':'artwork'})
49
remove_tags = [dict(name='div', attrs={'class':['separator']})
50
,dict(name='div', attrs={'class':['post-share-buttons']})
55
remove_tags = [dict(name='article', attrs={'id':['recipe_actions', 'metadata']})