2
from gourmet import convert
4
# Pattern for a "number-led" line: at the start of the text or right after a
# newline, optional whitespace, then one or more repetitions of
# convert.NUMBER_REGEXP, then at least one character that is not a literal
# '.', then the rest of that line.
# NOTE(review): the pattern is not a raw string, so '\n' is a real newline
# character and '\s' only survives because Python leaves unknown escapes
# untouched -- consider an r'' literal.
ing_match = re.compile('(^|\n)\s*%(num)s+[^.].*'%{'num':convert.NUMBER_REGEXP})
6
# Helper built on the module-level ing_match pattern: the visible test
# succeeds when `txt` contains a number-led line.
# NOTE(review): the value produced when the test succeeds is not visible
# here, and `tag` is not used by the visible lines -- confirm against callers.
def keep_ing (txt,tag):
7
if ing_match.search(txt):
10
# Inverse of the ing_match test: the visible condition succeeds when `txt`
# contains no number-led line.
# NOTE(review): the value produced when the test succeeds is not visible
# here, and `tag` is not used by the visible lines -- confirm against callers.
def reject_ing (txt,tag):
11
if not ing_match.search(txt):
14
# Callable parser: __call__ walks container.contents and uses test_match to
# sort each item into a group heading (remembered in self.group) or an
# ingredient dict carrying 'text' and 'inggroup' keys.
#
# COMMENT_MATCHER (class attribute) matches one HTML comment non-greedily.
# NOTE(review): it is compiled without re.DOTALL, so '.' stops at a newline
# and a comment spanning several lines is not matched.
class IngredientParser:
15
"""Create an ingredient parser that will iterate through a container when called.
17
We match either ingredients or groups. This makes it very simple
18
to parse something like a DIV that contains bolded ingredient
22
COMMENT_MATCHER = re.compile('<!--.*?-->')
25
# Matcher dictionaries are the kind consumed by test_match: an optional 'tag'
# regex applied to an item's name and an optional 'string' regex applied to
# its string.
# group_match defaults to tags named exactly "b" (case-insensitive).
group_match = {'tag':re.compile('^b$',re.IGNORECASE)},
26
# ing_block_match / ing_match both default to "any tag name".
ing_block_match={'tag':re.compile('.*')},
27
ing_match = {'tag':re.compile('.*')},
28
exclude_comments = True
30
self.group_match = group_match
31
# Fix: store the caller's ing_block_match. The previous code assigned
# ing_match here, which silently discarded the ing_block_match argument and
# made the block branch in __call__ always shadow the ing_match branch.
self.ing_block_match = ing_block_match
32
self.ing_match = ing_match
34
# Strip HTML comments (as defined by self.COMMENT_MATCHER) out of `text`.
# Each pass cuts the matched span out of the string and searches again from
# the beginning of the shortened text.
# NOTE(review): m.start() raises AttributeError when search() returns None;
# the loop/guard and the return value are not visible here -- confirm that
# the splice only runs while `m` is a match object.
def remove_comments (self, text):
35
# First comment in the text, or None when there is none.
m = self.COMMENT_MATCHER.search(text)
37
# Keep everything before and after the matched comment.
text = text[0:m.start()]+text[m.end():]
38
# Look for the next comment in the shortened text.
m = self.COMMENT_MATCHER.search(text)
41
# Walk the items of `container` and classify each one with test_match:
#   * group_match      -> remember the item's string as the current group
#   * ing_block_match  -> split the item's text on newlines, one entry per line
#   * ing_match        -> treat the item itself as a single ingredient
# Items that were not added and expose `contents` have their children
# appended to the work list so they get examined as well.
# NOTE(review): `text` is only used by the debug print in the visible lines.
def __call__ (self, text, container):
42
# NOTE(review): leftover debug output (Python 2 print statement).
print 'CALLED WITH',text,container
44
# NOTE(review): `ret` must already be bound at this point; its
# initialisation is not visible here -- confirm.
if not container: return ret
46
# `items` is the same list object as container.contents, not a copy.
items = container.contents
51
if self.test_match(self.group_match,itm):
52
# A group heading: later ingredients are tagged with this name.
self.group = itm.string
54
elif self.test_match(self.ing_block_match,itm):
55
# NOTE(review): remove_comments runs a regex search on its argument and
# slices it, so it expects a string; here it receives `itm` itself rather
# than itm.string -- confirm that `itm` is string-compatible at this point.
for i in self.remove_comments(itm).split('\n'):
58
if self.group: ing['inggroup']=self.group
61
elif self.test_match(self.ing_match,itm):
62
# `txt` is itm.string with comments stripped, or itm.string itself when
# that is falsy (None / empty).
txt = itm.string and self.remove_comments(itm.string)
64
# NOTE(review): stores the raw itm.string, not the comment-stripped `txt`
# computed above -- looks unintended, confirm.
ing = {'text':itm.string}
66
ing['inggroup']=self.group
69
if not added and hasattr(itm,'contents'):
70
sub_items = itm.contents
72
# NOTE(review): unless `items` is rebound on a line not visible here, this
# extends container.contents in place, i.e. it mutates the caller's
# container (and grows the list being walked).
items.extend(sub_items)
75
# Check `tag` against the optional regexes in `matcher_dic`:
#   'tag'    -> applied with .match() to tag.name
#   'string' -> applied with .match() to tag.string
# A criterion whose key is absent (or falsy) is not checked at all.
# NOTE(review): the outcome of each failing branch and the final return are
# not visible here -- presumably a failed criterion yields a false result;
# confirm.
def test_match (self, matcher_dic, tag):
79
if matcher_dic.get('tag'):
80
# Objects without a `name` attribute are handled separately from a
# name that simply fails the regex.
if not hasattr(tag,'name'):
82
elif not matcher_dic['tag'].match(tag.name):
86
if matcher_dic.get('string'):
87
# A missing or empty `string` is handled separately from a string that
# fails the regex.
if not hasattr(tag,'string') or not tag.string:
89
elif not matcher_dic['string'].match(tag.string):