~vorlon/ubuntu/saucy/gourmet/trunk

« back to all changes in this revision

Viewing changes to src/lib/importers/html_plugins/html_helpers.py

  • Committer: Bazaar Package Importer
  • Author(s): Rolf Leggewie
  • Date: 2008-07-26 13:29:41 UTC
  • Revision ID: james.westby@ubuntu.com-20080726132941-6ldd73qmacrzz0bn
Tags: upstream-0.14.0
Import upstream version 0.14.0

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
import re
 
2
from gourmet import convert
 
3
 
 
4
# Heuristic for "this text contains an ingredient line": at the start of the
# text or right after a newline, optional whitespace, then whatever
# convert.NUMBER_REGEXP matches (presumably a quantity -- confirm in
# gourmet.convert), then a character other than '.'.
# Raw string so that \s and \n reach the regex engine as regex escapes
# instead of relying on Python's handling of unknown string escapes.
ing_match = re.compile(r'(^|\n)\s*%(num)s+[^.].*'%{'num':convert.NUMBER_REGEXP})
 
5
 
 
6
def keep_ing (txt,tag):
    """Return txt when it matches the module-level ing_match pattern.

    Returns None when the pattern is not found. The tag argument is
    accepted but not used.
    """
    return txt if ing_match.search(txt) else None
 
9
 
 
10
def reject_ing (txt,tag):
    """Return txt when it does NOT match the module-level ing_match pattern.

    Returns None when the pattern is found. The tag argument is
    accepted but not used.
    """
    return None if ing_match.search(txt) else txt
 
13
 
 
14
class IngredientParser:
    """Create an ingredient parser that will iterate through a container when called.

    We match either ingredients or groups. This makes it very simple
    to parse something like a DIV that contains bolded ingredient
    groups

    A "matcher" is a dictionary with an optional 'tag' entry and an
    optional 'string' entry, each a compiled regular expression that is
    matched against a node's .name / .string respectively. A false
    matcher (None, {}) never matches.

    Nodes are duck-typed: only .name, .string and .contents are used
    (presumably BeautifulSoup nodes -- confirm against the callers).
    """

    # DOTALL so that comments spanning several lines are removed as well.
    COMMENT_MATCHER = re.compile('<!--.*?-->',re.DOTALL)

    # NOTE: the dictionary defaults below are shared between instances;
    # that is safe only because this class never mutates them. They are
    # kept (rather than replaced by None) because None already means
    # "never match" to test_match.
    def __init__ (self,
                  group_match = {'tag':re.compile('^b$',re.IGNORECASE)},
                  ing_block_match={'tag':re.compile('.*')},
                  ing_match = {'tag':re.compile('.*')},
                  exclude_comments = True
                  ):
        """Store the matchers used by __call__.

        group_match -- matcher for nodes whose .string names the
                       current ingredient group.
        ing_block_match -- matcher for nodes whose text holds several
                       ingredients, one per line.
        ing_match -- matcher for nodes whose text is one ingredient.
        exclude_comments -- when true, HTML comments are stripped from
                       ingredient text.
        """
        self.group_match = group_match
        self.ing_block_match = ing_block_match
        self.ing_match = ing_match
        self.exclude_comments = exclude_comments

    def remove_comments (self, text):
        """Return text with everything COMMENT_MATCHER matches removed."""
        # Re-scan from the start after each removal, so a comment that
        # only forms once an inner one is removed is stripped as well.
        m = self.COMMENT_MATCHER.search(text)
        while m:
            text = text[:m.start()]+text[m.end():]
            m = self.COMMENT_MATCHER.search(text)
        return text

    def _node_text (self, node):
        """Return node's text (comment-stripped if configured) or None."""
        if isinstance(node,(str,type(u''))):
            text = node
        else:
            text = getattr(node,'string',None)
        if text and self.exclude_comments:
            text = self.remove_comments(text)
        return text

    def _make_ing (self, text):
        """Build an ingredient dictionary, tagged with the current group."""
        ing = {'text':text}
        if self.group:
            ing['inggroup']=self.group
        return ing

    def __call__ (self, text, container):
        """Collect ingredients from container's descendants, in order.

        text -- not used by this method.
        container -- node whose .contents are walked depth-first; a
                     false container yields an empty list.

        Returns a list of dictionaries with a 'text' key and, when a
        group node was seen earlier, an 'inggroup' key.
        """
        ret = []
        if not container: return ret
        self.group = None
        # Use a reversed *copy* as a stack: pop() then yields nodes in
        # document order without reordering the container's own children.
        items = list(reversed(container.contents))
        while items:
            itm = items.pop()
            added = False
            if self.test_match(self.group_match,itm):
                self.group = itm.string
                added = True
            elif self.test_match(self.ing_block_match,itm):
                block = self._node_text(itm)
                if block:
                    for line in block.split('\n'):
                        if line:
                            ret.append(self._make_ing(line))
                            added = True
            elif self.test_match(self.ing_match,itm):
                txt = self._node_text(itm)
                if txt:
                    ret.append(self._make_ing(txt))
                    added = True
            # Nothing usable at this level: look inside the node instead.
            if not added and hasattr(itm,'contents'):
                items.extend(reversed(itm.contents))
        return ret

    def test_match (self, matcher_dic, tag):
        """Return True when tag satisfies every criterion in matcher_dic.

        A false matcher_dic never matches. A node lacking the attribute
        a criterion needs (.name for 'tag', a true .string for 'string')
        fails that criterion.
        """
        if not matcher_dic:
            return False
        tag_matcher = matcher_dic.get('tag')
        if tag_matcher:
            name = getattr(tag,'name',None)
            if name is None or not tag_matcher.match(name):
                return False
        string_matcher = matcher_dic.get('string')
        if string_matcher:
            string = getattr(tag,'string',None)
            if not string or not string_matcher.match(string):
                return False
        return True
 
94
    
 
95
                
 
96
        
 
97
            
 
98