~vcs-imports/kupfer/master-new

« back to all changes in this revision

Viewing changes to bookmarks.py

  • Committer: Ulrik Sverdrup
  • Date: 2009-02-18 00:41:20 UTC
  • Revision ID: git-v1:4debb0ece5bd0731b612db62749b84ee97258e71
Move main application into kupfer/ package

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
#!/usr/bin/env python
2
 
# -*- coding: utf8 -*-
3
 
 
4
 
"""
5
 
Original file much thanks to
6
 
http://www.kylo.net/deli.py.txt
7
 
 
8
 
Modifications released under GPL v2 (or any later)
9
 
Ulrik Sverdrup <ulrik.sverdrup@gmail.com>
10
 
"""
11
 
 
12
 
from ConfigParser import RawConfigParser
13
 
from HTMLParser import HTMLParser
14
 
from os.path import join, expanduser, exists, basename
15
 
 
16
 
def get_firefox_home_file(needed_file):
    """
    Return the path of needed_file inside the user's Firefox profile.

    The profile directory is looked up in ~/.mozilla/firefox/profiles.ini:
    the first section whose "Default" option is "1" is used; if there is
    none, the first section that has a "Path" option is used instead.

    needed_file: file name inside the profile, e.g. "bookmarks.html"

    Returns the joined path (its existence is not checked), or "" if
    profiles.ini yields no profile path.
    """
    firefox_dir = expanduser("~/.mozilla/firefox/")
    # Sections without a "Default" option fall back to this value
    config = RawConfigParser({"Default": "0"})
    # firefox_dir is already expanded; read() ignores a missing file
    config.read(join(firefox_dir, "profiles.ini"))
    path = None

    for section in config.sections():
        if config.has_option(section, "Default") and config.get(section, "Default") == "1":
            # the default profile always wins
            path = config.get(section, "Path")
            break
        elif path is None and config.has_option(section, "Path"):
            # remember the first profile seen as a fallback
            path = config.get(section, "Path")

    if path is None:
        return ""

    # An absolute profile path is used as-is
    if path.startswith("/"):
        return join(path, needed_file)

    # otherwise it is relative to the firefox directory
    return join(firefox_dir, path, needed_file)
36
 
 
37
 
 
38
 
class BookmarksParser(HTMLParser):
    """
    Collect the bookmarks found in a bookmarks HTML document.

    After the document has been passed to feed(), all_items holds one
    dictionary per accepted <a> element, with the keys "href", "title"
    and "tags". The tags are the normalized texts of the <h3> headings
    seen so far, minus one for every closed <dl> list (the bookmark
    folders).
    """

    def __init__(self):
        # this is python: explicitly invoke base class constructor
        HTMLParser.__init__(self)
        self.inH3 = False        # currently inside an <h3> element
        self.inA = False         # currently inside an <a> element
        self.tagCount = 0        # number of <h3> elements opened so far
        self.tags = []           # stack of folder tags in effect
        self.currentTag = ""     # text of the <h3> being read
        self.href = ""           # href of the <a> being read
        self.description = ""    # text of the <a> being read
        self.ignore = ""         # urls to skip, see setIgnoreUrls

        self.debug = False
        self.all_items = []

    def setBaseTag(self, baseTag):
        """Push baseTag onto the tag stack."""
        self.tags.append(baseTag)

    def setIgnoreUrls(self, ignore):
        """Set the container of urls that must not be collected."""
        self.ignore = ignore

    def normalizeText(self, text):
        """
        Return text without apostrophes, double quotes, backticks,
        colons, commas, spaces and tabs.
        """
        # The tab is assumed to be what the last replace originally
        # targeted; as written it could never match, since single spaces
        # were already removed before it ran.
        for char in "'\"`:, \t":
            text = text.replace(char, '')
        return text

    def handle_starttag(self, tag, attrs):
        """Track entry into <a> and <h3> elements."""
        if tag == "a":
            self.inA = True
            for name, value in attrs:
                if name == "href":
                    # an attribute without a value is reported as None
                    self.href = value or ""
        elif tag == "h3":
            self.inH3 = True
            self.tagCount += 1
        # opening a <dl> needs no action: the folder's tag was already
        # pushed when its <h3> heading was closed

    def handle_endtag(self, tag):
        """Finish the current heading, bookmark or folder."""
        if tag == "h3":
            self.tags.append(self.currentTag)
            self.currentTag = ""
            self.inH3 = False

        if tag == "a":
            if self.debug:
                print("")
                print("href = %s" % (self.href,))
                print("description = %s" % (self.description,))
                print("tags = %s" % (self.tags,))

            # validate href: non-empty, accepted scheme, not ignored
            validHref = True
            if len(self.href) == 0:
                validHref = False
            if self.href.split(":")[0] not in ("http", "https", "news", "ftp"):
                validHref = False
            if self.href in self.ignore:
                validHref = False

            if validHref:
                bookmark = {
                    "href": self.href,
                    "title": self.description,
                    # store a copy: self.tags is appended to in place
                    # when later folders are entered
                    "tags": list(self.tags),
                }
                self.all_items.append(bookmark)

            self.href = ""
            self.description = ""
            self.inA = False

        # exiting a dl means end of a bookmarks folder, pop the last tag off
        if tag == "dl":
            self.tags = self.tags[:-1]

    # Note: only text delivered through handle_data is collected; anything
    # the base class reports via handle_charref/handle_entityref (escaped
    # characters) is not. Add those methods to collect it as well.
    def handle_data(self, data):
        """Accumulate the text of the current heading and/or bookmark."""
        if self.inH3:
            self.currentTag += self.normalizeText(data)

        if self.inA:
            self.description += data
136
 
 
137
 
def get_bookmarks(bookmarks_file):
    """
    Return a list of bookmarks (dictionaries)

    each bookmark has the keys:
    href: URL
    title: description
    tags: list of tags/the folder

    bookmarks_file: path of the bookmarks HTML file to read; if it
    cannot be opened, the error raised by open() is propagated.
    """
    # construct the parser
    parser = BookmarksParser()

    # read the whole document, making sure the file is closed again
    with open(bookmarks_file) as infile:
        contents = infile.read()

    # run the parse; bookmarks accumulate in parser.all_items
    parser.feed(contents)

    # cleanup
    parser.close()

    return parser.all_items
156
 
 
157
 
def main():
    """Print the location of the Firefox bookmarks file and its bookmarks."""
    bookmarks_path = get_firefox_home_file("bookmarks.html")
    print(bookmarks_path)
    bookmarks = get_bookmarks(bookmarks_path)
    print(bookmarks)


# Run the demo only when executed as a script
if __name__ == "__main__":
    main()