~vcs-imports/kupfer/master-new

« back to all changes in this revision

Viewing changes to bookmarks.py

Committer: Ulrik Sverdrup
Date: 2009-02-18 00:41:20 UTC
Revision ID: git-v1:4debb0ece5bd0731b612db62749b84ee97258e71

Move main application into kupfer/ package

files added:
kupfer

kupfer/__init__.py

kupfer/bookmarks.py

kupfer/browser.py

kupfer/epiphany.py

kupfer/icons.py

kupfer/kupfer.py

kupfer/listen.py

kupfer/objects.py

kupfer/utils.py

files removed:
bookmarks.py

browser.py

epiphany.py

icons.py

kupfer.py

listen.py

objects.py

utils.py

files modified:
main.py

Show diffs side-by-side

added added

removed removed

bookmarks.py

#!/usr/bin/env python

# -*- coding: utf8 -*-

"""

Original file, with many thanks, from

http://www.kylo.net/deli.py.txt

Modifications released under GPL v2 (or any later)

Ulrik Sverdrup <ulrik.sverdrup@gmail.com>

"""

from ConfigParser import RawConfigParser

from HTMLParser import HTMLParser

from os.path import join, expanduser, exists, basename

def get_firefox_home_file(needed_file):
    """
    Return the path to needed_file inside a Firefox profile directory

    The profile is looked up in ~/.mozilla/firefox/profiles.ini: the first
    section with Default=1 (and a Path) wins; otherwise the first section
    that has a Path option is used.

    needed_file: file name, relative to the profile directory

    Returns "" if no section of profiles.ini provides a profile path
    (which includes the case where profiles.ini could not be read).
    The returned path is not checked for existence.
    """
    firefox_dir = expanduser("~/.mozilla/firefox/")
    # A parser-wide default for "Default" means every section can be
    # queried for it, even those that do not set it themselves
    config = RawConfigParser({"Default" : "0"})
    config.read(join(firefox_dir, "profiles.ini"))

    path = None
    for section in config.sections():
        has_path = config.has_option(section, "Path")
        is_default = (config.has_option(section, "Default") and
                config.get(section, "Default") == "1")
        if is_default and has_path:
            # the explicitly marked default profile overrides any fallback
            path = config.get(section, "Path")
            break
        if path is None and has_path:
            # remember the first profile seen as a fallback
            path = config.get(section, "Path")

    if path is None:
        return ""
    if path.startswith("/"):
        # absolute profile path: not located below firefox_dir
        return join(path, needed_file)
    return join(firefox_dir, path, needed_file)

class BookmarksParser(HTMLParser):
    """
    Collect bookmarks from a bookmarks HTML document

    Folder headings (<h3>) become tags, <dl> lists delimit folders and
    every <a> with an acceptable href becomes one bookmark dictionary
    appended to self.all_items, with the keys "href", "title" and "tags".
    """

    # URL schemes (the text before the first ":") accepted as bookmarks
    _VALID_SCHEMES = ("http", "https", "news", "ftp")

    def __init__(self):
        # this is python: explicitly invoke base class constructor
        HTMLParser.__init__(self)
        self.inH3 = False
        self.inA = False
        # number of <h3> start tags seen so far
        self.tagCount = 0
        # stack of folder tags enclosing the current position
        self.tags = []
        self.currentTag = ""
        self.href = ""
        self.description = ""
        # container of URLs to skip; tested with "href in self.ignore"
        self.ignore = ""
        self.debug = False
        self.all_items = []

    def setBaseTag(self, baseTag):
        """Push baseTag onto the tag stack"""
        self.tags.append(baseTag)

    def setIgnoreUrls(self, ignore):
        """Set the container of URLs that must not be collected"""
        self.ignore = ignore

    def normalizeText(self, text):
        """
        Return text without spaces, apostrophes, double-quotes,
        backticks, colons and commas
        """
        for unwanted in ("'", '"', "`", ":", ",", " "):
            text = text.replace(unwanted, "")
        return text

    def handle_starttag(self, tag, attrs):
        if tag == "a":
            self.inA = True
            for name, value in attrs:
                if name == "href":
                    # an attribute without a value is reported as None;
                    # treat it as an empty (and thus invalid) URL
                    self.href = value or ""
        elif tag == "h3":
            self.inH3 = True
            self.tagCount += 1
        # "dl" (start of a folder list) needs no action here

    def _is_valid_href(self):
        """Return True if the current href should become a bookmark"""
        if not self.href:
            return False
        if self.href.split(":")[0] not in self._VALID_SCHEMES:
            return False
        return self.href not in self.ignore

    def handle_endtag(self, tag):
        if tag == "h3":
            # the folder name is complete: it tags everything that follows
            self.tags.append(self.currentTag)
            self.currentTag = ""
            self.inH3 = False
        elif tag == "a":
            if self.debug:
                print("href = %s" % (self.href,))
                print("description = %s" % (self.description,))
                print("tags = %s" % (self.tags,))

            if self._is_valid_href():
                bookmark = {
                    "href" : self.href,
                    "title": self.description,
                    # store a copy: self.tags is appended to in place when
                    # a later folder heading closes, which would otherwise
                    # change the tags of bookmarks already collected
                    "tags" : list(self.tags),
                }
                self.all_items.append(bookmark)

            self.href = ""
            self.description = ""
            self.inA = False
        elif tag == "dl":
            # exiting a dl means end of a bookmarks folder, pop the last tag off
            self.tags = self.tags[:-1]

    # handle any data: only handle_data is implemented, so text that the
    # base parser reports through handle_charref/handle_entityref instead
    # is missed; fix this by adding those methods
    def handle_data(self, data):
        if self.inH3:
            self.currentTag += self.normalizeText(data)
        if self.inA:
            self.description += data

136

137

def get_bookmarks(bookmarks_file):
    """
    Return a list of bookmarks (dictionaries) read from bookmarks_file

    bookmarks_file: path of the bookmarks HTML file; it is opened for
    reading, so any error raised by open() propagates to the caller

    each bookmark has the keys:
    href: URL
    title: description
    tags: list of tags/the folder
    """
    # construct the parser
    parser = BookmarksParser()

    # read the whole file and parse it; try/finally (rather than a with
    # statement, which this file does not otherwise use) makes sure the
    # file is closed even if reading or parsing fails
    bookmarks = open(bookmarks_file)
    try:
        parser.feed(bookmarks.read())
    finally:
        bookmarks.close()

    # cleanup
    parser.close()

    return parser.all_items

156

157

def main():
    """
    Print the location of Firefox's bookmarks.html and the bookmarks in it

    If no bookmarks file can be located, a short notice is printed instead
    of the bookmark list.
    """
    fileloc = get_firefox_home_file("bookmarks.html")
    print(fileloc)
    # get_firefox_home_file returns "" when it finds no profile, and the
    # path it returns is not checked for existence; without this guard
    # get_bookmarks would fail in open()
    if not fileloc or not exists(fileloc):
        print("No Firefox bookmarks file found")
        return
    print(get_bookmarks(fileloc))

162

163

# Dump the bookmarks only when run as a script, not when imported
if __name__ == "__main__":
    main()

Older »