3
import sys, zipfile, xml.dom.minidom
6
class OpenDocumentTextFile(object):
    """Plain-text view of an OpenDocument text (.odt) file.

    The archive's ``content.xml`` member is parsed once at construction
    time and kept as a ``xml.dom.minidom`` document in ``self.content``.
    """

    def __init__(self, filepath):
        """Load and parse ``content.xml`` from the given archive.

        ``filepath`` is handed straight to ``zipfile.ZipFile``, so it may be
        a filesystem path or a seekable binary file-like object.

        Raises whatever ``zipfile.ZipFile`` / ``ZipFile.read`` raise for an
        unreadable archive or a missing ``content.xml`` member (e.g.
        ``zipfile.BadZipfile``, ``KeyError``), and the XML parser's error
        for malformed content.
        """
        # Close the archive as soon as the XML has been read; the parsed DOM
        # does not need the zip handle afterwards.
        with zipfile.ZipFile(filepath) as archive:
            raw_xml = archive.read("content.xml")
        self.content = xml.dom.minidom.parseString(raw_xml)

    def toString(self):
        """Convert the document to a string.

        Every ``text:p``, ``text:h`` and ``text:list`` element contributes
        its text followed by a newline.

        NOTE: elements are collected tag by tag, so the output is grouped by
        tag (all paragraphs, then all headings, then all lists) rather than
        following document order, and text nested inside a ``text:list`` is
        emitted again when the list itself is processed.
        """
        lines = []
        for tag_name in ("text:p", "text:h", "text:list"):
            for element in self.content.getElementsByTagName(tag_name):
                lines.append(self.textToString(element) + "\n")
        return "".join(lines)

    def textToString(self, element):
        """Return the concatenated text of ``element`` and its descendants.

        Text nodes contribute their value, child elements are visited
        recursively, and any other node type (comments, processing
        instructions, ...) is skipped.
        """
        pieces = []
        for node in element.childNodes:
            if node.nodeType == xml.dom.Node.TEXT_NODE:
                pieces.append(node.nodeValue)
            elif node.nodeType == xml.dom.Node.ELEMENT_NODE:
                pieces.append(self.textToString(node))
        return "".join(pieces)
28
if __name__ == "__main__":
    # Command-line entry point: print the plain text of the .odt file named
    # by the first argument.
    if len(sys.argv) < 2:
        sys.exit("usage: %s FILE.odt" % sys.argv[0])

    # zipfile accepts a path directly, so there is no need to slurp the file
    # into an in-memory buffer first (the previous code also opened it in
    # text mode and never closed it).
    odt = OpenDocumentTextFile(sys.argv[1])

    # Replace non-ASCII characters with '?' so the output is safe on any
    # terminal, then decode back to text so print() emits the characters
    # themselves rather than a bytes repr on Python 3.
    print(odt.toString().encode("ascii", "replace").decode("ascii"))