4
##############################################################################
8
# 2005 pyopenoffice.py Martin Simon (http://www.bezirksreiter.de)
9
# 2005 Fabien Pinckaers, TINY SPRL. (http://tiny.be)
11
# WARNING: This program as such is intended to be used by professional
12
# programmers who take the whole responsibility of assessing all potential
13
# consequences resulting from its eventual inadequacies and bugs
14
# End users who are looking for a ready-to-use solution with commercial
15
# guarantees and support are strongly advised to contact a Free Software
18
# This program is Free Software; you can redistribute it and/or
19
# modify it under the terms of the GNU General Public License
20
# as published by the Free Software Foundation; either version 2
21
# of the License, or (at your option) any later version.
23
# This program is distributed in the hope that it will be useful,
24
# but WITHOUT ANY WARRANTY; without even the implied warranty of
25
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26
# GNU General Public License for more details.
28
# You should have received a copy of the GNU General Public License
29
# along with this program; if not, write to the Free Software
30
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
32
##############################################################################
35
Tiny SXW2RML - The Tiny ERP's report engine
37
Tiny SXW2RML is part of the Tiny report project.
38
Tiny Report is a module that allows you to render high quality PDF documents
39
from an OpenOffice template (.sxw) and any relational database.
41
The whole source code is distributed under the terms of the
44
(c) 2005 pyopenoffice.py Martin Simon (http://www.bezirksreiter.de)
45
(c) 2005-TODAY, Fabien Pinckaers - Tiny sprl
54
import xml.dom.minidom
55
from reportlab.lib.units import toLength
59
"""General DOM API utilities."""
60
# Initialiser: stores the given content string and pre-compiles the regex
# that separates a value ending in a digit from a trailing unit
# (pt, cm, mm, inch or in).
# The `file` argument is not used by the visible lines.
def __init__(self,content_string="",file=""):
61
self.content_string = content_string
62
self.re_digits = re.compile(r"(.*?\d)(pt|cm|mm|inch|in)")
64
# Runs self.re_digits.findall() on `string` to obtain (value, unit) matches.
# NOTE(review): the statements after the findall() call are missing from
# this copy, so what is returned cannot be read from here.
def _unitTuple(self,string):
65
"""Split values and units to a tuple."""
66
temp = self.re_digits.findall(string)
72
def stringPercentToFloat(self,string):
    """Convert a percentage string such as "115%" into a fraction.

    Every "%" character is removed and the remaining text is parsed as a
    float and divided by 100, so "115%" yields 1.15.

    Raises ValueError if the remaining text is not a valid float.
    """
    return float(string.replace("%", "")) / 100
76
# Walks parent.childNodes (direct children only) and tests for element
# nodes whose nodeName equals `name`; the last visible statement passes a
# `children` list and `attr_dict` to _selectForAttributes().
# NOTE(review): several statements are missing from this copy; `children`
# is neither initialised nor filled in the visible lines.
# NOTE(review): `attr_dict={}` is a mutable default argument; the visible
# lines only read it, but confirm nothing mutates it.
def findChildrenByName(self,parent,name,attr_dict={}):
77
"""Helper functions. Does not work recursively.
78
Optional: also test for certain attribute/value pairs."""
80
for c in parent.childNodes:
81
if c.nodeType == c.ELEMENT_NODE and c.nodeName == name:
86
return self._selectForAttributes(nodelist=children,attr_dict=attr_dict)
88
# Attribute filter: the visible lines compare n.getAttribute(a) with the
# expected value for every key of `attr_dict` and append nodes to
# `selected_nodes`.
# NOTE(review): the loop that binds `n`, the initialisation of
# `selected_nodes` and the return statement are missing from this copy.
def _selectForAttributes(self,nodelist,attr_dict):
93
for a in attr_dict.keys():
94
if n.getAttribute(a) != attr_dict[a]:
95
# at least one incorrect attribute value?
98
selected_nodes.append(n)
101
# Splits `s` at commas and returns the first two fields as a pair of ints.
# NOTE(review): `string.split` is presumably the Python 2 `string` module
# function (its import is not visible here); a line between the docstring
# and the split is missing from this copy.
def _stringToTuple(self,s):
102
"""Helper function."""
104
temp = string.split(s,",")
105
return int(temp[0]),int(temp[1])
109
# Formats the first two items of `t` as "a,b" and passes the result through
# openOfficeStringUtf8().
# NOTE(review): a line between the signature and the return is missing
# from this copy.
def _tupleToString(self,t):
111
return self.openOfficeStringUtf8("%s,%s" % (t[0],t[1]))
115
# Length conversion helper: the visible lines test a value against
# self.re_digits and round the result of reportlab's toLength().
# NOTE(review): most of the body is missing from this copy; `v` is never
# assigned and no return statement is visible.
def _lengthToFloat(self,value):
117
if not self.re_digits.search(v):
121
# OO files use "inch" instead of "in" in Reportlab units
126
c = round(toLength(v))
131
# Encodes `string` as UTF-8: unicode objects are encoded directly, anything
# else is first decoded as cp1252 and then encoded.
# NOTE(review): Python 2 only (uses the `unicode` built-in); the statement
# that would return `tempstring` is not visible in this copy.
def openOfficeStringUtf8(self,string):
132
if type(string) == unicode:
133
return string.encode("utf-8")
134
tempstring = unicode(string,"cp1252").encode("utf-8")
137
class DomApi(DomApiGeneral):
138
"""This class provides a DOM-API for XML-Files from an SXW-Archive."""
139
# Parses the two XML strings (content and styles) into minidom documents,
# builds the style lookup dictionaries and caches a few frequently used
# nodes on the instance.
# NOTE(review): some statements are missing from this copy; in particular
# self.style_dict, which buildStyleDict() writes to, is not initialised in
# the visible lines.
# NOTE(review): the `<>` operator is Python 2 only syntax.
def __init__(self,xml_content,xml_styles):
140
DomApiGeneral.__init__(self)
141
self.content_dom = xml.dom.minidom.parseString(xml_content)
142
self.styles_dom = xml.dom.minidom.parseString(xml_styles)
143
body = self.content_dom.getElementsByTagName("office:body")
144
# first office:body element, or the empty node list when there is none
self.body = body and body[0]
148
self.style_properties_dict = {}
150
# ******** always use the following order:
151
self.buildStyleDict()
152
self.buildStylePropertiesDict()
153
# self.page_master is taken from the first style:page-master in styles.xml
if self.styles_dom.getElementsByTagName("style:page-master").__len__()<>0:
154
self.page_master = self.styles_dom.getElementsByTagName("style:page-master")[0]
155
self.document = self.content_dom.getElementsByTagName("office:document-content")[0]
157
def buildStylePropertiesDict(self):
    """Fill self.style_properties_dict for every known style.

    For each style name in self.style_dict, the result of
    getStylePropertiesDict(name) is stored under the same key.
    """
    for style_name in self.style_dict:
        self.style_properties_dict[style_name] = self.getStylePropertiesDict(style_name)
161
# Merges `updatedict` into `dict`. For a value containing "%" whose key
# already exists in `dict`, the percentage is applied to the numeric part
# of the existing value and the existing unit is kept.
# NOTE(review): several short statements (branch headers etc.) are missing
# from this copy, so the exact control flow cannot be read from here.
# NOTE(review): the parameter name `dict` shadows the built-in, and
# has_key() exists only in Python 2.
def updateWithPercents(self,dict,updatedict):
162
"""Sometimes you find values like "115%" in the style hierarchy."""
164
# no style hierarchies for this style? =>
166
# shallow copy: the assignments below modify the copy, not `updatedict`
new_updatedict = copy.copy(updatedict)
167
for u in new_updatedict.keys():
169
if new_updatedict[u].find("""%""") != -1 and dict.has_key(u):
170
# split the existing value into its numeric part and its unit
number = float(self.re_digits.search(dict[u]).group(1))
171
unit = self.re_digits.search(dict[u]).group(2)
172
new_number = self.stringPercentToFloat(new_updatedict[u]) * number
174
new_number = int(new_number)
175
# no floats allowed for "pt"
176
# OOo just takes the int, does not round (try it out!)
177
new_updatedict[u] = "%s%s" % (new_number,unit)
179
dict[u] = new_updatedict[u]
181
dict[u] = new_updatedict[u]
182
dict.update(new_updatedict)
184
# Copies every style:style from styles.xml into the office:automatic-styles
# element of content.xml, then writes the entries of
# self.style_properties_dict for each style as attributes via setAttribute().
# NOTE(review): some statements are missing from this copy (the condition
# guarding the createElement branch and the lines around it);
# `c.setAttribute` implies `c` was narrowed to a single element in a line
# that is not visible here.
# NOTE(review): the local name `dict` shadows the built-in.
def normalizeStyleProperties(self):
185
"""Transfer all style:style-properties attributes from the
186
self.style_properties_hierarchical dict to the automatic-styles
187
from content.xml. Use this function to preprocess content.xml for
188
XSLT transformations etc.Do not try to implement this function
189
with XSlT - believe me, it's a terrible task..."""
190
styles_styles = self.styles_dom.getElementsByTagName("style:style")
191
automatic_styles = self.content_dom.getElementsByTagName("office:automatic-styles")[0]
192
for s in styles_styles:
193
# deep clones are appended, so styles_dom itself is left unchanged
automatic_styles.appendChild(s.cloneNode(deep=1))
194
content_styles = self.content_dom.getElementsByTagName("style:style")
195
# these are the content_styles with styles_styles added!!!
196
for s in content_styles:
197
c = self.findChildrenByName(s,"style:properties")
199
# some derived automatic styles do not have "style:properties":
200
temp = self.content_dom.createElement("style:properties")
202
c = self.findChildrenByName(s,"style:properties")
204
# falls back to an empty dict when the stored entry is falsy
dict = self.style_properties_dict[(s.getAttribute("style:name")).encode("latin-1")] or {}
205
for attribute in dict.keys():
206
c.setAttribute(self.openOfficeStringUtf8(attribute),self.openOfficeStringUtf8(dict[attribute]))
208
# Inserts a new <transferredfromstylesxml> element before self.body and
# appends deep clones of the page master and of the first
# text:outline-style from styles.xml to it.
# NOTE(review): self.page_master is only assigned in __init__ when
# styles.xml contains a style:page-master element; lines are missing from
# this copy around the three statements that use it, so any error handling
# is not visible here.
def transferStylesXml(self):
209
"""Transfer certain sub-trees from styles.xml to the normalized content.xml
210
(see above). It is not necessary to do this - for example - with paragraph styles.
211
the "normalized" style properties contain all information needed for
212
further processing."""
213
# TODO: What about table styles etc.?
214
outline_styles = self.styles_dom.getElementsByTagName("text:outline-style")
215
t = self.content_dom.createElement("transferredfromstylesxml")
216
self.document.insertBefore(t,self.body)
217
# `t` was just inserted in front of self.body, so this is that element
t_new = self.body.previousSibling
219
page_master = self.page_master
220
t_new.appendChild(page_master.cloneNode(deep=1))
221
t_new.appendChild(outline_styles[0].cloneNode(deep=1))
225
def normalizeLength(self):
    """Rewrite every attribute value in content_dom via _lengthToFloat().

    Each attribute of each element is replaced by the string form of
    self._lengthToFloat(value); according to the original note lengths end
    up as plain numbers (i.e. 1 inch = 72).

    unpackNormalize() calls this after normalizeStyleProperties() and
    transferStylesXml(); keep that order.
    """
    # TODO: The complex attributes of table cell styles are not transferred yet.
    # NOTE(review): the outer loop header was absent from the reviewed copy
    # and has been restored from the otherwise unbound loop variable.
    for element in self.content_dom.getElementsByTagName("*"):
        # Use the public attribute map: the private `_attrs` dict is None on
        # elements without attributes in newer minidom versions.
        for attr_name in list(element.attributes.keys()):
            value = element.getAttribute(attr_name)
            # convert float to string first!
            element.setAttribute(attr_name, "%s" % self._lengthToFloat(value))
238
def normalizeTableColumns(self):
    """Expand repeated table columns into individual column elements.

    A table:table-column carrying table:number-columns-repeated="N" is
    turned into N identical sibling columns without that attribute.
    """
    repeat_attr = "table:number-columns-repeated"
    # NOTE(review): the loop header was absent from the reviewed copy and
    # has been restored from the otherwise unbound loop variable.
    for column in self.content_dom.getElementsByTagName("table:table-column"):
        if not column.hasAttribute(repeat_attr):
            continue
        repeat = int(column.getAttribute(repeat_attr))
        # Remove the attribute first so the clones do not inherit it.
        column.removeAttribute(repeat_attr)
        for _ in range(repeat - 1):
            column.parentNode.insertBefore(column.cloneNode(deep=1), column)
248
def buildStyleDict(self):
    """Index every style:style node by name in self.style_dict.

    styles.xml is scanned first and content.xml second, so a style in
    content.xml replaces a styles.xml entry with the same name. Keys are
    the style:name attribute encoded as latin-1.

    Caution: the stored nodes belong to two different DOM documents.
    """
    for dom in (self.styles_dom, self.content_dom):
        for style_node in dom.getElementsByTagName("style:style"):
            style_name = style_node.getAttribute("style:name").encode("latin-1")
            self.style_dict[style_name] = style_node
258
return self.content_dom.toxml(encoding="utf-8")
260
# Builds the property dictionary of one style: when the style names a
# parent via style:parent-style-name, the parent's dictionary is computed
# recursively first; attributes of the style's own style:properties child
# are then written into `res`, latin-1 encoded.
# NOTE(review): lines are missing from this copy (initialisation of `res`
# for styles without a parent, the loop over `childs`, and the return).
# NOTE(review): `_attrs` is a private minidom attribute.
def getStylePropertiesDict(self,style_name):
263
if self.style_dict[style_name].hasAttribute("style:parent-style-name"):
264
parent = self.style_dict[style_name].getAttribute("style:parent-style-name").encode("latin-1")
265
res = self.getStylePropertiesDict(parent)
267
childs = self.style_dict[style_name].childNodes
269
if c.nodeType == c.ELEMENT_NODE and c.nodeName == "style:properties":
270
for attr in c._attrs.keys():
271
res[attr] = c.getAttribute(attr).encode("latin-1")
274
class PyOpenOffice(object):
275
"""This is the main class which provides all functionality."""
276
# Stores the save_pict flag on the instance.
# NOTE(review): other statements are missing from this copy; self.path and
# self.images, which oo_read() uses, are not assigned in the visible lines.
def __init__(self, path='.', save_pict=False):
278
self.save_pict = save_pict
281
# Opens the archive `fname` as a zip file, reads content.xml and styles.xml,
# and stores every member under "Pictures/" in self.images keyed by the
# name without that prefix.
# NOTE(review): lines are missing from this copy (the loop that binds `a`,
# the condition around the open() call, the write/close of `f` and the
# return statement); the archive `z` is not closed in the visible lines.
def oo_read(self,fname):
282
z = zipfile.ZipFile(fname,"r")
283
content = z.read('content.xml')
284
style = z.read('styles.xml')
287
# len(a)>10 requires at least two characters after the "Pictures/" prefix
if a[:9]=='Pictures/' and len(a)>10:
288
pic_content = z.read(a)
289
self.images[a[9:]] = pic_content
291
f=open(os.path.join(self.path, os.path.basename(a)),"wb")
297
def oo_replace(self,content):
    """Apply regex clean-ups to `content` and return the result.

    Two substitutions are applied in order:
    1. empty self-closing <para .../> elements are removed;
    2. a paragraph is split at a <text:line-break/>, the new paragraph
       repeating the text captured after "<para" in the opening tag.
    """
    # (pattern, replacement) pairs, applied in order.
    regex = [
        (r"<para[^>]*/>", ""),
        #(r"<text:ordered-list.*?>(.*?)</text:ordered-list>", "$1"),
        #(r"<text:unordered-list.*?>(.*?)</text:unordered-list>", "$1"),
        # re.sub() needs \1 / \2 for group references; the former "$1"/"$2"
        # were inserted literally into the output.
        # NOTE(review): `(.*)` is greedy, so a paragraph containing several
        # line breaks may not split as intended - confirm with real input.
        (r"<para(.*)>(.*?)<text:line-break[^>]*/>", r"<para\1>\2</para><para\1>"),
    ]
    for key, val in regex:
        content = re.sub(key, val, content)
    return content
308
# Reads content and styles from `sourcefile` via oo_read(), runs the
# content through oo_replace() and then applies the normalisation steps in
# a fixed order.
# NOTE(review): lines are missing from this copy; the creation of `dom` and
# the return statement are not visible.
def unpackNormalize(self,sourcefile):
309
c,s = self.oo_read(sourcefile)
310
c = self.oo_replace(c)
312
dom.normalizeStyleProperties()
313
dom.transferStylesXml()
314
dom.normalizeLength()
315
dom.normalizeTableColumns()
319
# Converts an SXW file: normalises it with PyOpenOffice.unpackNormalize(),
# applies the XSL stylesheet `xsl` with libxslt, and adds an <images>
# element holding one base64-encoded <image name="..."> child per entry of
# tool.images.
# NOTE(review): lines are missing from this copy (the statements around the
# xpathEval() result, which would have to yield a single node before
# addNextSibling() is called, and the return statement).
# NOTE(review): base64.encodestring is a legacy API - confirm the target
# Python version.
def sxw2rml(sxw_file, xsl, output='.', save_pict=False):
322
tool = PyOpenOffice(output, save_pict = save_pict)
323
res = tool.unpackNormalize(sxw_file)
324
styledoc = libxml2.parseDoc(xsl)
325
style = libxslt.parseStylesheetDoc(styledoc)
326
doc = libxml2.parseMemory(res,len(res))
327
result = style.applyStylesheet(doc, None)
329
root = result.xpathEval("/document/stylesheet")
332
images = libxml2.newNode("images")
333
for img in tool.images:
334
node = libxml2.newNode('image')
335
node.setProp('name', img)
336
node.setContent( base64.encodestring(tool.images[img]))
337
images.addChild(node)
338
root.addNextSibling(images)
340
xml = style.saveResultToString(result)
345
# Command-line entry point: parses options with optparse, reads the .sxw
# file and the normalized_oo2rml.xsl stylesheet located next to the script,
# and calls sxw2rml().
# NOTE(review): lines are missing from this copy (the argument-count check
# before parser.error(), the assignment of `fname`, and whatever is done
# with `result`).
# NOTE(review): "-v" is declared without an action, so optparse treats it
# as an option taking a value; save_pict is always passed as False; file()
# and the StringIO module are Python 2 only.
if __name__ == "__main__":
347
parser = optparse.OptionParser(
348
version="Tiny Report v%s" % __version__,
349
usage = 'tiny_sxw2rml.py [options] file.sxw')
350
parser.add_option("-v", "--verbose", default=False, dest="verbose", help="enable basic debugging")
351
parser.add_option("-o", "--output", dest="output", default='.', help="directory of image output")
352
(opt, args) = parser.parse_args()
354
parser.error("incorrect number of arguments")
360
f = StringIO.StringIO(file(fname).read())
362
xsl = file(os.path.join(os.getcwd(), os.path.dirname(sys.argv[0]), 'normalized_oo2rml.xsl')).read()
363
result = sxw2rml(f, xsl, output=opt.output, save_pict=False)