~openerp-venezuela/openerp-venezuela-localization/l10n_ve_account_ifrs_pymes

« back to all changes in this revision

Viewing changes to report/tiny_sxw2rml/tiny_sxw2rml.py

  • Committer: Hbto (Netquatro)
  • Date: 2010-05-20 01:21:46 UTC
  • Revision ID: humbertoarocha@gmail.com-20100520012146-l0nrc06q4l14b934

[ADD] Report Folder Added

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
#!/usr/bin/python
 
2
#coding: utf-8
 
3
 
 
4
##############################################################################
 
5
#
 
6
# Copyright (c):
 
7
#
 
8
#     2005 pyopenoffice.py Martin Simon (http://www.bezirksreiter.de)
 
9
#     2005 Fabien Pinckaers, TINY SPRL. (http://tiny.be)
 
10
#
 
11
# WARNING: This program as such is intended to be used by professional
 
12
# programmers who take the whole responsibility of assessing all potential
 
13
# consequences resulting from its eventual inadequacies and bugs
 
14
# End users who are looking for a ready-to-use solution with commercial
 
15
# guarantees and support are strongly advised to contact a Free Software
 
16
# Service Company
 
17
#
 
18
# This program is Free Software; you can redistribute it and/or
 
19
# modify it under the terms of the GNU General Public License
 
20
# as published by the Free Software Foundation; either version 2
 
21
# of the License, or (at your option) any later version.
 
22
#
 
23
# This program is distributed in the hope that it will be useful,
 
24
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
25
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
26
# GNU General Public License for more details.
 
27
#
 
28
# You should have received a copy of the GNU General Public License
 
29
# along with this program; if not, write to the Free Software
 
30
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 
31
#
 
32
##############################################################################
 
33
 
 
34
"""
 
35
Tiny SXW2RML - The Tiny ERP's report engine
 
36
 
 
37
Tiny SXW2RML is part of the Tiny report project.
 
38
Tiny Report is a module that allows you to render high quality PDF documents
 
39
from an OpenOffice template (.sxw) and any relational database.
 
40
 
 
41
The whole source code is distributed under the terms of the
 
42
GNU Public Licence.
 
43
 
 
44
(c) 2005 pyopenoffice.py Martin Simon (http://www.bezirksreiter.de)
 
45
(c) 2005-TODAY, Fabien Pinckaers - Tiny sprl
 
46
"""
 
47
__version__ = '0.9'
 
48
 
 
49
 
 
50
import re
 
51
import string
 
52
import os
 
53
import zipfile
 
54
import xml.dom.minidom
 
55
from reportlab.lib.units import toLength
 
56
import base64
 
57
 
 
58
class DomApiGeneral:
    """General DOM helper utilities.

    The helpers work on ``xml.dom.minidom`` nodes and on the length strings
    found in OpenOffice attributes (for example ``"2.5cm"`` or ``"1inch"``).
    """

    def __init__(self, content_string="", file=""):
        """Store the raw content and compile the value/unit pattern.

        ``file`` is accepted for backward compatibility; it is not used.
        """
        self.content_string = content_string
        # Group 1: shortest prefix ending in a digit that is directly
        # followed by a unit; group 2: the unit itself.
        self.re_digits = re.compile(r"(.*?\d)(pt|cm|mm|inch|in)")

    def _unitTuple(self, string):
        """Split a length string into a ``(value, unit)`` tuple.

        Returns ``(string, "")`` when no value/unit pair is found.
        """
        matches = self.re_digits.findall(string)
        if not matches:
            return (string, "")
        return matches[0]

    def stringPercentToFloat(self, string):
        """Convert a percentage string such as ``"115%"`` to a factor."""
        return float(string.replace("%", "")) / 100

    def findChildrenByName(self, parent, name, attr_dict=None):
        """Return the direct element children of ``parent`` called ``name``.

        Does not work recursively. When ``attr_dict`` is given and
        non-empty, only children whose attributes equal every
        attribute/value pair in it are returned.
        """
        children = [
            child for child in parent.childNodes
            if child.nodeType == child.ELEMENT_NODE and child.nodeName == name
        ]
        if not attr_dict:
            return children
        return self._selectForAttributes(nodelist=children, attr_dict=attr_dict)

    def _selectForAttributes(self, nodelist, attr_dict):
        """Keep the nodes whose attributes match all pairs in ``attr_dict``."""
        return [
            node for node in nodelist
            if all(node.getAttribute(attr) == value
                   for attr, value in attr_dict.items())
        ]

    def _stringToTuple(self, s):
        """Parse ``"a,b"`` into a tuple of two ints.

        Returns ``None`` when ``s`` is not a string or does not start with
        two comma-separated integers.
        """
        try:
            first, second = s.split(",")[:2]
            return int(first), int(second)
        except (AttributeError, TypeError, ValueError):
            return None

    def _tupleToString(self, t):
        """Format the first two items of ``t`` as a UTF-8 ``"a,b"`` string.

        Returns ``None`` when ``t`` does not provide two items or the
        result cannot be encoded.
        """
        try:
            return self.openOfficeStringUtf8("%s,%s" % (t[0], t[1]))
        except (IndexError, KeyError, TypeError, UnicodeError):
            return None

    def _lengthToFloat(self, value):
        """Convert a length string to a rounded number using ``toLength``.

        Values without a value/unit pair are returned unchanged. When the
        conversion fails the string is returned instead (with a trailing
        ``"inch"`` already shortened to ``"in"``).
        """
        v = value
        if not self.re_digits.search(v):
            return v
        if v.endswith("inch"):
            # OO files use "inch" instead of "in" in Reportlab units
            v = v[:-2]
        try:
            return round(toLength(v))
        except (TypeError, ValueError):
            # e.g. compound values that merely contain a length
            return v

    def openOfficeStringUtf8(self, string):
        """Return ``string`` encoded as UTF-8.

        Unicode input is encoded directly; any other input is first
        decoded as cp1252.
        """
        if isinstance(string, unicode):
            return string.encode("utf-8")
        return unicode(string, "cp1252").encode("utf-8")
 
136
 
 
137
class DomApi(DomApiGeneral):
    """DOM API for the XML files of an SXW archive.

    Holds the parsed ``content.xml`` and ``styles.xml`` documents and
    provides the normalisation passes applied to ``content.xml``.
    """

    def __init__(self, xml_content, xml_styles):
        """Parse both documents and build the style lookup tables.

        :param xml_content: serialized ``content.xml``
        :param xml_styles: serialized ``styles.xml``
        """
        DomApiGeneral.__init__(self)
        self.content_dom = xml.dom.minidom.parseString(xml_content)
        self.styles_dom = xml.dom.minidom.parseString(xml_styles)
        body = self.content_dom.getElementsByTagName("office:body")
        self.body = body and body[0]

        self.style_dict = {}
        self.style_properties_dict = {}

        # ******** always use the following order:
        self.buildStyleDict()
        self.buildStylePropertiesDict()
        page_masters = self.styles_dom.getElementsByTagName("style:page-master")
        if page_masters:
            # only defined when styles.xml contains a page master
            self.page_master = page_masters[0]
        self.document = self.content_dom.getElementsByTagName(
            "office:document-content")[0]

    def buildStylePropertiesDict(self):
        """Fill ``style_properties_dict`` for every style in ``style_dict``."""
        for name in list(self.style_dict.keys()):
            self.style_properties_dict[name] = self.getStylePropertiesDict(name)

    def updateWithPercents(self, dict, updatedict):
        """Merge ``updatedict`` into ``dict``, resolving percentage values.

        Sometimes you find values like "115%" in the style hierarchy. Such
        a value is multiplied with the length already stored in ``dict``
        under the same key; every other value simply overrides it.
        ``dict`` is modified in place, ``updatedict`` is left untouched.
        """
        if not updatedict:
            # no style hierarchies for this style? =>
            return
        # Imported here because the module does not import ``copy`` at
        # file level.
        import copy
        new_updatedict = copy.copy(updatedict)
        for u in list(new_updatedict.keys()):
            try:
                if new_updatedict[u].find("%") != -1 and u in dict:
                    match = self.re_digits.search(dict[u])
                    number = float(match.group(1))
                    unit = match.group(2)
                    new_number = self.stringPercentToFloat(new_updatedict[u]) * number
                    if unit == "pt":
                        # no floats allowed for "pt"
                        # OOo just takes the int, does not round (try it out!)
                        new_number = int(new_number)
                    new_updatedict[u] = "%s%s" % (new_number, unit)
                else:
                    dict[u] = new_updatedict[u]
            except (AttributeError, TypeError, ValueError):
                # non-string value or a base value without a usable length
                dict[u] = new_updatedict[u]
        dict.update(new_updatedict)

    def normalizeStyleProperties(self):
        """Write the resolved style properties into content.xml.

        All ``style:style`` nodes of styles.xml are cloned into the
        ``office:automatic-styles`` element of content.xml; afterwards
        every ``style:style`` node receives the attributes stored for it in
        ``self.style_properties_dict`` on its ``style:properties`` child.
        Use this function to preprocess content.xml for XSLT
        transformations etc. Do not try to implement this function with
        XSLT - believe me, it's a terrible task...
        """
        styles_styles = self.styles_dom.getElementsByTagName("style:style")
        automatic_styles = self.content_dom.getElementsByTagName(
            "office:automatic-styles")[0]
        for s in styles_styles:
            automatic_styles.appendChild(s.cloneNode(deep=1))
        # these are the content styles with the styles.xml styles added!
        for s in self.content_dom.getElementsByTagName("style:style"):
            existing = self.findChildrenByName(s, "style:properties")
            if existing:
                target = existing[0]
            else:
                # some derived automatic styles do not have "style:properties"
                target = self.content_dom.createElement("style:properties")
                s.appendChild(target)
            name = s.getAttribute("style:name").encode("latin-1")
            properties = self.style_properties_dict.get(name) or {}
            for attribute, value in properties.items():
                target.setAttribute(self.openOfficeStringUtf8(attribute),
                                    self.openOfficeStringUtf8(value))

    def transferStylesXml(self):
        """Copy selected sub-trees from styles.xml into content.xml.

        A ``transferredfromstylesxml`` element is inserted in front of the
        body. When a page master exists it is cloned into that element,
        followed by the first ``text:outline-style`` if there is one. It is
        not necessary to do this - for example - with paragraph styles:
        the "normalized" style properties contain all information needed
        for further processing.
        """
        # TODO: What about table styles etc.?
        outline_styles = self.styles_dom.getElementsByTagName("text:outline-style")
        container = self.content_dom.createElement("transferredfromstylesxml")
        self.document.insertBefore(container, self.body)
        page_master = getattr(self, "page_master", None)
        if page_master is None:
            # Without a page master nothing is transferred (not even the
            # outline style); this keeps the historical behaviour.
            return
        container.appendChild(page_master.cloneNode(deep=1))
        if outline_styles:
            container.appendChild(outline_styles[0].cloneNode(deep=1))

    def normalizeLength(self):
        """Normalize all lengths to numbers (i.e: 1 inch = 72).

        Every attribute of every element in content.xml is passed through
        ``_lengthToFloat`` and written back as a string.
        Always use this after "normalizeContent" and "transferStyles"!
        """
        # TODO: The complex attributes of table cell styles are not transferred yet.
        for element in self.content_dom.getElementsByTagName("*"):
            for attr_name in list(element.attributes.keys()):
                value = element.getAttribute(attr_name)
                # convert float to string first!
                element.setAttribute(attr_name, "%s" % self._lengthToFloat(value))

    def normalizeTableColumns(self):
        """Expand ``table:number-columns-repeated`` into explicit columns."""
        columns = self.content_dom.getElementsByTagName("table:table-column")
        for column in columns:
            if column.hasAttribute("table:number-columns-repeated"):
                number = int(column.getAttribute("table:number-columns-repeated"))
                column.removeAttribute("table:number-columns-repeated")
                # the column itself counts as the first occurrence
                for _ in range(number - 1):
                    column.parentNode.insertBefore(column.cloneNode(deep=1), column)

    def buildStyleDict(self):
        """Store all style:style nodes of both documents in ``self.style_dict``.

        Caution: in this dict the nodes from two DOMs are merged; a style
        from content.xml replaces a styles.xml style of the same name.
        """
        for dom in (self.styles_dom, self.content_dom):
            for s in dom.getElementsByTagName("style:style"):
                name = s.getAttribute("style:name").encode("latin-1")
                self.style_dict[name] = s
        return True

    def toxml(self):
        """Serialize content.xml as UTF-8."""
        return self.content_dom.toxml(encoding="utf-8")

    def getStylePropertiesDict(self, style_name):
        """Return the ``style:properties`` attributes of a style.

        Properties of the parent style (``style:parent-style-name``) are
        resolved first and then overridden by the style's own values. A
        parent that is unknown or that names the style itself is ignored.
        """
        res = {}
        style = self.style_dict[style_name]

        if style.hasAttribute("style:parent-style-name"):
            parent = style.getAttribute("style:parent-style-name").encode("latin-1")
            if parent != style_name and parent in self.style_dict:
                res = self.getStylePropertiesDict(parent)

        for child in style.childNodes:
            if child.nodeType == child.ELEMENT_NODE and child.nodeName == "style:properties":
                for attr in list(child.attributes.keys()):
                    res[attr] = child.getAttribute(attr).encode("latin-1")
        return res
 
273
 
 
274
class PyOpenOffice(object):
    """Main class: reads an SXW archive and normalizes its content.xml."""

    def __init__(self, path='.', save_pict=False):
        """
        :param path: directory pictures are written to when ``save_pict``
            is true
        :param save_pict: also write the embedded pictures to ``path``
        """
        self.path = path
        self.save_pict = save_pict
        # picture file name (without the "Pictures/" prefix) -> raw content
        self.images = {}

    def oo_read(self, fname):
        """Read content.xml, styles.xml and the pictures from an archive.

        :param fname: value handed to ``zipfile.ZipFile``
        :return: tuple ``(content, style)`` with the raw file contents

        The pictures are collected in ``self.images``. The archive is
        closed even when reading fails.
        """
        archive = zipfile.ZipFile(fname, "r")
        try:
            content = archive.read('content.xml')
            style = archive.read('styles.xml')
            for member in archive.namelist():
                if member[:9] == 'Pictures/' and len(member) > 10:
                    pic_content = archive.read(member)
                    self.images[member[9:]] = pic_content
                    if self.save_pict:
                        self._save_picture(member, pic_content)
        finally:
            archive.close()
        return content, style

    def _save_picture(self, member, data):
        """Write one picture into ``self.path`` under its base name."""
        target = open(os.path.join(self.path, os.path.basename(member)), "wb")
        try:
            target.write(data)
        finally:
            target.close()

    def oo_replace(self, content):
        """Apply the textual clean-up substitutions to ``content``.

        Empty ``<para .../>`` elements are removed and a paragraph is split
        at a ``<text:line-break/>``.
        """
        regex = [
            (r"<para[^>]*/>", ""),
            # Python back-references are written \1, \2; the previous
            # "$1"/"$2" were inserted literally into the output.
            # NOTE(review): "(.*)" is greedy and re.sub makes a single
            # pass, so paragraphs with several line breaks are probably
            # not split correctly - confirm the intended behaviour.
            (r"<para(.*)>(.*?)<text:line-break[^>]*/>", r"<para\1>\2</para><para\1>"),
        ]
        for pattern, replacement in regex:
            content = re.sub(pattern, replacement, content)
        return content

    def unpackNormalize(self, sourcefile):
        """Return the normalized content.xml of ``sourcefile`` as a string."""
        content, styles = self.oo_read(sourcefile)
        content = self.oo_replace(content)
        dom = DomApi(content, styles)
        dom.normalizeStyleProperties()
        dom.transferStylesXml()
        dom.normalizeLength()
        dom.normalizeTableColumns()
        return dom.toxml()
 
318
 
 
319
def sxw2rml(sxw_file, xsl, output='.', save_pict=False):
    """Convert an SXW document to RML with the given XSL stylesheet.

    :param sxw_file: archive handed to ``PyOpenOffice.unpackNormalize``
    :param xsl: stylesheet source as a string
    :param output: directory passed to ``PyOpenOffice`` as picture path
    :param save_pict: whether ``PyOpenOffice`` writes the pictures to disk
    :return: the transformation result serialized to a string, or the raw
        result document when serialization fails

    When the result contains ``/document/stylesheet``, an ``images``
    element holding every embedded picture (base64 encoded) is added
    directly after it.
    """
    import libxslt
    import libxml2

    tool = PyOpenOffice(output, save_pict=save_pict)
    res = tool.unpackNormalize(sxw_file)
    # NOTE(review): none of the libxml2/libxslt objects created below is
    # freed explicitly; the bindings normally expect freeDoc() /
    # freeStylesheet() calls - confirm whether this matters for callers.
    styledoc = libxml2.parseDoc(xsl)
    style = libxslt.parseStylesheetDoc(styledoc)
    doc = libxml2.parseMemory(res, len(res))
    result = style.applyStylesheet(doc, None)

    root = result.xpathEval("/document/stylesheet")
    if root:
        root = root[0]
        images = libxml2.newNode("images")
        for name, data in tool.images.items():
            node = libxml2.newNode('image')
            node.setProp('name', name)
            node.setContent(base64.encodestring(data))
            images.addChild(node)
        root.addNextSibling(images)
    try:
        return style.saveResultToString(result)
    except Exception:
        # Best effort: hand back the result document itself. Unlike a bare
        # "except:", this lets KeyboardInterrupt/SystemExit propagate.
        return result
 
344
 
 
345
# Command line entry point: convert one .sxw file and print the result.
if __name__ == "__main__":
    import optparse
    import sys

    parser = optparse.OptionParser(
        version="Tiny Report v%s" % __version__,
        usage='tiny_sxw2rml.py [options] file.sxw')
    # store_true: without an action the flag would consume the next
    # command line argument as its value.
    parser.add_option("-v", "--verbose", default=False, dest="verbose",
                      action="store_true", help="enable basic debugging")
    parser.add_option("-o", "--output", dest="output", default='.',
                      help="directory of image output")
    (opt, args) = parser.parse_args()
    if len(args) != 1:
        parser.error("incorrect number of arguments")

    # Use the parsed positional argument; sys.argv[1] is an option flag
    # whenever options precede the file name.
    fname = args[0]

    # The stylesheet is looked up next to this script.
    xsl_path = os.path.join(os.getcwd(), os.path.dirname(sys.argv[0]),
                            'normalized_oo2rml.xsl')
    xsl_file = open(xsl_path)
    try:
        xsl = xsl_file.read()
    finally:
        xsl_file.close()

    # The path is passed on as is: zipfile opens it in binary mode itself,
    # so the archive is no longer read through a text-mode file object.
    result = sxw2rml(fname, xsl, output=opt.output, save_pict=False)

    sys.stdout.write("%s\n" % (result,))