1
# -*- coding: utf-8 -*-
4
from mx import DateTime
5
from tools.translate import _
10
# example to read a Excel XML file in consumption_calculation/wizard/wizard_import_rac.py
11
class SpreadsheetTools():
12
defaultns = 'urn:schemas-microsoft-com:office:spreadsheet'
13
namespaces = {'ss': defaultns}
14
xa = {'namespaces': namespaces}
16
def get(self, node, attr, default=None):
17
return node.get(etree.QName(self.defaultns, attr), default)
19
class SpreadsheetCell(SpreadsheetTools):
23
def __init__(self, node=None):
25
data = node.find(etree.QName(self.defaultns, 'Data'))
27
dtype = self.get(data, 'Type')
30
if not self.data or '.' in self.data:
32
self.data = float(self.data or 0.0)
35
self.data = int(self.data)
36
elif dtype == 'Boolean':
37
self.data = self.data in ('1', 'T', 't', 'True', 'true')
39
elif dtype == 'DateTime':
40
self.data = DateTime.ISO.ParseDateTime(self.data)
41
self.type = 'datetime'
42
elif dtype == 'String':
46
return "%s"%(self.data, )
49
return "%s(<%s> %s)" % (self.__class__, self.type, self.data)
52
class SpreadsheetRow(SpreadsheetTools):
54
def __init__(self, node):
63
return SpreadsheetRow(self.node.next())
67
returns the num. of cells
70
for cell in self.node.xpath('ss:Cell', **self.xa):
71
currindex = self.get(cell, 'Index')
75
index = int(currindex)
76
merged = self.get(cell, 'MergeAcross', 0)
86
for cell in self.node.xpath('ss:Cell', **self.xa):
87
currindex = self.get(cell, 'Index')
91
currindex = int(currindex)
92
for i in xrange(index+1, currindex):
93
yield SpreadsheetCell()
95
merged = self.get(cell, 'MergeAcross', 0)
96
yield SpreadsheetCell(cell)
97
for i in xrange(0, int(merged)):
98
yield SpreadsheetCell()
100
def gen_cell_list(self):
101
for cell in self.iter_cells():
102
self.cell_list.append(cell)
104
def __getattr__(self, attr):
106
if not self.cell_list:
108
return self.cell_list
111
def __getitem__(self, attr):
112
if not self.cell_list:
114
return self.cell_list[attr]
116
class SpreadsheetXML(SpreadsheetTools):
118
def __init__(self, xmlfile=False, xmlstring=False):
121
self.xmlobj = etree.parse(xmlfile)
123
self.xmlobj = etree.XML(xmlstring)
124
except etree.XMLSyntaxError as e:
125
raise osv.except_osv(_('Error'), _('Wrong format: it should be in Spreadsheet XML 2003'))
127
def getWorksheets(self):
129
for wb in self.xmlobj.xpath('//ss:Worksheet', **self.xa):
130
ret.append(self.get(wb, 'Name'))
133
def getRows(self,worksheet=1):
134
table = self.xmlobj.xpath('//ss:Worksheet[%d]/ss:Table[1]'%(worksheet, ), **self.xa)
135
return SpreadsheetRow(table[0].getiterator(etree.QName(self.defaultns, 'Row')))
138
if isinstance(s, unicode):
139
return s.encode('utf8')
142
def to_csv(self, to_file=False, worksheet=1):
144
writer=csv.writer(to_file, 'UNIX')
147
for row in self.getRows(worksheet):
149
writer.writerow([self.enc(x.data) for x in row.iter_cells()])
151
data.append([self.enc(x.data) for x in row.iter_cells()])
158
if __name__=='__main__':
159
spreadML = SpreadsheetXML('/mnt/Tempo/TestJFB/test_dates.xml')
160
spreadML.getWorksheets()
161
# Iterates through all sheets
162
for ws_number in xrange(1, len(spreadML.getWorksheets())):
163
rows = spreadML.getRows(ws_number)
167
# number of cells: row.len()
168
# cells can be retrieve like a list: row.cells[0] or like an iterator:
169
for cell in row.iter_cells():
170
print "%s |"%cell.data,