1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
|
# -*- coding: utf-8 -*-
from lxml import etree
from mx import DateTime
from tools.translate import _
from osv import osv
import csv
# example to read a Excel XML file in consumption_calculation/wizard/wizard_import_rac.py
class SpreadsheetTools():
defaultns = 'urn:schemas-microsoft-com:office:spreadsheet'
namespaces = {'ss': defaultns}
xa = {'namespaces': namespaces}
def get(self, node, attr, default=None):
return node.get(etree.QName(self.defaultns, attr), default)
class SpreadsheetCell(SpreadsheetTools):
type = None
data = None
def __init__(self, node=None):
if node is not None:
data = node.find(etree.QName(self.defaultns, 'Data'))
if data is not None:
dtype = self.get(data, 'Type')
self.data = data.text
if dtype == 'Number':
if not self.data or '.' in self.data:
self.type = 'float'
self.data = float(self.data or 0.0)
else:
self.type = 'int'
self.data = int(self.data)
elif dtype == 'Boolean':
self.data = self.data in ('1', 'T', 't', 'True', 'true')
self.type = 'bool'
elif dtype == 'DateTime':
self.data = DateTime.ISO.ParseDateTime(self.data)
self.type = 'datetime'
elif dtype == 'String':
self.type = 'str'
def __str__(self):
return "%s"%(self.data, )
def __repr__(self):
return "%s(<%s> %s)" % (self.__class__, self.type, self.data)
class SpreadsheetRow(SpreadsheetTools):
def __init__(self, node):
self.node = node
self.cell_list = []
self.cell_index = 0
def __iter__(self):
return self
def next(self):
return SpreadsheetRow(self.node.next())
def len(self):
"""
returns the num. of cells
"""
index = 0
for cell in self.node.xpath('ss:Cell', **self.xa):
currindex = self.get(cell, 'Index')
if not currindex:
index += 1
else:
index = int(currindex)
merged = self.get(cell, 'MergeAcross', 0)
if merged:
index += int(merged)
return index
def __len__(self):
return self.len()
def iter_cells(self):
index = 0
for cell in self.node.xpath('ss:Cell', **self.xa):
currindex = self.get(cell, 'Index')
if not currindex:
index += 1
else:
currindex = int(currindex)
for i in xrange(index+1, currindex):
yield SpreadsheetCell()
index = currindex
merged = self.get(cell, 'MergeAcross', 0)
yield SpreadsheetCell(cell)
for i in xrange(0, int(merged)):
yield SpreadsheetCell()
def gen_cell_list(self):
for cell in self.iter_cells():
self.cell_list.append(cell)
def __getattr__(self, attr):
if attr == 'cells':
if not self.cell_list:
self.gen_cell_list()
return self.cell_list
raise AttributeError
def __getitem__(self, attr):
if not self.cell_list:
self.gen_cell_list()
return self.cell_list[attr]
class SpreadsheetXML(SpreadsheetTools):
def __init__(self, xmlfile=False, xmlstring=False):
try:
if xmlfile:
self.xmlobj = etree.parse(xmlfile)
else:
self.xmlobj = etree.XML(xmlstring)
except etree.XMLSyntaxError as e:
raise osv.except_osv(_('Error'), _('Wrong format: it should be in Spreadsheet XML 2003'))
def getWorksheets(self):
ret = []
for wb in self.xmlobj.xpath('//ss:Worksheet', **self.xa):
ret.append(self.get(wb, 'Name'))
return ret
def getRows(self,worksheet=1):
table = self.xmlobj.xpath('//ss:Worksheet[%d]/ss:Table[1]'%(worksheet, ), **self.xa)
return SpreadsheetRow(table[0].getiterator(etree.QName(self.defaultns, 'Row')))
def enc(self, s):
if isinstance(s, unicode):
return s.encode('utf8')
return s
def to_csv(self, to_file=False, worksheet=1):
if to_file:
writer=csv.writer(to_file, 'UNIX')
else:
data = []
for row in self.getRows(worksheet):
if to_file:
writer.writerow([self.enc(x.data) for x in row.iter_cells()])
else:
data.append([self.enc(x.data) for x in row.iter_cells()])
if to_file:
return True
return data
if __name__=='__main__':
spreadML = SpreadsheetXML('/mnt/Tempo/TestJFB/test_dates.xml')
spreadML.getWorksheets()
# Iterates through all sheets
for ws_number in xrange(1, len(spreadML.getWorksheets())):
rows = spreadML.getRows(ws_number)
# ignore the 1st row
rows.next()
for row in rows:
# number of cells: row.len()
# cells can be retrieve like a list: row.cells[0] or like an iterator:
for cell in row.iter_cells():
print "%s |"%cell.data,
print
print "-"*4
|