~unifield-team/unifield-wm/wm-2418-rw-fix

« back to all changes in this revision

Viewing changes to spreadsheet_xml/spreadsheet_xml.py

  • Committer: pierre-marie
  • Date: 2012-07-25 14:13:53 UTC
  • mfrom: (1038 unifield-wm)
  • mto: This revision was merged to the branch mainline in revision 1060.
  • Revision ID: pierre-marie@pierre-marie-laptop-20120725141353-9iwjdr1kltbei90e
Merge

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
# -*- coding: utf-8 -*-
 
2
 
 
3
from lxml import etree
 
4
from mx import DateTime
 
5
from tools.translate import _
 
6
from osv import osv
 
7
 
 
8
# example to read a Excel XML file in consumption_calculation/wizard/wizard_import_rac.py
 
9
class SpreadsheetTools():
 
10
    defaultns = 'urn:schemas-microsoft-com:office:spreadsheet'
 
11
    namespaces = {'ss': defaultns}
 
12
    xa = {'namespaces': namespaces}
 
13
 
 
14
    def get(self, node, attr, default=None):
 
15
        return node.get(etree.QName(self.defaultns, attr), default)
 
16
 
 
17
class SpreadsheetCell(SpreadsheetTools):
 
18
    type = None
 
19
    data = None
 
20
 
 
21
    def __init__(self, node=None):
 
22
        if node is not None:
 
23
            data = node.find(etree.QName(self.defaultns, 'Data'))
 
24
            if data is not None:
 
25
                dtype = self.get(data, 'Type')
 
26
                self.data = data.text
 
27
                if dtype == 'Number':
 
28
                    if '.' in self.data:
 
29
                        self.type = 'float'
 
30
                        self.data = float(self.data)
 
31
                    else:
 
32
                        self.type = 'int'
 
33
                        self.data = int(self.data)
 
34
                elif dtype == 'Boolean':
 
35
                    self.data = self.data in ('1', 'T', 't', 'True', 'true')
 
36
                    self.type = 'bool'
 
37
                elif dtype == 'DateTime':
 
38
                    self.data = DateTime.ISO.ParseDateTime(self.data)
 
39
                    self.type = 'datetime'
 
40
                elif dtype == 'String':
 
41
                    self.type = 'str'
 
42
 
 
43
    def __str__(self):
 
44
        return "%s"%(self.data, )
 
45
 
 
46
    def __repr__(self):
 
47
        return "%s(<%s> %s)" % (self.__class__, self.type, self.data)
 
48
 
 
49
 
 
50
class SpreadsheetRow(SpreadsheetTools):
 
51
 
 
52
    def __init__(self, node):
 
53
        self.node = node
 
54
        self.cell_list = []
 
55
        self.cell_index = 0
 
56
 
 
57
    def __iter__(self):
 
58
        return self
 
59
 
 
60
    def next(self):
 
61
        return SpreadsheetRow(self.node.next())
 
62
 
 
63
    def len(self):
 
64
        """
 
65
            returns the num. of cells
 
66
        """
 
67
        index = 0
 
68
        for cell in self.node.xpath('ss:Cell', **self.xa):
 
69
            currindex = self.get(cell, 'Index')
 
70
            if not currindex:
 
71
                index += 1
 
72
            else:
 
73
                index = int(currindex)
 
74
            merged =  self.get(cell, 'MergeAcross', 0)
 
75
            if merged:
 
76
                index += int(merged)
 
77
        return index
 
78
 
 
79
    def __len__(self):
 
80
        return self.len()
 
81
 
 
82
    def iter_cells(self):
 
83
        index = 0
 
84
        for cell in self.node.xpath('ss:Cell', **self.xa):
 
85
            currindex = self.get(cell, 'Index')
 
86
            if not currindex:
 
87
                index += 1
 
88
            else:
 
89
                currindex = int(currindex)
 
90
                for i in xrange(index+1, currindex):
 
91
                    yield SpreadsheetCell()
 
92
                index = currindex
 
93
            merged =  self.get(cell, 'MergeAcross', 0)
 
94
            yield SpreadsheetCell(cell)
 
95
            for i in xrange(0, int(merged)):
 
96
                yield SpreadsheetCell()
 
97
 
 
98
    def gen_cell_list(self):
 
99
        for cell in self.iter_cells():
 
100
            self.cell_list.append(cell)
 
101
 
 
102
    def __getattr__(self, attr):
 
103
        if attr == 'cells':
 
104
            if not self.cell_list:
 
105
                self.gen_cell_list()
 
106
            return self.cell_list
 
107
        raise AttributeError
 
108
 
 
109
    def __getitem__(self, attr):
 
110
        if not self.cell_list:
 
111
            self.gen_cell_list()
 
112
        return self.cell_list[attr]
 
113
 
 
114
class SpreadsheetXML(SpreadsheetTools):
 
115
 
 
116
    def __init__(self, xmlfile=False, xmlstring=False):
 
117
        try:
 
118
            if xmlfile:
 
119
                self.xmlobj = etree.parse(xmlfile)
 
120
            else:
 
121
                self.xmlobj = etree.XML(xmlstring)
 
122
        except etree.XMLSyntaxError:
 
123
            raise osv.except_osv(_('Error'), _('Wrong format: it should be in Spreadsheet XML 2003'))
 
124
 
 
125
    def getWorksheets(self):
 
126
        ret = []
 
127
        for wb in self.xmlobj.xpath('//ss:Worksheet', **self.xa):
 
128
            ret.append(self.get(wb, 'Name'))
 
129
        return ret
 
130
 
 
131
    def getRows(self,worksheet=1):
 
132
        table = self.xmlobj.xpath('//ss:Worksheet[%d]/ss:Table[1]'%(worksheet, ), **self.xa)
 
133
        return SpreadsheetRow(table[0].getiterator(etree.QName(self.defaultns, 'Row')))
 
134
 
 
135
 
 
136
if __name__=='__main__':
 
137
    spreadML = SpreadsheetXML('/mnt/Tempo/TestJFB/test_dates.xml')
 
138
    spreadML.getWorksheets()
 
139
    # Iterates through all sheets
 
140
    for ws_number in xrange(1, len(spreadML.getWorksheets())):
 
141
        rows = spreadML.getRows(ws_number)
 
142
        # ignore the 1st row
 
143
        rows.next()
 
144
        for row in rows:
 
145
            # number of cells: row.len()
 
146
            # cells can be retrieve like a list: row.cells[0] or like an iterator:
 
147
            for cell in row.iter_cells():
 
148
                print "%s |"%cell.data,
 
149
            print
 
150
            print "-"*4