1
# -*- coding: utf-8 -*-
3
MoinMoin - Parser for CSV data
5
This parser uses the databrowser widget to display the data.
7
It supports the following parser arguments:
9
* delimiter/separator: the delimiter to use instead of ;
10
* quotechar: quoting character, default off, must be ascii!
11
* show: comma-separated list of columns to show only
12
* hide: comma-separated list of columns to hide
13
* autofilter: comma-separated list of columns to equip with an auto-filter
15
* name: name of the dataset
16
* link: comma-separated list of columns that take links; separate
17
the link and the description with a space
18
* static_cols: comma-separated list of columns that are static
19
and present in each row
20
* static_vals: comma-separated list of values for those static columns
23
The static column feature is only really useful if the dataset is
postprocessed by some other plugin collecting data from multiple wiki pages.
27
@copyright: 2007, 2008 Johannes Berg <johannes@sipsolutions.net>
28
@license: GNU GPL, see COPYING for details.
31
from csv import reader, QUOTE_NONE, QUOTE_MINIMAL, Sniffer
32
from _csv import Error
34
from MoinMoin.util.dataset import TupleDataset, Column
35
from MoinMoin.widget.browser import DataBrowserWidget
36
from MoinMoin.wikiutil import escape
39
# Module-level dependency declaration.
# NOTE(review): the consumer of this list is not visible in this file --
# presumably the MoinMoin caching machinery reads it; confirm before changing.
Dependencies = ['time']
45
def _read_rows(self, r):
46
if self._first_row is not None:
51
def __init__(self, raw, request, **kw):
52
self.request = request
53
self._first_row = None
54
formatter = request.formatter
56
# workaround csv.reader deficiency by encoding to utf-8
57
# removes empty lines in front of the csv table
58
data = raw.encode('utf-8').lstrip('\n').split('\n')
61
# Previous versions of this parser used only the delimiter ";" (by default).
# This version tries to sniff the delimiter from the list preferred_delimiters.
# The Python csv sniffer changed considerably between Python 2.3 and 2.5.1, so
# we guard against the case where it finds no delimiter in the given data:
# newer versions of the sniffer raise an _csv.Error, while older versions
# return a whitespace character as the delimiter.
69
preferred_delimiters = [',', '\t', ';', ' ', ':']
70
delimiter = Sniffer().sniff(data[0], preferred_delimiters).delimiter or ';'
81
quotechar = '\x00' # can't be entered
84
hdr = reader([kw.get('format_args', '').strip().encode('utf-8')], delimiter=" ")
88
arg = arg.decode('utf-8')
90
key, val = arg.split('=', 1)
92
# handle compatibility with original 'csv' parser
93
if arg.startswith('-'):
95
hiddenindexes.append(int(arg[1:]) - 1)
99
delimiter = arg.encode('utf-8')
101
if key == 'separator' or key == 'delimiter':
102
delimiter = val.encode('utf-8')
103
if key == 'quotechar':
104
if val == val.encode('utf-8'):
105
quotechar = val.encode('utf-8')
106
quoting = QUOTE_MINIMAL
108
visible = val.split(',')
110
hiddencols = val.split(',')
111
elif key == 'autofilter':
112
autofiltercols = val.split(',')
115
elif key == 'static_cols':
116
staticcols = val.split(',')
117
elif key == 'static_vals':
118
staticvals = val.split(',')
120
linkcols = val.split(',')
122
if len(staticcols) > len(staticvals):
123
staticvals.extend([''] * (len(staticcols)-len(staticvals)))
124
elif len(staticcols) < len(staticvals):
125
staticvals = staticvals[:len(staticcols)]
127
r = reader(data, delimiter=delimiter, quotechar=quotechar, quoting=quoting)
128
cols = map(lambda x: x.decode('utf-8'), r.next()) + staticcols
130
self._show_header = True
132
if cols == staticcols:
134
self._first_row = map(lambda x: x.decode('utf-8'), r.next())
135
cols = [None] * len(self._first_row) + staticcols
136
self._show_header = False
137
except StopIteration:
140
num_entry_cols = len(cols) - len(staticcols)
142
if not visible is None:
144
if not col in visible:
145
hiddencols.append(col)
147
linkparse = [False] * len(cols)
149
data = TupleDataset(name)
150
for colidx in range(len(cols)):
152
autofilter = col in autofiltercols
153
hidden = col in hiddencols or colidx in hiddenindexes
154
data.columns.append(Column(col, autofilter=autofilter, hidden=hidden))
156
linkparse[colidx] = col in linkcols
158
for row in self._read_rows(r):
159
row = map(lambda x: x.decode('utf-8'), row)
160
if len(row) > num_entry_cols:
161
row = row[:num_entry_cols]
162
elif len(row) < num_entry_cols:
163
row.extend([''] * (num_entry_cols-len(row)))
165
for colidx in range(len(row)):
167
if linkparse[colidx]:
169
url, item = item.split(' ', 1)
171
display = escape(item)
174
formatter.url(1, url=url),
175
formatter.text(item),
178
display = escape(item)
180
display = escape(item)
181
row[colidx] = (display, item)
182
data.addRow(tuple(row))
185
def format(self, formatter):
    """
    Render the parsed dataset and write it to the request.

    A DataBrowserWidget is created for self.request, with the header row
    switched on or off according to self._show_header. It is given
    self.data, and its formatted output is written to the request.

    NOTE(review): the assignment of self.data is not visible in this
    part of the file -- it is expected to be set before format() is
    called; confirm.

    @param formatter: not used by this method's body
    """
    widget = DataBrowserWidget(self.request, show_header=self._show_header)
    widget.setData(self.data)
    rendered = widget.format()
    self.request.write(rendered)