1
# Copyright (C) 2001,2002 Python Software Foundation
2
# csv package unit tests
8
from io import StringIO
9
from tempfile import TemporaryFile
12
from test import support
14
class Test_Csv(unittest.TestCase):
16
    """
    Test the underlying C csv parser in ways that are not appropriate
    from the high level interface. Further tests of this nature are done
    in TestDialectRegistry.
    """
20
def _test_arg_valid(self, ctor, arg):
    # Shared helper: `ctor` is csv.reader or csv.writer and `arg` is a
    # valid first argument for it.  Every call below must be rejected.
    self.assertRaises(TypeError, ctor)
    self.assertRaises(TypeError, ctor, None)
    self.assertRaises(TypeError, ctor, arg, bad_attr=0)
    self.assertRaises(TypeError, ctor, arg, delimiter=0)
    self.assertRaises(TypeError, ctor, arg, delimiter='XX')
    # 'foo' is passed positionally, i.e. as a dialect.
    self.assertRaises(csv.Error, ctor, arg, 'foo')
    self.assertRaises(TypeError, ctor, arg, delimiter=None)
    self.assertRaises(TypeError, ctor, arg, delimiter=1)
    self.assertRaises(TypeError, ctor, arg, quotechar=1)
    self.assertRaises(TypeError, ctor, arg, lineterminator=None)
    self.assertRaises(TypeError, ctor, arg, lineterminator=1)
    self.assertRaises(TypeError, ctor, arg, quoting=None)
    self.assertRaises(TypeError, ctor, arg,
                      quoting=csv.QUOTE_ALL, quotechar='')
    self.assertRaises(TypeError, ctor, arg,
                      quoting=csv.QUOTE_ALL, quotechar=None)
38
def test_reader_arg_valid(self):
39
self._test_arg_valid(csv.reader, [])
41
def test_writer_arg_valid(self):
42
self._test_arg_valid(csv.writer, StringIO())
44
def _test_default_attrs(self, ctor, *args):
    # Shared helper: build a reader/writer with no options and inspect
    # the dialect it ends up with.
    obj = ctor(*args)
    # Check defaults
    self.assertEqual(obj.dialect.delimiter, ',')
    self.assertEqual(obj.dialect.doublequote, True)
    self.assertEqual(obj.dialect.escapechar, None)
    self.assertEqual(obj.dialect.lineterminator, "\r\n")
    self.assertEqual(obj.dialect.quotechar, '"')
    self.assertEqual(obj.dialect.quoting, csv.QUOTE_MINIMAL)
    self.assertEqual(obj.dialect.skipinitialspace, False)
    self.assertEqual(obj.dialect.strict, False)
    # Try deleting or changing attributes (they are read-only)
    self.assertRaises(AttributeError, delattr, obj.dialect, 'delimiter')
    self.assertRaises(AttributeError, setattr, obj.dialect, 'delimiter', ':')
    self.assertRaises(AttributeError, delattr, obj.dialect, 'quoting')
    # NOTE(review): the tail of this call was missing; the attribute/value
    # pair mirrors the 'delimiter' checks above - confirm against upstream.
    self.assertRaises(AttributeError, setattr, obj.dialect,
                      'quoting', None)
62
def test_reader_attrs(self):
63
self._test_default_attrs(csv.reader, [])
65
def test_writer_attrs(self):
66
self._test_default_attrs(csv.writer, StringIO())
68
def _test_kw_attrs(self, ctor, *args):
    # Now try with alternate options
    # strict=True closes the dict; the final assertion below requires it.
    kwargs = dict(delimiter=':', doublequote=False, escapechar='\\',
                  lineterminator='\r', quotechar='*',
                  quoting=csv.QUOTE_NONE, skipinitialspace=True,
                  strict=True)
    obj = ctor(*args, **kwargs)
    self.assertEqual(obj.dialect.delimiter, ':')
    self.assertEqual(obj.dialect.doublequote, False)
    self.assertEqual(obj.dialect.escapechar, '\\')
    self.assertEqual(obj.dialect.lineterminator, "\r")
    self.assertEqual(obj.dialect.quotechar, '*')
    self.assertEqual(obj.dialect.quoting, csv.QUOTE_NONE)
    self.assertEqual(obj.dialect.skipinitialspace, True)
    self.assertEqual(obj.dialect.strict, True)
84
def test_reader_kw_attrs(self):
85
self._test_kw_attrs(csv.reader, [])
87
def test_writer_kw_attrs(self):
88
self._test_kw_attrs(csv.writer, StringIO())
90
def _test_dialect_attrs(self, ctor, *args):
    # Now try with dialect-derived options
    # NOTE(review): the dialect definition was missing; its values are
    # taken one-for-one from the assertions below - confirm against upstream.
    class dialect:
        delimiter = '-'
        doublequote = False
        escapechar = '^'
        lineterminator = '$'
        quotechar = '#'
        quoting = csv.QUOTE_ALL
        skipinitialspace = True
        strict = False
    args = args + (dialect,)
    obj = ctor(*args)
    self.assertEqual(obj.dialect.delimiter, '-')
    self.assertEqual(obj.dialect.doublequote, False)
    self.assertEqual(obj.dialect.escapechar, '^')
    self.assertEqual(obj.dialect.lineterminator, "$")
    self.assertEqual(obj.dialect.quotechar, '#')
    self.assertEqual(obj.dialect.quoting, csv.QUOTE_ALL)
    self.assertEqual(obj.dialect.skipinitialspace, True)
    self.assertEqual(obj.dialect.strict, False)
112
def test_reader_dialect_attrs(self):
113
self._test_dialect_attrs(csv.reader, [])
115
def test_writer_dialect_attrs(self):
116
self._test_dialect_attrs(csv.writer, StringIO())
119
def _write_test(self, fields, expect, **kwargs):
    # Write one row with the given writer options and compare the file
    # contents with `expect` plus the dialect's line terminator.
    with TemporaryFile("w+", newline='') as fileobj:
        writer = csv.writer(fileobj, **kwargs)
        writer.writerow(fields)
        # Rewind: the position is at end-of-file after the write, so
        # read() would otherwise always return ''.
        fileobj.seek(0)
        self.assertEqual(fileobj.read(),
                         expect + writer.dialect.lineterminator)
127
def test_write_arg_valid(self):
128
self.assertRaises(csv.Error, self._write_test, None, '')
129
self._write_test((), '')
130
self._write_test([None], '""')
131
self.assertRaises(csv.Error, self._write_test,
132
[None], None, quoting = csv.QUOTE_NONE)
133
# Check that exceptions are passed up the chain
137
def __getitem__(self, i):
140
self.assertRaises(OSError, self._write_test, BadList(), '')
144
self.assertRaises(OSError, self._write_test, [BadItem()], '')
146
def test_write_bigfield(self):
147
# This exercises the buffer realloc functionality
148
bigstring = 'X' * 50000
149
self._write_test([bigstring,bigstring], '%s,%s' % \
150
(bigstring, bigstring))
152
def test_write_quoting(self):
153
self._write_test(['a',1,'p,q'], 'a,1,"p,q"')
154
self.assertRaises(csv.Error,
156
['a',1,'p,q'], 'a,1,p,q',
157
quoting = csv.QUOTE_NONE)
158
self._write_test(['a',1,'p,q'], 'a,1,"p,q"',
159
quoting = csv.QUOTE_MINIMAL)
160
self._write_test(['a',1,'p,q'], '"a",1,"p,q"',
161
quoting = csv.QUOTE_NONNUMERIC)
162
self._write_test(['a',1,'p,q'], '"a","1","p,q"',
163
quoting = csv.QUOTE_ALL)
164
self._write_test(['a\nb',1], '"a\nb","1"',
165
quoting = csv.QUOTE_ALL)
167
def test_write_escape(self):
168
self._write_test(['a',1,'p,q'], 'a,1,"p,q"',
170
self.assertRaises(csv.Error,
172
['a',1,'p,"q"'], 'a,1,"p,\\"q\\""',
173
escapechar=None, doublequote=False)
174
self._write_test(['a',1,'p,"q"'], 'a,1,"p,\\"q\\""',
175
escapechar='\\', doublequote = False)
176
self._write_test(['"'], '""""',
177
escapechar='\\', quoting = csv.QUOTE_MINIMAL)
178
self._write_test(['"'], '\\"',
179
escapechar='\\', quoting = csv.QUOTE_MINIMAL,
181
self._write_test(['"'], '\\"',
182
escapechar='\\', quoting = csv.QUOTE_NONE)
183
self._write_test(['a',1,'p,q'], 'a,1,p\\,q',
184
escapechar='\\', quoting = csv.QUOTE_NONE)
186
def test_writerows(self):
188
def write(self, buf):
190
writer = csv.writer(BrokenFile())
191
self.assertRaises(OSError, writer.writerows, [['a']])
193
with TemporaryFile("w+", newline='') as fileobj:
194
writer = csv.writer(fileobj)
195
self.assertRaises(TypeError, writer.writerows, None)
196
writer.writerows([['a','b'],['c','d']])
198
self.assertEqual(fileobj.read(), "a,b\r\nc,d\r\n")
200
@support.cpython_only
201
def test_writerows_legacy_strings(self):
204
c = _testcapi.unicode_legacy_string('a')
205
with TemporaryFile("w+", newline='') as fileobj:
206
writer = csv.writer(fileobj)
207
writer.writerows([[c]])
209
self.assertEqual(fileobj.read(), "a\r\n")
211
def _read_test(self, input, expect, **kwargs):
    # Parse `input` (an iterable of lines) with the given reader options
    # and compare the complete list of rows with `expect`.
    reader = csv.reader(input, **kwargs)
    self.assertEqual(list(reader), expect)
216
def test_read_oddinputs(self):
217
self._read_test([], [])
218
self._read_test([''], [[]])
219
self.assertRaises(csv.Error, self._read_test,
220
['"ab"c'], None, strict = 1)
221
# cannot handle null bytes for the moment
222
self.assertRaises(csv.Error, self._read_test,
223
['ab\0c'], None, strict = 1)
224
self._read_test(['"ab"c'], [['abc']], doublequote = 0)
226
self.assertRaises(csv.Error, self._read_test,
230
def test_read_eol(self):
231
self._read_test(['a,b'], [['a','b']])
232
self._read_test(['a,b\n'], [['a','b']])
233
self._read_test(['a,b\r\n'], [['a','b']])
234
self._read_test(['a,b\r'], [['a','b']])
235
self.assertRaises(csv.Error, self._read_test, ['a,b\rc,d'], [])
236
self.assertRaises(csv.Error, self._read_test, ['a,b\nc,d'], [])
237
self.assertRaises(csv.Error, self._read_test, ['a,b\r\nc,d'], [])
239
def test_read_eof(self):
240
self._read_test(['a,"'], [['a', '']])
241
self._read_test(['"a'], [['a']])
242
self._read_test(['^'], [['\n']], escapechar='^')
243
self.assertRaises(csv.Error, self._read_test, ['a,"'], [], strict=True)
244
self.assertRaises(csv.Error, self._read_test, ['"a'], [], strict=True)
245
self.assertRaises(csv.Error, self._read_test,
246
['^'], [], escapechar='^', strict=True)
248
def test_read_escape(self):
249
self._read_test(['a,\\b,c'], [['a', 'b', 'c']], escapechar='\\')
250
self._read_test(['a,b\\,c'], [['a', 'b,c']], escapechar='\\')
251
self._read_test(['a,"b\\,c"'], [['a', 'b,c']], escapechar='\\')
252
self._read_test(['a,"b,\\c"'], [['a', 'b,c']], escapechar='\\')
253
self._read_test(['a,"b,c\\""'], [['a', 'b,c"']], escapechar='\\')
254
self._read_test(['a,"b,c"\\'], [['a', 'b,c\\']], escapechar='\\')
256
def test_read_quoting(self):
257
self._read_test(['1,",3,",5'], [['1', ',3,', '5']])
258
self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']],
259
quotechar=None, escapechar='\\')
260
self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']],
261
quoting=csv.QUOTE_NONE, escapechar='\\')
262
# will this fail where locale uses comma for decimals?
263
self._read_test([',3,"5",7.3, 9'], [['', 3, '5', 7.3, 9]],
264
quoting=csv.QUOTE_NONNUMERIC)
265
self._read_test(['"a\nb", 7'], [['a\nb', ' 7']])
266
self.assertRaises(ValueError, self._read_test,
268
quoting=csv.QUOTE_NONNUMERIC)
270
def test_read_bigfield(self):
271
# This exercises the buffer realloc functionality and field size
273
limit = csv.field_size_limit()
276
bigstring = 'X' * size
277
bigline = '%s,%s' % (bigstring, bigstring)
278
self._read_test([bigline], [[bigstring, bigstring]])
279
csv.field_size_limit(size)
280
self._read_test([bigline], [[bigstring, bigstring]])
281
self.assertEqual(csv.field_size_limit(), size)
282
csv.field_size_limit(size-1)
283
self.assertRaises(csv.Error, self._read_test, [bigline], [])
284
self.assertRaises(TypeError, csv.field_size_limit, None)
285
self.assertRaises(TypeError, csv.field_size_limit, 1, None)
287
csv.field_size_limit(limit)
289
def test_read_linenum(self):
290
r = csv.reader(['line,1', 'line,2', 'line,3'])
291
self.assertEqual(r.line_num, 0)
293
self.assertEqual(r.line_num, 1)
295
self.assertEqual(r.line_num, 2)
297
self.assertEqual(r.line_num, 3)
298
self.assertRaises(StopIteration, next, r)
299
self.assertEqual(r.line_num, 3)
301
def test_roundtrip_quoteed_newlines(self):
302
with TemporaryFile("w+", newline='') as fileobj:
303
writer = csv.writer(fileobj)
304
self.assertRaises(TypeError, writer.writerows, None)
305
rows = [['a\nb','b'],['c','x\r\nd']]
306
writer.writerows(rows)
308
for i, row in enumerate(csv.reader(fileobj)):
309
self.assertEqual(row, rows[i])
311
def test_roundtrip_escaped_unquoted_newlines(self):
312
with TemporaryFile("w+", newline='') as fileobj:
313
writer = csv.writer(fileobj,quoting=csv.QUOTE_NONE,escapechar="\\")
314
rows = [['a\nb','b'],['c','x\r\nd']]
315
writer.writerows(rows)
317
for i, row in enumerate(csv.reader(fileobj,quoting=csv.QUOTE_NONE,escapechar="\\")):
318
self.assertEqual(row,rows[i])
320
class TestDialectRegistry(unittest.TestCase):
321
def test_registry_badargs(self):
322
self.assertRaises(TypeError, csv.list_dialects, None)
323
self.assertRaises(TypeError, csv.get_dialect)
324
self.assertRaises(csv.Error, csv.get_dialect, None)
325
self.assertRaises(csv.Error, csv.get_dialect, "nonesuch")
326
self.assertRaises(TypeError, csv.unregister_dialect)
327
self.assertRaises(csv.Error, csv.unregister_dialect, None)
328
self.assertRaises(csv.Error, csv.unregister_dialect, "nonesuch")
329
self.assertRaises(TypeError, csv.register_dialect, None)
330
self.assertRaises(TypeError, csv.register_dialect, None, None)
331
self.assertRaises(TypeError, csv.register_dialect, "nonesuch", 0, 0)
332
self.assertRaises(TypeError, csv.register_dialect, "nonesuch",
334
self.assertRaises(TypeError, csv.register_dialect, "nonesuch",
336
self.assertRaises(TypeError, csv.register_dialect, [])
338
def test_registry(self):
339
class myexceltsv(csv.excel):
342
expected_dialects = csv.list_dialects() + [name]
343
expected_dialects.sort()
344
csv.register_dialect(name, myexceltsv)
345
self.addCleanup(csv.unregister_dialect, name)
346
self.assertEqual(csv.get_dialect(name).delimiter, '\t')
347
got_dialects = sorted(csv.list_dialects())
348
self.assertEqual(expected_dialects, got_dialects)
350
def test_register_kwargs(self):
352
csv.register_dialect(name, delimiter=';')
353
self.addCleanup(csv.unregister_dialect, name)
354
self.assertEqual(csv.get_dialect(name).delimiter, ';')
355
self.assertEqual([['X', 'Y', 'Z']], list(csv.reader(['X;Y;Z'], name)))
357
def test_incomplete_dialect(self):
358
class myexceltsv(csv.Dialect):
360
self.assertRaises(csv.Error, myexceltsv)
362
def test_space_dialect(self):
363
class space(csv.excel):
365
quoting = csv.QUOTE_NONE
368
with TemporaryFile("w+") as fileobj:
369
fileobj.write("abc def\nc1ccccc1 benzene\n")
371
reader = csv.reader(fileobj, dialect=space())
372
self.assertEqual(next(reader), ["abc", "def"])
373
self.assertEqual(next(reader), ["c1ccccc1", "benzene"])
375
def compare_dialect_123(self, expected, *writeargs, **kwwriteargs):
    # Write the row [1, 2, 3] with the given writer arguments and compare
    # the raw file contents with `expected`.
    with TemporaryFile("w+", newline='', encoding="utf-8") as fileobj:
        writer = csv.writer(fileobj, *writeargs, **kwwriteargs)
        writer.writerow([1, 2, 3])
        # Rewind before reading back what was written.
        fileobj.seek(0)
        self.assertEqual(fileobj.read(), expected)
384
def test_dialect_apply(self):
385
class testA(csv.excel):
387
class testB(csv.excel):
389
class testC(csv.excel):
391
class testUni(csv.excel):
394
csv.register_dialect('testC', testC)
396
self.compare_dialect_123("1,2,3\r\n")
397
self.compare_dialect_123("1\t2\t3\r\n", testA)
398
self.compare_dialect_123("1:2:3\r\n", dialect=testB())
399
self.compare_dialect_123("1|2|3\r\n", dialect='testC')
400
self.compare_dialect_123("1;2;3\r\n", dialect=testA,
402
self.compare_dialect_123("1\u039B2\u039B3\r\n",
406
csv.unregister_dialect('testC')
408
def test_bad_dialect(self):
410
self.assertRaises(TypeError, csv.reader, [], bad_attr = 0)
412
self.assertRaises(TypeError, csv.reader, [], delimiter = None)
413
self.assertRaises(TypeError, csv.reader, [], quoting = -1)
414
self.assertRaises(TypeError, csv.reader, [], quoting = 100)
416
class TestCsvBase(unittest.TestCase):
417
def readerAssertEqual(self, input, expected_result):
    # Parse the text `input` with self.dialect and compare the rows with
    # `expected_result`.
    with TemporaryFile("w+", newline='') as fileobj:
        # The text has to be in the file, and the file rewound, before
        # the reader can see it; without this `input` is never used.
        fileobj.write(input)
        fileobj.seek(0)
        reader = csv.reader(fileobj, dialect=self.dialect)
        fields = list(reader)
        self.assertEqual(fields, expected_result)
425
def writerAssertEqual(self, input, expected_result):
    # Write the rows in `input` with self.dialect and compare the raw
    # file contents with `expected_result`.
    with TemporaryFile("w+", newline='') as fileobj:
        writer = csv.writer(fileobj, dialect=self.dialect)
        writer.writerows(input)
        # Rewind before reading back what was written.
        fileobj.seek(0)
        self.assertEqual(fileobj.read(), expected_result)
432
class TestDialectExcel(TestCsvBase):
435
def test_single(self):
436
self.readerAssertEqual('abc', [['abc']])
438
def test_simple(self):
439
self.readerAssertEqual('1,2,3,4,5', [['1','2','3','4','5']])
441
def test_blankline(self):
442
self.readerAssertEqual('', [])
444
def test_empty_fields(self):
445
self.readerAssertEqual(',', [['', '']])
447
def test_singlequoted(self):
448
self.readerAssertEqual('""', [['']])
450
def test_singlequoted_left_empty(self):
451
self.readerAssertEqual('"",', [['','']])
453
def test_singlequoted_right_empty(self):
454
self.readerAssertEqual(',""', [['','']])
456
def test_single_quoted_quote(self):
457
self.readerAssertEqual('""""', [['"']])
459
def test_quoted_quotes(self):
460
self.readerAssertEqual('""""""', [['""']])
462
def test_inline_quote(self):
463
self.readerAssertEqual('a""b', [['a""b']])
465
def test_inline_quotes(self):
466
self.readerAssertEqual('a"b"c', [['a"b"c']])
468
def test_quotes_and_more(self):
469
# Excel would never write a field containing '"a"b', but when
470
# reading one, it will return 'ab'.
471
self.readerAssertEqual('"a"b', [['ab']])
473
def test_lone_quote(self):
474
self.readerAssertEqual('a"b', [['a"b']])
476
def test_quote_and_quote(self):
477
# Excel would never write a field containing '"a" "b"', but when
478
# reading one, it will return 'a "b"'.
479
self.readerAssertEqual('"a" "b"', [['a "b"']])
481
def test_space_and_quote(self):
482
self.readerAssertEqual(' "a"', [[' "a"']])
484
def test_quoted(self):
485
self.readerAssertEqual('1,2,3,"I think, therefore I am",5,6',
487
'I think, therefore I am',
490
def test_quoted_quote(self):
491
self.readerAssertEqual('1,2,3,"""I see,"" said the blind man","as he picked up his hammer and saw"',
493
'"I see," said the blind man',
494
'as he picked up his hammer and saw']])
496
def test_quoted_nl(self):
499
said the blind man","as he picked up his
502
self.readerAssertEqual(input,
504
'"I see,"\nsaid the blind man',
505
'as he picked up his\nhammer and saw'],
508
def test_dubious_quote(self):
509
self.readerAssertEqual('12,12,1",', [['12', '12', '1"', '']])
512
self.writerAssertEqual([], '')
514
def test_single_writer(self):
515
self.writerAssertEqual([['abc']], 'abc\r\n')
517
def test_simple_writer(self):
518
self.writerAssertEqual([[1, 2, 'abc', 3, 4]], '1,2,abc,3,4\r\n')
520
def test_quotes(self):
521
self.writerAssertEqual([[1, 2, 'a"bc"', 3, 4]], '1,2,"a""bc""",3,4\r\n')
523
def test_quote_fieldsep(self):
524
self.writerAssertEqual([['abc,def']], '"abc,def"\r\n')
526
def test_newlines(self):
527
self.writerAssertEqual([[1, 2, 'a\nbc', 3, 4]], '1,2,"a\nbc",3,4\r\n')
529
class EscapedExcel(csv.excel):
    # Excel dialect that never quotes; special characters are escaped.
    quoting = csv.QUOTE_NONE
    # NOTE(review): escapechar was missing; a backslash is what the
    # TestEscapedExcel expectations ('abc\\,def') require - confirm.
    escapechar = '\\'
533
class TestEscapedExcel(TestCsvBase):
    # Round-trips a delimiter-containing field through EscapedExcel.
    dialect = EscapedExcel()

    def test_escape_fieldsep(self):
        self.writerAssertEqual([['abc,def']], 'abc\\,def\r\n')

    def test_read_escape_fieldsep(self):
        self.readerAssertEqual('abc\\,def\r\n', [['abc,def']])
542
class TestDialectUnix(TestCsvBase):
    # NOTE(review): the dialect attribute was missing; 'unix' matches the
    # class name and the all-quoted, '\n'-terminated output expected
    # below - confirm against upstream.
    dialect = 'unix'

    def test_simple_writer(self):
        self.writerAssertEqual([[1, 'abc def', 'abc']], '"1","abc def","abc"\n')

    def test_simple_reader(self):
        self.readerAssertEqual('"1","abc def","abc"\n', [['1', 'abc def', 'abc']])
551
class QuotedEscapedExcel(csv.excel):
    # Excel dialect that quotes every non-numeric field.
    quoting = csv.QUOTE_NONNUMERIC
    # NOTE(review): escapechar was missing; a backslash is what the
    # TestQuotedEscapedExcel read expectation requires - confirm.
    escapechar = '\\'
555
class TestQuotedEscapedExcel(TestCsvBase):
    # Writes and reads a delimiter-containing field via QuotedEscapedExcel.
    dialect = QuotedEscapedExcel()

    def test_write_escape_fieldsep(self):
        self.writerAssertEqual([['abc,def']], '"abc,def"\r\n')

    def test_read_escape_fieldsep(self):
        self.readerAssertEqual('"abc\\,def"\r\n', [['abc,def']])
564
class TestDictFields(unittest.TestCase):
565
### "long" means the row is longer than the number of fieldnames
566
### "short" means there are fewer elements in the row than fieldnames
567
def test_write_simple_dict(self):
568
with TemporaryFile("w+", newline='') as fileobj:
569
writer = csv.DictWriter(fileobj, fieldnames = ["f1", "f2", "f3"])
572
self.assertEqual(fileobj.readline(), "f1,f2,f3\r\n")
573
writer.writerow({"f1": 10, "f3": "abc"})
575
fileobj.readline() # header
576
self.assertEqual(fileobj.read(), "10,,abc\r\n")
578
def test_write_no_fields(self):
580
self.assertRaises(TypeError, csv.DictWriter, fileobj)
582
def test_write_fields_not_in_fieldnames(self):
583
with TemporaryFile("w+", newline='') as fileobj:
584
writer = csv.DictWriter(fileobj, fieldnames = ["f1", "f2", "f3"])
585
# Of special note is the non-string key (issue 19449)
586
with self.assertRaises(ValueError) as cx:
587
writer.writerow({"f4": 10, "f2": "spam", 1: "abc"})
588
exception = str(cx.exception)
589
self.assertIn("fieldnames", exception)
590
self.assertIn("'f4'", exception)
591
self.assertNotIn("'f2'", exception)
592
self.assertIn("1", exception)
594
def test_read_dict_fields(self):
595
with TemporaryFile("w+") as fileobj:
596
fileobj.write("1,2,abc\r\n")
598
reader = csv.DictReader(fileobj,
599
fieldnames=["f1", "f2", "f3"])
600
self.assertEqual(next(reader), {"f1": '1', "f2": '2', "f3": 'abc'})
602
def test_read_dict_no_fieldnames(self):
603
with TemporaryFile("w+") as fileobj:
604
fileobj.write("f1,f2,f3\r\n1,2,abc\r\n")
606
reader = csv.DictReader(fileobj)
607
self.assertEqual(next(reader), {"f1": '1', "f2": '2', "f3": 'abc'})
608
self.assertEqual(reader.fieldnames, ["f1", "f2", "f3"])
610
# Two test cases to make sure existing ways of implicitly setting
611
# fieldnames continue to work. Both arise from discussion in issue3436.
612
def test_read_dict_fieldnames_from_file(self):
613
with TemporaryFile("w+") as fileobj:
614
fileobj.write("f1,f2,f3\r\n1,2,abc\r\n")
616
reader = csv.DictReader(fileobj,
617
fieldnames=next(csv.reader(fileobj)))
618
self.assertEqual(reader.fieldnames, ["f1", "f2", "f3"])
619
self.assertEqual(next(reader), {"f1": '1', "f2": '2', "f3": 'abc'})
621
def test_read_dict_fieldnames_chain(self):
623
with TemporaryFile("w+") as fileobj:
624
fileobj.write("f1,f2,f3\r\n1,2,abc\r\n")
626
reader = csv.DictReader(fileobj)
628
for row in itertools.chain([first], reader):
629
self.assertEqual(reader.fieldnames, ["f1", "f2", "f3"])
630
self.assertEqual(row, {"f1": '1', "f2": '2', "f3": 'abc'})
632
def test_read_long(self):
633
with TemporaryFile("w+") as fileobj:
634
fileobj.write("1,2,abc,4,5,6\r\n")
636
reader = csv.DictReader(fileobj,
637
fieldnames=["f1", "f2"])
638
self.assertEqual(next(reader), {"f1": '1', "f2": '2',
639
None: ["abc", "4", "5", "6"]})
641
def test_read_long_with_rest(self):
642
with TemporaryFile("w+") as fileobj:
643
fileobj.write("1,2,abc,4,5,6\r\n")
645
reader = csv.DictReader(fileobj,
646
fieldnames=["f1", "f2"], restkey="_rest")
647
self.assertEqual(next(reader), {"f1": '1', "f2": '2',
648
"_rest": ["abc", "4", "5", "6"]})
650
def test_read_long_with_rest_no_fieldnames(self):
651
with TemporaryFile("w+") as fileobj:
652
fileobj.write("f1,f2\r\n1,2,abc,4,5,6\r\n")
654
reader = csv.DictReader(fileobj, restkey="_rest")
655
self.assertEqual(reader.fieldnames, ["f1", "f2"])
656
self.assertEqual(next(reader), {"f1": '1', "f2": '2',
657
"_rest": ["abc", "4", "5", "6"]})
659
def test_read_short(self):
660
with TemporaryFile("w+") as fileobj:
661
fileobj.write("1,2,abc,4,5,6\r\n1,2,abc\r\n")
663
reader = csv.DictReader(fileobj,
664
fieldnames="1 2 3 4 5 6".split(),
666
self.assertEqual(next(reader), {"1": '1', "2": '2', "3": 'abc',
667
"4": '4', "5": '5', "6": '6'})
668
self.assertEqual(next(reader), {"1": '1', "2": '2', "3": 'abc',
669
"4": 'DEFAULT', "5": 'DEFAULT',
672
def test_read_multi(self):
674
'2147483648,43.0e12,17,abc,def\r\n',
675
'147483648,43.0e2,17,abc,def\r\n',
676
'47483648,43.0,170,abc,def\r\n'
679
reader = csv.DictReader(sample,
680
fieldnames="i1 float i2 s1 s2".split())
681
self.assertEqual(next(reader), {"i1": '2147483648',
687
def test_read_with_blanks(self):
688
reader = csv.DictReader(["1,2,abc,4,5,6\r\n","\r\n",
689
"1,2,abc,4,5,6\r\n"],
690
fieldnames="1 2 3 4 5 6".split())
691
self.assertEqual(next(reader), {"1": '1', "2": '2', "3": 'abc',
692
"4": '4', "5": '5', "6": '6'})
693
self.assertEqual(next(reader), {"1": '1', "2": '2', "3": 'abc',
694
"4": '4', "5": '5', "6": '6'})
696
def test_read_semi_sep(self):
697
reader = csv.DictReader(["1;2;abc;4;5;6\r\n"],
698
fieldnames="1 2 3 4 5 6".split(),
700
self.assertEqual(next(reader), {"1": '1', "2": '2', "3": 'abc',
701
"4": '4', "5": '5', "6": '6'})
703
class TestArrayWrites(unittest.TestCase):
704
def test_int_write(self):
706
contents = [(20-i) for i in range(20)]
707
a = array.array('i', contents)
709
with TemporaryFile("w+", newline='') as fileobj:
710
writer = csv.writer(fileobj, dialect="excel")
712
expected = ",".join([str(i) for i in a])+"\r\n"
714
self.assertEqual(fileobj.read(), expected)
716
def test_double_write(self):
718
contents = [(20-i)*0.1 for i in range(20)]
719
a = array.array('d', contents)
720
with TemporaryFile("w+", newline='') as fileobj:
721
writer = csv.writer(fileobj, dialect="excel")
723
expected = ",".join([str(i) for i in a])+"\r\n"
725
self.assertEqual(fileobj.read(), expected)
727
def test_float_write(self):
729
contents = [(20-i)*0.1 for i in range(20)]
730
a = array.array('f', contents)
731
with TemporaryFile("w+", newline='') as fileobj:
732
writer = csv.writer(fileobj, dialect="excel")
734
expected = ",".join([str(i) for i in a])+"\r\n"
736
self.assertEqual(fileobj.read(), expected)
738
def test_char_write(self):
740
a = array.array('u', string.ascii_letters)
742
with TemporaryFile("w+", newline='') as fileobj:
743
writer = csv.writer(fileobj, dialect="excel")
745
expected = ",".join(a)+"\r\n"
747
self.assertEqual(fileobj.read(), expected)
749
class TestDialectValidity(unittest.TestCase):
750
def test_quoting(self):
751
class mydialect(csv.Dialect):
755
skipinitialspace = True
756
lineterminator = '\r\n'
757
quoting = csv.QUOTE_NONE
760
mydialect.quoting = None
761
self.assertRaises(csv.Error, mydialect)
763
mydialect.doublequote = True
764
mydialect.quoting = csv.QUOTE_ALL
765
mydialect.quotechar = '"'
768
mydialect.quotechar = "''"
769
self.assertRaises(csv.Error, mydialect)
771
mydialect.quotechar = 4
772
self.assertRaises(csv.Error, mydialect)
774
def test_delimiter(self):
775
class mydialect(csv.Dialect):
779
skipinitialspace = True
780
lineterminator = '\r\n'
781
quoting = csv.QUOTE_NONE
784
mydialect.delimiter = ":::"
785
self.assertRaises(csv.Error, mydialect)
787
mydialect.delimiter = 4
788
self.assertRaises(csv.Error, mydialect)
790
def test_lineterminator(self):
791
class mydialect(csv.Dialect):
795
skipinitialspace = True
796
lineterminator = '\r\n'
797
quoting = csv.QUOTE_NONE
800
mydialect.lineterminator = ":::"
803
mydialect.lineterminator = 4
804
self.assertRaises(csv.Error, mydialect)
807
class TestSniffer(unittest.TestCase):
809
Harry's, Arlington Heights, IL, 2/1/03, Kimi Hayes
810
Shark City, Glendale Heights, IL, 12/28/02, Prezence
811
Tommy's Place, Blue Island, IL, 12/28/02, Blue Sunday/White Crow
812
Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
815
'Harry''s':'Arlington Heights':'IL':'2/1/03':'Kimi Hayes'
816
'Shark City':'Glendale Heights':'IL':'12/28/02':'Prezence'
817
'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow'
818
'Stonecutters ''Seafood'' and Chop House':'Lemont':'IL':'12/19/02':'Week Back'
821
"venue","city","state","date","performers"
824
05/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03
825
05/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03
826
05/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03
830
2147483648;43.0e12;17;abc;def
831
147483648;43.0e2;17;abc;def
832
47483648;43.0;170;abc;def
835
sample5 = "aaa\tbbb\r\nAAA\t\r\nBBB\t\r\n"
836
sample6 = "a|b|c\r\nd|e|f\r\n"
837
sample7 = "'a'|'b'|'c'\r\n'd'|e|f\r\n"
839
# Issue 18155: Use a delimiter that is a special char to regex:
842
"venue"+"city"+"state"+"date"+"performers"
845
Harry's+ Arlington Heights+ IL+ 2/1/03+ Kimi Hayes
846
Shark City+ Glendale Heights+ IL+ 12/28/02+ Prezence
847
Tommy's Place+ Blue Island+ IL+ 12/28/02+ Blue Sunday/White Crow
848
Stonecutters Seafood and Chop House+ Lemont+ IL+ 12/19/02+ Week Back
851
'Harry''s'+ Arlington Heights'+ 'IL'+ '2/1/03'+ 'Kimi Hayes'
852
'Shark City'+ Glendale Heights'+' IL'+ '12/28/02'+ 'Prezence'
853
'Tommy''s Place'+ Blue Island'+ 'IL'+ '12/28/02'+ 'Blue Sunday/White Crow'
854
'Stonecutters ''Seafood'' and Chop House'+ 'Lemont'+ 'IL'+ '12/19/02'+ 'Week Back'
857
def test_has_header(self):
858
sniffer = csv.Sniffer()
859
self.assertEqual(sniffer.has_header(self.sample1), False)
860
self.assertEqual(sniffer.has_header(self.header1 + self.sample1),
863
def test_has_header_regex_special_delimiter(self):
864
sniffer = csv.Sniffer()
865
self.assertEqual(sniffer.has_header(self.sample8), False)
866
self.assertEqual(sniffer.has_header(self.header2 + self.sample8),
869
def test_sniff(self):
870
sniffer = csv.Sniffer()
871
dialect = sniffer.sniff(self.sample1)
872
self.assertEqual(dialect.delimiter, ",")
873
self.assertEqual(dialect.quotechar, '"')
874
self.assertEqual(dialect.skipinitialspace, True)
876
dialect = sniffer.sniff(self.sample2)
877
self.assertEqual(dialect.delimiter, ":")
878
self.assertEqual(dialect.quotechar, "'")
879
self.assertEqual(dialect.skipinitialspace, False)
881
def test_delimiters(self):
    sniffer = csv.Sniffer()
    dialect = sniffer.sniff(self.sample3)
    # given that all three lines in sample3 are equal,
    # I think that any character could have been 'guessed' as the
    # delimiter, depending on dictionary order
    self.assertIn(dialect.delimiter, self.sample3)
    # Restricting the candidate set settles the choice.
    dialect = sniffer.sniff(self.sample3, delimiters="?,")
    self.assertEqual(dialect.delimiter, "?")
    dialect = sniffer.sniff(self.sample3, delimiters="/,")
    self.assertEqual(dialect.delimiter, "/")
    dialect = sniffer.sniff(self.sample4)
    self.assertEqual(dialect.delimiter, ";")
    dialect = sniffer.sniff(self.sample5)
    self.assertEqual(dialect.delimiter, "\t")
    dialect = sniffer.sniff(self.sample6)
    self.assertEqual(dialect.delimiter, "|")
    dialect = sniffer.sniff(self.sample7)
    self.assertEqual(dialect.delimiter, "|")
    self.assertEqual(dialect.quotechar, "'")
    dialect = sniffer.sniff(self.sample8)
    self.assertEqual(dialect.delimiter, '+')
    dialect = sniffer.sniff(self.sample9)
    self.assertEqual(dialect.delimiter, '+')
    self.assertEqual(dialect.quotechar, "'")
907
def test_doublequote(self):
    sniffer = csv.Sniffer()
    # Check the sniffed doublequote flag for each sample in turn.
    dialect = sniffer.sniff(self.header1)
    self.assertFalse(dialect.doublequote)
    dialect = sniffer.sniff(self.header2)
    self.assertFalse(dialect.doublequote)
    dialect = sniffer.sniff(self.sample2)
    self.assertTrue(dialect.doublequote)
    dialect = sniffer.sniff(self.sample8)
    self.assertFalse(dialect.doublequote)
    dialect = sniffer.sniff(self.sample9)
    self.assertTrue(dialect.doublequote)
925
@unittest.skipUnless(hasattr(sys, "gettotalrefcount"),
926
'requires sys.gettotalrefcount()')
927
class TestLeaks(unittest.TestCase):
928
def test_create_read(self):
930
lastrc = sys.gettotalrefcount()
933
self.assertEqual(gc.garbage, [])
934
rc = sys.gettotalrefcount()
935
csv.reader(["a,b,c\r\n"])
936
csv.reader(["a,b,c\r\n"])
937
csv.reader(["a,b,c\r\n"])
940
# if csv.reader() leaks, last delta should be 3 or more
941
self.assertEqual(delta < 3, True)
943
def test_create_write(self):
945
lastrc = sys.gettotalrefcount()
949
self.assertEqual(gc.garbage, [])
950
rc = sys.gettotalrefcount()
956
# if csv.writer() leaks, last delta should be 3 or more
957
self.assertEqual(delta < 3, True)
961
rows = ["a,b,c\r\n"]*5
962
lastrc = sys.gettotalrefcount()
965
self.assertEqual(gc.garbage, [])
966
rc = sys.gettotalrefcount()
967
rdr = csv.reader(rows)
972
# if reader leaks during read, delta should be 5 or more
973
self.assertEqual(delta < 5, True)
975
def test_write(self):
979
lastrc = sys.gettotalrefcount()
982
self.assertEqual(gc.garbage, [])
983
rc = sys.gettotalrefcount()
984
writer = csv.writer(s)
989
# if writer leaks during write, last delta should be 5 or more
990
self.assertEqual(delta < 5, True)
992
class TestUnicode(unittest.TestCase):
994
names = ["Martin von Löwis",
995
"Marc André Lemburg",
999
def test_unicode_read(self):
1001
with TemporaryFile("w+", newline='', encoding="utf-8") as fileobj:
1002
fileobj.write(",".join(self.names) + "\r\n")
1004
reader = csv.reader(fileobj)
1005
self.assertEqual(list(reader), [self.names])
1008
def test_unicode_write(self):
1010
with TemporaryFile("w+", newline='', encoding="utf-8") as fileobj:
1011
writer = csv.writer(fileobj)
1012
writer.writerow(self.names)
1013
expected = ",".join(self.names)+"\r\n"
1015
self.assertEqual(fileobj.read(), expected)
1020
mod = sys.modules[__name__]
1021
support.run_unittest(
1022
*[getattr(mod, name) for name in dir(mod) if name.startswith('Test')]
1025
if __name__ == '__main__':