2
* Licensed to the Apache Software Foundation (ASF) under one or more
3
* contributor license agreements. See the NOTICE file distributed with
4
* this work for additional information regarding copyright ownership.
5
* The ASF licenses this file to You under the Apache License, Version 2.0
6
* (the "License"); you may not use this file except in compliance with
7
* the License. You may obtain a copy of the License at
9
* http://www.apache.org/licenses/LICENSE-2.0
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
17
package org.apache.commons.csv;
19
import java.io.IOException;
20
import java.io.Reader;
21
import java.io.StringReader;
22
import java.util.Arrays;
24
import junit.framework.Test;
25
import junit.framework.TestCase;
26
import junit.framework.TestSuite;
31
* The tests are organized in three different sections:
32
* The 'setter/getter' section, the lexer section and finally the parser
33
* section. In case a test fails, you should follow a top-down approach for
34
* fixing a potential bug (it's likely that the parser itself fails if the lexer
37
public class CSVParserTest extends TestCase {
42
// Test-only subclass of CSVParser: testNextToken() renders the token returned
// by super.nextToken() as a string so that the lexer tests can assert on it.
class TestCSVParser extends CSVParser {
44
* Test parser to investigate the type of the internal Token.
47
TestCSVParser(Reader in) {
51
* Calls super.nextToken() and prints out a String representation of token
53
* @return String representation of token type and content
54
* @throws IOException like {@link CSVParser#nextToken()}
56
public String testNextToken() throws IOException {
57
Token t = super.nextToken();
58
// Rendered as "<type>;<content>;" - the format the lexer tests compare against.
String tmp = Integer.toString(t.type) + ";" + t.content + ";";
59
// Echo the rendered token to stdout.
System.out.println("token=" + tmp);
65
* Constructor for JUnit.
66
* @param name Name to be used in JUnit Test Environment
68
// Constructor used by JUnit; 'name' is the name used in the JUnit test environment.
public CSVParserTest(String name) {
73
* Returns a Test suite for JUnit.
74
* @return Test suite for JUnit
76
public static Test suite() {
77
return new TestSuite(CSVParserTest.class);
81
// ======================================================
83
// ======================================================
85
// Single line (without comment)
86
public void testNextToken1() throws IOException {
87
String code = "abc,def, hijk, lmnop, qrst,uv ,wxy ,z , ,";
88
TestCSVParser parser = new TestCSVParser(new StringReader(code));
89
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
90
System.out.println("---------\n" + code + "\n-------------");
91
assertEquals(CSVParser.TT_TOKEN + ";abc;", parser.testNextToken());
92
assertEquals(CSVParser.TT_TOKEN + ";def;", parser.testNextToken());
93
assertEquals(CSVParser.TT_TOKEN + ";hijk;", parser.testNextToken());
94
assertEquals(CSVParser.TT_TOKEN + ";lmnop;", parser.testNextToken());
95
assertEquals(CSVParser.TT_TOKEN + ";qrst;", parser.testNextToken());
96
assertEquals(CSVParser.TT_TOKEN + ";uv;", parser.testNextToken());
97
assertEquals(CSVParser.TT_TOKEN + ";wxy;", parser.testNextToken());
98
assertEquals(CSVParser.TT_TOKEN + ";z;", parser.testNextToken());
99
assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
100
assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
103
// multiline including comments (and empty lines)
104
public void testNextToken2() throws IOException {
108
* # this is a comment
112
String code = "1,2,3,\na,b x,c\n#foo\n\nd,e,\n\n";
113
TestCSVParser parser = new TestCSVParser(new StringReader(code));
114
parser.getStrategy().setIgnoreEmptyLines(false);
115
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
116
parser.getStrategy().setCommentStart('#');
117
System.out.println("---------\n" + code + "\n-------------");
118
assertEquals(CSVParser.TT_TOKEN + ";1;", parser.testNextToken());
119
assertEquals(CSVParser.TT_TOKEN + ";2;", parser.testNextToken());
120
assertEquals(CSVParser.TT_TOKEN + ";3;", parser.testNextToken());
121
assertEquals(CSVParser.TT_EORECORD + ";;", parser.testNextToken());
122
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
123
assertEquals(CSVParser.TT_TOKEN + ";b x;", parser.testNextToken());
124
assertEquals(CSVParser.TT_EORECORD + ";c;", parser.testNextToken());
125
assertEquals(CSVParser.TT_EORECORD + ";;", parser.testNextToken());
126
assertEquals(CSVParser.TT_TOKEN + ";d;", parser.testNextToken());
127
assertEquals(CSVParser.TT_TOKEN + ";e;", parser.testNextToken());
128
assertEquals(CSVParser.TT_EORECORD + ";;", parser.testNextToken());
129
assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
130
assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
134
// simple token with escaping
135
public void testNextToken3() throws IOException {
139
String code = "a,\\,,b\n\\,,";
140
TestCSVParser parser = new TestCSVParser(new StringReader(code));
141
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
142
parser.getStrategy().setCommentStart('#');
143
System.out.println("---------\n" + code + "\n-------------");
144
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
145
// an unquoted single backslash is not an escape char
146
assertEquals(CSVParser.TT_TOKEN + ";\\;", parser.testNextToken());
147
assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
148
assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
149
// an unquoted single backslash is not an escape char
150
assertEquals(CSVParser.TT_TOKEN + ";\\;", parser.testNextToken());
151
assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
152
assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
155
// encapsulator tokenizer (single line)
156
/**
 * Lexer test for encapsulated (double-quoted) values on single lines. Each of
 * the four records places whitespace differently relative to the quotes; the
 * expected tokens show that whitespace inside the quotes is kept while
 * whitespace before the opening or after the closing quote is dropped.
 */
public void testNextToken4() throws IOException {
159
* a,"foo " ,b // whitespace after closing encapsulator
163
"a,\"foo\",b\na, \" foo\",b\na,\"foo \" ,b\na, \" foo \" ,b";
164
TestCSVParser parser = new TestCSVParser(new StringReader(code));
165
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
166
System.out.println("---------\n" + code + "\n-------------");
167
// record 1: no extra whitespace
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
168
assertEquals(CSVParser.TT_TOKEN + ";foo;", parser.testNextToken());
169
assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
170
// record 2: whitespace before the opening quote and inside the quotes
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
171
assertEquals(CSVParser.TT_TOKEN + "; foo;", parser.testNextToken());
172
assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
173
// record 3: whitespace inside the quotes and after the closing quote
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
174
assertEquals(CSVParser.TT_TOKEN + ";foo ;", parser.testNextToken());
175
assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
176
// record 4: whitespace on both sides, inside and outside the quotes
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
177
assertEquals(CSVParser.TT_TOKEN + "; foo ;", parser.testNextToken());
178
// The input has no trailing line break, so the last value is expected with
// the EOF token rather than with an end-of-record token:
// assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
179
assertEquals(CSVParser.TT_EOF + ";b;", parser.testNextToken());
182
// encapsulator tokenizer (multi line, delimiter in string)
183
/**
 * Lexer test for encapsulated values that span several lines and contain the
 * delimiter. The expected tokens show that line breaks and commas inside
 * quotes are part of the value and do not end the token or the record.
 */
public void testNextToken5() throws IOException {
185
"a,\"foo\n\",b\n\"foo\n baar ,,,\"\n\"\n\t \n\",\"\\\"\""
188
TestCSVParser parser = new TestCSVParser(new StringReader(code));
189
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
190
System.out.println("---------\n" + code + "\n-------------");
191
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
192
// the line break inside the quotes belongs to the value
assertEquals(CSVParser.TT_TOKEN + ";foo\n;", parser.testNextToken());
193
assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
194
// a quoted value holding a line break and several delimiters forms a whole record
assertEquals(CSVParser.TT_EORECORD + ";foo\n baar ,,,;",
195
parser.testNextToken());
196
// a quoted value made up only of whitespace and line breaks is kept verbatim
assertEquals(CSVParser.TT_TOKEN + ";\n\t \n;", parser.testNextToken());
197
assertEquals(CSVParser.TT_TOKEN + ";\";", parser.testNextToken());
198
// escape char in quoted input only escapes delimiter
199
assertEquals(CSVParser.TT_TOKEN + ";\\,;", parser.testNextToken());
200
// no trailing line break: the last value is expected with the EOF token
assertEquals(CSVParser.TT_EOF + ";\";", parser.testNextToken());
203
// change delimiters, comment, encapsulator
204
/**
 * Lexer test with a non-default strategy built as
 * CSVStrategy(';', '\'', '!'). The input and the expected tokens show ';'
 * acting as the delimiter and the single quote as the encapsulator; '!' is
 * presumably the comment start character (the input contains a line that
 * begins with it) - confirm against the CSVStrategy constructor.
 */
public void testNextToken6() throws IOException {
205
/* file: a;'b and \' more
210
String code = "a;'b and \\' more\n'\n!comment;;;;\n;;";
211
TestCSVParser parser = new TestCSVParser(new StringReader(code));
212
parser.setStrategy( new CSVStrategy(';', '\'', '!') );
213
System.out.println("---------\n" + code + "\n-------------");
214
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
216
CSVParser.TT_EORECORD + ";b and ' more\n;",
217
parser.testNextToken());
221
// ======================================================
223
// ======================================================
228
// NOTE(review): these lines look like the tail of the shared 'code' input and the
// rows of the shared expected 'res' array used by testGetLine, testNextValue and
// testGetAllValues - confirm against the declarations.
+ "\"foo baar\", b,\n"
229
+ " \"foo\n,,\n\"\",,\n\\\"\",d,e\n";
231
{"a", "b", "c", "d"},
233
{"foo baar", "b", ""},
234
{"foo\n,,\n\",,\n\"", "d", "e"}
236
/**
 * Parser test for getLine(): reads the input 'code' line by line and compares
 * every returned line with the corresponding row of 'res'; one further call
 * must return null.
 */
public void testGetLine() throws IOException {
237
CSVParser parser = new CSVParser(new StringReader(code));
238
System.out.println("---------\n" + code + "\n-------------");
240
for (int i = 0; i < res.length; i++) {
241
tmp = parser.getLine();
242
assertTrue(Arrays.equals(res[i], tmp));
244
// after the last expected line, getLine() must signal the end of the input with null
tmp = parser.getLine();
245
assertTrue(tmp == null);
248
/**
 * Parser test for nextValue(): reads the input 'code' value by value and
 * compares each one with 'res' in row-major order; one further call must
 * return null.
 */
public void testNextValue() throws IOException {
249
CSVParser parser = new CSVParser(new StringReader(code));
250
System.out.println("---------\n" + code + "\n-------------");
252
for (int i = 0; i < res.length; i++) {
253
for (int j = 0; j < res[i].length; j++) {
254
tmp = parser.nextValue();
255
assertEquals(res[i][j], tmp);
258
// after the last expected value, nextValue() must signal the end of the input with null
tmp = parser.nextValue();
259
assertTrue(tmp == null);
262
public void testGetAllValues() throws IOException {
263
CSVParser parser = new CSVParser(new StringReader(code));
264
System.out.println("---------\n" + code + "\n-------------");
265
String[][] tmp = parser.getAllValues();
266
assertEquals(res.length, tmp.length);
267
assertTrue(tmp.length > 0);
268
for (int i = 0; i < res.length; i++) {
269
assertTrue(Arrays.equals(res[i], tmp[i]));
273
/**
 * Parser test with CSVStrategy.EXCEL_STRATEGY passed to the constructor.
 * The input uses CRLF line endings, an empty line, doubled quotes inside
 * quoted values and a quoted value containing a line break; the result of
 * getAllValues() is compared row by row with the expected rows.
 */
public void testExcelStrategy1() throws IOException {
275
"value1,value2,value3,value4\r\na,b,c,d\r\n x,,,"
276
+ "\r\n\r\n\"\"\"hello\"\"\",\" \"\"world\"\"\",\"abc\ndef\",\r\n";
278
{"value1", "value2", "value3", "value4"},
279
{"a", "b", "c", "d"},
282
// doubled quotes inside a quoted value are expected as a single literal quote
{"\"hello\"", " \"world\"", "abc\ndef", ""}
284
CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY);
285
System.out.println("---------\n" + code + "\n-------------");
286
String[][] tmp = parser.getAllValues();
287
assertEquals(res.length, tmp.length);
288
assertTrue(tmp.length > 0);
289
for (int i = 0; i < res.length; i++) {
290
assertTrue(Arrays.equals(res[i], tmp[i]));
294
/**
 * Parser test with CSVStrategy.EXCEL_STRATEGY set after construction. The
 * input contains empty lines between the records; the result of
 * getAllValues() is printed and compared row by row with 'res'.
 */
public void testExcelStrategy2() throws Exception {
295
String code = "foo,baar\r\n\r\nhello,\r\n\r\nworld,\r\n";
303
CSVParser parser = new CSVParser(new StringReader(code));
304
parser.setStrategy(CSVStrategy.EXCEL_STRATEGY);
305
System.out.println("---------\n" + code + "\n-------------");
306
String[][] tmp = parser.getAllValues();
307
assertEquals(res.length, tmp.length);
308
assertTrue(tmp.length > 0);
309
for (int i = 0; i < res.length; i++) {
310
// debug output: print every parsed value of row i
for (int j = 0; j < tmp[i].length; j++) {
311
System.out.println("'" + tmp[i][j] + "'");
313
assertTrue(Arrays.equals(res[i], tmp[i]));
317
/**
 * End-of-file behaviour under CSVStrategy.EXCEL_STRATEGY. Every entry of
 * 'codes' is the same data with a different ending of the last line (CRLF,
 * LF or nothing, with or without an empty quoted value); each entry is parsed
 * on its own and must produce the same rows as 'res'.
 */
public void testEndOfFileBehaviourExcel() throws Exception {
319
"hello,\r\n\r\nworld,\r\n",
320
"hello,\r\n\r\nworld,",
321
"hello,\r\n\r\nworld,\"\"\r\n",
322
"hello,\r\n\r\nworld,\"\"",
323
"hello,\r\n\r\nworld,\n",
324
"hello,\r\n\r\nworld,",
325
"hello,\r\n\r\nworld,\"\"\n",
326
"hello,\r\n\r\nworld,\"\""
330
{""}, // ExcelStrategy does not ignore empty lines
334
// run the same checks against every input variant
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
335
code = codes[codeIndex];
336
CSVParser parser = new CSVParser(new StringReader(code));
337
parser.setStrategy(CSVStrategy.EXCEL_STRATEGY);
338
System.out.println("---------\n" + code + "\n-------------");
339
String[][] tmp = parser.getAllValues();
340
assertEquals(res.length, tmp.length);
341
assertTrue(tmp.length > 0);
342
for (int i = 0; i < res.length; i++) {
343
// debug output: print every parsed value of row i
for (int j = 0; j < tmp[i].length; j++) {
344
System.out.println("'" + tmp[i][j] + "'");
346
assertTrue(Arrays.equals(res[i], tmp[i]));
351
/**
 * End-of-file behaviour under CSVStrategy.DEFAULT_STRATEGY. Every entry of
 * 'codes' is the same data with a different ending of the last line (CRLF,
 * LF or nothing, with or without an empty quoted value); each entry is parsed
 * on its own and must produce the same rows as 'res'.
 */
public void testEndOfFileBehaviorCSV() throws Exception {
353
"hello,\r\n\r\nworld,\r\n",
354
"hello,\r\n\r\nworld,",
355
"hello,\r\n\r\nworld,\"\"\r\n",
356
"hello,\r\n\r\nworld,\"\"",
357
"hello,\r\n\r\nworld,\n",
358
"hello,\r\n\r\nworld,",
359
"hello,\r\n\r\nworld,\"\"\n",
360
"hello,\r\n\r\nworld,\"\""
363
{"hello", ""}, // CSV Strategy ignores empty lines
367
// run the same checks against every input variant
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
368
code = codes[codeIndex];
369
CSVParser parser = new CSVParser(new StringReader(code));
370
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
371
System.out.println("---------\n" + code + "\n-------------");
372
String[][] tmp = parser.getAllValues();
373
assertEquals(res.length, tmp.length);
374
assertTrue(tmp.length > 0);
375
for (int i = 0; i < res.length; i++) {
376
// debug output: print every parsed value of row i
for (int j = 0; j < tmp[i].length; j++) {
377
System.out.println("'" + tmp[i][j] + "'");
379
assertTrue(Arrays.equals(res[i], tmp[i]));
384
/**
 * Empty-line behaviour under CSVStrategy.EXCEL_STRATEGY. Every entry of
 * 'codes' ends with several consecutive line breaks; each entry is parsed on
 * its own and must produce the same rows as 'res'.
 */
public void testEmptyLineBehaviourExcel() throws Exception {
386
"hello,\r\n\r\n\r\n",
388
"hello,\"\"\r\n\r\n\r\n",
393
{""}, // ExcelStrategy does not ignore empty lines
397
// run the same checks against every input variant
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
398
code = codes[codeIndex];
399
CSVParser parser = new CSVParser(new StringReader(code));
400
parser.setStrategy(CSVStrategy.EXCEL_STRATEGY);
401
System.out.println("---------\n" + code + "\n-------------");
402
String[][] tmp = parser.getAllValues();
403
assertEquals(res.length, tmp.length);
404
assertTrue(tmp.length > 0);
405
for (int i = 0; i < res.length; i++) {
406
// debug output: print every parsed value of row i
for (int j = 0; j < tmp[i].length; j++) {
407
System.out.println("'" + tmp[i][j] + "'");
409
assertTrue(Arrays.equals(res[i], tmp[i]));
414
/**
 * Empty-line behaviour under CSVStrategy.DEFAULT_STRATEGY. Every entry of
 * 'codes' ends with several consecutive line breaks; each entry is parsed on
 * its own and must produce the same rows as 'res'.
 */
public void testEmptyLineBehaviourCSV() throws Exception {
416
"hello,\r\n\r\n\r\n",
418
"hello,\"\"\r\n\r\n\r\n",
422
{"hello", ""} // CSV Strategy ignores empty lines
425
// run the same checks against every input variant
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
426
code = codes[codeIndex];
427
CSVParser parser = new CSVParser(new StringReader(code));
428
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
429
System.out.println("---------\n" + code + "\n-------------");
430
String[][] tmp = parser.getAllValues();
431
assertEquals(res.length, tmp.length);
432
assertTrue(tmp.length > 0);
433
for (int i = 0; i < res.length; i++) {
434
// debug output: print every parsed value of row i
for (int j = 0; j < tmp[i].length; j++) {
435
System.out.println("'" + tmp[i][j] + "'");
437
assertTrue(Arrays.equals(res[i], tmp[i]));
442
/**
 * Parser test for backslashes in the input. The input is parsed with the
 * parser's default construction and the result of getAllValues() is printed
 * and compared row by row with the expected rows in 'res'.
 */
public void testBackslashEscaping() throws IOException {
447
+ "one,\"tw\\\"o\"\n"
449
+ "one,two,\"th,ree\"\n"
454
{ "one", "two", "three" },
455
{ "on\\\"e", "two" },
458
{ "one", "t\\,wo" }, // backslash in quotes only escapes a delimiter (",")
459
{ "one", "two", "th,ree" },
460
{ "a\\\\" }, // backslash in quotes only escapes a delimiter (",")
461
{ "a\\", "b" }, // a backslash must be returned
462
{ "a\\\\,b" } // backslash in quotes only escapes a delimiter (",")
464
CSVParser parser = new CSVParser(new StringReader(code));
465
System.out.println("---------\n" + code + "\n-------------");
466
String[][] tmp = parser.getAllValues();
467
assertEquals(res.length, tmp.length);
468
assertTrue(tmp.length > 0);
469
for (int i = 0; i < res.length; i++) {
470
// debug output: print every parsed value of row i
for (int j = 0; j < tmp[i].length; j++) {
471
System.out.println("'" + tmp[i][j] + "'");
473
assertTrue(Arrays.equals(res[i], tmp[i]));
477
public void testUnicodeEscape() throws IOException {
478
String code = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063";
479
CSVParser parser = new CSVParser(new StringReader(code));
480
System.out.println("---------\n" + code + "\n-------------");
481
parser.getStrategy().setUnicodeEscapeInterpretation(true);
482
String[] data = parser.getLine();
483
assertEquals(2, data.length);
484
assertEquals("abc", data[0]);
485
assertEquals("public", data[1]);
488
public void testCarriageReturnLineFeedEndings() throws IOException {
489
String code = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
490
CSVParser parser = new CSVParser(new StringReader(code));
491
System.out.println("---------\n" + code + "\n-------------");
492
String[][] data = parser.getAllValues();
493
assertEquals(4, data.length);
496
public void testIgnoreEmptyLines() throws IOException {
497
String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n";
498
//String code = "world\r\n\n";
499
//String code = "foo;baar\r\n\r\nhello;\r\n\r\nworld;\r\n";
500
CSVParser parser = new CSVParser(new StringReader(code));
501
System.out.println("---------\n" + code + "\n-------------");
502
String[][] data = parser.getAllValues();
503
// for (int i = 0; i < data.length; i++) {
505
// System.out.print('\n');
507
// for (int j = 0; j < data[i].length; j++) {
508
// System.out.print("(" + j + ")'" + data[i][j] + "'");
511
// System.out.println("----------");
512
assertEquals(3, data.length);
515
public void testLineTokenConsistency() throws IOException {
516
String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n";
517
CSVParser parser = new CSVParser(new StringReader(code));
518
System.out.println("---------\n" + code + "\n-------------");
519
String[][] data = parser.getAllValues();
520
parser = new CSVParser(new StringReader(code));
521
CSVParser parser1 = new CSVParser(new StringReader(code));
522
for (int i = 0; i < data.length; i++) {
523
assertTrue(Arrays.equals(parser1.getLine(), data[i]));
524
for (int j = 0; j < data[i].length; j++) {
525
assertEquals(parser.nextValue(), data[i][j]);
531
public void testDelimiterIsWhitespace() throws IOException {
532
String code = "one\ttwo\t\tfour \t five\t six";
533
TestCSVParser parser = new TestCSVParser(new StringReader(code));
534
parser.setStrategy(CSVStrategy.TDF_STRATEGY);
535
System.out.println("---------\n" + code + "\n-------------");
536
assertEquals(CSVParser.TT_TOKEN + ";one;", parser.testNextToken());
537
assertEquals(CSVParser.TT_TOKEN + ";two;", parser.testNextToken());
538
assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
539
assertEquals(CSVParser.TT_TOKEN + ";four;", parser.testNextToken());
540
assertEquals(CSVParser.TT_TOKEN + ";five;", parser.testNextToken());
541
assertEquals(CSVParser.TT_EOF + ";six;", parser.testNextToken());