56
54
public String testNextToken() throws IOException {
57
55
Token t = super.nextToken();
58
String tmp = Integer.toString(t.type) + ";" + t.content + ";";
59
System.out.println("token=" + tmp);
56
return Integer.toString(t.type) + ";" + t.content + ";";
65
* Constructor for JUnit.
66
* @param name Name to be used in JUnit Test Environment
68
public CSVParserTest(String name) {
73
* Returns a Test suite for JUnit.
74
* @return Test suite for JUnit
76
public static Test suite() {
77
return new TestSuite(CSVParserTest.class);
81
60
// ======================================================
83
62
// ======================================================
87
66
String code = "abc,def, hijk, lmnop, qrst,uv ,wxy ,z , ,";
88
67
TestCSVParser parser = new TestCSVParser(new StringReader(code));
89
68
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
90
System.out.println("---------\n" + code + "\n-------------");
91
69
assertEquals(CSVParser.TT_TOKEN + ";abc;", parser.testNextToken());
92
70
assertEquals(CSVParser.TT_TOKEN + ";def;", parser.testNextToken());
93
71
assertEquals(CSVParser.TT_TOKEN + ";hijk;", parser.testNextToken());
114
92
parser.getStrategy().setIgnoreEmptyLines(false);
115
93
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
116
94
parser.getStrategy().setCommentStart('#');
117
System.out.println("---------\n" + code + "\n-------------");
118
95
assertEquals(CSVParser.TT_TOKEN + ";1;", parser.testNextToken());
119
96
assertEquals(CSVParser.TT_TOKEN + ";2;", parser.testNextToken());
120
97
assertEquals(CSVParser.TT_TOKEN + ";3;", parser.testNextToken());
140
117
TestCSVParser parser = new TestCSVParser(new StringReader(code));
141
118
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
142
119
parser.getStrategy().setCommentStart('#');
143
System.out.println("---------\n" + code + "\n-------------");
144
120
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
145
121
// an unquoted single backslash is not an escape char
146
122
assertEquals(CSVParser.TT_TOKEN + ";\\;", parser.testNextToken());
163
139
"a,\"foo\",b\na, \" foo\",b\na,\"foo \" ,b\na, \" foo \" ,b";
164
140
TestCSVParser parser = new TestCSVParser(new StringReader(code));
165
141
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
166
System.out.println("---------\n" + code + "\n-------------");
167
142
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
168
143
assertEquals(CSVParser.TT_TOKEN + ";foo;", parser.testNextToken());
169
144
assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
182
157
// encapsulator tokenizer (multi line, delimiter in string)
183
158
public void testNextToken5() throws IOException {
185
"a,\"foo\n\",b\n\"foo\n baar ,,,\"\n\"\n\t \n\",\"\\\"\""
160
"a,\"foo\n\",b\n\"foo\n baar ,,,\"\n\"\n\t \n\"";
188
161
TestCSVParser parser = new TestCSVParser(new StringReader(code));
189
162
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
190
System.out.println("---------\n" + code + "\n-------------");
191
163
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
192
164
assertEquals(CSVParser.TT_TOKEN + ";foo\n;", parser.testNextToken());
193
165
assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
194
166
assertEquals(CSVParser.TT_EORECORD + ";foo\n baar ,,,;",
195
167
parser.testNextToken());
196
assertEquals(CSVParser.TT_TOKEN + ";\n\t \n;", parser.testNextToken());
197
assertEquals(CSVParser.TT_TOKEN + ";\";", parser.testNextToken());
198
// escape char in quoted input only escapes delimiter
199
assertEquals(CSVParser.TT_TOKEN + ";\\,;", parser.testNextToken());
200
assertEquals(CSVParser.TT_EOF + ";\";", parser.testNextToken());
168
assertEquals(CSVParser.TT_EOF + ";\n\t \n;", parser.testNextToken());
203
172
// change delimiters, comment, encapsulator
210
String code = "a;'b and \\' more\n'\n!comment;;;;\n;;";
179
String code = "a;'b and '' more\n'\n!comment;;;;\n;;";
211
180
TestCSVParser parser = new TestCSVParser(new StringReader(code));
212
181
parser.setStrategy( new CSVStrategy(';', '\'', '!') );
213
System.out.println("---------\n" + code + "\n-------------");
214
182
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
216
184
CSVParser.TT_EORECORD + ";b and ' more\n;",
227
195
+ " a , b , 1 2 \n"
228
196
+ "\"foo baar\", b,\n"
229
+ " \"foo\n,,\n\"\",,\n\\\"\",d,e\n";
197
// + " \"foo\n,,\n\"\",,\n\\\"\",d,e\n";
198
+ " \"foo\n,,\n\"\",,\n\"\"\",d,e\n"; // changed to use standard CSV escaping
231
200
{"a", "b", "c", "d"},
232
201
{"a", "b", "1 2"},
233
202
{"foo baar", "b", ""},
282
248
{"\"hello\"", " \"world\"", "abc\ndef", ""}
284
250
CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY);
285
System.out.println("---------\n" + code + "\n-------------");
286
251
String[][] tmp = parser.getAllValues();
287
252
assertEquals(res.length, tmp.length);
288
253
assertTrue(tmp.length > 0);
303
268
CSVParser parser = new CSVParser(new StringReader(code));
304
269
parser.setStrategy(CSVStrategy.EXCEL_STRATEGY);
305
System.out.println("---------\n" + code + "\n-------------");
306
270
String[][] tmp = parser.getAllValues();
307
271
assertEquals(res.length, tmp.length);
308
272
assertTrue(tmp.length > 0);
309
273
for (int i = 0; i < res.length; i++) {
310
for (int j = 0; j < tmp[i].length; j++) {
311
System.out.println("'" + tmp[i][j] + "'");
313
274
assertTrue(Arrays.equals(res[i], tmp[i]));
335
296
code = codes[codeIndex];
336
297
CSVParser parser = new CSVParser(new StringReader(code));
337
298
parser.setStrategy(CSVStrategy.EXCEL_STRATEGY);
338
System.out.println("---------\n" + code + "\n-------------");
339
299
String[][] tmp = parser.getAllValues();
340
300
assertEquals(res.length, tmp.length);
341
301
assertTrue(tmp.length > 0);
342
302
for (int i = 0; i < res.length; i++) {
343
for (int j = 0; j < tmp[i].length; j++) {
344
System.out.println("'" + tmp[i][j] + "'");
346
303
assertTrue(Arrays.equals(res[i], tmp[i]));
368
325
code = codes[codeIndex];
369
326
CSVParser parser = new CSVParser(new StringReader(code));
370
327
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
371
System.out.println("---------\n" + code + "\n-------------");
372
328
String[][] tmp = parser.getAllValues();
373
329
assertEquals(res.length, tmp.length);
374
330
assertTrue(tmp.length > 0);
375
331
for (int i = 0; i < res.length; i++) {
376
for (int j = 0; j < tmp[i].length; j++) {
377
System.out.println("'" + tmp[i][j] + "'");
379
332
assertTrue(Arrays.equals(res[i], tmp[i]));
398
351
code = codes[codeIndex];
399
352
CSVParser parser = new CSVParser(new StringReader(code));
400
353
parser.setStrategy(CSVStrategy.EXCEL_STRATEGY);
401
System.out.println("---------\n" + code + "\n-------------");
402
354
String[][] tmp = parser.getAllValues();
403
355
assertEquals(res.length, tmp.length);
404
356
assertTrue(tmp.length > 0);
405
357
for (int i = 0; i < res.length; i++) {
406
for (int j = 0; j < tmp[i].length; j++) {
407
System.out.println("'" + tmp[i][j] + "'");
409
358
assertTrue(Arrays.equals(res[i], tmp[i]));
426
375
code = codes[codeIndex];
427
376
CSVParser parser = new CSVParser(new StringReader(code));
428
377
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
429
System.out.println("---------\n" + code + "\n-------------");
430
378
String[][] tmp = parser.getAllValues();
431
379
assertEquals(res.length, tmp.length);
432
380
assertTrue(tmp.length > 0);
433
381
for (int i = 0; i < res.length; i++) {
434
for (int j = 0; j < tmp[i].length; j++) {
435
System.out.println("'" + tmp[i][j] + "'");
437
382
assertTrue(Arrays.equals(res[i], tmp[i]));
442
public void testBackslashEscaping() throws IOException {
387
public void OLDtestBackslashEscaping() throws IOException {
444
389
"one,two,three\n"
445
390
+ "on\\\"e,two\n"
462
407
{ "a\\\\,b" } // backslash in quotes only escapes a delimiter (",")
464
409
CSVParser parser = new CSVParser(new StringReader(code));
465
System.out.println("---------\n" + code + "\n-------------");
466
410
String[][] tmp = parser.getAllValues();
467
411
assertEquals(res.length, tmp.length);
468
412
assertTrue(tmp.length > 0);
469
413
for (int i = 0; i < res.length; i++) {
470
for (int j = 0; j < tmp[i].length; j++) {
471
System.out.println("'" + tmp[i][j] + "'");
473
414
assertTrue(Arrays.equals(res[i], tmp[i]));
418
public void testBackslashEscaping() throws IOException {
420
// To avoid confusion over the need for escaping chars in Java code,
421
// we will test with a forward slash as the escape char, and a single
422
// quote as the encapsulator.
425
"one,two,three\n" // 0
426
+ "'',''\n" // 1) empty encapsulators
427
+ "/',/'\n" // 2) single encapsulators
428
+ "'/'','/''\n" // 3) single encapsulators encapsulated via escape
429
+ "'''',''''\n" // 4) single encapsulators encapsulated via doubling
430
+ "/,,/,\n" // 5) separator escaped
431
+ "//,//\n" // 6) escape escaped
432
+ "'//','//'\n" // 7) escape escaped in encapsulation
433
+ " 8 , \"quoted \"\" /\" // string\" \n" // don't eat spaces
434
+ "9, /\n \n" // escaped newline
437
{ "one", "two", "three" }, // 0
445
{ " 8 ", " \"quoted \"\" \" / string\" " },
450
CSVStrategy strategy = new CSVStrategy(',','\'',CSVStrategy.COMMENTS_DISABLED,'/',false,false,true,true);
452
CSVParser parser = new CSVParser(new StringReader(code), strategy);
453
String[][] tmp = parser.getAllValues();
454
assertTrue(tmp.length > 0);
455
for (int i = 0; i < res.length; i++) {
456
assertTrue(Arrays.equals(res[i], tmp[i]));
477
462
public void testUnicodeEscape() throws IOException {
478
463
String code = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063";
479
464
CSVParser parser = new CSVParser(new StringReader(code));
480
System.out.println("---------\n" + code + "\n-------------");
481
465
parser.getStrategy().setUnicodeEscapeInterpretation(true);
482
466
String[] data = parser.getLine();
483
467
assertEquals(2, data.length);
488
472
public void testCarriageReturnLineFeedEndings() throws IOException {
489
473
String code = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
490
474
CSVParser parser = new CSVParser(new StringReader(code));
491
System.out.println("---------\n" + code + "\n-------------");
492
475
String[][] data = parser.getAllValues();
493
476
assertEquals(4, data.length);
498
481
//String code = "world\r\n\n";
499
482
//String code = "foo;baar\r\n\r\nhello;\r\n\r\nworld;\r\n";
500
483
CSVParser parser = new CSVParser(new StringReader(code));
501
System.out.println("---------\n" + code + "\n-------------");
502
484
String[][] data = parser.getAllValues();
503
// for (int i = 0; i < data.length; i++) {
505
// System.out.print('\n');
507
// for (int j = 0; j < data[i].length; j++) {
508
// System.out.print("(" + j + ")'" + data[i][j] + "'");
511
// System.out.println("----------");
512
485
assertEquals(3, data.length);
515
488
public void testLineTokenConsistency() throws IOException {
516
489
String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n";
517
490
CSVParser parser = new CSVParser(new StringReader(code));
518
System.out.println("---------\n" + code + "\n-------------");
519
491
String[][] data = parser.getAllValues();
520
492
parser = new CSVParser(new StringReader(code));
521
493
CSVParser parser1 = new CSVParser(new StringReader(code));
532
504
String code = "one\ttwo\t\tfour \t five\t six";
533
505
TestCSVParser parser = new TestCSVParser(new StringReader(code));
534
506
parser.setStrategy(CSVStrategy.TDF_STRATEGY);
535
System.out.println("---------\n" + code + "\n-------------");
536
507
assertEquals(CSVParser.TT_TOKEN + ";one;", parser.testNextToken());
537
508
assertEquals(CSVParser.TT_TOKEN + ";two;", parser.testNextToken());
538
509
assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());