134
137
* @deprecated use {@link #CSVParser(Reader,CSVStrategy)}.
136
139
public CSVParser(Reader input, char delimiter) {
137
this(input, delimiter, '"', (char) 0);
140
this(input, delimiter, '"', CSVStrategy.COMMENTS_DISABLED);
347
352
eol = isEndOfLine(c);
349
354
// ok, start of token reached: comment, encapsulated, or token
350
if (!strategy.isCommentingDisabled() && c == strategy.getCommentStart()) {
355
if (c == strategy.getCommentStart()) {
351
356
// ignore everything until the end of the line and continue (incrementing the line count)
353
358
tkn = nextToken(tkn.reset());
399
404
* @throws IOException on stream access error
401
406
private Token simpleTokenLexer(Token tkn, int c) throws IOException {
403
while (!tkn.isReady) {
404
408
if (isEndOfLine(c)) {
406
410
tkn.type = TT_EORECORD;
407
411
tkn.isReady = true;
408
413
} else if (isEndOfFile(c)) {
410
415
tkn.type = TT_EOF;
411
416
tkn.isReady = true;
412
418
} else if (c == strategy.getDelimiter()) {
414
420
tkn.type = TT_TOKEN;
415
421
tkn.isReady = true;
416
423
} else if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead() == 'u') {
417
424
// interpret unicode escaped chars (like \u0070 -> p)
418
425
tkn.content.append((char) unicodeEscapeLexer(c));
419
} else if (isWhitespace(c)) {
420
// gather whitespaces
421
// (as long as they are not at the beginning of a token)
422
if (tkn.content.length() > 0) {
423
wsBuf.append((char) c);
426
} else if (c == strategy.getEscape()) {
427
tkn.content.append((char)readEscape(c));
426
// prepend whitespaces (if we have)
427
if (wsBuf.length() > 0) {
428
tkn.content.append(wsBuf);
431
429
tkn.content.append((char) c);
435
if (strategy.getIgnoreTrailingWhitespaces()) {
436
tkn.content.trimTrailingWhitespace();
457
458
int startLineNumber = getLineNumber();
458
459
// ignore the given delimiter
459
460
// assert c == delimiter;
461
while (!tkn.isReady) {
462
boolean skipRead = false;
463
if (c == strategy.getEncapsulator() || c == '\\') {
464
if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead()=='u') {
465
tkn.content.append((char) unicodeEscapeLexer(c));
466
} else if (c == strategy.getEscape()) {
467
tkn.content.append((char)readEscape(c));
468
} else if (c == strategy.getEncapsulator()) {
465
469
if (in.lookAhead() == strategy.getEncapsulator()) {
466
470
// double or escaped encapsulator -> add single encapsulator to token
468
472
tkn.content.append((char) c);
469
} else if (c == '\\' && in.lookAhead() == '\\') {
470
// doubled escape char, it does not escape itself, only encapsulator
471
// -> add both escape chars to stream
472
tkn.content.append((char) c);
474
tkn.content.append((char) c);
476
strategy.getUnicodeEscapeInterpretation()
478
&& in.lookAhead() == 'u') {
479
// interpret unicode escaped chars (like \u0070 -> p)
480
tkn.content.append((char) unicodeEscapeLexer(c));
481
} else if (c == '\\') {
482
// use a single escape character -> add it to stream
483
tkn.content.append((char) c);
485
474
// token finish mark (encapsulator) reached: ignore whitespace till delimiter
486
while (!tkn.isReady) {
488
477
if (c == strategy.getDelimiter()) {
489
478
tkn.type = TT_TOKEN;
490
479
tkn.isReady = true;
491
481
} else if (isEndOfFile(c)) {
492
482
tkn.type = TT_EOF;
493
483
tkn.isReady = true;
494
485
} else if (isEndOfLine(c)) {
495
486
// ok, end of token reached
496
487
tkn.type = TT_EORECORD;
497
488
tkn.isReady = true;
498
490
} else if (!isWhitespace(c)) {
499
// error invalid char between token and next delimiter
500
throw new IOException(
501
"(line " + getLineNumber()
502
+ ") invalid char between encapsulated token end delimiter"
491
// error invalid char between token and next delimiter
492
throw new IOException(
493
"(line " + getLineNumber()
494
+ ") invalid char between encapsulated token end delimiter"
508
499
} else if (isEndOfFile(c)) {
509
500
// error condition (end of file before end of token)
510
501
throw new IOException(
511
"(startline " + startLineNumber + ")"
512
+ "eof reached before encapsulated token finished"
502
"(startline " + startLineNumber + ")"
503
+ "eof reached before encapsulated token finished"
515
506
// consume character
516
507
tkn.content.append((char) c);
519
if (!tkn.isReady && !skipRead) {
544
private int readEscape(int c) throws IOException {
545
// assume c is the escape char (normally a backslash)
549
case 'r': out='\r'; break;
550
case 'n': out='\n'; break;
551
case 't': out='\t'; break;
552
case 'b': out='\b'; break;
553
case 'f': out='\f'; break;
558
559
// ======================================================