~ubuntu-branches/ubuntu/precise/commons-csv/precise

« back to all changes in this revision

Viewing changes to src/java/org/apache/commons/csv/CSVParser.java

Committer: Bazaar Package Importer
Author(s): Jan-Pascal van Best
Date: 2008-09-10 20:58:00 UTC
Merged to: This revision was merged to the branch mainline in revision 3.
Revision ID: james.westby@ubuntu.com-20080910205800-ohoq6075nafu8n2b

Tags: upstream-0.1-SNAPSHOT+svn678580

Import upstream version 0.1-SNAPSHOT+svn678580

files removed:
build.xml

maven.xml

project.properties

project.xml

src/test/AllTests.java

src/test/org/apache/commons/csv/AllTests.java

src/test/org/apache/commons/csv/writer/AllTests.java

files modified:
NOTICE.txt

pom.xml

src/java/org/apache/commons/csv/CSVParser.java

src/java/org/apache/commons/csv/CSVStrategy.java

src/java/org/apache/commons/csv/CharBuffer.java

src/java/org/apache/commons/csv/ExtendedBufferedReader.java

src/java/org/apache/commons/csv/writer/CSVWriter.java

src/site/site.xml

src/site/xdoc/cvs-usage.xml

src/site/xdoc/examples.xml

src/site/xdoc/index.xml

src/site/xdoc/issue-tracking.xml

src/test/org/apache/commons/csv/CSVParserTest.java

src/test/org/apache/commons/csv/CSVPrinterTest.java

src/test/org/apache/commons/csv/CSVStrategyTest.java

src/test/org/apache/commons/csv/CSVUtilsTest.java

src/test/org/apache/commons/csv/ExtendedBufferedReaderTest.java

Show diffs side-by-side

added

removed

src/java/org/apache/commons/csv/CSVParser.java

package org.apache.commons.csv;

import java.io.*;

import java.io.IOException;

import java.io.Reader;

import java.io.InputStreamReader;

import java.io.InputStream;

import java.util.ArrayList;

134

137

* @deprecated use {@link #CSVParser(Reader,CSVStrategy)}.

135

138

136

139

public CSVParser(Reader input, char delimiter) {

137

this(input, delimiter, '"', (char) 0);

140

this(input, delimiter, '"', CSVStrategy.COMMENTS_DISABLED);

138

141

}

139

142

140

143

/**

257

260

throw new IOException("(line " + getLineNumber() + ") invalid parse sequence");

258

261

// unreachable: break;

259

262

}

260

if (reusableToken.type != TT_TOKEN) break;

263

if (reusableToken.type != TT_TOKEN) {

264

break;

265

}

261

266

}

262

267

if (!record.isEmpty()) {

263

268

ret = (String[]) record.toArray(new String[record.size()]);

347

352

eol = isEndOfLine(c);

348

353

}

349

354

// ok, start of token reached: comment, encapsulated, or token

350

if (!strategy.isCommentingDisabled() && c == strategy.getCommentStart()) {

355

if (c == strategy.getCommentStart()) {

351

356

// ignore everything till end of line and continue (incr linecount)

352

357

in.readLine();

353

358

tkn = nextToken(tkn.reset());

399

404

* @throws IOException on stream access error

400

405

401

406

private Token simpleTokenLexer(Token tkn, int c) throws IOException {

402

wsBuf.clear();

403

while (!tkn.isReady) {

407

for (;;) {

404

408

if (isEndOfLine(c)) {

405

409

// end of record

406

410

tkn.type = TT_EORECORD;

407

411

tkn.isReady = true;

412

break;

408

413

} else if (isEndOfFile(c)) {

409

414

// end of file

410

415

tkn.type = TT_EOF;

411

416

tkn.isReady = true;

417

break;

412

418

} else if (c == strategy.getDelimiter()) {

413

419

// end of token

414

420

tkn.type = TT_TOKEN;

415

421

tkn.isReady = true;

422

break;

416

423

} else if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead() == 'u') {

417

424

// interpret unicode escaped chars (like \u0070 -> p)

418

425

tkn.content.append((char) unicodeEscapeLexer(c));

419

} else if (isWhitespace(c)) {

420

// gather whitespaces

421

// (as long as they are not at the beginning of a token)

422

if (tkn.content.length() > 0) {

423

wsBuf.append((char) c);

424

}

426

} else if (c == strategy.getEscape()) {

427

tkn.content.append((char)readEscape(c));

425

428

} else {

426

// prepend whitespaces (if we have)

427

if (wsBuf.length() > 0) {

428

tkn.content.append(wsBuf);

429

wsBuf.clear();

430

}

431

429

tkn.content.append((char) c);

432

430

}

433

// get the next char

434

if (!tkn.isReady) {

435

c = in.read();

436

}

437

}

431

432

c = in.read();

433

}

434

435

if (strategy.getIgnoreTrailingWhitespaces()) {

436

tkn.content.trimTrailingWhitespace();

437

}

438

439

return tkn;

439

440

}

440

441

457

458

int startLineNumber = getLineNumber();

458

459

// ignore the given delimiter

459

460

// assert c == delimiter;

460

c = in.read();

461

while (!tkn.isReady) {

462

boolean skipRead = false;

463

if (c == strategy.getEncapsulator() || c == '\\') {

464

// check lookahead

461

for (;;) {

462

c = in.read();

463

464

if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead()=='u') {

465

tkn.content.append((char) unicodeEscapeLexer(c));

466

} else if (c == strategy.getEscape()) {

467

tkn.content.append((char)readEscape(c));

468

} else if (c == strategy.getEncapsulator()) {

465

469

if (in.lookAhead() == strategy.getEncapsulator()) {

466

470

// double or escaped encapsulator -> add single encapsulator to token

467

471

c = in.read();

468

472

tkn.content.append((char) c);

469

} else if (c == '\\' && in.lookAhead() == '\\') {

470

// doubled escape char, it does not escape itself, only encapsulator

471

// -> add both escape chars to stream

472

tkn.content.append((char) c);

473

c = in.read();

474

tkn.content.append((char) c);

475

} else if (

476

strategy.getUnicodeEscapeInterpretation()

477

&& c == '\\'

478

&& in.lookAhead() == 'u') {

479

// interpret unicode escaped chars (like \u0070 -> p)

480

tkn.content.append((char) unicodeEscapeLexer(c));

481

} else if (c == '\\') {

482

// use a single escape character -> add it to stream

483

tkn.content.append((char) c);

484

473

} else {

485

474

// token finish mark (encapsulator) reached: ignore whitespace till delimiter

486

while (!tkn.isReady) {

475

for (;;) {

487

476

c = in.read();

488

477

if (c == strategy.getDelimiter()) {

489

478

tkn.type = TT_TOKEN;

490

479

tkn.isReady = true;

480

return tkn;

491

481

} else if (isEndOfFile(c)) {

492

482

tkn.type = TT_EOF;

493

483

tkn.isReady = true;

484

return tkn;

494

485

} else if (isEndOfLine(c)) {

495

486

// ok eo token reached

496

487

tkn.type = TT_EORECORD;

497

488

tkn.isReady = true;

489

return tkn;

498

490

} else if (!isWhitespace(c)) {

499

// error invalid char between token and next delimiter

500

throw new IOException(

501

"(line " + getLineNumber()

502

+ ") invalid char between encapsulated token end delimiter"

503

);

504

}

491

// error invalid char between token and next delimiter

492

throw new IOException(

493

"(line " + getLineNumber()

494

+ ") invalid char between encapsulated token end delimiter"

495

);

496

}

505

497

}

506

skipRead = true;

507

498

}

508

499

} else if (isEndOfFile(c)) {

509

500

// error condition (end of file before end of token)

510

501

throw new IOException(

511

"(startline " + startLineNumber + ")"

512

+ "eof reached before encapsulated token finished"

513

);

502

"(startline " + startLineNumber + ")"

503

+ "eof reached before encapsulated token finished"

504

);

514

505

} else {

515

506

// consume character

516

507

tkn.content.append((char) c);

517

508

}

518

// get the next char

519

if (!tkn.isReady && !skipRead) {

520

c = in.read();

521

}

522

509

}

523

return tkn;

524

510

}

525

511

526

512

554

540

}

555

541

return ret;

556

542

}

543

544

private int readEscape(int c) throws IOException {

545

// assume c is the escape char (normally a backslash)

546

c = in.read();

547

int out;

548

switch (c) {

549

case 'r': out='\r'; break;

550

case 'n': out='\n'; break;

551

case 't': out='\t'; break;

552

case 'b': out='\b'; break;

553

case 'f': out='\f'; break;

554

default : out=c;

555

}

556

return out;

557

}

557

558

559

// ======================================================

559

560

// strategies

Older »