2
* $Id: PRTokeniser.java 3117 2008-01-31 05:53:22Z xlv $
4
* Copyright 2001, 2002 by Paulo Soares.
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* (the "License"); you may not use this file except in compliance with the License.
8
* You may obtain a copy of the License at http://www.mozilla.org/MPL/
10
* Software distributed under the License is distributed on an "AS IS" basis,
11
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12
* for the specific language governing rights and limitations under the License.
14
* The Original Code is 'iText, a free JAVA-PDF library'.
16
* The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
17
* the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
18
* All Rights Reserved.
19
* Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
20
* are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
22
* Contributor(s): all the names of the contributors are added in the source code
25
* Alternatively, the contents of this file may be used under the terms of the
26
* LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
27
* provisions of LGPL are applicable instead of those above. If you wish to
28
* allow use of your version of this file only under the terms of the LGPL
29
* License and not to allow others to use your version of this file under
30
* the MPL, indicate your decision by deleting the provisions above and
31
* replace them with the notice and other provisions required by the LGPL.
32
* If you do not delete the provisions above, a recipient may use your version
33
* of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
35
* This library is free software; you can redistribute it and/or modify it
36
* under the terms of the MPL as stated above or under the terms of the GNU
37
* Library General Public License as published by the Free Software Foundation;
38
* either version 2 of the License, or any later version.
40
* This library is distributed in the hope that it will be useful, but WITHOUT
41
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
42
* FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details.
45
* If you didn't download this code from the following link, you should check if
46
* you aren't using an obsolete version:
47
* http://www.lowagie.com/iText/
50
package com.lowagie.text.pdf;
52
import java.io.IOException;
55
* @author Paulo Soares (psoares@consiste.pt)
57
public class PRTokeniser {
59
// Token type codes. nextToken() records one of these in `type` (e.g. TK_START_ARRAY),
// and nextValidToken() compares `type` against TK_COMMENT, TK_NUMBER and TK_OTHER.
public static final int TK_NUMBER = 1;
60
public static final int TK_STRING = 2;
61
public static final int TK_NAME = 3;
62
public static final int TK_COMMENT = 4;
63
public static final int TK_START_ARRAY = 5;
64
public static final int TK_END_ARRAY = 6;
65
public static final int TK_START_DIC = 7;
66
public static final int TK_END_DIC = 8;
67
public static final int TK_REF = 9;
68
public static final int TK_OTHER = 10;
69
/**
 * Lookup table that is indexed with {@code ch + 1} (see isDelimiterWhitespace and
 * the token loop in nextToken), so that {@code ch == -1} maps to index 0.
 * The table has 257 entries, covering ch = -1 through 255.
 * The entries set to {@code true} correspond to:
 *   ch = -1;
 *   ch = 0, 9, 10, 12, 13, 32  (the same values isWhitespace accepts);
 *   '%', '(', ')', '/', '<', '>', '[', ']'  (the same characters isDelimiter accepts).
 * Every other entry is {@code false}.
 */
public static final boolean delims[] = {
70
true, true, false, false, false, false, false, false, false, false,
71
true, true, false, true, true, false, false, false, false, false,
72
false, false, false, false, false, false, false, false, false, false,
73
false, false, false, true, false, false, false, false, true, false,
74
false, true, true, false, false, false, false, false, true, false,
75
false, false, false, false, false, false, false, false, false, false,
76
false, true, false, true, false, false, false, false, false, false,
77
false, false, false, false, false, false, false, false, false, false,
78
false, false, false, false, false, false, false, false, false, false,
79
false, false, true, false, true, false, false, false, false, false,
80
false, false, false, false, false, false, false, false, false, false,
81
false, false, false, false, false, false, false, false, false, false,
82
false, false, false, false, false, false, false, false, false, false,
83
false, false, false, false, false, false, false, false, false, false,
84
false, false, false, false, false, false, false, false, false, false,
85
false, false, false, false, false, false, false, false, false, false,
86
false, false, false, false, false, false, false, false, false, false,
87
false, false, false, false, false, false, false, false, false, false,
88
false, false, false, false, false, false, false, false, false, false,
89
false, false, false, false, false, false, false, false, false, false,
90
false, false, false, false, false, false, false, false, false, false,
91
false, false, false, false, false, false, false, false, false, false,
92
false, false, false, false, false, false, false, false, false, false,
93
false, false, false, false, false, false, false, false, false, false,
94
false, false, false, false, false, false, false, false, false, false,
95
false, false, false, false, false, false, false};
97
// Shared empty-string constant.
static final String EMPTY = "";
100
// Data source for this tokeniser; the constructors create it from a file name or a byte array.
protected RandomAccessFileOrArray file;
102
// Text of the current token; nextToken() assigns it from its output buffer.
protected String stringValue;
103
// Set by nextValidToken() via Integer.parseInt when it accepts a "... R" token sequence.
protected int reference;
104
// Set by nextValidToken() together with `reference`.
protected int generation;
105
// Value returned by isHexString(); the code that assigns it is not visible in this extract.
protected boolean hexString;
107
/**
 * Creates a tokeniser whose source is a new RandomAccessFileOrArray built from a file name.
 *
 * @param filename passed unchanged to the RandomAccessFileOrArray constructor
 * @throws IOException declared by this constructor; presumably raised when the source cannot be opened
 */
public PRTokeniser(String filename) throws IOException {
108
file = new RandomAccessFileOrArray(filename);
111
/**
 * Creates a tokeniser whose source is a new RandomAccessFileOrArray built from a byte array.
 *
 * @param pdfIn the bytes to tokenise, passed unchanged to the RandomAccessFileOrArray constructor
 */
public PRTokeniser(byte pdfIn[]) {
112
file = new RandomAccessFileOrArray(pdfIn);
115
/**
 * Creates a tokeniser from an existing RandomAccessFileOrArray.
 * NOTE(review): the constructor body is missing from this extract; presumably it
 * stores the argument in the `file` field - confirm against the full source.
 *
 * @param file the source to tokenise
 */
public PRTokeniser(RandomAccessFileOrArray file) {
119
/**
 * Takes a position in the source.
 * NOTE(review): the method body is missing from this extract; presumably it
 * delegates to the underlying `file` - confirm against the full source.
 *
 * @param pos the position argument
 * @throws IOException declared by this method
 */
public void seek(int pos) throws IOException {
123
/**
 * Returns the current file pointer as reported by the underlying `file`.
 *
 * @return the value of {@code file.getFilePointer()}
 * @throws IOException if the underlying call fails
 */
public int getFilePointer() throws IOException {
124
return file.getFilePointer();
127
/**
 * NOTE(review): the method body is missing from this extract; presumably it
 * closes the underlying `file` - confirm against the full source.
 *
 * @throws IOException declared by this method
 */
public void close() throws IOException {
131
/**
 * Returns the length reported by the underlying `file`.
 *
 * @return the value of {@code file.length()}
 * @throws IOException if the underlying call fails
 */
public int length() throws IOException {
132
return file.length();
135
/**
 * Reads one value from the source. readLineSegment() compares the result with
 * -1 and with '\n'.
 * NOTE(review): the method body is missing from this extract; presumably it
 * returns {@code file.read()} - confirm against the full source.
 *
 * @return the value read
 * @throws IOException declared by this method
 */
public int read() throws IOException {
139
/**
 * Returns a new RandomAccessFileOrArray constructed from the `file` field, so
 * every call hands back a distinct object.
 * NOTE(review): whether the new object shares position or buffers with `file`
 * depends on the RandomAccessFileOrArray constructor, which is not shown here.
 *
 * @return a newly constructed RandomAccessFileOrArray
 */
public RandomAccessFileOrArray getSafeFile() {
140
return new RandomAccessFileOrArray(file);
143
/**
 * NOTE(review): the method body is missing from this extract; presumably it
 * returns the `file` field itself (unlike getSafeFile(), which builds a new
 * object) - confirm against the full source.
 *
 * @return a RandomAccessFileOrArray
 */
public RandomAccessFileOrArray getFile() {
147
/**
 * Builds a string from at most {@code size} values, appending each one as
 * {@code (char) ch}, i.e. with no charset decoding.
 * NOTE(review): the statements that declare and assign {@code ch} are missing
 * from this extract; presumably each iteration reads from `file` and the loop
 * stops early at end of input - confirm against the full source.
 *
 * @param size upper bound on the number of loop iterations
 * @return the accumulated string
 * @throws IOException declared by this method
 */
public String readString(int size) throws IOException {
148
StringBuffer buf = new StringBuffer();
150
// post-decrement: the body runs at most `size` times
while ((size--) > 0) {
154
buf.append((char)ch);
156
return buf.toString();
159
/**
 * Tests whether a value is one of the whitespace codes 0 (NUL), 9 (TAB),
 * 10 (LF), 12 (FF), 13 (CR) or 32 (space).
 *
 * @param ch the value to test
 * @return {@code true} for exactly those six values, otherwise {@code false} (including -1)
 */
public static final boolean isWhitespace(int ch) {
160
return (ch == 0 || ch == 9 || ch == 10 || ch == 12 || ch == 13 || ch == 32);
163
/**
 * Tests whether a value is one of the delimiter characters
 * ( ) &lt; &gt; [ ] / %.
 *
 * @param ch the value to test
 * @return {@code true} for exactly those eight characters, otherwise {@code false}
 */
public static final boolean isDelimiter(int ch) {
164
return (ch == '(' || ch == ')' || ch == '<' || ch == '>' || ch == '[' || ch == ']' || ch == '/' || ch == '%');
167
/**
 * Table-driven test: returns {@code delims[ch + 1]}.
 * The table is true for the isWhitespace values, the isDelimiter characters,
 * and for -1.
 *
 * @param ch a value in the range -1..255; the table has 257 entries, so any
 *           other value raises ArrayIndexOutOfBoundsException
 * @return the table entry for {@code ch}
 */
public static final boolean isDelimiterWhitespace(int ch) {
168
return delims[ch + 1];
171
/**
 * checkObjectStart() compares this method's result with TK_NUMBER.
 * NOTE(review): the method body is missing from this extract; presumably it
 * returns the `type` value set by nextToken() - confirm against the full source.
 *
 * @return a token type code
 */
public int getTokenType() {
175
/**
 * checkObjectStart() compares this method's result with "obj".
 * NOTE(review): the method body is missing from this extract; presumably it
 * returns the `stringValue` field - confirm against the full source.
 *
 * @return a token text
 */
public String getStringValue() {
179
/**
 * NOTE(review): the method body is missing from this extract; presumably it
 * returns the `reference` field set by nextValidToken() - confirm against the full source.
 *
 * @return an int value
 */
public int getReference() {
183
/**
 * NOTE(review): the method body is missing from this extract; presumably it
 * returns the `generation` field set by nextValidToken() - confirm against the full source.
 *
 * @return an int value
 */
public int getGeneration() {
187
/**
 * Hands a value back to the underlying `file` via {@code pushBack}, narrowed to a byte.
 * NOTE(review): one line between the signature and the pushBack call is missing
 * from this extract, so any guard on {@code ch} (for example against -1) cannot
 * be confirmed here.
 *
 * @param ch the value to push back
 */
public void backOnePosition(int ch) {
189
file.pushBack((byte)ch);
192
/**
 * Always throws an IOException whose message is {@code error} followed by
 * " at file pointer " and the current value of {@code file.getFilePointer()}.
 *
 * @param error the leading part of the exception message
 * @throws IOException always
 */
public void throwError(String error) throws IOException {
193
throw new IOException(error + " at file pointer " + file.getFilePointer());
196
/**
 * Looks for the "%PDF-" signature in the text returned by readString(1024),
 * after resetting the start offset to 0, and then sets the start offset of
 * `file` to the position of the match.
 * NOTE(review): the condition guarding the throw below is missing from this
 * extract; presumably it is the "not found" test on {@code idx} - confirm.
 *
 * @return the character 7 positions after the start of the match; for a header
 *         such as "%PDF-1.4" that is the '4'
 * @throws IOException with message "PDF header signature not found."
 */
public char checkPdfHeader() throws IOException {
197
file.setStartOffset(0);
198
String str = readString(1024);
199
int idx = str.indexOf("%PDF-");
201
throw new IOException("PDF header signature not found.");
202
file.setStartOffset(idx);
203
// NOTE(review): charAt throws StringIndexOutOfBoundsException if fewer than
// 8 characters remain in str from idx (signature at the very end of the scan).
return str.charAt(idx + 7);
206
/**
 * Looks for the "%FDF-1.2" signature in the text returned by readString(1024),
 * after resetting the start offset to 0, and then sets the start offset of
 * `file` to the position of the match.
 * NOTE(review): the condition guarding the throw below is missing from this
 * extract; presumably it is the "not found" test on {@code idx} - confirm.
 *
 * @throws IOException with message "FDF header signature not found."
 */
public void checkFdfHeader() throws IOException {
207
file.setStartOffset(0);
208
String str = readString(1024);
209
int idx = str.indexOf("%FDF-1.2");
211
throw new IOException("FDF header signature not found.");
212
file.setStartOffset(idx);
215
/**
 * Searches for the last occurrence of the keyword "startxref" in text obtained
 * from readString(1024).
 * {@code pos} is computed as the file length minus min(1024, file length),
 * i.e. the start of the final 1024 bytes (or 0 for shorter files).
 * NOTE(review): the statement that uses {@code pos}, the condition guarding the
 * throw, and the return statement are missing from this extract; presumably
 * the method seeks to {@code pos} before reading and returns a file position
 * derived from {@code pos} and {@code idx} - confirm against the full source.
 *
 * @return an int; the returned expression is not visible here
 * @throws IOException with message "PDF startxref not found."
 */
public int getStartxref() throws IOException {
216
int size = Math.min(1024, file.length());
217
int pos = file.length() - size;
219
String str = readString(1024);
220
int idx = str.lastIndexOf("startxref");
222
throw new IOException("PDF startxref not found.");
226
/**
 * Classifies {@code v} against the three hexadecimal digit ranges
 * '0'-'9', 'A'-'F' and 'a'-'f'.
 * NOTE(review): the return statements are missing from this extract. nextToken()
 * combines two results as {@code (hi << 4) + lo}, so presumably each call yields
 * the digit value 0..15, with some other value for non-hex input - confirm.
 *
 * @param v the character code to convert
 * @return an int; the returned expressions are not visible here
 */
public static int getHex(int v) {
227
if (v >= '0' && v <= '9')
229
if (v >= 'A' && v <= 'F')
231
if (v >= 'a' && v <= 'f')
236
/**
 * Repeatedly calls nextToken() and inspects `type`. The visible fragments test
 * for TK_COMMENT, test twice for a TK_NUMBER token, and then test for a
 * TK_OTHER token whose text is "R"; when that sequence is accepted the two
 * numbers are parsed into `reference` and `generation`.
 * NOTE(review): many statements of this method are missing from this extract
 * (including the declarations of n1, n2 and ptr and the bodies of the `if`s),
 * so the exact control flow cannot be confirmed here.
 *
 * @throws IOException via throwError("Unexpected end of file"); the path that
 *         reaches that call is not fully visible here
 */
public void nextValidToken() throws IOException {
241
while (nextToken()) {
242
if (type == TK_COMMENT)
247
if (type != TK_NUMBER)
249
// remember the position after the first token (how ptr is used later is not visible here)
ptr = file.getFilePointer();
256
if (type != TK_NUMBER) {
268
if (type != TK_OTHER || !stringValue.equals("R")) {
275
reference = Integer.parseInt(n1);
276
generation = Integer.parseInt(n2);
281
throwError("Unexpected end of file");
284
/**
 * Reads one token from `file`. The visible fragments show it skipping leading
 * whitespace, setting `type` (e.g. to TK_START_ARRAY), accumulating token text
 * in a StringBuffer, and finally storing that text in `stringValue`.
 * NOTE(review): most of the branching (the switch on the first character, the
 * other `type` assignments and the return statements) is missing from this
 * extract. nextValidToken() loops on {@code while (nextToken())}, so presumably
 * {@code false} signals end of input - confirm against the full source.
 *
 * @return a boolean; the return statements are not visible here
 * @throws IOException via throwError(...) for "'>' not expected" and
 *         "Error reading string"
 */
public boolean nextToken() throws IOException {
285
StringBuffer outBuf = null;
290
// end of a do/while that skips leading whitespace; stops at the first non-whitespace value or at -1
} while (ch != -1 && isWhitespace(ch));
295
type = TK_START_ARRAY;
302
outBuf = new StringBuffer();
309
// two hex digits are read and combined into one value: high nibble first, then low nibble
ch = (getHex(file.read()) << 4) + getHex(file.read());
311
outBuf.append((char)ch);
319
throwError("'>' not expected");
324
int v1 = file.read();
329
outBuf = new StringBuffer();
334
// whitespace is skipped before each of the two values v1 and v2
while (isWhitespace(v1))
342
while (isWhitespace(v2))
346
outBuf.append((char)ch);
353
outBuf.append((char)ch);
356
// a negative v1 or v2 is reported as an error
if (v1 < 0 || v2 < 0)
357
throwError("Error reading string");
364
// end of a do/while that runs until -1, '\r' or '\n'
} while (ch != -1 && ch != '\r' && ch != '\n');
368
outBuf = new StringBuffer();
379
else if (ch == ')') {
382
else if (ch == '\\') {
383
boolean lineBreak = false;
416
// up to three octal digits ('0'..'7') are folded into `octal`, 3 bits per digit
if (ch < '0' || ch > '7') {
419
int octal = ch - '0';
421
if (ch < '0' || ch > '7') {
426
octal = (octal << 3) + ch - '0';
428
if (ch < '0' || ch > '7') {
433
octal = (octal << 3) + ch - '0';
443
else if (ch == '\r') {
454
outBuf.append((char)ch);
457
throwError("Error reading string");
462
outBuf = new StringBuffer();
463
// a token starting with '-', '+', '.' or a digit is collected while digits or '.' follow
if (ch == '-' || ch == '+' || ch == '.' || (ch >= '0' && ch <= '9')) {
466
outBuf.append((char)ch);
468
} while (ch != -1 && ((ch >= '0' && ch <= '9') || ch == '.'));
473
outBuf.append((char)ch);
475
// any other token runs until the delims table flags ch (whitespace, delimiter or -1)
} while (!delims[ch + 1]);
482
stringValue = outBuf.toString();
486
/**
 * Parses the current token text as a decimal integer.
 *
 * @return {@code Integer.parseInt(stringValue)}
 * @throws NumberFormatException if `stringValue` is not a parsable int
 */
public int intValue() {
487
return Integer.parseInt(stringValue);
490
/**
 * Copies values read via read() into {@code input}, after first skipping
 * leading whitespace. The copy loop is bounded by {@code input.length} and by
 * an end-of-line flag.
 * When there is room for two more bytes, a space followed by 'X' is written
 * after the copied data (the `if (ptr + 2 <= len)` block below).
 * NOTE(review): many statements are missing from this extract (the
 * declarations of c, eol and ptr, the switch cases and the return statements),
 * so the end-of-line handling and the meaning of the boolean result cannot be
 * confirmed here.
 *
 * @param input destination buffer; at most {@code input.length} bytes are written
 * @return a boolean; the return statements are not visible here
 * @throws IOException declared by this method
 */
public boolean readLineSegment(byte input[]) throws IOException {
494
int len = input.length;
495
// ssteward, pdftk-1.10, 040922:
496
// skip initial whitespace; added this because PdfReader.rebuildXref()
497
// assumes that line provided by readLineSegment does not have init. whitespace;
499
while ( isWhitespace( (c = read()) ) );
501
while ( !eol && ptr < len ) {
509
// the position is recorded, then one value is read and compared with '\n'
// (what happens when it differs is not visible here; presumably the read is undone using cur)
int cur = getFilePointer();
510
if ((read()) != '\n') {
515
input[ptr++] = (byte)c;
519
// break loop? do it before we read() again
520
if( eol || len <= ptr ) {
530
switch (c = read()) {
537
int cur = getFilePointer();
538
if ((read()) != '\n') {
546
// end of input (-1) with nothing copied
if ((c == -1) && (ptr == 0)) {
549
if (ptr + 2 <= len) {
550
input[ptr++] = (byte)' ';
551
input[ptr] = (byte)'X';
556
/**
 * Tokenises {@code line} with a fresh PRTokeniser and checks that it begins
 * with two TK_NUMBER tokens followed by a token whose text is "obj".
 * On success it returns the pair {@code {num, gen}}.
 * NOTE(review): the statements executed when a check fails, the assignments of
 * num and gen, and the body of the catch block are missing from this extract;
 * presumably failures yield {@code null} - confirm against the full source.
 *
 * @param line the bytes of one line to examine
 * @return {@code {num, gen}} when the line matches; the failure value is not visible here
 */
public static int[] checkObjectStart(byte line[]) {
558
PRTokeniser tk = new PRTokeniser(line);
561
if (!tk.nextToken() || tk.getTokenType() != TK_NUMBER)
564
if (!tk.nextToken() || tk.getTokenType() != TK_NUMBER)
569
if (!tk.getStringValue().equals("obj"))
571
return new int[]{num, gen};
573
// catches every Exception type, not only IOException, despite the variable name
catch (Exception ioe) {
579
/**
 * Returns the `hexString` flag.
 *
 * @return the current value of the {@code hexString} field
 */
public boolean isHexString() {
580
return this.hexString;