2
* $HeadURL: https://svn.apache.org/repos/asf/httpcomponents/httpcore/tags/4.0.1/httpcore/src/main/java/org/apache/http/message/BasicTokenIterator.java $
4
* $Date: 2009-02-14 18:06:25 +0100 (Sat, 14 Feb 2009) $
6
* ====================================================================
7
* Licensed to the Apache Software Foundation (ASF) under one
8
* or more contributor license agreements. See the NOTICE file
9
* distributed with this work for additional information
10
* regarding copyright ownership. The ASF licenses this file
11
* to you under the Apache License, Version 2.0 (the
12
* "License"); you may not use this file except in compliance
13
* with the License. You may obtain a copy of the License at
15
* http://www.apache.org/licenses/LICENSE-2.0
17
* Unless required by applicable law or agreed to in writing,
18
* software distributed under the License is distributed on an
19
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
20
* KIND, either express or implied. See the License for the
21
* specific language governing permissions and limitations
23
* ====================================================================
25
* This software consists of voluntary contributions made by many
26
* individuals on behalf of the Apache Software Foundation. For more
27
* information on the Apache Software Foundation, please see
28
* <http://www.apache.org/>.
32
package org.apache.http.message;
34
import java.util.NoSuchElementException;
36
import org.apache.http.HeaderIterator;
37
import org.apache.http.ParseException;
38
import org.apache.http.TokenIterator;
41
* Basic implementation of a {@link TokenIterator}.
42
* This implementation parses <tt>#token</tt> sequences as
43
* defined by RFC 2616, section 2.
44
* It extends that definition somewhat beyond US-ASCII.
46
* @version $Revision: 744527 $
50
public class BasicTokenIterator implements TokenIterator {
52
/**
 * The HTTP separator characters. Defined in RFC 2616, section 2.2.
 * Contains space, comma, semicolon, equals, parentheses, angle brackets,
 * at-sign, colon, backslash, double quote, slash, square brackets,
 * question mark, curly braces and horizontal tab.
 */
53
// The order of the characters here is adjusted to put the
54
// most likely candidates at the beginning of the collection;
// isHttpSeparator(char) looks characters up with String.indexOf.
55
public final static String HTTP_SEPARATORS = " ,;=()<>@:\\\"/[]?{}\t";
58
/**
 * The iterator from which to obtain the next header.
 * Assigned once in the constructor, which rejects <code>null</code>.
 */
59
protected final HeaderIterator headerIt;
62
* The value of the current header.
63
* This is the header value that includes {@link #currentToken}.
64
* Undefined if the iteration is over.
66
// Taken from headerIt.nextHeader().getValue(); findTokenStart sets it to
// null once headerIt has no further headers.
protected String currentHeader;
69
* The token to be returned by the next call to {@link #nextToken}.
70
* <code>null</code> if the iteration is over.
72
// Written by findNext: either null or the result of createToken.
// hasNext() and nextToken() test it against null.
protected String currentToken;
75
* The position after {@link #currentToken} in {@link #currentHeader}.
76
* Undefined if the iteration is over.
78
// Holds the value returned by findNext; updated by the constructor and by
// nextToken(), and fed back into the following findNext call.
protected int searchPos;
82
* Creates a new instance of {@link BasicTokenIterator}.
84
* @param headerIterator the iterator for the headers to tokenize
86
// Validates and stores the header iterator, then eagerly looks up the
// first token so that hasNext() can answer immediately.
// NOTE(review): the closing braces of the null check and of the
// constructor are not visible here - verify against the original file.
public BasicTokenIterator(final HeaderIterator headerIterator) {
87
if (headerIterator == null) {
88
throw new IllegalArgumentException
89
("Header iterator must not be null.");
92
this.headerIt = headerIterator;
93
// -1 asks findNext to begin with the first header. findNext is protected
// and not final, so a subclass override runs here before the subclass's
// own field initializers and constructor body have executed.
this.searchPos = findNext(-1);
97
// non-javadoc, see interface TokenIterator
98
public boolean hasNext() {
99
// The look-ahead token is null exactly when findNext found nothing more.
return (this.currentToken != null);
104
* Obtains the next token from this iteration.
106
* @return the next token in this iteration
108
* @throws NoSuchElementException if the iteration is already over
109
* @throws ParseException if an invalid header value is encountered
111
// Hands out the look-ahead token and advances to the one after it.
// NOTE(review): the statement returning 'result' and the closing braces
// are not visible here - presumably 'return result;' follows; confirm.
public String nextToken()
112
throws NoSuchElementException, ParseException {
114
if (this.currentToken == null) {
115
throw new NoSuchElementException("Iteration already finished.");
118
// Remember the token to hand out before findNext overwrites currentToken.
final String result = this.currentToken;
119
// updates currentToken, may trigger ParseException:
120
this.searchPos = findNext(this.searchPos);
127
* Returns the next token.
128
* Same as {@link #nextToken}, but with generic return type.
130
* @return the next token in this iteration
132
* @throws NoSuchElementException if there are no more tokens
133
* @throws ParseException if an invalid header value is encountered
135
// Iterator-style accessor with an Object return type.
// NOTE(review): the method body is not visible here; going by the
// Javadoc it presumably delegates to nextToken() - confirm.
public final Object next()
136
throws NoSuchElementException, ParseException {
142
* Removing tokens is not supported.
144
* @throws UnsupportedOperationException always
146
// Always fails: this iterator does not support removal.
public final void remove()
147
throws UnsupportedOperationException {
149
throw new UnsupportedOperationException
150
("Removing tokens is not supported.");
155
* Determines the next token.
156
* If found, the token is stored in {@link #currentToken}.
157
* The return value indicates the position after the token
158
* in {@link #currentHeader}. If necessary, the next header
159
* will be obtained from {@link #headerIt}.
160
* If not found, {@link #currentToken} is set to <code>null</code>.
162
* @param from the position in the current header at which to
163
* start the search, -1 to search in the first header
165
* @return the position after the found token in the current header, or
166
* negative if there was no next token
168
* @throws ParseException if an invalid header value is encountered
170
// Locates the next token and stores it in currentToken.
// NOTE(review): several lines are not visible here (the guards that
// select between the branches below, closing braces, and the final
// return). The per-branch comments describe only what is shown.
protected int findNext(int from)
171
throws ParseException {
174
// called from the constructor, initialize the first header
175
// No headers to read from: the visible 'nothing found' path below sets
// currentToken to null and returns -1 (presumably the missing body of
// this check does the same - confirm).
if (!this.headerIt.hasNext()) {
178
this.currentHeader = this.headerIt.nextHeader().getValue();
181
// called after a token, make sure there is a separator
182
from = findTokenSeparator(from);
185
// May advance currentHeader to later headers while searching.
int start = findTokenStart(from);
187
this.currentToken = null;
188
return -1; // nothing found
191
// A token starts at 'start'; cut it out of the current header value.
int end = findTokenEnd(start);
192
this.currentToken = createToken(this.currentHeader, start, end);
198
* Creates a new token to be returned.
199
* Called from {@link #findNext findNext} after the token is identified.
200
* The default implementation simply calls
201
* {@link java.lang.String#substring String.substring}.
203
* If header values are significantly longer than tokens, and some
204
* tokens are permanently referenced by the application, there can
205
* be problems with garbage collection. A substring will hold a
206
* reference to the full characters of the original string and
207
* therefore occupies more memory than might be expected.
208
* To avoid this, override this method and create a new string
209
* instead of a substring.
211
* @param value the full header value from which to create a token
212
* @param start the index of the first token character
213
* @param end the index after the last token character
215
* @return a string representing the token identified by the arguments
217
// Extracts the characters in [start, end) of 'value' as the token.
// Kept as a separate protected hook so subclasses can build the token
// string differently.
protected String createToken(String value, int start, int end) {
217
return value.substring(start, end);
223
* Determines the starting position of the next token.
224
* This method will iterate over headers if necessary.
226
* @param from the position in the current header at which to start the search
229
* @return the position of the token start in the current header,
230
* negative if no token start could be found
232
// Scans forward from 'from', moving on to following headers as needed,
// until a token character is found.
// NOTE(review): the guard in front of the IllegalArgumentException, the
// statements that advance 'from' / set 'found', the reset of 'from' for a
// new header and the closing braces are not visible here - verify against
// the original file.
protected int findTokenStart(int from) {
234
throw new IllegalArgumentException
235
("Search position must not be negative: " + from);
238
boolean found = false;
239
// Outer loop: one pass per header value; ends when currentHeader is null.
while (!found && (this.currentHeader != null)) {
241
final int to = this.currentHeader.length();
242
// Inner loop: walk the characters of the current header value.
while (!found && (from < to)) {
244
final char ch = this.currentHeader.charAt(from);
245
if (isTokenSeparator(ch) || isWhitespace(ch)) {
246
// whitespace and token separators are skipped
248
// charAt(from) is the same character already held in 'ch'.
} else if (isTokenChar(this.currentHeader.charAt(from))) {
249
// found the start of a token
252
// Neither separator, whitespace nor token character: reject the header.
throw new ParseException
253
("Invalid character before token (pos " + from +
254
"): " + this.currentHeader);
258
// Fetch the next header value, or mark the end of the iteration.
if (this.headerIt.hasNext()) {
259
this.currentHeader = this.headerIt.nextHeader().getValue();
262
this.currentHeader = null;
267
return found ? from : -1;
272
* Determines the position of the next token separator.
273
* Because of multi-header joining rules, the end of a
274
* header value is a token separator. This method does
275
* therefore not need to iterate over headers.
277
* @param from the position in the current header at which to start the search
280
* @return the position of a token separator in the current header,
283
* @throws ParseException
284
* if a new token is found before a token separator.
285
* RFC 2616, section 2.1 explicitly requires a comma between
286
* tokens for <tt>#</tt>.
288
// Scans the current header from 'from' for a token separator and rejects
// anything other than whitespace on the way.
// NOTE(review): the guard in front of the IllegalArgumentException, the
// bodies of the separator and whitespace branches, the return statement
// and the closing braces are not visible here - verify against the
// original file.
// NOTE(review): ParseException is thrown without a throws clause on this
// method, so it is presumably an unchecked exception - confirm.
protected int findTokenSeparator(int from) {
290
throw new IllegalArgumentException
291
("Search position must not be negative: " + from);
294
boolean found = false;
295
// Only the current header is examined; 'to' is its length.
final int to = this.currentHeader.length();
296
while (!found && (from < to)) {
297
final char ch = this.currentHeader.charAt(from);
298
if (isTokenSeparator(ch)) {
300
} else if (isWhitespace(ch)) {
302
// A token character before any separator means two adjacent tokens.
} else if (isTokenChar(ch)) {
303
throw new ParseException
304
("Tokens without separator (pos " + from +
305
"): " + this.currentHeader);
307
throw new ParseException
308
("Invalid character after token (pos " + from +
309
"): " + this.currentHeader);
318
* Determines the ending position of the current token.
319
* This method will not leave the current header value,
320
* since the end of the header value is a token boundary.
322
* @param from the position of the first character of the token
324
* @return the position after the last character of the token.
325
* The behavior is undefined if <code>from</code> does not
326
* point to a token character in the current header value.
328
// Advances over consecutive token characters in the current header.
// NOTE(review): the guard in front of the IllegalArgumentException, the
// declaration of 'end', the loop body, the return statement and the
// closing braces are not visible here - verify against the original file.
protected int findTokenEnd(int from) {
330
throw new IllegalArgumentException
331
("Token start position must not be negative: " + from);
334
final int to = this.currentHeader.length();
336
// Stops at the end of the header value or at the first non-token character.
while ((end < to) && isTokenChar(this.currentHeader.charAt(end))) {
345
* Checks whether a character is a token separator.
346
* RFC 2616, section 2.1 defines comma as the separator for
347
* <tt>#token</tt> sequences. The end of a header value will
348
* also separate tokens, but that is not a character check.
350
* @param ch the character to check
352
* @return <code>true</code> if the character is a token separator,
353
* <code>false</code> otherwise
355
// NOTE(review): the method body is not visible here; going by the
// Javadoc it presumably tests for a comma - confirm.
protected boolean isTokenSeparator(char ch) {
361
* Checks whether a character is a whitespace character.
362
* RFC 2616, section 2.2 defines space and horizontal tab as whitespace.
363
* The optional preceding line break is irrelevant, since header
364
* continuation is handled transparently when parsing messages.
366
* @param ch the character to check
368
* @return <code>true</code> if the character is whitespace,
369
* <code>false</code> otherwise
371
protected boolean isWhitespace(char ch) {
373
// we do not use Character.isWhitespace(ch) here, since that allows
374
// many control characters which are not whitespace as per RFC 2616
375
// Character.isSpaceChar does not cover the horizontal tab, hence the
// explicit check. It does accept every Unicode space, line and paragraph
// separator, so this is broader than the SP / HT of RFC 2616.
return ((ch == '\t') || Character.isSpaceChar(ch));
380
* Checks whether a character is a valid token character.
381
* Whitespace, control characters, and HTTP separators are not
382
* valid token characters. The HTTP specification (RFC 2616, section 2.2)
383
* defines tokens only for the US-ASCII character set, this
384
* method extends the definition to other character sets.
386
* @param ch the character to check
388
* @return <code>true</code> if the character is a valid token character,
389
* <code>false</code> otherwise
391
// Classifies a character by three checks in order: letter or digit,
// ISO control character, HTTP separator.
// NOTE(review): the return statements after each 'if', the final return
// and the closing brace are not visible here - verify against the
// original file.
protected boolean isTokenChar(char ch) {
393
// common sense extension of ALPHA + DIGIT
394
if (Character.isLetterOrDigit(ch))
397
// common sense extension of CTL
398
if (Character.isISOControl(ch))
401
// no common sense extension for this
402
if (isHttpSeparator(ch))
405
// RFC 2616, section 2.2 defines a token character as
406
// "any CHAR except CTLs or separators". The controls
407
// and separators are included in the checks above.
408
// This will yield unexpected results for Unicode format characters.
409
// If that is a problem, override isHttpSeparator(char) to filter
410
// out the false positives.
416
* Checks whether a character is an HTTP separator.
417
* The implementation in this class checks only for the HTTP separators
418
* defined in RFC 2616, section 2.2. If you need to detect other
419
* separators beyond the US-ASCII character set, override this method.
421
* @param ch the character to check
423
* @return <code>true</code> if the character is an HTTP separator
425
protected boolean isHttpSeparator(char ch) {
426
// indexOf returns -1 when 'ch' is not one of the listed separators.
return (HTTP_SEPARATORS.indexOf(ch) >= 0);
430
} // class BasicTokenIterator