/*
 * ~ubuntu-branches/ubuntu/trusty/httpcomponents-core/trusty
 *
 * $HeadURL: https://svn.apache.org/repos/asf/httpcomponents/httpcore/tags/4.0.1/httpcore/src/main/java/org/apache/http/message/BasicTokenIterator.java $
 * $Revision: 744527 $
 * $Date: 2009-02-14 18:06:25 +0100 (Sat, 14 Feb 2009) $
 *
 * ====================================================================
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation. For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 *
 */

package org.apache.http.message;

import java.util.NoSuchElementException;

import org.apache.http.HeaderIterator;

import org.apache.http.ParseException;

import org.apache.http.TokenIterator;

/**

* Basic implementation of a {@link TokenIterator}.

* This implementation parses <tt>#token<tt> sequences as

* defined by RFC 2616, section 2.

* It extends that definition somewhat beyond US-ASCII.

* @version $Revision: 744527 $

* @since 4.0

public class BasicTokenIterator implements TokenIterator {

/** The HTTP separator characters. Defined in RFC 2616, section 2.2. */

// the order of the characters here is adjusted to put the

// most likely candidates at the beginning of the collection

public final static String HTTP_SEPARATORS = " ,;=()<>@:\\\"/[]?{}\t";

/** The iterator from which to obtain the next header. */

protected final HeaderIterator headerIt;

/**

* The value of the current header.

* This is the header value that includes {@link #currentToken}.

* Undefined if the iteration is over.

protected String currentHeader;

/**

* The token to be returned by the next call to {@link #currentToken}.

* <code>null</code> if the iteration is over.

protected String currentToken;

/**

* The position after {@link #currentToken} in {@link #currentHeader}.

* Undefined if the iteration is over.

protected int searchPos;

/**

* Creates a new instance of {@link BasicTokenIterator}.

* @param headerIterator the iterator for the headers to tokenize

public BasicTokenIterator(final HeaderIterator headerIterator) {

if (headerIterator == null) {

throw new IllegalArgumentException

("Header iterator must not be null.");

}

this.headerIt = headerIterator;

this.searchPos = findNext(-1);

}

// non-javadoc, see interface TokenIterator

public boolean hasNext() {

return (this.currentToken != null);

100

}

101

102

103

/**

104

* Obtains the next token from this iteration.

105

106

* @return the next token in this iteration

107

108

* @throws NoSuchElementException if the iteration is already over

109

* @throws ParseException if an invalid header value is encountered

110

111

public String nextToken()

112

throws NoSuchElementException, ParseException {

113

114

if (this.currentToken == null) {

115

throw new NoSuchElementException("Iteration already finished.");

116

}

117

118

final String result = this.currentToken;

119

// updates currentToken, may trigger ParseException:

120

this.searchPos = findNext(this.searchPos);

121

122

return result;

123

}

124

125

126

/**

127

* Returns the next token.

128

* Same as {@link #nextToken}, but with generic return type.

129

130

* @return the next token in this iteration

131

132

* @throws NoSuchElementException if there are no more tokens

133

* @throws ParseException if an invalid header value is encountered

134

135

public final Object next()

136

throws NoSuchElementException, ParseException {

137

return nextToken();

138

}

139

140

141

/**

142

* Removing tokens is not supported.

143

144

* @throws UnsupportedOperationException always

145

146

public final void remove()

147

throws UnsupportedOperationException {

148

149

throw new UnsupportedOperationException

150

("Removing tokens is not supported.");

151

}

152

153

154

/**

155

* Determines the next token.

156

* If found, the token is stored in {@link #currentToken}.

157

* The return value indicates the position after the token

158

* in {@link #currentHeader}. If necessary, the next header

159

* will be obtained from {@link #headerIt}.

160

* If not found, {@link #currentToken} is set to <code>null</code>.

161

162

* @param from the position in the current header at which to

163

* start the search, -1 to search in the first header

164

165

* @return the position after the found token in the current header, or

166

* negative if there was no next token

167

168

* @throws ParseException if an invalid header value is encountered

169

170

protected int findNext(int from)

171

throws ParseException {

172

173

if (from < 0) {

174

// called from the constructor, initialize the first header

175

if (!this.headerIt.hasNext()) {

176

return -1;

177

}

178

this.currentHeader = this.headerIt.nextHeader().getValue();

179

from = 0;

180

} else {

181

// called after a token, make sure there is a separator

182

from = findTokenSeparator(from);

183

}

184

185

int start = findTokenStart(from);

186

if (start < 0) {

187

this.currentToken = null;

188

return -1; // nothing found

189

}

190

191

int end = findTokenEnd(start);

192

this.currentToken = createToken(this.currentHeader, start, end);

193

return end;

194

}

195

196

197

/**

198

* Creates a new token to be returned.

199

* Called from {@link #findNext findNext} after the token is identified.

200

* The default implementation simply calls

201

* {@link java.lang.String#substring String.substring}.

202

* <br/>

203

* If header values are significantly longer than tokens, and some

204

* tokens are permanently referenced by the application, there can

205

* be problems with garbage collection. A substring will hold a

206

* reference to the full characters of the original string and

207

* therefore occupies more memory than might be expected.

208

* To avoid this, override this method and create a new string

209

* instead of a substring.

210

211

* @param value the full header value from which to create a token

212

* @param start the index of the first token character

213

* @param end the index after the last token character

214

215

* @return a string representing the token identified by the arguments

216

217

protected String createToken(String value, int start, int end) {

218

return value.substring(start, end);

219

}

220

221

222

/**

223

* Determines the starting position of the next token.

224

* This method will iterate over headers if necessary.

225

226

* @param from the position in the current header at which to

227

* start the search

228

229

* @return the position of the token start in the current header,

230

* negative if no token start could be found

231

232

protected int findTokenStart(int from) {

233

if (from < 0) {

234

throw new IllegalArgumentException

235

("Search position must not be negative: " + from);

236

}

237

238

boolean found = false;

239

while (!found && (this.currentHeader != null)) {

240

241

final int to = this.currentHeader.length();

242

while (!found && (from < to)) {

243

244

final char ch = this.currentHeader.charAt(from);

245

if (isTokenSeparator(ch) || isWhitespace(ch)) {

246

// whitspace and token separators are skipped

247

from++;

248

} else if (isTokenChar(this.currentHeader.charAt(from))) {

249

// found the start of a token

250

found = true;

251

} else {

252

throw new ParseException

253

("Invalid character before token (pos " + from +

254

"): " + this.currentHeader);

255

}

256

}

257

if (!found) {

258

if (this.headerIt.hasNext()) {

259

this.currentHeader = this.headerIt.nextHeader().getValue();

260

from = 0;

261

} else {

262

this.currentHeader = null;

263

}

264

}

265

} // while headers

266

267

return found ? from : -1;

268

}

269

270

271

/**

272

* Determines the position of the next token separator.

273

* Because of multi-header joining rules, the end of a

274

* header value is a token separator. This method does

275

* therefore not need to iterate over headers.

276

277

* @param from the position in the current header at which to

278

* start the search

279

280

* @return the position of a token separator in the current header,

281

* or at the end

282

283

* @throws ParseException

284

* if a new token is found before a token separator.

285

* RFC 2616, section 2.1 explicitly requires a comma between

286

* tokens for <tt>#</tt>.

287

288

protected int findTokenSeparator(int from) {

289

if (from < 0) {

290

throw new IllegalArgumentException

291

("Search position must not be negative: " + from);

292

}

293

294

boolean found = false;

295

final int to = this.currentHeader.length();

296

while (!found && (from < to)) {

297

final char ch = this.currentHeader.charAt(from);

298

if (isTokenSeparator(ch)) {

299

found = true;

300

} else if (isWhitespace(ch)) {

301

from++;

302

} else if (isTokenChar(ch)) {

303

throw new ParseException

304

("Tokens without separator (pos " + from +

305

"): " + this.currentHeader);

306

} else {

307

throw new ParseException

308

("Invalid character after token (pos " + from +

309

"): " + this.currentHeader);

310

}

311

}

312

313

return from;

314

}

315

316

317

/**

318

* Determines the ending position of the current token.

319

* This method will not leave the current header value,

320

* since the end of the header value is a token boundary.

321

322

* @param from the position of the first character of the token

323

324

* @return the position after the last character of the token.

325

* The behavior is undefined if <code>from</code> does not

326

* point to a token character in the current header value.

327

328

protected int findTokenEnd(int from) {

329

if (from < 0) {

330

throw new IllegalArgumentException

331

("Token start position must not be negative: " + from);

332

}

333

334

final int to = this.currentHeader.length();

335

int end = from+1;

336

while ((end < to) && isTokenChar(this.currentHeader.charAt(end))) {

337

end++;

338

}

339

340

return end;

341

}

342

343

344

/**

345

* Checks whether a character is a token separator.

346

* RFC 2616, section 2.1 defines comma as the separator for

347

* <tt>#token</tt> sequences. The end of a header value will

348

* also separate tokens, but that is not a character check.

349

350

* @param ch the character to check

351

352

* @return <code>true</code> if the character is a token separator,

353

* <code>false</code> otherwise

354

355

protected boolean isTokenSeparator(char ch) {

356

return (ch == ',');

357

}

358

359

360

/**

361

* Checks whether a character is a whitespace character.

362

* RFC 2616, section 2.2 defines space and horizontal tab as whitespace.

363

* The optional preceeding line break is irrelevant, since header

364

* continuation is handled transparently when parsing messages.

365

366

* @param ch the character to check

367

368

* @return <code>true</code> if the character is whitespace,

369

* <code>false</code> otherwise

370

371

protected boolean isWhitespace(char ch) {

372

373

// we do not use Character.isWhitspace(ch) here, since that allows

374

// many control characters which are not whitespace as per RFC 2616

375

return ((ch == '\t') || Character.isSpaceChar(ch));

376

}

377

378

379

/**

380

* Checks whether a character is a valid token character.

381

* Whitespace, control characters, and HTTP separators are not

382

* valid token characters. The HTTP specification (RFC 2616, section 2.2)

383

* defines tokens only for the US-ASCII character set, this

384

* method extends the definition to other character sets.

385

386

* @param ch the character to check

387

388

* @return <code>true</code> if the character is a valid token start,

389

* <code>false</code> otherwise

390

391

protected boolean isTokenChar(char ch) {

392

393

// common sense extension of ALPHA + DIGIT

394

if (Character.isLetterOrDigit(ch))

395

return true;

396

397

// common sense extension of CTL

398

if (Character.isISOControl(ch))

399

return false;

400

401

// no common sense extension for this

402

if (isHttpSeparator(ch))

403

return false;

404

405

// RFC 2616, section 2.2 defines a token character as

406

// "any CHAR except CTLs or separators". The controls

407

// and separators are included in the checks above.

408

// This will yield unexpected results for Unicode format characters.

409

// If that is a problem, overwrite isHttpSeparator(char) to filter

410

// out the false positives.

411

return true;

412

}

413

414

415

/**

416

* Checks whether a character is an HTTP separator.

417

* The implementation in this class checks only for the HTTP separators

418

* defined in RFC 2616, section 2.2. If you need to detect other

419

* separators beyond the US-ASCII character set, override this method.

420

421

* @param ch the character to check

422

423

* @return <code>true</code> if the character is an HTTP separator

424

425

protected boolean isHttpSeparator(char ch) {

426

return (HTTP_SEPARATORS.indexOf(ch) >= 0);

427

}

428

429

430

} // class BasicTokenIterator

431

Older »