5
* A PHP-Based RSS and Atom Feed Framework.
6
* Takes the hard work out of managing a complete RSS/Atom solution.
8
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
11
* Redistribution and use in source and binary forms, with or without modification, are
12
* permitted provided that the following conditions are met:
14
* * Redistributions of source code must retain the above copyright notice, this list of
15
* conditions and the following disclaimer.
17
* * Redistributions in binary form must reproduce the above copyright notice, this list
18
* of conditions and the following disclaimer in the documentation and/or other materials
19
* provided with the distribution.
21
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
22
* to endorse or promote products derived from this software without specific prior
25
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
26
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
27
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
28
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
32
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33
* POSSIBILITY OF SUCH DAMAGE.
37
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
39
* @author Geoffrey Sneddon
41
* @link http://simplepie.org/ SimplePie
42
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
47
* HTTP Response Parser
52
class SimplePie_HTTP_Parser
{
    /**
     * HTTP version taken from the status line (e.g. 1.1)
     *
     * @var float
     */
    public $http_version = 0.0;

    /**
     * Numeric status code taken from the status line
     *
     * @var int
     */
    public $status_code = 0;

    /**
     * Reason phrase taken from the status line
     *
     * Declared explicitly so that reason() does not create a dynamic property.
     *
     * @var string
     */
    public $reason = '';

    /**
     * Key/value pairs of the headers (keys are lower-cased header names)
     *
     * @var array
     */
    public $headers = array();

    /**
     * Body of the response
     *
     * Declared explicitly so that body() does not create a dynamic property.
     *
     * @var string
     */
    public $body = '';

    /**
     * Current state of the state machine
     *
     * Holds the name of the method to run next, 'emit' once parsing is
     * finished, or false after a parse error.
     *
     * @var string|false
     */
    protected $state = 'http_version';

    /**
     * Input data
     *
     * @var string
     */
    protected $data = '';

    /**
     * Input data length (to avoid calling strlen() every time this is needed)
     *
     * @var int
     */
    protected $data_length = 0;

    /**
     * Current position of the pointer
     *
     * @var int
     */
    protected $position = 0;

    /**
     * Name of the header currently being parsed
     *
     * @var string
     */
    protected $name = '';

    /**
     * Value of the header currently being parsed
     *
     * @var string
     */
    protected $value = '';

    /**
     * Create an instance of the class with the input data
     *
     * @param string $data Input data
     */
    public function __construct($data)
    {
        // The data must be stored before its length is measured.
        $this->data = $data;
        $this->data_length = strlen($this->data);
    }

    /**
     * Parse the input data
     *
     * Runs the state machine until it reaches 'emit', fails (state false) or
     * runs out of input. On failure every public result field is reset to an
     * empty value.
     *
     * @return bool true on success, false on failure
     */
    public function parse()
    {
        while ($this->state && $this->state !== 'emit' && $this->has_data()) {
            // Each state is the name of the method that handles it.
            $state = $this->state;
            $this->$state();
        }

        // The raw input is no longer needed once the machine has stopped.
        $this->data = '';

        // Ending in 'body' means the headers were complete but nothing followed them.
        if ($this->state === 'emit' || $this->state === 'body') {
            return true;
        }

        $this->http_version = '';
        $this->status_code = '';
        $this->reason = '';
        $this->headers = array();
        $this->body = '';
        return false;
    }

    /**
     * Check whether there is data beyond the pointer
     *
     * @return bool true if there is further data, false if not
     */
    protected function has_data()
    {
        return (bool) ($this->position < $this->data_length);
    }

    /**
     * See if the next character is LWS
     *
     * A tab or a space counts as LWS, as does a line feed that is immediately
     * followed by a tab or a space (a folded header line).
     *
     * @return bool true if the next character is LWS, false if not
     */
    protected function is_linear_whitespace()
    {
        return (bool) ($this->data[$this->position] === "\x09"
            || $this->data[$this->position] === "\x20"
            || ($this->data[$this->position] === "\x0A"
                && isset($this->data[$this->position + 1])
                && ($this->data[$this->position + 1] === "\x09" || $this->data[$this->position + 1] === "\x20")));
    }

    /**
     * Parse the HTTP version
     *
     * Requires the input to contain at least one line feed and to start with
     * "HTTP/" (case-insensitive); otherwise parsing fails.
     */
    protected function http_version()
    {
        if (strpos($this->data, "\x0A") !== false && strtoupper(substr($this->data, 0, 5)) === 'HTTP/') {
            $len = strspn($this->data, '0123456789.', 5);
            $this->http_version = substr($this->data, 5, $len);
            $this->position += 5 + $len;
            // More than one dot cannot be a valid version number.
            if (substr_count($this->http_version, '.') <= 1) {
                $this->http_version = (float) $this->http_version;
                // Skip the whitespace separating the version from the status code.
                $this->position += strspn($this->data, "\x09\x20", $this->position);
                $this->state = 'status';
            } else {
                $this->state = false;
            }
        } else {
            $this->state = false;
        }
    }

    /**
     * Parse the status code
     *
     * Fails if there is no run of digits at the current position.
     */
    protected function status()
    {
        if ($len = strspn($this->data, '0123456789', $this->position)) {
            $this->status_code = (int) substr($this->data, $this->position, $len);
            $this->position += $len;
            $this->state = 'reason';
        } else {
            $this->state = false;
        }
    }

    /**
     * Parse the reason phrase
     *
     * Takes everything up to the next line feed, trimmed of tabs, carriage
     * returns and spaces.
     */
    protected function reason()
    {
        $len = strcspn($this->data, "\x0A", $this->position);
        $this->reason = trim(substr($this->data, $this->position, $len), "\x09\x0D\x20");
        // The extra 1 steps over the line feed itself.
        $this->position += $len + 1;
        $this->state = 'new_line';
    }

    /**
     * Deal with a new line, shifting data around as needed
     *
     * Stores the header collected so far (if any), then decides whether the
     * next line starts another header or is the blank line before the body.
     */
    protected function new_line()
    {
        $this->value = trim($this->value, "\x0D\x20");
        if ($this->name !== '' && $this->value !== '') {
            $this->name = strtolower($this->name);
            // We should only use the last Content-Type header. c.f. issue #1
            if (isset($this->headers[$this->name]) && $this->name !== 'content-type') {
                $this->headers[$this->name] .= ', ' . $this->value;
            } else {
                $this->headers[$this->name] = $this->value;
            }
        }
        $this->name = '';
        $this->value = '';

        // Compare two bytes of the input, not a substring of a single character:
        // the latter can never equal a CRLF pair.
        if (substr($this->data, $this->position, 2) === "\x0D\x0A") {
            $this->position += 2;
            $this->state = 'body';
        } elseif ($this->data[$this->position] === "\x0A") {
            $this->position++;
            $this->state = 'body';
        } else {
            $this->state = 'name';
        }
    }

    /**
     * Parse a header name
     *
     * A line without a colon is skipped; input that ends before a colon or
     * line feed is a parse failure.
     */
    protected function name()
    {
        $len = strcspn($this->data, "\x0A:", $this->position);
        if (isset($this->data[$this->position + $len])) {
            if ($this->data[$this->position + $len] === "\x0A") {
                // No colon on this line: leave the pointer on the line feed.
                $this->position += $len;
                $this->state = 'new_line';
            } else {
                $this->name = substr($this->data, $this->position, $len);
                // The extra 1 steps over the colon.
                $this->position += $len + 1;
                $this->state = 'value';
            }
        } else {
            $this->state = false;
        }
    }

    /**
     * Parse LWS, replacing consecutive LWS characters with a single space
     */
    protected function linear_whitespace()
    {
        do {
            if (substr($this->data, $this->position, 2) === "\x0D\x0A") {
                $this->position += 2;
            } elseif ($this->data[$this->position] === "\x0A") {
                $this->position++;
            }
            $this->position += strspn($this->data, "\x09\x20", $this->position);
        } while ($this->has_data() && $this->is_linear_whitespace());
        $this->value .= "\x20";
    }

    /**
     * See what state to move to while within non-quoted header values
     */
    protected function value()
    {
        if ($this->is_linear_whitespace()) {
            $this->linear_whitespace();
            return;
        }

        switch ($this->data[$this->position]) {
            case '"':
                // Workaround for ETags: we have to include the quotes as
                // part of the tag.
                if (strtolower($this->name) === 'etag') {
                    $this->value .= '"';
                    $this->position++;
                    $this->state = 'value_char';
                    break;
                }
                $this->position++;
                $this->state = 'quote';
                break;

            case "\x0A":
                $this->position++;
                $this->state = 'new_line';
                break;

            default:
                $this->state = 'value_char';
                break;
        }
    }

    /**
     * Parse a header value while outside quotes
     *
     * Consumes everything up to the next tab, space, line feed or double quote.
     */
    protected function value_char()
    {
        $len = strcspn($this->data, "\x09\x20\x0A\"", $this->position);
        $this->value .= substr($this->data, $this->position, $len);
        $this->position += $len;
        $this->state = 'value';
    }

    /**
     * See what state to move to while within quoted header values
     */
    protected function quote()
    {
        if ($this->is_linear_whitespace()) {
            $this->linear_whitespace();
            return;
        }

        switch ($this->data[$this->position]) {
            case '"':
                $this->position++;
                $this->state = 'value';
                break;

            case "\x0A":
                $this->position++;
                $this->state = 'new_line';
                break;

            case '\\':
                $this->position++;
                $this->state = 'quote_escaped';
                break;

            default:
                $this->state = 'quote_char';
                break;
        }
    }

    /**
     * Parse a header value while within quotes
     *
     * Consumes everything up to the next tab, space, line feed, double quote
     * or backslash.
     */
    protected function quote_char()
    {
        $len = strcspn($this->data, "\x09\x20\x0A\"\\", $this->position);
        $this->value .= substr($this->data, $this->position, $len);
        $this->position += $len;
        // NOTE(review): this goes back to 'value' rather than 'quote', so a
        // backslash met here is not routed to quote_escaped(). Kept as-is to
        // preserve existing behaviour; confirm whether 'quote' was intended.
        $this->state = 'value';
    }

    /**
     * Parse an escaped character within quotes
     *
     * The character after the backslash is taken literally.
     */
    protected function quote_escaped()
    {
        $this->value .= $this->data[$this->position];
        $this->position++;
        $this->state = 'quote';
    }

    /**
     * Parse the body
     *
     * Everything after the headers becomes the body. If a Transfer-Encoding
     * header was seen, it is removed and the body is handed to chunked().
     */
    protected function body()
    {
        $this->body = substr($this->data, $this->position);
        if (!empty($this->headers['transfer-encoding'])) {
            unset($this->headers['transfer-encoding']);
            $this->state = 'chunked';
        } else {
            $this->state = 'emit';
        }
    }

    /**
     * Parse a "Transfer-Encoding: chunked" body
     *
     * If the body does not look chunked it is left untouched.
     */
    protected function chunked()
    {
        if (!preg_match('/^([0-9a-f]+)[^\r\n]*\r\n/i', trim($this->body))) {
            $this->state = 'emit';
            return;
        }

        $decoded = '';
        $encoded = $this->body;

        while (true) {
            $is_chunked = (bool) preg_match('/^([0-9a-f]+)[^\r\n]*\r\n/i', $encoded, $matches);
            if (!$is_chunked) {
                // Looks like it's not chunked after all
                $this->state = 'emit';
                return;
            }

            $length = hexdec(trim($matches[1]));
            if ($length === 0) {
                // Ignore trailer headers
                $this->state = 'emit';
                $this->body = $decoded;
                return;
            }

            // $matches[0] is the whole chunk-size line including its CRLF.
            $chunk_length = strlen($matches[0]);
            $decoded .= substr($encoded, $chunk_length, $length);
            // The extra 2 skips the CRLF that terminates the chunk data.
            $encoded = substr($encoded, $chunk_length + $length + 2);

            if (trim($encoded) === '0' || empty($encoded)) {
                $this->state = 'emit';
                $this->body = $decoded;
                return;
            }
        }
    }
}