5
* A PHP-Based RSS and Atom Feed Framework.
6
* Takes the hard work out of managing a complete RSS/Atom solution.
8
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
11
* Redistribution and use in source and binary forms, with or without modification, are
12
* permitted provided that the following conditions are met:
14
* * Redistributions of source code must retain the above copyright notice, this list of
15
* conditions and the following disclaimer.
17
* * Redistributions in binary form must reproduce the above copyright notice, this list
18
* of conditions and the following disclaimer in the documentation and/or other materials
19
* provided with the distribution.
21
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
22
* to endorse or promote products derived from this software without specific prior written permission.
25
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
26
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
27
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
28
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
32
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33
* POSSIBILITY OF SUCH DAMAGE.
37
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
39
* @author Geoffrey Sneddon
41
* @link http://simplepie.org/ SimplePie
42
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
47
* Decode HTML Entities
49
* This implements HTML5 as of revision 967 (2007-06-28)
51
* @deprecated Use DOMDocument instead!
54
class SimplePie_Decode_HTML_Entities
65
* Currently consumed bytes
73
* Position of the current byte being parsed
81
* Create an instance of the class with the input data
84
* @param string $data Input data
86
// Constructor: receives the input string to be decoded as $data.
// NOTE(review): the constructor body is not visible in this extract. Presumably it
// stores $data on $this->data (the property the other methods read) — confirm
// against the complete file.
public function __construct($data)
92
* Parse the input data
95
* @return string Output data
97
// Walks $this->data looking for '&' characters, one per loop iteration.
// NOTE(review): the statements between the loop header and the buffer reset, and
// the final return, are not visible in this extract — confirm against the full file.
public function parse()
99
// strpos() searches from the current $this->position and its result is written
// back to $this->position; the loop stops when strpos() returns false (no more '&').
// The strict !== comparison matters because a match at offset 0 is a valid position.
while (($this->position = strpos($this->data, '&', $this->position)) !== false)
103
// Clear the consumed-bytes buffer (appears to belong to the loop body; the braces
// are not visible in this extract).
$this->consumed = '';
109
* Consume the next byte
112
* @return mixed The next byte, or false, if there is no more data
114
// Consumes a single byte: when a byte exists at $this->position it is appended to
// $this->consumed, returned to the caller, and $this->position advances by one.
// NOTE(review): the "no more data" path is not visible in this extract.
public function consume()
116
// isset() on a string offset is false once the offset is past the end of the string.
if (isset($this->data[$this->position]))
118
// Record the byte in the consumed buffer before handing it back.
$this->consumed .= $this->data[$this->position];
119
// Post-increment: the byte at the current position is returned, then position moves on.
return $this->data[$this->position++];
128
* Consume a range of characters
131
* @param string $chars Characters to consume
132
* @return mixed A series of characters that match the range, or false
134
// Consumes the longest run of bytes, starting at $this->position, that consists
// only of characters listed in $chars.
// NOTE(review): the return statements are not visible in this extract.
public function consume_range($chars)
136
// strspn() yields the length of the leading segment (from $this->position) made up
// solely of bytes in $chars. A length of 0 is falsy, so this branch is skipped
// when nothing matches.
if ($len = strspn($this->data, $chars, $this->position))
138
// Extract the matched run.
$data = substr($this->data, $this->position, $len);
139
// Append the run to the consumed buffer and step past it.
$this->consumed .= $data;
140
$this->position += $len;
154
// Removes the most recently consumed byte from the consumed buffer.
// NOTE(review): only the buffer trim is visible in this extract; whether
// $this->position is also stepped back cannot be confirmed from here.
public function unconsume()
156
// substr(..., 0, -1) keeps everything except the final byte.
$this->consumed = substr($this->consumed, 0, -1);
165
public function entity()
167
switch ($this->consume())
181
switch ($this->consume())
185
$range = '0123456789ABCDEFabcdef';
190
$range = '0123456789';
196
if ($codepoint = $this->consume_range($range))
198
static $windows_1252_specials = array(0x0D => "\x0A", 0x80 => "\xE2\x82\xAC", 0x81 => "\xEF\xBF\xBD", 0x82 => "\xE2\x80\x9A", 0x83 => "\xC6\x92", 0x84 => "\xE2\x80\x9E", 0x85 => "\xE2\x80\xA6", 0x86 => "\xE2\x80\xA0", 0x87 => "\xE2\x80\xA1", 0x88 => "\xCB\x86", 0x89 => "\xE2\x80\xB0", 0x8A => "\xC5\xA0", 0x8B => "\xE2\x80\xB9", 0x8C => "\xC5\x92", 0x8D => "\xEF\xBF\xBD", 0x8E => "\xC5\xBD", 0x8F => "\xEF\xBF\xBD", 0x90 => "\xEF\xBF\xBD", 0x91 => "\xE2\x80\x98", 0x92 => "\xE2\x80\x99", 0x93 => "\xE2\x80\x9C", 0x94 => "\xE2\x80\x9D", 0x95 => "\xE2\x80\xA2", 0x96 => "\xE2\x80\x93", 0x97 => "\xE2\x80\x94", 0x98 => "\xCB\x9C", 0x99 => "\xE2\x84\xA2", 0x9A => "\xC5\xA1", 0x9B => "\xE2\x80\xBA", 0x9C => "\xC5\x93", 0x9D => "\xEF\xBF\xBD", 0x9E => "\xC5\xBE", 0x9F => "\xC5\xB8");
202
$codepoint = hexdec($codepoint);
206
$codepoint = intval($codepoint);
209
if (isset($windows_1252_specials[$codepoint]))
211
$replacement = $windows_1252_specials[$codepoint];
215
$replacement = SimplePie_Misc::codepoint_to_utf8($codepoint);
218
if (!in_array($this->consume(), array(';', false), true))
223
$consumed_length = strlen($this->consumed);
224
$this->data = substr_replace($this->data, $replacement, $this->position - $consumed_length, $consumed_length);
225
$this->position += strlen($replacement) - $consumed_length;
230
static $entities = array(
231
'Aacute' => "\xC3\x81",
232
'aacute' => "\xC3\xA1",
233
'Aacute;' => "\xC3\x81",
234
'aacute;' => "\xC3\xA1",
235
'Acirc' => "\xC3\x82",
236
'acirc' => "\xC3\xA2",
237
'Acirc;' => "\xC3\x82",
238
'acirc;' => "\xC3\xA2",
239
'acute' => "\xC2\xB4",
240
'acute;' => "\xC2\xB4",
241
'AElig' => "\xC3\x86",
242
'aelig' => "\xC3\xA6",
243
'AElig;' => "\xC3\x86",
244
'aelig;' => "\xC3\xA6",
245
'Agrave' => "\xC3\x80",
246
'agrave' => "\xC3\xA0",
247
'Agrave;' => "\xC3\x80",
248
'agrave;' => "\xC3\xA0",
249
'alefsym;' => "\xE2\x84\xB5",
250
'Alpha;' => "\xCE\x91",
251
'alpha;' => "\xCE\xB1",
256
'and;' => "\xE2\x88\xA7",
257
'ang;' => "\xE2\x88\xA0",
259
'Aring' => "\xC3\x85",
260
'aring' => "\xC3\xA5",
261
'Aring;' => "\xC3\x85",
262
'aring;' => "\xC3\xA5",
263
'asymp;' => "\xE2\x89\x88",
264
'Atilde' => "\xC3\x83",
265
'atilde' => "\xC3\xA3",
266
'Atilde;' => "\xC3\x83",
267
'atilde;' => "\xC3\xA3",
268
'Auml' => "\xC3\x84",
269
'auml' => "\xC3\xA4",
270
'Auml;' => "\xC3\x84",
271
'auml;' => "\xC3\xA4",
272
'bdquo;' => "\xE2\x80\x9E",
273
'Beta;' => "\xCE\x92",
274
'beta;' => "\xCE\xB2",
275
'brvbar' => "\xC2\xA6",
276
'brvbar;' => "\xC2\xA6",
277
'bull;' => "\xE2\x80\xA2",
278
'cap;' => "\xE2\x88\xA9",
279
'Ccedil' => "\xC3\x87",
280
'ccedil' => "\xC3\xA7",
281
'Ccedil;' => "\xC3\x87",
282
'ccedil;' => "\xC3\xA7",
283
'cedil' => "\xC2\xB8",
284
'cedil;' => "\xC2\xB8",
285
'cent' => "\xC2\xA2",
286
'cent;' => "\xC2\xA2",
287
'Chi;' => "\xCE\xA7",
288
'chi;' => "\xCF\x87",
289
'circ;' => "\xCB\x86",
290
'clubs;' => "\xE2\x99\xA3",
291
'cong;' => "\xE2\x89\x85",
292
'COPY' => "\xC2\xA9",
293
'copy' => "\xC2\xA9",
294
'COPY;' => "\xC2\xA9",
295
'copy;' => "\xC2\xA9",
296
'crarr;' => "\xE2\x86\xB5",
297
'cup;' => "\xE2\x88\xAA",
298
'curren' => "\xC2\xA4",
299
'curren;' => "\xC2\xA4",
300
'Dagger;' => "\xE2\x80\xA1",
301
'dagger;' => "\xE2\x80\xA0",
302
'dArr;' => "\xE2\x87\x93",
303
'darr;' => "\xE2\x86\x93",
305
'deg;' => "\xC2\xB0",
306
'Delta;' => "\xCE\x94",
307
'delta;' => "\xCE\xB4",
308
'diams;' => "\xE2\x99\xA6",
309
'divide' => "\xC3\xB7",
310
'divide;' => "\xC3\xB7",
311
'Eacute' => "\xC3\x89",
312
'eacute' => "\xC3\xA9",
313
'Eacute;' => "\xC3\x89",
314
'eacute;' => "\xC3\xA9",
315
'Ecirc' => "\xC3\x8A",
316
'ecirc' => "\xC3\xAA",
317
'Ecirc;' => "\xC3\x8A",
318
'ecirc;' => "\xC3\xAA",
319
'Egrave' => "\xC3\x88",
320
'egrave' => "\xC3\xA8",
321
'Egrave;' => "\xC3\x88",
322
'egrave;' => "\xC3\xA8",
323
'empty;' => "\xE2\x88\x85",
324
'emsp;' => "\xE2\x80\x83",
325
'ensp;' => "\xE2\x80\x82",
326
'Epsilon;' => "\xCE\x95",
327
'epsilon;' => "\xCE\xB5",
328
'equiv;' => "\xE2\x89\xA1",
329
'Eta;' => "\xCE\x97",
330
'eta;' => "\xCE\xB7",
333
'ETH;' => "\xC3\x90",
334
'eth;' => "\xC3\xB0",
335
'Euml' => "\xC3\x8B",
336
'euml' => "\xC3\xAB",
337
'Euml;' => "\xC3\x8B",
338
'euml;' => "\xC3\xAB",
339
'euro;' => "\xE2\x82\xAC",
340
'exist;' => "\xE2\x88\x83",
341
'fnof;' => "\xC6\x92",
342
'forall;' => "\xE2\x88\x80",
343
'frac12' => "\xC2\xBD",
344
'frac12;' => "\xC2\xBD",
345
'frac14' => "\xC2\xBC",
346
'frac14;' => "\xC2\xBC",
347
'frac34' => "\xC2\xBE",
348
'frac34;' => "\xC2\xBE",
349
'frasl;' => "\xE2\x81\x84",
350
'Gamma;' => "\xCE\x93",
351
'gamma;' => "\xCE\xB3",
352
'ge;' => "\xE2\x89\xA5",
357
'hArr;' => "\xE2\x87\x94",
358
'harr;' => "\xE2\x86\x94",
359
'hearts;' => "\xE2\x99\xA5",
360
'hellip;' => "\xE2\x80\xA6",
361
'Iacute' => "\xC3\x8D",
362
'iacute' => "\xC3\xAD",
363
'Iacute;' => "\xC3\x8D",
364
'iacute;' => "\xC3\xAD",
365
'Icirc' => "\xC3\x8E",
366
'icirc' => "\xC3\xAE",
367
'Icirc;' => "\xC3\x8E",
368
'icirc;' => "\xC3\xAE",
369
'iexcl' => "\xC2\xA1",
370
'iexcl;' => "\xC2\xA1",
371
'Igrave' => "\xC3\x8C",
372
'igrave' => "\xC3\xAC",
373
'Igrave;' => "\xC3\x8C",
374
'igrave;' => "\xC3\xAC",
375
'image;' => "\xE2\x84\x91",
376
'infin;' => "\xE2\x88\x9E",
377
'int;' => "\xE2\x88\xAB",
378
'Iota;' => "\xCE\x99",
379
'iota;' => "\xCE\xB9",
380
'iquest' => "\xC2\xBF",
381
'iquest;' => "\xC2\xBF",
382
'isin;' => "\xE2\x88\x88",
383
'Iuml' => "\xC3\x8F",
384
'iuml' => "\xC3\xAF",
385
'Iuml;' => "\xC3\x8F",
386
'iuml;' => "\xC3\xAF",
387
'Kappa;' => "\xCE\x9A",
388
'kappa;' => "\xCE\xBA",
389
'Lambda;' => "\xCE\x9B",
390
'lambda;' => "\xCE\xBB",
391
'lang;' => "\xE3\x80\x88",
392
'laquo' => "\xC2\xAB",
393
'laquo;' => "\xC2\xAB",
394
'lArr;' => "\xE2\x87\x90",
395
'larr;' => "\xE2\x86\x90",
396
'lceil;' => "\xE2\x8C\x88",
397
'ldquo;' => "\xE2\x80\x9C",
398
'le;' => "\xE2\x89\xA4",
399
'lfloor;' => "\xE2\x8C\x8A",
400
'lowast;' => "\xE2\x88\x97",
401
'loz;' => "\xE2\x97\x8A",
402
'lrm;' => "\xE2\x80\x8E",
403
'lsaquo;' => "\xE2\x80\xB9",
404
'lsquo;' => "\xE2\x80\x98",
409
'macr' => "\xC2\xAF",
410
'macr;' => "\xC2\xAF",
411
'mdash;' => "\xE2\x80\x94",
412
'micro' => "\xC2\xB5",
413
'micro;' => "\xC2\xB5",
414
'middot' => "\xC2\xB7",
415
'middot;' => "\xC2\xB7",
416
'minus;' => "\xE2\x88\x92",
419
'nabla;' => "\xE2\x88\x87",
420
'nbsp' => "\xC2\xA0",
421
'nbsp;' => "\xC2\xA0",
422
'ndash;' => "\xE2\x80\x93",
423
'ne;' => "\xE2\x89\xA0",
424
'ni;' => "\xE2\x88\x8B",
426
'not;' => "\xC2\xAC",
427
'notin;' => "\xE2\x88\x89",
428
'nsub;' => "\xE2\x8A\x84",
429
'Ntilde' => "\xC3\x91",
430
'ntilde' => "\xC3\xB1",
431
'Ntilde;' => "\xC3\x91",
432
'ntilde;' => "\xC3\xB1",
435
'Oacute' => "\xC3\x93",
436
'oacute' => "\xC3\xB3",
437
'Oacute;' => "\xC3\x93",
438
'oacute;' => "\xC3\xB3",
439
'Ocirc' => "\xC3\x94",
440
'ocirc' => "\xC3\xB4",
441
'Ocirc;' => "\xC3\x94",
442
'ocirc;' => "\xC3\xB4",
443
'OElig;' => "\xC5\x92",
444
'oelig;' => "\xC5\x93",
445
'Ograve' => "\xC3\x92",
446
'ograve' => "\xC3\xB2",
447
'Ograve;' => "\xC3\x92",
448
'ograve;' => "\xC3\xB2",
449
'oline;' => "\xE2\x80\xBE",
450
'Omega;' => "\xCE\xA9",
451
'omega;' => "\xCF\x89",
452
'Omicron;' => "\xCE\x9F",
453
'omicron;' => "\xCE\xBF",
454
'oplus;' => "\xE2\x8A\x95",
455
'or;' => "\xE2\x88\xA8",
456
'ordf' => "\xC2\xAA",
457
'ordf;' => "\xC2\xAA",
458
'ordm' => "\xC2\xBA",
459
'ordm;' => "\xC2\xBA",
460
'Oslash' => "\xC3\x98",
461
'oslash' => "\xC3\xB8",
462
'Oslash;' => "\xC3\x98",
463
'oslash;' => "\xC3\xB8",
464
'Otilde' => "\xC3\x95",
465
'otilde' => "\xC3\xB5",
466
'Otilde;' => "\xC3\x95",
467
'otilde;' => "\xC3\xB5",
468
'otimes;' => "\xE2\x8A\x97",
469
'Ouml' => "\xC3\x96",
470
'ouml' => "\xC3\xB6",
471
'Ouml;' => "\xC3\x96",
472
'ouml;' => "\xC3\xB6",
473
'para' => "\xC2\xB6",
474
'para;' => "\xC2\xB6",
475
'part;' => "\xE2\x88\x82",
476
'permil;' => "\xE2\x80\xB0",
477
'perp;' => "\xE2\x8A\xA5",
478
'Phi;' => "\xCE\xA6",
479
'phi;' => "\xCF\x86",
482
'piv;' => "\xCF\x96",
483
'plusmn' => "\xC2\xB1",
484
'plusmn;' => "\xC2\xB1",
485
'pound' => "\xC2\xA3",
486
'pound;' => "\xC2\xA3",
487
'Prime;' => "\xE2\x80\xB3",
488
'prime;' => "\xE2\x80\xB2",
489
'prod;' => "\xE2\x88\x8F",
490
'prop;' => "\xE2\x88\x9D",
491
'Psi;' => "\xCE\xA8",
492
'psi;' => "\xCF\x88",
497
'radic;' => "\xE2\x88\x9A",
498
'rang;' => "\xE3\x80\x89",
499
'raquo' => "\xC2\xBB",
500
'raquo;' => "\xC2\xBB",
501
'rArr;' => "\xE2\x87\x92",
502
'rarr;' => "\xE2\x86\x92",
503
'rceil;' => "\xE2\x8C\x89",
504
'rdquo;' => "\xE2\x80\x9D",
505
'real;' => "\xE2\x84\x9C",
508
'REG;' => "\xC2\xAE",
509
'reg;' => "\xC2\xAE",
510
'rfloor;' => "\xE2\x8C\x8B",
511
'Rho;' => "\xCE\xA1",
512
'rho;' => "\xCF\x81",
513
'rlm;' => "\xE2\x80\x8F",
514
'rsaquo;' => "\xE2\x80\xBA",
515
'rsquo;' => "\xE2\x80\x99",
516
'sbquo;' => "\xE2\x80\x9A",
517
'Scaron;' => "\xC5\xA0",
518
'scaron;' => "\xC5\xA1",
519
'sdot;' => "\xE2\x8B\x85",
520
'sect' => "\xC2\xA7",
521
'sect;' => "\xC2\xA7",
523
'shy;' => "\xC2\xAD",
524
'Sigma;' => "\xCE\xA3",
525
'sigma;' => "\xCF\x83",
526
'sigmaf;' => "\xCF\x82",
527
'sim;' => "\xE2\x88\xBC",
528
'spades;' => "\xE2\x99\xA0",
529
'sub;' => "\xE2\x8A\x82",
530
'sube;' => "\xE2\x8A\x86",
531
'sum;' => "\xE2\x88\x91",
532
'sup;' => "\xE2\x8A\x83",
533
'sup1' => "\xC2\xB9",
534
'sup1;' => "\xC2\xB9",
535
'sup2' => "\xC2\xB2",
536
'sup2;' => "\xC2\xB2",
537
'sup3' => "\xC2\xB3",
538
'sup3;' => "\xC2\xB3",
539
'supe;' => "\xE2\x8A\x87",
540
'szlig' => "\xC3\x9F",
541
'szlig;' => "\xC3\x9F",
542
'Tau;' => "\xCE\xA4",
543
'tau;' => "\xCF\x84",
544
'there4;' => "\xE2\x88\xB4",
545
'Theta;' => "\xCE\x98",
546
'theta;' => "\xCE\xB8",
547
'thetasym;' => "\xCF\x91",
548
'thinsp;' => "\xE2\x80\x89",
549
'THORN' => "\xC3\x9E",
550
'thorn' => "\xC3\xBE",
551
'THORN;' => "\xC3\x9E",
552
'thorn;' => "\xC3\xBE",
553
'tilde;' => "\xCB\x9C",
554
'times' => "\xC3\x97",
555
'times;' => "\xC3\x97",
556
'TRADE;' => "\xE2\x84\xA2",
557
'trade;' => "\xE2\x84\xA2",
558
'Uacute' => "\xC3\x9A",
559
'uacute' => "\xC3\xBA",
560
'Uacute;' => "\xC3\x9A",
561
'uacute;' => "\xC3\xBA",
562
'uArr;' => "\xE2\x87\x91",
563
'uarr;' => "\xE2\x86\x91",
564
'Ucirc' => "\xC3\x9B",
565
'ucirc' => "\xC3\xBB",
566
'Ucirc;' => "\xC3\x9B",
567
'ucirc;' => "\xC3\xBB",
568
'Ugrave' => "\xC3\x99",
569
'ugrave' => "\xC3\xB9",
570
'Ugrave;' => "\xC3\x99",
571
'ugrave;' => "\xC3\xB9",
573
'uml;' => "\xC2\xA8",
574
'upsih;' => "\xCF\x92",
575
'Upsilon;' => "\xCE\xA5",
576
'upsilon;' => "\xCF\x85",
577
'Uuml' => "\xC3\x9C",
578
'uuml' => "\xC3\xBC",
579
'Uuml;' => "\xC3\x9C",
580
'uuml;' => "\xC3\xBC",
581
'weierp;' => "\xE2\x84\x98",
584
'Yacute' => "\xC3\x9D",
585
'yacute' => "\xC3\xBD",
586
'Yacute;' => "\xC3\x9D",
587
'yacute;' => "\xC3\xBD",
589
'yen;' => "\xC2\xA5",
590
'yuml' => "\xC3\xBF",
591
'Yuml;' => "\xC5\xB8",
592
'yuml;' => "\xC3\xBF",
593
'Zeta;' => "\xCE\x96",
594
'zeta;' => "\xCE\xB6",
595
'zwj;' => "\xE2\x80\x8D",
596
'zwnj;' => "\xE2\x80\x8C"
599
for ($i = 0, $match = null; $i < 9 && $this->consume() !== false; $i++)
601
$consumed = substr($this->consumed, 1);
602
if (isset($entities[$consumed]))
610
$this->data = substr_replace($this->data, $entities[$match], $this->position - strlen($consumed) - 1, strlen($match) + 1);
611
$this->position += strlen($entities[$match]) - strlen($consumed) - 1;