3
require_once 'HTMLPurifier/Strategy.php';
4
require_once 'HTMLPurifier/HTMLDefinition.php';
5
require_once 'HTMLPurifier/Generator.php';
7
require_once 'HTMLPurifier/Injector/AutoParagraph.php';
8
require_once 'HTMLPurifier/Injector/Linkify.php';
9
require_once 'HTMLPurifier/Injector/PurifierLinkify.php';
11
// Registers a configuration directive with the schema. The visible arguments
// are AutoFormat, Custom, array(), list and a help string (presumably
// namespace, directive name, default value, type and description, in that
// order -- TODO confirm against HTMLPurifier_ConfigSchema::define).
// NOTE(review): the end of the help string and the closing of this call are
// not visible in this listing.
HTMLPurifier_ConfigSchema::define(
12
'AutoFormat', 'Custom', array(), 'list', '
14
This directive can be used to add custom auto-format injectors.
15
Specify an array of injector names (class name minus the prefix)
16
or concrete implementations. Injector class must exist. This directive
17
has been available since 2.0.1.
23
* Takes tokens and makes them well-formed (balances end tags, etc.)
25
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
29
* Locally shared variable references
32
// Per-run state shared between execute() and processToken(); all of it is
// unset again at the end of execute().
//   $inputTokens     bound by reference to the token array being read
//   $inputIndex      position of the main loop within $inputTokens
//   $outputTokens    bound by reference to the result array being built
//   $currentNesting  stack of start tokens that are currently open
//   $currentInjector index of the injector that last substituted an array
//   $injectors       list of active injector objects
var $inputTokens, $inputIndex, $outputTokens, $currentNesting,
33
$currentInjector, $injectors;
35
/**
 * Walks the token stream and builds a well-formed copy of it in $result.
 *
 * Visible responsibilities: start tokens for elements whose child type is
 * empty are converted to empty tokens (and the reverse case is detected),
 * a parent that does not list the new element as an allowed child is
 * auto-closed, end tags with no matching open element are turned into
 * text or reported as removed, and every element still open when the
 * input runs out is closed. Injectors configured under AutoFormat are
 * given each text and start token and are notified of each end token
 * that is emitted here.
 *
 * NOTE(review): this listing appears to be a lossy extract (interleaved
 * bare numbers, no closing braces, some statements absent). For example
 * no initialisation of $result and no return statement are visible.
 * Confirm against the canonical file before relying on these notes.
 *
 * @param $tokens  Array of tokens to process; bound by reference to
 *                 $this->inputTokens, which processToken() may splice.
 * @param $config  Configuration object; read for the HTML definition,
 *                 Core.EscapeInvalidTags and the AutoFormat batch.
 * @param $context Context object; CurrentNesting, InputIndex, InputTokens
 *                 and CurrentToken are registered during the run and
 *                 destroyed after the main loop.
 * @return presumably the well-formed token array ($result) -- the return
 *         statement is not visible in this listing; TODO confirm.
 */
function execute($tokens, $config, &$context) {
37
$definition = $config->getHTMLDefinition();
41
$generator = new HTMLPurifier_Generator();
42
$escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
43
// error collector, if any; most sends below are guarded with if ($e)
$e =& $context->get('ErrorCollector', true);
46
$this->currentNesting = array();
47
$this->inputIndex = false;
48
// bind the shared properties by reference so that processToken() operates
// on the same input and output arrays as this method
$this->inputTokens =& $tokens;
49
$this->outputTokens =& $result;
52
// publish the loop state on the context (presumably so that injectors can
// read it -- TODO confirm)
$context->register('CurrentNesting', $this->currentNesting);
53
$context->register('InputIndex', $this->inputIndex);
54
$context->register('InputTokens', $tokens);
56
// -- begin INJECTOR --
58
$this->injectors = array();
60
$injectors = $config->getBatch('AutoFormat');
61
$custom_injectors = $injectors['Custom'];
62
unset($injectors['Custom']); // special case
63
// each remaining AutoFormat key is mapped to the class
// HTMLPurifier_Injector_<key> and instantiated
foreach ($injectors as $injector => $b) {
64
$injector = "HTMLPurifier_Injector_$injector";
66
$this->injectors[] = new $injector;
68
// custom entries are either class-name suffixes (strings, instantiated
// here) or injector objects that are appended as they are
foreach ($custom_injectors as $injector) {
69
if (is_string($injector)) {
70
$injector = "HTMLPurifier_Injector_$injector";
71
$injector = new $injector;
73
$this->injectors[] = $injector;
76
// array index of the injector that resulted in an array
77
// substitution. This enables processTokens() to know which
78
// injectors are affected by the added tokens and which are
79
// not (namely, the ones after the current injector are not
// affected).
81
$this->currentInjector = false;
83
// give the injectors references to the definition and context
84
// variables for performance reasons
85
foreach ($this->injectors as $i => $x) {
86
$error = $this->injectors[$i]->prepare($config, $context);
87
if (!$error) continue;
88
// prepare() reported a problem: remove this injector and warn.
// NOTE(review): array_splice() renumbers $this->injectors while the keys
// produced by this foreach stay fixed, so after a removal the later $i
// values may no longer point at the intended injector -- verify.
list($injector) = array_splice($this->injectors, $i, 1);
89
$name = $injector->name;
90
trigger_error("Cannot enable $name injector because $error is not allowed", E_USER_WARNING);
93
// warning: most foreach loops follow the convention $i => $x.
94
// be sure, for PHP4 compatibility, to only perform write operations
95
// directly referencing the object using $i: $x is only safe for reads
100
$context->register('CurrentToken', $token);
102
// main loop: the index lives on $this so that processToken() can step it
// back after splicing replacement tokens into the input
for ($this->inputIndex = 0; isset($tokens[$this->inputIndex]); $this->inputIndex++) {
104
// if all goes well, this token will be passed through unharmed
105
$token = $tokens[$this->inputIndex];
107
//printTokens($tokens, $this->inputIndex);
109
// count down the skip counter of every injector that is currently skipping
foreach ($this->injectors as $i => $x) {
110
if ($x->skip > 0) $this->injectors[$i]->skip--;
113
// quick-check: if it's not a tag, no need to process
114
if (empty( $token->is_tag )) {
115
if ($token->type === 'text') {
116
// injector handler code; duplicated for performance reasons
117
foreach ($this->injectors as $i => $x) {
118
if (!$x->skip) $this->injectors[$i]->handleText($token);
119
// an injector may have replaced the token with an array of tokens;
// remember which injector did so for the skip adjustment in processToken()
if (is_array($token)) {
120
$this->currentInjector = $i;
125
$this->processToken($token, $config, $context);
129
// NOTE(review): assumes $definition->info has an entry for $token->name
// -- TODO confirm that unknown elements cannot reach this point
$info = $definition->info[$token->name]->child;
131
// quick tag checks: anything that's *not* an end tag
133
if ($info->type == 'empty' && $token->type == 'start') {
134
// test if it claims to be a start tag but is empty
135
$token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
137
} elseif ($info->type != 'empty' && $token->type == 'empty' ) {
138
// claims to be empty but really is a start tag
// NOTE(review): the statement that the two constructor calls below belong
// to is not visible in this listing
140
new HTMLPurifier_Token_Start($token->name, $token->attr),
141
new HTMLPurifier_Token_End($token->name)
144
} elseif ($token->type == 'empty') {
147
} elseif ($token->type == 'start') {
150
// ...unless they also have to close their parent
151
if (!empty($this->currentNesting)) {
153
$parent = array_pop($this->currentNesting);
154
$parent_info = $definition->info[$parent->name];
156
// this can be replaced with a more general algorithm:
157
// if the token is not allowed by the parent, auto-close
159
if (!isset($parent_info->child->elements[$token->name])) {
160
if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
161
// close the parent, then re-loop to reprocess token
162
$result[] = new HTMLPurifier_Token_End($parent->name);
167
$this->currentNesting[] = $parent; // undo the pop
172
// injector handler code; duplicated for performance reasons
174
foreach ($this->injectors as $i => $x) {
175
if (!$x->skip) $this->injectors[$i]->handleElement($token);
176
if (is_array($token)) {
177
$this->currentInjector = $i;
181
$this->processToken($token, $config, $context);
185
// sanity check: we should be dealing with a closing tag
186
if ($token->type != 'end') continue;
188
// make sure that we have something open
189
if (empty($this->currentNesting)) {
190
if ($escape_invalid_tags) {
191
if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
192
// keep the stray end tag visible by emitting its generated markup as text
$result[] = new HTMLPurifier_Token_Text(
193
$generator->generateFromToken($token, $config, $context)
196
// NOTE(review): unlike the neighbouring sends, no guard on $e is visible
// here; it is probably on a line missing from this listing -- confirm
$e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
201
// first, check for the simplest case: everything closes neatly
202
$current_parent = array_pop($this->currentNesting);
203
if ($current_parent->name == $token->name) {
205
foreach ($this->injectors as $i => $x) {
206
$this->injectors[$i]->notifyEnd($token);
211
// okay, so we're trying to close the wrong tag
213
// undo the previous pop
214
$this->currentNesting[] = $current_parent;
216
// scroll back the entire nest, trying to find our tag.
217
// (feature could be to specify how far you'd like to go)
218
$size = count($this->currentNesting);
219
// -2 because -1 is the last element, but we already checked that
220
$skipped_tags = false;
221
for ($i = $size - 2; $i >= 0; $i--) {
222
if ($this->currentNesting[$i]->name == $token->name) {
223
// current nesting is modified
// (array_splice() without a length removes and returns everything from
// index $i to the top of the stack, the matching element included)
224
$skipped_tags = array_splice($this->currentNesting, $i);
229
// we still didn't find the tag, so remove
230
if ($skipped_tags === false) {
231
if ($escape_invalid_tags) {
232
$result[] = new HTMLPurifier_Token_Text(
233
$generator->generateFromToken($token, $config, $context)
235
if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
237
// NOTE(review): no guard on $e is visible here either; it is probably on
// a line missing from this listing -- confirm
$e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
242
// okay, we found it, close all the skipped tags
243
// note that skipped tags contains the element we need closed
244
for ($i = count($skipped_tags) - 1; $i >= 0; $i--) {
245
// index 0 is the element that the end tag really closes, so the notice is
// only sent for the elements above it ($i non-zero) that lack the armor flag
if ($i && $e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
246
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
248
$result[] = $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
249
foreach ($this->injectors as $j => $x) { // $j, not $i!!!
250
$this->injectors[$j]->notifyEnd($new_token);
256
// the main loop is over: withdraw the loop state from the context
$context->destroy('CurrentNesting');
257
$context->destroy('InputTokens');
258
$context->destroy('InputIndex');
259
$context->destroy('CurrentToken');
261
// we're at the end now, fix all still unclosed tags (this is
262
// duplicated from the end of the loop with some slight modifications)
263
// not using $skipped_tags since it would invariably be all of them
264
if (!empty($this->currentNesting)) {
265
for ($i = count($this->currentNesting) - 1; $i >= 0; $i--) {
266
if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
267
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
269
$result[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
270
foreach ($this->injectors as $j => $x) { // $j, not $i!!!
271
$this->injectors[$j]->notifyEnd($new_token);
276
// drop the per-run state held on the object
unset($this->outputTokens, $this->injectors, $this->currentInjector,
277
$this->currentNesting, $this->inputTokens, $this->inputIndex);
282
/**
 * Commits one token, or splices an injector-supplied array of tokens back
 * into the input stream.
 *
 * When $token is an array it replaces the token at the current input index
 * and the index is stepped back by one; the injectors with indexes 0 up to
 * and including $this->currentInjector then have their skip counters
 * raised. The remaining visible code appends the token to the output and
 * updates the nesting stack for start and end tokens.
 *
 * NOTE(review): closing braces and any else branch are not visible in this
 * listing, so the exact branch structure should be confirmed against the
 * canonical file.
 *
 * @param $token   A single token object or an array of replacement tokens.
 * @param $config  Not used by the visible code.
 * @param $context Not used by the visible code.
 */
function processToken($token, $config, &$context) {
283
if (is_array($token)) {
284
// the original token was overloaded by an injector, time
285
// to do some fancy acrobatics
287
// $this->inputIndex is decremented so that the entire set gets
// re-processed: the increment of the main loop then lands on the first
// spliced-in token
289
array_splice($this->inputTokens, $this->inputIndex--, 1, $token);
291
// adjust the injector skips based on the array substitution
292
if ($this->injectors) {
293
$offset = count($token);
294
for ($i = 0; $i <= $this->currentInjector; $i++) {
295
// because of the skip back, we need to add one more
296
// for uninitialized injectors. I'm not exactly
297
// sure why this is the case, but I think it has to
298
// do with the fact that we're decrementing skips
299
// before re-checking text
300
if (!$this->injectors[$i]->skip) $this->injectors[$i]->skip++;
301
$this->injectors[$i]->skip += $offset;
306
// single token: emit it and keep the stack of open elements in step
$this->outputTokens[] = $token;
307
if ($token->type == 'start') {
308
$this->currentNesting[] = $token;
309
} elseif ($token->type == 'end') {
310
array_pop($this->currentNesting); // not actually used