~ubuntu-branches/debian/sid/ampache/sid

« back to all changes in this revision

Viewing changes to modules/phpmailer/extras/htmlfilter.php

  • Committer: Package Import Robot
  • Author(s): Charlie Smotherman
  • Date: 2013-08-27 13:19:48 UTC
  • mfrom: (1.2.9)
  • Revision ID: package-import@ubuntu.com-20130827131948-1czew0zxn6u70dtv
Tags: 3.6-rzb2752+dfsg-1
* New upstream snapshot.  Contains important bug fixes to the installer.
* Correct typo in ampache-common.postrm.
* Remove courtesy copy of php-getid3, during repack.  Closes: #701526
* Update package to use dh_linktree to make the needed sym links to the
  needed system libs that were removed during repack.
* Update debian/rules to reflect upstreams removing/moving of modules.
* Update debian/ampache-common.install to reflect upstreams removal of files.
* Updated to use new apache2.4 API. Closes: #669756
* Updated /debian/po/de.po thx David Prévot for the patch.  Closes:  #691963
* M3U import is now ordered, fixed upstream.  Closes: #684984
* Text input area has been resized so IPv6 addresses will now fit, fixed
  upstream.  Closes:  #716230
* Added ampache-common.preinst to make sure that the courtesy copies of code
  dirs are empty so dh_linktree can do its magic on upgrades.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
<?php
2
 
/**
3
 
 * htmlfilter.inc
4
 
 * ---------------
5
 
 * This set of functions allows you to filter html in order to remove
6
 
 * any malicious tags from it. Useful in cases when you need to filter
7
 
 * user input for any cross-site-scripting attempts.
8
 
 *
9
 
 * Copyright (C) 2002-2004 by Duke University
10
 
 *
11
 
 * This library is free software; you can redistribute it and/or
12
 
 * modify it under the terms of the GNU Lesser General Public
13
 
 * License as published by the Free Software Foundation; either
14
 
 * version 2.1 of the License, or (at your option) any later version.
15
 
 *
16
 
 * This library is distributed in the hope that it will be useful,
17
 
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18
 
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19
 
 * Lesser General Public License for more details.
20
 
 *
21
 
 * You should have received a copy of the GNU Lesser General Public
22
 
 * License along with this library; if not, write to the Free Software
23
 
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  
24
 
 * 02110-1301  USA
25
 
 *
26
 
 * @Author      Konstantin Riabitsev <icon@linux.duke.edu>
27
 
 * @Version 1.1 ($Date: 2011-07-04 14:02:23 -0400 (Mon, 04 Jul 2011) $)
28
 
 */
29
 
 
30
 
/**
31
 
 * @Author  Jim Jagielski <jim@jaguNET.com / jimjag@gmail.com>
32
 
 */
33
 
 
34
 
/**
 * Builds the textual representation of a tag from its name, its
 * attributes, and its type. Called internally by tln_sanitize.
 *
 * @param  $tagname      the name of the tag.
 * @param  $attary       array of attribute name => attribute value. Values
 *                       are emitted verbatim, so they must already carry
 *                       their own quotes. Anything that is not a non-empty
 *                       array is ignored.
 * @param  $tagtype      The type of the tag: 1 = opening tag, 2 = closing
 *                       tag, 3 = XHTML-style content-less tag.
 * @return               a string with the final tag representation.
 */
function tln_tagprint($tagname, $attary, $tagtype){
        if ($tagtype == 2){
                // Closing tags never carry attributes.
                return '</' . $tagname . '>';
        }
        $fulltag = '<' . $tagname;
        if (is_array($attary) && count($attary) > 0){
                $atts = array();
                // foreach rather than each(): each() no longer exists in PHP 8.
                foreach ($attary as $attname => $attvalue){
                        $atts[] = "$attname=$attvalue";
                }
                $fulltag .= ' ' . implode(' ', $atts);
        }
        if ($tagtype == 3){
                $fulltag .= ' /';
        }
        return $fulltag . '>';
}
64
 
 
65
 
/**
 * Lowercases a value in place. Intended to be handed to array_walk
 * as its callback.
 *
 * @param  $val the value to lowercase, received by reference.
 * @return              void; the result is written back through the reference.
 */
function tln_casenormalize(&$val){
        $lowered = strtolower($val);
        $val = $lowered;
}
75
 
 
76
 
/**
 * This function skips any whitespace from the current position within
 * a string and to the next non-whitespace value.
 *
 * @param  $body   the string
 * @param  $offset the offset within the string where we should start
 *                 looking for the next non-whitespace character.
 * @return         the location within the $body where the next
 *                 non-whitespace char is located. If there is no
 *                 whitespace at $offset, $offset is returned unchanged.
 */
function tln_skipspace($body, $offset){
        $matches = array();
        // substr() may yield false on old PHP when $offset is past the end;
        // normalise to a string so preg_match always gets valid input.
        $rest = (string) substr($body, $offset);
        // $matches[1] is a string, so it must be measured with strlen();
        // the previous sizeof() call on it is a TypeError on PHP 8.
        if (preg_match('/^(\s*)/s', $rest, $matches) === 1){
                $offset += strlen($matches[1]);
        }
        return $offset;
}
95
 
 
96
 
/**
 * Locates the next occurrence of a string inside another string. A thin
 * wrapper around strpos that reports "not found" as the end of the body
 * instead of false.
 *
 * @param  $body   The string to look for needle in.
 * @param  $offset Start looking from this position.
 * @param  $needle The character/string to look for.
 * @return         location of the next occurrence of the needle, or
 *                 strlen($body) if needle wasn't found.
 */
function tln_findnxstr($body, $offset, $needle){
        $found = strpos($body, $needle, $offset);
        return ($found === FALSE) ? strlen($body) : $found;
}
115
 
 
116
 
/**
 * Tries to match a PCRE-style regexp within the string, starting at
 * a given offset.
 *
 * @param  $body   The string to look for needle in.
 * @param  $offset Start looking from here.
 * @param  $reg    A PCRE-style regex to match. It is embedded between
 *                 '%' delimiters, without surrounding delimiters of its own.
 * @return         Returns a false if no matches found, or an array
 *                 with the following members:
 *                 - integer with the location of the match within $body
 *                 - string with whatever content between offset and the match
 *                 - string with whatever it is we matched
 */
function tln_findnxreg($body, $offset, $reg){
        $hits = Array();
        $tail = substr($body, $offset);
        // Lazy prefix capture, so group 2 is the earliest possible match.
        preg_match('%^(.*?)(' . $reg . ')%s', $tail, $hits);
        if (!isset($hits[0])){
                return false;
        }
        return Array($offset + strlen($hits[1]), $hits[1], $hits[2]);
}
144
 
 
145
 
/**
 * This function looks for the next tag.
 *
 * @param  $body   String where to look for the next tag.
 * @param  $offset Start looking from here.
 * @return         false if no more tags exist in the body, or
 *                 an array with the following members:
 *                 - string with the name of the tag (lowercased)
 *                 - array with attributes and their values, or false when
 *                   the tag ends right after its name. Attribute names are
 *                   lowercased; values are stored together with their quotes.
 *                 - integer with tag type (1, 2, or 3)
 *                 - integer where the tag starts (starting "<")
 *                 - integer where the tag ends (ending ">")
 *                 first three members will be false, if the tag is invalid.
 */
function tln_getnxtag($body, $offset){
        if ($offset > strlen($body)){
                return false;
        }
        $lt = tln_findnxstr($body, $offset, '<');
        if ($lt == strlen($body)){
                return false;
        }
        /**
         * We are here:
         * blah blah <tag attribute="value">
         * \---------^
         */
        $pos = tln_skipspace($body, $lt + 1);
        if ($pos >= strlen($body)){
                return array(false, false, false, $lt, strlen($body));
        }
        /**
         * There are 3 kinds of tags:
         * 1. Opening tag, e.g.:
         *        <a href="blah">
         * 2. Closing tag, e.g.:
         *        </a>
         * 3. XHTML-style content-less tag, e.g.:
         *        <img src="blah"/>
         */
        $tagtype = false;
        switch (substr($body, $pos, 1)){
        case '/':
                $tagtype = 2;
                $pos++;
                break;
        case '!':
                /**
                 * A comment or an SGML declaration. Either way it is
                 * reported as an invalid tag spanning up to its end.
                 */
                if (substr($body, $pos + 1, 2) == '--'){
                        $gt = strpos($body, '-->', $pos);
                        if ($gt === false){
                                $gt = strlen($body);
                        } else {
                                // Point at the ">" of the closing "-->".
                                $gt += 2;
                        }
                        return array(false, false, false, $lt, $gt);
                }
                $gt = tln_findnxstr($body, $pos, '>');
                return array(false, false, false, $lt, $gt);
        default:
                /**
                 * Assume tagtype 1 for now. If it's type 3, we'll switch values
                 * later.
                 */
                $tagtype = 1;
                break;
        }

        $tagname = '';
        /**
         * Look for the next character that is not [\w\-_], which will
         * indicate the end of the tag name.
         */
        $regary = tln_findnxreg($body, $pos, '[^\w\-_]');
        if ($regary === false){
                return array(false, false, false, $lt, strlen($body));
        }
        list($pos, $tagname, $match) = $regary;
        $tagname = strtolower($tagname);

        /**
         * $match can be either of these:
         * '>'  indicating the end of the tag entirely.
         * '\s' indicating the end of the tag name.
         * '/'  indicating that this is type-3 xhtml tag.
         *
         * Whatever else we find there indicates an invalid tag.
         */
        switch ($match){
        case '/':
                /**
                 * This is an xhtml-style tag with a closing / at the
                 * end, like so: <img src="blah"/>. Check if it's followed
                 * by the closing bracket. If not, then this tag is invalid
                 */
                if (substr($body, $pos, 2) == '/>'){
                        $pos++;
                        $tagtype = 3;
                } else {
                        $gt = tln_findnxstr($body, $pos, '>');
                        return array(false, false, false, $lt, $gt);
                }
                // Intentional fall-through: the tag is complete.
        case '>':
                return array($tagname, false, $tagtype, $lt, $pos);
        default:
                /**
                 * Anything but whitespace here makes the tag invalid;
                 * look for the next closing ">".
                 */
                if (!preg_match('/\s/', $match)){
                        $gt = tln_findnxstr($body, $lt, '>');
                        return array(false, false, false, $lt, $gt);
                }
        }

        /**
         * At this point we're here:
         * <tagname      attribute='blah'>
         * \-------^
         *
         * At this point we loop in order to find all attributes.
         */
        $attname = '';
        $attary = array();

        while ($pos <= strlen($body)){
                $pos = tln_skipspace($body, $pos);
                if ($pos == strlen($body)){
                        /**
                         * Non-closed tag.
                         */
                        return array(false, false, false, $lt, $pos);
                }
                /**
                 * See if we arrived at a ">" or "/>", which means that we reached
                 * the end of the tag.
                 */
                $matches = array();
                preg_match('%^(\s*)(>|/>)%s', substr($body, $pos), $matches);
                if (isset($matches[0]) && $matches[0]){
                        /**
                         * Yep. So we did.
                         */
                        $pos += strlen($matches[1]);
                        if ($matches[2] == '/>'){
                                $tagtype = 3;
                                $pos++;
                        }
                        return array($tagname, $attary, $tagtype, $lt, $pos);
                }

                /**
                 * There are several types of attributes, with optional
                 * [:space:] between members.
                 * Type 1:
                 *       attrname[:space:]=[:space:]'CDATA'
                 * Type 2:
                 *       attrname[:space:]=[:space:]"CDATA"
                 * Type 3:
                 *       attr[:space:]=[:space:]CDATA
                 * Type 4:
                 *       attrname
                 *
                 * We leave types 1 and 2 the same, type 3 we check for
                 * '"' and convert to "&quot;" if needed, then wrap in
                 * double quotes. Type 4 we convert into:
                 * attrname="yes".
                 */
                $regary = tln_findnxreg($body, $pos, '[^\w\-_]');
                if ($regary === false){
                        /**
                         * Looks like body ended before the end of tag.
                         */
                        return array(false, false, false, $lt, strlen($body));
                }
                list($pos, $attname, $match) = $regary;
                $attname = strtolower($attname);
                /**
                 * We arrived at the end of attribute name. Several things possible
                 * here:
                 * '>'  means the end of the tag and this is attribute type 4
                 * '/'  if followed by '>' means the same thing as above
                 * '\s' means a lot of things -- look what it's followed by.
                 *              anything else means the attribute is invalid.
                 */
                switch ($match){
                case '/':
                        /**
                         * This is an xhtml-style tag with a closing / at the
                         * end, like so: <img src="blah"/>. Check if it's followed
                         * by the closing bracket. If not, then this tag is invalid
                         */
                        if (substr($body, $pos, 2) == '/>'){
                                $pos++;
                                $tagtype = 3;
                        } else {
                                $gt = tln_findnxstr($body, $pos, '>');
                                return array(false, false, false, $lt, $gt);
                        }
                        // Intentional fall-through: record the type-4 attribute.
                case '>':
                        $attary[$attname] = '"yes"';
                        return array($tagname, $attary, $tagtype, $lt, $pos);
                default:
                        /**
                         * Skip whitespace and see what we arrive at.
                         */
                        $pos = tln_skipspace($body, $pos);
                        $char = substr($body, $pos, 1);
                        /**
                         * Two things are valid here:
                         * '=' means this is attribute type 1 2 or 3.
                         * \w means this was attribute type 4.
                         * anything else we ignore and re-loop. End of tag and
                         * invalid stuff will be caught by our checks at the beginning
                         * of the loop.
                         */
                        if ($char == '='){
                                $pos++;
                                $pos = tln_skipspace($body, $pos);
                                /**
                                 * Here are 3 possibilities:
                                 * "'"  attribute type 1
                                 * '"'  attribute type 2
                                 * everything else is the content of tag type 3
                                 */
                                $quot = substr($body, $pos, 1);
                                if ($quot == '\''){
                                        $regary = tln_findnxreg($body, $pos + 1, '\'');
                                        if ($regary === false){
                                                return array(false, false, false, $lt, strlen($body));
                                        }
                                        list($pos, $attval, $match) = $regary;
                                        // Step over the closing quote.
                                        $pos++;
                                        $attary[$attname] = '\'' . $attval . '\'';
                                } else if ($quot == '"'){
                                        $regary = tln_findnxreg($body, $pos + 1, '\"');
                                        if ($regary === false){
                                                return array(false, false, false, $lt, strlen($body));
                                        }
                                        list($pos, $attval, $match) = $regary;
                                        // Step over the closing quote.
                                        $pos++;
                                        $attary[$attname] = '"' . $attval . '"';
                                } else {
                                        /**
                                         * These are hateful. Look for \s, or >.
                                         */
                                        $regary = tln_findnxreg($body, $pos, '[\s>]');
                                        if ($regary === false){
                                                return array(false, false, false, $lt, strlen($body));
                                        }
                                        list($pos, $attval, $match) = $regary;
                                        /**
                                         * If it's ">" it will be caught at the top.
                                         */
                                        $attval = preg_replace('/\"/s', '&quot;', $attval);
                                        $attary[$attname] = '"' . $attval . '"';
                                }
                        } else if (preg_match('|[\w/>]|', $char)) {
                                /**
                                 * That was attribute type 4.
                                 */
                                $attary[$attname] = '"yes"';
                        } else {
                                /**
                                 * An illegal character. Find next '>' and return.
                                 */
                                $gt = tln_findnxstr($body, $pos, '>');
                                return array(false, false, false, $lt, $gt);
                        }
                }
        }
        /**
         * The fact that we got here indicates that the tag end was never
         * found. Return invalid tag indication so it gets stripped.
         */
        return array(false, false, false, $lt, strlen($body));
}
435
 
 
436
 
/**
 * Translates entities into literal values so they can be checked.
 *
 * @param $attvalue the by-ref value to check; every matched entity in it
 *                  is replaced with the corresponding single byte.
 * @param $regex    the regular expression to check against. Its first
 *                  capture group must hold the numeric part of the entity.
 * @param $hex      whether the entities are hexadecimal.
 * @return          True or False depending on whether there were matches.
 */
function tln_deent(&$attvalue, $regex, $hex=false){
        $matches = array();
        preg_match_all($regex, $attvalue, $matches);
        // empty() also covers $matches being left empty by a failed match call.
        if (!is_array($matches) || empty($matches[0])){
                return false;
        }
        $repl = array();
        foreach ($matches[0] as $i => $entity){
                $numval = $matches[1][$i];
                if ($hex){
                        $numval = hexdec($numval);
                }
                // Reduce modulo 256 in floating point first, so an absurdly
                // long digit run cannot hand chr() a value outside the
                // integer range.
                $repl[$entity] = chr((int) fmod((float) $numval, 256));
        }
        $attvalue = strtr($attvalue, $repl);
        return true;
}
463
 
 
464
 
/**
 * Decodes entity-encoded and backslash-escaped characters in an
 * attribute value into plain 8-bit characters, so that later checks
 * run against what the value really says.
 *
 * @param  $attvalue A string to run entity check against.
 * @return                       Nothing, modifies a reference value.
 */
function tln_defang(&$attvalue){
        /**
         * Nothing to decode unless an ampersand or a backslash is present.
         */
        $has_amp   = (strpos($attvalue, '&') !== false);
        $has_slash = (strpos($attvalue, '\\') !== false);
        if (!$has_amp && !$has_slash){
                return;
        }
        /**
         * Keep decoding until a full pass finds nothing: decimal entities,
         * then hexadecimal entities, then backslash-digit escapes. The ||
         * chain stops at the first decoder that reports a match.
         */
        do {
                $changed = tln_deent($attvalue, '/\&#0*(\d+);*/s')
                        || tln_deent($attvalue, '/\&#x0*((\d|[a-f])+);*/si', true)
                        || tln_deent($attvalue, '/\\\\(\d+)/s', true);
        } while ($changed);
        $attvalue = stripslashes($attvalue);
}
490
 
 
491
 
/**
 * Strips every tab, carriage return, newline, NUL byte and space from
 * an attribute value, so that something like "java[tab]script" cannot
 * slip past checks that look for "javascript".
 *
 * @param  attvalue      The attribute value before extraneous spaces removed.
 * @return attvalue      Nothing, modifies a reference value.
 */
function tln_unspace(&$attvalue){
        $unwanted = Array("\t", "\r", "\n", "\0", " ");
        // Leave the value untouched when none of the characters occur.
        if (strcspn($attvalue, implode('', $unwanted)) == strlen($attvalue)){
                return;
        }
        $attvalue = str_replace($unwanted, '', $attvalue);
}
506
 
 
507
 
/**
 * This function runs various checks against the attributes.
 *
 * @param  $tagname         String with the name of the tag.
 * @param  $attary          Array with all tag attributes (name => value).
 * @param  $rm_attnames     Array of tag-name regex => list of attribute-name
 *                          regexes; a matching attribute is removed.
 * @param  $bad_attvals     Array of tag-name regex => array of
 *                          attribute-name regex => array(patterns,
 *                          replacements), fed to preg_replace against the
 *                          decoded attribute value.
 * @param  $add_attr_to_tag Array of tag-name regex => array of attributes
 *                          merged into the result for matching tags.
 * @return                  Array with modified attributes.
 */
function tln_fixatts($tagname,
                     $attary,
                     $rm_attnames,
                     $bad_attvals,
                     $add_attr_to_tag
                     ){
        // Walk a snapshot of the keys: $attary itself is modified inside the
        // loop, and each() is not available on PHP 8.
        foreach (array_keys($attary) as $attname){
                $attvalue = $attary[$attname];
                /**
                 * See if this attribute should be removed.
                 */
                foreach ($rm_attnames as $matchtag=>$matchattrs){
                        if (preg_match($matchtag, $tagname)){
                                foreach ($matchattrs as $matchattr){
                                        if (preg_match($matchattr, $attname)){
                                                unset($attary[$attname]);
                                                // Move on to the next attribute. A plain
                                                // "continue" would only leave the innermost
                                                // loop, letting the value checks below put
                                                // the removed attribute back.
                                                continue 3;
                                        }
                                }
                        }
                }
                /**
                 * Remove any backslashes, entities, or extraneous whitespace.
                 * This works on a local copy; the stored value only changes
                 * if one of the checks below rewrites it.
                 */
                tln_defang($attvalue);
                tln_unspace($attvalue);

                /**
                 * Now let's run checks on the attvalues.
                 */
                foreach ($bad_attvals as $matchtag=>$matchattrs){
                        if (preg_match($matchtag, $tagname)){
                                foreach ($matchattrs as $matchattr=>$valary){
                                        if (preg_match($matchattr, $attname)){
                                                /**
                                                 * There are two arrays in valary.
                                                 * First is matches.
                                                 * Second one is replacements
                                                 */
                                                list($valmatch, $valrepl) = $valary;
                                                $newvalue = preg_replace($valmatch, $valrepl, $attvalue);
                                                // Strict comparison: numeric-looking strings
                                                // must not be treated as equal when they differ.
                                                if ($newvalue !== $attvalue){
                                                        $attary[$attname] = $newvalue;
                                                }
                                        }
                                }
                        }
                }
        }
        /**
         * See if we need to append any attributes to this tag.
         */
        foreach ($add_attr_to_tag as $matchtag=>$addattary){
                if (preg_match($matchtag, $tagname)){
                        $attary = array_merge($attary, $addattary);
                }
        }
        return $attary;
}
579
 
 
580
 
/**
581
 
 *
582
 
 * @param $body                                 the string with HTML you wish to filter
583
 
 * @param $tag_list                             see description above
584
 
 * @param $rm_tags_with_content see description above
585
 
 * @param $self_closing_tags    see description above
586
 
 * @param $force_tag_closing    see description above
587
 
 * @param $rm_attnames                  see description above
588
 
 * @param $bad_attvals                  see description above
589
 
 * @param $add_attr_to_tag              see description above
590
 
 * @return                                              tln_sanitized html safe to show on your pages.
591
 
 */
592
 
function tln_sanitize($body, 
593
 
                                  $tag_list, 
594
 
                                  $rm_tags_with_content,
595
 
                                  $self_closing_tags,
596
 
                                  $force_tag_closing,
597
 
                                  $rm_attnames,
598
 
                                  $bad_attvals,
599
 
                                  $add_attr_to_tag
600
 
                                  )
601
 
{
602
 
        $me = 'tln_sanitize';
603
 
        /**
604
 
         * Normalize rm_tags and rm_tags_with_content.
605
 
         */
606
 
        $rm_tags = array_shift($tag_list);
607
 
        @array_walk($tag_list, 'tln_casenormalize');
608
 
        @array_walk($rm_tags_with_content, 'tln_casenormalize');
609
 
        @array_walk($self_closing_tags, 'tln_casenormalize');
610
 
        /**
611
 
         * See if tag_list is of tags to remove or tags to allow.
612
 
         * false  means remove these tags
613
 
         * true   means allow these tags
614
 
         */
615
 
        $curpos = 0;
616
 
        $open_tags = Array();
617
 
        $trusted = "<!-- begin tln_sanitized html -->\n";
618
 
        $skip_content = false;
619
 
        /**
620
 
         * Take care of netscape's stupid javascript entities like
621
 
         * &{alert('boo')};
622
 
         */
623
 
        $body = preg_replace('/&(\{.*?\};)/si', '&amp;\\1', $body);
624
 
        while (($curtag = tln_getnxtag($body, $curpos)) != FALSE){
625
 
                list($tagname, $attary, $tagtype, $lt, $gt) = $curtag;
626
 
                $free_content = substr($body, $curpos, $lt - $curpos);
627
 
                if ($skip_content == false){
628
 
                        $trusted .= $free_content;
629
 
                } else {
630
 
                }
631
 
                if ($tagname != FALSE){
632
 
                        if ($tagtype == 2){
633
 
                                if ($skip_content == $tagname){
634
 
                                        /**
635
 
                                         * Got to the end of tag we needed to remove.
636
 
                                         */
637
 
                                        $tagname = false;
638
 
                                        $skip_content = false;
639
 
                                } else {
640
 
                                        if ($skip_content == false){
641
 
                                                if (isset($open_tags{$tagname}) && 
642
 
                                                        $open_tags{$tagname} > 0){
643
 
                                                        $open_tags{$tagname}--;
644
 
                                                } else {
645
 
                                                        $tagname = false;
646
 
                                                }
647
 
                                        } else {
648
 
                                        }
649
 
                                }
650
 
                        } else {
651
 
                                /**
652
 
                                 * $rm_tags_with_content
653
 
                                 */
654
 
                                if ($skip_content == false){
655
 
                                        /**
656
 
                                         * See if this is a self-closing type and change
657
 
                                         * tagtype appropriately.
658
 
                                         */
659
 
                                        if ($tagtype == 1
660
 
                                                && in_array($tagname, $self_closing_tags)){
661
 
                                                $tagtype = 3;
662
 
                                        }
663
 
                                        /**
664
 
                                         * See if we should skip this tag and any content
665
 
                                         * inside it.
666
 
                                         */
667
 
                                        if ($tagtype == 1 
668
 
                                                && in_array($tagname, $rm_tags_with_content)){
669
 
                                                $skip_content = $tagname;
670
 
                                        } else {
671
 
                                                if (($rm_tags == false 
672
 
                                                         && in_array($tagname, $tag_list)) ||
673
 
                                                        ($rm_tags == true 
674
 
                                                         && !in_array($tagname, $tag_list))){
675
 
                                                        $tagname = false;
676
 
                                                } else {
677
 
                                                        if ($tagtype == 1){
678
 
                                                                if (isset($open_tags{$tagname})){
679
 
                                                                        $open_tags{$tagname}++;
680
 
                                                                } else {
681
 
                                                                        $open_tags{$tagname} = 1;
682
 
                                                                }
683
 
                                                        }
684
 
                                                        /**
685
 
                                                         * This is where we run other checks.
686
 
                                                         */
687
 
                                                        if (is_array($attary) && sizeof($attary) > 0){
688
 
                                                                $attary = tln_fixatts($tagname,
689
 
                                                                                                  $attary,
690
 
                                                                                                  $rm_attnames,
691
 
                                                                                                  $bad_attvals,
692
 
                                                                                                  $add_attr_to_tag);
693
 
                                                        }
694
 
                                                }
695
 
                                        }
696
 
                                } else {
697
 
                                }
698
 
                        }
699
 
                        if ($tagname != false && $skip_content == false){
700
 
                                $trusted .= tln_tagprint($tagname, $attary, $tagtype);
701
 
                        }
702
 
                } else {
703
 
                }
704
 
                $curpos = $gt + 1;
705
 
        }
706
 
        $trusted .= substr($body, $curpos, strlen($body) - $curpos);
707
 
        if ($force_tag_closing == true){
708
 
                foreach ($open_tags as $tagname=>$opentimes){
709
 
                        while ($opentimes > 0){
710
 
                                $trusted .= '</' . $tagname . '>';
711
 
                                $opentimes--;
712
 
                        }
713
 
                }
714
 
                $trusted .= "\n";
715
 
        }
716
 
        $trusted .= "<!-- end tln_sanitized html -->\n";
717
 
        return $trusted;
718
 
}
719
 
 
720
 
// 
721
 
// Use the nifty htmlfilter library
722
 
//
723
 
 
724
 
 
725
 
/**
 * Sanitize an HTML fragment using a fixed, conservative rule set.
 *
 * Builds the rule tables below and hands them to tln_sanitize(), which
 * returns the filtered markup wrapped in "tln_sanitized html" comments.
 *
 * Each entry of $bad_attvals is a pair of parallel lists:
 * index 0 holds the regexes, index 1 holds the replacements. The two lists
 * MUST have the same length and order; a replacement belongs to the pattern
 * at the same position (presumably they are handed to preg_replace() by
 * tln_fixatts() -- that helper is defined elsewhere in this file).
 *
 * @param string  $body                  HTML to filter.
 * @param string  $trans_image_path      URL substituted for blocked image
 *                                       sources. It is interpolated into regex
 *                                       replacement strings.
 *                                       NOTE(review): a path containing "\" or
 *                                       "$" followed by digits would be read as
 *                                       a back-reference -- confirm callers
 *                                       only pass plain paths.
 * @param boolean $block_external_images When true, http(s) URLs in src,
 *                                       background and style url() values are
 *                                       replaced with $trans_image_path.
 * @return string The sanitized HTML.
 */
function HTMLFilter($body, $trans_image_path, $block_external_images = false) {

    // First element is the mode flag consumed by tln_sanitize():
    // false means the tags listed after it are removed.
    $tag_list = array(
        false,
        "object",
        "meta",
        "html",
        "head",
        "base",
        "link",
        "frame",
        "iframe",
        "plaintext",
        "marquee"
    );

    // These tags are dropped together with everything they enclose.
    $rm_tags_with_content = array(
        "script",
        "applet",
        "embed",
        "title",
        "frameset",
        "xmp",
        "xml"
    );

    // Opening tags with these names are treated as self-closing.
    $self_closing_tags = array(
        "img",
        "br",
        "hr",
        "input",
        "outbind"
    );

    // Emit closing tags for anything still open at the end of the body.
    $force_tag_closing = true;

    // Attribute names to strip, keyed by a regex matched against the tag name.
    $rm_attnames = array(
        "/.*/" =>
            array(
                // "/target/i",
                "/^on.*/i",
                "/^dynsrc/i",
                "/^data.*/i",
                "/^lowsrc.*/i"
            )
    );

    // Attribute value rewrites: tag regex => attribute regex =>
    // array(patterns, replacements). Keep both lists the same length.
    $bad_attvals = array(
        "/.*/" =>
        array(
            "/^src|background/i" =>
            array(
                array(
                    "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si",
                    "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si",
                    "/^([\'\"])\s*about\s*:.*([\'\"])/si"
                ),
                array(
                    "\\1$trans_image_path\\2",
                    "\\1$trans_image_path\\2",
                    "\\1$trans_image_path\\2"
                )
            ),
            "/^href|action/i" =>
            array(
                array(
                    "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si",
                    "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si",
                    "/^([\'\"])\s*about\s*:.*([\'\"])/si"
                ),
                array(
                    "\\1#\\1",
                    "\\1#\\1",
                    "\\1#\\1"
                )
            ),
            "/^style/i" =>
            array(
                array(
                    "/expression/i",
                    "/binding/i",
                    "/behaviou*r/i",
                    "/include-source/i",
                    "/position\s*:\s*absolute/i",
                    "/url\s*\(\s*([\'\"])\s*\S+script\s*:.*([\'\"])\s*\)/si",
                    "/url\s*\(\s*([\'\"])\s*mocha\s*:.*([\'\"])\s*\)/si",
                    "/url\s*\(\s*([\'\"])\s*about\s*:.*([\'\"])\s*\)/si",
                    "/(.*)\s*:\s*url\s*\(\s*([\'\"]*)\s*\S+script\s*:.*([\'\"]*)\s*\)/si"
                ),
                array(
                    "idiocy",
                    "idiocy",
                    "idiocy",
                    "idiocy",
                    "",
                    "url(\\1#\\1)",
                    "url(\\1#\\1)",
                    "url(\\1#\\1)",
                    "\\1:url(\\2#\\3)"
                )
            )
        )
    );

    if ($block_external_images) {
        // Append the pattern and its replacement at the same position in
        // both lists so they stay paired.
        $bad_attvals['/.*/']['/^src|background/i'][0][] =
            '/^([\'\"])\s*https*:.*([\'\"])/si';
        $bad_attvals['/.*/']['/^src|background/i'][1][] =
            "\\1$trans_image_path\\1";
        $bad_attvals['/.*/']['/^style/i'][0][] =
            '/url\(([\'\"])\s*https*:.*([\'\"])\)/si';
        $bad_attvals['/.*/']['/^style/i'][1][] =
            "url(\\1$trans_image_path\\1)";
    }

    // Attributes to add, keyed by a regex matched against the tag name.
    $add_attr_to_tag = array(
        "/^a$/i" =>
            array('target' => '"_blank"')
    );

    return tln_sanitize(
        $body,
        $tag_list,
        $rm_tags_with_content,
        $self_closing_tags,
        $force_tag_closing,
        $rm_attnames,
        $bad_attvals,
        $add_attr_to_tag
    );
}
860
 
 
861
 
?>