~clint-fewbar/ubuntu/precise/php5/php5-5.4-merge

« back to all changes in this revision

Viewing changes to ext/standard/html.c

  • Committer: Package Import Robot
  • Author(s): Chuck Short
  • Date: 2011-02-22 09:46:37 UTC
  • mfrom: (1.1.20) (0.3.18 sid)
  • Revision ID: package-import@ubuntu.com-20110222094637-nlu2tvb7oqgaarl0
Tags: 5.3.5-1ubuntu1
* Merge from debian/unstable. Remaining changes:
 - debian/control:
    * Dropped firebird2.1-dev, libc-client-dev, libmcrypt-dev as they are in universe.
    * Dropped libmysqlclient15-dev, build against mysql 5.1.
    * Dropped libcurl-dev, which is not in the archive.
    * Suggest php5-suhosin rather than recommend it.
    * Dropped php5-imap, php5-interbase, php5-mcrypt since we have versions 
      already in universe.
    * Dropped libonig-dev and libqgdbm since they are in universe (will be re-added in lucid+1).
    * Dropped locales-all.
  - modulelist: Drop imap, interbase, sybase, and mcrypt.
  - debian/rules:
    * Dropped building of mcrypt, imap, and interbase.
    * Install apport hook for php5.
    * Stop the mysql instance on clean, just in case we failed in tests.

Show diffs side-by-side

added added

removed removed

Lines of Context:
18
18
   +----------------------------------------------------------------------+
19
19
*/
20
20
 
21
 
/* $Id: html.c 296121 2010-03-12 16:19:25Z moriyoshi $ */
 
21
/* $Id: html.c 304404 2010-10-14 19:14:06Z cataphract $ */
22
22
 
23
23
/*
24
24
 * HTML entity resources:
540
540
                                        MB_WRITE(c);
541
541
                                        this_char = c;
542
542
                                        pos++;
543
 
                                } else if (c < 0xc0) {
 
543
                                } else if (c < 0xc2) {
544
544
                                        MB_FAILURE(pos);
545
545
                                } else if (c < 0xe0) {
546
546
                                        CHECK_LEN(pos, 2);
572
572
                                        MB_WRITE((unsigned char)str[pos + 1]);
573
573
                                        MB_WRITE((unsigned char)str[pos + 2]);
574
574
                                        pos += 3;
575
 
                                } else if (c < 0xf8) {
 
575
                                } else if (c < 0xf5) {
576
576
                                        CHECK_LEN(pos, 4);
577
577
                                        if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
578
578
                                                MB_FAILURE(pos);
584
584
                                                MB_FAILURE(pos);
585
585
                                        }
586
586
                                        this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f);
587
 
                                        if (this_char < 0x10000) {
 
587
                                        if (this_char < 0x10000 || this_char > 0x10FFFF) {
588
588
                                                MB_FAILURE(pos);
589
589
                                        }
590
590
                                        MB_WRITE((unsigned char)c);
867
867
/* }}} */
868
868
 
869
869
/* {{{ php_utf32_utf8 */
870
 
size_t php_utf32_utf8(unsigned char *buf, int k)
 
870
size_t php_utf32_utf8(unsigned char *buf, unsigned k)
871
871
{
872
872
        size_t retval = 0;
873
873
 
1020
1020
                                                code = strtol(p + 2, &next, 10);
1021
1021
                                        }
1022
1022
 
1023
 
                                        if (next != NULL && *next == ';') {
 
1023
                                        if (code == '\'' && !(quote_style & ENT_HTML_QUOTE_SINGLE) ||
 
1024
                                                code == '"' && !(quote_style & ENT_HTML_QUOTE_DOUBLE)) {
 
1025
                                                invalid_code = 1;
 
1026
                                        }
 
1027
 
 
1028
                                        if (next != NULL && *next == ';' && !invalid_code) {
1024
1029
                                                switch (charset) {
1025
1030
                                                        case cs_utf_8:
1026
1031
                                                                q += php_utf32_utf8(q, code);
1032
1037
                                                                if ((code >= 0x80 && code < 0xa0) || code > 0xff) {
1033
1038
                                                                        invalid_code = 1;
1034
1039
                                                                } else {
1035
 
                                                                        if (code == 39 || !quote_style) {
1036
 
                                                                                invalid_code = 1;
1037
 
                                                                        } else {
1038
 
                                                                                *(q++) = code;
1039
 
                                                                        }
 
1040
                                                                        *(q++) = code;
1040
1041
                                                                }
1041
1042
                                                                break;
1042
1043
 
1407
1408
}
1408
1409
/* }}} */
1409
1410
 
1410
 
/* {{{ proto array get_html_translation_table([int table [, int quote_style]])
 
1411
/* {{{ proto array get_html_translation_table([int table [, int quote_style [, string charset_hint]]])
1411
1412
   Returns the internal translation table used by htmlspecialchars and htmlentities */
1412
1413
PHP_FUNCTION(get_html_translation_table)
1413
1414
{
1414
1415
        long which = HTML_SPECIALCHARS, quote_style = ENT_COMPAT;
1415
 
        int i, j;
1416
 
        char ind[2];
1417
 
        enum entity_charset charset = determine_charset(NULL TSRMLS_CC);
 
1416
        unsigned int i;
 
1417
        int j;
 
1418
        unsigned char ind[5]; /* max # of 8-bit code units (4; for UTF-8) + 1 for \0 */
 
1419
        void *dummy;
 
1420
        char *charset_hint = NULL;
 
1421
        int charset_hint_len;
 
1422
        enum entity_charset charset;
1418
1423
 
1419
 
        if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|ll", &which, &quote_style) == FAILURE) {
 
1424
        if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|lls",
 
1425
                        &which, &quote_style, &charset_hint, &charset_hint_len) == FAILURE) {
1420
1426
                return;
1421
1427
        }
1422
1428
 
 
1429
        charset = determine_charset(charset_hint TSRMLS_CC);
 
1430
 
1423
1431
        array_init(return_value);
1424
1432
 
1425
 
        ind[1] = 0;
1426
 
 
1427
1433
        switch (which) {
1428
 
                case HTML_ENTITIES:
1429
 
                        for (j=0; entity_map[j].charset != cs_terminator; j++) {
1430
 
                                if (entity_map[j].charset != charset)
 
1434
        case HTML_ENTITIES:
 
1435
                for (j = 0; entity_map[j].charset != cs_terminator; j++) {
 
1436
                        if (entity_map[j].charset != charset)
 
1437
                                continue;
 
1438
                        for (i = 0; i <= entity_map[j].endchar - entity_map[j].basechar; i++) {
 
1439
                                char buffer[16];
 
1440
                                unsigned k;
 
1441
                                size_t written;
 
1442
 
 
1443
                                if (entity_map[j].table[i] == NULL)
1431
1444
                                        continue;
1432
 
                                for (i = 0; i <= entity_map[j].endchar - entity_map[j].basechar; i++) {
1433
 
                                        char buffer[16];
1434
 
 
1435
 
                                        if (entity_map[j].table[i] == NULL)
1436
 
                                                continue;
1437
 
                                        /* what about wide chars here ?? */
1438
 
                                        ind[0] = i + entity_map[j].basechar;
1439
 
                                        snprintf(buffer, sizeof(buffer), "&%s;", entity_map[j].table[i]);
1440
 
                                        add_assoc_string(return_value, ind, buffer, 1);
1441
 
 
 
1445
                                        
 
1446
                                k = i + entity_map[j].basechar;
 
1447
 
 
1448
                                switch (charset) {
 
1449
                                case cs_utf_8:
 
1450
                                        written = php_utf32_utf8(ind, k);
 
1451
                                        ind[written] = '\0';
 
1452
                                        break;
 
1453
                                case cs_big5:
 
1454
                                case cs_gb2312:
 
1455
                                case cs_big5hkscs:
 
1456
                                case cs_sjis:
 
1457
                                        /* we have no mappings for these, but if we had... */
 
1458
                                        /* break through */
 
1459
                                default: /* one byte */
 
1460
                                        written = 1;
 
1461
                                        ind[0] = (unsigned char)k;
 
1462
                                        ind[1] = '\0';
 
1463
                                        break;
 
1464
                                }
 
1465
 
 
1466
                                snprintf(buffer, sizeof(buffer), "&%s;", entity_map[j].table[i]);
 
1467
                                if (zend_hash_find(Z_ARRVAL_P(return_value), (const char*)ind, written+1, &dummy) == FAILURE) {
 
1468
                                        /* in case of the single quote, which is repeated, the first one wins,
 
1469
                                                * so don't replace the existing mapping */
 
1470
                                        add_assoc_string(return_value, (const char*)ind, buffer, 1);
1442
1471
                                }
1443
1472
                        }
1444
 
                        /* break thru */
1445
 
 
1446
 
                case HTML_SPECIALCHARS:
1447
 
                        for (j = 0; basic_entities[j].charcode != 0; j++) {
1448
 
 
1449
 
                                if (basic_entities[j].flags && (quote_style & basic_entities[j].flags) == 0)
1450
 
                                        continue;
 
1473
                }
 
1474
                /* break thru */
 
1475
 
 
1476
        case HTML_SPECIALCHARS:
 
1477
                add_assoc_stringl(return_value, "&", "&amp;", sizeof("&amp;") - 1, 1);
 
1478
                for (j = 0; basic_entities[j].charcode != 0; j++) {
 
1479
                        if (basic_entities[j].flags && (quote_style & basic_entities[j].flags) == 0)
 
1480
                                continue;
1451
1481
                                
1452
 
                                ind[0] = (unsigned char)basic_entities[j].charcode;
1453
 
                                add_assoc_stringl(return_value, ind, basic_entities[j].entity, basic_entities[j].entitylen, 1);
 
1482
                        ind[0] = (unsigned char)basic_entities[j].charcode;
 
1483
                        ind[1] = '\0';
 
1484
                        if (zend_hash_find(Z_ARRVAL_P(return_value), (const char*)ind, 2, &dummy) == FAILURE) {
 
1485
                                add_assoc_stringl(return_value, ind, basic_entities[j].entity,
 
1486
                                        basic_entities[j].entitylen, 1);
1454
1487
                        }
1455
 
                        add_assoc_stringl(return_value, "&", "&amp;", sizeof("&amp;") - 1, 1);
 
1488
                }
1456
1489
 
1457
 
                        break;
 
1490
                break;
1458
1491
        }
1459
1492
}
1460
1493
/* }}} */