~clint-fewbar/ubuntu/natty/php5/merge-5.3.3-3

« back to all changes in this revision

Viewing changes to ext/standard/html.c

  • Committer: Bazaar Package Importer
  • Author(s): Chuck Short
  • Date: 2010-01-26 14:09:58 UTC
  • mfrom: (1.1.16 upstream) (0.3.6 sid)
  • Revision ID: james.westby@ubuntu.com-20100126140958-sos69zwa00q2nt19
Tags: 5.2.12.dfsg.1-2ubuntu1
* Merge from debian testing.  Remaining changes:
  - debian/control, debian/rules: Disable a few build dependencies and
    accompanying binary packages which we do not want to support in main:
    + firebird2-dev/php5-interbase (we have a separate php-interbase source)
    + libc-client/php5-imap (we have a separate php-imap source)
    + libmcrypt-dev/php5-mcrypt (separate php-mcrypt source)
    + readline support again, now that the libedit issue is fixed.
  - debian/control: Add build dependency: libedit-dev (>= 2.9.cvs.20050518-1)
    CLI readline support.
  - debian/rules:
    + Correctly mangle PHP5_* macros for lpia
  - debian/control:
    + Rename Vcs-Browser & Vcs-Git to XS-Original-Vcs-Browser & XS-Original-Vcs-Git (LP: #323731).
  - debian/control: Move php5-suhosin to Suggests.
  - debian/rules: Fix broken symlink for pear.
  - main/php_version.h: updated with Ubuntu version info
  - debian/patches/series: Re-enable the 033-we_WANT_libtool.patch patch
  - debian/rules, debian/source_php5.py: Install apport hook. 
* Dropped patches: CVE-2009-3557.patch and CVE-2009-3558.patch, no longer needed.

Show diffs side-by-side

added added

removed removed

Lines of Context:
18
18
   +----------------------------------------------------------------------+
19
19
*/
20
20
 
21
 
/* $Id: html.c 272374 2008-12-31 11:17:49Z sebastian $ */
 
21
/* $Id: html.c 291821 2009-12-07 15:41:43Z moriyoshi $ */
22
22
 
23
23
/*
24
24
 * HTML entity resources:
484
484
                        }                        \
485
485
                        mbseq[mbpos++] = (mbchar); }
486
486
 
 
487
/* skip one byte and return */
 
488
#define MB_FAILURE(pos) do { \
 
489
        *newpos = pos + 1; \
 
490
        *status = FAILURE; \
 
491
        return 0; \
 
492
} while (0)
 
493
 
487
494
#define CHECK_LEN(pos, chars_need)                      \
488
 
        if((str_len - (pos)) < chars_need) {    \
489
 
                *status = FAILURE;                                      \
490
 
                return 0;                                                       \
 
495
        if (chars_need < 1) {                                           \
 
496
                if((str_len - (pos)) < chars_need) {    \
 
497
                        *newpos = pos;                                          \
 
498
                        *status = FAILURE;                                      \
 
499
                        return 0;                                                       \
 
500
                }                                                                               \
 
501
        } else {                                                                        \
 
502
                if((str_len - (pos)) < chars_need) {    \
 
503
                        *newpos = pos + 1;                                      \
 
504
                        *status = FAILURE;                                      \
 
505
                        return 0;                                                       \
 
506
                }                                                                               \
491
507
        }
492
508
 
493
509
/* {{{ get_next_char
494
510
 */
495
 
inline static unsigned short get_next_char(enum entity_charset charset,
 
511
inline static unsigned int get_next_char(enum entity_charset charset,
496
512
                unsigned char * str,
497
513
                int str_len,
498
514
                int * newpos,
503
519
        int pos = *newpos;
504
520
        int mbpos = 0;
505
521
        int mbspace = *mbseqlen;
506
 
        unsigned short this_char = str[pos++];
 
522
        unsigned int this_char = 0;
507
523
        unsigned char next_char;
508
524
 
509
525
        *status = SUCCESS;
510
 
        
 
526
 
511
527
        if (mbspace <= 0) {
512
528
                *mbseqlen = 0;
513
 
                return this_char;
 
529
                CHECK_LEN(pos, 1);
 
530
                *newpos = pos + 1;
 
531
                return str[pos];
514
532
        }
515
 
        
516
 
        MB_WRITE((unsigned char)this_char);
517
 
        
 
533
 
518
534
        switch (charset) {
519
535
                case cs_utf_8:
520
536
                        {
521
 
                                unsigned long utf = 0;
522
 
                                int stat = 0;
523
 
                                int more = 1;
524
 
 
525
 
                                /* unpack utf-8 encoding into a wide char.
526
 
                                 * Code stolen from the mbstring extension */
527
 
 
528
 
                                do {
 
537
                                unsigned char c;
 
538
                                CHECK_LEN(pos, 1);
 
539
                                c = str[pos];
 
540
                                if (c < 0x80) {
 
541
                                        MB_WRITE(c);
 
542
                                        this_char = c;
 
543
                                        pos++;
 
544
                                } else if (c < 0xc0) {
 
545
                                        MB_FAILURE(pos);
 
546
                                } else if (c < 0xe0) {
 
547
                                        CHECK_LEN(pos, 2);
 
548
                                        if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
 
549
                                                MB_FAILURE(pos);
 
550
                                        }
 
551
                                        this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f);
529
552
                                        if (this_char < 0x80) {
530
 
                                                more = 0;
531
 
                                                if(stat) {
532
 
                                                        /* we didn't finish the UTF sequence correctly */
533
 
                                                        *status = FAILURE;
534
 
                                                }
535
 
                                                break;
536
 
                                        } else if (this_char < 0xc0) {
537
 
                                                switch (stat) {
538
 
                                                        case 0x10:      /* 2, 2nd */
539
 
                                                        case 0x21:      /* 3, 3rd */
540
 
                                                        case 0x32:      /* 4, 4th */
541
 
                                                        case 0x43:      /* 5, 5th */
542
 
                                                        case 0x54:      /* 6, 6th */
543
 
                                                                /* last byte in sequence */
544
 
                                                                more = 0;
545
 
                                                                utf |= (this_char & 0x3f);
546
 
                                                                this_char = (unsigned short)utf;
547
 
                                                                break;
548
 
                                                        case 0x20:      /* 3, 2nd */
549
 
                                                        case 0x31:      /* 4, 3rd */
550
 
                                                        case 0x42:      /* 5, 4th */
551
 
                                                        case 0x53:      /* 6, 5th */
552
 
                                                                /* penultimate char */
553
 
                                                                utf |= ((this_char & 0x3f) << 6);
554
 
                                                                stat++;
555
 
                                                                break;
556
 
                                                        case 0x30:      /* 4, 2nd */
557
 
                                                        case 0x41:      /* 5, 3rd */
558
 
                                                        case 0x52:      /* 6, 4th */
559
 
                                                                utf |= ((this_char & 0x3f) << 12);
560
 
                                                                stat++;
561
 
                                                                break;
562
 
                                                        case 0x40:      /* 5, 2nd */
563
 
                                                        case 0x51:
564
 
                                                                utf |= ((this_char & 0x3f) << 18);
565
 
                                                                stat++;
566
 
                                                                break;
567
 
                                                        case 0x50:      /* 6, 2nd */
568
 
                                                                utf |= ((this_char & 0x3f) << 24);
569
 
                                                                stat++;
570
 
                                                                break;
571
 
                                                        default:
572
 
                                                                /* invalid */
573
 
                                                                *status = FAILURE;
574
 
                                                                more = 0;
575
 
                                                }
576
 
                                        }
577
 
                                        /* lead byte */
578
 
                                        else if (this_char < 0xe0) {
579
 
                                                stat = 0x10;    /* 2 byte */
580
 
                                                utf = (this_char & 0x1f) << 6;
581
 
                                                CHECK_LEN(pos, 1);
582
 
                                        } else if (this_char < 0xf0) {
583
 
                                                stat = 0x20;    /* 3 byte */
584
 
                                                utf = (this_char & 0xf) << 12;
585
 
                                                CHECK_LEN(pos, 2);
586
 
                                        } else if (this_char < 0xf8) {
587
 
                                                stat = 0x30;    /* 4 byte */
588
 
                                                utf = (this_char & 0x7) << 18;
589
 
                                                CHECK_LEN(pos, 3);
590
 
                                        } else if (this_char < 0xfc) {
591
 
                                                stat = 0x40;    /* 5 byte */
592
 
                                                utf = (this_char & 0x3) << 24;
593
 
                                                CHECK_LEN(pos, 4);
594
 
                                        } else if (this_char < 0xfe) {
595
 
                                                stat = 0x50;    /* 6 byte */
596
 
                                                utf = (this_char & 0x1) << 30;
597
 
                                                CHECK_LEN(pos, 5);
598
 
                                        } else {
599
 
                                                /* invalid; bail */
600
 
                                                more = 0;
601
 
                                                *status = FAILURE;
602
 
                                                break;
603
 
                                        }
604
 
 
605
 
                                        if (more) {
606
 
                                                this_char = str[pos++];
607
 
                                                MB_WRITE((unsigned char)this_char);
608
 
                                        }
609
 
                                } while (more);
 
553
                                                MB_FAILURE(pos);
 
554
                                        }
 
555
                                        MB_WRITE((unsigned char)c);
 
556
                                        MB_WRITE((unsigned char)str[pos + 1]);
 
557
                                        pos += 2;
 
558
                                } else if (c < 0xf0) {
 
559
                                        CHECK_LEN(pos, 3);
 
560
                                        if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
 
561
                                                MB_FAILURE(pos);
 
562
                                        }
 
563
                                        if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
 
564
                                                MB_FAILURE(pos);
 
565
                                        }
 
566
                                        this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f);
 
567
                                        if (this_char < 0x800) {
 
568
                                                MB_FAILURE(pos);
 
569
                                        } else if (this_char >= 0xd800 && this_char <= 0xdfff) {
 
570
                                                MB_FAILURE(pos);
 
571
                                        }
 
572
                                        MB_WRITE((unsigned char)c);
 
573
                                        MB_WRITE((unsigned char)str[pos + 1]);
 
574
                                        MB_WRITE((unsigned char)str[pos + 2]);
 
575
                                        pos += 3;
 
576
                                } else if (c < 0xf8) {
 
577
                                        CHECK_LEN(pos, 4);
 
578
                                        if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
 
579
                                                MB_FAILURE(pos);
 
580
                                        }
 
581
                                        if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
 
582
                                                MB_FAILURE(pos);
 
583
                                        }
 
584
                                        if (str[pos + 3] < 0x80 || str[pos + 3] > 0xbf) {
 
585
                                                MB_FAILURE(pos);
 
586
                                        }
 
587
                                        this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f);
 
588
                                        if (this_char < 0x10000) {
 
589
                                                MB_FAILURE(pos);
 
590
                                        }
 
591
                                        MB_WRITE((unsigned char)c);
 
592
                                        MB_WRITE((unsigned char)str[pos + 1]);
 
593
                                        MB_WRITE((unsigned char)str[pos + 2]);
 
594
                                        MB_WRITE((unsigned char)str[pos + 3]);
 
595
                                        pos += 4;
 
596
                                } else {
 
597
                                        MB_FAILURE(pos);
 
598
                                }
610
599
                        }
611
600
                        break;
612
601
                case cs_big5:
613
602
                case cs_gb2312:
614
603
                case cs_big5hkscs:
615
604
                        {
 
605
                                CHECK_LEN(pos, 1);
 
606
                                this_char = str[pos++];
616
607
                                /* check if this is the first of a 2-byte sequence */
617
 
                                if (this_char >= 0xa1 && this_char <= 0xfe) {
 
608
                                if (this_char >= 0x81 && this_char <= 0xfe) {
618
609
                                        /* peek at the next char */
619
610
                                        CHECK_LEN(pos, 1);
620
 
                                        next_char = str[pos];
 
611
                                        next_char = str[pos++];
621
612
                                        if ((next_char >= 0x40 && next_char <= 0x7e) ||
622
613
                                                        (next_char >= 0xa1 && next_char <= 0xfe)) {
623
614
                                                /* yes, this a wide char */
624
 
                                                this_char <<= 8;
 
615
                                                MB_WRITE(this_char);
625
616
                                                MB_WRITE(next_char);
626
 
                                                this_char |= next_char;
627
 
                                                pos++;
 
617
                                                this_char = (this_char << 8) | next_char;
 
618
                                        } else {
 
619
                                                MB_FAILURE(pos);
628
620
                                        }
629
 
                                        
 
621
                                } else {
 
622
                                        MB_WRITE(this_char);
630
623
                                }
631
 
                                break;
632
624
                        }
 
625
                        break;
633
626
                case cs_sjis:
634
627
                        {
 
628
                                CHECK_LEN(pos, 1);
 
629
                                this_char = str[pos++];
635
630
                                /* check if this is the first of a 2-byte sequence */
636
 
                                if ( (this_char >= 0x81 && this_char <= 0x9f) ||
637
 
                                         (this_char >= 0xe0 && this_char <= 0xef)
638
 
                                        ) {
 
631
                                if ((this_char >= 0x81 && this_char <= 0x9f) ||
 
632
                                        (this_char >= 0xe0 && this_char <= 0xfc)) {
639
633
                                        /* peek at the next char */
640
634
                                        CHECK_LEN(pos, 1);
641
 
                                        next_char = str[pos];
 
635
                                        next_char = str[pos++];
642
636
                                        if ((next_char >= 0x40 && next_char <= 0x7e) ||
643
637
                                                (next_char >= 0x80 && next_char <= 0xfc))
644
638
                                        {
645
639
                                                /* yes, this a wide char */
646
 
                                                this_char <<= 8;
 
640
                                                MB_WRITE(this_char);
647
641
                                                MB_WRITE(next_char);
648
 
                                                this_char |= next_char;
649
 
                                                pos++;
 
642
                                                this_char = (this_char << 8) | next_char;
 
643
                                        } else {
 
644
                                                MB_FAILURE(pos);
650
645
                                        }
651
 
                                        
 
646
                                } else {
 
647
                                        MB_WRITE(this_char);
652
648
                                }
653
649
                                break;
654
650
                        }
655
651
                case cs_eucjp:
656
652
                        {
 
653
                                CHECK_LEN(pos, 1);
 
654
                                this_char = str[pos++];
657
655
                                /* check if this is the first of a multi-byte sequence */
658
656
                                if (this_char >= 0xa1 && this_char <= 0xfe) {
659
657
                                        /* peek at the next char */
660
658
                                        CHECK_LEN(pos, 1);
661
 
                                        next_char = str[pos];
 
659
                                        next_char = str[pos++];
662
660
                                        if (next_char >= 0xa1 && next_char <= 0xfe) {
663
661
                                                /* yes, this a jis kanji char */
664
 
                                                this_char <<= 8;
 
662
                                                MB_WRITE(this_char);
665
663
                                                MB_WRITE(next_char);
666
 
                                                this_char |= next_char;
667
 
                                                pos++;
 
664
                                                this_char = (this_char << 8) | next_char;
 
665
                                        } else {
 
666
                                                MB_FAILURE(pos);
668
667
                                        }
669
 
                                        
670
668
                                } else if (this_char == 0x8e) {
671
669
                                        /* peek at the next char */
672
670
                                        CHECK_LEN(pos, 1);
673
 
                                        next_char = str[pos];
 
671
                                        next_char = str[pos++];
674
672
                                        if (next_char >= 0xa1 && next_char <= 0xdf) {
675
673
                                                /* JIS X 0201 kana */
676
 
                                                this_char <<= 8;
 
674
                                                MB_WRITE(this_char);
677
675
                                                MB_WRITE(next_char);
678
 
                                                this_char |= next_char;
679
 
                                                pos++;
 
676
                                                this_char = (this_char << 8) | next_char;
 
677
                                        } else {
 
678
                                                MB_FAILURE(pos);
680
679
                                        }
681
 
                                        
682
680
                                } else if (this_char == 0x8f) {
683
681
                                        /* peek at the next two char */
684
682
                                        unsigned char next2_char;
685
683
                                        CHECK_LEN(pos, 2);
686
684
                                        next_char = str[pos];
687
 
                                        next2_char = str[pos+1];
 
685
                                        next2_char = str[pos + 1];
 
686
                                        pos += 2;
688
687
                                        if ((next_char >= 0xa1 && next_char <= 0xfe) &&
689
688
                                                (next2_char >= 0xa1 && next2_char <= 0xfe)) {
690
689
                                                /* JIS X 0212 hojo-kanji */
691
 
                                                this_char <<= 8;
 
690
                                                MB_WRITE(this_char);
692
691
                                                MB_WRITE(next_char);
693
 
                                                this_char |= next_char;
694
 
                                                pos++;
695
 
                                                this_char <<= 8;
696
692
                                                MB_WRITE(next2_char);
697
 
                                                this_char |= next2_char;
698
 
                                                pos++;
 
693
                                                this_char = (this_char << 16) | (next_char << 8) | next2_char;
 
694
                                        } else {
 
695
                                                MB_FAILURE(pos);
699
696
                                        }
700
 
                                        
 
697
                                } else {
 
698
                                        MB_WRITE(this_char);
701
699
                                }
702
700
                                break;
703
701
                        }
704
702
                default:
 
703
                        /* single-byte charsets */
 
704
                        CHECK_LEN(pos, 1);
 
705
                        this_char = str[pos++];
 
706
                        MB_WRITE(this_char);
705
707
                        break;
706
708
        }
707
709
        MB_RETURN;
1132
1134
                unsigned char mbsequence[16];   /* allow up to 15 characters in a multibyte sequence */
1133
1135
                int mbseqlen = sizeof(mbsequence);
1134
1136
                int status = SUCCESS;
1135
 
                unsigned short this_char = get_next_char(charset, old, oldlen, &i, mbsequence, &mbseqlen, &status);
 
1137
                unsigned int this_char = get_next_char(charset, old, oldlen, &i, mbsequence, &mbseqlen, &status);
1136
1138
 
1137
1139
                if(status == FAILURE) {
1138
1140
                        /* invalid MB sequence */