~ubuntu-branches/ubuntu/karmic/lynx-cur/karmic

« back to all changes in this revision

Viewing changes to WWW/Library/Implementation/SGML.c

  • Committer: Bazaar Package Importer
  • Author(s): Atsuhito KOHDA
  • Date: 2009-05-27 12:44:09 UTC
  • mfrom: (1.19.1 upstream)
  • Revision ID: james.westby@ubuntu.com-20090527124409-oq9baobu4puj0wai
Tags: 2.8.7pre4-1
* New Upstream Release.
 - amend fix for Debian #388622  (Closes: #388622)
 - suppress check for "disabled" attribute in a select, as a workaround
   (Closes: #525934)
 - accommodate (in)compatibility "feature" in HTML5 draft  (Closes: #514897)
 - Sanitize build-dependencies.  Applied the suggested patch, which I had
   forgotten to apply in earlier versions.  (Closes: #481767)
* Sanitize build-dependencies.  Also removed exim4, bzip2, unzip and zip.
   Only sharutils, telnet and openssh-client remain now.
* Uncomment SSL_CERT_FILE line in lynx.cfg  (Closes: #529482)

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
/*
2
 
 * $LynxId: SGML.c,v 1.127 2009/04/16 00:21:21 tom Exp $
 
2
 * $LynxId: SGML.c,v 1.130 2009/05/25 18:17:36 tom Exp $
3
3
 *
4
4
 *                      General SGML Parser code                SGML.c
5
5
 *                      ========================
1898
1898
     * nor HTCJK is set.  - FM
1899
1899
     */
1900
1900
    if (TOASCII(unsign_c) > 127 && TOASCII(unsign_c) < 160 &&   /* S/390 -- gil -- 0847 */
1901
 
        !(PASSHICTRL || IS_CJK_TTY))
 
1901
        !(PASSHICTRL || IS_CJK_TTY)) {
 
1902
        /*
 
1903
         * If we happen to be reading from an "ISO-8859-1" or "US-ASCII"
 
1904
         * document, allow the cp-1252 codes, to accommodate the HTML5 draft
 
1905
         * recommendation for replacement encoding:
 
1906
         *
 
1907
         * http://www.whatwg.org/specs/web-apps/current-work/multipage/infrastructure.html#character-encodings-0
 
1908
         */
 
1909
        if (context->inUCLYhndl == LATIN1
 
1910
            || context->inUCLYhndl == US_ASCII) {
 
1911
            clong = LYcp1252ToUnicode(c);
 
1912
            goto top1;
 
1913
        }
1902
1914
        goto after_switch;
 
1915
    }
1903
1916
 
1904
1917
    /* Almost all CJK characters are double byte but only Japanese
1905
1918
     * JIS X0201 Kana is single byte. To prevent to fail SGML parsing
2565
2578
    case S_incro:
2566
2579
        /* S/390 -- gil -- 1075 */
2567
2580
        if ((TOASCII(unsign_c) < 127) &&
2568
 
            (context->isHex ? isxdigit(UCH(c)) :
2569
 
             isdigit(UCH(c)))) {
 
2581
            (context->isHex
 
2582
             ? isxdigit(UCH(c))
 
2583
             : isdigit(UCH(c)))) {
2570
2584
            /*
2571
2585
             * Accept only valid hex or ASCII digits.  - FM
2572
2586
             */
2602
2616
            if ((context->isHex ? sscanf(string->data, "%lx", &code) :
2603
2617
                 sscanf(string->data, "%lu", &code)) == 1) {
2604
2618
/* =============== work in ASCII below here ===============  S/390 -- gil -- 1092 */
2605
 
                if ((code == 1) ||
2606
 
                    (code > 127 && code < 156)) {
2607
 
                    /*
2608
 
                     * Assume these are Microsoft code points, inflicted on us
2609
 
                     * by FrontPage.  - FM
2610
 
                     *
2611
 
                     * MS FrontPage uses syntax like &#153; in 128-159 range
2612
 
                     * and doesn't follow Unicode standards for this area. 
2613
 
                     * Windows-1252 codepoints are assumed here.
2614
 
                     */
2615
 
                    switch (code) {
2616
 
                    case 1:
2617
 
                        /*
2618
 
                         * WHITE SMILING FACE
2619
 
                         */
2620
 
                        code = 0x263a;
2621
 
                        break;
2622
 
                    case 128:
2623
 
                        /*
2624
 
                         * EURO currency sign
2625
 
                         */
2626
 
                        code = 0x20ac;
2627
 
                        break;
2628
 
                    case 130:
2629
 
                        /*
2630
 
                         * SINGLE LOW-9 QUOTATION MARK (sbquo)
2631
 
                         */
2632
 
                        code = 0x201a;
2633
 
                        break;
2634
 
                    case 132:
2635
 
                        /*
2636
 
                         * DOUBLE LOW-9 QUOTATION MARK (bdquo)
2637
 
                         */
2638
 
                        code = 0x201e;
2639
 
                        break;
2640
 
                    case 133:
2641
 
                        /*
2642
 
                         * HORIZONTAL ELLIPSIS (hellip)
2643
 
                         */
2644
 
                        code = 0x2026;
2645
 
                        break;
2646
 
                    case 134:
2647
 
                        /*
2648
 
                         * DAGGER (dagger)
2649
 
                         */
2650
 
                        code = 0x2020;
2651
 
                        break;
2652
 
                    case 135:
2653
 
                        /*
2654
 
                         * DOUBLE DAGGER (Dagger)
2655
 
                         */
2656
 
                        code = 0x2021;
2657
 
                        break;
2658
 
                    case 137:
2659
 
                        /*
2660
 
                         * PER MILLE SIGN (permil)
2661
 
                         */
2662
 
                        code = 0x2030;
2663
 
                        break;
2664
 
                    case 139:
2665
 
                        /*
2666
 
                         * SINGLE LEFT-POINTING ANGLE QUOTATION MARK (lsaquo)
2667
 
                         */
2668
 
                        code = 0x2039;
2669
 
                        break;
2670
 
                    case 145:
2671
 
                        /*
2672
 
                         * LEFT SINGLE QUOTATION MARK (lsquo)
2673
 
                         */
2674
 
                        code = 0x2018;
2675
 
                        break;
2676
 
                    case 146:
2677
 
                        /*
2678
 
                         * RIGHT SINGLE QUOTATION MARK (rsquo)
2679
 
                         */
2680
 
                        code = 0x2019;
2681
 
                        break;
2682
 
                    case 147:
2683
 
                        /*
2684
 
                         * LEFT DOUBLE QUOTATION MARK (ldquo)
2685
 
                         */
2686
 
                        code = 0x201c;
2687
 
                        break;
2688
 
                    case 148:
2689
 
                        /*
2690
 
                         * RIGHT DOUBLE QUOTATION MARK (rdquo)
2691
 
                         */
2692
 
                        code = 0x201d;
2693
 
                        break;
2694
 
                    case 149:
2695
 
                        /*
2696
 
                         * BULLET (bull)
2697
 
                         */
2698
 
                        code = 0x2022;
2699
 
                        break;
2700
 
                    case 150:
2701
 
                        /*
2702
 
                         * EN DASH (ndash)
2703
 
                         */
2704
 
                        code = 0x2013;
2705
 
                        break;
2706
 
                    case 151:
2707
 
                        /*
2708
 
                         * EM DASH (mdash)
2709
 
                         */
2710
 
                        code = 0x2014;
2711
 
                        break;
2712
 
                    case 152:
2713
 
                        /*
2714
 
                         * SMALL TILDE (tilde)
2715
 
                         */
2716
 
                        code = 0x02dc;
2717
 
                        break;
2718
 
                    case 153:
2719
 
                        /*
2720
 
                         * TRADE MARK SIGN (trade)
2721
 
                         */
2722
 
                        code = 0x2122;
2723
 
                        break;
2724
 
                    case 155:
2725
 
                        /*
2726
 
                         * SINGLE RIGHT-POINTING ANGLE QUOTATION MARK (rsaquo)
2727
 
                         */
2728
 
                        code = 0x203a;
2729
 
                        break;
2730
 
                    default:
2731
 
                        /*
2732
 
                         * Do not attempt a conversion to valid Unicode values.
2733
 
                         */
2734
 
                        break;
2735
 
                    }
2736
 
                }
 
2619
                code = LYcp1252ToUnicode(code);
2737
2620
                /*
2738
2621
                 * Check for special values.  - FM
2739
2622
                 */