~ubuntu-branches/ubuntu/vivid/icu4j-4.4/vivid

« back to all changes in this revision

Viewing changes to main/tests/translit/src/com/ibm/icu/dev/test/translit/RoundTripTest.java

  • Committer: Bazaar Package Importer
  • Author(s): Niels Thykier
  • Date: 2011-08-02 15:50:33 UTC
  • Revision ID: james.westby@ubuntu.com-20110802155033-itjzsl21y2lqdonn
Tags: upstream-4.4.2
Import upstream version 4.4.2

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/**
 
2
 *******************************************************************************
 
3
 * Copyright (C) 2000-2009, International Business Machines Corporation and    *
 
4
 * others. All Rights Reserved.                                                *
 
5
 *******************************************************************************
 
6
 */
 
7
package com.ibm.icu.dev.test.translit;
 
8
 
 
9
import java.io.BufferedWriter;
 
10
import java.io.ByteArrayOutputStream;
 
11
import java.io.File;
 
12
import java.io.FileNotFoundException;
 
13
import java.io.FileOutputStream;
 
14
import java.io.IOException;
 
15
import java.io.OutputStreamWriter;
 
16
import java.io.PrintWriter;
 
17
import java.io.UnsupportedEncodingException;
 
18
import java.util.MissingResourceException;
 
19
 
 
20
import com.ibm.icu.dev.test.TestFmwk;
 
21
import com.ibm.icu.impl.Utility;
 
22
import com.ibm.icu.lang.UCharacter;
 
23
import com.ibm.icu.lang.UProperty;
 
24
import com.ibm.icu.text.Normalizer;
 
25
import com.ibm.icu.text.Transliterator;
 
26
import com.ibm.icu.text.UTF16;
 
27
import com.ibm.icu.text.UnicodeSet;
 
28
import com.ibm.icu.text.UnicodeSetIterator;
 
29
import com.ibm.icu.util.LocaleData;
 
30
import com.ibm.icu.util.ULocale;
 
31
 
 
32
/**
 
33
 * @test
 
34
 * @summary Round trip test of Transliterator
 
35
 */
 
36
public class RoundTripTest extends TestFmwk {
 
37
 
 
38
    static final boolean EXTRA_TESTS = true;
 
39
    static final boolean PRINT_RULES = true;
 
40
 
 
41
    public static void main(String[] args) throws Exception {
 
42
        new RoundTripTest().run(args);
 
43
    }
 
44
    /*
 
45
    public void TestSingle() throws IOException, ParseException {
 
46
        Transliterator t = Transliterator.getInstance("Latin-Greek");
 
47
        String s = t.transliterate("\u0101\u0069");
 
48
    }
 
49
     */
 
50
 
 
51
    /*
 
52
    Note: Unicode 3.2 added new Hiragana/Katakana characters:
 
53
 
 
54
3095..3096    ; 3.2 #   [2] HIRAGANA LETTER SMALL KA..HIRAGANA LETTER SMALL KE
 
55
309F..30A0    ; 3.2 #   [2] HIRAGANA DIGRAPH YORI..KATAKANA-HIRAGANA DOUBLE HYPHEN
 
56
30FF          ; 3.2 #       KATAKANA DIGRAPH KOTO
 
57
31F0..31FF    ; 3.2 #  [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
 
58
 
 
59
    Unicode 5.2 added another Hiragana character:
 
60
1F200         ; 5.2 #       SQUARE HIRAGANA HOKA
 
61
 
 
62
    We will not add them to the rules until they are more supported (e.g. in fonts on Windows)
 
63
    A bug has been filed to remind us to do this: #1979.
 
64
     */
 
65
 
 
66
    static String KATAKANA = "[[[:katakana:][\u30A1-\u30FA\u30FC]]-[\u30FF\u31F0-\u31FF]]";
 
67
    static String HIRAGANA = "[[[:hiragana:][\u3040-\u3094]]-[\u3095-\u3096\u309F-\u30A0\\U0001F200-\\U0001F2FF]]";
 
68
    static String LENGTH = "[\u30FC]";
 
69
    static String HALFWIDTH_KATAKANA = "[\uFF65-\uFF9D]";
 
70
    static String KATAKANA_ITERATION = "[\u30FD\u30FE]";
 
71
    static String HIRAGANA_ITERATION = "[\u309D\u309E]";
 
72
 
 
73
    //------------------------------------------------------------------
 
74
    // AbbreviatedUnicodeSetIterator
 
75
    //------------------------------------------------------------------
 
76
 
 
77
    static class AbbreviatedUnicodeSetIterator extends UnicodeSetIterator {
 
78
 
 
79
        private boolean abbreviated;
 
80
        private int perRange;
 
81
 
 
82
        public AbbreviatedUnicodeSetIterator() {
 
83
            super();
 
84
            abbreviated = false;
 
85
        }
 
86
 
 
87
        public void reset(UnicodeSet newSet) {
 
88
            reset(newSet, false);
 
89
        }
 
90
 
 
91
        public void reset(UnicodeSet newSet, boolean abb) {
 
92
            reset(newSet, abb, 100);
 
93
        }
 
94
 
 
95
        public void reset(UnicodeSet newSet, boolean abb, int density) {
 
96
            super.reset(newSet);
 
97
            abbreviated = abb;
 
98
            perRange = newSet.getRangeCount();
 
99
            if (perRange != 0) {
 
100
                perRange = density / perRange;
 
101
            }
 
102
        }
 
103
 
 
104
        protected void loadRange(int myRange) {
 
105
            super.loadRange(myRange);
 
106
            if (abbreviated && (endElement > nextElement + perRange)) {
 
107
                endElement = nextElement + perRange;
 
108
            }
 
109
        }
 
110
    }
 
111
 
 
112
    //--------------------------------------------------------------------
 
113
 
 
114
    public void showElapsed(long start, String name) {
 
115
        double dur = (System.currentTimeMillis() - start) / 1000.0;
 
116
        logln(name + " took " + dur + " seconds");
 
117
    }
 
118
 
 
119
    public void TestKana() throws IOException {
 
120
        long start = System.currentTimeMillis();
 
121
        new Test("Katakana-Hiragana")
 
122
        .test(KATAKANA, "[" + HIRAGANA + LENGTH + "]", "[" + HALFWIDTH_KATAKANA + LENGTH + "]", this, new Legal());
 
123
        showElapsed(start, "TestKana");
 
124
    }
 
125
 
 
126
    public void TestHiragana() throws IOException {
 
127
        long start = System.currentTimeMillis();
 
128
        new Test("Latin-Hiragana")
 
129
        .test("[a-zA-Z]", HIRAGANA, HIRAGANA_ITERATION, this, new Legal());
 
130
        showElapsed(start, "TestHiragana");
 
131
    }
 
132
 
 
133
    public void TestKatakana() throws IOException {
 
134
        long start = System.currentTimeMillis();
 
135
        new Test("Latin-Katakana")
 
136
        .test("[a-zA-Z]", KATAKANA, "[" + KATAKANA_ITERATION + HALFWIDTH_KATAKANA + "]", this, new Legal());
 
137
        showElapsed(start, "TestKatakana");
 
138
    }
 
139
 
 
140
    public void TestJamo() throws IOException {
 
141
        long start = System.currentTimeMillis();
 
142
        new Test("Latin-Jamo")
 
143
        .test("[a-zA-Z]", "[\u1100-\u1112 \u1161-\u1175 \u11A8-\u11C2]", "", this, new LegalJamo());
 
144
        showElapsed(start, "TestJamo");
 
145
    }
 
146
 
 
147
    /*
 
148
        SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7,
 
149
        LCount = 19, VCount = 21, TCount = 28,
 
150
        NCount = VCount * TCount,   // 588
 
151
        SCount = LCount * NCount,   // 11172
 
152
        LLimit = LBase + LCount,    // 1113
 
153
        VLimit = VBase + VCount,    // 1176
 
154
        TLimit = TBase + TCount,    // 11C3
 
155
        SLimit = SBase + SCount;    // D7A4
 
156
     */
 
157
 
 
158
    public void TestHangul() throws IOException {
 
159
        long start = System.currentTimeMillis();
 
160
        Test t = new Test("Latin-Hangul", 5);
 
161
        boolean TEST_ALL = "true".equalsIgnoreCase(getProperty("HangulRoundTripAll")); 
 
162
        if (TEST_ALL && getInclusion() == 10) {
 
163
            t.setPairLimit(Integer.MAX_VALUE); // only go to the limit if we have TEST_ALL and getInclusion
 
164
        }
 
165
        t.test("[a-zA-Z]", "[\uAC00-\uD7A4]", "", this, new Legal());
 
166
        showElapsed(start, "TestHangul");
 
167
    }
 
168
 
 
169
    /**
 
170
     * This is a shorter version of the test for doubles, that allows us to skip lots of cases, but
 
171
     * does check the ones that should cause problems (if any do).
 
172
     */
 
173
    public void TestHangul2() {
 
174
        Transliterator lh = Transliterator.getInstance("Latin-Hangul");
 
175
        Transliterator hl = lh.getInverse();
 
176
        final UnicodeSet representativeHangul = getRepresentativeHangul();
 
177
        for (UnicodeSetIterator it = new UnicodeSetIterator(representativeHangul); it.next();) {
 
178
            assertRoundTripTransform("Transform", it.getString(), lh, hl);
 
179
        }
 
180
    }
 
181
 
 
182
    private void assertRoundTripTransform(String message, String source, Transliterator lh, Transliterator hl) {
 
183
        String to = hl.transform(source);
 
184
        String back = lh.transform(to);
 
185
        if (!source.equals(back)) {
 
186
            String to2 = hl.transform(source.replaceAll("(.)", "$1 ").trim());
 
187
            String to3 = hl.transform(back.replaceAll("(.)", "$1 ").trim());
 
188
            assertEquals(message + " " + source + " [" + to + "/"+ to2 + "/"+ to3 + "]", source, back);
 
189
        }
 
190
    }
 
191
 
 
192
    public static UnicodeSet getRepresentativeHangul() {
 
193
        UnicodeSet extraSamples = new UnicodeSet("[\uCE20{\uAD6C\uB514}{\uAD73\uC774}{\uBB34\uB837}{\uBB3C\uC5FF}{\uC544\uAE4C}{\uC544\uB530}{\uC544\uBE60}{\uC544\uC2F8}{\uC544\uC9DC}{\uC544\uCC28}{\uC545\uC0AC}{\uC545\uC2F8}{\uC546\uCE74}{\uC548\uAC00}{\uC548\uC790}{\uC548\uC9DC}{\uC548\uD558}{\uC54C\uAC00}{\uC54C\uB530}{\uC54C\uB9C8}{\uC54C\uBC14}{\uC54C\uBE60}{\uC54C\uC0AC}{\uC54C\uC2F8}{\uC54C\uD0C0}{\uC54C\uD30C}{\uC54C\uD558}{\uC555\uC0AC}{\uC555\uC2F8}{\uC558\uC0AC}{\uC5C5\uC12F\uC501}{\uC5C6\uC5C8\uC2B5}]");
 
194
        UnicodeSet sourceSet = new UnicodeSet();
 
195
        addRepresentativeHangul(sourceSet, 2, false);
 
196
        addRepresentativeHangul(sourceSet, 3, false);
 
197
        addRepresentativeHangul(sourceSet, 2, true);
 
198
        addRepresentativeHangul(sourceSet, 3, true);
 
199
        // add the boundary cases; we want an example of each case of V + L and one example of each case of T+L
 
200
 
 
201
        UnicodeSet more = getRepresentativeBoundaryHangul();
 
202
        sourceSet.addAll(more);
 
203
        sourceSet.addAll(extraSamples);
 
204
        return sourceSet;
 
205
    }
 
206
 
 
207
    private static UnicodeSet getRepresentativeBoundaryHangul() {
 
208
        UnicodeSet resultToAddTo = new UnicodeSet();
 
209
        // U+1100 HANGUL CHOSEONG KIYEOK
 
210
        // U+1161 HANGUL JUNGSEONG A
 
211
        UnicodeSet L = new UnicodeSet("[:hst=L:]");
 
212
        UnicodeSet V = new UnicodeSet("[:hst=V:]");
 
213
        UnicodeSet T = new UnicodeSet("[:hst=T:]");
 
214
 
 
215
        String prefixLV = "\u1100\u1161";
 
216
        String prefixL = "\u1100";
 
217
        String suffixV = "\u1161";
 
218
        String nullL = "\u110B"; // HANGUL CHOSEONG IEUNG
 
219
 
 
220
        UnicodeSet L0 = new UnicodeSet("[\u1100\u110B]");
 
221
 
 
222
        // do all combinations of L0 + V + nullL + V
 
223
 
 
224
        for (UnicodeSetIterator iL0 = new UnicodeSetIterator(L0); iL0.next();) {
 
225
            for (UnicodeSetIterator iV = new UnicodeSetIterator(V); iV.next();) {
 
226
                for (UnicodeSetIterator iV2 = new UnicodeSetIterator(V); iV2.next();) {
 
227
                    String sample = iL0.getString() + iV.getString() + nullL + iV2.getString();
 
228
                    String trial = Normalizer.compose(sample, false);
 
229
                    if (trial.length() == 2) {
 
230
                        resultToAddTo.add(trial);
 
231
                    }
 
232
                }
 
233
            }
 
234
        }
 
235
 
 
236
        for (UnicodeSetIterator iL = new UnicodeSetIterator(L); iL.next();) {
 
237
            // do all combinations of "g" + V + L + "a"
 
238
            final String suffix = iL.getString() + suffixV;
 
239
            for (UnicodeSetIterator iV = new UnicodeSetIterator(V); iV.next();) {
 
240
                String sample = prefixL + iV.getString() + suffix;
 
241
                String trial = Normalizer.compose(sample, false);
 
242
                if (trial.length() == 2) {
 
243
                    resultToAddTo.add(trial);
 
244
                }
 
245
            }
 
246
            // do all combinations of "ga" + T + L + "a"
 
247
            for (UnicodeSetIterator iT = new UnicodeSetIterator(T); iT.next();) {
 
248
                String sample = prefixLV + iT.getString() + suffix;
 
249
                String trial = Normalizer.compose(sample, false);
 
250
                if (trial.length() == 2) {
 
251
                    resultToAddTo.add(trial);
 
252
                }
 
253
            }
 
254
        }
 
255
        return resultToAddTo;
 
256
    }
 
257
 
 
258
    private static void addRepresentativeHangul(UnicodeSet resultToAddTo, int leng, boolean noFirstConsonant) {
 
259
        UnicodeSet notYetSeen = new UnicodeSet();
 
260
        for (char c = '\uAC00'; c <  '\uD7AF'; ++c) {
 
261
            String charStr = String.valueOf(c);
 
262
            String decomp = Normalizer.decompose(charStr, false);
 
263
            if (decomp.length() != leng) {
 
264
                continue; // only take one length at a time
 
265
            }
 
266
            if (decomp.startsWith("\u110B ") != noFirstConsonant) {
 
267
                continue;
 
268
            }
 
269
            if (!notYetSeen.containsAll(decomp)) {
 
270
                resultToAddTo.add(c);
 
271
                notYetSeen.addAll(decomp);
 
272
            }
 
273
        }
 
274
    }
 
275
 
 
276
 
 
277
    public void TestHan() throws UnsupportedEncodingException, FileNotFoundException {
 
278
        try{
 
279
            UnicodeSet exemplars = LocaleData.getExemplarSet(new ULocale("zh"),0);
 
280
            // create string with all chars
 
281
            StringBuffer b = new StringBuffer();
 
282
            for (UnicodeSetIterator it = new UnicodeSetIterator(exemplars); it.next();) {
 
283
                UTF16.append(b,it.codepoint);
 
284
            }
 
285
            String source = b.toString();
 
286
            // transform with Han translit
 
287
            Transliterator han = Transliterator.getInstance("Han-Latin");
 
288
            String target = han.transliterate(source);
 
289
            // now verify that there are no Han characters left
 
290
            UnicodeSet allHan = new UnicodeSet("[:han:]");
 
291
            assertFalse("No Han must be left after Han-Latin transliteration",allHan.containsSome(target));
 
292
            // check the pinyin translit
 
293
            Transliterator pn = Transliterator.getInstance("Latin-NumericPinyin");
 
294
            String target2 = pn.transliterate(target);
 
295
            // verify that there are no marks
 
296
            Transliterator nfc = Transliterator.getInstance("nfc");
 
297
            String nfced = nfc.transliterate(target2);
 
298
            UnicodeSet allMarks = new UnicodeSet("[:mark:]");
 
299
            assertFalse("NumericPinyin must contain no marks", allMarks.containsSome(nfced));
 
300
            // verify roundtrip
 
301
            Transliterator np = pn.getInverse();
 
302
            String target3 = np.transliterate(target);
 
303
            boolean roundtripOK = target3.equals(target);
 
304
            assertTrue("NumericPinyin must roundtrip", roundtripOK);
 
305
            if (!roundtripOK) {
 
306
                String filename = "numeric-pinyin.log.txt";
 
307
                PrintWriter out = new PrintWriter(
 
308
                        new BufferedWriter(
 
309
                                new OutputStreamWriter(
 
310
                                        new FileOutputStream(filename), "UTF8"), 4*1024));
 
311
                errln("Creating log file " + new File(filename).getAbsoluteFile());
 
312
                out.println("Pinyin:                " + target);
 
313
                out.println("Pinyin-Numeric-Pinyin: " + target2);
 
314
                out.close();
 
315
            }
 
316
        }catch(MissingResourceException ex){
 
317
            warnln("Could not load the locale data for fetching the exemplar characters.");
 
318
        }
 
319
    }
 
320
 
 
321
    public void TestSingle() {
 
322
        Transliterator t = Transliterator.getInstance("Latin-Greek");
 
323
        t.transliterate("\u0061\u0101\u0069");
 
324
    }
 
325
 
 
326
    String getGreekSet() {
 
327
        // Time bomb
 
328
        if (skipIfBeforeICU(4,5,0)) {
 
329
            // We temporarily filter against Unicode 4.1, but we only do this
 
330
            // before version 3.5.
 
331
            logln("TestGreek needs to be updated to remove delete the section marked [:Age=4.0:] filter");
 
332
        } else {
 
333
            errln("TestGreek needs to be updated to remove delete the [:Age=4.0:] filter ");
 
334
        }
 
335
        return 
 
336
        // isICU28() ? "[[\u003B\u00B7[:Greek:]-[\u03D7-\u03EF]]&[:Age=3.2:]]" :
 
337
        "[\u003B\u00B7[[:Greek:]&[:Letter:]]-[" +
 
338
        "\u1D26-\u1D2A" + // L&   [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI
 
339
        "\u1D5D-\u1D61" + // Lm   [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI
 
340
        "\u1D66-\u1D6A" + // L&   [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI
 
341
        "\u03D7-\u03EF" + // \N{GREEK KAI SYMBOL}..\N{COPTIC SMALL LETTER DEI}
 
342
        "] & [:Age=4.0:]]";
 
343
    }
 
344
 
 
345
    public void TestGreek() throws IOException {
 
346
        long start = System.currentTimeMillis();
 
347
        new Test("Latin-Greek", 50)
 
348
        .test("[a-zA-Z]", getGreekSet(),
 
349
                "[\u00B5\u037A\u03D0-\u03F5\u03F9]", /* roundtrip exclusions */
 
350
                this, new LegalGreek(true));
 
351
        showElapsed(start, "TestGreek");
 
352
    }
 
353
 
 
354
    public void TestGreekUNGEGN() throws IOException {
 
355
        long start = System.currentTimeMillis();
 
356
        new Test("Latin-Greek/UNGEGN")
 
357
        .test("[a-zA-Z]", getGreekSet(),
 
358
                "[\u00B5\u037A\u03D0-\uFFFF{\u039C\u03C0}]", /* roundtrip exclusions */
 
359
                this, new LegalGreek(false));
 
360
        showElapsed(start, "TestGreekUNGEGN");
 
361
    }
 
362
 
 
363
    public void Testel() throws IOException {
 
364
        long start = System.currentTimeMillis();
 
365
        new Test("Latin-el")
 
366
        .test("[a-zA-Z]", getGreekSet(),
 
367
                "[\u00B5\u037A\u03D0-\uFFFF{\u039C\u03C0}]", /* roundtrip exclusions */
 
368
                this, new LegalGreek(false));
 
369
        showElapsed(start, "Testel");
 
370
    }
 
371
 
 
372
    public void TestCyrillic() throws IOException {
 
373
        long start = System.currentTimeMillis();
 
374
        new Test("Latin-Cyrillic")
 
375
        .test("[a-zA-Z\u0110\u0111\u02BA\u02B9]", "[\u0400-\u045F]", null, this, new Legal());
 
376
        showElapsed(start, "TestCyrillic");
 
377
    }
 
378
 
 
379
    static final String ARABIC = "[\u06A9\u060C\u061B\u061F\u0621\u0627-\u063A\u0641-\u0655\u0660-\u066C\u067E\u0686\u0698\u06A4\u06AD\u06AF\u06CB-\u06CC\u06F0-\u06F9]";
 
380
 
 
381
    public void TestArabic() throws IOException {
 
382
        long start = System.currentTimeMillis();
 
383
        new Test("Latin-Arabic")
 
384
        .test("[a-zA-Z\u02BE\u02BF]", ARABIC, "[a-zA-Z\u02BE\u02BF\u207F]", null, this, new Legal()); //
 
385
        showElapsed(start, "TestArabic");
 
386
    }
 
387
 
 
388
    public void TestHebrew() throws IOException {
 
389
        //      Time bomb
 
390
        if (skipIfBeforeICU(4,5,0)) {
 
391
            // We temporarily filter against Unicode 4.1, but we only do this
 
392
            // before version 3.5.
 
393
            logln("TestHebrew needs to be updated to remove delete the section marked [:Age=4.0:] filter");
 
394
        } else {
 
395
            errln("TestHebrew needs to be updated to remove delete the [:Age=4.0:] filter ");
 
396
        }
 
397
        long start = System.currentTimeMillis();
 
398
        new Test("Latin-Hebrew")
 
399
        .test("[a-zA-Z\u02BC\u02BB]", "[[[:hebrew:]-[\u05BD\uFB00-\uFBFF]]& [:Age=4.0:]]", "[\u05F0\u05F1\u05F2]", this, new LegalHebrew());
 
400
        showElapsed(start, "TestHebrew");
 
401
    }
 
402
 
 
403
    public void TestThai() throws IOException {
 
404
        long start = System.currentTimeMillis();
 
405
        if(skipIfBeforeICU(4,5,0)){
 
406
            new Test("Latin-Thai")
 
407
            .test("[a-zA-Z\u0142\u1ECD\u00E6\u0131\u0268\u02CC]",
 
408
                    "[\u0E01-\u0E3A\u0E40-\u0E5B]", 
 
409
                    "[a-zA-Z\u0142\u1ECD\u00E6\u0131\u0268\u02B9\u02CC]",
 
410
                    "[\u0E4F]", this, new LegalThai());   
 
411
        }else{
 
412
            new Test("Latin-Thai")
 
413
            .test("[a-zA-Z\u0142\u1ECD\u00E6\u0131\u0268\u02CC]",
 
414
                    "[\u0E01-\u0E3A\u0E40-\u0E5B]", 
 
415
                    "[a-zA-Z\u0142\u1ECD\u00E6\u0131\u0268\u02B9\u02CC]",
 
416
                    null, this, new LegalThai());
 
417
        }
 
418
 
 
419
        showElapsed(start, "TestThai");
 
420
    }
 
421
 
 
422
    //----------------------------------
 
423
    // Inter-Indic Tests
 
424
    //----------------------------------
 
425
    public static class LegalIndic extends Legal{
 
426
        UnicodeSet vowelSignSet = new UnicodeSet();
 
427
 
 
428
        public LegalIndic(){
 
429
            vowelSignSet.addAll(new UnicodeSet("[\u0901\u0902\u0903\u0904\u093e-\u094c\u0962\u0963]"));               /* Devanagari */
 
430
            vowelSignSet.addAll(new UnicodeSet("[\u0981\u0982\u0983\u09be-\u09cc\u09e2\u09e3\u09D7]"));         /* Bengali */
 
431
            vowelSignSet.addAll(new UnicodeSet("[\u0a01\u0a02\u0a03\u0a3e-\u0a4c\u0a62\u0a63\u0a70\u0a71]"));   /* Gurmukhi */
 
432
            vowelSignSet.addAll(new UnicodeSet("[\u0a81\u0a82\u0a83\u0abe-\u0acc\u0ae2\u0ae3]"));               /* Gujarati */
 
433
            vowelSignSet.addAll(new UnicodeSet("[\u0b01\u0b02\u0b03\u0b3e-\u0b4c\u0b62\u0b63\u0b56\u0b57]"));   /* Oriya */
 
434
            vowelSignSet.addAll(new UnicodeSet("[\u0b81\u0b82\u0b83\u0bbe-\u0bcc\u0be2\u0be3\u0bd7]"));         /* Tamil */
 
435
            vowelSignSet.addAll(new UnicodeSet("[\u0c01\u0c02\u0c03\u0c3e-\u0c4c\u0c62\u0c63\u0c55\u0c56]"));   /* Telugu */
 
436
            vowelSignSet.addAll(new UnicodeSet("[\u0c81\u0c82\u0c83\u0cbe-\u0ccc\u0ce2\u0ce3\u0cd5\u0cd6]"));   /* Kannada */
 
437
            vowelSignSet.addAll(new UnicodeSet("[\u0d01\u0d02\u0d03\u0d3e-\u0d4c\u0d62\u0d63\u0d57]"));         /* Malayalam */
 
438
        }
 
439
 
 
440
        String avagraha = "\u093d\u09bd\u0abd\u0b3d\u0cbd";
 
441
        String nukta = "\u093c\u09bc\u0a3c\u0abc\u0b3c\u0cbc";
 
442
        String virama = "\u094d\u09cd\u0a4d\u0acd\u0b4d\u0bcd\u0c4d\u0ccd\u0d4d";
 
443
        String sanskritStressSigns = "\u0951\u0952\u0953\u0954\u097d";
 
444
        String chandrabindu = "\u0901\u0981\u0A81\u0b01\u0c01";
 
445
        public boolean is(String sourceString){
 
446
            int cp=sourceString.charAt(0);
 
447
 
 
448
            // A vowel sign cannot be the first char
 
449
            if(vowelSignSet.contains(cp)){
 
450
                return false;
 
451
            }else if(avagraha.indexOf(cp)!=-1){
 
452
                return false;
 
453
            }else if(virama.indexOf(cp)!=-1){
 
454
                return false;
 
455
            }else if(nukta.indexOf(cp)!=-1){
 
456
                return false;
 
457
            }else if(sanskritStressSigns.indexOf(cp)!=-1){
 
458
                return false;
 
459
            }else if((chandrabindu.indexOf(cp)!=-1) &&
 
460
                    (sourceString.length() >1 &&
 
461
                            vowelSignSet.contains(sourceString.charAt(1)))){
 
462
                return false;
 
463
            }
 
464
            return true;
 
465
        }
 
466
    }
 
467
    static String latinForIndic = "[['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD"+
 
468
    "\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F"+
 
469
    "\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148"+
 
470
    "\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0"+
 
471
    "\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u01FB"+
 
472
    "\u0200-\u021B\u021E-\u021F\u0226-\u0233\u0294\u0303-\u0304\u0306\u0314-\u0315"+
 
473
    "\u0325\u040E\u0419\u0439\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7"+
 
474
    "\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03\u1F05"+
 
475
    "\u1F07\u1F09\u1F0B\u1F0D\u1F0F\u1F11\u1F13\u1F15\u1F19\u1F1B\u1F1D\u1F21"+
 
476
    "\u1F23\u1F25\u1F27\u1F29\u1F2B\u1F2D\u1F2F\u1F31\u1F33\u1F35\u1F37\u1F39"+
 
477
    "\u1F3B\u1F3D\u1F3F\u1F41\u1F43\u1F45\u1F49\u1F4B\u1F4D\u1F51\u1F53\u1F55"+
 
478
    "\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63\u1F65\u1F67\u1F69\u1F6B\u1F6D"+
 
479
    "\u1F6F\u1F81\u1F83\u1F85\u1F87\u1F89\u1F8B\u1F8D\u1F8F\u1F91\u1F93\u1F95"+
 
480
    "\u1F97\u1F99\u1F9B\u1F9D\u1F9F\u1FA1\u1FA3\u1FA5\u1FA7\u1FA9\u1FAB\u1FAD"+
 
481
    "\u1FAF-\u1FB1\u1FB8-\u1FB9\u1FD0-\u1FD1\u1FD8-\u1FD9\u1FE0-\u1FE1\u1FE5"+
 
482
    "\u1FE8-\u1FE9\u1FEC\u212A-\u212B\uE04D\uE064]"+
 
483
    "-[\uE000-\uE080 \u01E2\u01E3]& [[:latin:][:mark:]]]";
 
484
 
 
485
    public void TestDevanagariLatin() throws IOException {
 
486
        long start = System.currentTimeMillis();
 
487
        if (skipIfBeforeICU(4,5,0)) {
 
488
            logln("Warning: TestDevanagariLatin needs to be updated to remove delete the section marked [:Age=4.1:] filter");
 
489
        } else {
 
490
            //              We temporarily filter against Unicode 4.1, but we only do this
 
491
            // before version 3.4.
 
492
            errln("FAIL: TestDevanagariLatin needs to be updated to remove delete the [:Age=4.1:] filter ");
 
493
            return;
 
494
        }
 
495
        new Test("Latin-DEVANAGARI", 50)
 
496
        .test(latinForIndic, "[[[:Devanagari:][\u094d][\u0964\u0965]]&[:Age=4.1:]]", "[\u0965\u0904]", this, new LegalIndic());
 
497
        showElapsed(start, "TestDevanagariLatin");
 
498
    }
 
499
 
 
500
    private static final String [][] interIndicArray= new String[][]{
 
501
        new String [] {  "BENGALI-DEVANAGARI",
 
502
                "[:BENGALI:]", "[:Devanagari:]",
 
503
                "[\u0904\u0951-\u0954\u0943-\u0949\u094a\u0962\u0963\u090D\u090e\u0911\u0912\u0929\u0933\u0934\u0935\u0950\u0958\u0959\u095a\u095b\u095e\u097d]", /*roundtrip exclusions*/
 
504
        },
 
505
        new String [] {  "DEVANAGARI-BENGALI",
 
506
                "[:Devanagari:]", "[:BENGALI:]",
 
507
                "[\u09D7\u090D\u090e\u0911\u0912\u0929\u0933\u0934\u0935\u0950\u0958\u0959\u095a\u095b\u095e\u09f0\u09f1\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/
 
508
        },
 
509
 
 
510
        new String [] {  "GURMUKHI-DEVANAGARI",
 
511
                "[:GURMUKHI:]", "[:Devanagari:]",
 
512
                "[\u0904\u0902\u0936\u0933\u0951-\u0954\u0902\u0903\u0943-\u0949\u094a\u0962\u0963\u090B\u090C\u090D\u090e\u0911\u0912\u0934\u0937\u093D\u0950\u0960\u0961\u097d]", /*roundtrip exclusions*/
 
513
        },
 
514
        new String [] {  "DEVANAGARI-GURMUKHI",
 
515
                "[:Devanagari:]", "[:GURMUKHI:]",
 
516
                "[\u0A02\u0946\u0A5C\u0951-\u0954\u0A70\u0A71\u090B\u090C\u090D\u090e\u0911\u0912\u0934\u0937\u093D\u0950\u0960\u0961\u0a72\u0a73\u0a74]", /*roundtrip exclusions*/
 
517
        },
 
518
 
 
519
        new String [] {  "GUJARATI-DEVANAGARI",
 
520
                "[:GUJARATI:]", "[:Devanagari:]",
 
521
                "[\u0904\u0946\u094A\u0962\u0963\u0951-\u0954\u0961\u090c\u090e\u0912\u097d]", /*roundtrip exclusions*/
 
522
        },
 
523
        new String [] {  "DEVANAGARI-GUJARATI",
 
524
                "[:Devanagari:]", "[:GUJARATI:]",
 
525
                "[\u0951-\u0954\u0961\u090c\u090e\u0912]", /*roundtrip exclusions*/
 
526
        },
 
527
 
 
528
        new String [] {  "ORIYA-DEVANAGARI",
 
529
                "[:ORIYA:]", "[:Devanagari:]",
 
530
                "[\u0904\u0912\u0911\u090D\u090e\u0931\u0943-\u094a\u0962\u0963\u0951-\u0954\u0950\u097d]", /*roundtrip exclusions*/
 
531
        },
 
532
        new String [] {  "DEVANAGARI-ORIYA",
 
533
                "[:Devanagari:]", "[:ORIYA:]",
 
534
                "[\u0b5f\u0b56\u0b57\u0b70\u0b71\u0950\u090D\u090e\u0912\u0911\u0931]", /*roundtrip exclusions*/
 
535
        },
 
536
 
 
537
        new String [] {  "Tamil-DEVANAGARI",
 
538
                "[:tamil:]", "[:Devanagari:]",
 
539
                "[\u0901\u0904\u093c\u0943-\u094a\u0951-\u0954\u0962\u0963\u090B\u090C\u090D\u0911\u0916\u0917\u0918\u091B\u091D\u0920\u0921\u0922\u0925\u0926\u0927\u092B\u092C\u092D\u0936\u093d\u0950[\u0958-\u0961]\u097d]", /*roundtrip exclusions*/
 
540
        },
 
541
        new String [] {  "DEVANAGARI-Tamil",
 
542
                "[:Devanagari:]", "[:tamil:]",
 
543
                "[\u0bd7\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/
 
544
        },
 
545
 
 
546
        new String [] {  "Telugu-DEVANAGARI",
 
547
                "[:telugu:]", "[:Devanagari:]",
 
548
                "[\u0904\u093c\u0950\u0945\u0949\u0951-\u0954\u0962\u0963\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]\u097d]", /*roundtrip exclusions*/
 
549
        },
 
550
        new String [] {  "DEVANAGARI-TELUGU",
 
551
                "[:Devanagari:]", "[:TELUGU:]",
 
552
                "[\u0c55\u0c56\u0950\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]]", /*roundtrip exclusions*/
 
553
        },
 
554
 
 
555
        new String [] {  "KANNADA-DEVANAGARI",
 
556
                "[:KANNADA:]", "[:Devanagari:]",
 
557
                "[\u0901\u0904\u0946\u0950\u0945\u0949\u0951-\u0954\u0962\u0963\u0950\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]\u097d]", /*roundtrip exclusions*/
 
558
        },
 
559
        new String [] {  "DEVANAGARI-KANNADA",
 
560
                "[:Devanagari:]", "[:KANNADA:]",
 
561
                "[{\u0cb0\u0cbc}{\u0cb3\u0cbc}\u0cde\u0cd5\u0cd6\u0950\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]]", /*roundtrip exclusions*/
 
562
        },
 
563
 
 
564
        new String [] {  "MALAYALAM-DEVANAGARI",
 
565
                "[:MALAYALAM:]", "[:Devanagari:]",
 
566
                "[\u0901\u0904\u094a\u094b\u094c\u093c\u0950\u0944\u0945\u0949\u0951-\u0954\u0962\u0963\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]\u097d]", /*roundtrip exclusions*/
 
567
        },
 
568
        new String [] {  "DEVANAGARI-MALAYALAM",
 
569
                "[:Devanagari:]", "[:MALAYALAM:]",
 
570
                "[\u0d4c\u0d57\u0950\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]]", /*roundtrip exclusions*/
 
571
        },
 
572
 
 
573
        new String [] {  "GURMUKHI-BENGALI",
 
574
                "[:GURMUKHI:]", "[:BENGALI:]",
 
575
                "[\u0982\u09b6\u09e2\u09e3\u09c3\u09c4\u09d7\u098B\u098C\u09B7\u09E0\u09E1\u09F0\u09F1\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/
 
576
        },
 
577
        new String [] {  "BENGALI-GURMUKHI",
 
578
                "[:BENGALI:]", "[:GURMUKHI:]",
 
579
                "[\u0A02\u0a5c\u0a47\u0a70\u0a71\u0A33\u0A35\u0A59\u0A5A\u0A5B\u0A5E\u0A72\u0A73\u0A74]", /*roundtrip exclusions*/
 
580
        },
 
581
 
 
582
        new String [] {  "GUJARATI-BENGALI",
 
583
                "[:GUJARATI:]", "[:BENGALI:]",
 
584
                "[\u09d7\u09e2\u09e3\u098c\u09e1\u09f0\u09f1\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/
 
585
        },
 
586
        new String [] {  "BENGALI-GUJARATI",
 
587
                "[:BENGALI:]", "[:GUJARATI:]",
 
588
                "[\u0A82\u0a83\u0Ac9\u0Ac5\u0ac7\u0A8D\u0A91\u0AB3\u0AB5\u0ABD\u0AD0]", /*roundtrip exclusions*/
 
589
        },
 
590
 
 
591
        new String [] {  "ORIYA-BENGALI",
 
592
                "[:ORIYA:]", "[:BENGALI:]",
 
593
                "[\u09c4\u09e2\u09e3\u09f0\u09f1\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/
 
594
        },
 
595
        new String [] {  "BENGALI-ORIYA",
 
596
                "[:BENGALI:]", "[:ORIYA:]",
 
597
                "[\u0b35\u0b71\u0b5f\u0b56\u0b33\u0b3d]", /*roundtrip exclusions*/
 
598
        },
 
599
 
 
600
        new String [] {  "Tamil-BENGALI",
 
601
                "[:tamil:]", "[:BENGALI:]",
 
602
                "[\u0981\u09bc\u09c3\u09c4\u09e2\u09e3\u09f0\u09f1\u098B\u098C\u0996\u0997\u0998\u099B\u099D\u09A0\u09A1\u09A2\u09A5\u09A6\u09A7\u09AB\u09AC\u09AD\u09B6\u09DC\u09DD\u09DF\u09E0\u09E1\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/
 
603
        },
 
604
        new String [] {  "BENGALI-Tamil",
 
605
                "[:BENGALI:]", "[:tamil:]",
 
606
                "[\u0bc6\u0bc7\u0bca\u0B8E\u0B92\u0BA9\u0BB1\u0BB3\u0BB4\u0BB5\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/
 
607
        },
 
608
 
 
609
        new String [] {  "Telugu-BENGALI",
 
610
                "[:telugu:]", "[:BENGALI:]",
 
611
                "[\u09e2\u09e3\u09bc\u09d7\u09f0\u09f1\u09dc\u09dd\u09df\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/
 
612
        },
 
613
        new String [] {  "BENGALI-TELUGU",
 
614
                "[:BENGALI:]", "[:TELUGU:]",
 
615
                "[\u0c55\u0c56\u0c47\u0c46\u0c4a\u0C0E\u0C12\u0C31\u0C33\u0C35]", /*roundtrip exclusions*/
 
616
        },
 
617
 
 
618
        new String [] {  "KANNADA-BENGALI",
 
619
                "[:KANNADA:]", "[:BENGALI:]",
 
620
                "[\u0981\u09e2\u09e3\u09bc\u09d7\u09f0\u09f1\u09dc\u09dd\u09df\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/
 
621
        },
 
622
        new String [] {  "BENGALI-KANNADA",
 
623
                "[:BENGALI:]", "[:KANNADA:]",
 
624
                "[{\u0cb0\u0cbc}{\u0cb3\u0cbc}\u0cc6\u0cca\u0cd5\u0cd6\u0cc7\u0C8E\u0C92\u0CB1\u0cb3\u0cb5\u0cde]", /*roundtrip exclusions*/
 
625
        },
 
626
 
 
627
        new String [] {  "MALAYALAM-BENGALI",
 
628
                "[:MALAYALAM:]", "[:BENGALI:]",
 
629
                "[\u0981\u09e2\u09e3\u09bc\u09c4\u09f0\u09f1\u09dc\u09dd\u09df\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/
 
630
        },
 
631
        new String [] {  "BENGALI-MALAYALAM",
 
632
                "[:BENGALI:]", "[:MALAYALAM:]",
 
633
                "[\u0d46\u0d4a\u0d47\u0d31-\u0d35\u0d0e\u0d12]", /*roundtrip exclusions*/
 
634
        },
 
635
 
 
636
        new String [] {  "GUJARATI-GURMUKHI",
 
637
                "[:GUJARATI:]", "[:GURMUKHI:]",
 
638
                "[\u0A02\u0ab3\u0ab6\u0A70\u0a71\u0a82\u0a83\u0ac3\u0ac4\u0ac5\u0ac9\u0a5c\u0a72\u0a73\u0a74\u0a8b\u0a8d\u0a91\u0abd]", /*roundtrip exclusions*/
 
639
        },
 
640
        new String [] {  "GURMUKHI-GUJARATI",
 
641
                "[:GURMUKHI:]", "[:GUJARATI:]",
 
642
                "[\u0a5c\u0A70\u0a71\u0a72\u0a73\u0a74\u0a82\u0a83\u0a8b\u0a8c\u0a8d\u0a91\u0ab3\u0ab6\u0ab7\u0abd\u0ac3\u0ac4\u0ac5\u0ac9\u0ad0\u0ae0\u0ae1]", /*roundtrip exclusions*/
 
643
        },
 
644
 
 
645
        new String [] {  "ORIYA-GURMUKHI",
 
646
                "[:ORIYA:]", "[:GURMUKHI:]",
 
647
                "[\u0A02\u0a5c\u0a21\u0a47\u0a71\u0b02\u0b03\u0b33\u0b36\u0b43\u0b56\u0b57\u0B0B\u0B0C\u0B37\u0B3D\u0B5F\u0B60\u0B61\u0a35\u0a72\u0a73\u0a74]", /*roundtrip exclusions*/
 
648
        },
 
649
        new String [] {  "GURMUKHI-ORIYA",
 
650
                "[:GURMUKHI:]", "[:ORIYA:]",
 
651
                "[\u0a71\u0b02\u0b03\u0b33\u0b36\u0b43\u0b56\u0b57\u0B0B\u0B0C\u0B37\u0B3D\u0B5F\u0B60\u0B61\u0b70\u0b71]", /*roundtrip exclusions*/
 
652
        },
 
653
 
 
654
        new String [] {  "TAMIL-GURMUKHI",
 
655
                "[:TAMIL:]", "[:GURMUKHI:]",
 
656
                "[\u0A01\u0A02\u0a33\u0a36\u0a3c\u0a70\u0a71\u0a47\u0A16\u0A17\u0A18\u0A1B\u0A1D\u0A20\u0A21\u0A22\u0A25\u0A26\u0A27\u0A2B\u0A2C\u0A2D\u0A59\u0A5A\u0A5B\u0A5C\u0A5E\u0A72\u0A73\u0A74]", /*roundtrip exclusions*/
 
657
        },
 
658
        new String [] {  "GURMUKHI-TAMIL",
 
659
                "[:GURMUKHI:]", "[:TAMIL:]",
 
660
                "[\u0b82\u0bc6\u0bca\u0bd7\u0bb7\u0bb3\u0b83\u0B8E\u0B92\u0BA9\u0BB1\u0BB4\u0bb6\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/
 
661
        },
 
662
 
 
663
        new String [] {  "TELUGU-GURMUKHI",
 
664
                "[:TELUGU:]", "[:GURMUKHI:]",
 
665
                "[\u0A02\u0a33\u0a36\u0a3c\u0a70\u0a71\u0A59\u0A5A\u0A5B\u0A5C\u0A5E\u0A72\u0A73\u0A74]", /*roundtrip exclusions*/
 
666
        },
 
667
        new String [] {  "GURMUKHI-TELUGU",
 
668
                "[:GURMUKHI:]", "[:TELUGU:]",
 
669
                "[\u0c02\u0c03\u0c33\u0c36\u0c44\u0c43\u0c46\u0c4a\u0c56\u0c55\u0C0B\u0C0C\u0C0E\u0C12\u0C31\u0C37\u0C60\u0C61]", /*roundtrip exclusions*/
 
670
        },
 
671
        new String [] {  "KANNADA-GURMUKHI",
 
672
                "[:KANNADA:]", "[:GURMUKHI:]",
 
673
                "[\u0A01\u0A02\u0a33\u0a36\u0a3c\u0a70\u0a71\u0A59\u0A5A\u0A5B\u0A5C\u0A5E\u0A72\u0A73\u0A74]", /*roundtrip exclusions*/
 
674
        },
 
675
        new String [] {  "GURMUKHI-KANNADA",
 
676
                "[:GURMUKHI:]", "[:KANNADA:]",
 
677
                "[{\u0cb0\u0cbc}{\u0cb3\u0cbc}\u0c82\u0c83\u0cb3\u0cb6\u0cc4\u0cc3\u0cc6\u0cca\u0cd5\u0cd6\u0C8B\u0C8C\u0C8E\u0C92\u0CB1\u0CB7\u0cbd\u0CE0\u0CE1\u0cde]", /*roundtrip exclusions*/
 
678
        },
 
679
 
 
680
        new String [] {  "MALAYALAM-GURMUKHI",
 
681
                "[:MALAYALAM:]", "[:GURMUKHI:]",
 
682
                "[\u0A01\u0A02\u0a4b\u0a4c\u0a33\u0a36\u0a3c\u0a70\u0a71\u0A59\u0A5A\u0A5B\u0A5C\u0A5E\u0A72\u0A73\u0A74]", /*roundtrip exclusions*/
 
683
        },
 
684
        new String [] {  "GURMUKHI-MALAYALAM",
 
685
                "[:GURMUKHI:]", "[:MALAYALAM:]",
 
686
                "[\u0d02\u0d03\u0d33\u0d36\u0d43\u0d46\u0d4a\u0d4c\u0d57\u0D0B\u0D0C\u0D0E\u0D12\u0D31\u0D34\u0D37\u0D60\u0D61]", /*roundtrip exclusions*/
 
687
        },
 
688
 
 
689
        new String [] {  "GUJARATI-ORIYA",
 
690
                "[:GUJARATI:]", "[:ORIYA:]",
 
691
                "[\u0b56\u0b57\u0B0C\u0B5F\u0B61\u0b70\u0b71]", /*roundtrip exclusions*/
 
692
        },
 
693
        new String [] {  "ORIYA-GUJARATI",
 
694
                "[:ORIYA:]", "[:GUJARATI:]",
 
695
                "[\u0Ac4\u0Ac5\u0Ac9\u0Ac7\u0A8D\u0A91\u0AB5\u0Ad0]", /*roundtrip exclusions*/
 
696
        },
 
697
 
 
698
        new String [] {  "TAMIL-GUJARATI",
 
699
                "[:TAMIL:]", "[:GUJARATI:]",
 
700
                "[\u0A81\u0a8c\u0abc\u0ac3\u0Ac4\u0Ac5\u0Ac9\u0Ac7\u0A8B\u0A8D\u0A91\u0A96\u0A97\u0A98\u0A9B\u0A9D\u0AA0\u0AA1\u0AA2\u0AA5\u0AA6\u0AA7\u0AAB\u0AAC\u0AAD\u0AB6\u0ABD\u0AD0\u0AE0\u0AE1]", /*roundtrip exclusions*/
 
701
        },
 
702
        new String [] {  "GUJARATI-TAMIL",
 
703
                "[:GUJARATI:]", "[:TAMIL:]",
 
704
                "[\u0Bc6\u0Bca\u0Bd7\u0B8E\u0B92\u0BA9\u0BB1\u0BB4\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/
 
705
        },
 
706
 
 
707
        new String [] {  "TELUGU-GUJARATI",
 
708
                "[:TELUGU:]", "[:GUJARATI:]",
 
709
                "[\u0abc\u0Ac5\u0Ac9\u0A8D\u0A91\u0ABD\u0Ad0]", /*roundtrip exclusions*/
 
710
        },
 
711
        new String [] {  "GUJARATI-TELUGU",
 
712
                "[:GUJARATI:]", "[:TELUGU:]",
 
713
                "[\u0c46\u0c4a\u0c55\u0c56\u0C0C\u0C0E\u0C12\u0C31\u0C61]", /*roundtrip exclusions*/
 
714
        },
 
715
 
 
716
        new String [] {  "KANNADA-GUJARATI",
 
717
                "[:KANNADA:]", "[:GUJARATI:]",
 
718
                "[\u0A81\u0abc\u0Ac5\u0Ac9\u0A8D\u0A91\u0ABD\u0Ad0]", /*roundtrip exclusions*/
 
719
        },
 
720
        new String [] {  "GUJARATI-KANNADA",
 
721
                "[:GUJARATI:]", "[:KANNADA:]",
 
722
                "[{\u0cb0\u0cbc}{\u0cb3\u0cbc}\u0cc6\u0cca\u0cd5\u0cd6\u0C8C\u0C8E\u0C92\u0CB1\u0CDE\u0CE1]", /*roundtrip exclusions*/
 
723
        },
 
724
 
 
725
        new String [] {  "MALAYALAM-GUJARATI",
 
726
                "[:MALAYALAM:]", "[:GUJARATI:]",
 
727
                "[\u0A81\u0ac4\u0acb\u0acc\u0abc\u0Ac5\u0Ac9\u0A8D\u0A91\u0ABD\u0Ad0]", /*roundtrip exclusions*/
 
728
        },
 
729
        new String [] {  "GUJARATI-MALAYALAM",
 
730
                "[:GUJARATI:]", "[:MALAYALAM:]",
 
731
                "[\u0d46\u0d4a\u0d4c\u0d55\u0d57\u0D0C\u0D0E\u0D12\u0D31\u0D34\u0D61]", /*roundtrip exclusions*/
 
732
        },
 
733
 
 
734
        new String [] {  "TAMIL-ORIYA",
 
735
                "[:TAMIL:]", "[:ORIYA:]",
 
736
                "[\u0B01\u0b3c\u0b43\u0b56\u0B0B\u0B0C\u0B16\u0B17\u0B18\u0B1B\u0B1D\u0B20\u0B21\u0B22\u0B25\u0B26\u0B27\u0B2B\u0B2C\u0B2D\u0B36\u0B3D\u0B5C\u0B5D\u0B5F\u0B60\u0B61\u0b70\u0b71]", /*roundtrip exclusions*/
 
737
        },
 
738
        new String [] {  "ORIYA-TAMIL",
 
739
                "[:ORIYA:]", "[:TAMIL:]",
 
740
                "[\u0bc6\u0bca\u0bc7\u0B8E\u0B92\u0BA9\u0BB1\u0BB4\u0BB5\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/
 
741
        },
 
742
 
 
743
        new String [] {  "TELUGU-ORIYA",
 
744
                "[:TELUGU:]", "[:ORIYA:]",
 
745
                "[\u0b3c\u0b57\u0b56\u0B3D\u0B5C\u0B5D\u0B5F\u0b70\u0b71]", /*roundtrip exclusions*/
 
746
        },
 
747
        new String [] {  "ORIYA-TELUGU",
 
748
                "[:ORIYA:]", "[:TELUGU:]",
 
749
                "[\u0c44\u0c46\u0c4a\u0c55\u0c47\u0C0E\u0C12\u0C31\u0C35]", /*roundtrip exclusions*/
 
750
        },
 
751
 
 
752
        new String [] {  "KANNADA-ORIYA",
 
753
                "[:KANNADA:]", "[:ORIYA:]",
 
754
                "[\u0B01\u0b3c\u0b57\u0B3D\u0B5C\u0B5D\u0B5F\u0b70\u0b71]", /*roundtrip exclusions*/
 
755
        },
 
756
        new String [] {  "ORIYA-KANNADA",
 
757
                "[:ORIYA:]", "[:KANNADA:]",
 
758
                "[{\u0cb0\u0cbc}{\u0cb3\u0cbc}\u0cc4\u0cc6\u0cca\u0cd5\u0cc7\u0C8E\u0C92\u0CB1\u0CB5\u0CDE]", /*roundtrip exclusions*/
 
759
        },
 
760
 
 
761
        new String [] {  "MALAYALAM-ORIYA",
 
762
                "[:MALAYALAM:]", "[:ORIYA:]",
 
763
                "[\u0B01\u0b3c\u0b56\u0B3D\u0B5C\u0B5D\u0B5F\u0b70\u0b71]", /*roundtrip exclusions*/
 
764
        },
 
765
        new String [] {  "ORIYA-MALAYALAM",
 
766
                "[:ORIYA:]", "[:MALAYALAM:]",
 
767
                "[\u0D47\u0D46\u0D4a\u0D0E\u0D12\u0D31\u0D34\u0D35]", /*roundtrip exclusions*/
 
768
        },
 
769
 
 
770
        new String [] {  "TELUGU-TAMIL",
 
771
                "[:TELUGU:]", "[:TAMIL:]",
 
772
                "[\u0bd7\u0ba9\u0bb4\u0BF0\u0BF1\u0BF2\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/
 
773
        },
 
774
        new String [] {  "TAMIL-TELUGU",
 
775
                "[:TAMIL:]", "[:TELUGU:]",
 
776
                "[\u0C01\u0c43\u0c44\u0c46\u0c47\u0c55\u0c56\u0c66\u0C0B\u0C0C\u0C16\u0C17\u0C18\u0C1B\u0C1D\u0C20\u0C21\u0C22\u0C25\u0C26\u0C27\u0C2B\u0C2C\u0C2D\u0C36\u0C60\u0C61]", /*roundtrip exclusions*/
 
777
        },
 
778
 
 
779
        new String [] {  "KANNADA-TAMIL",
 
780
                "[:KANNADA:]", "[:TAMIL:]",
 
781
                "[\u0bd7\u0bc6\u0ba9\u0bb4\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/
 
782
        },
 
783
        new String [] {  "TAMIL-KANNADA",
 
784
                "[:TAMIL:]", "[:KANNADA:]",
 
785
                "[\u0cc3\u0cc4\u0cc6\u0cc7\u0cd5\u0cd6\u0C8B\u0C8C\u0C96\u0C97\u0C98\u0C9B\u0C9D\u0CA0\u0CA1\u0CA2\u0CA5\u0CA6\u0CA7\u0CAB\u0CAC\u0CAD\u0CB6\u0cbc\u0cbd\u0CDE\u0CE0\u0CE1]", /*roundtrip exclusions*/
 
786
        },
 
787
 
 
788
        new String [] {  "MALAYALAM-TAMIL",
 
789
                "[:MALAYALAM:]", "[:TAMIL:]",
 
790
                "[\u0ba9\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/
 
791
        },
 
792
        new String [] {  "TAMIL-MALAYALAM",
 
793
                "[:TAMIL:]", "[:MALAYALAM:]",
 
794
                "[\u0d43\u0d12\u0D0B\u0D0C\u0D16\u0D17\u0D18\u0D1B\u0D1D\u0D20\u0D21\u0D22\u0D25\u0D26\u0D27\u0D2B\u0D2C\u0D2D\u0D36\u0D60\u0D61]", /*roundtrip exclusions*/
 
795
        },
 
796
 
 
797
        new String [] {  "KANNADA-TELUGU",
 
798
                "[:KANNADA:]", "[:TELUGU:]",
 
799
                "[\u0C01\u0c3f\u0c46\u0c48\u0c4a]", /*roundtrip exclusions*/
 
800
        },
 
801
        new String [] {  "TELUGU-KANNADA",
 
802
                "[:TELUGU:]", "[:KANNADA:]",
 
803
                "[\u0cc8\u0cd5\u0cd6\u0CDE\u0cbc\u0cbd]", /*roundtrip exclusions*/
 
804
        },
 
805
 
 
806
        new String [] {  "MALAYALAM-TELUGU",
 
807
                "[:MALAYALAM:]", "[:TELUGU:]",
 
808
                "[\u0C01\u0c44\u0c4a\u0c4c\u0c4b\u0c55\u0c56]", /*roundtrip exclusions*/
 
809
        },
 
810
        new String [] {  "TELUGU-MALAYALAM",
 
811
                "[:TELUGU:]", "[:MALAYALAM:]",
 
812
                "[\u0d4c\u0d57\u0D34]", /*roundtrip exclusions*/
 
813
        },
 
814
 
 
815
        new String [] {  "MALAYALAM-KANNADA",
 
816
                "[:MALAYALAM:]", "[:KANNADA:]",
 
817
                "[\u0cbc\u0cbd\u0cc4\u0cc6\u0cca\u0ccc\u0ccb\u0cd5\u0cd6\u0cDe]", /*roundtrip exclusions*/
 
818
        },
 
819
        new String [] {  "Latin-Bengali",
 
820
                latinForIndic, "[[:Bengali:][\u0964\u0965]]",
 
821
                "[\u0965\u09f0-\u09fa\u09ce]", /*roundtrip exclusions*/
 
822
        },
 
823
        new String [] {  "Latin-Gurmukhi",
 
824
                latinForIndic, "[[:Gurmukhi:][\u0964\u0965]]",
 
825
                "[\u0a01\u0a02\u0965\u0a72\u0a73\u0a74]", /*roundtrip exclusions*/
 
826
        },
 
827
        new String [] {  "Latin-Gujarati",
 
828
                latinForIndic, "[[:Gujarati:][\u0964\u0965]]",
 
829
                "[\u0965]", /*roundtrip exclusions*/
 
830
        },
 
831
        new String [] {  "Latin-Oriya",
 
832
                latinForIndic, "[[:Oriya:][\u0964\u0965]]",
 
833
                "[\u0965\u0b70]", /*roundtrip exclusions*/
 
834
        },
 
835
        new String [] {  "Latin-Tamil",
 
836
                latinForIndic, "[:Tamil:]",
 
837
                "[\u0BF0\u0BF1\u0BF2]", /*roundtrip exclusions*/
 
838
        },
 
839
        new String [] {  "Latin-Telugu",
 
840
                latinForIndic, "[:Telugu:]",
 
841
                null, /*roundtrip exclusions*/
 
842
        },
 
843
        new String [] {  "Latin-Kannada",
 
844
                latinForIndic, "[:Kannada:]",
 
845
                null, /*roundtrip exclusions*/
 
846
        },
 
847
        new String [] {  "Latin-Malayalam",
 
848
                latinForIndic, "[:Malayalam:]",
 
849
                null, /*roundtrip exclusions*/
 
850
        },
 
851
    };
 
852
 
 
853
    public void TestInterIndic() throws Exception{
 
854
        long start = System.currentTimeMillis();
 
855
        int num = interIndicArray.length;
 
856
        if (isQuick()) {
 
857
            logln("Testing only 5 of "+ interIndicArray.length+" Skipping rest (use -e for exhaustive)");
 
858
            num = 5;
 
859
        }
 
860
        if (skipIfBeforeICU(4,5,0)) {
 
861
            logln("Warning: TestInterIndic needs to be updated to remove delete the section marked [:Age=4.1:] filter");
 
862
        } else {
 
863
            //          We temporarily filter against Unicode 4.1, but we only do this
 
864
            // before version 3.4.
 
865
            errln("FAIL: TestInterIndic needs to be updated to remove delete the [:Age=4.1:] filter ");
 
866
            return;
 
867
        }
 
868
        for(int i=0; i<num;i++){
 
869
            logln("Testing " + interIndicArray[i][0] + " at index " + i   );
 
870
            /*TODO: uncomment the line below when the transliterator is fixed
 
871
            new Test(interIndicArray[i][0], 50)
 
872
                .test(interIndicArray[i][1],
 
873
                      interIndicArray[i][2],
 
874
                      interIndicArray[i][3],
 
875
                      this, new LegalIndic());
 
876
             */
 
877
            /* comment lines below  when transliterator is fixed */
 
878
            // start
 
879
            new Test(interIndicArray[i][0], 50)
 
880
            .test("["+interIndicArray[i][1]+" &[:Age=4.1:]]",
 
881
                    "["+interIndicArray[i][2]+" &[:Age=4.1:]]",
 
882
                    interIndicArray[i][3],
 
883
                    this, new LegalIndic());
 
884
            //end
 
885
        }
 
886
        showElapsed(start, "TestInterIndic");
 
887
    }
 
888
 
 
889
    //---------------
 
890
    // End Indic
 
891
    //---------------
 
892
 
 
893
    public static class Legal {
 
894
        public boolean is(String sourceString) {return true;}
 
895
    }
 
896
 
 
897
    public static class LegalJamo extends Legal {
 
898
        // any initial must be followed by a medial (or initial)
 
899
        // any medial must follow an initial (or medial)
 
900
        // any final must follow a medial (or final)
 
901
 
 
902
        public boolean is(String sourceString) {
 
903
            try {
 
904
                int t;
 
905
                String decomp = Normalizer.normalize(sourceString, Normalizer.NFD);
 
906
                for (int i = 0; i < decomp.length(); ++i) { // don't worry about surrogates
 
907
                    switch (getType(decomp.charAt(i))) {
 
908
                    case 0:
 
909
                        t = getType(decomp.charAt(i+1));
 
910
                        if (t != 0 && t != 1) return false;
 
911
                        break;
 
912
                    case 1:
 
913
                        t = getType(decomp.charAt(i-1));
 
914
                        if (t != 0 && t != 1) return false;
 
915
                        break;
 
916
                    case 2:
 
917
                        t = getType(decomp.charAt(i-1));
 
918
                        if (t != 1 && t != 2) return false;
 
919
                        break;
 
920
                    }
 
921
                }
 
922
                return true;
 
923
            } catch (StringIndexOutOfBoundsException e) {
 
924
                return false;
 
925
            }
 
926
        }
 
927
 
 
928
        public int getType(char c) {
 
929
            if ('\u1100' <= c && c <= '\u1112') return 0;
 
930
            else if ('\u1161' <= c && c  <= '\u1175') return 1;
 
931
            else if ('\u11A8' <= c && c  <= '\u11C2') return 2;
 
932
            return -1; // other
 
933
        }
 
934
    }
 
935
 
 
936
    //static BreakIterator thaiBreak = BreakIterator.getWordInstance(new Locale("th", "TH"));
 
937
    // anything is legal except word ending with Logical-order-exception
 
938
    public static class LegalThai extends Legal {
 
939
        public boolean is(String sourceString) {
 
940
            if (sourceString.length() == 0) return true;
 
941
            char ch = sourceString.charAt(sourceString.length() - 1); // don't worry about surrogates.
 
942
            if (UCharacter.hasBinaryProperty(ch, UProperty.LOGICAL_ORDER_EXCEPTION)) return false;
 
943
 
 
944
 
 
945
            // disallow anything with a wordbreak between
 
946
            /*
 
947
            if (UTF16.countCodePoint(sourceString) <= 1) return true;
 
948
            thaiBreak.setText(sourceString);
 
949
            for (int pos = thaiBreak.first(); pos != BreakIterator.DONE; pos = thaiBreak.next()) {
 
950
                if (pos > 0 && pos < sourceString.length()) {
 
951
                    System.out.println("Skipping " + Utility.escape(sourceString));
 
952
                    return false;
 
953
                }
 
954
            }
 
955
             */
 
956
            return true;
 
957
        }
 
958
    }
 
959
 
 
960
    // anything is legal except that Final letters can't be followed by letter; NonFinal must be
 
961
    public static class LegalHebrew extends Legal {
 
962
        static UnicodeSet FINAL = new UnicodeSet("[\u05DA\u05DD\u05DF\u05E3\u05E5]");
 
963
        static UnicodeSet NON_FINAL = new UnicodeSet("[\u05DB\u05DE\u05E0\u05E4\u05E6]");
 
964
        static UnicodeSet LETTER = new UnicodeSet("[:letter:]");
 
965
        public boolean is(String sourceString) {
 
966
            if (sourceString.length() == 0) return true;
 
967
            // don't worry about surrogates.
 
968
            for (int i = 0; i < sourceString.length(); ++i) {
 
969
                char ch = sourceString.charAt(i);
 
970
                char next = i+1 == sourceString.length() ? '\u0000' : sourceString.charAt(i);
 
971
                if (FINAL.contains(ch)) {
 
972
                    if (LETTER.contains(next)) return false;
 
973
                } else if (NON_FINAL.contains(ch)) {
 
974
                    if (!LETTER.contains(next)) return false;
 
975
                }
 
976
            }
 
977
            return true;
 
978
        }
 
979
    }
 
980
 
 
981
 
 
982
    public static class LegalGreek extends Legal {
 
983
 
 
984
        boolean full;
 
985
 
 
986
        public LegalGreek(boolean full) {
 
987
            this.full = full;
 
988
        }
 
989
 
 
990
        static final char IOTA_SUBSCRIPT = '\u0345';
 
991
        static final UnicodeSet breathing = new UnicodeSet("[\\u0313\\u0314']");
 
992
        static final UnicodeSet validSecondVowel = new UnicodeSet("[\\u03C5\\u03B9\\u03A5\\u0399]");
 
993
 
 
994
        public static boolean isVowel(char c) {
 
995
            return "\u03B1\u03B5\u03B7\u03B9\u03BF\u03C5\u03C9\u0391\u0395\u0397\u0399\u039F\u03A5\u03A9".indexOf(c) >= 0;
 
996
        }
 
997
 
 
998
        public static boolean isRho(char c) {
 
999
            return "\u03C1\u03A1".indexOf(c) >= 0;
 
1000
        }
 
1001
 
 
1002
        public boolean is(String sourceString) {
 
1003
            try {
 
1004
                String decomp = Normalizer.normalize(sourceString, Normalizer.NFD);
 
1005
 
 
1006
                // modern is simpler: don't care about anything but a grave
 
1007
                if (!full) {
 
1008
                    //if (sourceString.equals("\u039C\u03C0")) return false;
 
1009
                    for (int i = 0; i < decomp.length(); ++i) {
 
1010
                        char c = decomp.charAt(i);
 
1011
                        // exclude all the accents
 
1012
                        if (c == '\u0313' || c == '\u0314' || c == '\u0300' || c == '\u0302'
 
1013
                            || c == '\u0342' || c == '\u0345'
 
1014
                        ) return false;
 
1015
                    }
 
1016
                    return true;
 
1017
                }
 
1018
 
 
1019
                // Legal full Greek has breathing marks IFF there is a vowel or RHO at the start
 
1020
                // IF it has them, it has exactly one.
 
1021
                // IF it starts with a RHO, then the breathing mark must come before the second letter.
 
1022
                // IF it starts with a vowel, then it must before the third letter.
 
1023
                //  it will only come after the second if of the format [vowel] [no iota subscript!] [upsilon or iota]
 
1024
                // Since there are no surrogates in greek, don't worry about them
 
1025
 
 
1026
                boolean firstIsVowel = false;
 
1027
                boolean firstIsRho = false;
 
1028
                boolean noLetterYet = true;
 
1029
                int breathingCount = 0;
 
1030
                int letterCount = 0;
 
1031
                //int breathingPosition = -1;
 
1032
 
 
1033
                for (int i = 0; i < decomp.length(); ++i) {
 
1034
                    char c = decomp.charAt(i);
 
1035
                    if (UCharacter.isLetter(c)) {
 
1036
                        ++letterCount;
 
1037
                        if (firstIsVowel && !validSecondVowel.contains(c) && breathingCount == 0) return false;
 
1038
                        if (noLetterYet) {
 
1039
                            noLetterYet = false;
 
1040
                            firstIsVowel = isVowel(c);
 
1041
                            firstIsRho = isRho(c);
 
1042
                        }
 
1043
                        if (firstIsRho && letterCount == 2 && breathingCount == 0) return false;
 
1044
                    }
 
1045
                    if (c == IOTA_SUBSCRIPT && firstIsVowel && breathingCount == 0) return false;
 
1046
                    if (breathing.contains(c)) {
 
1047
                        // breathingPosition = i;
 
1048
                        ++breathingCount;
 
1049
                    }
 
1050
                }
 
1051
 
 
1052
                if (firstIsVowel || firstIsRho) return breathingCount == 1;
 
1053
                return breathingCount == 0;
 
1054
            } catch (Throwable t) {
 
1055
                System.out.println(t.getClass().getName() + " " + t.getMessage());
 
1056
                return true;
 
1057
            }
 
1058
        }
 
1059
    }
 
1060
 
 
1061
    static class Test {
 
1062
 
 
1063
        // Writer for the HTML report; test(...) points it at an in-memory buffer.
        PrintWriter out;
 
1064
 
 
1065
        // ID of the transliterator under test; also used to name the error log file.
        private String transliteratorID;
 
1066
        // Threshold compared against errorCount when reporting ("(at least!)").
        private int errorLimit = 500;
 
1067
        // Number of errors recorded so far; a non-zero value makes test(...) write the log file.
        private int errorCount = 0;
 
1068
        private long pairLimit  = 1000000; // make default be 1M.
 
1069
        // Set by the two-argument constructor; defaults to 100.
        private int density = 100;
 
1070
        // Source pattern with neverOk removed.
        UnicodeSet sourceRange;
 
1071
        // Target pattern with neverOk removed.
        UnicodeSet targetRange;
 
1072
        // Back-to-source pattern with okAnyway added.
        UnicodeSet toSource;
 
1073
        // Target pattern with okAnyway added.
        UnicodeSet toTarget;
 
1074
        // Parsed round-trip exclusions; empty when none were supplied.
        UnicodeSet roundtripExclusions;
 
1075
 
 
1076
        // Test framework object used for logln/errln and isQuick().
        RoundTripTest log;
 
1077
        // Filter deciding which source strings are legal input.
        Legal legalSource;
 
1078
        // Initialised to the "[:other:]" set by test(...).
        UnicodeSet badCharacters;
 
1079
 
 
1080
        /*
 
1081
         * create a test for the given script transliterator.
 
1082
         */
 
1083
        Test(String transliteratorID) {
 
1084
            this(transliteratorID, 100);
 
1085
        }
 
1086
 
 
1087
        Test(String transliteratorID, int dens) {
 
1088
            this.transliteratorID = transliteratorID;
 
1089
            this.density = dens;
 
1090
        }
 
1091
 
 
1092
        public void setErrorLimit(int limit) {
 
1093
            errorLimit = limit;
 
1094
        }
 
1095
 
 
1096
        public void setPairLimit(int limit) {
 
1097
            pairLimit = limit;
 
1098
        }
 
1099
 
 
1100
        // Added to do better equality check.
 
1101
 
 
1102
        public static boolean isSame(String a, String b) {
 
1103
            if (a.equals(b)) return true;
 
1104
            if (a.equalsIgnoreCase(b) && isCamel(a)) return true;
 
1105
            a = Normalizer.normalize(a, Normalizer.NFD);
 
1106
            b = Normalizer.normalize(b, Normalizer.NFD);
 
1107
            if (a.equals(b)) return true;
 
1108
            if (a.equalsIgnoreCase(b) && isCamel(a)) return true;
 
1109
            return false;
 
1110
        }
 
1111
 
 
1112
        /*
 
1113
        public boolean includesSome(UnicodeSet set, String a) {
 
1114
            int cp;
 
1115
            for (int i = 0; i < a.length(); i += UTF16.getCharCount(cp)) {
 
1116
                cp = UTF16.charAt(a, i);
 
1117
                if (set.contains(cp)) return true;
 
1118
            }
 
1119
            return false;
 
1120
        }
 
1121
         */
 
1122
 
 
1123
        public static boolean isCamel(String a) {
 
1124
            //System.out.println("CamelTest");
 
1125
            // see if string is of the form aB; e.g. lower, then upper or title
 
1126
            int cp;
 
1127
            boolean haveLower = false;
 
1128
            for (int i = 0; i < a.length(); i += UTF16.getCharCount(cp)) {
 
1129
                cp = UTF16.charAt(a, i);
 
1130
                int t = UCharacter.getType(cp);
 
1131
                //System.out.println("\t" + t + " " + Integer.toString(cp,16) + " " + UCharacter.getName(cp));
 
1132
                switch (t) {
 
1133
                case Character.UPPERCASE_LETTER:
 
1134
                    if (haveLower) return true;
 
1135
                    break;
 
1136
                case Character.TITLECASE_LETTER:
 
1137
                    if (haveLower) return true;
 
1138
                    // drop through, since second letter is lower.
 
1139
                case Character.LOWERCASE_LETTER:
 
1140
                    haveLower = true;
 
1141
                    break;
 
1142
                }
 
1143
            }
 
1144
            //System.out.println("FALSE");
 
1145
            return false;
 
1146
        }
 
1147
 
 
1148
        // Everything that is not a letter; test(...) adds this set to toSource and toTarget.
        static final UnicodeSet okAnyway = new UnicodeSet("[^[:Letter:]]");
 
1149
        // Characters matching [:Other:]; test(...) removes them from sourceRange and targetRange.
        static final UnicodeSet neverOk = new UnicodeSet("[:Other:]");
 
1150
 
 
1151
        public void test(String srcRange, String trgtRange,
 
1152
                String rdtripExclusions, RoundTripTest logger, Legal legalSrc)
 
1153
        throws java.io.IOException {
 
1154
            test(srcRange, trgtRange, srcRange, rdtripExclusions, logger, legalSrc);
 
1155
        }
 
1156
 
 
1157
        /**
 
1158
         * Will test 
 
1159
         * that everything in sourceRange maps to targetRange,
 
1160
         * that everything in targetRange maps to backtoSourceRange
 
1161
         * that everything roundtrips from target -> source -> target, except roundtripExceptions
 
1162
         */
 
1163
        public void test(String srcRange, String trgtRange, String backtoSourceRange,
 
1164
                String rdtripExclusions, RoundTripTest logger, Legal legalSrc)
 
1165
        throws java.io.IOException {
 
1166
 
 
1167
            legalSource = legalSrc;
 
1168
            sourceRange = new UnicodeSet(srcRange);
 
1169
            sourceRange.removeAll(neverOk);
 
1170
 
 
1171
            targetRange = new UnicodeSet(trgtRange);
 
1172
            targetRange.removeAll(neverOk);
 
1173
 
 
1174
            toSource = new UnicodeSet(backtoSourceRange);
 
1175
            toSource.addAll(okAnyway);
 
1176
 
 
1177
            toTarget = new UnicodeSet(trgtRange);
 
1178
            toTarget.addAll(okAnyway);
 
1179
 
 
1180
            if (rdtripExclusions != null && rdtripExclusions.length() > 0) {
 
1181
                roundtripExclusions = new UnicodeSet(rdtripExclusions);
 
1182
            }else{
 
1183
                roundtripExclusions = new UnicodeSet(); // empty
 
1184
            }
 
1185
 
 
1186
            log = logger;
 
1187
 
 
1188
            log.logln(Utility.escape("Source:  " + sourceRange));
 
1189
            log.logln(Utility.escape("Target:  " + targetRange));
 
1190
            log.logln(Utility.escape("Exclude: " + roundtripExclusions));
 
1191
            if (log.isQuick()) log.logln("Abbreviated Test");
 
1192
 
 
1193
            badCharacters = new UnicodeSet("[:other:]");
 
1194
 
 
1195
            // make a UTF-8 output file we can read with a browser
 
1196
 
 
1197
            // note: check that every transliterator transliterates the null string correctly!
 
1198
 
 
1199
            // {dlf} reorganize so can run test in protected security environment
 
1200
            //              String logFileName = "test_" + transliteratorID.replace('/', '_') + ".html";
 
1201
 
 
1202
            //              File lf = new File(logFileName);
 
1203
            //              log.logln("Creating log file " + lf.getAbsoluteFile());
 
1204
 
 
1205
            //              out = new PrintWriter(new BufferedWriter(new OutputStreamWriter(
 
1206
            //                        new FileOutputStream(logFileName), "UTF8"), 4*1024));
 
1207
 
 
1208
            ByteArrayOutputStream bast = new ByteArrayOutputStream();
 
1209
            out = new PrintWriter(new BufferedWriter(new OutputStreamWriter(
 
1210
                    bast, "UTF8"), 4*1024));
 
1211
            //out.write('\uFFEF');    // BOM
 
1212
            out.println("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">");
 
1213
            out.println("<HTML><HEAD>");
 
1214
            out.println("<META content=\"text/html; charset=utf-8\" http-equiv=Content-Type></HEAD>");
 
1215
            out.println("<BODY bgcolor='#FFFFFF' style='font-family: Arial Unicode MS'>");
 
1216
 
 
1217
            try {
 
1218
                test2();
 
1219
            } catch (TestTruncated e) {
 
1220
                out.println(e.getMessage());
 
1221
            }
 
1222
            out.println("</BODY></HTML>");
 
1223
            out.close();
 
1224
 
 
1225
            if (errorCount > 0) {
 
1226
                try {
 
1227
                    File translitErrorDirectory = new File("translitErrorLogs");
 
1228
                    if (!translitErrorDirectory.exists()) {
 
1229
                        translitErrorDirectory.mkdir();
 
1230
                    }
 
1231
                    String logFileName = "translitErrorLogs/test_" + transliteratorID.replace('/', '_') + ".html";
 
1232
                    File lf = new File(logFileName);
 
1233
                    logger.logln("Creating log file " + lf.getAbsoluteFile());
 
1234
                    FileOutputStream fos = new FileOutputStream(lf);
 
1235
                    fos.write(bast.toByteArray());
 
1236
                    fos.close();
 
1237
                    logger.errln(transliteratorID + " errors: "
 
1238
                            + errorCount + (errorCount > errorLimit ? " (at least!)" : "")
 
1239
                            + ", see " + lf.getAbsoluteFile());
 
1240
                }
 
1241
                catch (SecurityException e) {
 
1242
                    logger.errln(transliteratorID + " errors: "
 
1243
                            + errorCount + (errorCount > errorLimit ? " (at least!)" : "")
 
1244
                            + ", no log provided due to protected test domain");
 
1245
                }
 
1246
            } else {
 
1247
                logger.logln(transliteratorID + " ok");
 
1248
                //                  new File(logFileName).delete();
 
1249
            }
 
1250
        }
 
1251
 
 
1252
        // ok if at least one is not equal
 
1253
        public boolean checkIrrelevants(Transliterator t, String irrelevants) {
 
1254
            for (int i = 0; i < irrelevants.length(); ++i) {
 
1255
                char c = irrelevants.charAt(i);
 
1256
                String cs = UTF16.valueOf(c);
 
1257
                String targ = t.transliterate(cs);
 
1258
                if (cs.equals(targ)) return true;
 
1259
            }
 
1260
            return false;
 
1261
        }
 
1262
 
 
1263
        // Iterators shared by the check* methods of this class: usi drives the
        // single-character loops and the outer loop of the pair ("Doubles") checks,
        // usi2 drives the inner loop of the pair checks.
        AbbreviatedUnicodeSetIterator usi = new AbbreviatedUnicodeSetIterator();
 
1264
        AbbreviatedUnicodeSetIterator usi2 = new AbbreviatedUnicodeSetIterator();
 
1265
 
 
1266
        // Forward transliterator; assigned in test2() from transliteratorID.
        Transliterator sourceToTarget;
 
1267
        // Inverse of sourceToTarget; assigned in test2() via getInverse().
        Transliterator targetToSource;
 
1268
 
 
1269
        public void test2() {
 
1270
 
 
1271
            sourceToTarget = Transliterator.getInstance(transliteratorID);
 
1272
            targetToSource = sourceToTarget.getInverse();
 
1273
 
 
1274
            log.logln("Checking that at least one irrevant characters is not NFC'ed");
 
1275
            out.println("<h3>Checking that at least one irrevant characters is not NFC'ed</h3>");
 
1276
 
 
1277
            String irrelevants = "\u2000\u2001\u2126\u212A\u212B\u2329"; // string is from NFC_NO in the UCD
 
1278
 
 
1279
            if (!checkIrrelevants(sourceToTarget, irrelevants)) {
 
1280
                logFails("" + getSourceTarget(transliteratorID) + ", Must not NFC everything");
 
1281
            }
 
1282
            if (!checkIrrelevants(targetToSource, irrelevants)) {
 
1283
                logFails("" + getTargetSource(transliteratorID) + ", irrelevants");
 
1284
            }
 
1285
 
 
1286
            if (EXTRA_TESTS) {
 
1287
                log.logln("Checking that toRules works");
 
1288
                String rules = "";
 
1289
                Transliterator sourceToTarget2;
 
1290
                Transliterator targetToSource2;
 
1291
                try {
 
1292
                    rules = sourceToTarget.toRules(false);
 
1293
                    sourceToTarget2 = Transliterator.createFromRules("s2t2", rules, Transliterator.FORWARD);
 
1294
                    if (PRINT_RULES) {
 
1295
                        out.println("<h3>Forward Rules:</h3><p>");
 
1296
                        out.println(TestUtility.replace(rules, "\n", "\u200E<br>\n\u200E"));
 
1297
                        out.println("</p>");
 
1298
                    }
 
1299
                    rules = targetToSource.toRules(false);
 
1300
                    targetToSource2 = Transliterator.createFromRules("t2s2", rules, Transliterator.FORWARD);
 
1301
                    if (PRINT_RULES) {
 
1302
                        out.println("<h3>Backward Rules:</h3><p>");
 
1303
                        out.println(TestUtility.replace(rules, "\n", "\u200E<br>\n\u200E"));
 
1304
                        out.println("</p>");
 
1305
                    }
 
1306
                } catch (RuntimeException e) {
 
1307
                    out.println("<h3>Broken Rules:</h3><p>");
 
1308
                    out.println(TestUtility.replace(rules, "\n", "<br>\n"));
 
1309
                    out.println("</p>");
 
1310
                    out.flush();
 
1311
                    throw e;
 
1312
                }
 
1313
 
 
1314
                out.println("<h3>Roundtrip Exclusions: " + new UnicodeSet(roundtripExclusions) + "</h3>");
 
1315
                out.flush();
 
1316
 
 
1317
                checkSourceTargetSource(sourceToTarget2);
 
1318
 
 
1319
                checkTargetSourceTarget(targetToSource2);
 
1320
            }
 
1321
 
 
1322
            UnicodeSet failSourceTarg = new UnicodeSet();
 
1323
 
 
1324
 
 
1325
            checkSourceTargetSingles(failSourceTarg);
 
1326
 
 
1327
            boolean quickRt = checkSourceTargetDoubles(failSourceTarg);
 
1328
 
 
1329
            UnicodeSet failTargSource = new UnicodeSet();
 
1330
            UnicodeSet failRound = new UnicodeSet();
 
1331
 
 
1332
            checkTargetSourceSingles(failTargSource, failRound);
 
1333
            checkTargetSourceDoubles(quickRt, failTargSource, failRound);
 
1334
        }
 
1335
 
 
1336
        private void checkSourceTargetSource(Transliterator sourceToTarget2) {
 
1337
            log.logln("Checking that source -> target -> source");
 
1338
            out.println("<h3>Checking that source -> target -> source</h3>");
 
1339
 
 
1340
            usi.reset(sourceRange);
 
1341
            while (usi.next()) {
 
1342
                int c = usi.codepoint;
 
1343
 
 
1344
                String cs = UTF16.valueOf(c);
 
1345
                String targ = sourceToTarget.transliterate(cs);
 
1346
                String targ2 = sourceToTarget2.transliterate(cs);
 
1347
                if (!targ.equals(targ2)) {
 
1348
                    logToRulesFails("" + getSourceTarget(transliteratorID) + ", toRules", cs, targ, targ2);
 
1349
                }
 
1350
            }
 
1351
        }
 
1352
 
 
1353
        private void checkTargetSourceTarget(Transliterator targetToSource2) {
 
1354
            log.logln("Checking that target -> source -> target");
 
1355
            out.println("<h3>Checking that target -> source -> target</h3>");
 
1356
            usi.reset(targetRange);
 
1357
            while (usi.next()) {
 
1358
                int c = usi.codepoint;
 
1359
 
 
1360
                String cs = UTF16.valueOf(c);
 
1361
                String targ = targetToSource.transliterate(cs);
 
1362
                String targ2 = targetToSource2.transliterate(cs);
 
1363
                if (!targ.equals(targ2)) {
 
1364
                    logToRulesFails("" + getTargetSource(transliteratorID) + ", toRules", cs, targ, targ2);
 
1365
                }
 
1366
            }
 
1367
        }
 
1368
 
 
1369
        private void checkSourceTargetSingles(UnicodeSet failSourceTarg) {
 
1370
            log.logln("Checking that source characters convert to target - Singles");
 
1371
            out.println("<h3>Checking that source characters convert to target - Singles</h3>");
 
1372
 
 
1373
 
 
1374
            /*
 
1375
            for (char c = 0; c < 0xFFFF; ++c) {
 
1376
                if (!sourceRange.contains(c)) continue;
 
1377
             */
 
1378
            usi.reset(sourceRange);
 
1379
            while (usi.next()) {
 
1380
                int c = usi.codepoint;
 
1381
 
 
1382
                String cs = UTF16.valueOf(c);
 
1383
                String targ = sourceToTarget.transliterate(cs);
 
1384
                if (!toTarget.containsAll(targ)
 
1385
                        || badCharacters.containsSome(targ)) {
 
1386
                    String targD = Normalizer.normalize(targ, Normalizer.NFD);
 
1387
                    if (!toTarget.containsAll(targD)
 
1388
                            || badCharacters.containsSome(targD)) {
 
1389
                        logWrongScript("" + getSourceTarget(transliteratorID) + "", cs, targ, toTarget, badCharacters);
 
1390
                        failSourceTarg.add(c);
 
1391
                        continue;
 
1392
                    }
 
1393
                }
 
1394
 
 
1395
                String cs2 = Normalizer.normalize(cs, Normalizer.NFD);
 
1396
                String targ2 = sourceToTarget.transliterate(cs2);
 
1397
                if (!targ.equals(targ2)) {
 
1398
                    logNotCanonical("" + getSourceTarget(transliteratorID) + "", cs, targ, cs2, targ2);
 
1399
                }
 
1400
            }
 
1401
        }
 
1402
 
 
1403
        private boolean checkSourceTargetDoubles(UnicodeSet failSourceTarg) {
 
1404
            log.logln("Checking that source characters convert to target - Doubles");
 
1405
            out.println("<h3>Checking that source characters convert to target - Doubles</h3>");
 
1406
            long count = 0;
 
1407
 
 
1408
            /*
 
1409
            for (char c = 0; c < 0xFFFF; ++c) {
 
1410
                if (TestUtility.isUnassigned(c) ||
 
1411
                    !sourceRange.contains(c)) continue;
 
1412
                if (failSourceTarg.get(c)) continue;
 
1413
 
 
1414
             */
 
1415
 
 
1416
            UnicodeSet sourceRangeMinusFailures = new UnicodeSet(sourceRange);
 
1417
            sourceRangeMinusFailures.removeAll(failSourceTarg);
 
1418
 
 
1419
            boolean quickRt = log.getInclusion() < 10;
 
1420
 
 
1421
            usi.reset(sourceRangeMinusFailures, quickRt, density);
 
1422
 
 
1423
            while (usi.next()) {
 
1424
                int c = usi.codepoint;
 
1425
 
 
1426
                /*
 
1427
                for (char d = 0; d < 0xFFFF; ++d) {
 
1428
                    if (TestUtility.isUnassigned(d) ||
 
1429
                        !sourceRange.contains(d)) continue;
 
1430
                    if (failSourceTarg.get(d)) continue;
 
1431
                 */
 
1432
                log.logln(count + "/" + pairLimit + " Checking starting with " + UTF16.valueOf(c));
 
1433
                usi2.reset(sourceRangeMinusFailures, quickRt, density);
 
1434
 
 
1435
                while (usi2.next()) {
 
1436
                    int d = usi2.codepoint;
 
1437
                    ++count;
 
1438
 
 
1439
                    String cs = UTF16.valueOf(c) + UTF16.valueOf(d);
 
1440
                    String targ = sourceToTarget.transliterate(cs);
 
1441
                    if (!toTarget.containsAll(targ)
 
1442
                            || badCharacters.containsSome(targ)) {
 
1443
                        String targD = Normalizer.normalize(targ, Normalizer.NFD);
 
1444
                        if (!toTarget.containsAll(targD)
 
1445
                                || badCharacters.containsSome(targD)) {
 
1446
                            logWrongScript("" + getSourceTarget(transliteratorID) + "", cs, targ, toTarget, badCharacters);
 
1447
                            continue;
 
1448
                        }
 
1449
                    }
 
1450
                    String cs2 = Normalizer.normalize(cs, Normalizer.NFD);
 
1451
                    String targ2 = sourceToTarget.transliterate(cs2);
 
1452
                    if (!targ.equals(targ2)) {
 
1453
                        logNotCanonical("" + getSourceTarget(transliteratorID) + "", cs, targ, cs2, targ2);
 
1454
                    }
 
1455
                }
 
1456
            }
 
1457
            return quickRt;
 
1458
        }
 
1459
 
 
1460
        void checkTargetSourceSingles(UnicodeSet failTargSource, UnicodeSet failRound) {
 
1461
            log.logln("Checking that target characters convert to source and back - Singles");
 
1462
            out.println("<h3>Checking that target characters convert to source and back - Singles</h3>");
 
1463
 
 
1464
 
 
1465
            /*for (char c = 0; c < 0xFFFF; ++c) {
 
1466
                if (TestUtility.isUnassigned(c) ||
 
1467
                    !targetRange.contains(c)) continue;
 
1468
             */
 
1469
 
 
1470
            usi.reset(targetRange);
 
1471
            while (usi.next()) {
 
1472
                String cs;
 
1473
                int c;
 
1474
                if(usi.codepoint == UnicodeSetIterator.IS_STRING){
 
1475
                    cs = usi.string;
 
1476
                    c = UTF16.charAt(cs,0);
 
1477
                }else{
 
1478
                    c = usi.codepoint;
 
1479
                    cs =UTF16.valueOf(c);
 
1480
                }
 
1481
 
 
1482
                String targ = targetToSource.transliterate(cs);
 
1483
                String reverse = sourceToTarget.transliterate(targ);
 
1484
 
 
1485
                if (!toSource.containsAll(targ)
 
1486
                        || badCharacters.containsSome(targ)) {
 
1487
                    String targD = Normalizer.normalize(targ, Normalizer.NFD);
 
1488
                    if (!toSource.containsAll(targD)
 
1489
                            || badCharacters.containsSome(targD)) {
 
1490
                        /*UnicodeSet temp = */new UnicodeSet().addAll(targD);
 
1491
                        logWrongScript("" + getTargetSource(transliteratorID) + "", cs, targ, toSource, badCharacters);
 
1492
                        failTargSource.add(cs);
 
1493
                        continue;
 
1494
                    }
 
1495
                }
 
1496
                if (!isSame(cs, reverse) && !roundtripExclusions.contains(c)
 
1497
                        && !roundtripExclusions.contains(cs)) {
 
1498
                    logRoundTripFailure(cs,targetToSource.getID(), targ,sourceToTarget.getID(), reverse);
 
1499
                    failRound.add(c);
 
1500
                    continue;
 
1501
                }
 
1502
                String targ2 = Normalizer.normalize(targ, Normalizer.NFD);
 
1503
                String reverse2 = sourceToTarget.transliterate(targ2);
 
1504
                if (!reverse.equals(reverse2)) {
 
1505
                    logNotCanonical("" + getTargetSource(transliteratorID) + "", targ, reverse, targ2, reverse2);
 
1506
                }
 
1507
            }
 
1508
 
 
1509
        }
 
1510
 
 
1511
        private void checkTargetSourceDoubles(boolean quickRt, UnicodeSet failTargSource,
 
1512
                UnicodeSet failRound) {
 
1513
            log.logln("Checking that target characters convert to source and back - Doubles");
 
1514
            out.println("<h3>Checking that target characters convert to source and back - Doubles</h3>");
 
1515
            long count = 0;
 
1516
 
 
1517
            UnicodeSet targetRangeMinusFailures = new UnicodeSet(targetRange);
 
1518
            targetRangeMinusFailures.removeAll(failTargSource);
 
1519
            targetRangeMinusFailures.removeAll(failRound);
 
1520
 
 
1521
            //char[] buf = new char[4]; // maximum we can have with 2 code points
 
1522
            /*
 
1523
            for (char c = 0; c < 0xFFFF; ++c) {
 
1524
                if (TestUtility.isUnassigned(c) ||
 
1525
                    !targetRange.contains(c)) continue;
 
1526
             */
 
1527
            
 
1528
            usi.reset(targetRangeMinusFailures, quickRt, density);
 
1529
 
 
1530
            while (usi.next()) {
 
1531
                int c = usi.codepoint;
 
1532
 
 
1533
                //log.log(TestUtility.hex(c));
 
1534
 
 
1535
                /*
 
1536
                for (char d = 0; d < 0xFFFF; ++d) {
 
1537
                    if (TestUtility.isUnassigned(d) ||
 
1538
                        !targetRange.contains(d)) continue;
 
1539
                 */
 
1540
                log.logln(count + "/" + pairLimit + " Checking starting with " + UTF16.valueOf(c));
 
1541
                usi2.reset(targetRangeMinusFailures, quickRt, density);
 
1542
 
 
1543
                while (usi2.next()) {
 
1544
                    
 
1545
                    int d = usi2.codepoint;
 
1546
                    if (d < 0) break;
 
1547
                    
 
1548
                    if (++count > pairLimit) {
 
1549
                        throw new TestTruncated("Test truncated at " + pairLimit);
 
1550
                    }
 
1551
 
 
1552
                    String cs = UTF16.valueOf(c) + UTF16.valueOf(d);
 
1553
                    String targ = targetToSource.transliterate(cs);
 
1554
                    String reverse = sourceToTarget.transliterate(targ);
 
1555
 
 
1556
                    if (!toSource.containsAll(targ) /*&& !failTargSource.contains(c) && !failTargSource.contains(d)*/
 
1557
                            || badCharacters.containsSome(targ)) {
 
1558
                        String targD = Normalizer.normalize(targ, Normalizer.NFD);
 
1559
                        if (!toSource.containsAll(targD) /*&& !failTargSource.contains(c) && !failTargSource.contains(d)*/
 
1560
                                || badCharacters.containsSome(targD)) {
 
1561
                            logWrongScript("" + getTargetSource(transliteratorID) + "", cs, targ, toSource, badCharacters);
 
1562
                            continue;
 
1563
                        }
 
1564
                    }
 
1565
                    if (!isSame(cs, reverse) /*&& !failRound.contains(c) && !failRound.contains(d)*/
 
1566
                            && !roundtripExclusions.contains(c)
 
1567
                            && !roundtripExclusions.contains(d)
 
1568
                            && !roundtripExclusions.contains(cs)) {
 
1569
                        logRoundTripFailure(cs,targetToSource.getID(), targ,sourceToTarget.getID(), reverse);
 
1570
                        continue;
 
1571
                    }
 
1572
                    String targ2 = Normalizer.normalize(targ, Normalizer.NFD);
 
1573
                    String reverse2 = sourceToTarget.transliterate(targ2);
 
1574
                    if (!reverse.equals(reverse2)) {
 
1575
                        logNotCanonical("" + getTargetSource(transliteratorID) + "", targ, reverse, targ2, reverse2);
 
1576
                    }
 
1577
                }
 
1578
            }
 
1579
            log.logln("");
 
1580
        }
 
1581
 
 
1582
        /**
 
1583
         * @param transliteratorID2
 
1584
         * @return
 
1585
         */
 
1586
        private String getTargetSource(String transliteratorID2) {
 
1587
            return "Target-Source [" + transliteratorID2 + "]";
 
1588
        }
 
1589
 
 
1590
        /**
 
1591
         * @param transliteratorID2
 
1592
         * @return
 
1593
         */
 
1594
        private String getSourceTarget(String transliteratorID2) {
 
1595
            return "Source-Target [" + transliteratorID2 + "]";
 
1596
        }
 
1597
 
 
1598
        final String info(String s) {
 
1599
            StringBuffer result = new StringBuffer();
 
1600
            result.append("\u200E").append(s).append("\u200E (").append(TestUtility.hex(s)).append("/");
 
1601
            if (false) { // append age, as a check
 
1602
                int cp = 0;    
 
1603
                for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
 
1604
                    cp = UTF16.charAt(s, i);
 
1605
                    if (i > 0) result.append(", ");
 
1606
                    result.append(UCharacter.getAge(cp));
 
1607
                }
 
1608
            }
 
1609
            result.append(")");
 
1610
            return result.toString();
 
1611
        }
 
1612
 
 
1613
        final void logWrongScript(String label, String from, String to, 
 
1614
                UnicodeSet shouldContainAll, UnicodeSet shouldNotContainAny) {
 
1615
            if (++errorCount > errorLimit) {
 
1616
                throw new TestTruncated("Test truncated; too many failures");
 
1617
            }
 
1618
            String toD = Normalizer.normalize(to, Normalizer.NFD);
 
1619
            UnicodeSet temp = new UnicodeSet().addAll(toD);
 
1620
            UnicodeSet bad = new UnicodeSet(shouldNotContainAny).retainAll(temp)
 
1621
            .addAll(new UnicodeSet(temp).removeAll(shouldContainAll));
 
1622
 
 
1623
            out.println("<br>Fail " + label + ": " +
 
1624
                    info(from) + " => " + info(to) + " " + bad
 
1625
            );
 
1626
        }
 
1627
 
 
1628
        final void logNotCanonical(String label, String from, String to, String fromCan, String toCan) {
 
1629
            if (++errorCount > errorLimit) {
 
1630
                throw new TestTruncated("Test truncated; too many failures");
 
1631
            }
 
1632
            out.println("<br>Fail (can.equiv) " + label + ": " +
 
1633
                    info(from) + " => " + info(to) +
 
1634
                    " -- " +
 
1635
                    info(fromCan) + " => " + info(toCan) + ")"
 
1636
            );
 
1637
        }
 
1638
 
 
1639
        final void logFails(String label) {
 
1640
            if (++errorCount > errorLimit) {
 
1641
                throw new TestTruncated("Test truncated; too many failures");
 
1642
            }
 
1643
            out.println("<br>Fail (can.equiv)" + label);
 
1644
        }
 
1645
 
 
1646
        final void logToRulesFails(String label, String from, String to, String toCan) {
 
1647
            if (++errorCount > errorLimit) {
 
1648
                throw new TestTruncated("Test truncated; too many failures");
 
1649
            }
 
1650
            out.println("<br>Fail " + label + ": " +
 
1651
                    info(from) + " => " + info(to) + ", " + info(toCan)
 
1652
            );
 
1653
        }
 
1654
 
 
1655
        final void logRoundTripFailure(String from,String toID, String to,String backID, String back) {
 
1656
            if (!legalSource.is(from)) return; // skip illegals
 
1657
 
 
1658
            if (++errorCount > errorLimit) {
 
1659
                throw new TestTruncated("Test truncated; too many failures");
 
1660
            }
 
1661
            out.println("<br>Fail Roundtrip: " +
 
1662
                    info(from) + " "+toID+" => " + info(to) + " " + backID+" => " + info(back)
 
1663
            );
 
1664
        }
 
1665
 
 
1666
        /*
 
1667
         * Characters to filter for source-target mapping completeness
 
1668
         * Typically is base alphabet, minus extended characters
 
1669
         * Default is ASCII letters for Latin
 
1670
         */
 
1671
        /*
 
1672
        public boolean isSource(char c) {
 
1673
            if (!sourceRange.contains(c)) return false;
 
1674
            return true;
 
1675
        }
 
1676
         */
 
1677
 
 
1678
        /*
 
1679
         * Characters to check for target back to source mapping.
 
1680
         * Typically the same as the target script, plus punctuation
 
1681
         */
 
1682
        /*
 
1683
        public boolean isReceivingSource(char c) {
 
1684
            if (!targetRange.contains(c)) return false;
 
1685
            return true;
 
1686
        }
 
1687
         */
 
1688
        /*
 
1689
         * Characters to filter for target-source mapping
 
1690
         * Typically is base alphabet, minus extended characters
 
1691
         */
 
1692
        /*
 
1693
        public boolean isTarget(char c) {
 
1694
            byte script = TestUtility.getScript(c);
 
1695
            if (script != targetScript) return false;
 
1696
            if (!TestUtility.isLetter(c)) return false;
 
1697
            if (targetRange != null && !targetRange.contains(c)) return false;
 
1698
            return true;
 
1699
        }
 
1700
         */
 
1701
 
 
1702
        /*
 
1703
         * Characters to check for target-source mapping
 
1704
         * Typically the same as the source script, plus punctuation
 
1705
         */
 
1706
        /*
 
1707
        public boolean isReceivingTarget(char c) {
 
1708
            byte script = TestUtility.getScript(c);
 
1709
            return (script == targetScript || script == TestUtility.COMMON_SCRIPT);
 
1710
        }
 
1711
 
 
1712
        final boolean isSource(String s) {
 
1713
            for (int i = 0; i < s.length(); ++i) {
 
1714
                if (!isSource(s.charAt(i))) return false;
 
1715
            }
 
1716
            return true;
 
1717
        }
 
1718
 
 
1719
        final boolean isTarget(String s) {
 
1720
            for (int i = 0; i < s.length(); ++i) {
 
1721
                if (!isTarget(s.charAt(i))) return false;
 
1722
            }
 
1723
            return true;
 
1724
        }
 
1725
 
 
1726
        final boolean isReceivingSource(String s) {
 
1727
            for (int i = 0; i < s.length(); ++i) {
 
1728
                if (!isReceivingSource(s.charAt(i))) return false;
 
1729
            }
 
1730
            return true;
 
1731
        }
 
1732
 
 
1733
        final boolean isReceivingTarget(String s) {
 
1734
            for (int i = 0; i < s.length(); ++i) {
 
1735
                if (!isReceivingTarget(s.charAt(i))) return false;
 
1736
            }
 
1737
            return true;
 
1738
        }
 
1739
         */
 
1740
 
 
1741
        static class TestTruncated extends RuntimeException {
 
1742
            /**
 
1743
             * For serialization
 
1744
             */
 
1745
            private static final long serialVersionUID = 3361828190488168323L;
 
1746
 
 
1747
            TestTruncated(String msg) {
 
1748
                super(msg);
 
1749
            }
 
1750
        }
 
1751
    }
 
1752
 
 
1753
    //  static class TestHangul extends Test {
 
1754
    //      TestHangul () {
 
1755
    //          super("Jamo-Hangul", TestUtility.JAMO_SCRIPT, TestUtility.HANGUL_SCRIPT);
 
1756
    //      }
 
1757
    //
 
1758
    //      public boolean isSource(char c) {
 
1759
    //          if (0x1113 <= c && c <= 0x1160) return false;
 
1760
    //          if (0x1176 <= c && c <= 0x11F9) return false;
 
1761
    //          if (0x3131 <= c && c <= 0x318E) return false;
 
1762
    //          return super.isSource(c);
 
1763
    //      }
 
1764
    //  }
 
1765
 
 
1766
 
 
1767
}