/*
 *******************************************************************************
 * Copyright (C) 1996-2010, International Business Machines Corporation and *
 * others. All Rights Reserved. *
 **********************************************************************
 **********************************************************************
 */
10
package com.ibm.icu.dev.test.util;
12
import java.io.IOException;
13
import java.text.FieldPosition;
14
import java.util.Comparator;
15
import java.util.TreeSet;
17
import com.ibm.icu.impl.Utility;
18
import com.ibm.icu.lang.UCharacter;
19
import com.ibm.icu.text.StringTransform;
20
import com.ibm.icu.text.UTF16;
21
import com.ibm.icu.text.UnicodeSet;
22
import com.ibm.icu.text.UnicodeSetIterator;
23
import com.ibm.icu.text.UTF16.StringComparator;
25
/** Provides more flexible formatting of UnicodeSet patterns.
 */
27
public class PrettyPrinter {
28
private static final StringComparator CODEPOINT_ORDER = new UTF16.StringComparator(true,false,0);
29
private static final UnicodeSet PATTERN_WHITESPACE = (UnicodeSet) new UnicodeSet("[[:Cn:][:Default_Ignorable_Code_Point:][:patternwhitespace:]]").freeze();
30
private static final UnicodeSet SORT_AT_END = (UnicodeSet) new UnicodeSet("[[:Cn:][:Cs:][:Co:][:Ideographic:]]").freeze();
31
private static final UnicodeSet QUOTED_SYNTAX = (UnicodeSet) new UnicodeSet("[\\[\\]\\-\\^\\&\\\\\\{\\}\\$\\:]").addAll(PATTERN_WHITESPACE).freeze();
33
private boolean first = true;
34
private StringBuffer target = new StringBuffer();
35
private int firstCodePoint = -2;
36
private int lastCodePoint = -2;
37
private boolean compressRanges = true;
38
private String lastString = "";
39
private UnicodeSet toQuote = new UnicodeSet(PATTERN_WHITESPACE);
40
private StringTransform quoter = null;
42
private Comparator<String> ordering;
43
private Comparator<String> spaceComp;
45
// No-argument constructor.
// NOTE(review): only the opening line is present; the embedded numbering jumps from 45 to 48,
// so the body (if any) and the closing brace must be restored from the original source.
public PrettyPrinter() {
48
// Accessor for the quoting transform.
// NOTE(review): body and closing brace are absent (embedded numbering jumps from 48 to 52);
// presumably returns the `quoter` field -- confirm against the original source.
public StringTransform getQuoter() {
52
// Fluent setter for the quoting transform: returns this printer so calls can be chained.
// NOTE(review): the statement between the signature and the return (line 53 in the embedded
// numbering) is missing; presumably it stores the argument in the `quoter` field -- confirm.
public PrettyPrinter setQuoter(StringTransform quoter) {
54
return this; // for chaining
57
public boolean isCompressRanges() {
58
return compressRanges;
62
* @param compressRanges if you want abcde instead of a-e, make this false
65
public PrettyPrinter setCompressRanges(boolean compressRanges) {
66
this.compressRanges = compressRanges;
70
// Accessor for the item ordering.
// NOTE(review): body and closing brace are absent (embedded numbering jumps from 70 to 75);
// presumably returns the `ordering` field -- confirm against the original source.
public Comparator<String> getOrdering() {
75
* @param ordering the resulting ordering of the list of characters in the pattern
78
public PrettyPrinter setOrdering(Comparator ordering) {
79
this.ordering = ordering == null ? CODEPOINT_ORDER : new com.ibm.icu.impl.MultiComparator<String>(ordering, CODEPOINT_ORDER);
83
// Accessor for the comparator that controls where spaces are inserted.
// NOTE(review): body and closing brace are absent (embedded numbering jumps from 83 to 88);
// presumably returns the `spaceComp` field -- confirm against the original source.
public Comparator<String> getSpaceComparator() {
88
* @param spaceComp if the comparison returns non-zero, then a space will be inserted between characters
89
* @return this, for chaining
91
public PrettyPrinter setSpaceComparator(Comparator spaceComp) {
92
this.spaceComp = spaceComp;
96
// Accessor for the set of characters that are written in quoted form.
// NOTE(review): body and closing brace are absent (embedded numbering jumps from 96 to 101);
// presumably returns the `toQuote` field -- confirm against the original source.
public UnicodeSet getToQuote() {
101
* a UnicodeSet of extra characters to quote with \\uXXXX-style escaping (will automatically quote pattern whitespace)
104
public PrettyPrinter setToQuote(UnicodeSet toQuote) {
105
if (toQuote != null) {
106
toQuote = (UnicodeSet)toQuote.cloneAsThawed();
107
toQuote.addAll(PATTERN_WHITESPACE);
108
this.toQuote = toQuote;
115
/**
 * Get the pattern for a particular set.
 * @param uset the set to format
 * @return formatted UnicodeSet
 */
119
// Builds the pattern text for `uset` in the shared `target` buffer and reads it back as a string.
// Members of SORT_AT_END are split off into `putAtEnd`, everything else is collected into a
// TreeSet sorted by `ordering`, and the two groups are then fed to appendUnicodeSetItem in that order.
// NOTE(review): this listing is missing lines (the embedded numbering skips 120, 127, 131-136,
// 139, 142-144, 146, 148, 152 and everything after 154), so the `else` between the string and
// range cases, any bracket/flush handling and the return statement are not shown here.
public String format(UnicodeSet uset) {
121
UnicodeSet putAtEnd = new UnicodeSet(uset).retainAll(SORT_AT_END); // remove all the unassigned gorp for now
122
// make sure that comparison separates all strings, even canonically equivalent ones
123
TreeSet<String> orderedStrings = new TreeSet<String>(ordering);
124
// nextRange() yields either a multi-character string or a [codepoint, codepointEnd] range
for (UnicodeSetIterator it = new UnicodeSetIterator(uset); it.nextRange();) {
125
if (it.codepoint == UnicodeSetIterator.IS_STRING) {
126
orderedStrings.add(it.string);
128
// expand the range into single-code-point strings so each one is sorted individually
for (int i = it.codepoint; i <= it.codepointEnd; ++i) {
129
if (!putAtEnd.contains(i)) {
130
orderedStrings.add(UTF16.valueOf(i));
137
for (String item : orderedStrings) {
138
appendUnicodeSetItem(item);
140
for (UnicodeSetIterator it = new UnicodeSetIterator(putAtEnd); it.next();) { // add back the unassigned gorp
141
appendUnicodeSetItem(it.codepoint); // we know that these are only codepoints, not strings, so this is safe
145
String sresult = target.toString();
147
// double check the results. This can be removed once we have more tests.
149
// UnicodeSet doubleCheck = new UnicodeSet(sresult);
150
// if (!uset.equals(doubleCheck)) {
151
// throw new IllegalStateException("Failure to round-trip in pretty-print " + uset + " => " + sresult + Utility.LINE_SEPARATOR + " source-result: " + new UnicodeSet(uset).removeAll(doubleCheck) + Utility.LINE_SEPARATOR + " result-source: " + new UnicodeSet(doubleCheck).removeAll(uset));
153
// } catch (RuntimeException e) {
154
// throw (RuntimeException) new IllegalStateException("Failure to round-trip in pretty-print " + uset).initCause(e);
159
// Appends one set item given as a string. Items with more than one code point take the first
// branch, which asks addSpaceAsNeededBefore whether a separator is required; the final visible
// line hands the item's first code point to the int overload.
// NOTE(review): lines 161, 163-165 and 167-170 of the embedded numbering are missing, so the
// rest of the multi-code-point branch, the `else` that presumably guards the last call, and
// the return statement are not shown -- confirm against the original source.
private PrettyPrinter appendUnicodeSetItem(String s) {
160
if (UTF16.hasMoreCodePointsThan(s, 1)) {
162
addSpaceAsNeededBefore(s);
166
appendUnicodeSetItem(UTF16.charAt(s, 0));
171
// Accumulates a code point into the pending run [firstCodePoint, lastCodePoint]: a code point
// exactly one past lastCodePoint extends the run, anything else starts a new run at cp.
// NOTE(review): lines 172-173, 177 and 179+ of the embedded numbering are missing; whatever
// happens before the test and before a new run is started is not shown here.
private void appendUnicodeSetItem(int cp) {
174
if (cp == lastCodePoint + 1) {
175
lastCodePoint = cp; // continue range
176
} else { // start range
178
firstCodePoint = lastCodePoint = cp;
184
// Decides whether a separating space is needed before item `s`.
// Visible tests: (1) spaceComp is set and reports that `s` differs from lastString;
// (2) otherwise, when the first code point of `s` is in neither toQuote nor QUOTED_SYNTAX,
// its general category is checked for non-spacing/enclosing marks and for trail surrogates.
// NOTE(review): lines 185-186, 188-189, 194 and 197+ of the embedded numbering are missing:
// the leading `if` of this chain and the bodies of the spaceComp and mark branches are not
// shown; only the surrogate branch's append of a space is visible.
private void addSpaceAsNeededBefore(String s) {
187
} else if (spaceComp != null && spaceComp.compare(s, lastString) != 0) {
190
int cp = UTF16.charAt(s,0);
191
if (!toQuote.contains(cp) && !QUOTED_SYNTAX.contains(cp)) {
192
int type = UCharacter.getType(cp);
193
if (type == UCharacter.NON_SPACING_MARK || type == UCharacter.ENCLOSING_MARK) {
195
} else if (type == UCharacter.SURROGATE && cp >= UTF16.TRAIL_SURROGATE_MIN_VALUE) {
196
target.append(' '); // make sure we don't accidentally merge two surrogates
202
private void addSpaceAsNeededBefore(int codepoint) {
203
addSpaceAsNeededBefore(UTF16.valueOf(codepoint));
206
// Writes out the pending run of code points, if there is one, and clears it.
// A run is pending when lastCodePoint >= 0. Its first endpoint is written only when the run
// holds more than one code point; the last endpoint is always written, lastString is set to
// it, and both bounds are reset to the -2 "nothing pending" value.
// NOTE(review): lines 212-213 and 215-216 of the embedded numbering are missing, so what is
// written when the run is longer than two code points, and the `else`/closing braces around
// the second addSpaceAsNeededBefore call, are not shown here.
private void flushLast() {
207
if (lastCodePoint >= 0) {
208
addSpaceAsNeededBefore(firstCodePoint);
209
if (firstCodePoint != lastCodePoint) {
210
appendQuoted(firstCodePoint);
211
// true when at least one code point lies strictly between the two endpoints
if (firstCodePoint + 1 != lastCodePoint) {
214
addSpaceAsNeededBefore(lastCodePoint);
217
appendQuoted(lastCodePoint);
218
lastString = UTF16.valueOf(lastCodePoint);
219
firstCodePoint = lastCodePoint = -2;
224
// Appends a string item. When a quoter is set and `s` contains any character from toQuote,
// the quoter's transform of the whole string is appended; the visible loop instead walks `s`
// one code point at a time (advancing by the code point's UTF-16 length) and appends each
// through the int overload.
// NOTE(review): lines 227-229 and 232+ of the embedded numbering are missing: the `else`,
// the declaration of `cp` and anything written around the loop are not shown here.
private void appendQuoted(String s) {
225
if (toQuote.containsSome(s) && quoter != null) {
226
target.append(quoter.transform(s));
230
for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
231
appendQuoted(cp = UTF16.charAt(s, i));
237
// Appends a single code point to `target`, quoting or escaping it where needed.
// For code points in toQuote: with a quoter, the quoter's transform is appended; the other
// visible lines append a backslash escape -- 'U' plus Utility.hex(codePoint,8) above 0xFFFF,
// 'u' plus Utility.hex(codePoint,4) otherwise (presumably fixed-width hex -- confirm).
// The case labels list UnicodeSet syntax characters, and pattern whitespace is tested
// separately; the last visible line appends the code point itself.
// NOTE(review): lines 241-242, 246, 249-252, 255, 259-266, 268-271 and 273+ of the embedded
// numbering are missing: the switch header, further case labels, the statements guarded by
// the cases and by the whitespace test, and all return statements are not shown here.
PrettyPrinter appendQuoted(int codePoint) {
238
if (toQuote.contains(codePoint)) {
239
if (quoter != null) {
240
target.append(quoter.transform(UTF16.valueOf(codePoint)));
243
// supplementary code points get the eight-digit form, BMP code points the four-digit form
if (codePoint > 0xFFFF) {
244
target.append("\\U");
245
target.append(Utility.hex(codePoint,8));
247
target.append("\\u");
248
target.append(Utility.hex(codePoint,4));
253
case '[': // SET_OPEN:
254
case ']': // SET_CLOSE:
256
case '^': // COMPLEMENT:
257
case '&': // INTERSECTION:
258
case '\\': //BACKSLASH:
267
if (PATTERN_WHITESPACE.contains(codePoint)) {
272
UTF16.append(target, codePoint);
275
// Appender append(String s) {
279
// public String toString() {
280
// return target.toString();
283
public Appendable format(UnicodeSet obj, Appendable toAppendTo, FieldPosition pos) {
285
return toAppendTo.append(format(obj));
286
} catch (IOException e) {
287
throw new IllegalArgumentException(e);