4
* This program is free software; you can redistribute it and/or modify
5
* it under the terms of the GNU General Public License version 3 or
6
* version 2 as published by the Free Software Foundation.
8
* This program is distributed in the hope that it will be useful, but
9
* WITHOUT ANY WARRANTY; without even the implied warranty of
10
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
* General Public License for more details.
15
import java.nio.charset.Charset;
16
import java.text.CollationKey;
17
import java.text.Collator;
18
import java.text.ParseException;
19
import java.text.RuleBasedCollator;
20
import java.util.ArrayList;
21
import java.util.Collections;
22
import java.util.List;
23
import java.util.Random;
25
import uk.me.parabola.imgfmt.app.srt.Sort;
26
import uk.me.parabola.imgfmt.app.srt.SortKey;
27
import uk.me.parabola.mkgmap.srt.SrtTextReader;
30
* Test to compare sorting results and timings between sort keys and collator.
32
* Also have tested against java7 RuleBasedCollator and the ICU one.
34
* In general our implementation is fastest by a long way; key based sort 3 times faster, collation
35
* based sort even more so. The java collator does not result in the same sort as using sort keys.
37
* I also tried out the ICU collation with mixed results. Could not get the correct desired results with
38
* it. It was not faster than our implementation for a 1252 cp sort.
40
public class SortTest {
42
private static final int LIST_SIZE = 500000;
45
private boolean fullOutput;
46
private boolean quiet;
47
private boolean unicode;
49
private void test() throws Exception {
50
sort = SrtTextReader.sortForCodepage(unicode? 65001: 1252);
54
Charset charset = sort.getCharset();
56
Random rand = new Random(21909278L);
58
List<String> list = createList(rand, charset);
61
// Run a few times without output, to warm up
62
compareLists(sortWithKeys(list), sortWithKeys(list));
63
compareLists(sortWithCollator(list), sortWithCollator(list));
64
compareLists(sortWithJavaKeys(list), sortWithJavaKeys(list));
65
compareLists(sortWithJavaCollator(list), sortWithJavaCollator(list));
66
// re-create the list to make sure it wasn't too optimised to the data
67
list = createList(rand, charset);
70
System.out.println("Compare key sort and collator sort");
71
int n = compareLists(sortWithKeys(list), sortWithCollator(list));
72
System.out.println("N errors " + n);
75
System.out.println("Compare our sort with java sort");
76
n = compareLists(sortWithKeys(list), sortWithJavaKeys(list));
77
System.out.println("N errors " + n);
81
System.out.println("Compare java keys with java collator");
82
n = compareLists(sortWithJavaKeys(list), sortWithJavaCollator(list));
83
System.out.println("N errors " + n);
87
private List<String> createList(Random rand, Charset charset) {
88
List<String> list = new ArrayList<>();
90
for (int n = 0; n < LIST_SIZE; n++) {
91
int len = rand.nextInt(6)+1;
93
len = rand.nextInt(5) + 2;
96
char[] c = new char[len];
97
for (int i = 0; i < len; i++) {
100
if (rand.nextInt(10) > 6)
101
ch = rand.nextInt(6 * 256);
103
ch = rand.nextInt(256);
104
} while (reject(rand, ch));
108
list.add(new String(c));
110
byte[] b = new byte[len];
111
for (int i = 0; i < len; i++) {
115
ch = rand.nextInt(256);
116
// reject unassigned. Also low chars most of the time
117
} while (reject(rand, ch));
121
list.add(new String(b, charset));
125
list = Collections.unmodifiableList(list);
129
private int compareLists(List<String> r1, List<String> r2) {
131
for (int i = 0; i < LIST_SIZE; i++) {
132
String s1 = r1.get(i);
133
String s2 = r2.get(i);
135
if (!s1.equals(s2)) {
140
if (fullOutput || (!mark.isEmpty() && !quiet))
141
System.out.printf("%6d |%-10s |%-10s %s\n", i, s1, s2, mark);
146
private boolean reject(Random rand, int ch) {
151
case 0x81:case 0x8d:case 0x8f:
155
switch (Character.getType(ch)) {
156
case Character.UNASSIGNED:
158
case Character.CONTROL:
162
// Reject low characters most of the time
163
if (ch < 0x20 && rand.nextInt(100) < 95)
165
if (ch > 255 && rand.nextInt(100) > 99)
170
private List<String> sortWithKeys(List<String> list) {
171
long start = System.currentTimeMillis();
172
List<SortKey<String>> keys = new ArrayList<>();
173
for (String s : list) {
174
SortKey<String> key = sort.createSortKey(s, s);
177
Collections.sort(keys);
179
long end = System.currentTimeMillis();
181
List<String> ret = new ArrayList<>();
183
for (SortKey<String> key : keys) {
184
ret.add(key.getObject());
186
System.out.println("time keys: " + (end-start) + "ms");
190
private List<String> sortWithCollator(List<String> list) {
191
long start = System.currentTimeMillis();
192
List<String> ret = new ArrayList<>(list);
193
Collections.sort(ret, sort.getCollator());
194
System.out.println("time coll: " + (System.currentTimeMillis() - start) + "ms");
198
private List<String> sortWithJavaKeys(List<String> list) {
200
long start = System.currentTimeMillis();
201
List<CollationKey> keys = new ArrayList<>();
204
jcol = new RuleBasedCollator(getRules(false));
205
} catch (ParseException e) {
209
for (String s : list) {
210
CollationKey key = jcol.getCollationKey(s);
213
Collections.sort(keys);
215
long end = System.currentTimeMillis();
217
List<String> ret = new ArrayList<>();
218
for (CollationKey key : keys) {
219
ret.add(key.getSourceString());
221
System.out.println("time J keys: " + (end - start) + "ms");
225
private List<String> sortWithJavaCollator(List<String> list) {
227
long start = System.currentTimeMillis();
229
List<String> out = new ArrayList<>(list);
232
jcol = new RuleBasedCollator(getRules(false));
233
jcol.setStrength(Collator.TERTIARY);
234
} catch (ParseException e) {
239
Collections.sort(out, jcol);
241
System.out.println("time J collator: " + (System.currentTimeMillis() - start) + "ms");
245
private String getRules(boolean forICU) {
246
return "='\u0008'='\u000e'='\u000f'='\u0010'='\u0011'='\u0012'='\u0013'='\u0014'='\u0015'='\u0016'"
247
+ "='\u0017' ='\u0018' = '\u0019' ='\u001a' ='\u001b'= '\u001c' ='\u001d'= '\u001e'= '\u001f' "
248
+ "='\u007f' ='\u00ad'"
249
+ ", '\u0001', '\u0002', '\u0003', '\u0004' ,'\u0005' ,'\u0006', '\u0007'"
250
+ "< '\u0009' < '\n' < '\u000b' < '\u000c' < '\r' < '\u0020','\u00a0'"
251
+ "< '_' < '-' < '–' < '—' < '\u002c' < '\u003b' < ':' < '!' < '¡' < '?' < '¿'"
253
+ ((forICU)? "< \\' ": "< ''' ")
254
+ "< '‘' < '’' < '‚' < '‹' < '›' < '“' < '”' < '„' < '«' < '»' "
256
+ "< '“' < '”' < '„' < '«'< '»' < '(' < ')' "
257
+ "< '[' < ']' < '{' < '}' < '§' < '¶' < '@' < '*' < '/' < '\\' < '&' < '#' < '%'"
258
+ "< '‰' < '†' < '‡' < '•' < '`' < '´' < '^' < '¯' < '¨' < '¸' < 'ˆ' < '°' < '©' < '®'"
259
+ "< '+' < '±' < '÷' < '×' < '\u003c' < '\u003d' < '>' < '¬' < '|' < '¦' < '~' ; '˜' < '¤'"
260
+ "< '¢' < '$' < '£' < '¥' < '€' < 0 < 1,¹ < 2,² < 3,³ < 4 < 5 < 6 < 7 < 8 < 9"
261
+ "< a,ª,A ; á,Á ; à,À ; â, ; å,Å ; ä,Ä ; ã,Ã"
265
+ "< e,E ; é,É ; è,È ; ê,Ê ; ë,Ë"
270
+ "< i,I ; í,Í ; ì,Ì ; î,Î ; ï,Ï"
276
+ "< o,º,O ; ó,Ó ; ò,Ò ; ô,Ô ; ö,Ö ; õ,Õ ; ø,Ø"
282
+ "< u,U ; ú,Ú ; ù,Ù ; û,Û ; ü,Ü"
286
+ "< y,Y ; ý,Ý ; ÿ,Ÿ"
290
+ "&'1/4'=¼ &'1/2'=½ &'3/4'=¾"
291
+ "&ae = æ &AE = Æ &ss = ß &OE= Œ &oe= œ &TM = ™ &'...' = … "
295
public static void main(String[] args) throws Exception {
296
SortTest sortTest = new SortTest();
297
for (String arg : args) {
300
sortTest.time = true;
303
sortTest.fullOutput = true;
306
sortTest.quiet = true;
309
sortTest.unicode = true;