2
**********************************************************************
3
* Copyright (C) 2007, International Business Machines
4
* Corporation and others. All Rights Reserved.
5
**********************************************************************
6
* file name: unisetperf.cpp
8
* tab size: 8 (not used)
11
* created on: 2007jan31
12
* created by: Markus Scherer
18
#include "unicode/uperf.h"
19
#include "unicode/uniset.h"
20
#include "unicode/unistr.h"
23
// Number of elements in a statically sized array, as an int32_t.
// Only meaningful for true arrays (not pointers): it divides the size of the
// whole array by the size of its first element.
// The expansion is wrapped in an outer pair of parentheses so that it always
// behaves as a single primary expression when used inside a larger expression.
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
25
// Command-line options specific to unisetperf.
26
// Options do not have abbreviations: Force readable command lines.
27
// (Using U+0001 for abbreviation characters.)
31
UNISETPERF_OPTIONS_COUNT
34
// Option table passed to the UPerfTest base-class constructor.
// main() assigns default values to the SET_PATTERN and FAST_TYPE entries
// before the test object is constructed.
static UOption options[UNISETPERF_OPTIONS_COUNT]={
35
// --pattern: requires an argument; no usable abbreviation (U+0001).
UOPTION_DEF("pattern", '\x01', UOPT_REQUIRES_ARG),
36
// --type: requires an argument; no usable abbreviation (U+0001).
UOPTION_DEF("type", '\x01', UOPT_REQUIRES_ARG)
39
// Usage text for the unisetperf-specific options (--pattern and --type);
// passed to the UPerfTest base-class constructor.
static const char *const unisetperf_usage =
40
"\t--pattern UnicodeSet pattern for instantiation.\n"
41
"\t Default: [:ID_Continue:]\n"
42
"\t--type Type of UnicodeSet: slow fast\n"
45
// Test object with setup data.
46
// Test fixture derived from UPerfTest.
// The constructor applies the --pattern option to `set`, loads the input text
// via UPerfTest::getBuffer(), counts its code points and converts it to UTF-8
// so that the UTF-8 test functions can use the same input.
class UnicodeSetPerformanceTest : public UPerfTest {
48
// Errors are reported through `status`; the setup below only runs while
// `status` indicates success.
UnicodeSetPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status)
49
: UPerfTest(argc, argv, options, LENGTHOF(options), unisetperf_usage, status),
50
utf8(NULL), utf8Length(0), countInputCodePoints(0), spanCount(0) {
51
if (U_SUCCESS(status)) {
52
// The pattern option is treated as invariant characters and unescaped
// before it is applied to the set.
UnicodeString pattern=UnicodeString(options[SET_PATTERN].value, -1, US_INV).unescape();
53
set.applyPattern(pattern, status);
55
// The --type option value is compared against "fast" here.
if(0==strcmp(options[FAST_TYPE].value, "fast")) {
60
UPerfTest::getBuffer(inputLength, status);
61
// Everything below needs non-empty input.
if(U_SUCCESS(status) && inputLength>0) {
62
countInputCodePoints = u_countChar32(buffer, bufferLen);
66
// Preflight the UTF-8 length and allocate utf8.
67
u_strToUTF8(NULL, 0, &utf8Length, buffer, bufferLen, &status);
68
// The preflight call passes a NULL/zero-capacity destination, so
// U_BUFFER_OVERFLOW_ERROR is the outcome that leads to allocation.
if(status==U_BUFFER_OVERFLOW_ERROR) {
69
// NOTE(review): no matching free() for this malloc() is visible in this
// excerpt -- confirm that utf8 is released (e.g. in a destructor).
utf8=(char *)malloc(utf8Length);
72
// Second call performs the actual conversion into the allocated buffer.
u_strToUTF8(utf8, utf8Length, NULL, buffer, bufferLen, &status);
74
status=U_MEMORY_ALLOCATION_ERROR;
79
// Summary statistics about the input text and the spans found in it.
// NOTE(review): the ratios divide by spanCount and countInputCodePoints;
// confirm that both are non-zero whenever this line is reached.
printf("code points:%ld len16:%ld len8:%ld spans:%ld "
80
"cp/span:%.3g UChar/span:%.3g B/span:%.3g B/cp:%.3g\n",
81
(long)countInputCodePoints, (long)bufferLen, (long)utf8Length, (long)spanCount,
82
(double)countInputCodePoints/spanCount, (double)bufferLen/spanCount, (double)utf8Length/spanCount,
83
(double)utf8Length/countInputCodePoints);
89
// Maps a test index to a named test function object; defined after the
// test function classes further down in this file.
virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL);
91
// Count spans of characters that are in the set,
92
// and spans of characters that are not in the set.
93
// If the very first character is in the set, then one additional
94
// not-span is counted.
96
const UChar *s=getBuffer();
97
int32_t length=getBufferLen();
101
i=span(s, length, i, tf);
106
// Scans s[start..length) one code point at a time (U16_NEXT) and compares
// `tf` with set.contains(c) for each code point.
int32_t span(const UChar *s, int32_t length, int32_t start, UBool tf) const {
109
// `prev` remembers the index before the code point that is read next.
while((prev=start)<length) {
110
U16_NEXT(s, start, length, c);
111
if(tf!=set.contains(c)) {
118
// Read-only access to the UTF-16 input text and its length.
const UChar *getBuffer() const { return buffer; }
119
int32_t getBufferLen() const { return bufferLen; }
124
// Number of code points in the input text.
125
int32_t countInputCodePoints;
129
UnicodeSet prefrozen;
132
// Performance test function object.
133
// Common base class of the test function objects below.
// Holds a reference to the test fixture and reports per-iteration
// operation/event counts derived from the fixture's statistics.
class Command : public UPerfFunction {
135
Command(const UnicodeSetPerformanceTest &testcase) : testcase(testcase) {}
138
virtual ~Command() {}
140
// virtual void call(UErrorCode* pErrorCode) { ... }
142
virtual long getOperationsPerIteration() {
143
// Number of code points tested:
144
// Input code points, plus one for the end of each span except the last span.
// NOTE(review): this evaluates to -1 if both counters are still 0 (their
// initial values in the fixture constructor) -- confirm this cannot be
// reached with empty input.
145
return testcase.countInputCodePoints+testcase.spanCount-1;
148
// One event per span counted by the fixture.
virtual long getEventsPerIteration() {
149
return testcase.spanCount;
152
// The fixture providing the set, the input text and the expected span count.
const UnicodeSetPerformanceTest &testcase;
155
// Test function that finds spans by calling UnicodeSet::contains() for one
// code point at a time (see the static span() helper at the end of the class).
class Contains : public Command {
157
Contains(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
158
// Verify that the frozen set is equal to the unfrozen one.
162
// Probes every code point U+0000..U+10FFFF with contains().
for(c=0; c<=0x10ffff; ++c) {
163
if(testcase.set.contains(c)) {
167
// `set` is a local set whose declaration is not visible in this excerpt;
// a mismatch with testcase.set is only reported on stderr.
if(set!=testcase.set) {
168
fprintf(stderr, "error: frozen set != original!\n");
172
// Factory used by runIndexedTest(); returns a new heap-allocated instance.
static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
173
return new Contains(testcase);
175
// One timed iteration: walks the whole UTF-16 input span by span and
// checks the resulting span count against the fixture's spanCount.
virtual void call(UErrorCode* pErrorCode) {
176
const UnicodeSet &set=testcase.set;
177
const UChar *s=testcase.getBuffer();
178
int32_t length=testcase.getBufferLen();
183
// span() is given the remaining text; its result is added to the index i.
i+=span(set, s+i, length-i, tf);
187
if(count!=testcase.spanCount) {
188
fprintf(stderr, "error: Contains() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
189
(long)count, (long)testcase.spanCount);
192
// Scans s[0..length) one code point at a time (U16_NEXT) and compares `tf`
// with set.contains(c) for each code point.
static int32_t span(const UnicodeSet &set, const UChar *s, int32_t length, UBool tf) {
194
int32_t start=0, prev;
195
while((prev=start)<length) {
196
U16_NEXT(s, start, length, c);
197
if(tf!=set.contains(c)) {
205
// Test function that finds spans in the UTF-16 input with UnicodeSet::span().
class SpanUTF16 : public Command {
207
SpanUTF16(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
208
// Verify that the frozen set is equal to the unfrozen one.
213
// First pass: every single 16-bit value, tested as a one-unit string.
for(c=0; c<=0xffff; ++c) {
215
if(testcase.set.span(utf16, 1, USET_SPAN_CONTAINED)>0) {
219
// Second pass: every lead (D800..DBFF) / trail (DC00..DFFF) surrogate
// combination, tested as a two-unit string.
for(c=0xd800; c<=0xdbff; ++c) {
221
for(c2=0xdc00; c2<=0xdfff; ++c2) {
223
if(testcase.set.span(utf16, 2, USET_SPAN_CONTAINED)>0) {
224
// A positive span length leads to adding the pair's code point.
set.add(U16_GET_SUPPLEMENTARY(c, c2));
229
// A mismatch between the rebuilt set and testcase.set is only reported
// on stderr.
if(set!=testcase.set) {
230
fprintf(stderr, "error: frozen set != original!\n");
234
// Factory used by runIndexedTest(); returns a new heap-allocated instance.
static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
235
return new SpanUTF16(testcase);
237
// One timed iteration: walks the whole UTF-16 input with span() and checks
// the resulting span count against the fixture's spanCount.
virtual void call(UErrorCode* pErrorCode) {
238
const UnicodeSet &set=testcase.set;
239
const UChar *s=testcase.getBuffer();
240
int32_t length=testcase.getBufferLen();
245
// The boolean tf is cast directly to the span condition.
i+=set.span(s+i, length-i, (USetSpanCondition)tf);
249
if(count!=testcase.spanCount) {
250
fprintf(stderr, "error: SpanUTF16() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
251
(long)count, (long)testcase.spanCount);
256
// Test function that finds spans in the UTF-16 input from the end towards the
// start with UnicodeSet::spanBack().
class SpanBackUTF16 : public Command {
258
SpanBackUTF16(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
259
// Verify that the frozen set is equal to the unfrozen one.
264
// First pass: every single 16-bit value, tested as a one-unit string.
for(c=0; c<=0xffff; ++c) {
266
if(testcase.set.spanBack(utf16, 1, USET_SPAN_CONTAINED)==0) {
270
// Second pass: every lead (D800..DBFF) / trail (DC00..DFFF) surrogate
// combination, tested as a two-unit string.
for(c=0xd800; c<=0xdbff; ++c) {
272
for(c2=0xdc00; c2<=0xdfff; ++c2) {
274
if(testcase.set.spanBack(utf16, 2, USET_SPAN_CONTAINED)==0) {
275
// A spanBack() result of 0 leads to adding the pair's code point.
set.add(U16_GET_SUPPLEMENTARY(c, c2));
280
// A mismatch between the rebuilt set and testcase.set is only reported
// on stderr.
if(set!=testcase.set) {
281
fprintf(stderr, "error: frozen set != original!\n");
285
// Factory used by runIndexedTest(); returns a new heap-allocated instance.
static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
286
return new SpanBackUTF16(testcase);
288
// One timed iteration: walks the UTF-16 input backwards with spanBack() and
// checks the resulting span count against the fixture's spanCount.
virtual void call(UErrorCode* pErrorCode) {
289
const UnicodeSet &set=testcase.set;
290
const UChar *s=testcase.getBuffer();
291
int32_t length=testcase.getBufferLen();
294
* Get the same spans as with span() where we always start with a not-contained span.
295
* If testcase.spanCount is an odd number, then the last span() was not-contained.
296
* The last spanBack() must be not-contained to match the first span().
298
// tf starts as "contained" exactly when spanCount is even.
UBool tf=(UBool)((testcase.spanCount&1)==0);
299
// Keeps going while text remains, or while tf is still "contained".
while(length>0 || !tf) {
300
// spanBack() returns the new (shorter) length to continue from.
length=set.spanBack(s, length, (USetSpanCondition)tf);
304
if(count!=testcase.spanCount) {
305
fprintf(stderr, "error: SpanBackUTF16() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
306
(long)count, (long)testcase.spanCount);
311
// Test function that finds spans in the UTF-8 form of the input with
// UnicodeSet::spanUTF8().
class SpanUTF8 : public Command {
313
SpanUTF8(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
314
// Verify that the frozen set is equal to the unfrozen one.
320
// Each code point in the loop range is encoded into a local UTF-8 buffer
// and tested as a one-character string.
for(c=0; c<=0x10ffff; ++c) {
325
U8_APPEND_UNSAFE(utf8, length, c);
326
if(testcase.set.spanUTF8(utf8, length, USET_SPAN_CONTAINED)>0) {
330
// `set` is a local set whose declaration is not visible in this excerpt;
// a mismatch with testcase.set is only reported on stderr.
if(set!=testcase.set) {
331
fprintf(stderr, "error: frozen set != original!\n");
335
// Factory used by runIndexedTest(); returns a new heap-allocated instance.
static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
336
return new SpanUTF8(testcase);
338
// One timed iteration: walks the whole UTF-8 input with spanUTF8() and
// checks the resulting span count against the fixture's spanCount.
virtual void call(UErrorCode* pErrorCode) {
339
const UnicodeSet &set=testcase.set;
340
// UTF-8 copy of the input, prepared by the fixture constructor.
const char *s=testcase.utf8;
341
int32_t length=testcase.utf8Length;
346
// The boolean tf is cast directly to the span condition.
i+=set.spanUTF8(s+i, length-i, (USetSpanCondition)tf);
350
if(count!=testcase.spanCount) {
351
fprintf(stderr, "error: SpanUTF8() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
352
(long)count, (long)testcase.spanCount);
357
// Test function that finds spans in the UTF-8 form of the input from the end
// towards the start with UnicodeSet::spanBackUTF8().
class SpanBackUTF8 : public Command {
359
SpanBackUTF8(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
360
// Verify that the frozen set is equal to the unfrozen one.
366
// Each code point in the loop range is encoded into a local UTF-8 buffer
// and tested as a one-character string.
for(c=0; c<=0x10ffff; ++c) {
371
U8_APPEND_UNSAFE(utf8, length, c);
372
if(testcase.set.spanBackUTF8(utf8, length, USET_SPAN_CONTAINED)==0) {
376
// `set` is a local set whose declaration is not visible in this excerpt;
// a mismatch with testcase.set is only reported on stderr.
if(set!=testcase.set) {
377
fprintf(stderr, "error: frozen set != original!\n");
381
// Factory used by runIndexedTest(); returns a new heap-allocated instance.
static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
382
return new SpanBackUTF8(testcase);
384
// One timed iteration: walks the UTF-8 input backwards with spanBackUTF8()
// and checks the resulting span count against the fixture's spanCount.
virtual void call(UErrorCode* pErrorCode) {
385
const UnicodeSet &set=testcase.set;
386
// UTF-8 copy of the input, prepared by the fixture constructor.
const char *s=testcase.utf8;
387
int32_t length=testcase.utf8Length;
390
* Get the same spans as with span() where we always start with a not-contained span.
391
* If testcase.spanCount is an odd number, then the last span() was not-contained.
392
* The last spanBack() must be not-contained to match the first span().
394
// tf starts as "contained" exactly when spanCount is even.
UBool tf=(UBool)((testcase.spanCount&1)==0);
395
// Keeps going while text remains, or while tf is still "contained".
while(length>0 || !tf) {
396
// spanBackUTF8() returns the new (shorter) length to continue from.
length=set.spanBackUTF8(s, length, (USetSpanCondition)tf);
400
if(count!=testcase.spanCount) {
401
fprintf(stderr, "error: SpanBackUTF8() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
402
(long)count, (long)testcase.spanCount);
407
// Maps a test index to its name and, when `exec` is set, to a newly created
// test function object. Indexes 0..4 select the five test functions; any
// other index sets `name` to the empty string.
// `par` is not used in the visible cases.
UPerfFunction* UnicodeSetPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) {
409
case 0: name = "Contains"; if (exec) return Contains::get(*this); break;
410
case 1: name = "SpanUTF16"; if (exec) return SpanUTF16::get(*this); break;
411
case 2: name = "SpanBackUTF16";if (exec) return SpanBackUTF16::get(*this); break;
412
case 3: name = "SpanUTF8"; if (exec) return SpanUTF8::get(*this); break;
413
case 4: name = "SpanBackUTF8"; if (exec) return SpanBackUTF8::get(*this); break;
414
// Empty name for an index outside the known range.
default: name = ""; break;
419
int main(int argc, const char *argv[])
421
// Default values for command-line options.
422
options[SET_PATTERN].value = "[:ID_Continue:]";
423
options[FAST_TYPE].value = "slow";
425
UErrorCode status = U_ZERO_ERROR;
426
UnicodeSetPerformanceTest test(argc, argv, status);
428
if (U_FAILURE(status)){
429
printf("The error is %s\n", u_errorName(status));
434
if (test.run() == FALSE){
435
fprintf(stderr, "FAILED: Tests could not be run, please check the "