2
*******************************************************************************
4
* Copyright (C) 2003-2011, International Business Machines
5
* Corporation and others. All Rights Reserved.
7
*******************************************************************************
8
* file name: unorm_it.c
10
* tab size: 8 (not used)
13
* created on: 2003jan21
14
* created by: Markus W. Scherer
17
#include "unicode/utypes.h"
19
#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION
21
#include "unicode/uiter.h"
22
#include "unicode/unorm.h"
23
#include "unicode/utf.h"
27
/* UNormIterator ------------------------------------------------------------ */
33
/*
 * State of a normalizing iterator that is layered over another UCharIterator.
 * Visible here are the hasPrevious/hasNext/isStackAllocated flags, the
 * normalization mode and the two built-in buffers.
 * NOTE(review): the members api, iter, chars, states, capacity and state that
 * the functions in this file use are not visible in this part of the listing.
 */
struct UNormIterator {
38
* chars and states either use the static buffers
39
* or are allocated in the same memory block
41
* They are parallel arrays with states[] holding the getState() values
42
* from normalization boundaries, and UITER_NO_STATE in between.
48
* api.start: first valid character & state in the arrays
49
* api.index: current position
50
* api.limit: one past the last valid character in chars[], but states[limit] is valid
51
* capacity: length of allocated arrays
55
/* the current iter->getState(), saved to avoid unnecessary setState() calls; may not correspond to api->index! */
58
/* there are UChars available before start or after limit? */
59
/* isStackAllocated is set by unorm_openIter() and tested by unorm_closeIter() */
UBool hasPrevious, hasNext, isStackAllocated;
61
/* normalization mode handed to unorm_next()/unorm_previous() */
UNormalizationMode mode;
63
/* built-in storage; unorm_openIter() points chars/states at these two arrays */
UChar charsBuffer[INITIAL_CAPACITY];
64
uint32_t statesBuffer[INITIAL_CAPACITY+1]; /* one more than charsBuffer[]! */
68
/*
 * Reset api->start/index/limit to an empty window and set the
 * hasPrevious/hasNext flags from the current position of iter.
 * The window is placed at index 0, at uni->capacity, or at uni->capacity/2
 * depending on whether iter stands at the beginning of its text, at the end,
 * or in between.
 *
 * uni:  iterator whose window and flags are reset
 * iter: source iterator; only its hasPrevious()/hasNext() callbacks are used
 */
initIndexes(UNormIterator *uni, UCharIterator *iter) {
69
/* do not pass api so that the compiler knows it's an alias pointer to uni itself */
70
UCharIterator *api=&uni->api;
72
if(!iter->hasPrevious(iter)) {
73
/* set indexes to the beginning of the arrays */
74
/* empty window at index 0: the whole array lies after it */
api->start=api->index=api->limit=0;
75
uni->hasPrevious=FALSE;
76
uni->hasNext=iter->hasNext(iter);
77
} else if(!iter->hasNext(iter)) {
78
/* set indexes to the end of the arrays */
79
/* empty window at capacity: the whole array lies before it */
api->start=api->index=api->limit=uni->capacity;
81
/* this branch is reached only after the hasPrevious() test above was true */
uni->hasPrevious=iter->hasPrevious(iter);
83
/* set indexes into the middle of the arrays */
84
api->start=api->index=api->limit=uni->capacity/2;
85
uni->hasPrevious=uni->hasNext=TRUE;
90
/*
 * Allocate one memory block that holds capacity+1 states followed by capacity
 * UChars, and copy the valid window [start, limit) of the old arrays, plus
 * states[limit], into it.
 * Two copy variants are visible: one that shifts the contents by delta toward
 * the end of the new arrays, and one that keeps the old offsets; presumably
 * addAtStart selects between them (the test is not visible in this listing).
 * The callers in this file treat a zero result as failure.
 *
 * uni:        iterator whose arrays are replaced
 * capacity:   new array length, counted in UChars
 * addAtStart: presumably requests free space before the old contents - confirm
 */
reallocArrays(UNormIterator *uni, int32_t capacity, UBool addAtStart) {
91
/* do not pass api so that the compiler knows it's an alias pointer to uni itself */
92
UCharIterator *api=&uni->api;
98
/* budget: 4 bytes for each of capacity+1 states, 2 bytes for each of capacity chars */
states=(uint32_t *)uprv_malloc((capacity+1)*4+capacity*2);
103
/* the chars array starts directly behind the last state of the same block */
chars=(UChar *)(states+(capacity+1));
104
uni->capacity=capacity;
110
/* copy old contents to the end of the new arrays */
113
/*
 * NOTE(review): uni->capacity was already overwritten with the new capacity
 * above, so as the visible lines stand delta is always 0 and the contents are
 * not shifted at all. The old capacity would have to be read before that
 * assignment - verify against the complete source.
 */
delta=capacity-uni->capacity;
114
/* limit-start+1 entries: states[limit] is valid and must be carried over too */
uprv_memcpy(states+delta+start, uni->states+start, (limit-start+1)*4);
115
/*
 * NOTE(review): this copies (limit-start)*4 bytes, but the allocation above
 * budgets only 2 bytes per UChar, so the length looks twice too large and may
 * read and write past the valid contents - confirm.
 */
uprv_memcpy(chars+delta+start, uni->chars+start, (limit-start)*4);
117
api->start=start+delta;
119
api->limit=limit+delta;
121
/* copy old contents to the beginning of the new arrays */
122
uprv_memcpy(states+start, uni->states+start, (limit-start+1)*4);
123
/* NOTE(review): same doubled byte count for the 2-byte chars as noted above - confirm */
uprv_memcpy(chars+start, uni->chars+start, (limit-start)*4);
133
/*
 * Slide the buffered chars[] and states[] entries down so that they begin at
 * index 0, then update api->start and api->limit to the new window.
 * Entries below the chosen srcIndex are dropped; srcIndex is advanced to an
 * entry with a known state first so that the new start has a usable state.
 * NOTE(review): how srcIndex and limit are initialised from delta and api is
 * not visible in this listing.
 *
 * api:    iterator whose start/limit are rewritten
 * chars:  buffered UChars
 * states: parallel state array; states[limit] is valid as well
 * delta:  amount of room requested by the caller
 */
moveContentsTowardStart(UCharIterator *api, UChar chars[], uint32_t states[], int32_t delta) {
134
/* move array contents up to make room */
135
int32_t srcIndex, destIndex, limit;
139
if(srcIndex>api->start) {
140
/* look for a position in the arrays with a known state */
141
while(srcIndex<limit && states[srcIndex]==UITER_NO_STATE) {
146
/* now actually move the array contents */
147
api->start=destIndex=0;
148
/* ascending copy: destIndex never exceeds srcIndex, so no unread entry is overwritten */
while(srcIndex<limit) {
149
chars[destIndex]=chars[srcIndex];
150
states[destIndex++]=states[srcIndex++];
153
/* copy states[limit] as well! */
154
states[destIndex]=states[srcIndex];
156
api->limit=destIndex;
160
/*
 * Slide the buffered chars[] and states[] entries up so that they end at the
 * array capacity, then update api->limit and api->start to the new window.
 * Entries above the chosen srcIndex are dropped; srcIndex is lowered to an
 * entry with a known state first so that the new limit has a usable state.
 * NOTE(review): the initialisation of the local start is not visible in this
 * listing.
 *
 * api:    iterator whose start/limit are rewritten
 * chars:  buffered UChars
 * states: parallel state array; states[limit] is valid as well
 * delta:  amount of room requested by the caller
 */
moveContentsTowardEnd(UCharIterator *api, UChar chars[], uint32_t states[], int32_t delta) {
161
/* move array contents up to make room */
162
int32_t srcIndex, destIndex, start;
165
/* assumes api is embedded at the very start of its UNormIterator - TODO confirm */
destIndex=((UNormIterator *)api)->capacity;
166
srcIndex=destIndex-delta;
167
if(srcIndex<api->limit) {
168
/* look for a position in the arrays with a known state */
169
while(srcIndex>start && states[srcIndex]==UITER_NO_STATE) {
174
/* now actually move the array contents */
175
api->limit=destIndex;
177
/* copy states[limit] as well! */
178
states[destIndex]=states[srcIndex];
180
/* descending copy with pre-decrement on both indexes */
while(srcIndex>start) {
181
chars[--destIndex]=chars[--srcIndex];
182
states[destIndex]=states[srcIndex];
185
api->start=destIndex;
188
/* normalize forward from the limit, assume hasNext is true */
190
/*
 * Appends the next piece of normalized text, produced by unorm_next(), to the
 * buffer at api->limit and records the source-iterator state behind it.
 * The callers in this file use the result as a boolean: non-zero means that
 * new text was buffered.
 *
 * uni:  normalizing iterator whose buffer is extended
 * iter: source iterator that is read and repositioned
 */
readNext(UNormIterator *uni, UCharIterator *iter) {
191
/* do not pass api so that the compiler knows it's an alias pointer to uni itself */
192
UCharIterator *api=&uni->api;
194
/* make capacity/4 room at the end of the arrays */
195
int32_t limit, capacity, room;
196
UErrorCode errorCode;
199
capacity=uni->capacity;
201
if(room>(capacity-limit)) {
202
/* move array contents to make room */
203
moveContentsTowardStart(api, uni->chars, uni->states, room);
204
/* the move rewrote api->limit; refresh the local copy and park index there */
api->index=limit=api->limit;
205
uni->hasPrevious=TRUE;
208
/* normalize starting from the limit position */
209
errorCode=U_ZERO_ERROR;
/* reposition the source iterator only if its saved state differs from the one stored for limit */
210
if(uni->state!=uni->states[limit]) {
211
uiter_setState(iter, uni->states[limit], &errorCode);
212
if(U_FAILURE(errorCode)) {
213
uni->state=UITER_NO_STATE;
219
/* write at chars+limit with capacity-limit UChars of space; the result is kept in room */
room=unorm_next(iter, uni->chars+limit, capacity-limit, uni->mode, 0, TRUE, NULL, &errorCode);
220
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
222
/* empty and re-use the arrays */
223
/* keep the state of the old limit as the state of the new, empty window at index 0 */
uni->states[0]=uni->states[limit];
224
api->start=api->index=api->limit=limit=0;
225
uni->hasPrevious=TRUE;
228
if(!reallocArrays(uni, capacity, FALSE)) {
229
uni->state=UITER_NO_STATE;
236
/* second attempt: clear the overflow error, reposition the source, normalize again */
errorCode=U_ZERO_ERROR;
237
uiter_setState(iter, uni->states[limit], &errorCode);
238
room=unorm_next(iter, uni->chars+limit, capacity-limit, uni->mode, 0, TRUE, NULL, &errorCode);
240
if(U_FAILURE(errorCode) || room==0) {
241
uni->state=UITER_NO_STATE;
247
++limit; /* leave the known states[limit] alone */
248
/* room-1 further entries: the first new UChar keeps the state that was already stored */
for(--room; room>0; --room) {
249
/* set unknown states for all but the normalization boundaries */
250
uni->states[limit++]=UITER_NO_STATE;
252
/* the source state after this piece becomes the state of the new limit */
uni->states[limit]=uni->state=uiter_getState(iter);
253
uni->hasNext=iter->hasNext(iter);
258
/* normalize backward from the start, assume hasPrevious is true */
260
/*
 * Prepends the previous piece of normalized text, produced by
 * unorm_previous(), to the buffer in front of api->start and records the
 * source-iterator state in front of it.
 * The callers in this file use the result as a boolean: non-zero means that
 * new text was buffered.
 *
 * uni:  normalizing iterator whose buffer is extended
 * iter: source iterator that is read and repositioned
 */
readPrevious(UNormIterator *uni, UCharIterator *iter) {
261
/* do not pass api so that the compiler knows it's an alias pointer to uni itself */
262
UCharIterator *api=&uni->api;
264
/* make capacity/4 room at the start of the arrays */
265
int32_t start, capacity, room;
266
UErrorCode errorCode;
269
capacity=uni->capacity;
272
/* move array contents to make room */
273
moveContentsTowardEnd(api, uni->chars, uni->states, room);
274
/* the move rewrote api->start; refresh the local copy and park index there */
api->index=start=api->start;
278
/* normalize ending at the start position */
279
errorCode=U_ZERO_ERROR;
/* reposition the source iterator only if its saved state differs from the one stored for start */
280
if(uni->state!=uni->states[start]) {
281
uiter_setState(iter, uni->states[start], &errorCode);
282
if(U_FAILURE(errorCode)) {
283
uni->state=UITER_NO_STATE;
284
uni->hasPrevious=FALSE;
289
/* output goes to the front of chars[] with start UChars of space; it is copied up to start below */
room=unorm_previous(iter, uni->chars, start, uni->mode, 0, TRUE, NULL, &errorCode);
290
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
292
/* empty and re-use the arrays */
293
/* keep the state of the old start as the state of the new, empty window at capacity */
uni->states[capacity]=uni->states[start];
294
api->start=api->index=api->limit=start=capacity;
298
if(!reallocArrays(uni, capacity, TRUE)) {
299
uni->state=UITER_NO_STATE;
300
uni->hasPrevious=FALSE;
306
/* second attempt: clear the overflow error, reposition the source, normalize again */
errorCode=U_ZERO_ERROR;
307
uiter_setState(iter, uni->states[start], &errorCode);
308
room=unorm_previous(iter, uni->chars, start, uni->mode, 0, TRUE, NULL, &errorCode);
310
if(U_FAILURE(errorCode) || room==0) {
311
uni->state=UITER_NO_STATE;
312
uni->hasPrevious=FALSE;
318
/* copy the UChars from chars[0..room[ to chars[(start-room)..start[ */
319
/* copied from the back so that the piece ends directly in front of the old start */
uni->chars[--start]=uni->chars[--room];
320
/* set unknown states for all but the normalization boundaries */
321
uni->states[start]=UITER_NO_STATE;
323
/* the source state in front of this piece becomes the state of the new start */
uni->states[start]=uni->state=uiter_getState(iter);
324
uni->hasPrevious=iter->hasPrevious(iter);
329
/* Iterator runtime API functions ------------------------------------------- */
331
/*
 * getIndex callback of the normalizing iterator.
 * The only result visible in this listing is UITER_UNKNOWN_INDEX.
 * NOTE(review): the handling of the individual origin values is not visible
 * here.
 *
 * api:    the normalizing iterator
 * origin: which index is requested
 */
static int32_t U_CALLCONV
332
unormIteratorGetIndex(UCharIterator *api, UCharIteratorOrigin origin) {
340
return UITER_UNKNOWN_INDEX;
342
/* not a valid origin */
343
/* Should never get here! */
348
/*
 * move callback of the normalizing iterator.
 * Visible behavior: it can restart at the beginning or at the end of the
 * text, and it can move by delta normalized UChars relative to the current
 * position, calling readNext()/readPrevious() whenever the target lies
 * outside the buffered window. An error path returns -1.
 *
 * api:    the normalizing iterator
 * delta:  number of normalized UChars to move; negative moves backward
 * origin: reference position for the move
 */
static int32_t U_CALLCONV
349
unormIteratorMove(UCharIterator *api, int32_t delta, UCharIteratorOrigin origin) {
350
/* assumes api is embedded at the very start of its UNormIterator - TODO confirm */
UNormIterator *uni=(UNormIterator *)api;
351
UCharIterator *iter=uni->iter;
357
/* restart from the beginning */
358
/* text before the window exists: rewind the source and start with an empty window at index 0 */
if(uni->hasPrevious) {
359
iter->move(iter, 0, UITER_START);
360
api->start=api->index=api->limit=0;
361
uni->states[api->limit]=uni->state=uiter_getState(iter);
362
uni->hasPrevious=FALSE;
363
uni->hasNext=iter->hasNext(iter);
365
/* we already have the beginning of the normalized text */
366
api->index=api->start;
373
/* restart from the end */
375
/* move the source to its end and start with an empty window at capacity */
iter->move(iter, 0, UITER_LIMIT);
376
api->start=api->index=api->limit=uni->capacity;
377
uni->states[api->limit]=uni->state=uiter_getState(iter);
378
uni->hasPrevious=iter->hasPrevious(iter);
381
/* we already have the end of the normalized text */
382
api->index=api->limit;
386
return -1; /* Error */
389
/* move relative to the current position by delta normalized UChars */
393
/* go forward until the requested position is in the buffer */
395
pos=api->index+delta; /* requested position */
396
delta=pos-api->limit; /* remainder beyond buffered text */
398
api->index=pos; /* position reached */
402
/* go to end of buffer and normalize further */
403
api->index=api->limit;
404
if(!uni->hasNext || !readNext(uni, iter)) {
405
break; /* reached end of text */
408
} else /* delta<0 */ {
409
/* go backward until the requested position is in the buffer */
411
pos=api->index+delta; /* requested position */
412
delta=pos-api->start; /* remainder beyond buffered text */
414
api->index=pos; /* position reached */
418
/* go to start of buffer and normalize further */
419
api->index=api->start;
420
if(!uni->hasPrevious || !readPrevious(uni, iter)) {
421
break; /* reached start of text */
426
/* index at the window start with nothing before the window: the iterator stands at the start of the text */
if(api->index==api->start && !uni->hasPrevious) {
429
return UITER_UNKNOWN_INDEX;
433
/*
 * hasNext callback: true if buffered text remains after api->index, or if the
 * hasNext flag says that more text is available beyond the buffered window.
 */
static UBool U_CALLCONV
434
unormIteratorHasNext(UCharIterator *api) {
435
return api->index<api->limit || ((UNormIterator *)api)->hasNext;
438
/*
 * hasPrevious callback: true if buffered text exists before api->index, or if
 * the hasPrevious flag says that more text is available before the buffered
 * window.
 */
static UBool U_CALLCONV
439
unormIteratorHasPrevious(UCharIterator *api) {
440
return api->index>api->start || ((UNormIterator *)api)->hasPrevious;
443
/*
 * current callback: returns the normalized UChar at api->index without moving
 * the index. If the index stands at the window limit, more text is read with
 * readNext() first.
 * NOTE(review): the result for the case that no text is available is not
 * visible in this listing.
 */
static UChar32 U_CALLCONV
444
unormIteratorCurrent(UCharIterator *api) {
445
UNormIterator *uni=(UNormIterator *)api;
447
/* readNext() is attempted only when the buffer is exhausted and hasNext is set */
if( api->index<api->limit ||
448
(uni->hasNext && readNext(uni, uni->iter))
450
return uni->chars[api->index];
456
/*
 * next callback: returns the normalized UChar at api->index and then advances
 * the index by one. If the index stands at the window limit, more text is
 * read with readNext() first.
 * NOTE(review): the result for the case that no text is available is not
 * visible in this listing.
 */
static UChar32 U_CALLCONV
457
unormIteratorNext(UCharIterator *api) {
458
UNormIterator *uni=(UNormIterator *)api;
460
/* readNext() is attempted only when the buffer is exhausted and hasNext is set */
if( api->index<api->limit ||
461
(uni->hasNext && readNext(uni, uni->iter))
463
return uni->chars[api->index++];
469
/*
 * previous callback: moves api->index back by one and returns the normalized
 * UChar at the new position. If the index stands at the window start, more
 * text is read with readPrevious() first.
 * NOTE(review): the result for the case that no text is available is not
 * visible in this listing.
 */
static UChar32 U_CALLCONV
470
unormIteratorPrevious(UCharIterator *api) {
471
UNormIterator *uni=(UNormIterator *)api;
473
/* readPrevious() is attempted only when the index is at the start and hasPrevious is set */
if( api->index>api->start ||
474
(uni->hasPrevious && readPrevious(uni, uni->iter))
476
return uni->chars[--api->index];
482
/*
 * getState callback: returns the state value stored for the current index.
 * readNext()/readPrevious() store UITER_NO_STATE for positions that are not
 * normalization boundaries, so that value may be returned here.
 */
static uint32_t U_CALLCONV
483
unormIteratorGetState(const UCharIterator *api) {
484
/* not uni->state because that may not be at api->index */
485
return ((UNormIterator *)api)->states[api->index];
488
/*
 * setState callback: restores a position that was obtained from getState.
 * If the state is found among the buffered states, only api->index is moved;
 * otherwise the window is reset with initIndexes() and the state is stored
 * for the new, empty window.
 *
 * api:        the normalizing iterator; NULL sets U_ILLEGAL_ARGUMENT_ERROR
 * state:      state to restore; UITER_NO_STATE sets U_INDEX_OUTOFBOUNDS_ERROR
 * pErrorCode: in/out error code; nothing is done if it is NULL or already
 *             indicates a failure
 */
static void U_CALLCONV
489
unormIteratorSetState(UCharIterator *api, uint32_t state, UErrorCode *pErrorCode) {
490
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
492
} else if(api==NULL) {
493
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
494
} else if(state==UITER_NO_STATE) {
495
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
497
UNormIterator *uni=(UNormIterator *)api;
498
UCharIterator *iter=((UNormIterator *)api)->iter;
499
/* reposition the source iterator only if it is not already known to be in this state */
if(state!=uni->state) {
501
uiter_setState(iter, state, pErrorCode);
505
* Try shortcuts: If the requested state is in the array contents
506
* then just set the index there.
508
* We assume that the state is unique per position!
510
/* cheapest checks first: the current index, then the limit */
if(state==uni->states[api->index]) {
512
} else if(state==uni->states[api->limit]) {
513
api->index=api->limit;
516
/* search for the index with this state */
519
for(i=api->start; i<api->limit; ++i) {
520
if(state==uni->states[i]) {
527
/* there is no array index for this state, reset for fresh contents */
528
initIndexes((UNormIterator *)api, iter);
529
/* after the reset start, index and limit coincide, so this stores the state of the current position */
uni->states[api->limit]=state;
533
/*
 * Template UCharIterator that carries the unormIterator* callbacks of this
 * file; unorm_setIter() copies it into uni->api.
 * NOTE(review): the initializers in front of unormIteratorGetIndex and the
 * entries for unormIteratorMove and unormIteratorNext are not visible in this
 * listing.
 */
static const UCharIterator unormIterator={
535
unormIteratorGetIndex,
537
unormIteratorHasNext,
538
unormIteratorHasPrevious,
539
unormIteratorCurrent,
541
unormIteratorPrevious,
543
unormIteratorGetState,
544
unormIteratorSetState
547
/* Setup functions ---------------------------------------------------------- */
549
/*
 * Creates a UNormIterator, either inside caller-provided memory or in a block
 * obtained from uprv_malloc(), and initializes it to an empty state with the
 * built-in buffers, mode UNORM_NONE and a no-op api iterator.
 *
 * stackMem:     optional caller-provided memory; used when it is non-NULL and
 *               large enough, after alignment if necessary
 * stackMemSize: size of stackMem in bytes
 * pErrorCode:   in/out error code; U_MEMORY_ALLOCATION_ERROR is set on the
 *               heap path (the condition is not visible in this listing)
 */
U_CAPI UNormIterator * U_EXPORT2
550
unorm_openIter(void *stackMem, int32_t stackMemSize, UErrorCode *pErrorCode) {
553
/* argument checking */
554
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
560
/*
 * NOTE(review): stackMemSize is a signed int32_t compared against the
 * unsigned result of sizeof without a cast, unlike the aligned case below.
 * A negative stackMemSize may convert to a large unsigned value and pass this
 * test - confirm and consider casting sizeof to int32_t here as well.
 */
if(stackMem!=NULL && stackMemSize>=sizeof(UNormIterator)) {
561
if(U_ALIGNMENT_OFFSET(stackMem)==0) {
562
/* already aligned */
563
uni=(UNormIterator *)stackMem;
565
int32_t align=(int32_t)U_ALIGNMENT_OFFSET_UP(stackMem);
566
/* the bytes skipped for alignment no longer count toward the usable size */
if((stackMemSize-=align)>=(int32_t)sizeof(UNormIterator)) {
567
/* needs alignment */
568
uni=(UNormIterator *)((char *)stackMem+align);
571
/* else does not fit */
575
uni->isStackAllocated=TRUE;
577
uni=(UNormIterator *)uprv_malloc(sizeof(UNormIterator));
579
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
582
uni->isStackAllocated=FALSE;
587
* do not memset because that would unnecessarily initialize the arrays
590
/* start with the built-in buffers; unorm_closeIter() frees states only when it no longer points at statesBuffer */
uni->chars=uni->charsBuffer;
591
uni->states=uni->statesBuffer;
592
uni->capacity=INITIAL_CAPACITY;
593
uni->state=UITER_NO_STATE;
594
uni->hasPrevious=uni->hasNext=FALSE;
595
uni->mode=UNORM_NONE;
597
/* set a no-op iterator into the api */
598
uiter_setString(&uni->api, NULL, 0);
602
/*
 * Releases the resources of a UNormIterator: the states/chars block if it is
 * no longer the built-in buffer, and, depending on isStackAllocated, the
 * iterator object itself.
 *
 * uni: iterator that was created with unorm_openIter()
 */
U_CAPI void U_EXPORT2
603
unorm_closeIter(UNormIterator *uni) {
605
/* states points at statesBuffer until larger arrays have been allocated */
if(uni->states!=uni->statesBuffer) {
606
/* chars and states are allocated in the same memory block */
607
uprv_free(uni->states);
609
/* isStackAllocated is FALSE only for objects that unorm_openIter() got from uprv_malloc() */
if(!uni->isStackAllocated) {
615
/*
 * Attaches a source iterator and a normalization mode to a UNormIterator and
 * resets its buffer window to the current position of the source iterator.
 *
 * uni:        normalizing iterator to configure
 * iter:       source iterator; it must be non-NULL and provide getState and
 *             setState callbacks
 * mode:       normalization mode; it must satisfy
 *             UNORM_NONE<=mode<UNORM_MODE_COUNT
 * pErrorCode: in/out error code; U_ILLEGAL_ARGUMENT_ERROR is set for invalid
 *             arguments
 */
U_CAPI UCharIterator * U_EXPORT2
616
unorm_setIter(UNormIterator *uni, UCharIterator *iter, UNormalizationMode mode, UErrorCode *pErrorCode) {
617
/* argument checking */
618
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
622
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
625
/* the state callbacks are required because this file repositions iter via uiter_setState()/uiter_getState() */
if( iter==NULL || iter->getState==NULL || iter->setState==NULL ||
626
mode<UNORM_NONE || UNORM_MODE_COUNT<=mode
628
/* set a no-op iterator into the api */
629
uiter_setString(&uni->api, NULL, 0);
630
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
634
/* set the iterator and initialize */
635
/* install the callback table of this file into the embedded api iterator */
uprv_memcpy(&uni->api, &unormIterator, sizeof(unormIterator));
640
initIndexes(uni, iter);
641
/* record the current source state as the state of the empty window */
uni->states[uni->api.limit]=uni->state=uiter_getState(iter);
646
#endif /* uconfig.h switches */