21
20
#include "parrot/parrot.h"
21
#include "../unicode.h"
22
22
#include "shared.h"
25
PARROT_DOES_NOT_RETURN
26
static void no_ICU_lib(PARROT_INTERP) /* HEADERIZER SKIP */
28
Parrot_ex_throw_from_c_args(interp, NULL,
29
EXCEPTION_LIBRARY_ERROR,
34
24
/* HEADERIZER HFILE: none */
36
26
/* HEADERIZER BEGIN: static */
37
27
/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
39
static size_t ucs2_hash(PARROT_INTERP,
40
ARGIN(const STRING *s),
30
static void ucs2_check_codepoint(PARROT_INTERP, UINTVAL c)
31
__attribute__nonnull__(1);
33
static size_t ucs2_hash(SHIM_INTERP,
34
ARGIN(const STRING *src),
42
__attribute__nonnull__(1)
43
36
__attribute__nonnull__(2);
45
static UINTVAL ucs2_iter_get(PARROT_INTERP,
38
static UINTVAL ucs2_iter_get(SHIM_INTERP,
46
39
ARGIN(const STRING *str),
47
40
ARGIN(const String_iter *i),
49
__attribute__nonnull__(1)
50
42
__attribute__nonnull__(2)
51
43
__attribute__nonnull__(3);
53
static UINTVAL ucs2_iter_get_and_advance(PARROT_INTERP,
45
static UINTVAL ucs2_iter_get_and_advance(SHIM_INTERP,
54
46
ARGIN(const STRING *str),
55
47
ARGMOD(String_iter *i))
56
__attribute__nonnull__(1)
57
48
__attribute__nonnull__(2)
58
49
__attribute__nonnull__(3)
68
59
FUNC_MODIFIES(*str)
71
static void ucs2_iter_set_position(PARROT_INTERP,
72
ARGIN(const STRING *str),
73
ARGMOD(String_iter *i),
75
__attribute__nonnull__(1)
76
__attribute__nonnull__(2)
77
__attribute__nonnull__(3)
80
static void ucs2_iter_skip(PARROT_INTERP,
81
ARGIN(const STRING *str),
62
static void ucs2_iter_skip(SHIM_INTERP,
63
SHIM(const STRING *str),
82
64
ARGMOD(String_iter *i),
84
__attribute__nonnull__(1)
85
__attribute__nonnull__(2)
86
66
__attribute__nonnull__(3)
89
static UINTVAL ucs2_ord(PARROT_INTERP,
90
ARGIN(const STRING *src),
69
static UINTVAL ucs2_ord(PARROT_INTERP, ARGIN(const STRING *src), INTVAL idx)
92
70
__attribute__nonnull__(1)
93
71
__attribute__nonnull__(2);
100
78
PARROT_WARN_UNUSED_RESULT
101
79
PARROT_CANNOT_RETURN_NULL
102
static STRING * ucs2_substr(PARROT_INTERP,
103
ARGIN(const STRING *src),
106
__attribute__nonnull__(1)
107
__attribute__nonnull__(2);
109
PARROT_WARN_UNUSED_RESULT
110
PARROT_CANNOT_RETURN_NULL
111
80
static STRING * ucs2_to_encoding(PARROT_INTERP, ARGIN(const STRING *src))
112
81
__attribute__nonnull__(1)
113
82
__attribute__nonnull__(2);
84
#define ASSERT_ARGS_ucs2_check_codepoint __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
85
PARROT_ASSERT_ARG(interp))
115
86
#define ASSERT_ARGS_ucs2_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
116
PARROT_ASSERT_ARG(interp) \
117
, PARROT_ASSERT_ARG(s))
87
PARROT_ASSERT_ARG(src))
118
88
#define ASSERT_ARGS_ucs2_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
119
PARROT_ASSERT_ARG(interp) \
120
, PARROT_ASSERT_ARG(str) \
89
PARROT_ASSERT_ARG(str) \
121
90
, PARROT_ASSERT_ARG(i))
122
91
#define ASSERT_ARGS_ucs2_iter_get_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
123
PARROT_ASSERT_ARG(interp) \
124
, PARROT_ASSERT_ARG(str) \
92
PARROT_ASSERT_ARG(str) \
125
93
, PARROT_ASSERT_ARG(i))
126
94
#define ASSERT_ARGS_ucs2_iter_set_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
127
95
PARROT_ASSERT_ARG(interp) \
128
96
, PARROT_ASSERT_ARG(str) \
129
97
, PARROT_ASSERT_ARG(i))
130
#define ASSERT_ARGS_ucs2_iter_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
131
PARROT_ASSERT_ARG(interp) \
132
, PARROT_ASSERT_ARG(str) \
133
, PARROT_ASSERT_ARG(i))
134
98
#define ASSERT_ARGS_ucs2_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
135
PARROT_ASSERT_ARG(interp) \
136
, PARROT_ASSERT_ARG(str) \
137
, PARROT_ASSERT_ARG(i))
138
100
#define ASSERT_ARGS_ucs2_ord __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
139
101
PARROT_ASSERT_ARG(interp) \
140
102
, PARROT_ASSERT_ARG(src))
141
103
#define ASSERT_ARGS_ucs2_scan __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
142
104
PARROT_ASSERT_ARG(interp) \
143
105
, PARROT_ASSERT_ARG(src))
144
#define ASSERT_ARGS_ucs2_substr __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
145
PARROT_ASSERT_ARG(interp) \
146
, PARROT_ASSERT_ARG(src))
147
106
#define ASSERT_ARGS_ucs2_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
148
107
PARROT_ASSERT_ARG(interp) \
149
108
, PARROT_ASSERT_ARG(src))
150
109
/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
151
110
/* HEADERIZER END: static */
154
# include <unicode/ustring.h>
157
112
#define UNIMPL Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED, \
178
133
STRING * const result =
179
134
Parrot_utf16_encoding_ptr->to_encoding(interp, src);
181
/* conversion to utf16 downgrads to ucs-2 if possible - check result */
136
/* conversion to utf16 downgrades to ucs-2 if possible - check result */
182
137
if (result->encoding == Parrot_utf16_encoding_ptr)
183
138
Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_INVALID_ENCODING,
184
"can't convert string with surrogates to ucs2");
139
"Lossy conversion to UCS-2\n");
146
=item C<static void ucs2_check_codepoint(PARROT_INTERP, UINTVAL c)>
148
Throws an exception if codepoint C<c> is invalid.
156
ucs2_check_codepoint(PARROT_INTERP, UINTVAL c)
158
ASSERT_ARGS(ucs2_check_codepoint)
160
if (UNICODE_IS_SURROGATE(c)
161
|| (c >= 0xFDD0 && c <= 0xFDEF)
163
Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_INVALID_CHARACTER,
164
"Invalid character in UCS-2 string\n");
191
169
=item C<static UINTVAL ucs2_scan(PARROT_INTERP, const STRING *src)>
193
171
Returns the number of codepoints in string C<src>.
201
179
ucs2_scan(PARROT_INTERP, ARGIN(const STRING *src))
203
181
ASSERT_ARGS(ucs2_scan)
206
return src->bufused / sizeof (UChar);
182
const utf16_t * const ptr = (utf16_t *)src->strstart;
183
const UINTVAL len = src->bufused >> 1;
186
if (src->bufused & 1)
187
Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_INVALID_CHARACTER,
188
"Unaligned end in UCS-2 string\n");
190
for (i = 0; i < len; ++i) {
191
ucs2_check_codepoint(interp, ptr[i]);
215
=item C<static UINTVAL ucs2_ord(PARROT_INTERP, const STRING *src, UINTVAL
199
=item C<static UINTVAL ucs2_ord(PARROT_INTERP, const STRING *src, INTVAL idx)>
218
201
Returns the codepoint in string C<src> at position C<offset>.
225
ucs2_ord(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset)
208
ucs2_ord(PARROT_INTERP, ARGIN(const STRING *src), INTVAL idx)
227
210
ASSERT_ARGS(ucs2_ord)
229
const UChar * const s = (const UChar*) src->strstart;
241
=item C<static STRING * ucs2_substr(PARROT_INTERP, const STRING *src, UINTVAL
242
offset, UINTVAL count)>
244
Returns the codepoints in string C<src> at position C<offset> and length
251
PARROT_WARN_UNUSED_RESULT
252
PARROT_CANNOT_RETURN_NULL
254
ucs2_substr(PARROT_INTERP, ARGIN(const STRING *src), UINTVAL offset, UINTVAL count)
256
ASSERT_ARGS(ucs2_substr)
257
STRING * const return_string = Parrot_str_copy(interp, src);
260
return_string->strstart = (char*)src->strstart + offset * sizeof (UChar);
261
return_string->bufused = count * sizeof (UChar);
262
return_string->strlen = count;
263
return_string->hashval = 0;
264
return return_string;
211
const utf16_t * const ptr = (utf16_t *)src->strstart;
212
const UINTVAL len = STRING_length(src);
217
if ((UINTVAL)idx >= len)
218
encoding_ord_error(interp, src, idx);
285
ucs2_iter_get(PARROT_INTERP,
236
ucs2_iter_get(SHIM_INTERP,
286
237
ARGIN(const STRING *str), ARGIN(const String_iter *i), INTVAL offset)
288
239
ASSERT_ARGS(ucs2_iter_get)
289
return ucs2_ord(interp, str, i->charpos + offset);
240
const utf16_t * const ptr = (utf16_t *)str->strstart;
242
return ptr[i->charpos + offset];
304
ucs2_iter_skip(PARROT_INTERP,
305
ARGIN(const STRING *str), ARGMOD(String_iter *i), INTVAL skip)
257
ucs2_iter_skip(SHIM_INTERP,
258
SHIM(const STRING *str), ARGMOD(String_iter *i), INTVAL skip)
307
260
ASSERT_ARGS(ucs2_iter_skip)
311
262
i->charpos += skip;
312
i->bytepos += skip * sizeof (UChar);
263
i->bytepos += skip * 2;
332
ucs2_iter_get_and_advance(PARROT_INTERP,
278
ucs2_iter_get_and_advance(SHIM_INTERP,
333
279
ARGIN(const STRING *str), ARGMOD(String_iter *i))
335
281
ASSERT_ARGS(ucs2_iter_get_and_advance)
338
UChar * const s = (UChar*) str->strstart;
339
size_t pos = i->bytepos / sizeof (UChar);
341
/* TODO either make sure that we don't go past end or use SAFE
344
const UChar c = s[pos++];
346
i->bytepos = pos * sizeof (UChar);
282
const utf16_t * const ptr = (utf16_t *)str->strstart;
283
const UINTVAL c = ptr[i->charpos];
352
return (UINTVAL)0; /* Stop the static analyzers from panicing */
370
305
ARGMOD(STRING *str), ARGMOD(String_iter *i), UINTVAL c)
372
307
ASSERT_ARGS(ucs2_iter_set_and_advance)
375
UChar * const s = (UChar*) str->strstart;
376
UINTVAL pos = i->bytepos / sizeof (UChar);
379
i->bytepos = pos * sizeof (UChar);
390
=item C<static void ucs2_iter_set_position(PARROT_INTERP, const STRING *str,
391
String_iter *i, UINTVAL n)>
393
Moves the string iterator C<i> to the position C<n> in the string.
400
ucs2_iter_set_position(PARROT_INTERP,
401
ARGIN(const STRING *str), ARGMOD(String_iter *i), UINTVAL n)
403
ASSERT_ARGS(ucs2_iter_set_position)
408
i->bytepos = n * sizeof (UChar);
418
=item C<static size_t ucs2_hash(PARROT_INTERP, const STRING *s, size_t hashval)>
308
utf16_t * const ptr = (utf16_t *)str->strstart;
310
ucs2_check_codepoint(interp, c);
320
=item C<static size_t ucs2_hash(PARROT_INTERP, const STRING *src, size_t
420
323
Returns the hashed value of the string, given a seed in hashval.
427
ucs2_hash(PARROT_INTERP, ARGIN(const STRING *s), size_t hashval)
330
ucs2_hash(SHIM_INTERP, ARGIN(const STRING *src), size_t hashval)
429
332
ASSERT_ARGS(ucs2_hash)
431
const UChar *pos = (const UChar*) s->strstart;
432
UINTVAL len = s->strlen;
334
STRING * const s = PARROT_const_cast(STRING *, src);
335
const utf16_t *ptr = (utf16_t *)s->strstart;
336
UINTVAL len = s->strlen;
436
339
hashval += hashval << 5;
343
s->hashval = hashval;
450
348
static STR_VTABLE Parrot_ucs2_encoding = {