3434
* Calculate the number of bytes needed to encode the source
3435
* operand to UTF-8. If the source operand is invalid (e.g. wrong
3436
* type or range) we return a nonsense integer result (1 to 4). We
3437
* can get away with that because we KNOW that bs_put_utf8 will do
3438
* full error checking.
3440
OpCase(i_bs_utf8_size_sd): {
3445
if (arg < make_small(0x80UL)) {
3446
result = make_small(1);
3447
} else if (arg < make_small(0x800UL)) {
3448
result = make_small(2);
3449
} else if (arg < make_small(0x10000UL)) {
3450
result = make_small(3);
3452
result = make_small(4);
3454
StoreBifResult(1, result);
3457
OpCase(i_bs_put_utf8_js): {
3461
if (!erts_bs_put_utf8(ERL_BITS_ARGS_1(arg))) {
3468
* Calculate the number of bytes needed to encode the source
3469
* operand to UTF-16. If the source operand is invalid (e.g. wrong
3470
* type or range) we return a nonsense integer result (2 or 4). We
3471
* can get away with that because we KNOW that bs_put_utf16 will do
3472
* full error checking.
3475
OpCase(i_bs_utf16_size_sd): {
3477
Eterm result = make_small(2);
3480
if (arg >= make_small(0x10000UL)) {
3481
result = make_small(4);
3483
StoreBifResult(1, result);
3486
OpCase(i_bs_put_utf16_jIs): {
3490
if (!erts_bs_put_utf16(ERL_BITS_ARGS_2(arg, Arg(1)))) {
3497
* Only used for validating a value about to be stored in a binary.
3499
OpCase(i_bs_validate_unicode_js): {
3505
* There is no need to untag the integer, but it IS necessary
3506
* to make sure it is small (if the term is a bignum, it could
3507
* slip through the test, and there is no further test that
3508
* would catch it, since bit syntax construction silently masks
3511
if (is_not_small(val) || val > make_small(0x10FFFFUL) ||
3512
(make_small(0xD800UL) <= val && val <= make_small(0xDFFFUL)) ||
3513
val == make_small(0xFFFEUL) || val == make_small(0xFFFFUL)) {
3520
* Only used for validating a value matched out.
3522
* tmp_arg1 = Integer to validate
3523
* tmp_arg2 = Match context
3525
OpCase(i_bs_validate_unicode_retract_j): {
3527
* There is no need to untag the integer, but it IS necessary
3528
* to make sure it is small (a bignum pointer could fall in
3531
if (is_not_small(tmp_arg1) || tmp_arg1 > make_small(0x10FFFFUL) ||
3532
(make_small(0xD800UL) <= tmp_arg1 && tmp_arg1 <= make_small(0xDFFFUL)) ||
3533
tmp_arg1 == make_small(0xFFFEUL) || tmp_arg1 == make_small(0xFFFFUL)) {
3534
ErlBinMatchBuffer *mb = ms_matchbuffer(tmp_arg2);
3466
3543
* Matching of binaries.
3796
3873
StoreBifResult(3, result);
3876
/* Operands: MatchContext Fail Dst */
3877
OpCase(i_bs_get_utf8_rfd): {
3879
goto do_bs_get_utf8;
3882
OpCase(i_bs_get_utf8_xfd): {
3883
tmp_arg1 = xb(Arg(0));
3888
* tmp_arg1 = match_context
3889
* Operands: Fail Dst
3893
ErlBinMatchBuffer* _mb;
3895
Uint remaining_bits;
3901
* Number of trailing bytes for each value of the first byte.
3903
static const byte erts_trailing_bytes_for_utf8[256] = {
3904
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3905
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3906
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3907
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3908
9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
3909
9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
3910
9,9,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
3911
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,9,9,9,9,9,9,9,9
3914
_mb = ms_matchbuffer(tmp_arg1);
3915
if ((remaining_bits = _mb->size - _mb->offset) < 8) {
3918
if (BIT_OFFSET(_mb->offset) == 0) {
3919
pos = _mb->base + BYTE_OFFSET(_mb->offset);
3921
erts_align_utf8_bytes(_mb, tmp_buf);
3925
switch (erts_trailing_bytes_for_utf8[result]) {
3932
if (remaining_bits < 16) {
3936
if ((a & 0xC0) != 0x80) {
3939
result = (result << 6) + a - (Eterm) 0x00003080UL;
3945
if (remaining_bits < 24) {
3950
if ((a & 0xC0) != 0x80 || (b & 0xC0) != 0x80 ||
3951
(result == 0xE0 && a < 0xA0)) {
3954
result = (((result << 6) + a) << 6) + b - (Eterm) 0x000E2080UL;
3955
if ((0xD800 <= result && result <= 0xDFFF) ||
3956
result == 0xFFFE || result == 0xFFFF) {
3964
if (remaining_bits < 32) {
3970
if ((a & 0xC0) != 0x80 || (b & 0xC0) != 0x80 ||
3971
(c & 0xC0) != 0x80 ||
3972
(result == 0xF0 && a < 0x90)) {
3975
result = (((((result << 6) + a) << 6) + b) << 6) +
3976
c - (Eterm) 0x03C82080UL;
3977
if (result > 0x10FFFF) {
3985
result = make_small(result);
3986
StoreBifResult(1, result);
3989
/* Operands: MatchContext Fail Flags Dst */
3990
OpCase(i_bs_get_utf16_rfId): {
3992
goto do_bs_get_utf16;
3995
OpCase(i_bs_get_utf16_xfId): {
3996
tmp_arg1 = xb(Arg(0));
4001
* tmp_arg1 = match_context
4002
* Operands: Fail Flags Dst
4005
Eterm result = erts_bs_get_utf16(ms_matchbuffer(tmp_arg1), Arg(1));
4006
if (is_non_value(result)) {
4009
StoreBifResult(2, result);
3800
4013
ErlBinMatchBuffer* mb;