2
@param c the character to encode in utf-8
3
@param o the callback for each octet in the sequence
5
defn u8_encode_char = ( c : char, o : (c:octet)->() ) -> {
6
//FEATURE: shouldn't need lossy, code_val could return integer with range?
7
var v : binary = lossy( code_val(c) )
9
//FEATURE: a suffix marker for binary might be nice
10
literal c_imask : binary = 0b0011_1111
11
literal c_val : binary = 0b1000_0000
13
defn lo = ( c : binary ) -> {
16
//FEATURE: A binary formatting function/op
20
}, v <= 0b0111_1111_1111 ? {
21
lo( bit_shr(v,6) or 0b1100_0000 )
22
lo( (v and c_imask) or c_val )
23
}, v <= 0b1111_1111_1111_1111 ? {
24
lo( bit_shr(v,12) or 0b1110_0000 )
25
lo( ( bit_shr(v,6) and c_imask ) or c_val )
26
lo( ( v and c_imask ) or c_val )
27
}, v <= 0b0001_1111_1111_1111_1111_1111 ? {
28
lo( bit_shr(v,18) or 0b1111_0000 )
29
lo( ( bit_shr(v,12) and c_imask ) or c_val )
30
lo( ( bit_shr(v,6) and c_imask ) or c_val )
31
lo( ( v and c_imask ) or c_val )
39
@return the number of octets for character `c` in utf-8
41
defn u8_octet_len = ( c : char ) -> {
42
var v : binary = lossy( code_val(c) )
46
}, v <= 0b0111_1111_1111 ? {
48
}, v <= 0b1111_1111_1111_1111 ? {
50
}, v <= 0b0001_1111_1111_1111_1111_1111 ? {
58
defn u8_octet_len = ( s : array「char」 ) -> {
60
for i in range(0,s.size) {
61
c = c + u8_octet_len(s#i)
67
defn u8_encode = ( s : array「char」 ) -> ( t : array「octet」 ) {
68
var len = u8_octet_len( s )
69
var r = type「 array「abi_char」 」(len+1)
72
for i in range(0,s.size) {
73
u8_encode_char( s#i, (c)-> {
83
//TODO: these should all be "literal" instead
84
//these are marked `binary` to be large enough for bitops, not 8bit.
85
//TODO: I'd prefer they were octets, with a simple way to mark a large region of bitops as 32bit
86
// Lead-octet classifiers. u8_len and u8_decode test `(c and b_mask_N) == b_val_N`
// to select the branch for a lead octet `c`. In u8_decode the N=2, 3 and 4
// branches combine `c` with 1, 2 and 3 following octets respectively, and
// `not b_mask_N` keeps the payload bits of `c`.
// NOTE(review): N=1 is presumably the single-octet case -- confirm against the branch body.
var b_mask_1 : binary = 0b1000_0000
87
var b_val_1 : binary = 0b0000_0000
88
var b_mask_2 : binary = 0b1110_0000
89
var b_val_2 : binary = 0b1100_0000
90
var b_mask_3 : binary = 0b1111_0000
91
var b_val_3 : binary = 0b1110_0000
92
var b_mask_4 : binary = 0b1111_1000
93
var b_val_4 : binary = 0b1111_0000
94
// u8_decode applies `not c_mask` to every following octet, keeping its low six bits.
var c_mask : binary = 0b1100_0000
95
// NOTE(review): presumably the pattern expected under c_mask for a following octet;
// u8_encode_char declares its own local `c_val` literal with the same value -- confirm
// where this file-level one is used.
var c_val : binary = 0b1000_0000
97
defn u8_len = ( s : array「octet」, max : integer ) -> {
103
(c and b_mask_1) == b_val_1 ? {
105
}, (c and b_mask_2) == b_val_2 ? {
107
}, (c and b_mask_3) == b_val_3 ? {
109
}, (c and b_mask_4) == b_val_4 ? {
123
defn u8_decode = ( u : array「octet」, max : integer ) -> ( s : array「char」 ) {
124
var clen = u8_len(u,max)
126
s = type「 array「char」 」(clen)
130
var c : binary = u#at
135
(c and b_mask_1) == b_val_1 ? {
138
}, (c and b_mask_2) == b_val_2 ? {
139
cp = (bit_shl(c and not b_mask_2,6) or
140
(u#(at+1) and not c_mask))
142
}, (c and b_mask_3) == b_val_3 ? {
143
cp = (bit_shl((c and not b_mask_3),12) or
144
bit_shl((u#(at+1) and not c_mask),6) or
145
(u#(at+2) and not c_mask))
147
}, (c and b_mask_4) == b_val_4 ? {
148
cp = (bit_shl(c and not b_mask_4,18) or
149
bit_shl(u#(at+1) and not c_mask,12) or
150
bit_shl(u#(at+2) and not c_mask,6) or
151
(u#(at+3) and not c_mask))
158
s#sat = char_val(lossy(cp))
b'\\ No newline at end of file'