1
package com.fasterxml.jackson.core.io;
3
import java.lang.ref.SoftReference;
5
import com.fasterxml.jackson.core.util.BufferRecycler;
6
import com.fasterxml.jackson.core.util.ByteArrayBuilder;
7
import com.fasterxml.jackson.core.util.TextBuffer;
/**
 * Helper class used for efficient encoding of JSON String values (including
 * JSON field names) into character arrays or UTF-8 byte arrays.
 *<p>
 * Note that methods in here are somewhat optimized, but not ridiculously so.
 * The reason is that conversion results are expected to be cached by callers,
 * so these methods should not be hot spots during normal operation.
 */
17
public final class JsonStringEncoder
19
private final static char[] HEX_CHARS = CharTypes.copyHexChars();
21
private final static byte[] HEX_BYTES = CharTypes.copyHexBytes();
23
private final static int SURR1_FIRST = 0xD800;
24
private final static int SURR1_LAST = 0xDBFF;
25
private final static int SURR2_FIRST = 0xDC00;
26
private final static int SURR2_LAST = 0xDFFF;
28
private final static int INT_BACKSLASH = '\\';
29
private final static int INT_U = 'u';
30
private final static int INT_0 = '0';
33
* This <code>ThreadLocal</code> contains a {@link java.lang.ref.SoftReference}
34
* to a {@link BufferRecycler} used to provide a low-cost
35
* buffer recycling between reader and writer instances.
37
final protected static ThreadLocal<SoftReference<JsonStringEncoder>> _threadEncoder
38
= new ThreadLocal<SoftReference<JsonStringEncoder>>();
41
* Lazily constructed text buffer used to produce JSON encoded Strings
42
* as characters (without UTF-8 encoding)
44
protected TextBuffer _textBuffer;
47
* Lazily-constructed builder used for UTF-8 encoding of text values
48
* (quoted and unquoted)
50
protected ByteArrayBuilder _byteBuilder;
53
* Temporary buffer used for composing quote/escape sequences
55
protected final char[] _quoteBuffer;
/*
/**********************************************************
/* Construction, instance access
/**********************************************************
 */
63
public JsonStringEncoder()
65
_quoteBuffer = new char[6];
66
_quoteBuffer[0] = '\\';
67
_quoteBuffer[2] = '0';
68
_quoteBuffer[3] = '0';
72
* Factory method for getting an instance; this is either recycled per-thread instance,
73
* or a newly constructed one.
75
public static JsonStringEncoder getInstance()
77
SoftReference<JsonStringEncoder> ref = _threadEncoder.get();
78
JsonStringEncoder enc = (ref == null) ? null : ref.get();
81
enc = new JsonStringEncoder();
82
_threadEncoder.set(new SoftReference<JsonStringEncoder>(enc));
/*
/**********************************************************
/* Public API
/**********************************************************
 */
94
* Method that will quote text contents using JSON standard quoting,
95
* and return results as a character array
97
public char[] quoteAsString(String input)
99
TextBuffer textBuffer = _textBuffer;
100
if (textBuffer == null) {
101
// no allocator; can add if we must, shouldn't need to
102
_textBuffer = textBuffer = new TextBuffer(null);
104
char[] outputBuffer = textBuffer.emptyAndGetCurrentSegment();
105
final int[] escCodes = CharTypes.get7BitOutputEscapes();
106
final int escCodeCount = escCodes.length;
108
final int inputLen = input.length();
112
while (inPtr < inputLen) {
115
char c = input.charAt(inPtr);
116
if (c < escCodeCount && escCodes[c] != 0) {
119
if (outPtr >= outputBuffer.length) {
120
outputBuffer = textBuffer.finishCurrentSegment();
123
outputBuffer[outPtr++] = c;
124
if (++inPtr >= inputLen) {
128
// something to escape; 2 or 6-char variant?
129
char d = input.charAt(inPtr++);
130
int escCode = escCodes[d];
131
int length = (escCode < 0)
132
? _appendNumericEscape(d, _quoteBuffer)
133
: _appendNamedEscape(escCode, _quoteBuffer);
135
if ((outPtr + length) > outputBuffer.length) {
136
int first = outputBuffer.length - outPtr;
138
System.arraycopy(_quoteBuffer, 0, outputBuffer, outPtr, first);
140
outputBuffer = textBuffer.finishCurrentSegment();
141
int second = length - first;
142
System.arraycopy(_quoteBuffer, first, outputBuffer, 0, second);
145
System.arraycopy(_quoteBuffer, 0, outputBuffer, outPtr, length);
149
textBuffer.setCurrentLength(outPtr);
150
return textBuffer.contentsAsArray();
154
* Will quote given JSON String value using standard quoting, encode
155
* results as UTF-8, and return result as a byte array.
157
public byte[] quoteAsUTF8(String text)
159
ByteArrayBuilder byteBuilder = _byteBuilder;
160
if (byteBuilder == null) {
161
// no allocator; can add if we must, shouldn't need to
162
_byteBuilder = byteBuilder = new ByteArrayBuilder(null);
165
int inputEnd = text.length();
167
byte[] outputBuffer = byteBuilder.resetAndGetFirstSegment();
170
while (inputPtr < inputEnd) {
171
final int[] escCodes = CharTypes.get7BitOutputEscapes();
173
inner_loop: // ASCII and escapes
175
int ch = text.charAt(inputPtr);
176
if (ch > 0x7F || escCodes[ch] != 0) {
179
if (outputPtr >= outputBuffer.length) {
180
outputBuffer = byteBuilder.finishCurrentSegment();
183
outputBuffer[outputPtr++] = (byte) ch;
184
if (++inputPtr >= inputEnd) {
188
if (outputPtr >= outputBuffer.length) {
189
outputBuffer = byteBuilder.finishCurrentSegment();
192
// Ok, so what did we hit?
193
int ch = (int) text.charAt(inputPtr++);
194
if (ch <= 0x7F) { // needs quoting
195
int escape = escCodes[ch];
196
// ctrl-char, 6-byte escape...
197
outputPtr = _appendByteEscape(ch, escape, byteBuilder, outputPtr);
198
outputBuffer = byteBuilder.getCurrentSegment();
200
} else if (ch <= 0x7FF) { // fine, just needs 2 byte output
201
outputBuffer[outputPtr++] = (byte) (0xc0 | (ch >> 6));
202
ch = (0x80 | (ch & 0x3f));
203
} else { // 3 or 4 bytes
205
if (ch < SURR1_FIRST || ch > SURR2_LAST) { // nope
206
outputBuffer[outputPtr++] = (byte) (0xe0 | (ch >> 12));
207
if (outputPtr >= outputBuffer.length) {
208
outputBuffer = byteBuilder.finishCurrentSegment();
211
outputBuffer[outputPtr++] = (byte) (0x80 | ((ch >> 6) & 0x3f));
212
ch = (0x80 | (ch & 0x3f));
213
} else { // yes, surrogate pair
214
if (ch > SURR1_LAST) { // must be from first range
215
_illegalSurrogate(ch);
217
// and if so, followed by another from next range
218
if (inputPtr >= inputEnd) {
219
_illegalSurrogate(ch);
221
ch = _convertSurrogate(ch, text.charAt(inputPtr++));
222
if (ch > 0x10FFFF) { // illegal, as per RFC 4627
223
_illegalSurrogate(ch);
225
outputBuffer[outputPtr++] = (byte) (0xf0 | (ch >> 18));
226
if (outputPtr >= outputBuffer.length) {
227
outputBuffer = byteBuilder.finishCurrentSegment();
230
outputBuffer[outputPtr++] = (byte) (0x80 | ((ch >> 12) & 0x3f));
231
if (outputPtr >= outputBuffer.length) {
232
outputBuffer = byteBuilder.finishCurrentSegment();
235
outputBuffer[outputPtr++] = (byte) (0x80 | ((ch >> 6) & 0x3f));
236
ch = (0x80 | (ch & 0x3f));
239
if (outputPtr >= outputBuffer.length) {
240
outputBuffer = byteBuilder.finishCurrentSegment();
243
outputBuffer[outputPtr++] = (byte) ch;
245
return _byteBuilder.completeAndCoalesce(outputPtr);
249
* Will encode given String as UTF-8 (without any quoting), return
250
* resulting byte array.
252
@SuppressWarnings("resource")
253
public byte[] encodeAsUTF8(String text)
255
ByteArrayBuilder byteBuilder = _byteBuilder;
256
if (byteBuilder == null) {
257
// no allocator; can add if we must, shouldn't need to
258
_byteBuilder = byteBuilder = new ByteArrayBuilder(null);
261
int inputEnd = text.length();
263
byte[] outputBuffer = byteBuilder.resetAndGetFirstSegment();
264
int outputEnd = outputBuffer.length;
267
while (inputPtr < inputEnd) {
268
int c = text.charAt(inputPtr++);
270
// first tight loop for ascii
272
if (outputPtr >= outputEnd) {
273
outputBuffer = byteBuilder.finishCurrentSegment();
274
outputEnd = outputBuffer.length;
277
outputBuffer[outputPtr++] = (byte) c;
278
if (inputPtr >= inputEnd) {
281
c = text.charAt(inputPtr++);
284
// then multi-byte...
285
if (outputPtr >= outputEnd) {
286
outputBuffer = byteBuilder.finishCurrentSegment();
287
outputEnd = outputBuffer.length;
290
if (c < 0x800) { // 2-byte
291
outputBuffer[outputPtr++] = (byte) (0xc0 | (c >> 6));
292
} else { // 3 or 4 bytes
294
if (c < SURR1_FIRST || c > SURR2_LAST) { // nope
295
outputBuffer[outputPtr++] = (byte) (0xe0 | (c >> 12));
296
if (outputPtr >= outputEnd) {
297
outputBuffer = byteBuilder.finishCurrentSegment();
298
outputEnd = outputBuffer.length;
301
outputBuffer[outputPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
302
} else { // yes, surrogate pair
303
if (c > SURR1_LAST) { // must be from first range
304
_illegalSurrogate(c);
306
// and if so, followed by another from next range
307
if (inputPtr >= inputEnd) {
308
_illegalSurrogate(c);
310
c = _convertSurrogate(c, text.charAt(inputPtr++));
311
if (c > 0x10FFFF) { // illegal, as per RFC 4627
312
_illegalSurrogate(c);
314
outputBuffer[outputPtr++] = (byte) (0xf0 | (c >> 18));
315
if (outputPtr >= outputEnd) {
316
outputBuffer = byteBuilder.finishCurrentSegment();
317
outputEnd = outputBuffer.length;
320
outputBuffer[outputPtr++] = (byte) (0x80 | ((c >> 12) & 0x3f));
321
if (outputPtr >= outputEnd) {
322
outputBuffer = byteBuilder.finishCurrentSegment();
323
outputEnd = outputBuffer.length;
326
outputBuffer[outputPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
329
if (outputPtr >= outputEnd) {
330
outputBuffer = byteBuilder.finishCurrentSegment();
331
outputEnd = outputBuffer.length;
334
outputBuffer[outputPtr++] = (byte) (0x80 | (c & 0x3f));
336
return _byteBuilder.completeAndCoalesce(outputPtr);
/*
/**********************************************************
/* Internal methods
/**********************************************************
 */
345
private int _appendNumericEscape(int value, char[] quoteBuffer)
347
quoteBuffer[1] = 'u';
348
// We know it's a control char, so only the last 2 chars are non-0
349
quoteBuffer[4] = HEX_CHARS[value >> 4];
350
quoteBuffer[5] = HEX_CHARS[value & 0xF];
354
private int _appendNamedEscape(int escCode, char[] quoteBuffer)
356
quoteBuffer[1] = (char) escCode;
360
private int _appendByteEscape(int ch, int escCode, ByteArrayBuilder byteBuilder, int ptr)
362
byteBuilder.setCurrentSegmentLength(ptr);
363
byteBuilder.append(INT_BACKSLASH);
364
if (escCode < 0) { // standard escape
365
byteBuilder.append(INT_U);
368
byteBuilder.append(HEX_BYTES[hi >> 4]);
369
byteBuilder.append(HEX_BYTES[hi & 0xF]);
372
byteBuilder.append(INT_0);
373
byteBuilder.append(INT_0);
375
byteBuilder.append(HEX_BYTES[ch >> 4]);
376
byteBuilder.append(HEX_BYTES[ch & 0xF]);
377
} else { // 2-char simple escape
378
byteBuilder.append((byte) escCode);
380
return byteBuilder.getCurrentSegmentLength();
383
protected static int _convertSurrogate(int firstPart, int secondPart)
385
// Ok, then, is the second part valid?
386
if (secondPart < SURR2_FIRST || secondPart > SURR2_LAST) {
387
throw new IllegalArgumentException("Broken surrogate pair: first char 0x"+Integer.toHexString(firstPart)+", second 0x"+Integer.toHexString(secondPart)+"; illegal combination");
389
return 0x10000 + ((firstPart - SURR1_FIRST) << 10) + (secondPart - SURR2_FIRST);
392
protected static void _illegalSurrogate(int code) {
393
throw new IllegalArgumentException(UTF8Writer.illegalSurrogateDesc(code));