2
2
* ProGuard -- shrinking, optimization, obfuscation, and preverification
5
* Copyright (c) 2002-2008 Eric Lafortune (eric@graphics.cornell.edu)
5
* Copyright (c) 2002-2009 Eric Lafortune (eric@graphics.cornell.edu)
7
7
* This program is free software; you can redistribute it and/or modify it
8
8
* under the terms of the GNU General Public License as published by the Free
33
33
public class Utf8Constant extends Constant
35
private static final String ENCODING = "UTF-8";
37
35
private static final char TWO_BYTE_LIMIT = 0x80;
38
private static final byte TWO_BYTE_CONSTANT1 = (byte)0xc0;
39
private static final byte TWO_BYTE_CONSTANT2 = (byte)0x80;
36
private static final int TWO_BYTE_CONSTANT1 = 0xc0;
37
private static final int TWO_BYTE_CONSTANT2 = 0x80;
40
38
private static final int TWO_BYTE_SHIFT1 = 6;
41
private static final byte TWO_BYTE_MASK1 = (byte)0x1f;
42
private static final byte TWO_BYTE_MASK2 = (byte)0x3f;
39
private static final int TWO_BYTE_MASK1 = 0x1f;
40
private static final int TWO_BYTE_MASK2 = 0x3f;
44
42
private static final char THREE_BYTE_LIMIT = 0x800;
45
private static final byte THREE_BYTE_CONSTANT1 = (byte)0xe0;
46
private static final byte THREE_BYTE_CONSTANT2 = (byte)0x80;
47
private static final byte THREE_BYTE_CONSTANT3 = (byte)0x80;
43
private static final int THREE_BYTE_CONSTANT1 = 0xe0;
44
private static final int THREE_BYTE_CONSTANT2 = 0x80;
45
private static final int THREE_BYTE_CONSTANT3 = 0x80;
48
46
private static final int THREE_BYTE_SHIFT1 = 12;
49
47
private static final int THREE_BYTE_SHIFT2 = 6;
50
private static final byte THREE_BYTE_MASK1 = (byte)0x0f;
51
private static final byte THREE_BYTE_MASK2 = (byte)0x3f;
52
private static final byte THREE_BYTE_MASK3 = (byte)0x3f;
48
private static final int THREE_BYTE_MASK1 = 0x0f;
49
private static final int THREE_BYTE_MASK2 = 0x3f;
50
private static final int THREE_BYTE_MASK3 = 0x3f;
55
53
// There are a lot of Utf8Constant objects, so we're optimising their storage.
152
// Small utility methods.
155
* Switches to a byte array representation of the UTF-8 data.
157
private void switchToByteArrayRepresentation() throws UnsupportedEncodingException
161
bytes = getByteArrayRepresentation(string);
152
168
* Switches to a String representation of the UTF-8 data.
154
170
private void switchToStringRepresentation() throws UnsupportedEncodingException
156
if (utf8string == null)
158
utf8string = new String(bytes, ENCODING);
174
string = getStringRepresentation(bytes);
165
* Transforms UTF-8 bytes to the slightly modified UTF-8 representation that
166
* is used by classes.
181
* Returns the modified UTF-8 byte array representation of the given string.
168
private byte[] getByteArrayRepresentation() throws UnsupportedEncodingException
183
private byte[] getByteArrayRepresentation(String string) throws UnsupportedEncodingException
170
// Do we still have the byte array representation?
173
// Then return that one.
177
185
// We're computing the byte array ourselves, because the implementation
178
186
// of String.getBytes("UTF-8") has a bug, at least up to JRE 1.4.2.
179
187
// Also note the special treatment of the 0 character.
181
189
// Compute the byte array length.
182
190
int byteLength = 0;
183
int stringLength = utf8string.length();
191
int stringLength = string.length();
184
192
for (int stringIndex = 0; stringIndex < stringLength; stringIndex++)
186
char c = utf8string.charAt(stringIndex);
194
char c = string.charAt(stringIndex);
188
196
// The character is represented by one, two, or three bytes.
189
197
byteLength += c == 0 ? 2 :
199
207
int byteIndex = 0;
200
208
for (int stringIndex = 0; stringIndex < stringLength; stringIndex++)
202
char c = utf8string.charAt(stringIndex);
210
char c = string.charAt(stringIndex);
205
213
// The 0 character gets a two-byte representation in classes.
206
bytes[byteIndex++] = TWO_BYTE_CONSTANT1;
207
bytes[byteIndex++] = TWO_BYTE_CONSTANT2;
214
bytes[byteIndex++] = (byte)TWO_BYTE_CONSTANT1;
215
bytes[byteIndex++] = (byte)TWO_BYTE_CONSTANT2;
209
217
else if (c < TWO_BYTE_LIMIT)
242
* Returns the String representation of the given modified UTF-8 byte array.
244
private String getStringRepresentation(byte[] bytes) throws UnsupportedEncodingException
246
// We're computing the string ourselves, because the implementation
247
// of "new String(bytes)" doesn't honor the special treatment of
248
// the 0 character in JRE 1.6_u11.
250
// Allocate a char array that is certainly large enough (at most one char per byte).
251
char[] chars = new char[bytes.length];
253
// Fill out the array.
256
while (byteIndex < bytes.length)
259
int b = bytes[byteIndex++] & 0xff;
261
// Depending on the flag bits in the first byte, the character
262
// is represented by a single byte, by two bytes, or by three
263
// bytes. We're not checking the redundant flag bits in the
264
// second byte and the third byte.
268
(char)(b < TWO_BYTE_CONSTANT1 ? b :
270
b < THREE_BYTE_CONSTANT1 ? ((b & TWO_BYTE_MASK1) << TWO_BYTE_SHIFT1) |
271
((bytes[byteIndex++] & TWO_BYTE_MASK2) ) :
273
((b & THREE_BYTE_MASK1) << THREE_BYTE_SHIFT1) |
274
((bytes[byteIndex++] & THREE_BYTE_MASK2) << THREE_BYTE_SHIFT2) |
275
((bytes[byteIndex++] & THREE_BYTE_MASK3) ));
277
catch (ArrayIndexOutOfBoundsException e)
279
throw new UnsupportedEncodingException("Missing UTF-8 bytes after initial byte [0x"+Integer.toHexString(b)+"] in string ["+new String(chars, 0, charIndex)+"]");
283
return new String(chars, 0, charIndex);