103
* Helper called by generated code to determine if a byte array is a valid
104
* UTF-8 encoded string such that the original bytes can be converted to
105
* a String object and then back to a byte array round tripping the bytes
108
* This is inspired by UTF_8.java in sun.nio.cs.
110
* @param byteString the string to check
111
* @return whether the byte array is round trippable
113
public static boolean isValidUtf8(ByteString byteString) {
115
int size = byteString.size();
116
// To avoid the masking, we could change this to use bytes;
117
// Then X > 0xC2 gets turned into X > (byte) 0xC2 && X < 0; X < 0x80
118
// gets turned into X >= 0, etc.
120
while (index < size) {
121
int byte1 = byteString.byteAt(index++) & 0xFF;
123
// fast loop for single bytes
126
// we know from this point on that we have 2-4 byte forms
127
} else if (byte1 < 0xC2 || byte1 > 0xF4) {
128
// catch illegal first bytes: < C2 or > F4
132
// fail if we run out of bytes
135
int byte2 = byteString.byteAt(index++) & 0xFF;
136
if (byte2 < 0x80 || byte2 > 0xBF) {
137
// general trail-byte test
141
// two-byte form; general trail-byte test is sufficient
145
// we know from this point on that we have 3 or 4 byte forms
147
// fail if we run out of bytes
150
int byte3 = byteString.byteAt(index++) & 0xFF;
151
if (byte3 < 0x80 || byte3 > 0xBF) {
152
// general trail-byte test
156
// three-byte form. Vastly more frequent than four-byte forms
157
// The following has an extra test, but not worth restructuring
158
if (byte1 == 0xE0 && byte2 < 0xA0 ||
159
byte1 == 0xED && byte2 > 0x9F) {
160
// check special cases of byte2
168
// fail if we run out of bytes
171
int byte4 = byteString.byteAt(index++) & 0xFF;
172
if (byte4 < 0x80 || byte4 > 0xBF) {
173
// general trail-byte test
176
// The following has an extra test, but not worth restructuring
177
if (byte1 == 0xF0 && byte2 < 0x90 ||
178
byte1 == 0xF4 && byte2 > 0x8F) {
179
// check special cases of byte2
103
188
* Interface for an enum value or value descriptor, to be used in FieldSet.
104
189
* The lite library stores enum values directly in FieldSets but the full
105
190
* library stores EnumValueDescriptors in order to better support reflection.