52
52
* @author kenton@google.com Kenton Varda
54
54
public final class TextFormat {
55
private TextFormat() {
57
59
* Outputs a textual representation of the Protocol Message supplied into
58
60
* the parameter output. (This representation is the new version of the
59
61
* classic "ProtocolPrinter" output from the original Protocol Buffer system)
61
public static void print(Message message, Appendable output)
63
public static void print(final Message message, final Appendable output)
62
64
throws IOException {
63
TextGenerator generator = new TextGenerator(output);
65
final TextGenerator generator = new TextGenerator(output);
64
66
print(message, generator);
67
69
/** Outputs a textual representation of {@code fields} to {@code output}. */
68
public static void print(UnknownFieldSet fields, Appendable output)
70
public static void print(final UnknownFieldSet fields,
71
final Appendable output)
69
72
throws IOException {
70
TextGenerator generator = new TextGenerator(output);
73
final TextGenerator generator = new TextGenerator(output);
71
74
printUnknownFields(fields, generator);
75
78
* Like {@code print()}, but writes directly to a {@code String} and
78
public static String printToString(Message message) {
81
public static String printToString(final Message message) {
80
StringBuilder text = new StringBuilder();
83
final StringBuilder text = new StringBuilder();
81
84
print(message, text);
82
85
return text.toString();
83
86
} catch (IOException e) {
91
94
* Like {@code print()}, but writes directly to a {@code String} and
94
public static String printToString(UnknownFieldSet fields) {
97
public static String printToString(final UnknownFieldSet fields) {
96
StringBuilder text = new StringBuilder();
99
final StringBuilder text = new StringBuilder();
97
100
print(fields, text);
98
101
return text.toString();
99
102
} catch (IOException e) {
106
private static void print(Message message, TextGenerator generator)
109
private static void print(final Message message,
110
final TextGenerator generator)
107
111
throws IOException {
108
for (Map.Entry<FieldDescriptor, Object> field :
112
for (final Map.Entry<FieldDescriptor, Object> field :
109
113
message.getAllFields().entrySet()) {
110
114
printField(field.getKey(), field.getValue(), generator);
112
116
printUnknownFields(message.getUnknownFields(), generator);
119
public static void printField(final FieldDescriptor field,
121
final Appendable output)
123
final TextGenerator generator = new TextGenerator(output);
124
printField(field, value, generator);
115
public static void printField(FieldDescriptor field,
117
TextGenerator generator)
127
public static String printFieldToString(final FieldDescriptor field,
128
final Object value) {
130
final StringBuilder text = new StringBuilder();
131
printField(field, value, text);
132
return text.toString();
133
} catch (IOException e) {
134
throw new RuntimeException(
135
"Writing to a StringBuilder threw an IOException (should never " +
140
private static void printField(final FieldDescriptor field,
142
final TextGenerator generator)
118
143
throws IOException {
119
144
if (field.isRepeated()) {
120
145
// Repeated field. Print each element.
121
for (Object element : (List) value) {
146
for (final Object element : (List) value) {
122
147
printSingleField(field, element, generator);
129
private static void printSingleField(FieldDescriptor field,
131
TextGenerator generator)
154
private static void printSingleField(final FieldDescriptor field,
156
final TextGenerator generator)
132
157
throws IOException {
133
158
if (field.isExtension()) {
134
159
generator.print("[");
168
193
generator.print("\n");
171
private static void printFieldValue(FieldDescriptor field,
173
TextGenerator generator)
196
private static void printFieldValue(final FieldDescriptor field,
198
final TextGenerator generator)
174
199
throws IOException {
175
200
switch (field.getType()) {
224
private static void printUnknownFields(UnknownFieldSet unknownFields,
225
TextGenerator generator)
247
private static void printUnknownFields(final UnknownFieldSet unknownFields,
248
final TextGenerator generator)
226
249
throws IOException {
227
for (Map.Entry<Integer, UnknownFieldSet.Field> entry :
250
for (final Map.Entry<Integer, UnknownFieldSet.Field> entry :
228
251
unknownFields.asMap().entrySet()) {
229
String prefix = entry.getKey().toString() + ": ";
230
UnknownFieldSet.Field field = entry.getValue();
252
final String prefix = entry.getKey().toString() + ": ";
253
final UnknownFieldSet.Field field = entry.getValue();
232
for (long value : field.getVarintList()) {
255
for (final long value : field.getVarintList()) {
233
256
generator.print(entry.getKey().toString());
234
257
generator.print(": ");
235
258
generator.print(unsignedToString(value));
236
259
generator.print("\n");
238
for (int value : field.getFixed32List()) {
261
for (final int value : field.getFixed32List()) {
239
262
generator.print(entry.getKey().toString());
240
263
generator.print(": ");
241
264
generator.print(String.format((Locale) null, "0x%08x", value));
242
265
generator.print("\n");
244
for (long value : field.getFixed64List()) {
267
for (final long value : field.getFixed64List()) {
245
268
generator.print(entry.getKey().toString());
246
269
generator.print(": ");
247
270
generator.print(String.format((Locale) null, "0x%016x", value));
248
271
generator.print("\n");
250
for (ByteString value : field.getLengthDelimitedList()) {
273
for (final ByteString value : field.getLengthDelimitedList()) {
251
274
generator.print(entry.getKey().toString());
252
275
generator.print(": \"");
253
276
generator.print(escapeBytes(value));
254
277
generator.print("\"\n");
256
for (UnknownFieldSet value : field.getGroupList()) {
279
for (final UnknownFieldSet value : field.getGroupList()) {
257
280
generator.print(entry.getKey().toString());
258
281
generator.print(" {\n");
259
282
generator.indent();
289
312
* An inner class for writing text to the output stream.
291
static private final class TextGenerator {
294
boolean atStartOfLine = true;
295
StringBuilder indent = new StringBuilder();
297
public TextGenerator(Appendable output) {
314
private static final class TextGenerator {
315
private Appendable output;
316
private boolean atStartOfLine = true;
317
private final StringBuilder indent = new StringBuilder();
319
private TextGenerator(final Appendable output) {
298
320
this.output = output;
324
346
* Print text to the output stream.
326
public void print(CharSequence text) throws IOException {
327
int size = text.length();
348
public void print(final CharSequence text) throws IOException {
349
final int size = text.length();
330
352
for (int i = 0; i < size; i++) {
337
359
write(text.subSequence(pos, size), size - pos);
340
private void write(CharSequence data, int size) throws IOException {
362
private void write(final CharSequence data, final int size)
400
423
// We use possessive quantifiers (*+ and ++) because otherwise the Java
401
424
// regex matcher has stack overflows on large inputs.
402
private static Pattern WHITESPACE =
425
private static final Pattern WHITESPACE =
403
426
Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE);
404
private static Pattern TOKEN = Pattern.compile(
427
private static final Pattern TOKEN = Pattern.compile(
405
428
"[a-zA-Z_][0-9a-zA-Z_+-]*+|" + // an identifier
406
429
"[0-9+-][0-9a-zA-Z_.+-]*+|" + // a number
407
430
"\"([^\"\n\\\\]|\\\\.)*+(\"|\\\\?$)|" + // a double-quoted string
408
431
"\'([^\"\n\\\\]|\\\\.)*+(\'|\\\\?$)", // a single-quoted string
409
432
Pattern.MULTILINE);
411
private static Pattern DOUBLE_INFINITY = Pattern.compile(
434
private static final Pattern DOUBLE_INFINITY = Pattern.compile(
413
436
Pattern.CASE_INSENSITIVE);
414
private static Pattern FLOAT_INFINITY = Pattern.compile(
437
private static final Pattern FLOAT_INFINITY = Pattern.compile(
415
438
"-?inf(inity)?f?",
416
439
Pattern.CASE_INSENSITIVE);
417
private static Pattern FLOAT_NAN = Pattern.compile(
440
private static final Pattern FLOAT_NAN = Pattern.compile(
419
442
Pattern.CASE_INSENSITIVE);
421
444
/** Construct a tokenizer that parses tokens from the given text. */
422
public Tokenizer(CharSequence text) {
445
private Tokenizer(final CharSequence text) {
423
446
this.text = text;
424
447
this.matcher = WHITESPACE.matcher(text);
425
448
skipWhitespace();
481
504
* If the next token exactly matches {@code token}, consume it and return
482
505
* {@code true}. Otherwise, return {@code false} without doing anything.
484
public boolean tryConsume(String token) {
507
public boolean tryConsume(final String token) {
485
508
if (currentToken.equals(token)) {
494
517
* If the next token exactly matches {@code token}, consume it. Otherwise,
495
518
* throw a {@link ParseException}.
497
public void consume(String token) throws ParseException {
520
public void consume(final String token) throws ParseException {
498
521
if (!tryConsume(token)) {
499
522
throw parseException("Expected \"" + token + "\".");
521
544
public String consumeIdentifier() throws ParseException {
522
545
for (int i = 0; i < currentToken.length(); i++) {
523
char c = currentToken.charAt(i);
546
final char c = currentToken.charAt(i);
524
547
if (('a' <= c && c <= 'z') ||
525
548
('A' <= c && c <= 'Z') ||
526
549
('0' <= c && c <= '9') ||
600
623
// We need to parse infinity and nan separately because
601
624
// Double.parseDouble() does not accept "inf", "infinity", or "nan".
602
625
if (DOUBLE_INFINITY.matcher(currentToken).matches()) {
603
boolean negative = currentToken.startsWith("-");
626
final boolean negative = currentToken.startsWith("-");
605
628
return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
625
648
// We need to parse infinity and nan separately because
626
649
// Float.parseFloat() does not accept "inf", "infinity", or "nan".
627
650
if (FLOAT_INFINITY.matcher(currentToken).matches()) {
628
boolean negative = currentToken.startsWith("-");
651
final boolean negative = currentToken.startsWith("-");
630
653
return negative ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY;
672
695
* {@link ParseException}.
674
697
public ByteString consumeByteString() throws ParseException {
675
char quote = currentToken.length() > 0 ? currentToken.charAt(0) : '\0';
698
final char quote = currentToken.length() > 0 ? currentToken.charAt(0)
676
700
if (quote != '\"' && quote != '\'') {
677
701
throw parseException("Expected string.");
686
String escaped = currentToken.substring(1, currentToken.length() - 1);
687
ByteString result = unescapeBytes(escaped);
710
final String escaped =
711
currentToken.substring(1, currentToken.length() - 1);
712
final ByteString result = unescapeBytes(escaped);
690
} catch (InvalidEscapeSequence e) {
715
} catch (InvalidEscapeSequenceException e) {
691
716
throw parseException(e.getMessage());
696
721
* Returns a {@link ParseException} with the current line and column
697
722
* numbers in the description, suitable for throwing.
699
public ParseException parseException(String description) {
724
public ParseException parseException(final String description) {
700
725
// Note: People generally prefer one-based line and column numbers.
701
726
return new ParseException(
702
727
(line + 1) + ":" + (column + 1) + ": " + description);
706
731
* Returns a {@link ParseException} with the line and column numbers of
707
732
* the previous token in the description, suitable for throwing.
709
public ParseException parseExceptionPreviousToken(String description) {
734
public ParseException parseExceptionPreviousToken(
735
final String description) {
710
736
// Note: People generally prefer one-based line and column numbers.
711
737
return new ParseException(
712
738
(previousLine + 1) + ":" + (previousColumn + 1) + ": " + description);
716
742
* Constructs an appropriate {@link ParseException} for the given
717
743
* {@code NumberFormatException} when trying to parse an integer.
719
private ParseException integerParseException(NumberFormatException e) {
745
private ParseException integerParseException(
746
final NumberFormatException e) {
720
747
return parseException("Couldn't parse integer: " + e.getMessage());
724
751
* Constructs an appropriate {@link ParseException} for the given
725
752
* {@code NumberFormatException} when trying to parse a float or double.
727
private ParseException floatParseException(NumberFormatException e) {
754
private ParseException floatParseException(final NumberFormatException e) {
728
755
return parseException("Couldn't parse number: " + e.getMessage());
732
759
/** Thrown when parsing an invalid text format message. */
733
760
public static class ParseException extends IOException {
734
public ParseException(String message) {
761
private static final long serialVersionUID = 3196188060225107702L;
763
public ParseException(final String message) {
740
769
* Parse a text-format message from {@code input} and merge the contents
741
770
* into {@code builder}.
743
public static void merge(Readable input,
744
Message.Builder builder)
745
throws ParseException, IOException {
772
public static void merge(final Readable input,
773
final Message.Builder builder)
746
775
merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
750
779
* Parse a text-format message from {@code input} and merge the contents
751
780
* into {@code builder}.
753
public static void merge(CharSequence input,
754
Message.Builder builder)
782
public static void merge(final CharSequence input,
783
final Message.Builder builder)
755
784
throws ParseException {
756
785
merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
761
790
* into {@code builder}. Extensions will be recognized if they are
762
791
* registered in {@code extensionRegistry}.
764
public static void merge(Readable input,
765
ExtensionRegistry extensionRegistry,
766
Message.Builder builder)
767
throws ParseException, IOException {
793
public static void merge(final Readable input,
794
final ExtensionRegistry extensionRegistry,
795
final Message.Builder builder)
768
797
// Read the entire input to a String then parse that.
770
799
// If StreamTokenizer were not quite so crippled, or if there were a kind
781
810
// TODO(chrisn): See if working around java.io.Reader#read(CharBuffer)
782
811
// overhead is worthwhile
783
private static StringBuilder toStringBuilder(Readable input)
812
private static StringBuilder toStringBuilder(final Readable input)
784
813
throws IOException {
785
StringBuilder text = new StringBuilder();
786
CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE);
814
final StringBuilder text = new StringBuilder();
815
final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE);
788
int n = input.read(buffer);
817
final int n = input.read(buffer);
800
829
* into {@code builder}. Extensions will be recognized if they are
801
830
* registered in {@code extensionRegistry}.
803
public static void merge(CharSequence input,
804
ExtensionRegistry extensionRegistry,
805
Message.Builder builder)
832
public static void merge(final CharSequence input,
833
final ExtensionRegistry extensionRegistry,
834
final Message.Builder builder)
806
835
throws ParseException {
807
Tokenizer tokenizer = new Tokenizer(input);
836
final Tokenizer tokenizer = new Tokenizer(input);
809
838
while (!tokenizer.atEnd()) {
810
839
mergeField(tokenizer, extensionRegistry, builder);
815
844
* Parse a single field from {@code tokenizer} and merge it into
816
845
* {@code builder}.
818
private static void mergeField(Tokenizer tokenizer,
819
ExtensionRegistry extensionRegistry,
820
Message.Builder builder)
847
private static void mergeField(final Tokenizer tokenizer,
848
final ExtensionRegistry extensionRegistry,
849
final Message.Builder builder)
821
850
throws ParseException {
822
851
FieldDescriptor field;
823
Descriptor type = builder.getDescriptorForType();
852
final Descriptor type = builder.getDescriptorForType();
824
853
ExtensionRegistry.ExtensionInfo extension = null;
826
855
if (tokenizer.tryConsume("[")) {
828
StringBuilder name = new StringBuilder(tokenizer.consumeIdentifier());
857
final StringBuilder name =
858
new StringBuilder(tokenizer.consumeIdentifier());
829
859
while (tokenizer.tryConsume(".")) {
831
861
name.append(tokenizer.consumeIdentifier());
847
877
field = extension.descriptor;
849
String name = tokenizer.consumeIdentifier();
879
final String name = tokenizer.consumeIdentifier();
850
880
field = type.findFieldByName(name);
852
882
// Group names are expected to be capitalized as they appear in the
855
885
if (field == null) {
856
886
// Explicitly specify US locale so that this code does not break when
857
887
// executing in Turkey.
858
String lowerName = name.toLowerCase(Locale.US);
888
final String lowerName = name.toLowerCase(Locale.US);
859
889
field = type.findFieldByName(lowerName);
860
890
// If the case-insensitive match worked but the field is NOT a group,
861
891
if (field != null && field.getType() != FieldDescriptor.Type.GROUP) {
951
981
value = tokenizer.consumeByteString();
955
EnumDescriptor enumType = field.getEnumType();
985
final EnumDescriptor enumType = field.getEnumType();
957
987
if (tokenizer.lookingAtInteger()) {
958
int number = tokenizer.consumeInt32();
988
final int number = tokenizer.consumeInt32();
959
989
value = enumType.findValueByNumber(number);
960
990
if (value == null) {
961
991
throw tokenizer.parseExceptionPreviousToken(
962
992
"Enum type \"" + enumType.getFullName() +
963
"\" has no value with number " + number + ".");
993
"\" has no value with number " + number + '.');
966
String id = tokenizer.consumeIdentifier();
996
final String id = tokenizer.consumeIdentifier();
967
997
value = enumType.findValueByName(id);
968
998
if (value == null) {
969
999
throw tokenizer.parseExceptionPreviousToken(
1002
1031
* which no short-hand escape sequence is defined will be escaped
1003
1032
* using 3-digit octal sequences.
1005
static String escapeBytes(ByteString input) {
1006
StringBuilder builder = new StringBuilder(input.size());
1034
static String escapeBytes(final ByteString input) {
1035
final StringBuilder builder = new StringBuilder(input.size());
1007
1036
for (int i = 0; i < input.size(); i++) {
1008
byte b = input.byteAt(i);
1037
final byte b = input.byteAt(i);
1010
1039
// Java does not recognize \a or \v, apparently.
1011
1040
case 0x07: builder.append("\\a" ); break;
1038
1067
* {@link #escapeBytes(ByteString)}. Two-digit hex escapes (starting with
1039
1068
* "\x") are also recognized.
1041
static ByteString unescapeBytes(CharSequence input)
1042
throws InvalidEscapeSequence {
1043
byte[] result = new byte[input.length()];
1070
static ByteString unescapeBytes(final CharSequence input)
1071
throws InvalidEscapeSequenceException {
1072
final byte[] result = new byte[input.length()];
1045
1074
for (int i = 0; i < input.length(); i++) {
1046
1075
char c = input.charAt(i);
1081
1110
code = digitValue(input.charAt(i));
1083
throw new InvalidEscapeSequence(
1112
throw new InvalidEscapeSequenceException(
1084
1113
"Invalid escape sequence: '\\x' with no digits");
1086
1115
if (i + 1 < input.length() && isHex(input.charAt(i + 1))) {
1094
throw new InvalidEscapeSequence(
1095
"Invalid escape sequence: '\\" + c + "'");
1123
throw new InvalidEscapeSequenceException(
1124
"Invalid escape sequence: '\\" + c + '\'');
1099
throw new InvalidEscapeSequence(
1128
throw new InvalidEscapeSequenceException(
1100
1129
"Invalid escape sequence: '\\' at end of string.");
1111
1140
* Thrown by {@link TextFormat#unescapeBytes} and
1112
1141
* {@link TextFormat#unescapeText} when an invalid escape sequence is seen.
1114
static class InvalidEscapeSequence extends IOException {
1115
public InvalidEscapeSequence(String description) {
1143
static class InvalidEscapeSequenceException extends IOException {
1144
private static final long serialVersionUID = -8164033650142593304L;
1146
InvalidEscapeSequenceException(final String description) {
1116
1147
super(description);
1122
1153
* Non-ASCII characters are first encoded as UTF-8, then each byte is escaped
1123
1154
* individually as a 3-digit octal escape. Yes, it's weird.
1125
static String escapeText(String input) {
1156
static String escapeText(final String input) {
1126
1157
return escapeBytes(ByteString.copyFromUtf8(input));
1130
1161
* Un-escape a text string as escaped using {@link #escapeText(String)}.
1131
1162
* Two-digit hex escapes (starting with "\x") are also recognized.
1133
static String unescapeText(String input) throws InvalidEscapeSequence {
1164
static String unescapeText(final String input)
1165
throws InvalidEscapeSequenceException {
1134
1166
return unescapeBytes(input).toStringUtf8();
1137
1169
/** Is this an octal digit? */
1138
private static boolean isOctal(char c) {
1170
private static boolean isOctal(final char c) {
1139
1171
return '0' <= c && c <= '7';
1142
1174
/** Is this a hex digit? */
1143
private static boolean isHex(char c) {
1175
private static boolean isHex(final char c) {
1144
1176
return ('0' <= c && c <= '9') ||
1145
1177
('a' <= c && c <= 'f') ||
1146
1178
('A' <= c && c <= 'F');
1151
1183
* numeric value. This is like {@code Character.digit()} but we don't accept
1152
1184
* non-ASCII digits.
1154
private static int digitValue(char c) {
1186
private static int digitValue(final char c) {
1155
1187
if ('0' <= c && c <= '9') {
1156
1188
return c - '0';
1157
1189
} else if ('a' <= c && c <= 'z') {
1166
1198
* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
1167
1199
* and "0" to signify hexadecimal and octal numbers, respectively.
1169
static int parseInt32(String text) throws NumberFormatException {
1201
static int parseInt32(final String text) throws NumberFormatException {
1170
1202
return (int) parseInteger(text, true, false);
1177
1209
* result is coerced to a (signed) {@code int} when returned since Java has
1178
1210
* no unsigned integer type.
1180
static int parseUInt32(String text) throws NumberFormatException {
1212
static int parseUInt32(final String text) throws NumberFormatException {
1181
1213
return (int) parseInteger(text, false, false);
1186
1218
* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
1187
1219
* and "0" to signify hexadecimal and octal numbers, respectively.
1189
static long parseInt64(String text) throws NumberFormatException {
1221
static long parseInt64(final String text) throws NumberFormatException {
1190
1222
return parseInteger(text, true, true);
1197
1229
* result is coerced to a (signed) {@code long} when returned since Java has
1198
1230
* no unsigned long type.
1200
static long parseUInt64(String text) throws NumberFormatException {
1232
static long parseUInt64(final String text) throws NumberFormatException {
1201
1233
return parseInteger(text, false, true);
1204
private static long parseInteger(String text,
1236
private static long parseInteger(final String text,
1237
final boolean isSigned,
1238
final boolean isLong)
1207
1239
throws NumberFormatException {