53
53
"""Thrown in case of ASCII parsing error."""
56
def MessageToString(message):
56
def MessageToString(message, as_utf8=False, as_one_line=False):
57
57
out = cStringIO.StringIO()
58
PrintMessage(message, out)
58
PrintMessage(message, out, as_utf8=as_utf8, as_one_line=as_one_line)
59
59
result = out.getvalue()
62
return result.rstrip()
64
def PrintMessage(message, out, indent = 0):
66
def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False):
65
67
for field, value in message.ListFields():
66
68
if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
67
69
for element in value:
68
PrintField(field, element, out, indent)
70
PrintField(field, element, out, indent, as_utf8, as_one_line)
70
PrintField(field, value, out, indent)
73
def PrintField(field, value, out, indent = 0):
72
PrintField(field, value, out, indent, as_utf8, as_one_line)
75
def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False):
74
76
"""Print a single field name/value pair. For repeated fields, the value
75
77
should be a single element."""
96
98
# don't include it.
99
PrintFieldValue(field, value, out, indent)
103
def PrintFieldValue(field, value, out, indent = 0):
101
PrintFieldValue(field, value, out, indent, as_utf8, as_one_line)
108
def PrintFieldValue(field, value, out, indent=0,
109
as_utf8=False, as_one_line=False):
104
110
"""Print a single field value (not including name). For repeated fields,
105
111
the value should be a single element."""
107
113
if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
109
PrintMessage(value, out, indent + 2)
110
out.write(' ' * indent + '}')
116
PrintMessage(value, out, indent, as_utf8, as_one_line)
120
PrintMessage(value, out, indent + 2, as_utf8, as_one_line)
121
out.write(' ' * indent + '}')
111
122
elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
112
123
out.write(field.enum_type.values_by_number[value].name)
113
124
elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
115
out.write(_CEscape(value))
126
if type(value) is unicode:
127
out.write(_CEscape(value.encode('utf-8'), as_utf8))
129
out.write(_CEscape(value, as_utf8))
117
131
elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
208
222
sub_message = message.Extensions[field]
210
224
sub_message = getattr(message, field.name)
211
sub_message.SetInParent()
225
sub_message.SetInParent()
213
227
while not tokenizer.TryConsume(end_token):
214
228
if tokenizer.AtEnd():
348
362
def _SkipWhitespace(self):
351
match = re.match(self._WHITESPACE, self._current_line)
365
match = self._WHITESPACE.match(self._current_line, self._column)
354
368
length = len(match.group(0))
355
self._current_line = self._current_line[length:]
356
369
self._column += length
358
371
def TryConsume(self, token):
402
415
ParseError: If an identifier couldn't be consumed.
404
417
result = self.token
405
if not re.match(self._IDENTIFIER, result):
418
if not self._IDENTIFIER.match(result):
406
419
raise self._ParseError('Expected identifier.')
481
494
ParseError: If a floating point number couldn't be consumed.
483
496
text = self.token
484
if re.match(self._FLOAT_INFINITY, text):
497
if self._FLOAT_INFINITY.match(text):
486
499
if text.startswith('-'):
487
500
return -_INFINITY
490
if re.match(self._FLOAT_NAN, text):
503
if self._FLOAT_NAN.match(text):
508
521
ParseError: If a boolean value couldn't be consumed.
510
if self.token == 'true':
523
if self.token in ('true', 't', '1'):
513
elif self.token == 'false':
526
elif self.token in ('false', 'f', '0'):
526
539
ParseError: If a string value couldn't be consumed.
528
return unicode(self.ConsumeByteString(), 'utf-8')
541
bytes = self.ConsumeByteString()
543
return unicode(bytes, 'utf-8')
544
except UnicodeDecodeError, e:
545
raise self._StringParseError(e)
530
547
def ConsumeByteString(self):
531
548
"""Consumes a byte array value.
609
626
def _ParseError(self, message):
610
627
"""Creates and *returns* a ParseError for the current token."""
611
628
return ParseError('%d:%d : %s' % (
612
self._line + 1, self._column + 1, message))
629
self._line + 1, self._column - len(self.token) + 1, message))
614
631
def _IntegerParseError(self, e):
615
632
return self._ParseError('Couldn\'t parse integer: ' + str(e))
617
634
def _FloatParseError(self, e):
618
635
return self._ParseError('Couldn\'t parse number: ' + str(e))
637
def _StringParseError(self, e):
638
return self._ParseError('Couldn\'t parse string: ' + str(e))
620
640
def NextToken(self):
621
641
"""Reads the next meaningful token."""
622
642
self._previous_line = self._line
623
643
self._previous_column = self._column
645
self._column += len(self.token)
646
self._SkipWhitespace()
648
if not self._lines and len(self._current_line) <= self._column:
627
self._column += len(self.token)
629
# Make sure there is data to work on.
632
match = re.match(self._TOKEN, self._current_line)
652
match = self._TOKEN.match(self._current_line, self._column)
634
654
token = match.group(0)
635
self._current_line = self._current_line[len(token):]
636
655
self.token = token
638
self.token = self._current_line[0]
639
self._current_line = self._current_line[1:]
640
self._SkipWhitespace()
657
self.token = self._current_line[self._column]
643
660
# text.encode('string_escape') does not seem to satisfy our needs as it
645
662
# C++ unescaping function allows hex escapes to be any length. So,
646
663
# "\0011".encode('string_escape') ends up being "\\x011", which will be
647
664
# decoded in C++ as a single-character string with char code 0x11.
665
def _CEscape(text, as_utf8):
651
668
if o == 10: return r"\n" # optional escape
656
673
if o == 34: return r'\"' # necessary escape
657
674
if o == 92: return r"\\" # necessary escape
659
if o >= 127 or o < 32: return "\\%03o" % o # necessary escapes
677
if not as_utf8 and (o >= 127 or o < 32): return "\\%03o" % o
661
679
return "".join([escape(c) for c in text])
664
_CUNESCAPE_HEX = re.compile('\\\\x([0-9a-fA-F]{2}|[0-9a-f-A-F])')
682
_CUNESCAPE_HEX = re.compile('\\\\x([0-9a-fA-F]{2}|[0-9a-fA-F])')
667
685
def _CUnescape(text):