1
# Copyright (C) 2003-2007, 2009, 2010 Nominum, Inc.
3
# Permission to use, copy, modify, and distribute this software and its
4
# documentation for any purpose with or without fee is hereby granted,
5
# provided that the above copyright notice and this permission notice
6
# appear in all copies.
8
# THE SOFTWARE IS PROVIDED "AS IS" AND NOMINUM DISCLAIMS ALL WARRANTIES
9
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NOMINUM BE LIABLE FOR
11
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
14
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
"""Tokenize DNS master file format"""
34
_QUOTING_DELIMITERS = { '"' : True }
44
class UngetBufferFull(dns.exception.DNSException):
    """Raised when an attempt is made to unget a token when the unget
    buffer is full."""
50
"""A DNS master file format token.
52
@ivar ttype: The token type
54
@ivar value: The token value
56
@ivar has_escape: Does the token value contain escapes?
57
@type has_escape: bool
60
def __init__(self, ttype, value='', has_escape=False):
61
"""Initialize a token instance.
63
@param ttype: The token type
65
@ivar value: The token value
67
@ivar has_escape: Does the token value contain escapes?
68
@type has_escape: bool
72
self.has_escape = has_escape
75
return self.ttype == EOF
78
return self.ttype == EOL
80
def is_whitespace(self):
    """Report whether this token's type is WHITESPACE.

    @rtype: bool
    """
    return self.ttype == WHITESPACE
83
def is_identifier(self):
    """Report whether this token's type is IDENTIFIER.

    @rtype: bool
    """
    return self.ttype == IDENTIFIER
86
def is_quoted_string(self):
    """Report whether this token's type is QUOTED_STRING.

    @rtype: bool
    """
    return self.ttype == QUOTED_STRING
90
return self.ttype == COMMENT
92
def is_delimiter(self):
    """Report whether this token's type is DELIMITER.

    @rtype: bool
    """
    return self.ttype == DELIMITER
95
def is_eol_or_eof(self):
    """Report whether this token ends a line or ends the input, i.e.
    whether its type is EOL or EOF.

    @rtype: bool
    """
    return self.ttype in (EOL, EOF)
98
def __eq__(self, other):
99
if not isinstance(other, Token):
101
return (self.ttype == other.ttype and
102
self.value == other.value)
104
def __ne__(self, other):
105
if not isinstance(other, Token):
107
return (self.ttype != other.ttype or
108
self.value != other.value)
111
return '%d "%s"' % (self.ttype, self.value)
114
if not self.has_escape:
124
raise dns.exception.UnexpectedEnd
129
raise dns.exception.UnexpectedEnd
133
raise dns.exception.UnexpectedEnd
136
if not (c2.isdigit() and c3.isdigit()):
137
raise dns.exception.SyntaxError
138
c = chr(int(c) * 100 + int(c2) * 10 + int(c3))
140
return Token(self.ttype, unescaped)
142
# compatibility for old-style tuple tokens
148
return iter((self.ttype, self.value))
150
def __getitem__(self, i):
158
class Tokenizer(object):
159
"""A DNS master file format tokenizer.
161
A token is a (type, value) tuple, where I{type} is an int, and
162
I{value} is a string. The valid types are EOF, EOL, WHITESPACE,
163
IDENTIFIER, QUOTED_STRING, COMMENT, and DELIMITER.
165
@ivar file: The file to tokenize
167
@ivar ungotten_char: The most recently ungotten character, or None.
168
@type ungotten_char: string
169
@ivar ungotten_token: The most recently ungotten token, or None.
170
@type ungotten_token: (int, string) token tuple
171
@ivar multiline: The current multiline level. This value is increased
172
by one every time a '(' delimiter is read, and decreased by one every time
173
a ')' delimiter is read.
175
@ivar quoting: This variable is true if the tokenizer is currently
176
reading a quoted string.
178
@ivar eof: This variable is true if the tokenizer has encountered EOF.
180
@ivar delimiters: The current delimiter dictionary.
181
@type delimiters: dict
182
@ivar line_number: The current line number
183
@type line_number: int
184
@ivar filename: A filename that will be returned by the L{where} method.
185
@type filename: string
188
def __init__(self, f=sys.stdin, filename=None):
189
"""Initialize a tokenizer instance.
191
@param f: The file to tokenize. The default is sys.stdin.
192
This parameter may also be a string, in which case the tokenizer
193
will take its input from the contents of the string.
194
@type f: file or string
195
@param filename: the name of the filename that the L{where} method
197
@type filename: string
200
if isinstance(f, str):
201
f = cStringIO.StringIO(f)
203
filename = '<string>'
211
self.ungotten_char = None
212
self.ungotten_token = None
216
self.delimiters = _DELIMITERS
218
self.filename = filename
221
"""Read a character from input.
225
if self.ungotten_char is None:
229
c = self.file.read(1)
233
self.line_number += 1
235
c = self.ungotten_char
236
self.ungotten_char = None
240
"""Return the current location in the input.
242
@rtype: (string, int) tuple. The first item is the filename of
243
the input, the second is the current line number.
246
return (self.filename, self.line_number)
248
def _unget_char(self, c):
    """Unget a character.

    The unget buffer for characters is only one character large; it is
    an error to try to unget a character when the unget buffer is not
    empty.

    @param c: the character to unget
    @type c: string
    @raises UngetBufferFull: there is already an ungotten char
    """
    # Refuse to overwrite a pending character: the single-slot buffer
    # would otherwise silently drop it.
    if self.ungotten_char is not None:
        raise UngetBufferFull
    self.ungotten_char = c
264
def skip_whitespace(self):
265
"""Consume input until a non-whitespace character is encountered.
267
The non-whitespace character is then ungotten, and the number of
268
whitespace characters consumed is returned.
270
If the tokenizer is in multiline mode, then newlines are whitespace.
278
if c != ' ' and c != '\t':
279
if (c != '\n') or not self.multiline:
284
def get(self, want_leading = False, want_comment = False):
285
"""Get the next token.
287
@param want_leading: If True, return a WHITESPACE token if the
288
first character read is whitespace. The default is False.
289
@type want_leading: bool
290
@param want_comment: If True, return a COMMENT token if the
291
first token read is a comment. The default is False.
292
@type want_comment: bool
294
@raises dns.exception.UnexpectedEnd: input ended prematurely
295
@raises dns.exception.SyntaxError: input was badly formed
298
if not self.ungotten_token is None:
299
token = self.ungotten_token
300
self.ungotten_token = None
301
if token.is_whitespace():
304
elif token.is_comment():
309
skipped = self.skip_whitespace()
310
if want_leading and skipped > 0:
311
return Token(WHITESPACE, ' ')
317
if c == '' or c in self.delimiters:
318
if c == '' and self.quoting:
319
raise dns.exception.UnexpectedEnd
320
if token == '' and ttype != QUOTED_STRING:
323
self.skip_whitespace()
326
if not self.multiline > 0:
327
raise dns.exception.SyntaxError
329
self.skip_whitespace()
334
self.delimiters = _QUOTING_DELIMITERS
335
ttype = QUOTED_STRING
339
self.delimiters = _DELIMITERS
340
self.skip_whitespace()
343
return Token(EOL, '\n')
347
if c == '\n' or c == '':
352
return Token(COMMENT, token)
355
raise dns.exception.SyntaxError('unbalanced parentheses')
358
self.skip_whitespace()
362
return Token(EOL, '\n')
364
# This code exists in case we ever want a
365
# delimiter to be returned. It never produces
376
raise dns.exception.UnexpectedEnd
378
c2 = self._get_char()
380
raise dns.exception.UnexpectedEnd
381
c3 = self._get_char()
383
raise dns.exception.UnexpectedEnd
384
if not (c2.isdigit() and c3.isdigit()):
385
raise dns.exception.SyntaxError
386
c = chr(int(c) * 100 + int(c2) * 10 + int(c3))
388
raise dns.exception.SyntaxError('newline in quoted string')
391
# It's an escape. Put it and the next character into
392
# the token; it will be checked later for goodness.
397
if c == '' or c == '\n':
398
raise dns.exception.UnexpectedEnd
400
if token == '' and ttype != QUOTED_STRING:
402
raise dns.exception.SyntaxError('unbalanced parentheses')
404
return Token(ttype, token, has_escape)
406
def unget(self, token):
    """Unget a token, so that it is available to be read again.

    The unget buffer for tokens is only one token large; it is
    an error to try to unget a token when the unget buffer is not
    empty.

    @param token: the token to unget
    @type token: Token object
    @raises UngetBufferFull: there is already an ungotten token
    """
    # Refuse to overwrite a pending token: the single-slot buffer would
    # otherwise silently drop it.
    if self.ungotten_token is not None:
        raise UngetBufferFull
    self.ungotten_token = token
423
"""Return the next item in an iteration.
424
@rtype: (int, string)
438
"""Read the next token and interpret it as an integer.
440
@raises dns.exception.SyntaxError:
444
token = self.get().unescape()
445
if not token.is_identifier():
446
raise dns.exception.SyntaxError('expecting an identifier')
447
if not token.value.isdigit():
448
raise dns.exception.SyntaxError('expecting an integer')
449
return int(token.value)
452
"""Read the next token and interpret it as an 8-bit unsigned
455
@raises dns.exception.SyntaxError:
459
value = self.get_int()
460
if value < 0 or value > 255:
461
raise dns.exception.SyntaxError('%d is not an unsigned 8-bit integer' % value)
464
def get_uint16(self):
465
"""Read the next token and interpret it as a 16-bit unsigned
468
@raises dns.exception.SyntaxError:
472
value = self.get_int()
473
if value < 0 or value > 65535:
474
raise dns.exception.SyntaxError('%d is not an unsigned 16-bit integer' % value)
477
def get_uint32(self):
    """Read the next token and interpret it as a 32-bit unsigned
    integer.

    @raises dns.exception.SyntaxError: the token is not an integer in
    the range 0 to 4294967295 inclusive
    @rtype: int
    """
    # Delegate parsing to get_int(), as get_uint8() and get_uint16() do,
    # instead of repeating the identifier/isdigit checks here.
    value = self.get_int()
    # The largest unsigned 32-bit value is 2**32 - 1.  The previous bound
    # of 4294967296 wrongly accepted 2**32 itself.
    if value < 0 or value > 4294967295:
        raise dns.exception.SyntaxError('%d is not an unsigned 32-bit integer' % value)
    return value
495
def get_string(self, origin=None):
496
"""Read the next token and interpret it as a string.
498
@raises dns.exception.SyntaxError:
502
token = self.get().unescape()
503
if not (token.is_identifier() or token.is_quoted_string()):
504
raise dns.exception.SyntaxError('expecting a string')
507
def get_identifier(self, origin=None):
508
"""Read the next token and raise an exception if it is not an identifier.
510
@raises dns.exception.SyntaxError:
514
token = self.get().unescape()
515
if not token.is_identifier():
516
raise dns.exception.SyntaxError('expecting an identifier')
519
def get_name(self, origin=None):
520
"""Read the next token and interpret it as a DNS name.
522
@raises dns.exception.SyntaxError:
523
@rtype: dns.name.Name object"""
526
if not token.is_identifier():
527
raise dns.exception.SyntaxError('expecting an identifier')
528
return dns.name.from_text(token.value, origin)
531
"""Read the next token and raise an exception if it isn't EOL or
534
@raises dns.exception.SyntaxError:
539
if not token.is_eol_or_eof():
540
raise dns.exception.SyntaxError('expected EOL or EOF, got %d "%s"' % (token.ttype, token.value))
544
token = self.get().unescape()
545
if not token.is_identifier():
546
raise dns.exception.SyntaxError('expecting an identifier')
547
return dns.ttl.from_text(token.value)