3
# Copyright 2008, Google Inc.
6
# Redistribution and use in source and binary forms, with or without
7
# modification, are permitted provided that the following conditions are
10
# * Redistributions of source code must retain the above copyright
11
# notice, this list of conditions and the following disclaimer.
12
# * Redistributions in binary form must reproduce the above
13
# copyright notice, this list of conditions and the following disclaimer
14
# in the documentation and/or other materials provided with the
16
# * Neither the name of Google Inc. nor the names of its
17
# contributors may be used to endorse or promote products derived from
18
# this software without specific prior written permission.
20
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
"""pump v0.2.0 - Pretty Useful for Meta Programming.
34
A tool for preprocessor meta programming. Useful for generating
35
repetitive boilerplate code. Especially useful for writing C++
36
classes, functions, macros, and templates that need to work with
37
a varying number of arguments.
44
Converts foo.cc.pump to foo.cc.
48
ATOMIC_CODE ::= $var ID = EXPRESSION
49
| $var ID = [[ CODE ]]
50
| $range ID EXPRESSION..EXPRESSION
51
| $for ID SEPARATOR [[ CODE ]]
55
| $if EXPRESSION [[ CODE ]] ELSE_BRANCH
58
SEPARATOR ::= RAW_CODE | EMPTY
59
ELSE_BRANCH ::= $else [[ CODE ]]
60
| $elif EXPRESSION [[ CODE ]] ELSE_BRANCH
62
EXPRESSION has Python syntax.
65
__author__ = 'wan@google.com (Zhanyong Wan)'
73
(re.compile(r'\$var\s+'), '$var'),
74
(re.compile(r'\$elif\s+'), '$elif'),
75
(re.compile(r'\$else\s+'), '$else'),
76
(re.compile(r'\$for\s+'), '$for'),
77
(re.compile(r'\$if\s+'), '$if'),
78
(re.compile(r'\$range\s+'), '$range'),
79
(re.compile(r'\$[_A-Za-z]\w*'), '$id'),
80
(re.compile(r'\$\(\$\)'), '$($)'),
81
(re.compile(r'\$'), '$'),
82
(re.compile(r'\[\[\n?'), '[['),
83
(re.compile(r'\]\]\n?'), ']]'),
88
"""Represents a position (line and column) in a text file."""
90
def __init__(self, line=-1, column=-1):
94
def __eq__(self, rhs):
95
return self.line == rhs.line and self.column == rhs.column
97
def __ne__(self, rhs):
98
return not self == rhs
100
def __lt__(self, rhs):
101
return self.line < rhs.line or (
102
self.line == rhs.line and self.column < rhs.column)
104
def __le__(self, rhs):
105
return self < rhs or self == rhs
107
def __gt__(self, rhs):
110
def __ge__(self, rhs):
117
return '%s(%s)' % (self.line + 1, self.column)
119
def __add__(self, offset):
120
return Cursor(self.line, self.column + offset)
122
def __sub__(self, offset):
123
return Cursor(self.line, self.column - offset)
126
"""Returns a copy of self."""
128
return Cursor(self.line, self.column)
131
# Special cursor to indicate the end-of-file.
133
"""Returns the special cursor to denote the end-of-file."""
134
return Cursor(-1, -1)
138
"""Represents a token in a Pump source file."""
140
def __init__(self, start=None, end=None, value=None, token_type=None):
150
self.token_type = token_type
153
return 'Token @%s: \'%s\' type=%s' % (
154
self.start, self.value, self.token_type)
157
"""Returns a copy of self."""
159
return Token(self.start.Clone(), self.end.Clone(), self.value,
163
def StartsWith(lines, pos, string):
    """Returns True iff the given position in lines starts with 'string'.

    Args:
      lines: an indexable sequence of strings, one per source line.
      pos: an object exposing 'line' and 'column' attributes; they select
        the line and the offset inside it where the comparison begins.
      string: the text expected at that position.

    Returns:
      True if the text of lines[pos.line] from pos.column onwards begins
      with 'string', False otherwise.
    """
    # Only the remainder of the addressed line is examined; the comparison
    # never continues onto the following line.
    rest_of_line = lines[pos.line][pos.column:]
    return rest_of_line.startswith(string)
169
def FindFirstInLine(line, token_table):
170
best_match_start = -1
171
for (regex, token_type) in token_table:
172
m = regex.search(line)
174
# We found regex in lines
175
if best_match_start < 0 or m.start() < best_match_start:
176
best_match_start = m.start()
177
best_match_length = m.end() - m.start()
178
best_match_token_type = token_type
180
if best_match_start < 0:
183
return (best_match_start, best_match_length, best_match_token_type)
186
def FindFirst(lines, token_table, cursor):
187
"""Finds the first occurrence of any string in strings in lines."""
189
start = cursor.Clone()
190
cur_line_number = cursor.line
191
for line in lines[start.line:]:
192
if cur_line_number == start.line:
193
line = line[start.column:]
194
m = FindFirstInLine(line, token_table)
196
# We found a regex in line.
197
(start_column, length, token_type) = m
198
if cur_line_number == start.line:
199
start_column += start.column
200
found_start = Cursor(cur_line_number, start_column)
201
found_end = found_start + length
202
return MakeToken(lines, found_start, found_end, token_type)
204
# We failed to find str in lines
208
def SubString(lines, start, end):
209
"""Returns a substring in lines."""
212
end = Cursor(len(lines) - 1, len(lines[-1]))
217
if start.line == end.line:
218
return lines[start.line][start.column:end.column]
220
result_lines = ([lines[start.line][start.column:]] +
221
lines[start.line + 1:end.line] +
222
[lines[end.line][:end.column]])
223
return ''.join(result_lines)
226
def StripMetaComments(str):
    """Strip meta comments from each line in the given string.

    A meta comment starts with '$$' and runs to the end of its line.

    Args:
      str: the Pump source text (the parameter name shadows the builtin
        but is kept so keyword callers keep working).

    Returns:
      The text with every meta comment removed.
    """
    # First, completely remove a leading line containing nothing but a
    # meta comment, including the trailing \n.  No re.MULTILINE flag is
    # passed, so '^' anchors only at the very start of the text.
    str = re.sub(r'^\s*\$\$.*\n', '', str)

    # Then, remove the remaining meta comments.  '\s*' may span newlines,
    # so a comment-only line further down loses the newline in front of it
    # and keeps the one behind it, which drops the line as a whole.
    return re.sub(r'\s*\$\$.*', '', str)
237
def MakeToken(lines, start, end, token_type):
    """Creates a new instance of Token.

    Args:
      lines: the source lines the token is cut from.
      start: position where the token begins.
      end: position where the token ends.
      token_type: the type tag stored on the token.

    Returns:
      A Token whose value is SubString(lines, start, end).
    """
    value = SubString(lines, start, end)
    return Token(start, end, value, token_type)
243
def ParseToken(lines, pos, regex, token_type):
244
line = lines[pos.line][pos.column:]
245
m = regex.search(line)
246
if m and not m.start():
247
return MakeToken(lines, pos, pos + m.end(), token_type)
249
print 'ERROR: %s expected at %s.' % (token_type, pos)
253
# Pre-compiled patterns shared by the tokenizer helpers.

# An identifier: a letter or underscore followed by word characters.
ID_REGEX = re.compile(r'[_A-Za-z]\w*')
# A single '=' sign.
EQ_REGEX = re.compile(r'=')
# The shortest run of characters up to either the end of the line or the
# start of a '$$' meta comment; the lookahead leaves that terminator out.
REST_OF_LINE_REGEX = re.compile(r'.*?(?=$|\$\$)')
# Zero or more whitespace characters (always matches, possibly empty).
OPTIONAL_WHITE_SPACES_REGEX = re.compile(r'\s*')
# Exactly one whitespace character.
WHITE_SPACE_REGEX = re.compile(r'\s')
# The literal two-character '..' range separator.
DOT_DOT_REGEX = re.compile(r'\.\.')
261
def Skip(lines, pos, regex):
262
line = lines[pos.line][pos.column:]
263
m = re.search(regex, line)
264
if m and not m.start():
270
def SkipUntil(lines, pos, regex, token_type):
271
line = lines[pos.line][pos.column:]
272
m = re.search(regex, line)
274
return pos + m.start()
276
print ('ERROR: %s expected on line %s after column %s.' %
277
(token_type, pos.line + 1, pos.column))
281
def ParseExpTokenInParens(lines, pos):
282
def ParseInParens(pos):
283
pos = Skip(lines, pos, OPTIONAL_WHITE_SPACES_REGEX)
284
pos = Skip(lines, pos, r'\(')
286
pos = Skip(lines, pos, r'\)')
290
pos = SkipUntil(lines, pos, r'\(|\)', ')')
291
if SubString(lines, pos, pos + 1) == '(':
293
pos = Skip(lines, pos, r'\)')
299
pos = ParseInParens(pos)
300
return MakeToken(lines, start, pos, 'exp')
303
def RStripNewLineFromToken(token):
304
if token.value.endswith('\n'):
305
return Token(token.start, token.end, token.value[:-1], token.token_type)
310
def TokenizeLines(lines, pos):
312
found = FindFirst(lines, TOKEN_TABLE, pos)
314
yield MakeToken(lines, pos, Eof(), 'code')
317
if found.start == pos:
319
prev_token_rstripped = None
321
prev_token = MakeToken(lines, pos, found.start, 'code')
322
prev_token_rstripped = RStripNewLineFromToken(prev_token)
324
if found.token_type == '$var':
325
if prev_token_rstripped:
326
yield prev_token_rstripped
328
id_token = ParseToken(lines, found.end, ID_REGEX, 'id')
330
pos = Skip(lines, id_token.end, OPTIONAL_WHITE_SPACES_REGEX)
332
eq_token = ParseToken(lines, pos, EQ_REGEX, '=')
334
pos = Skip(lines, eq_token.end, r'\s*')
336
if SubString(lines, pos, pos + 2) != '[[':
337
exp_token = ParseToken(lines, pos, REST_OF_LINE_REGEX, 'exp')
339
pos = Cursor(exp_token.end.line + 1, 0)
340
elif found.token_type == '$for':
341
if prev_token_rstripped:
342
yield prev_token_rstripped
344
id_token = ParseToken(lines, found.end, ID_REGEX, 'id')
346
pos = Skip(lines, id_token.end, WHITE_SPACE_REGEX)
347
elif found.token_type == '$range':
348
if prev_token_rstripped:
349
yield prev_token_rstripped
351
id_token = ParseToken(lines, found.end, ID_REGEX, 'id')
353
pos = Skip(lines, id_token.end, OPTIONAL_WHITE_SPACES_REGEX)
355
dots_pos = SkipUntil(lines, pos, DOT_DOT_REGEX, '..')
356
yield MakeToken(lines, pos, dots_pos, 'exp')
357
yield MakeToken(lines, dots_pos, dots_pos + 2, '..')
359
new_pos = Cursor(pos.line + 1, 0)
360
yield MakeToken(lines, pos, new_pos, 'exp')
362
elif found.token_type == '$':
366
exp_token = ParseExpTokenInParens(lines, found.end)
369
elif (found.token_type == ']]' or found.token_type == '$if' or
370
found.token_type == '$elif' or found.token_type == '$else'):
371
if prev_token_rstripped:
372
yield prev_token_rstripped
383
"""A generator that yields the tokens in the given string."""
385
lines = s.splitlines(True)
386
for token in TokenizeLines(lines, Cursor(0, 0)):
391
def __init__(self, atomic_code_list=None):
392
self.atomic_code = atomic_code_list
396
def __init__(self, identifier=None, atomic_code=None):
397
self.identifier = identifier
398
self.atomic_code = atomic_code
402
def __init__(self, identifier=None, exp1=None, exp2=None):
403
self.identifier = identifier
409
def __init__(self, identifier=None, sep=None, code=None):
410
self.identifier = identifier
416
def __init__(self, else_branch=None):
417
self.else_branch = else_branch
421
def __init__(self, exp=None, then_branch=None, else_branch=None):
423
self.then_branch = then_branch
424
self.else_branch = else_branch
428
def __init__(self, token=None):
429
self.raw_code = token
432
class LiteralDollarNode:
433
def __init__(self, token):
438
def __init__(self, token, python_exp):
440
self.python_exp = python_exp
443
def PopFront(a_list):
449
def PushFront(a_list, elem):
453
def PopToken(a_list, token_type=None):
454
token = PopFront(a_list)
455
if token_type is not None and token.token_type != token_type:
456
print 'ERROR: %s expected at %s' % (token_type, token.start)
457
print 'ERROR: %s found instead' % (token,)
463
def PeekToken(a_list):
470
def ParseExpNode(token):
    """Builds an ExpNode from an expression token.

    Args:
      token: a token whose 'value' attribute holds the expression text.

    Returns:
      An ExpNode pairing the token with its rewritten Python expression.
    """
    # Every identifier-like word in the expression text is turned into a
    # self.GetValue("<word>") lookup.  The pattern does not tell meta
    # variables apart from other words, so any word it matches is rewritten.
    python_exp = re.sub(
        r'([_A-Za-z]\w*)', r'self.GetValue("\1")', token.value)
    return ExpNode(token, python_exp)
475
def ParseElseNode(tokens):
476
def Pop(token_type=None):
477
return PopToken(tokens, token_type)
479
next = PeekToken(tokens)
482
if next.token_type == '$else':
485
code_node = ParseCodeNode(tokens)
488
elif next.token_type == '$elif':
492
code_node = ParseCodeNode(tokens)
494
inner_else_node = ParseElseNode(tokens)
495
return CodeNode([IfNode(ParseExpNode(exp), code_node, inner_else_node)])
496
elif not next.value.strip():
498
return ParseElseNode(tokens)
503
def ParseAtomicCodeNode(tokens):
504
def Pop(token_type=None):
505
return PopToken(tokens, token_type)
507
head = PopFront(tokens)
510
return RawCodeNode(head)
514
next = PeekToken(tokens)
515
if next.token_type == 'exp':
517
return VarNode(id_token, ParseExpNode(exp_token))
519
code_node = ParseCodeNode(tokens)
521
return VarNode(id_token, code_node)
524
next_token = PeekToken(tokens)
525
if next_token.token_type == 'code':
526
sep_token = next_token
531
code_node = ParseCodeNode(tokens)
533
return ForNode(id_token, sep_token, code_node)
535
exp_token = Pop('code')
537
code_node = ParseCodeNode(tokens)
539
else_node = ParseElseNode(tokens)
540
return IfNode(ParseExpNode(exp_token), code_node, else_node)
543
exp1_token = Pop('exp')
545
exp2_token = Pop('exp')
546
return RangeNode(id_token, ParseExpNode(exp1_token),
547
ParseExpNode(exp2_token))
549
return ParseExpNode(Token(head.start + 1, head.end, head.value[1:], 'id'))
551
return LiteralDollarNode(head)
553
exp_token = Pop('exp')
554
return ParseExpNode(exp_token)
556
code_node = ParseCodeNode(tokens)
560
PushFront(tokens, head)
564
def ParseCodeNode(tokens):
565
atomic_code_list = []
569
atomic_code_node = ParseAtomicCodeNode(tokens)
571
atomic_code_list.append(atomic_code_node)
574
return CodeNode(atomic_code_list)
577
def ParseToAST(pump_src_text):
578
"""Convert the given Pump source text into an AST."""
579
tokens = list(Tokenize(pump_src_text))
580
code_node = ParseCodeNode(tokens)
591
clone.variables = self.variables[:]
592
clone.ranges = self.ranges[:]
595
def PushVariable(self, var, value):
596
# If value looks like an int, store it as an int.
598
int_value = int(value)
599
if ('%s' % int_value) == value:
603
self.variables[:0] = [(var, value)]
605
def PopVariable(self):
606
self.variables[:1] = []
608
def PushRange(self, var, lower, upper):
609
self.ranges[:0] = [(var, lower, upper)]
614
def GetValue(self, identifier):
615
for (var, value) in self.variables:
616
if identifier == var:
619
print 'ERROR: meta variable %s is undefined.' % (identifier,)
622
def EvalExp(self, exp):
624
result = eval(exp.python_exp)
626
print 'ERROR: caught exception %s: %s' % (e.__class__.__name__, e)
627
print ('ERROR: failed to evaluate meta expression %s at %s' %
628
(exp.python_exp, exp.token.start))
632
def GetRange(self, identifier):
633
for (var, lower, upper) in self.ranges:
634
if identifier == var:
635
return (lower, upper)
637
print 'ERROR: range %s is undefined.' % (identifier,)
645
def GetLastLine(self):
646
index = self.string.rfind('\n')
650
return self.string[index + 1:]
656
def RunAtomicCode(env, node, output):
657
if isinstance(node, VarNode):
658
identifier = node.identifier.value.strip()
660
RunAtomicCode(env.Clone(), node.atomic_code, result)
661
value = result.string
662
env.PushVariable(identifier, value)
663
elif isinstance(node, RangeNode):
664
identifier = node.identifier.value.strip()
665
lower = int(env.EvalExp(node.exp1))
666
upper = int(env.EvalExp(node.exp2))
667
env.PushRange(identifier, lower, upper)
668
elif isinstance(node, ForNode):
669
identifier = node.identifier.value.strip()
674
(lower, upper) = env.GetRange(identifier)
675
for i in range(lower, upper + 1):
676
new_env = env.Clone()
677
new_env.PushVariable(identifier, i)
678
RunCode(new_env, node.code, output)
681
elif isinstance(node, RawCodeNode):
682
output.Append(node.raw_code.value)
683
elif isinstance(node, IfNode):
684
cond = env.EvalExp(node.exp)
686
RunCode(env.Clone(), node.then_branch, output)
687
elif node.else_branch is not None:
688
RunCode(env.Clone(), node.else_branch, output)
689
elif isinstance(node, ExpNode):
690
value = env.EvalExp(node)
691
output.Append('%s' % (value,))
692
elif isinstance(node, LiteralDollarNode):
694
elif isinstance(node, CodeNode):
695
RunCode(env.Clone(), node, output)
702
def RunCode(env, code_node, output):
    """Runs each atomic code node of 'code_node' in order.

    Args:
      env: the environment handed to every RunAtomicCode call; the same
        object is passed for each child, it is not cloned here.
      code_node: a node whose 'atomic_code' attribute is iterable.
      output: the output sink handed to every RunAtomicCode call.
    """
    for atomic_code in code_node.atomic_code:
        RunAtomicCode(env, atomic_code, output)
707
def IsSingleLineComment(cur_line):
    """Returns True iff 'cur_line' contains the '//' comment marker.

    This is a plain substring test: a '//' anywhere in the line counts,
    including one that sits inside a string literal.

    Args:
      cur_line: the line of text to inspect.
    """
    return '//' in cur_line
711
def IsInPreprocessorDirective(prev_lines, cur_line):
712
if cur_line.lstrip().startswith('#'):
714
return prev_lines and prev_lines[-1].endswith('\\')
717
def WrapComment(line, output):
718
loc = line.find('//')
719
before_comment = line[:loc].rstrip()
720
if before_comment == '':
723
output.append(before_comment)
724
indent = len(before_comment) - len(before_comment.lstrip())
725
prefix = indent*' ' + '// '
726
max_len = 80 - len(prefix)
727
comment = line[loc + 2:].strip()
728
segs = [seg for seg in re.split(r'(\w+\W*)', comment) if seg != '']
731
if len((cur_line + seg).rstrip()) < max_len:
734
if cur_line.strip() != '':
735
output.append(prefix + cur_line.rstrip())
736
cur_line = seg.lstrip()
737
if cur_line.strip() != '':
738
output.append(prefix + cur_line.strip())
741
def WrapCode(line, line_concat, output):
742
indent = len(line) - len(line.lstrip())
743
prefix = indent*' ' # Prefix of the current line
744
max_len = 80 - indent - len(line_concat) # Maximum length of the current line
745
new_prefix = prefix + 4*' ' # Prefix of a continuation line
746
new_max_len = max_len - 4 # Maximum length of a continuation line
747
# Prefers to wrap a line after a ',' or ';'.
748
segs = [seg for seg in re.split(r'([^,;]+[,;]?)', line.strip()) if seg != '']
749
cur_line = '' # The current line without leading spaces.
751
# If the line is still too long, wrap at a space.
752
while cur_line == '' and len(seg.strip()) > max_len:
754
split_at = seg.rfind(' ', 0, max_len)
755
output.append(prefix + seg[:split_at].strip() + line_concat)
756
seg = seg[split_at + 1:]
758
max_len = new_max_len
760
if len((cur_line + seg).rstrip()) < max_len:
761
cur_line = (cur_line + seg).lstrip()
763
output.append(prefix + cur_line.rstrip() + line_concat)
765
max_len = new_max_len
766
cur_line = seg.lstrip()
767
if cur_line.strip() != '':
768
output.append(prefix + cur_line.strip())
771
def WrapPreprocessorDirective(line, output):
    """Wraps 'line' via WrapCode, using ' \\' as the line-concatenation text.

    Args:
      line: the line to wrap.
      output: passed through to WrapCode unchanged.
    """
    WrapCode(line, ' \\', output)
775
def WrapPlainCode(line, output):
    """Wraps 'line' via WrapCode, with an empty line-concatenation text.

    Args:
      line: the line to wrap.
      output: passed through to WrapCode unchanged.
    """
    WrapCode(line, '', output)
779
def IsMultiLineIWYUPragma(line):
    """Tells whether 'line' contains the text '/* IWYU pragma: '.

    Args:
      line: the line of text to inspect.

    Returns:
      The regex match object (truthy) when the marker occurs anywhere in
      the line, otherwise None.
    """
    return re.search(r'/\* IWYU pragma: ', line)
783
def IsHeaderGuardIncludeOrOneLineIWYUPragma(line):
    """Tells whether 'line' is a header guard, an #include or an IWYU pragma.

    The first two patterns use re.match, so the '#' must be the very first
    character of the line; the pragma marker may appear anywhere.

    Args:
      line: the line of text to inspect.

    Returns:
      A regex match object (truthy) when one of the patterns matches,
      otherwise None.
    """
    return (
        # '#ifndef NAME', '#define NAME' or '#endif // NAME', with nothing
        # but optional whitespace after NAME.
        re.match(r'^#(ifndef|define|endif\s*//)\s*[\w_]+\s*$', line) or
        re.match(r'^#include\s', line) or
        # Don't break IWYU pragmas, either; that causes iwyu.py problems.
        re.search(r'// IWYU pragma: ', line))
790
def WrapLongLine(line, output):
794
elif IsSingleLineComment(line):
795
if IsHeaderGuardIncludeOrOneLineIWYUPragma(line):
796
# The style guide made an exception to allow long header guard lines,
797
# includes and IWYU pragmas.
800
WrapComment(line, output)
801
elif IsInPreprocessorDirective(output, line):
802
if IsHeaderGuardIncludeOrOneLineIWYUPragma(line):
803
# The style guide made an exception to allow long header guard lines,
804
# includes and IWYU pragmas.
807
WrapPreprocessorDirective(line, output)
808
elif IsMultiLineIWYUPragma(line):
811
WrapPlainCode(line, output)
814
def BeautifyCode(string):
815
lines = string.splitlines()
818
WrapLongLine(line, output)
819
output2 = [line.rstrip() for line in output]
820
return '\n'.join(output2) + '\n'
823
def ConvertFromPumpSource(src_text):
824
"""Return the text generated from the given Pump source text."""
825
ast = ParseToAST(StripMetaComments(src_text))
827
RunCode(Env(), ast, output)
828
return BeautifyCode(output.string)
837
output_str = ConvertFromPumpSource(file(file_path, 'r').read())
838
if file_path.endswith('.pump'):
839
output_file_path = file_path[:-5]
841
output_file_path = '-'
842
if output_file_path == '-':
845
output_file = file(output_file_path, 'w')
846
output_file.write('// This file was GENERATED by command:\n')
847
output_file.write('// %s %s\n' %
848
(os.path.basename(__file__), os.path.basename(file_path)))
849
output_file.write('// DO NOT EDIT BY HAND!!!\n\n')
850
output_file.write(output_str)
854
if __name__ == '__main__':