3
# Thomas Nagy, 2006-2009 (ita)
6
C/C++ preprocessor for finding dependencies
8
Reasons for using the Waf preprocessor by default
9
1. Some c/c++ extensions (Qt) require a custom preprocessor for obtaining the dependencies (.moc files)
10
2. Not all compilers provide .d files for obtaining the dependencies (portability)
11
3. A naive file scanner will not catch the constructs such as "#include foo()"
12
4. A naive file scanner will catch unnecessary dependencies (change an unused header -> recompile everything)
14
Regarding the speed concerns:
15
a. the preprocessing is performed only when files must be compiled
16
b. the macros are evaluated only for #if/#elif/#include
17
c. the time penalty is about 10%
18
d. system headers are not scanned
20
Now if you do not want the Waf preprocessor, the tool "gccdeps" uses the .d files produced
21
during the compilation to track the dependencies (useful when used with the boost libraries).
22
It only works with gcc though, and it cannot be used with Qt builds. A dumb
23
file scanner will be added in the future, so we will have most behaviours.
25
# TODO: more varargs, pragma once
26
# TODO: dumb file scanner tracking all includes
28
import re, sys, os, string
29
import Logs, Build, Utils
30
from Logs import debug, error
33
class PreprocError(Utils.WafError):
39
recursion_limit = 5000
40
"do not loop too much on header inclusion"
43
"set to 1 to track headers on files in /usr/include - else absolute paths are ignored"
45
standard_includes = ['/usr/include']
46
if sys.platform == "win32":
47
standard_includes = []
50
'apply the trigraph rules first'
53
"Keep <> for system includes (do not search for those includes)"
67
"these ops are for c++, to reset, set an empty dict"
69
# ignore #warning and #error
70
# matches a preprocessor directive line: optional whitespace, '#' (or the '%:'
# digraph), a keyword and the rest of the line; groups: (marker, keyword, rest)
re_lines = re.compile(
	r'^[ \t]*(#|%:)[ \t]*(ifdef|ifndef|if|else|elif|endif|include|import|define|undef|pragma)[ \t]*(.*)\r*$',
	re.IGNORECASE | re.MULTILINE)
74
# a macro name: a C identifier at the start of the string
re_mac = re.compile(r"^[a-zA-Z_]\w*")
75
# a function-like macro: an identifier immediately followed by '('
re_fun = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*\(')
76
# body of a '#pragma once' directive: the word "once", any surrounding whitespace
re_pragma_once = re.compile(r'^\s*once\s*', re.IGNORECASE)
77
# a backslash line continuation: backslash, optional carriage returns, newline
re_nl = re.compile(r'\\\r*\n', re.MULTILINE)
79
r"""(/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)|//[^\n]*|("(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*'|.[^/"'\\]*)""",
81
# trigraph sequences ("??X") paired with the characters they stand for
trig_def = [('??' + src, dst) for src, dst in zip("=-/!'()<>", r'#~\|^[]{}')]
82
# C escape letters mapped to their character codes (e.g. '\n' -> 10), used when
# evaluating character literals; note VT ('\v') is 11 and FF ('\f') is 12 —
# the historical table had those two swapped
chr_esc = {'0': 0, 'a': 7, 'b': 8, 't': 9, 'n': 10, 'v': 11, 'f': 12, 'r': 13, '\\': 92, "'": 39}
90
tok_types = [NUM, STR, IDENT, OP] # lexer token categories; the order sets the alternation priority in re_clexer below
92
r"""0[xX](?P<hex>[a-fA-F0-9]+)(?P<qual1>[uUlL]*)|L*?'(?P<char>(\\.|[^\\'])+)'|(?P<n1>\d+)[Ee](?P<exp0>[+-]*?\d+)(?P<float0>[fFlL]*)|(?P<n2>\d*\.\d+)([Ee](?P<exp1>[+-]*?\d+))?(?P<float1>[fFlL]*)|(?P<n4>\d+\.\d*)([Ee](?P<exp2>[+-]*?\d+))?(?P<float2>[fFlL]*)|(?P<oct>0*)(?P<n0>\d+)(?P<qual2>[uUlL]*)""",
95
r'%:%:|<<=|>>=|\.\.\.|<<|<%|<:|<=|>>|>=|\+\+|\+=|--|->|-=|\*=|/=|%:|%=|%>|==|&&|&=|\|\||\|=|\^=|:>|!=|##|[\(\)\{\}\[\]<>\?\|\^\*\+&=:!#;,%/\-\?\~\.]',
97
# one named alternation per token category; the matching group's name tells the lexer which category fired
re_clexer = re.compile('|'.join('(?P<%s>%s)' % (nm, pat) for nm, pat in zip(tok_types, exp_types)), re.M)
112
def filter_comments(filename):
113
# return a list of tuples : keyword, line
114
code = Utils.readf(filename)
116
for (a, b) in trig_def: code = code.split(a).join(b)
117
code = re_nl.sub('', code)
118
code = re_cpp.sub(repl, code)
119
return [(m.group(2), m.group(3)) for m in re.finditer(re_lines, code)]
122
# op -> number, needed for such expressions: #if 1 && 2 != 0
123
# binary operator groups, strongest binding first; used to build the 'prec'
# table (one level per string — coarser than the full C grammar)
ops = [
	'* / %',
	'+ -',
	'<< >>',
	'< <= >= >',
	'== !=',
	'& | ^',
	'&& ||',
	',',
]
124
for x in range(len(ops)):
126
for u in syms.split():
129
def reduce_nums(val_1, val_2, val_op):
130
"""apply arithmetic rules and try to return an integer result"""
131
#print val_1, val_2, val_op
133
# now perform the operation, make certain a and b are numeric
135
except TypeError: a = int(val_1)
137
except TypeError: b = int(val_2)
147
elif d=='||': c = int(a or b)
149
elif d=='&&': c = int(a and b)
150
elif d=='==': c = int(a == b)
151
elif d=='!=': c = int(a != b)
152
elif d=='<=': c = int(a <= b)
153
elif d=='<': c = int(a < b)
154
elif d=='>': c = int(a > b)
155
elif d=='>=': c = int(a >= b)
156
elif d=='^': c = int(a^b)
157
elif d=='<<': c = a<<b
158
elif d=='>>': c = a>>b
163
if not lst: raise PreprocError("empty list for get_num")
181
raise PreprocError("rparen expected %r" % lst)
183
(num, _) = get_term(lst[1:i])
184
return (num, lst[i+1:])
187
return get_num(lst[1:])
189
num, lst = get_num(lst[1:])
190
return (reduce_nums('-1', num, '*'), lst)
192
num, lst = get_num(lst[1:])
193
return (int(not int(num)), lst)
195
return (~ int(num), lst)
197
raise PreprocError("invalid op token %r for get_num" % lst)
201
# all macros should have been replaced, remaining identifiers eval to 0
204
raise PreprocError("invalid token %r for get_num" % lst)
207
if not lst: raise PreprocError("empty list for get_term")
208
num, lst = get_num(lst)
213
if v == '&&' and not num:
215
elif v == '||' and num:
219
return get_term(lst[1:])
236
raise PreprocError("rparen expected %r" % lst)
239
return get_term(lst[1:i])
241
return get_term(lst[i+1:])
244
num2, lst = get_num(lst[1:])
247
# no more tokens to process
248
num2 = reduce_nums(num, num2, v)
249
return get_term([(NUM, num2)] + lst)
251
# operator precedence
254
raise PreprocError("op expected %r" % lst)
256
if prec[v2] >= prec[v]:
257
num2 = reduce_nums(num, num2, v)
258
return get_term([(NUM, num2)] + lst)
260
num3, lst = get_num(lst[1:])
261
num3 = reduce_nums(num2, num3, v2)
262
return get_term([(NUM, num), (p, v), (NUM, num3)] + lst)
265
raise PreprocError("cannot reduce %r" % lst)
267
def reduce_eval(lst):
268
"""take a list of tokens and output true or false (#if/#elif conditions)"""
269
num, lst = get_term(lst)
273
"""use for converting a list of tokens to a string"""
274
lst = [str(v2) for (p2, v2) in lst]
277
def paste_tokens(t1, t2):
279
here is what we can paste:
285
if t1[0] == OP and t2[0] == OP:
287
elif t1[0] == IDENT and (t2[0] == IDENT or t2[0] == NUM):
289
elif t1[0] == NUM and t2[0] == NUM:
292
raise PreprocError('tokens do not make a valid paste %r and %r' % (t1, t2))
293
return (p1, t1[1] + t2[1])
295
def reduce_tokens(lst, defs, ban=[]):
296
"""replace the tokens in lst, using the macros provided in defs, and a list of macros that cannot be re-applied"""
302
if p == IDENT and v == "defined":
311
elif p2 == OP and v2 == '(':
314
del lst[i] # remove the ident, and change the ) for the value
320
raise PreprocError("invalid define expression %r" % lst)
322
elif p == IDENT and v in defs:
324
if isinstance(defs[v], str):
325
a, b = extract_macro(defs[v])
328
to_add = macro_def[1]
330
if isinstance(macro_def[0], list):
331
# macro without arguments
333
for x in xrange(len(to_add)):
334
lst.insert(i, to_add[x])
337
# collect the arguments for the funcall
343
raise PreprocError("expected '(' after %r (got nothing)" % v)
346
if p2 != OP or v2 != '(':
347
raise PreprocError("expected '(' after %r" % v)
357
if p2 == OP and count_paren == 0:
359
one_param.append((p2, v2))
362
if one_param: args.append(one_param)
365
if not one_param: raise PreprocError("empty param in funcall %s" % p)
366
args.append(one_param)
369
one_param.append((p2, v2))
371
one_param.append((p2, v2))
372
if v2 == '(': count_paren += 1
373
elif v2 == ')': count_paren -= 1
375
raise PreprocError('malformed macro')
377
# substitute the arguments within the define expression
379
arg_table = macro_def[0]
381
while j < len(to_add):
384
if p2 == OP and v2 == '#':
385
# stringize is for arguments only
386
if j+1 < len(to_add) and to_add[j+1][0] == IDENT and to_add[j+1][1] in arg_table:
387
toks = args[arg_table[to_add[j+1][1]]]
388
accu.append((STR, stringize(toks)))
391
accu.append((p2, v2))
392
elif p2 == OP and v2 == '##':
393
# token pasting, how can man invent such a complicated system?
394
if accu and j+1 < len(to_add):
395
# we have at least two tokens
399
if to_add[j+1][0] == IDENT and to_add[j+1][1] in arg_table:
400
toks = args[arg_table[to_add[j+1][1]]]
403
accu[-1] = paste_tokens(t1, toks[0]) #(IDENT, accu[-1][1] + toks[0][1])
404
accu.extend(toks[1:])
407
accu.append((p2, v2))
409
elif to_add[j+1][0] == IDENT and to_add[j+1][1] == '__VA_ARGS__':
411
# first collect the tokens
413
st = len(macro_def[0])
415
for x in args[pt-st+1:]:
417
va_toks.append((OP, ','))
418
if va_toks: va_toks.pop() # extra comma
423
# remove the token paste
425
if v4 == ',' and pt < st:
430
accu[-1] = paste_tokens(t1, to_add[j+1])
434
# invalid paste, case "##a" or "b##"
435
accu.append((p2, v2))
437
elif p2 == IDENT and v2 in arg_table:
438
toks = args[arg_table[v2]]
439
reduce_tokens(toks, defs, ban+[v])
442
accu.append((p2, v2))
447
reduce_tokens(accu, defs, ban+[v])
449
for x in xrange(len(accu)-1, -1, -1):
450
lst.insert(i, accu[x])
455
def eval_macro(lst, adefs):
456
"""reduce the tokens from the list lst, and try to return a 0/1 result"""
457
reduce_tokens(lst, adefs, [])
458
if not lst: raise PreprocError("missing tokens to evaluate")
459
(p, v) = reduce_eval(lst)
462
def extract_macro(txt):
463
"""process a macro definition from "#define f(x, y) x * y" into a function or a simple macro without arguments"""
465
if re_fun.search(txt):
469
if p != OP: raise PreprocError("expected open parenthesis")
485
elif p == OP and v == ')':
488
raise PreprocError("unexpected token (3)")
490
if p == OP and v == ',':
492
elif p == OP and v == ')':
495
raise PreprocError("comma or ... expected")
501
elif p == OP and v == '...':
502
raise PreprocError("not implemented (1)")
504
raise PreprocError("comma or ... expected (2)")
506
raise PreprocError("not implemented (2)")
508
raise PreprocError("unexpected else")
510
#~ print (name, [params, t[i+1:]])
511
return (name, [params, t[i+1:]])
514
return (v, [[], t[1:]])
516
# the argument of an '#include': <system/header> (group 'a') or "local/header" (group 'b')
re_include = re.compile(r'^\s*(<(?P<a>.*)>|"(?P<b>.*)")')
517
def extract_include(txt, defs):
518
"""process a line in the form "#include foo" to return a string representing the file"""
519
m = re_include.search(txt)
521
if m.group('a'): return '<', m.group('a')
522
if m.group('b'): return '"', m.group('b')
524
# perform preprocessing and look at the result, it must match an include
526
reduce_tokens(toks, defs, ['waf_include'])
529
raise PreprocError("could not parse include %s" % txt)
532
if toks[0][0] == STR:
533
return '"', toks[0][1]
535
if toks[0][1] == '<' and toks[-1][1] == '>':
536
return stringize(toks).lstrip('<').rstrip('>')
538
raise PreprocError("could not parse include %s." % txt)
541
if not txt: raise PreprocError("attempted to parse a null char")
546
if len(txt) == 4 and txt[3] in string.hexdigits: return int(txt[2:], 16)
547
return int(txt[2:], 16)
549
if c == '0' and len(txt)==2: return 0
551
if len(txt) > i and txt[1:1+i].isdigit():
552
return (1+i, int(txt[1:1+i], 8))
554
try: return chr_esc[c]
555
except KeyError: raise PreprocError("could not parse char literal '%s'" % txt)
559
"""convert a string into a list of tokens (shlex.split does not apply to c/c++/d)"""
561
for match in re_clexer.finditer(s):
563
for name in tok_types:
567
try: v = g_optrans[v]; name = OP
570
if v.lower() == "true":
573
elif v.lower() == "false":
577
if m('oct'): v = int(v, 8)
578
elif m('hex'): v = int(m('hex'), 16)
579
elif m('n0'): v = m('n0')
582
if v: v = parse_char(v)
583
else: v = m('n2') or m('n4')
585
if v == '%:': v = '#'
586
elif v == '%:%:': v = '##'
588
# remove the quotes around the string
590
ret.append((name, v))
595
def define_name(line):
	"""Extract the macro identifier from the body of a '#define' directive."""
	m = re_mac.match(line)
	return m.group(0)
598
class c_parser(object):
599
def __init__(self, nodepaths=None, defines=None):
600
#self.lines = txt.split('\n')
606
self.defs = dict(defines) # make a copy
609
self.env = None # needed for the variant when searching for files
612
self.currentnode_stack = []
614
self.nodepaths = nodepaths or []
621
self.ban_includes = set([])
623
def cached_find_resource(self, node, filename):
625
nd = node.bld.cache_nd
627
nd = node.bld.cache_nd = {}
629
tup = (node.id, filename)
633
ret = node.find_resource(filename)
637
def tryfind(self, filename):
638
self.curfile = filename
640
# for msvc it should be a for loop on the whole stack
641
found = self.cached_find_resource(self.currentnode_stack[-1], filename)
643
for n in self.nodepaths:
646
found = self.cached_find_resource(n, filename)
649
self.nodes.append(found)
650
if filename[-4:] != '.moc':
653
if not filename in self.names:
654
self.names.append(filename)
657
def addlines(self, node):
659
self.currentnode_stack.append(node.parent)
660
filepath = node.abspath(self.env)
662
self.count_files += 1
663
if self.count_files > recursion_limit: raise PreprocError("recursion limit exceeded")
664
pc = self.parse_cache
665
debug('preproc: reading file %r', filepath)
671
self.lines.extend(lns)
675
lines = filter_comments(filepath)
676
lines.append((POPFILE, ''))
678
pc[filepath] = lines # cache the lines filtered
679
self.lines.extend(lines)
681
raise PreprocError("could not read the file %s" % filepath)
684
error("parsing %s failed" % filepath)
685
traceback.print_exc()
687
def start(self, node, env):
688
debug('preproc: scanning %s (in %s)', node.name, node.parent.name)
691
variant = node.variant(env)
692
bld = node.__class__.bld
694
self.parse_cache = bld.parse_cache
695
except AttributeError:
697
self.parse_cache = bld.parse_cache
701
lst = [('define', x) for x in env['DEFLINES']]
703
self.lines.extend(lst)
706
(kind, line) = self.lines.pop()
708
self.currentnode_stack.pop()
711
self.process_line(kind, line)
714
debug('preproc: line parsing failed (%s): %s %s', e, line, Utils.ex_stack())
716
def process_line(self, token, line):
718
WARNING: a new state must be added for if* because the endif
721
if ve: debug('preproc: line is %s - %s state is %s', token, line, self.state)
724
# make certain we define the state if we are about to enter in an if block
725
if token in ['ifdef', 'ifndef', 'if']:
726
state.append(undefined)
727
elif token == 'endif':
730
# skip lines when in a dead 'if' branch, wait for the endif
731
if not token in ['else', 'elif', 'endif']:
732
if skipped in self.state or ignored in self.state:
736
ret = eval_macro(tokenize(line), self.defs)
737
if ret: state[-1] = accepted
738
else: state[-1] = ignored
739
elif token == 'ifdef':
740
m = re_mac.match(line)
741
if m and m.group(0) in self.defs: state[-1] = accepted
742
else: state[-1] = ignored
743
elif token == 'ifndef':
744
m = re_mac.match(line)
745
if m and m.group(0) in self.defs: state[-1] = ignored
746
else: state[-1] = accepted
747
elif token == 'include' or token == 'import':
748
(kind, inc) = extract_include(line, self.defs)
749
if inc in self.ban_includes: return
750
if token == 'import': self.ban_includes.add(inc)
751
if ve: debug('preproc: include found %s (%s) ', inc, kind)
752
if kind == '"' or not strict_quotes:
754
elif token == 'elif':
755
if state[-1] == accepted:
757
elif state[-1] == ignored:
758
if eval_macro(tokenize(line), self.defs):
760
elif token == 'else':
761
if state[-1] == accepted: state[-1] = skipped
762
elif state[-1] == ignored: state[-1] = accepted
763
elif token == 'define':
765
self.defs[define_name(line)] = line
767
raise PreprocError("invalid define line %s" % line)
768
elif token == 'undef':
769
m = re_mac.match(line)
770
if m and m.group(0) in self.defs:
771
self.defs.__delitem__(m.group(0))
772
#print "undef %s" % name
773
elif token == 'pragma':
774
if re_pragma_once.match(line.lower()):
775
self.ban_includes.add(self.curfile)
777
def get_deps(node, env, nodepaths=[]):
779
Get the dependencies using a c/c++ preprocessor, this is required for finding dependencies of the kind
780
#include some_macro()
783
gruik = c_parser(nodepaths)
784
gruik.start(node, env)
785
return (gruik.nodes, gruik.names)
787
#################### dumb dependency scanner
789
# dumb-scanner variant of re_lines: only '#include' directives are recognized
re_inc = re.compile(
	r'^[ \t]*(#|%:)[ \t]*(include)[ \t]*(.*)\r*$',
	re.IGNORECASE | re.MULTILINE)
793
def lines_includes(filename):
794
code = Utils.readf(filename)
796
for (a, b) in trig_def: code = code.split(a).join(b)
797
code = re_nl.sub('', code)
798
code = re_cpp.sub(repl, code)
799
return [(m.group(2), m.group(3)) for m in re.finditer(re_inc, code)]
801
def get_deps_simple(node, env, nodepaths=[], defines={}):
803
Get the dependencies by just looking recursively at the #include statements
810
lst = lines_includes(node.abspath(env))
812
for (_, line) in lst:
813
(t, filename) = extract_include(line, defines)
814
if filename in names:
817
if filename.endswith('.moc'):
818
names.append(filename)
824
found = n.find_resource(filename)
827
if not filename in names:
828
names.append(filename)
829
elif not found in nodes:
834
return (nodes, names)