2
Copyright (C) Andrew Tridgell 2002
4
This program is free software; you can redistribute it and/or modify
5
it under the terms of the GNU General Public License as published by
6
the Free Software Foundation; either version 2 of the License, or
7
(at your option) any later version.
9
This program is distributed in the hope that it will be useful,
10
but WITHOUT ANY WARRANTY; without even the implied warranty of
11
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
GNU General Public License for more details.
14
You should have received a copy of the GNU General Public License
15
along with this program; if not, write to the Free Software
16
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
2
* Copyright (C) 2002 Andrew Tridgell
4
* This program is free software; you can redistribute it and/or modify it
5
* under the terms of the GNU General Public License as published by the Free
6
* Software Foundation; either version 3 of the License, or (at your option)
9
* This program is distributed in the hope that it will be useful, but WITHOUT
10
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14
* You should have received a copy of the GNU General Public License along with
15
* this program; if not, write to the Free Software Foundation, Inc., 51
16
* Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
the idea is that changes that don't affect the resulting C code
22
should not change the hash. This is achieved by folding white-space
23
and other non-semantic fluff in the input into a single unified format.
25
This unifier was designed to match the output of the unifier in
26
compilercache, which is flex based. The major difference is that
27
this unifier is much faster (about 2x) and more forgiving of
28
syntactic errors. Continuing on syntactic errors is important to
29
cope with C/C++ extensions in the local compiler (for example,
30
inline assembly systems).
22
* The idea is that changes that don't affect the resulting C code should not
23
* change the hash. This is achieved by folding white-space and other
24
* non-semantic fluff in the input into a single unified format.
26
* This unifier was designed to match the output of the unifier in compilercache,
27
* which is flex based. The major difference is that this unifier is much
28
* faster (about 2x) and more forgiving of syntactic errors. Continuing on
29
* syntactic errors is important to cope with C/C++ extensions in the local
30
* compiler (for example, inline assembly systems).
33
33
#include "ccache.h"
35
static char *s_tokens[] = {
35
#include <sys/types.h>
43
static const char *const s_tokens[] = {
36
44
"...", ">>=", "<<=", "+=", "-=", "*=", "/=", "%=", "&=", "^=",
37
45
"|=", ">>", "<<", "++", "--", "->", "&&", "||", "<=", ">=",
38
46
"==", "!=", ";", "{", "<%", "}", "%>", ",", ":", "=",
96
104
/* buffer up characters before hashing them */
97
static void pushchar(unsigned char c)
105
static void pushchar(struct mdfour *hash, unsigned char c)
99
107
static unsigned char buf[64];
104
hash_buffer((char *)buf, len);
112
hash_buffer(hash, (char *)buf, len);
107
hash_buffer(NULL, 0);
115
hash_buffer(hash, NULL, 0);
113
hash_buffer((char *)buf, len);
121
hash_buffer(hash, (char *)buf, len);
118
126
/* hash some C/C++ code after unifying */
119
static void unify(unsigned char *p, size_t size)
127
static void unify(struct mdfour *hash, unsigned char *p, size_t size)
236
pushchar(hash, p[ofs]);
237
pushchar(hash, '\n');
236
/* hash a file that consists of preprocessor output, but remove any line
244
/* hash a file that consists of preprocessor output, but remove any line
237
245
number information from the hash
239
int unify_hash(const char *fname)
247
int unify_hash(struct mdfour *hash, const char *fname)
245
253
fd = open(fname, O_RDONLY|O_BINARY);
246
254
if (fd == -1 || fstat(fd, &st) != 0) {
247
cc_log("Failed to open preprocessor output %s\n", fname);
255
cc_log("Failed to open preprocessor output %s", fname);
248
256
stats_update(STATS_PREPROCESSOR);
253
261
lines in preprocessor output. I have seen lines of over
254
262
100k in length, so this is well worth it */
255
263
map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
256
265
if (map == (char *)-1) {
257
cc_log("Failed to mmap %s\n", fname);
266
cc_log("Failed to mmap %s", fname);
262
270
/* pass it through the unifier */
263
unify((unsigned char *)map, st.st_size);
271
unify(hash, (unsigned char *)map, st.st_size);
265
273
munmap(map, st.st_size);