/*
*
*   Copyright (c) 1996-2001, Darren Hiebert
*
*   This source code is released for free distribution under the terms of the
*   GNU General Public License.
*
*   This module contains the high level source read functions (preprocessor
*   directives are handled within this level).
*/

/*
*   INCLUDE FILES
*/
#include "general.h"	/* must always come first */

#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <glib.h>

#include "entry.h"
#include "get.h"
#include "main.h"
#include "options.h"
#include "read.h"
#include "vstring.h"

/*
*   MACROS
*/
#define stringMatch(s1,s2)	(strcmp (s1,s2) == 0)
#define isspacetab(c)		((c) == ' ' || (c) == '\t')

/*
*   DATA DECLARATIONS
*/
typedef enum { COMMENT_NONE, COMMENT_C, COMMENT_CPLUS } Comment;

enum eCppLimits {
	MaxCppNestingLevel = 20,
	MaxDirectiveName = 10
};

/*  Defines the one nesting level of a preprocessor conditional.
 */
typedef struct sConditionalInfo {
	boolean ignoreAllBranches;	/* ignoring parent conditional branch */
	boolean singleBranch;		/* choose only one branch */
	boolean branchChosen;		/* branch already selected */
	boolean ignoring;		/* current ignore state */
} conditionalInfo;

/*  Defines the current state of the pre-processor.
 */
typedef struct sCppState {
	int ungetch, ungetch2;		/* ungotten characters, if any */
	boolean resolveRequired;	/* must resolve if/else/elif/endif branch */
	struct sDirective {
		enum eState {
			DRCTV_NONE,	/* no known directive - ignore to end of line */
			DRCTV_DEFINE,	/* "#define" encountered */
			DRCTV_HASH,	/* initial '#' read; determine directive */
			DRCTV_IF,	/* "#if" or "#ifdef" encountered */
			DRCTV_UNDEF	/* "#undef" encountered */
		} state;
		boolean accept;		/* is a directive syntactically permitted? */
		vString * name;		/* macro name */
		unsigned int nestLevel;	/* level 0 is not used */
		conditionalInfo ifdef [MaxCppNestingLevel];
	} directive;
} cppState;

/*
*   DATA DEFINITIONS
*/

/*  Use brace formatting to detect end of block.
 */
static boolean BraceFormat = FALSE;

static cppState Cpp = {
	'\0', '\0',			/* ungetch characters */
	FALSE,				/* resolveRequired */
	{
		DRCTV_NONE,		/* state */
		FALSE,			/* accept */
		NULL,			/* tag name */
		0,			/* nestLevel */
		{ {FALSE,FALSE,FALSE,FALSE} }	/* ifdef array */
	}				/* directive */
};

/*
*   FUNCTION DEFINITIONS
*/

/*  Returns the brace-format flag last stored by cppInit ().
 */
extern boolean isBraceFormat (void)
{
	return BraceFormat;
}

/*  Returns the current nesting depth of preprocessor conditionals.
 */
extern unsigned int getDirectiveNestLevel (void)
{
	return Cpp.directive.nestLevel;
}

/*  Resets the preprocessor state for a new input and records the
 *  brace-format flag given by "state". The directive name buffer is
 *  allocated on first use and merely cleared on later calls.
 */
extern void cppInit (const boolean state)
{
	BraceFormat = state;

	Cpp.ungetch         = '\0';
	Cpp.ungetch2        = '\0';
	Cpp.resolveRequired = FALSE;

	Cpp.directive.state     = DRCTV_NONE;
	Cpp.directive.accept    = TRUE;
	Cpp.directive.nestLevel = 0;

	if (Cpp.directive.name == NULL)
		Cpp.directive.name = vStringNew ();
	else
		vStringClear (Cpp.directive.name);
}

/*  Releases the directive name buffer, if one was allocated.
 */
extern void cppTerminate (void)
{
	if (Cpp.directive.name != NULL)
	{
		vStringDelete (Cpp.directive.name);
		Cpp.directive.name = NULL;
	}
}

/*  Marks that a statement is in progress, so that a conditional encountered
 *  before cppEndStatement () must be resolved to a single branch.
 */
extern void cppBeginStatement (void)
{
	Cpp.resolveRequired = TRUE;
}

/*  Marks that no statement is in progress any longer.
 */
extern void cppEndStatement (void)
{
	Cpp.resolveRequired = FALSE;
}

/*
*   Scanning functions
*
*   This section handles preprocessor directives.  It strips out all
*   directives and may emit a tag for #define directives.
*/

/*  This puts a character back into the input queue for the source File.
 *  Up to two characters may be ungotten.
 */
extern void cppUngetc (const int c)
{
	Assert (Cpp.ungetch2 == '\0');
	Cpp.ungetch2 = Cpp.ungetch;
	Cpp.ungetch = c;
}

/*  Reads a directive, whose first character is given by "c", into "name".
 *  At most "maxLength" - 1 characters are stored, followed by a terminating
 *  null. Reading stops at the first non-alphabetic character, which is pushed
 *  back to the input. Returns TRUE if the last character examined was a
 *  space or a tab.
 */
static boolean readDirective (int c, char *const name, unsigned int maxLength)
{
	unsigned int i;

	for (i = 0  ;  i < maxLength - 1  ;  ++i)
	{
		if (i > 0)
		{
			c = fileGetc ();
			if (c == EOF  ||  ! isalpha (c))
			{
				fileUngetc (c);
				break;
			}
		}
		name [i] = c;
	}
	name [i] = '\0';	/* null terminate */

	return (boolean) isspacetab (c);
}

/*  Reads an identifier, whose first character is given by "c", into "name".
 *  The character following the identifier is pushed back to the input;
 *  "*parameterized" is set to TRUE if that character is '('. Returns TRUE if
 *  the identifier is followed by white space or by '('.
 */
static boolean readDefineTag (int c, vString *const name,
			      boolean *const parameterized)
{
	vStringClear (name);
	do
	{
		vStringPut (name, c);
	} while (c = fileGetc (), (c != EOF  &&  isident (c)));
	fileUngetc (c);
	vStringPut (name, '\0');

	*parameterized = (boolean) (c == '(');
	return (boolean) (isspace (c)  ||  c == '(');
}

/*  Returns the conditional record for the current nesting level.
 */
static conditionalInfo *currentConditional (void)
{
	return &Cpp.directive.ifdef [Cpp.directive.nestLevel];
}

/*  Returns the ignore state of the current nesting level.
 */
static boolean isIgnore (void)
{
	return Cpp.directive.ifdef [Cpp.directive.nestLevel].ignoring;
}

/*  Stores "ignore" as the ignore state of the current nesting level and
 *  returns the stored value.
 */
static boolean setIgnore (const boolean ignore)
{
	return Cpp.directive.ifdef [Cpp.directive.nestLevel].ignoring = ignore;
}

/*  Decides whether the branch about to be entered (#elif or #else) of the
 *  current conditional should be ignored.
 */
static boolean isIgnoreBranch (void)
{
	conditionalInfo *const ifdef = currentConditional ();

	/*  Force a single branch if an incomplete statement is discovered
	 *  en route. This may have allowed earlier branches containing complete
	 *  statements to be followed, but we must follow no further branches.
	 */
	if (Cpp.resolveRequired  &&  ! BraceFormat)
		ifdef->singleBranch = TRUE;

	/*  We will ignore this branch in the following cases:
	 *
	 *  1.  We are ignoring all branches (conditional was within an ignored
	 *        branch of the parent conditional)
	 *  2.  A branch has already been chosen and either of:
	 *      a.  A statement was incomplete upon entering the conditional
	 *      b.  A statement is incomplete upon encountering a branch
	 */
	return (boolean) (ifdef->ignoreAllBranches ||
			  (ifdef->branchChosen  &&  ifdef->singleBranch));
}

/*  Records, unless brace formatting is in effect, whether a branch of the
 *  current conditional now counts as chosen.
 */
static void chooseBranch (void)
{
	if (! BraceFormat)
	{
		conditionalInfo *const ifdef = currentConditional ();

		ifdef->branchChosen = (boolean) (ifdef->singleBranch ||
						 Cpp.resolveRequired);
	}
}

/*  Pushes one nesting level for an #if directive, indicating whether or not
 *  the branch should be ignored and whether a branch has already been chosen.
 *  If the maximum nesting depth has been reached, nothing is pushed and FALSE
 *  is returned.
 */
static boolean pushConditional (const boolean firstBranchChosen)
{
	const boolean ignoreAllBranches = isIgnore ();	/* current ignore */
	boolean ignoreBranch = FALSE;

	if (Cpp.directive.nestLevel < (unsigned int) MaxCppNestingLevel - 1)
	{
		conditionalInfo *ifdef;

		++Cpp.directive.nestLevel;
		ifdef = currentConditional ();

		/*  We take a snapshot of whether there is an incomplete statement in
		 *  progress upon encountering the preprocessor conditional. If so,
		 *  then we will flag that only a single branch of the conditional
		 *  should be followed.
		 */
		ifdef->ignoreAllBranches = ignoreAllBranches;
		ifdef->singleBranch      = Cpp.resolveRequired;
		ifdef->branchChosen      = firstBranchChosen;
		ifdef->ignoring = (boolean) (ignoreAllBranches || (
				! firstBranchChosen  &&  ! BraceFormat  &&
				(ifdef->singleBranch || !Option.if0)));
		ignoreBranch = ifdef->ignoring;
	}
	return ignoreBranch;
}

/*  Pops one nesting level for an #endif directive and returns the ignore
 *  state of the level that becomes current.
 */
static boolean popConditional (void)
{
	if (Cpp.directive.nestLevel > 0)
		--Cpp.directive.nestLevel;

	return isIgnore ();
}

/*  Emits a "macro" tag (kind 'd') for "name" when define tags are enabled
 *  and the file-scope options allow it. For a parameterized macro the
 *  argument list is looked up in the input file and attached to the tag; the
 *  temporary arglist string is released after the tag has been made.
 */
static void makeDefineTag (const char *const name, boolean parameterized)
{
	const boolean isFileScope = (boolean) (! isHeaderFile ());

	if (includingDefineTags () &&
	    (! isFileScope  ||  Option.include.fileScope))
	{
		tagEntryInfo e;

		initTagEntry (&e, name);

		e.lineNumberEntry = (boolean) (Option.locate != EX_PATTERN);
		e.isFileScope     = isFileScope;
		e.truncateLine    = TRUE;
		e.kindName        = "macro";
		e.kind            = 'd';
		if (parameterized)
			e.extensionFields.arglist =
				getArglistFromPos(getInputFilePosition(), e.name);
		makeTagEntry (&e);
		if (parameterized)
			free((char *) e.extensionFields.arglist);
	}
}

/*  Handles the macro name following "#define" or "#undef", whose first
 *  character is given by "c". The name is read and, unless the current
 *  branch is being ignored, a tag is made for it. Only a name immediately
 *  followed by '(' is reported as parameterized, so that an object-like
 *  macro whose replacement text starts with a parenthesis does not receive
 *  an argument list.
 */
static void directiveDefine (const int c)
{
	boolean parameterized;

	if (isident1 (c))
	{
		readDefineTag (c, Cpp.directive.name, &parameterized);
		if (! isIgnore ())
			makeDefineTag (vStringValue (Cpp.directive.name), parameterized);
	}
	Cpp.directive.state = DRCTV_NONE;
}

/*  Handles the first character "c" following an "#if"-family directive by
 *  pushing a new conditional level; the first branch counts as chosen
 *  unless that character is '0'. Returns the resulting ignore state.
 */
static boolean directiveIf (const int c)
{
	const boolean ignore = pushConditional ((boolean) (c != '0'));

	Cpp.directive.state = DRCTV_NONE;

	return ignore;
}

/*  Reads the directive name following '#', whose first character is given
 *  by "c", and updates the directive state accordingly. Returns the ignore
 *  state after #elif, #else and #endif, and FALSE for any other directive.
 */
static boolean directiveHash (const int c)
{
	boolean ignore = FALSE;
	char directive [MaxDirectiveName];
	DebugStatement ( const boolean ignore0 = isIgnore (); )

	readDirective (c, directive, MaxDirectiveName);
	if (stringMatch (directive, "define"))
		Cpp.directive.state = DRCTV_DEFINE;
	else if (stringMatch (directive, "undef"))
		Cpp.directive.state = DRCTV_UNDEF;
	else if (strncmp (directive, "if", (size_t) 2) == 0)
		Cpp.directive.state = DRCTV_IF;	/* any name starting with "if" */
	else if (stringMatch (directive, "elif")  ||
		 stringMatch (directive, "else"))
	{
		ignore = setIgnore (isIgnoreBranch ());
		if (! ignore  &&  stringMatch (directive, "else"))
			chooseBranch ();
		Cpp.directive.state = DRCTV_NONE;
		DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); )
	}
	else if (stringMatch (directive, "endif"))
	{
		DebugStatement ( debugCppNest (FALSE, Cpp.directive.nestLevel); )
		ignore = popConditional ();
		Cpp.directive.state = DRCTV_NONE;
		DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); )
	}
	else	/* "pragma", etc. */
		Cpp.directive.state = DRCTV_NONE;

	return ignore;
}

/*  Handles a pre-processor directive whose first character is given by "c".
 *  Returns the ignore state in effect after the directive character has
 *  been processed.
 */
static boolean handleDirective (const int c)
{
	boolean ignore = isIgnore ();

	switch (Cpp.directive.state)
	{
		case DRCTV_NONE:	ignore = isIgnore ();		break;
		case DRCTV_DEFINE:	directiveDefine (c);		break;
		case DRCTV_HASH:	ignore = directiveHash (c);	break;
		case DRCTV_IF:		ignore = directiveIf (c);	break;
		case DRCTV_UNDEF:	directiveDefine (c);		break;
	}
	return ignore;
}

/*  Called upon reading of a slash ('/') character, determines whether a
 *  comment is encountered, and its type. A following character that does
 *  not start a comment is pushed back to the input.
 */
static Comment isComment (void)
{
	Comment comment;
	const int next = fileGetc ();

	if (next == '*')
		comment = COMMENT_C;
	else if (next == '/')
		comment = COMMENT_CPLUS;
	else
	{
		fileUngetc (next);
		comment = COMMENT_NONE;
	}
	return comment;
}

/*  Skips over a C style comment. According to ANSI specification a comment
 *  is treated as white space, so we perform this substitution. Returns a
 *  space once the closing delimiter has been consumed, or EOF if the input
 *  ends first.
 */
static int skipOverCComment (void)
{
	int c = fileGetc ();

	while (c != EOF)
	{
		if (c != '*')
			c = fileGetc ();
		else
		{
			const int next = fileGetc ();

			if (next != '/')
				c = next;	/* re-examine: may itself be a '*' */
			else
			{
				c = ' ';	/* replace comment with space */
				break;
			}
		}
	}
	return c;
}

/*  Skips over a C++ style comment. Returns the newline which ended the
 *  comment, or EOF.
 */
static int skipOverCplusComment (void)
{
	int c;

	while ((c = fileGetc ()) != EOF)
	{
		if (c == BACKSLASH)
			fileGetc ();	/* throw away next character, too */
		else if (c == NEWLINE)
			break;
	}
	return c;
}

/*  Skips to the end of a string, returning a special character to
 *  symbolically represent a generic string.
 */
static int skipToEndOfString (void)
{
	int c;

	while ((c = fileGetc ()) != EOF)
	{
		if (c == BACKSLASH)
			fileGetc ();	/* throw away next character, too */
		else if (c == DOUBLE_QUOTE)
			break;
	}
	return STRING_SYMBOL;	/* symbolic representation of string */
}

/*  Skips to the end of the three (possibly four) 'c' sequence, returning a
 *  special character to symbolically represent a generic character. An
 *  unterminated literal ends at the newline, which is pushed back.
 */
static int skipToEndOfChar (void)
{
	int c;

	while ((c = fileGetc ()) != EOF)
	{
		if (c == BACKSLASH)
			fileGetc ();	/* throw away next character, too */
		else if (c == SINGLE_QUOTE)
			break;
		else if (c == NEWLINE)
		{
			fileUngetc (c);
			break;
		}
	}
	return CHAR_SYMBOL;	/* symbolic representation of character */
}

/*  This function returns the next character, stripping out comments,
 *  C pre-processor directives, and the contents of single and double
 *  quoted strings. In short, strip anything which places a burden upon
 *  the tokenizer.
 *
 *  Characters previously passed to cppUngetc () are returned first. The
 *  loop keeps reading while inside a directive or an ignored conditional
 *  branch.
 */
extern int cppGetc (void)
{
	boolean directive = FALSE;
	boolean ignore = FALSE;
	int c;

	if (Cpp.ungetch != '\0')
	{
		c = Cpp.ungetch;
		Cpp.ungetch = Cpp.ungetch2;
		Cpp.ungetch2 = '\0';
		return c;	/* return here to avoid re-calling debugPutc () */
	}
	else do
	{
		c = fileGetc ();
process:
		switch (c)
		{
			case EOF:
				ignore    = FALSE;
				directive = FALSE;
				break;

			case TAB:
			case SPACE:
				break;	/* ignore most white space */

			case NEWLINE:
				if (directive  &&  ! ignore)
					directive = FALSE;
				Cpp.directive.accept = TRUE;
				break;

			case DOUBLE_QUOTE:
				Cpp.directive.accept = FALSE;
				c = skipToEndOfString ();
				break;

			case '#':
				if (Cpp.directive.accept)
				{
					directive = TRUE;
					Cpp.directive.state  = DRCTV_HASH;
					Cpp.directive.accept = FALSE;
				}
				break;

			case SINGLE_QUOTE:
				Cpp.directive.accept = FALSE;
				c = skipToEndOfChar ();
				break;

			case '/':
			{
				const Comment comment = isComment ();

				if (comment == COMMENT_C)
					c = skipOverCComment ();
				else if (comment == COMMENT_CPLUS)
				{
					c = skipOverCplusComment ();
					if (c == NEWLINE)
						fileUngetc (c);
				}
				else
					Cpp.directive.accept = FALSE;
				break;
			}

			case BACKSLASH:
			{
				int next = fileGetc ();

				if (next == NEWLINE)
					continue;	/* line continuation */
				else if (next == '?')
					cppUngetc (next);
				else
					fileUngetc (next);
				break;
			}

			case '?':
			{
				int next = fileGetc ();
				if (next != '?')
					fileUngetc (next);
				else
				{
					/* "??x" trigraph: substitute the character it
					 * stands for; a resulting backslash or '#' is
					 * processed again as if read directly.
					 */
					next = fileGetc ();
					switch (next)
					{
						case '(':          c = '[';       break;
						case ')':          c = ']';       break;
						case '<':          c = '{';       break;
						case '>':          c = '}';       break;
						case '/':          c = BACKSLASH; goto process;
						case '!':          c = '|';       break;
						case SINGLE_QUOTE: c = '^';       break;
						case '-':          c = '~';       break;
						case '=':          c = '#';       goto process;
						default:
							fileUngetc (next);
							cppUngetc ('?');
							break;
					}
				}
			} break;

			default:
				Cpp.directive.accept = FALSE;
				if (directive)
					ignore = handleDirective (c);
				break;
		}
	} while (directive || ignore);

	DebugStatement ( debugPutc (DEBUG_CPP, c); )
	DebugStatement ( if (c == NEWLINE)
				debugPrintf (DEBUG_CPP, "%6ld: ", getInputLineNumber () + 1); )

	return c;
}

/*  Extracts the argument list of "tokenName" from the text of the input
 *  file lying between "startPosition" and one character past the current
 *  read position. The file position is restored before returning.
 *
 *  Returns a newly allocated string which the caller must release with
 *  free (), or NULL if no argument list was found or the file position
 *  could not be determined.
 */
extern char *getArglistFromPos(fpos_t startPosition, const char *tokenName)
{
	fpos_t originalPosition;
	char *result = NULL;
	char *arglist = NULL;
	long pos1, pos2 = ftell(File.fp);

	/* Without a saved position we could not restore the stream afterwards. */
	if (fgetpos(File.fp, &originalPosition) != 0)
		return NULL;

	fsetpos(File.fp, &startPosition);
	pos1 = ftell(File.fp);
	if (pos1 >= 0  &&  pos2 > pos1)
	{
		const size_t wanted = (size_t) (pos2 - pos1) + 1;

		result = (char *) g_malloc(wanted + 1);
		if (result != NULL)
		{
			/* Terminate after what was actually read: a short read must
			 * not leave uninitialised bytes inside the string.
			 */
			const size_t got = fread(result, sizeof(char), wanted, File.fp);

			result[got] = '\0';
			arglist = getArglistFromStr(result, tokenName);
			g_free(result);	/* pairs with g_malloc () above */
		}
	}
	fsetpos(File.fp, &originalPosition);
	return arglist;
}

typedef enum
{
	st_none_t,
	st_escape_t,
	st_c_comment_t,
	st_cpp_comment_t,
	st_double_quote_t,
	st_single_quote_t
} ParseState;

/*  Appends a single space to the output being built in "buf" at "*pos",
 *  unless the output is still empty or already ends in a space.
 */
static void putSeparator(char *buf, size_t *pos)
{
	if ((*pos > 0) && (buf[*pos - 1] != ' '))
		buf[(*pos)++] = ' ';
}

/*  Strips "buf" in place: comments are replaced by a single space, runs of
 *  white space are collapsed to a single space, and quoted strings, quoted
 *  characters, backslashes and the character following each backslash are
 *  removed.
 */
static void stripCodeBuffer(char *buf)
{
	size_t i = 0, pos = 0;
	size_t commentOpen = 0;	/* index of the '*' which opened the C comment */
	ParseState state = st_none_t, prev_state = st_none_t;

	while (buf[i] != '\0')
	{
		if (st_escape_t == state)
		{
			/* The character after a backslash is consumed whatever it is,
			 * so that an escaped quote or slash cannot change the state.
			 */
			state = prev_state;
			prev_state = st_none_t;
			++i;
			continue;
		}

		switch (buf[i])
		{
			case '/':
				if (st_none_t == state)
				{
					/* Check if this is the start of a comment */
					if (buf[i+1] == '*')		/* C comment */
					{
						state = st_c_comment_t;
						commentOpen = i + 1;
					}
					else if (buf[i+1] == '/')	/* C++ comment */
						state = st_cpp_comment_t;
					else				/* Normal character */
						buf[pos++] = '/';
				}
				else if (st_c_comment_t == state)
				{
					/* End of a C comment; the '*' of the opening
					 * delimiter does not count, so that the
					 * sequence slash-star-slash stays open.
					 */
					if ((buf[i-1] == '*') && ((i - 1) > commentOpen))
					{
						putSeparator(buf, &pos);
						state = st_none_t;
					}
				}
				break;

			case '"':
				if (st_none_t == state)
					state = st_double_quote_t;
				else if (st_double_quote_t == state)
					state = st_none_t;
				break;

			case '\'':
				if (st_none_t == state)
					state = st_single_quote_t;
				else if (st_single_quote_t == state)
					state = st_none_t;
				break;

			case '\\':
				prev_state = state;
				state = st_escape_t;
				break;

			default:
				if ((buf[i] == '\n') && (st_cpp_comment_t == state))
				{
					putSeparator(buf, &pos);
					state = st_none_t;
				}
				else if (st_none_t == state)
				{
					/* cast: isspace () is undefined for negative values */
					if (isspace((unsigned char) buf[i]))
						putSeparator(buf, &pos);
					else
						buf[pos++] = buf[i];
				}
				break;
		}
		++i;
	}
	buf[pos] = '\0';
}

/*  Returns a newly allocated copy of the parenthesized text which follows
 *  the first occurrence of "name" in "buf", from the opening parenthesis up
 *  to and including its matching closing parenthesis (or up to the end of
 *  the text if the parentheses are unbalanced).
 *
 *  "buf" is modified: it is stripped by stripCodeBuffer () and truncated
 *  after the argument list. Returns NULL if either argument is NULL, if
 *  "name" is empty, or if "name" or an opening parenthesis after it cannot
 *  be found.
 */
extern char *getArglistFromStr(char *buf, const char *name)
{
	char *start, *end;
	int level;

	if ((NULL == buf) || (NULL == name) || ('\0' == name[0]))
		return NULL;

	stripCodeBuffer(buf);

	start = strstr(buf, name);
	if (NULL == start)
		return NULL;

	start = strchr(start, '(');
	if (NULL == start)
		return NULL;

	/* Walk to just past the parenthesis which balances the opening one. */
	for (level = 1, end = start + 1; level > 0; ++end)
	{
		if ('\0' == *end)
			break;
		else if ('(' == *end)
			++ level;
		else if (')' == *end)
			-- level;
	}
	*end = '\0';
	return strdup(start);
}

/* vi:set tabstop=8 shiftwidth=4: */