/*
*
*   Copyright (c) 1996-2001, Darren Hiebert
*
*   This source code is released for free distribution under the terms of the
*   GNU General Public License.
*
*   This module contains the high level source read functions (preprocessor
*   directives are handled within this level).
*/

/*
*   INCLUDE FILES
*/
#include "general.h"	/* must always come first */
#include <glib.h>

#include <string.h>

#include "entry.h"
#include "get.h"
#include "main.h"
#include "options.h"
#include "read.h"
#include "vstring.h"

/*
*   MACROS
*/
#define stringMatch(s1,s2)	(strcmp (s1,s2) == 0)
#define isspacetab(c)		((c) == ' ' || (c) == '\t')

/*
*   DATA DECLARATIONS
*/
/*  Kind of comment, if any, introduced by a '/' character (see isComment).
 */
typedef enum { COMMENT_NONE, COMMENT_C, COMMENT_CPLUS } Comment;

/*  Fixed capacities used by the preprocessor state in this file.
 */
enum eCppLimits {
    MaxCppNestingLevel = 20,	/* entries in the conditional nesting array */
    MaxDirectiveName = 10	/* size of the buffer holding a directive name */
};

/*  Describes one nesting level of a preprocessor conditional
 *  (#if ... #elif ... #else ... #endif).
 */
typedef struct sConditionalInfo {
    boolean ignoreAllBranches;	/* ignoring parent conditional branch */
    boolean singleBranch;	/* choose only one branch */
    boolean branchChosen;	/* branch already selected */
    boolean ignoring;		/* current ignore state */
} conditionalInfo;

/*  Defines the current state of the pre-processor: the pushed-back
 *  characters, the directive currently being parsed and the stack of
 *  nested conditionals.
 */
typedef struct sCppState {
    int	    ungetch, ungetch2;	/* ungotten characters, if any ('\0' = none) */
    boolean resolveRequired;	/* must resolve if/else/elif/endif branch */
    struct sDirective {
	enum eState {
	    DRCTV_NONE,		/* no known directive - ignore to end of line */
	    DRCTV_DEFINE,	/* "#define" encountered */
	    DRCTV_HASH,		/* initial '#' read; determine directive */
	    DRCTV_IF,		/* "#if" or "#ifdef" encountered */
	    DRCTV_UNDEF		/* "#undef" encountered */
	} state;
	boolean	accept;		/* is a directive syntactically permitted? */
	vString * name;		/* macro name */
	unsigned int nestLevel;	/* index of the current entry in ifdef[]; 0 outside any conditional */
	conditionalInfo ifdef [MaxCppNestingLevel];
    } directive;
} cppState;

/*
*   DATA DEFINITIONS
*/

/*  Use brace formatting to detect end of block.
 *  Set from the argument of cppInit() and reported by isBraceFormat(); when
 *  TRUE, the conditional-branch selection logic in this file is bypassed.
 */
static boolean BraceFormat = FALSE;

/*  The single preprocessor state instance used by every function in this
 *  file.  cppInit() resets it before use; the name buffer is created lazily
 *  there and released by cppTerminate().
 */
static cppState Cpp = {
    '\0', '\0',			/* ungetch characters */
    FALSE,			/* resolveRequired */
    {
	DRCTV_NONE,		/* state */
	FALSE,			/* accept */
	NULL,			/* name (allocated by cppInit) */
	0,			/* nestLevel */
	{ {FALSE,FALSE,FALSE,FALSE} }	/* ifdef array; remaining entries are zero-initialized */
    }				/* directive */
};

/*
*   FUNCTION DEFINITIONS
*/

/*  Reports whether brace formatting was requested in the last cppInit().
 */
extern boolean isBraceFormat (void)
{
    return BraceFormat;
}

/*  Returns the current nesting depth of preprocessor conditionals
 *  (0 when outside any conditional).
 */
extern unsigned int getDirectiveNestLevel (void)
{
    return Cpp.directive.nestLevel;
}

/*  Resets the preprocessor state for a new input.  "state" selects brace
 *  formatting (see isBraceFormat).  The directive name buffer is created on
 *  first use and merely emptied on later calls.
 */
extern void cppInit (const boolean state)
{
    /*  Reuse the name buffer of a previous run when there is one. */
    if (Cpp.directive.name != NULL)
	vStringClear (Cpp.directive.name);
    else
	Cpp.directive.name = vStringNew ();

    /*  A directive is permitted at the very start of the input. */
    Cpp.directive.nestLevel = 0;
    Cpp.directive.accept    = TRUE;
    Cpp.directive.state     = DRCTV_NONE;

    /*  No pushed-back characters and no statement in progress. */
    Cpp.resolveRequired = FALSE;
    Cpp.ungetch2        = '\0';
    Cpp.ungetch         = '\0';

    BraceFormat = state;
}

/*  Releases the directive name buffer, if one was allocated by cppInit().
 *  Safe to call more than once.
 */
extern void cppTerminate (void)
{
    if (Cpp.directive.name == NULL)
	return;				/* nothing to release */

    vStringDelete (Cpp.directive.name);
    Cpp.directive.name = NULL;
}

/*  Records that a statement is in progress, so that conditional branches
 *  met from now on must be resolved (see isIgnoreBranch, pushConditional).
 */
extern void cppBeginStatement (void)
{
    Cpp.resolveRequired = TRUE;
}

/*  Records that no statement is in progress any more; the counterpart of
 *  cppBeginStatement().
 */
extern void cppEndStatement (void)
{
    Cpp.resolveRequired = FALSE;
}

/*
*   Scanning functions
*
*   This section handles preprocessor directives.  It strips out all
*   directives and may emit a tag for #define directives.
*/

/*  Pushes the character "c" back so that the next cppGetc() returns it.
 *  At most two characters may be pending at once; the most recently pushed
 *  one is returned first.
 */
extern void cppUngetc (const int c)
{
    const int pending = Cpp.ungetch;

    Assert (Cpp.ungetch2 == '\0');	/* both slots must not be occupied */
    Cpp.ungetch  = c;
    Cpp.ungetch2 = pending;
}

/*  Reads a directive name, whose first character is given by "c", into
 *  "name" (a buffer of "maxLength" bytes, always null terminated).  Reading
 *  stops at the first non-alphabetic character, which is pushed back, or
 *  when the buffer is full.  Returns TRUE if the last character examined
 *  was a space or a tab.
 */
static boolean readDirective (int c, char *const name, unsigned int maxLength)
{
    unsigned int length = 0;

    while (length < maxLength - 1)
    {
	/*  The first character was supplied by the caller; only later ones
	 *  are fetched from the input.
	 */
	if (length != 0)
	{
	    c = fileGetc ();
	    if (c == EOF  ||  ! isalpha (c))
	    {
		fileUngetc (c);
		break;
	    }
	}
	name [length++] = c;
    }
    name [length] = '\0';

    return (boolean) (c == ' ' || c == '\t');
}

/*  Reads an identifier, whose first character is given by "c", into "name".
 *  The character which ends the identifier is pushed back onto the input.
 *  "*parameterized" is set to TRUE when that character is '(' (the macro
 *  takes arguments).  Returns TRUE if the identifier was ended by white
 *  space or by '('.
 */
static boolean readDefineTag (int c, vString *const name,
			      boolean *const parameterized)
{
    vStringClear (name);
    for (;;)
    {
	vStringPut (name, c);
	c = fileGetc ();
	if (c == EOF  ||  ! isident (c))
	    break;
    }
    fileUngetc (c);
    vStringPut (name, '\0');

    *parameterized = (boolean) (c == '(');
    return (boolean) (c == '('  ||  isspace (c));
}

/*  Returns the entry of the conditional array which corresponds to the
 *  current nesting level.
 */
static conditionalInfo *currentConditional (void)
{
    return &Cpp.directive.ifdef [Cpp.directive.nestLevel];
}

static boolean isIgnore (void)
{
    return Cpp.directive.ifdef [Cpp.directive.nestLevel].ignoring;
}

static boolean setIgnore (const boolean ignore)
{
    return Cpp.directive.ifdef [Cpp.directive.nestLevel].ignoring = ignore;
}

/*  Decides whether the branch now being entered (#elif or #else) of the
 *  current conditional is to be ignored.  As a side effect, the conditional
 *  is restricted to a single branch when a statement is incomplete at this
 *  point and brace formatting is not in use.
 */
static boolean isIgnoreBranch (void)
{
    conditionalInfo *const cond = currentConditional ();

    /*  An incomplete statement met en route forces a single branch. Earlier
     *  branches holding complete statements may already have been followed,
     *  but no further branch may be.
     */
    if (! BraceFormat  &&  Cpp.resolveRequired)
	cond->singleBranch = TRUE;

    /*  Case 1: the whole conditional lies within an ignored branch of its
     *  parent conditional.
     */
    if (cond->ignoreAllBranches)
	return TRUE;

    /*  Case 2: a branch was already chosen and only one may be followed. */
    return (boolean) (cond->branchChosen  &&  cond->singleBranch);
}

/*  Marks the current conditional as having had a branch chosen when only a
 *  single branch may be followed or a statement is in progress.  Does
 *  nothing when brace formatting is in use.
 */
static void chooseBranch (void)
{
    conditionalInfo *cond;

    if (BraceFormat)
	return;

    cond = currentConditional ();
    cond->branchChosen = (boolean) (cond->singleBranch ||
				   Cpp.resolveRequired);
}

/*  Pushes one nesting level for an #if directive, indicating whether or not
 *  the branch should be ignored and whether a branch has already been chosen.
 */
static boolean pushConditional (const boolean firstBranchChosen)
{
    const boolean ignoreAllBranches = isIgnore ();	/* current ignore */
    boolean ignoreBranch = FALSE;

    if (Cpp.directive.nestLevel < (unsigned int) MaxCppNestingLevel - 1)
    {
	conditionalInfo *ifdef;

	++Cpp.directive.nestLevel;
	ifdef = currentConditional ();

	/*  We take a snapshot of whether there is an incomplete statement in
	 *  progress upon encountering the preprocessor conditional. If so,
	 *  then we will flag that only a single branch of the conditional
	 *  should be followed.
	 */
	ifdef->ignoreAllBranches= ignoreAllBranches;
	ifdef->singleBranch	= Cpp.resolveRequired;
	ifdef->branchChosen	= firstBranchChosen;
	ifdef->ignoring		= (boolean) (ignoreAllBranches || (
				    ! firstBranchChosen  &&  ! BraceFormat  &&
				    (ifdef->singleBranch || !Option.if0)));
	ignoreBranch = ifdef->ignoring;
    }
    return ignoreBranch;
}

/*  Pops one nesting level for an #endif directive (never going below
 *  level 0) and returns the ignore state of the level which becomes current.
 */
static boolean popConditional (void)
{
    if (Cpp.directive.nestLevel != 0)
	Cpp.directive.nestLevel -= 1;

    return isIgnore ();
}

/*  Emits a macro tag (kind 'd') for "name", provided define tags are
 *  enabled and, for a non-header file, file-scope tags are included.  When
 *  "parameterized" is TRUE an argument list is looked up for the tag and
 *  released again once the tag has been made.
 */
static void makeDefineTag (const char *const name, boolean parameterized)
{
    const boolean isFileScope = (boolean) (! isHeaderFile ());
    tagEntryInfo e;

    if (! includingDefineTags ())
	return;
    if (isFileScope  &&  ! Option.include.fileScope)
	return;

    initTagEntry (&e, name);

    e.kind		= 'd';
    e.kindName		= "macro";
    e.truncateLine	= TRUE;
    e.isFileScope	= isFileScope;
    e.lineNumberEntry	= (boolean) (Option.locate != EX_PATTERN);

    if (! parameterized)
    {
	makeTagEntry (&e);
	return;
    }

    /*  The argument list is heap allocated; it is only needed while the
     *  tag entry is being written.
     */
    e.extensionFields.arglist =
	getArglistFromPos (getInputFilePosition (), e.name);
    makeTagEntry (&e);
    free ((char *) e.extensionFields.arglist);
}

static void directiveDefine (const int c)
{
    boolean parameterized;

    if (isident1 (c))
    {
	readDefineTag (c, Cpp.directive.name, &parameterized);
	if (! isIgnore ())
	    /// TODO
	    // the second argument need to be tested, not sure if TRUE is correct
	    makeDefineTag (vStringValue (Cpp.directive.name), TRUE);
    }
    Cpp.directive.state = DRCTV_NONE;
}

static boolean directiveIf (const int c)
{
    const boolean ignore = pushConditional ((boolean) (c != '0'));

    Cpp.directive.state = DRCTV_NONE;

    return ignore;
}

/*  Identifies the directive which follows a '#', whose first character is
 *  given by "c", and moves the directive state machine accordingly.
 *  "define", "undef" and anything starting with "if" only select the next
 *  state; "elif", "else" and "endif" are acted upon at once.  Returns the
 *  ignore state resulting from "elif"/"else"/"endif", otherwise FALSE.
 */
static boolean directiveHash (const int c)
{
    boolean ignore = FALSE;
    char directive [MaxDirectiveName];
    DebugStatement ( const boolean ignore0 = isIgnore (); )

    readDirective (c, directive, MaxDirectiveName);
    if (stringMatch (directive, "define"))
	Cpp.directive.state = DRCTV_DEFINE;
    else if (stringMatch (directive, "undef"))
	Cpp.directive.state = DRCTV_UNDEF;
    /*  prefix match: covers "if", "ifdef" and "ifndef" alike */
    else if (strncmp (directive, "if", (size_t) 2) == 0)
	Cpp.directive.state = DRCTV_IF;
    else if (stringMatch (directive, "elif")  ||
	    stringMatch (directive, "else"))
    {
	/*  decide whether this further branch is followed; only "else"
	 *  records that a branch has been chosen
	 */
	ignore = setIgnore (isIgnoreBranch ());
	if (! ignore  &&  stringMatch (directive, "else"))
	    chooseBranch ();
	Cpp.directive.state = DRCTV_NONE;
	DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); )
    }
    else if (stringMatch (directive, "endif"))
    {
	DebugStatement ( debugCppNest (FALSE, Cpp.directive.nestLevel); )
	/*  back to the enclosing level, whose ignore state applies again */
	ignore = popConditional ();
	Cpp.directive.state = DRCTV_NONE;
	DebugStatement ( if (ignore != ignore0) debugCppIgnore (ignore); )
    }
    else	/* "pragma", etc. */
	Cpp.directive.state = DRCTV_NONE;

    return ignore;
}

/*  Dispatches the character "c", read while inside a pre-processor
 *  directive, according to the current directive state.  Returns the ignore
 *  state which applies afterwards.
 */
static boolean handleDirective (const int c)
{
    boolean ignore = isIgnore ();

    switch (Cpp.directive.state)
    {
	case DRCTV_HASH:
	    ignore = directiveHash (c);
	    break;

	case DRCTV_IF:
	    ignore = directiveIf (c);
	    break;

	case DRCTV_DEFINE:		/* "#undef" names are read the same way */
	case DRCTV_UNDEF:
	    directiveDefine (c);
	    break;

	case DRCTV_NONE:		/* rest of the line: keep current state */
	    break;
    }
    return ignore;
}

/*  Called upon reading a slash ('/') character; determines whether a
 *  comment starts here and of which type.  The character following the
 *  slash is consumed only when it opens a comment.
 */
static Comment isComment (void)
{
    const int next = fileGetc ();

    if (next == '*')
	return COMMENT_C;
    if (next == '/')
	return COMMENT_CPLUS;

    fileUngetc (next);			/* plain slash: leave input untouched */
    return COMMENT_NONE;
}

/*  Skips over a C style comment.  According to the ANSI specification a
 *  comment is treated as white space, so a space is returned in its place.
 *  EOF is returned if the input ends before the comment is closed.
 */
static int skipOverCComment (void)
{
    int c = fileGetc ();

    while (c != EOF)
    {
	const int next = fileGetc ();

	if (c == '*'  &&  next == '/')
	    return ' ';			/* replace comment with space */
	c = next;
    }
    return c;
}

/*  Skips over a C++ style comment.  A character preceded by a backslash is
 *  skipped unexamined, so an escaped newline does not end the comment.
 *  Returns the newline which ended the comment, or EOF.
 */
static int skipOverCplusComment (void)
{
    int c = fileGetc ();

    while (c != EOF  &&  c != NEWLINE)
    {
	if (c == BACKSLASH)
	    fileGetc ();		/* throw away next character, too */
	c = fileGetc ();
    }
    return c;
}

/*  Skips to the closing double quote of a string (or to EOF), passing over
 *  escaped characters, and returns a special character which symbolically
 *  represents a generic string.
 */
static int skipToEndOfString (void)
{
    int c = fileGetc ();

    while (c != EOF  &&  c != DOUBLE_QUOTE)
    {
	if (c == BACKSLASH)
	    fileGetc ();		/* throw away next character, too */
	c = fileGetc ();
    }
    return STRING_SYMBOL;		/* symbolic representation of string */
}

/*  Skips to the closing single quote of a character constant, returning a
 *  special character which symbolically represents a generic character.
 *  An unescaped newline also ends the scan and is pushed back for the
 *  caller to read; EOF ends it as well.
 */
static int skipToEndOfChar (void)
{
    for (;;)
    {
	const int c = fileGetc ();

	if (c == EOF  ||  c == SINGLE_QUOTE)
	    break;
	if (c == NEWLINE)
	{
	    fileUngetc (c);		/* unterminated constant */
	    break;
	}
	if (c == BACKSLASH)
	    fileGetc ();		/* throw away next character, too */
    }
    return CHAR_SYMBOL;		    /* symbolic representation of character */
}

/*  This function returns the next character, stripping out comments,
 *  C pre-processor directives, and the contents of single and double
 *  quoted strings. In short, strip anything which places a burden upon
 *  the tokenizer.
 *
 *  Characters pushed back with cppUngetc() are returned first, unchanged.
 *  Otherwise input is read until a character is found which is neither part
 *  of a directive nor within an ignored conditional branch:
 *    - a C comment is returned as one space;
 *    - a string is returned as STRING_SYMBOL, a character constant as
 *      CHAR_SYMBOL;
 *    - a backslash directly followed by a newline is dropped;
 *    - the trigraphs handled in the '?' case are translated.
 *  EOF is returned at the end of input.
 */
extern int cppGetc (void)
{
    boolean directive = FALSE;		/* inside a directive line */
    boolean ignore = FALSE;		/* inside an ignored branch */
    int c;

    if (Cpp.ungetch != '\0')
    {
	/*  pop the most recent pushed-back character; the older one moves up */
	c = Cpp.ungetch;
	Cpp.ungetch = Cpp.ungetch2;
	Cpp.ungetch2 = '\0';
	return c;	    /* return here to avoid re-calling debugPutc () */
    }
    else do
    {
	c = fileGetc ();
process:			/* re-entry point for translated trigraphs */
	switch (c)
	{
	    case EOF:
		/*  clear both flags so that the loop terminates */
		ignore    = FALSE;
		directive = FALSE;
		break;

	    case TAB:
	    case SPACE:
		break;				/* ignore most white space */

	    case NEWLINE:
		/*  a directive ends with its line, except while ignoring */
		if (directive  &&  ! ignore)
		    directive = FALSE;
		Cpp.directive.accept = TRUE;	/* '#' is allowed at line start */
		break;

	    case DOUBLE_QUOTE:
		Cpp.directive.accept = FALSE;
		c = skipToEndOfString ();
		break;

	    case '#':
		/*  only a '#' met where a directive is permitted starts one */
		if (Cpp.directive.accept)
		{
		    directive = TRUE;
		    Cpp.directive.state  = DRCTV_HASH;
		    Cpp.directive.accept = FALSE;
		}
		break;

	    case SINGLE_QUOTE:
		Cpp.directive.accept = FALSE;
		c = skipToEndOfChar ();
		break;

	    case '/':
	    {
		const Comment comment = isComment ();

		if (comment == COMMENT_C)
		    c = skipOverCComment ();
		else if (comment == COMMENT_CPLUS)
		{
		    c = skipOverCplusComment ();
		    /*  push the newline back; presumably so that the NEWLINE
		     *  case above handles it on the next read
		     */
		    if (c == NEWLINE)
			fileUngetc (c);
		}
		else
		    Cpp.directive.accept = FALSE;
		break;
	    }

	    case BACKSLASH:
	    {
		int next = fileGetc ();

		/*  backslash-newline: drop both (the loop condition is then
		 *  re-tested by "continue")
		 */
		if (next == NEWLINE)
		    continue;
		else if (next == '?')
		    cppUngetc (next);
		else
		    fileUngetc (next);
		break;
	    }

	    case '?':
	    {
		int next = fileGetc ();
		if (next != '?')
		    fileUngetc (next);
		else
		{
		    /*  "??" read: translate the trigraph, if it is one */
		    next = fileGetc ();
		    switch (next)
		    {
			case '(':          c = '[';       break;
			case ')':          c = ']';       break;
			case '<':          c = '{';       break;
			case '>':          c = '}';       break;
			/*  '\' and '#' are themselves significant here, so
			 *  they are run through the outer switch again
			 */
			case '/':          c = BACKSLASH; goto process;
			case '!':          c = '|';       break;
			case SINGLE_QUOTE: c = '^';       break;
			case '-':          c = '~';       break;
			case '=':          c = '#';       goto process;
			default:
			    /*  not a trigraph: return the first '?' now and
			     *  queue the second one behind it
			     */
			    fileUngetc (next);
			    cppUngetc ('?');
			    break;
		    }
		}
	    } break;

	    default:
		Cpp.directive.accept = FALSE;
		if (directive)
		    ignore = handleDirective (c);
		break;
	}
    } while (directive || ignore);

    DebugStatement ( debugPutc (DEBUG_CPP, c); )
    DebugStatement ( if (c == NEWLINE)
		debugPrintf (DEBUG_CPP, "%6ld: ", getInputLineNumber () + 1); )

    return c;
}


/*  Extracts the argument list of "tokenName" from the text of the input
 *  file lying between "startPosition" and the current read position (plus
 *  one further character).  The read position of the file is restored
 *  before returning.
 *
 *  Returns a string allocated by getArglistFromStr(), to be released by the
 *  caller with free(), or NULL if no argument list was found, the span is
 *  empty or the file position could not be determined.
 */
extern char *getArglistFromPos(fpos_t startPosition, const char *tokenName)
{
	fpos_t originalPosition;
	char *result = NULL;
	char *arglist = NULL;
	long pos1, pos2 = ftell(File.fp);

	/* Without a position to come back to, the stream must not be moved. */
	if (pos2 < 0 || fgetpos(File.fp, &originalPosition) != 0)
		return NULL;

	if (fsetpos(File.fp, &startPosition) == 0)
	{
		pos1 = ftell(File.fp);
		if (pos1 >= 0 && pos2 > pos1)
		{
			const size_t span = (size_t) (pos2 - pos1) + 1;

			result = (char *) g_malloc(span + 1);
			if (result != NULL)
			{
				/* Terminate after what was actually read: at the end
				 * of the file fewer than "span" bytes are delivered. */
				const size_t count = fread(result, sizeof(char), span, File.fp);

				result[count] = '\0';
				arglist = getArglistFromStr(result, tokenName);
				g_free(result);	/* pairs with g_malloc() */
			}
		}
	}
	fsetpos(File.fp, &originalPosition);
	return arglist;
}

/*  States of the scanner used by stripCodeBuffer().
 */
typedef enum
{
	st_none_t,		/* ordinary code */
	st_escape_t,		/* the previous character was a backslash */
	st_c_comment_t,		/* inside a C comment */
	st_cpp_comment_t,	/* inside a C++ comment */
	st_double_quote_t,	/* inside a double quoted string */
	st_single_quote_t	/* inside a single quoted character constant */
} ParseState;

/*  Appends a blank to the "pos" characters already written to "buf", unless
 *  nothing was written yet or the last character written is a blank.
 *  Returns the new number of characters written.
 */
static size_t stripPutBlank(char *buf, size_t pos)
{
	if ((pos > 0) && (buf[pos-1] != ' '))
		buf[pos++] = ' ';
	return pos;
}

/*  Strips "buf" in place down to bare code:
 *    - comments are removed, leaving at most one blank in their place;
 *    - quoted strings and character constants are removed, quotes included;
 *    - a backslash and the character following it are removed;
 *    - each run of white space is reduced to one blank, and leading white
 *      space is dropped.
 *  The result is never longer than the input.
 */
static void stripCodeBuffer(char *buf)
{
	size_t i, pos = 0;
	size_t comment_body = 0;	/* index of first character after the opening of a C comment */
	ParseState state = st_none_t, prev_state = st_none_t;

	for (i = 0; buf[i] != '\0'; ++i)
	{
		const char ch = buf[i];

		/* The character after a backslash is consumed whatever it is, so
		 * that an escaped quote or slash cannot be mistaken for a
		 * delimiter. */
		if (st_escape_t == state)
		{
			state = prev_state;
			prev_state = st_none_t;
			continue;
		}
		if ('\\' == ch)
		{
			prev_state = state;
			state = st_escape_t;
			continue;
		}

		switch (state)
		{
			case st_none_t:
				if (('/' == ch) && ('*' == buf[i+1]))
				{
					state = st_c_comment_t;
					comment_body = i + 2;
				}
				else if (('/' == ch) && ('/' == buf[i+1]))
					state = st_cpp_comment_t;
				else if ('"' == ch)
					state = st_double_quote_t;
				else if ('\'' == ch)
					state = st_single_quote_t;
				else if (isspace((unsigned char) ch))
					pos = stripPutBlank(buf, pos);
				else
					buf[pos++] = ch;
				break;
			case st_c_comment_t:
				/* The '*' of the opening must not double as the '*' of
				 * the closing, hence the check against comment_body. */
				if (('/' == ch) && (i > comment_body) && ('*' == buf[i-1]))
				{
					pos = stripPutBlank(buf, pos);
					state = st_none_t;
				}
				break;
			case st_cpp_comment_t:
				if ('\n' == ch)
				{
					pos = stripPutBlank(buf, pos);
					state = st_none_t;
				}
				break;
			case st_double_quote_t:
				if ('"' == ch)
					state = st_none_t;
				break;
			case st_single_quote_t:
				if ('\'' == ch)
					state = st_none_t;
				break;
			case st_escape_t:	/* dealt with before the switch */
				break;
		}
	}
	buf[pos] = '\0';
}

/*  Extracts the parenthesized argument list which follows the first
 *  occurrence of "name" in "buf".  "buf" is modified: it is stripped with
 *  stripCodeBuffer() and then cut off after the argument list.
 *
 *  Returns a copy of the list, parentheses included, allocated with
 *  strdup() (or the result of strdup() failing), or NULL if either argument
 *  is NULL, "name" is empty, or no "name" followed by '(' is found.  If the
 *  parentheses are unbalanced, the list runs to the end of the buffer.
 */
extern char *getArglistFromStr(char *buf, const char *name)
{
	char *open, *cursor;
	int depth = 1;

	if ((buf == NULL) || (name == NULL) || (name[0] == '\0'))
		return NULL;

	stripCodeBuffer(buf);

	open = strstr(buf, name);
	if (open == NULL)
		return NULL;
	open = strchr(open, '(');
	if (open == NULL)
		return NULL;

	/* Walk to just past the parenthesis which matches the opening one. */
	cursor = open + 1;
	while ((depth > 0) && (*cursor != '\0'))
	{
		if (*cursor == '(')
			++depth;
		else if (*cursor == ')')
			--depth;
		++cursor;
	}
	*cursor = '\0';
	return strdup(open);
}

/* vi:set tabstop=8 shiftwidth=4: */