/* myertoken.c - Reorganize parse tokens by source file -*- coding: latin-1 -*-

    2003 by Jonathan Yavner.  GPL license; see file COPYING for details.
 */

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>

#include "hashtab.h"

#include "myer.h"


/* Chain of macro spots whose expansions are in progress; myerToken() appends
   to it on TOKEN_MACROREF and prunes it on TOKEN_ENDSTMT/TOKEN_ENDINPUT. */
static CHAIN_t CurMacros;
static SRC_t *CurSrc;	  /* Current token stream (file being read) */
/* Spot of the function currently being parsed (set on TOKEN_FUNCSTART,
   cleared on TOKEN_FUNCEND).  If nonzero, declarations are local. */
static SPOT_t *CurFun;
static CHAIN_t *LastSpot; /* LastSpot->spot was last spot created */
static int      LastSID;  /* File that contains LastSpot */

static SRC_t **SIDs;	  /* Array of token streams for this compilation unit */
static DECL_t **UIDs, **FakeUIDs; /* Arrays of decls by UID */
/* NextFakeUID: most recently issued fake UID; AddFakeUID() pre-decrements it,
   so fake UIDs are negative.
   last_builtin: value taken from the TOKEN_BUILTIN record; UIDs below it are
   treated as built-ins.  Zero while reading phase-2/3/4 files.
   first_line: global line number of the first TOKEN_FILEPUSH seen. */
static int NextFakeUID, last_builtin, first_line;

/* Name given by the most recent #line directive (TOKEN_FILECHG), and the
   SID of the file that was current when it was seen. */
static char *LineDirective_name;
static int  LineDirective_sid;



/* ============================================================================
   Working with chains of interesting spots.
*/

/* ----------------------------------------------------------------------------
   Inserts spot S into a spot chain, keeping the chain ordered by
   (line, col).

   START, if nonzero, is a chain link from which to search forward for the
   insertion point.  If START is 0, the search runs backward from the tail
   of CurSrc's spot chain.  A link whose spot pointer is null ends either
   scan.

   Among existing entries with the same (line, col), one that also has the
   same (endline, endcol) and the same decl counts as a duplicate; the
   ref.dlink field is deliberately not compared because it is never
   identical.

   Result is 1 if S was inserted (LastSpot then refers to the new link), or
   0 if S duplicates an existing entry and nothing was inserted.
*/
static int
AddSpot( CHAIN_t *start, SPOT_t *s )
{
	CHAIN_t *pos = start;

	if (pos) {
		/* Forward: stop at the first entry that is not before S */
		for (; pos->spot; pos = pos->next) {
			if (pos->spot->line > s->line) {
				break;
			}
			if (pos->spot->line == s->line
			    && pos->spot->col >= s->col) {
				break;
			}
		}
	} else {
		/* Backward: stop at the last entry that is before S, then
		   step forward onto the first one that is not */
		for (pos = CurSrc->spots.prev; pos->spot; pos = pos->prev) {
			if (pos->spot->line < s->line) {
				break;
			}
			if (pos->spot->line == s->line
			    && pos->spot->col < s->col) {
				break;
			}
		}
		pos = pos->next;
	}

	/* Walk the run of entries that start at exactly S's position */
	for (; pos->spot
		     && pos->spot->line == s->line
		     && pos->spot->col == s->col; pos = pos->next) {
		const SPOT_t *cand = pos->spot;

		if (cand->endline == s->endline
		    && cand->endcol == s->endcol
		    && cand->decl == s->decl) {
			return 0; /* Exact duplicate */
		}
	}

	LastSpot = InsertChain( pos, s );
	return 1;
} /* AddSpot */


/* ----------------------------------------------------------------------------
   Allocates a spot of the given KIND for token T, referring to declaration
   VAL, and sites it in the appropriate spot chain:

     - If T overlays the first or last character of a macro invocation in
       CurMacros (for a different decl), the spot is made coextensive with
       that invocation and sited in the current file.
     - Else, if T lies within the definition of a macro in CurMacros, the
       spot is sited in the chain holding that definition.
     - Else the spot is an ordinary reference sited in the current file,
       unless it precedes first_line (or is a gcc-3.2 first-line macro that
       precedes the current baseline), in which case it is dropped.

   On success the spot is linked into VAL's reference chain (except for
   SPOT_Skip and SPOT_IncludeFile, which may have VAL == 0) and LastSID is
   updated.

   Result is the new spot; or, for a SPOT_MacroRef that duplicates an
   existing siting inside a macro definition, the definition spot of the
   referenced macro; or 0 if nothing was sited.
*/
static SPOT_t *
NewSpot( enum SPOTKIND kind, TOKEN_t *t, DECL_t *val )
{
	SPOT_t *s, *mac;
	int sited = 0;

	s = NEW_(SPOT_t);
	s->decl     = val;
	s->kind     = kind;
	s->line     = t->line;
	s->col      = t->col;
	s->endline  = t->endline;
	s->endcol   = t->endcol;

	/* In gcc-3.2, generated symbols overlay the *last* character of the
	   macro invocation.  In gcc-3.3, they sometimes overlay the *first*
	   character. */
	mac = FIND_CHAIN_( &CurMacros, SPOT_t, v,
			   (v->endline == t->line
			    && v->decl != val
			    && (v->endcol == t->col+1
				|| v->col==t->col)) );

	if (mac) {
		/* Generated symbol: stretch it over the whole invocation */
		s->line    = mac->line;
		s->col     = mac->col;
		s->endline = mac->endline;
		s->endcol  = mac->endcol;
		if (AddSpot( 0, s )) {
			LastSID = CurSrc->sid;
			sited = 1;
		}
		/* else: duplicate, discarded below */
	} else {
		mac = FIND_CHAIN_( &CurMacros, SPOT_t, v,
				   (v->decl->def &&
				    v->decl->def->spot->line <= t->line &&
				    t->line < v->decl->def->spot->endline) );
		if (mac) {
			/* 'mac' is the SPOT_t for a macro expansion, and
			   this reference is inside that macro's
			   definition. */
			if (AddSpot( mac->decl->def, s )) {
				LastSID = mac->decl->defsid;
				sited = 1;
			} else if (kind == SPOT_MacroRef && s->decl->def) {
				/* Duplicate siting of a MacroRef inside a
				   MacroDef: hand back the definition spot. */
				SPOT_t *defspot = s->decl->def->spot;

				free( s );
				return defspot;
			}
			/* else: duplicate that is not a MacroRef */
		} else if (t->line < first_line) {
			/* Precedes the first file; not sited */
		} else if (t->line == first_line
			   && Cc1_major == 3 && Cc1_minor == 2
			   && t->line < CurSrc->baseline) {
			/* gcc-3.2 does not distinguish between macros
			   defined on the first line of the .c file and
			   those defined on the command line. */
		} else {
			assert( t->line >= CurSrc->baseline );

			/* Ordinary reference */
			if (AddSpot( 0, s )) {
				LastSID = CurSrc->sid;
				sited = 1;
			}
		}
	}

	if (!sited) {
		/* No new spot was added */
		free( s );
		return 0;
	}

	/* New spot was added, so link it back to its decl */
	if (kind != SPOT_Skip && kind != SPOT_IncludeFile) {
		s->ref.dlink = InsertChain( &val->refs, s );
	}
	return s;
} /* NewSpot */



/* ============================================================================
   Working with unique ID's for declarations and source files.
*/

/* ----------------------------------------------------------------------------
   Returns the declaration registered under T's (positive) UID, creating
   and registering a new one from T's name and class if none exists yet.

   A newly created declaration whose UID is below last_builtin is marked
   public and global.  While last_builtin is nonzero, a new declaration is
   also marked global when it is created outside of any function.
*/
static DECL_t *
FindUID( TOKEN_t *t )
{
	DECL_t *fresh;
	int is_builtin;

	assert( t->uid > 0 );

	if (t->uid < NUM_IDS(UIDs) && UIDs[t->uid]) {
		/* Already known */
		return UIDs[t->uid];
	}

	is_builtin = (t->uid < last_builtin);

	fresh = NEW_(DECL_t);
	fresh->name  = t->name;
	fresh->class = t->class;
	fresh->uid   = t->uid;
	if (is_builtin) {
		/* Built-in thingies are intrinsically public */
		fresh->ispublic = fresh->isglobal = 1;
	}
	InitChain( &fresh->refs );
	InitChain( &fresh->decls );
	AddUID( &UIDs, t->uid, fresh );

	if (last_builtin && (is_builtin || !CurFun)) {
		/* We'll fuzz C rules a bit by treating all items outside of
		   functions as module-global, but not when reading a .myer2
		   file (where last_builtin is zero). */
		fresh->isglobal = 1;
	}

	return UIDs[t->uid];
} /* FindUID */


/* ----------------------------------------------------------------------------
   Creates a declaration for an item that has no compiler-assigned UID,
   gives it the next negative ("fake") UID, and registers it in FakeUIDs
   under the negated UID.  The new declaration is marked global and is also
   stored as the data of T's hash-table entry.  Result is the new
   declaration.
*/
static DECL_t *
AddFakeUID( TOKEN_t *t )
{
	DECL_t *fake = NEW_(DECL_t);

	NextFakeUID -= 1;
	fake->uid      = NextFakeUID;
	fake->name     = t->name;
	fake->class    = t->class;
	fake->isglobal = 1;
	InitChain( &fake->refs );
	InitChain( &fake->decls );

	/* Let later tokens with the same name find this decl */
	t->hashref->data = fake;

	AddUID( &FakeUIDs, -fake->uid, fake );

	return fake;
} /* AddFakeUID */



/* ============================================================================
   Organize the token stream by source file.
*/

/* ----------------------------------------------------------------------------
   Process one token from the gcc parser, as modified by myerParse().

   T->class selects the action.  File tokens (push/pop/skip/#line) maintain
   CurSrc and the per-file line bookkeeping; identifier, constant and macro
   tokens create spots via NewSpot(); the remaining tokens (DEF, PUBLIC,
   DECLSPOT, FUNCSTART, ...) modify the spot created by the preceding
   token, which is remembered in the static variable 'spot'.

   In general, we receive (endline,endcol) pointing to the last character of
   the token, but we change (endline,endcol) to point to the first character
   that's *not* part of the token.  T is modified in place.
*/
void
myerToken( TOKEN_t *t )
{
	DECL_t *d, *dalt;
	SRC_t *src = 0;
	static SPOT_t *spot; /* Spot made by the previous token, or 0 */
	CHAIN_t *cm, *keep;
	int sid;
	dev_t dev;
	ino_t ino;

	if (!first_line && t->class == TOKEN_FILEPUSH) {
		first_line = t->line;
	}

	TokenHash( Names_hash, t );
	if (t->uid) {
		d = FindUID( t );
	} else {
		d = 0; /* Make compiler happy */
	}

	switch (t->class) {
	case TOKEN_COMMENT:
	case TOKEN_KEYWORD:
		/* Ignore these.  They're just present to make the pass-1
		   debug output files easier to comprehend. */
		spot = 0;
		break;

	case TOKEN_FILEPUSH:
		/* Must be unique or we would have gotten FILESKIP instead. */
		src = NEW_(SRC_t);
		src->name = t->name;
		Stat_File( t->name, &src->device, &src->inode );
		src->sid   = ++NextSID;
		src->startline = t->line;
		src->baseline  = t->line;
		src->fileline  = 1;
		InitChain( &src->spots );
		AddSID( &SIDs, src );
		if (CurSrc) {
			/* Record the #include in the including file */
			spot = NewSpot(SPOT_IncludeFile,t,0);
			assert( spot );
			spot->ref.sid = NextSID;
			CurSrc->fileline += (t->line - CurSrc->baseline);
			CurSrc->baseline  = t->line;
		}
		CurSrc = src;
		spot = 0;
		if (CurFun) {
			/* This makes things too complicated. Where is the
			   function's cast color to be displayed? */
			assert(!"#include inside a function not implemented\n");
		}
		break;

	case TOKEN_FILEPOP:
		CurSrc->baseline = -1;
		for (sid = NextSID-1; sid > 0; --sid) {
			/* Scan backwards, looking for the SRC_t we're
			   popping back to.  Be careful about stuff like
			   <limits.h>, which includes itself recursively. */
			src = SIDs[sid];
			if (src->name == t->name &&
			    src->baseline > 0 &&
			    src->spots.prev->spot->kind==SPOT_IncludeFile &&
			    src->spots.prev->spot->ref.sid == CurSrc->sid) {
				break;
			}
		}
		if (LineDirective_name
		    && !strcmp( t->name, LineDirective_name )) {
			/* Popping back to metafile.  No SRC_t carries the
			   #line name, so when the scan above found nothing
			   we return to the file that contained the #line
			   directive.  (Previously only 'sid' was set here
			   and 'src' kept whatever the failed scan left
			   behind.) */
			if (sid <= 0) {
				sid = LineDirective_sid;
				src = SIDs[sid];
			}
		} else {
			/* Should be popping back to already-seen file */
			assert( sid > 0 );
			/* Cross-check our line-number tracking */
			assert( t->endline == src->fileline );
		}
		/* Close the #include spot we are returning to */
		src->spots.prev->spot->endline = t->line;
		src->baseline = t->line;
		CurSrc = src;
		spot = 0;
		break;

	case TOKEN_FILECHG:
		/* This is a #line directive, which we can't really deal
		   with well.  For now, just remember it so we can deal with
		   file-pops. */
		LineDirective_name = t->name;
		LineDirective_sid  = CurSrc->sid;
		break;

	case TOKEN_FILESKIP:
		if (CurFun) {
			/* This makes things too complicated. Where is the
			   function's cast color to be displayed? */
			assert(!"#include inside a function not implemented\n");
		}
		/* Find some source file with this name.  Doesn't matter
		   which; they'll be merged eventually */
		Stat_File( t->name, &dev, &ino );
		for (sid = 1; sid < NUM_IDS(SIDs); ++sid) {
			if (!SIDs[sid]) {
				break;
			}
			if (SIDs[sid]->device==dev && SIDs[sid]->inode==ino) {
				break;
			}
		}
		assert( sid < NUM_IDS(SIDs) && SIDs[sid] );
		t->endline = t->line + 1;
		assert( CurSrc );
		spot = NewSpot( SPOT_IncludeFile, t, 0 );
		assert( spot );
		spot->ref.sid = sid;
		spot->kind = TOKEN_FILESKIP; /* Flag for Fixup_Line_Numbers */
		break;

	case TOKEN_SKIP:
		++t->endline;
		/* The call must stay outside assert(): otherwise the skip
		   spot is never created when NDEBUG is defined. */
		spot = NewSpot( SPOT_Skip, t, 0 );
		assert( spot );
		spot = 0;
		break;

	case TOKEN_MACRODEF:
		/* Macros don't come with UID's, so we'll make up negative
		   ones.  Use the hash-table to find the most recent
		   definition (we ignore #undef) */
		d = AddFakeUID( t );
		++t->endline;
		spot = NewSpot(SPOT_MacroDef,t,d);
		assert( spot );
		d->def = LastSpot;
		d->defsid = LastSID;
		break;

	case TOKEN_MACROREF:
		d = (DECL_t *) t->hashref->data;
		if (!d) {
			/* Reference to built-in or command-line macro */
			d = AddFakeUID(t);
			d->class = TOKEN_MACRODEF;
		}
		++t->endcol;
		spot = NewSpot(SPOT_MacroRef,t,d);
		if (spot) {
			/* Expansion is now in progress */
			InsertChain( &CurMacros, spot );
		}
		break;

	case TOKEN_FUNCSTART:
		t->endline = t->line; /* For now */
		t->endcol  = t->col;  /* For now */
		assert( spot && spot->decl == d );
		spot->kind = SPOT_Def;
		spot = NewSpot(SPOT_FunDef,t,d);
		assert( spot );
		d->def    = LastSpot;
		d->defsid = LastSID;
		assert( !CurFun );
		CurFun = spot;
		break;

	case TOKEN_FUNCEND:
		assert( CurFun );
		assert( CurFun->decl == d );
		CurFun->endline = t->line;
		CurFun->endcol  = t->col;
		CurFun = 0;
		break;

	case TOKEN_CHARCONST:
	case TOKEN_STRCONST:
	case TOKEN_NUMBCONST:
		/* Since constants don't have UID's, and they're all public
		   anyway, we use the hashtable to find them. */
		d = (DECL_t *) t->hashref->data;
		if (!d) {
			d = AddFakeUID(t);
			d->ispublic = d->isglobal = 1;
		}
		t->endline = t->line;
		t->endcol  = t->col + strlen(t->name);
		spot = NewSpot(SPOT_ConstRef,t,d);
		break;

	case TOKEN_PARAMETER:
		/* Parameters are always function-local */
		d->isglobal = 0;
		/* Fall through */

	case TOKEN_ENUMCONST:
	case TOKEN_FIELD:
	case TOKEN_LABEL:
	case TOKEN_FUNCTION:
	case TOKEN_STRUCT:
	case TOKEN_TYPEDEF:
	case TOKEN_VARIABLE:
		t->endline = t->line;
		t->endcol = t->col + strlen(t->name);
		spot = NewSpot(SPOT_Ref,t,d);
		if (!spot ||
		    t->uid < last_builtin ||
		    (t->class!=TOKEN_TYPEDEF && t->class!=TOKEN_ENUMCONST)) {
			break;
		}
		/* Otherwise fall through - types and enum constants have to
		   be defined at first reference, since there's no way to
		   forward-reference them. */

	case TOKEN_DEF:
		/* Check 'spot' itself first, like the PUBLIC and DECLSPOT
		   cases, so a stray DEF trips the assert instead of
		   dereferencing a null pointer inside it. */
		assert( spot
			&& spot->decl == d
			&& (spot->kind == SPOT_Ref
			    || spot->kind == SPOT_Decl
			    || spot->kind == SPOT_Def) );
		if (!d->def || d->def->spot->kind != SPOT_Def) {
			assert( spot == LastSpot->spot );
			spot->kind = SPOT_Def;
			d->def = LastSpot;
			d->defsid = LastSID;
		}
		break;

	case TOKEN_PUBLIC:
		assert( spot && spot->decl == d );
		if (d->uid != t->uid) {
			/* Skip this for duplicates, since a public
			   definition following a non-public declaration is
			   NOT PUBLIC */
			break;
		}
		d->ispublic = 1;
		d->isglobal = 1;
		/* Fall through */

	case TOKEN_DECLSPOT:
		assert( spot
			&& spot->decl == d
			&& (spot->kind == SPOT_Ref
			    || spot->kind == SPOT_Decl
			    || spot->kind == SPOT_Def) );
		if (spot->kind != SPOT_Def && spot->kind != SPOT_Decl) {
			spot->kind = SPOT_Decl;
			InsertChain( &d->decls, (void *) CurSrc->sid );
		}
		if (!d->def) {
			assert( spot == LastSpot->spot );
			d->def = LastSpot;
			d->defsid = LastSID;
		}
		break;

	case TOKEN_EQUATEUID:
		/* For this token, t->line carries the other UID */
		dalt = UIDs[t->line];
		if (!dalt && t->line < last_builtin) {
			/* Redeclaration of built-in */
			UIDs[t->line] = d;
			d->uid = t->line;
			break;
		}
		assert( d->name == dalt->name
			&& d->class == dalt->class );
		MergeUID( dalt, d );
		free( UIDs[t->uid] );
		UIDs[t->uid] = UIDs[t->line];
		break;

	case TOKEN_BUILTIN:
		/* For this token, t->line carries the last built-in UID and
		   t->name the compiler version as "major<sep>minor". */
		last_builtin = t->line;
		{
			char *ptr;
			Cc1_major = strtol( t->name, &ptr, 10 );
			/* Step over the separator only if there is one; if
			   the name ends after the major number, &ptr[1]
			   would point past the terminating NUL. */
			Cc1_minor = *ptr ? strtol( ptr + 1, &ptr, 10 ) : 0;
		}
		break;

	case TOKEN_ENDINPUT:
		t->line = 0x7fffffff;
		/* Fall through */

	case TOKEN_ENDSTMT:
		/* Find the highest-numbered macro that precedes our line */
		keep = &CurMacros;
		for (cm = CurMacros.next; cm != &CurMacros; cm = cm->next) {
			if (cm->spot->line <= t->line
			    && (!keep->spot
				|| keep->spot->endline < cm->spot->endline))  {
				keep = cm;
			}
		}
		/* Delete all macros that have gone out of scope, except for
		   the last one and its submacros. */
		while (CurMacros.next != keep) {
			UnlinkChain( CurMacros.next );
		}
		/* Catch extraneous backward refs by bumping our
		   baseline. Since the tokens arrive slightly out of order,
		   we'll set our baseline to end of the last kept macro. */
		if (CurMacros.next != &CurMacros
		    && t->line > CurMacros.next->spot->line) {
			t->line = CurMacros.next->spot->line;
		}
		CurSrc->fileline += t->line - CurSrc->baseline;
		CurSrc->baseline  = t->line;
		spot = 0;
		break;

	case TOKEN_PHASE4:
	case TOKEN_DECL:
	case TOKEN_NEWFILE:
	case TOKEN_SRCMARK:
	case TOKEN_NUMREFS:
		assert( !"Shouldn't be seen in phase-2" );
	}
} /* myerToken */


/* ----------------------------------------------------------------------------
   Switch from global line numbers to file-specific ones.

   For every source file SIDs[1..NextSID], walks its spot chain and
   subtracts a running adjustment from each spot's line and endline.  The
   adjustment starts at (startline - 1) and is recomputed at every
   SPOT_IncludeFile spot so that the spot ends up exactly one line long.
   Spots that myerToken() flagged with kind TOKEN_FILESKIP are turned back
   into SPOT_IncludeFile without changing the adjustment.
*/
static void
Fixup_Line_Numbers(void)
{
	int sid = 1;

	while (sid <= NextSID) {
		SRC_t *file = SIDs[sid++];
		int adjust = file->startline - 1;
		CHAIN_t *link;

		for (link = file->spots.next; link->spot; link = link->next) {
			SPOT_t *spot = link->spot;

			if (spot->kind == SPOT_IncludeFile) {
				/* We checked elsewhere that the #include
				   statement doesn't have a backslash
				   continuation, so it is exactly one line */
				spot->line = spot->line - (adjust + 1);
				adjust = spot->endline - spot->line - 1;
			} else {
				if (spot->kind == TOKEN_FILESKIP) {
					/* Skipped #include: restore the real
					   kind.  Off by one error? */
					spot->kind = SPOT_IncludeFile;
				}
				spot->line -= adjust;
			}
			spot->endline -= adjust;
			assert( spot->line > 0 && spot->endline > 0 );
		}
	}
} /* Fixup_Line_Numbers */


/* ----------------------------------------------------------------------------
   Read phase-2 data from file.  Also used for reading phase-3 and phase-4
   files.

   FILENAME is opened for reading; the process exits if it cannot be
   opened.  The file has two sections: first the declaration records
   (TOKEN_DECL), then, starting at the first TOKEN_NEWFILE or TOKEN_SRCMARK
   record, the per-file spot records.  Spot records are read until a record
   class equal to LASTCH is fetched (pass EOF to read the whole file).

   Result is the compilation unit { SIDs, UIDs, FakeUIDs, Names_hash }
   built from the file-scope tables, which this function fills but does not
   reset.  On a parse error the file is closed, an assertion fails, and
   (if assertions are disabled) an all-zero unit is returned.
*/
COMPILATION_UNIT_t
myerToken_Read( char *filename, int lastch )
{
	int sid, num_waiting = 0, srcmark = 0;
	char class[3], name[4096];
	TOKEN_t tok;
	DECL_t *decl;
	SPOT_t *s;
	FILE *infile;

	if (!(infile = fopen( filename, "r" ))) {
		perror( filename );
		exit( 1 );
	}

	Names_hash = Token_Hash_init();

	/* Load UIDs */
	last_builtin = 0; /* Prevent duplicate detection in FindUID */
	for (;;) {
		tok.class = getc(infile);
		if (tok.class == TOKEN_NEWFILE
		    || tok.class == TOKEN_SRCMARK) {
			/* TOKEN_SRCMARK occurs before TOKEN_NEWFILE for
			   phases 3 and 4 */
			break;
		}
		if (tok.class == TOKEN_COMMENT) {
			fscanf( infile, " %*[^\n]\n" );
			continue;
		}
		if (tok.class != TOKEN_DECL
		    || fscanf( infile, " #%d %2s ", &tok.uid, class ) != 2) {
			goto Parse_Error;
		}
		/* Determine whether optional decl-spot is present */
		tok.class = getc(infile);
		if (tok.class != TOKEN_NEWFILE) {
			ungetc( tok.class, infile );
			sid = 0;
		} else if (fscanf( infile, "%d %d ", &sid, &tok.line ) != 2) {
			goto Parse_Error;
		}
		if (fscanf( infile, "%4095[^\n]\n", name ) != 1) {
			goto Parse_Error;
		}
		tok.name = name;
		tok.hashref = 0;
		TokenHash( Names_hash, &tok );
		tok.class = class[0];

		/* Positive UIDs are compiler-assigned, others are fake */
		if (tok.uid > 0) {
			decl = FindUID( &tok );
		} else {
			decl = AddFakeUID( &tok );
			assert( decl->uid == tok.uid );
		}
		/* Second class character: 'P' = public, 'f' = not global */
		if (class[1] == 'P') {
			decl->ispublic = 1;
			decl->isglobal = 1;
		} else if (class[1] != 'f') {
			decl->isglobal = 1;
		}
		if (sid) {
			decl->defsid = sid;
			/* Save for later: park the line number in the def
			   pointer until the matching spot is read below */
			decl->def = (CHAIN_t *) tok.line;
			decl->defwaiting = 1;
			++num_waiting;
		}
	}

	/* Load spots */
	while ((int) tok.class != lastch) {
		switch (tok.class) {
		case TOKEN_COMMENT:
			fscanf( infile, "%*[^\n]\n" );
			break;
		case TOKEN_SRCMARK:
			/* Mark applies to the next TOKEN_NEWFILE */
			switch (getc(infile)) {
			case 'I':
				fscanf( infile, "nconsistent contents\n" );
				srcmark = INCONSISTENT_CONTENTS;
				break;
			case 'W':
				fscanf( infile, "anted\n" );
				srcmark = WANTED_FOR_OUTPUT;
				break;
			default:
				goto Parse_Error;
			}
			break;
		case TOKEN_NEWFILE:
			if (fscanf(infile,"%d %4095[^\n]\n",&NextSID,name)<2) {
				goto Parse_Error;
			}
			tok.name = name;
			tok.hashref = 0;
			TokenHash( Names_hash, &tok );
			CurSrc = NEW_(SRC_t);
			CurSrc->name = tok.name;
			if (tok.name[0] == '\\') {
				/* Special filename for globals */
				assert( !srcmark );
			} else {
				Stat_File( tok.name,
					   &CurSrc->device,
					   &CurSrc->inode );
			}
			CurSrc->sid  = NextSID;
			InitChain( &CurSrc->spots );
			AddSID( &SIDs, CurSrc );
			CurSrc->baseline = srcmark;
			srcmark = 0;
			break;

		case TOKEN_NUMREFS:
			if (fscanf( infile, " %d\n", &CurSrc->numrefs ) != 1) {
				goto Parse_Error;
			}
			break;

		case TOKEN_FILEPOP:
			s = NEW_(SPOT_t);
			s->kind = tok.class;
			if (fscanf( infile, " @%d %d,0 %d,0\n",
				    &s->ref.sid, &s->line, &s->endline ) < 3) {
				goto Parse_Error;
			}
			InsertChain( &CurSrc->spots, s );
			break;

		case TOKEN_SKIP:
			s = NEW_(SPOT_t);
			s->kind = tok.class;
			if (fscanf( infile, " 0 %d,%d %d,%d\n", &s->line,
				    &s->col, &s->endline, &s->endcol ) < 4) {
				goto Parse_Error;
			}
			s->decl = 0;
			InsertChain( &CurSrc->spots, s );
			break;

		case TOKEN_CHARCONST:
		case TOKEN_MACRODEF:
		case TOKEN_MACROREF:
		case TOKEN_DEF:
		case TOKEN_DECLSPOT:
		case TOKEN_FUNCTION:
		case SPOT_Ref:
			s = NEW_(SPOT_t);
			s->kind = tok.class;
			if (fscanf( infile, " #%d %d,%d %d,%d\n",
				    &tok.uid, &s->line, &s->col,
				    &s->endline, &s->endcol ) < 5) {
				goto Parse_Error;
			}
			if (tok.uid > 0) {
				assert( tok.uid < NUM_IDS(UIDs) );
				decl = UIDs[tok.uid];
			} else {
				assert( -tok.uid < NUM_IDS(FakeUIDs) );
				decl = FakeUIDs[-tok.uid];
			}
			assert( decl );
			s->decl = decl;
			s->ref.dlink = InsertChain( &decl->refs, s );
			InsertChain( &CurSrc->spots, s );
			if (tok.class == SPOT_Decl
			    || tok.class == SPOT_Def) {
				InsertChain(&decl->decls,(void *) CurSrc->sid);
			}
			if (decl->defwaiting &&
			    CurSrc->sid == decl->defsid &&
			    s->line     == (int) decl->def) {
				/* Right decl, right file, right line.
				   There's a remote chance that this
				   identifier appears several times on this
				   line of the file and the first
				   occurrence isn't the def. */
				decl->def        = CurSrc->spots.prev;
				decl->defwaiting = 0;
				--num_waiting;
			}
			break;
		default:
			goto Parse_Error;
		}
		tok.class = getc(infile);
	}
	for (int j = NUM_IDS(UIDs)-1; j > 0; --j) {
		if (UIDs[j] && UIDs[j]->defwaiting) {
			/* Oops!  We never found the file/line that was
			   mentioned.  The assert below will fail, so print
			   out the UID for debugging */
			fprintf( stderr, "%d\n", j );
		}
	}
	assert( !num_waiting );
	fclose( infile );
	return (COMPILATION_UNIT_t){ SIDs, UIDs, FakeUIDs, Names_hash };

 Parse_Error:
	/* Release the stream first: when assertions are compiled out this
	   path returns normally and would otherwise leak the open file. */
	fclose( infile );
	assert( !"Parse error in phase-2/3/4 file" );
	return (COMPILATION_UNIT_t){0,0,0,0}; /* Make compiler happy */
} /* myerToken_Read */



/* ============================================================================
   Read phase 1 files.  This ought to be in myerParse.c, but that file is
   linked into cc1 instead of myer.  It could be linked into both programs,
   but it seems silly to have a file with two disjoint sets of code chosen
   by #ifdef!
*/

/* Table of record layouts for phase-1 input, terminated by an entry whose
   class is 0.  myerParse_Read() looks up the entry whose first field equals
   the record's class character, passes the third field to fscanf() as the
   format, and treats the record as a parse error unless fscanf() returns
   exactly the second field (the number of items expected to be assigned).

   scanf args: %1$=line, %2$=col, %3$=endline, %4$=endcol, %5$=uid, %6$=name */
static const struct INPUT_FORMAT Phase1_Formats[] = {
   { TOKEN_COMMENT,  0, "%*[^\n]\n" },
   { TOKEN_FILEPUSH, 2, "%1$d            %6$4095[^\n]\n" },
   { TOKEN_FILESKIP, 2, "%1$d            %6$4095[^\n]\n" },
   { TOKEN_FILEPOP,  3, "%1$d %3$d       %6$4095[^\n]\n" },
   { TOKEN_FILECHG,  3, "%1$d %3$d       %6$4095[^\n]\n" },
   { TOKEN_SKIP,     2, "%1$d-%3$d\n" },
   { TOKEN_MACRODEF, 3, "%1$d-%3$d       %6$4095s\n" },
   { TOKEN_PUBLIC,   1, "          #%5$d\n" },
   { TOKEN_DECLSPOT, 1, "          #%5$d\n" },
   { TOKEN_DEF,      1, "          #%5$d\n" },
   /* The second UID of an equate record is delivered in token.line */
   { TOKEN_EQUATEUID,2, "          #%5$d #%1$d\n" },
   { TOKEN_ENDSTMT,  2, "%1$d,%2$d\n" },
   { TOKEN_KEYWORD,  3, "%1$d,%2$d       %6$4095s\n" },
   { TOKEN_CHARCONST,3, "%1$d,%2$d       %6$4095[^\n]\n" },
   { TOKEN_STRCONST, 3, "%1$d,%2$d       %6$4095[^\n]\n" },
   { TOKEN_NUMBCONST,3, "%1$d,%2$d       %6$4095[^\n]\n" },
   { TOKEN_FUNCSTART,4, "%1$d,%2$d #%5$d %6$4095s +++++++++++++++++++++++++\n"},
   { TOKEN_FUNCEND,  4, "%1$d,%2$d #%5$d %6$4095s -------------------------\n"},
   { TOKEN_ENUMCONST,4, "%1$d,%2$d #%5$d %6$4095s\n" },
   { TOKEN_FIELD,    4, "%1$d,%2$d #%5$d %6$4095s\n" },
   { TOKEN_LABEL,    4, "%1$d,%2$d #%5$d %6$4095s\n" },
   { TOKEN_FUNCTION, 4, "%1$d,%2$d #%5$d %6$4095s\n" },
   { TOKEN_PARAMETER,4, "%1$d,%2$d #%5$d %6$4095s\n" },
   { TOKEN_STRUCT,   4, "%1$d,%2$d #%5$d %6$4095s\n" },
   { TOKEN_TYPEDEF,  4, "%1$d,%2$d #%5$d %6$4095s\n" },
   { TOKEN_VARIABLE, 4, "%1$d,%2$d #%5$d %6$4095s\n" },
   { TOKEN_MACROREF, 5, "%1$d,%2$d  %3$d,%4$d %6$4095s\n" },
   /* We put the UID for the last builtin in token.line, rather than
      token.uid, to avoid calling FindUID for this unreferenced UID. */
   { TOKEN_BUILTIN,  2, "           #%1$d %6$4095s\n\n" },
   { 0,              0, 0 }
};

/* ----------------------------------------------------------------------------
   Reads phase-1 records from IN and feeds each one to myerToken().

   Every record begins with a one-character class code; the rest of the
   record is parsed with the matching entry of Phase1_Formats.  Reading
   stops successfully at the TOKEN_ENDINPUT record.  An unknown class code
   prints a message and exits; a malformed record, or end of file before
   TOKEN_ENDINPUT, fails an assertion.
*/
static void
myerParse_Read( FILE *in )
{
	char name[4096];
	TOKEN_t token;
	int code;

	for (;;) {
		const struct INPUT_FORMAT *entry = Phase1_Formats;

		code = getc( in );
		if (code == EOF) {
			/* File doesn't end with TOKEN_ENDINPUT */
			break;
		}

		memset( &token, 0, sizeof token );
		name[0] = '\0';
		token.class = code;

		if (code == TOKEN_ENDINPUT) {
			/* We should read the final newline from the cc1
			   subprocess and then check for end of file, but
			   this stalls (under Linux 2.4.18) for some reason,
			   so neither check is made. */
			myerToken( &token );
			return;
		}

		/* Look up the record layout for this class */
		while (entry->class && entry->class != code) {
			++entry;
		}
		if (!entry->class) {
			fprintf(stderr,"Unknown class code in phase-1 file\n");
			exit( 1 );
		}

		if (fscanf( in, entry->fmt,
			    &token.line, &token.col,
			    &token.endline, &token.endcol,
			    &token.uid, name ) != entry->numread) {
			/* Malformed record */
			break;
		}

		if (name[0]) {
			/* If the string is too long, scanf will just truncate*/
			assert( strlen(name) < sizeof(name)-1 );
			token.name = name;
			TokenHash( Names_hash, &token );
		}
		myerToken( &token );
	}

	assert( !"Parse error in phase-1 file" );
} /* myerParse_Read */


/* ============================================================================
   API for phase 2
*/

/* ----------------------------------------------------------------------------
   Convert .c, .myer1, .myer2 (or .o, meaning the .myer2 beside it) file to
   phase-2 database.

   *FNAMEP is the input filename; the action is chosen by its extension.
   Files with any other extension (or none) are skipped and an all-zero
   unit is returned.  Otherwise the file-scope tables are rebuilt from the
   input and returned as { SIDs, UIDs, FakeUIDs, Names_hash }.

   Side effects on the argument: the extension of the string *FNAMEP is
   overwritten in place with ".c"; and if that file is not the same file
   (device/inode) as the first source file read, *FNAMEP is replaced by a
   copy of that source file's name.
*/
COMPILATION_UNIT_t
myerToken_From_File( char **fnamep )
{
	char *path, *ext;
	dev_t dev;
	ino_t ino;

	/* Start from a clean slate for this compilation unit */
	CurSrc = 0;
	LineDirective_name = 0;
	NextFakeUID = 0;
	NextSID = 0;
	FakeUIDs = 0;
	UIDs = 0;
	SIDs = 0;
	InitChain( &CurMacros );

	path = *fnamep;
	ext = strrchr( path, '.' );
	if (!ext) {
		ext = ".";
	}

	const int is_myer2  = !strcmp( ext, ".myer2" );
	const int is_object = !strcmp( ext, ".o" );
	const int is_myer1  = !strcmp( ext, ".myer1" );
	const int is_csrc   = !strcmp( ext, ".c" );

	if (!is_myer2 && !is_object && !is_myer1 && !is_csrc) {
		/* Skip it for now.  Phase 3 will deal with it.  */
		return (COMPILATION_UNIT_t) { 0, 0, 0, 0 };
	}

	if (is_myer2) {
		if (Verbose) {
			fprintf( stderr, "[2] < %s\n", path );
		}
		myerToken_Read( path, EOF );
	} else if (is_object) {
		/* Presumably, this is the link phase for a Makefile where
		   we wrote phase-2 files for each compilation unit */
		char *myer2_path;

		*ext = 0;
		myer2_path = concat( path, ".myer2", 0 );
		if (Verbose) {
			fprintf( stderr, "[2] < %s\n", myer2_path );
		}
		myerToken_Read( myer2_path, EOF );
		free( myer2_path );
	} else if (is_myer1) {
		Names_hash = Token_Hash_init();
		if (Verbose) {
			fprintf( stderr, "[1] < %s\n", path );
		}
		if (!freopen( path, "r", stdin )) {
			perror( path );
			exit( 1 );
		}
		myerParse_Read( stdin );
		Fixup_Line_Numbers();
		assert( !CurFun );
	} else {
		/* C source: run it through cc1 and parse the output */
		FILE *pipe;

		if (Verbose) {
			fprintf( stderr, "[1,2] < %s\n", path );
		}
		Names_hash = Token_Hash_init();
		pipe = Call_cc1(path,0);
		myerParse_Read( pipe );
		if (pclose(pipe)){
			/* Presumably, an error msg was already printed */
			exit( 1 );
		}
		Fixup_Line_Numbers();
		assert( !CurFun );
	}

	/* Change the argument so it will match the filename in
	   later passes.  This hack is visible: it causes 'ps' to
	   lie about the program's arguments, so perhaps it should
	   be replaced with some other hack. */
	strcpy( ext, ".c" );

	/* A file named "x.myer2" should generally be the result of
	   compiling a file named "x.c" */
	Stat_File( path, &dev, &ino );
	assert( SIDs && SIDs[1] );
	if (SIDs[1]->device != dev || SIDs[1]->inode != ino ) {
		fprintf( stderr, "Mismatch: %s\n", SIDs[1]->name );
		*fnamep = xstrdup(SIDs[1]->name);
	}

	return (COMPILATION_UNIT_t) { SIDs, UIDs, FakeUIDs, Names_hash };
} /* myerToken_From_File */
