/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/

/* lasciate ogne speranza, voi ch'intrate. */

#define	DEBUG

#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>

#include "awklib.h"
#include "awkgram.h"
#include "proto.h"


/*
 * HAT is the pseudo-character used for the ^ anchor.  It is deliberately
 * outside the 0..NCHARS-1 range of real input characters.
 */
#define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
				/* NCHARS is 2**n */
/* initial number of entries allocated for setvec/tmpset in awklib_makedfa() */
#define MAXLIN 22

/*
 * Accessors for regular-expression parse-tree nodes.  The generic awk node
 * fields are reused here with different meanings.
 */
#define TYPE(v)		(v)->nobj	/* badly overloaded here */
#define INFO(v)		(v)->ntype	/* badly overloaded here */
#define LEFT(v)		(v)->narg[0]
#define RIGHT(v)	(v)->narg[1]
#define PARENT(v)	(v)->nnext

/* groups of case labels, used directly inside switch (TYPE(...)) statements */
#define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
#define ELEAF	case EMPTYRE:		/* empty string in regexp */
#define UNARY	case STAR: case PLUS: case QUEST:

/* encoding in tree awknode_t:
	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
		left is index, right contains value or pointer to value
	unary (STAR, PLUS, QUEST): left is child, right is null
	binary (CAT, OR): left and right are children
	parent contains pointer to parent
*/


/* forward declarations of file-local helpers that are used before they are defined */
static	int	makeinit(awk_t *, fa_t *, int);
static	void	penter(awk_t *, awknode_t *);
static	void	freetr(awk_t *, awknode_t *);
static	int	hexstr(const uint8_t **);
static	void	overflo(awk_t *, const char *);
static	void	cfoll(awk_t *awkp, fa_t *, awknode_t *);
static	int	first(awk_t *, awknode_t *);
static	void	follow(awk_t *, awknode_t *);
static	int	member(int, const char *);
/* recursive-descent regular expression parser and its lexer */
static	awknode_t	*regexp(awk_t *);
static	awknode_t	*primary(awk_t *);
static	awknode_t	*concat(awk_t *, awknode_t *);
static	awknode_t	*alt(awk_t *, awknode_t *);
static int relex(awk_t *);
static awknode_t *unary(awk_t *awkp, awknode_t *);

/*
 * Make sure the per-state tables of fap (gototab, out, posns) can be
 * indexed by `state'.  Nothing happens when state+1 is already below
 * fap->state_count; otherwise the three tables are grown to state+11
 * entries and every new slot gets a zeroed transition row, out == 0 and
 * posns == NULL.  Allocation failure is reported through overflo().
 */
static void
resize_state(awk_t *awkp, fa_t *fap, int state)
{
	const char	*why = "out of memory in resize_state";
	void		*grown;
	int		 wanted;
	int		 slot;

	state++;
	if (state < fap->state_count) {
		return;
	}

	wanted = state + 10; /* needs to be tuned */

	if ((grown = realloc(fap->gototab, wanted * sizeof(fap->gototab[0]))) == NULL) {
		overflo(awkp, why);
		return;
	}
	fap->gototab = grown;

	if ((grown = realloc(fap->out, wanted * sizeof(fap->out[0]))) == NULL) {
		overflo(awkp, why);
		return;
	}
	fap->out = grown;

	if ((grown = realloc(fap->posns, wanted * sizeof(fap->posns[0]))) == NULL) {
		overflo(awkp, why);
		return;
	}
	fap->posns = grown;

	/* initialise only the slots that did not exist before */
	slot = fap->state_count;
	while (slot < wanted) {
		fap->gototab[slot] = calloc(1, NCHARS * sizeof (**fap->gototab));
		if (fap->gototab[slot] == NULL) {
			overflo(awkp, why);
			return;
		}
		fap->out[slot] = 0;
		fap->posns[slot] = NULL;
		++slot;
	}
	fap->state_count = wanted;
}

/*
 * Parse the regular expression text p into a syntax tree.
 * relex() is the scanner; it reads from awkp->prestr, while awkp->lastre
 * keeps the start of the text for error messages.
 * An empty expression yields a single EMPTYRE node.
 */
static awknode_t *
reparse(awk_t *awkp, const char *p)
{
	awknode_t *tree;

	DPRINTF(awkp, ("reparse <%s>\n", p) );
	/* prestr is the scan pointer, lastre remembers where the text began */
	awkp->prestr = (uint8_t *) p;
	awkp->lastre = awkp->prestr;
	awkp->rtok = relex(awkp);
	if (awkp->rtok != '\0') {
		tree = regexp(awkp);
		if (awkp->rtok != '\0') {
			/* the parser stopped before the end of the text */
			FATAL(awkp, "syntax error in regular expression %s at %s", awkp->lastre, awkp->prestr);
		}
		return tree;
	}
	/* GNU compatibility: an empty regexp matches anything */
	return awklib_op2(awkp, EMPTYRE, NIL, NIL);
}

/*
 * Build a fresh automaton for the regular expression s.
 * anchor is 1 for anchored matches, else 0.
 * The parsed expression is wrapped as  (ALL)* re FINAL, leaf positions are
 * numbered, follow sets are computed, and the first states are set up.
 */
static fa_t *
mkdfa(awk_t *awkp, const char *s, int anchor)
{
	awknode_t	*re;
	awknode_t	*prefix;
	awknode_t	*tree;
	fa_t		*dfa;

	re = reparse(awkp, s);

	/* put ALL STAR in front of reg.  exp. */
	prefix = awklib_op2(awkp, ALL, NIL, NIL);
	prefix = awklib_op2(awkp, STAR, prefix, NIL);
	tree = awklib_op2(awkp, CAT, prefix, re);
	/* put FINAL after reg.  exp. */
	tree = awklib_op2(awkp, CAT, tree, awklib_op2(awkp, FINAL, NIL, NIL));

	awkp->poscnt = 0;
	penter(awkp, tree);	/* enter parent pointers and leaf indices */

	dfa = calloc(1, sizeof(fa_t) + awkp->poscnt*sizeof(awkrrow_t));
	if (dfa == NULL) {
		overflo(awkp, "out of space for fa_t");
	}
	/* penter has computed number of positions in re */
	dfa->accept = awkp->poscnt-1;
	cfoll(awkp, dfa, tree);	/* set up follow sets */
	freetr(awkp, tree);

	resize_state(awkp, dfa, 1);
	dfa->posns[0] = calloc(1, dfa->re[0].lfollow[0]*sizeof(int));
	if (dfa->posns[0] == NULL) {
		overflo(awkp, "out of space in mkdfa");
	}
	dfa->posns[1] = calloc(1, sizeof(int));
	if (dfa->posns[1] == NULL) {
		overflo(awkp, "out of space in mkdfa");
	}
	/* state 1 has an empty position set */
	dfa->posns[1][0] = 0;

	dfa->initstat = makeinit(awkp, dfa, anchor);
	dfa->anchor = anchor;
	dfa->restr = (uint8_t *) awklib_tostring(awkp, s);
	return dfa;
}

/*
 * Compute the DFA transition out of state s on input c and return the
 * number of the destination state.
 *
 * c is either a real character (0 .. NCHARS-1) or the pseudo-character HAT.
 * The set of positions reachable from s on c is collected in awkp->setvec
 * (one flag per position) and flattened into awkp->tmpset as
 * { count, positions in descending order }.  If an existing state has the
 * same position list, that state is reused; otherwise a new state is
 * appended.  The result is cached in f->gototab[s][c], except for HAT:
 * a gototab row has only NCHARS entries and HAT is NCHARS+2, so a HAT
 * transition must never be stored there.
 */
static int
cgoto(awk_t *awkp, fa_t *f, int s, int c)
{
	int	i;
	int	j;
	int	k;
	int	*p;
	int	*q;

	assert(c == HAT || c < NCHARS);
	while (f->accept >= awkp->maxsetvec) {	/* guessing here! */
		awkp->maxsetvec *= 4;
		awkp->setvec = realloc(awkp->setvec, awkp->maxsetvec * sizeof(int));
		awkp->tmpset = realloc(awkp->tmpset, awkp->maxsetvec * sizeof(int));
		if (awkp->setvec == NULL || awkp->tmpset == NULL)
			overflo(awkp, "out of space in cgoto()");
	}
	for (i = 0; i <= f->accept; i++)
		awkp->setvec[i] = 0;
	awkp->setcnt = 0;
	resize_state(awkp, f, s);
	/* compute positions of gototab[s,c] into setvec */
	p = f->posns[s];
	for (i = 1; i <= *p; i++) {
		if ((k = f->re[p[i]].ltype) != FINAL) {
			/* does the leaf at position p[i] accept c? */
			if ((k == CHAR && c == awklib_ptoi(f->re[p[i]].lval.np))
			 || (k == DOT && c != 0 && c != HAT)
			 || (k == ALL && c != 0)
			 || (k == EMPTYRE && c != 0)
			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
				/* yes: everything that may follow it becomes active */
				q = f->re[p[i]].lfollow;
				for (j = 1; j <= *q; j++) {
					if (q[j] >= awkp->maxsetvec) {
						awkp->maxsetvec *= 4;
						awkp->setvec = realloc(awkp->setvec, awkp->maxsetvec * sizeof(int));
						awkp->tmpset = realloc(awkp->tmpset, awkp->maxsetvec * sizeof(int));
						if (awkp->setvec == NULL || awkp->tmpset == NULL)
							overflo(awkp, "cgoto overflow");
					}
					if (awkp->setvec[q[j]] == 0) {
						awkp->setcnt++;
						awkp->setvec[q[j]] = 1;
					}
				}
			}
		}
	}
	/* determine if setvec is a previous state */
	awkp->tmpset[0] = awkp->setcnt;
	j = 1;
	for (i = f->accept; i >= 0; i--)
		if (awkp->setvec[i]) {
			awkp->tmpset[j++] = i;
		}

	resize_state(awkp, f, f->curstat > s ? f->curstat : s);
	/* tmpset == previous state? */
	for (i = 1; i <= f->curstat; i++) {
		p = f->posns[i];
		if ((k = awkp->tmpset[0]) != p[0])
			goto different;
		for (j = 1; j <= k; j++)
			if (awkp->tmpset[j] != p[j])
				goto different;
		/* setvec is state i */
		if (c != HAT)
			f->gototab[s][c] = i;
		return i;
	  different:;
	}

	/* add tmpset to current set of states */
	++(f->curstat);
	resize_state(awkp, f, f->curstat);
	for (i = 0; i < NCHARS; i++)
		f->gototab[f->curstat][i] = 0;
	XFREE(f->posns[f->curstat]);
	if ((p = calloc(1, (awkp->setcnt+1)*sizeof(int))) == NULL)
		overflo(awkp, "out of space in cgoto");

	f->posns[f->curstat] = p;
	/*
	 * Same guard as in the "existing state" path above: HAT lies beyond
	 * the end of the NCHARS-entry row, so storing it would write out of
	 * bounds.
	 */
	if (c != HAT)
		f->gototab[s][c] = f->curstat;
	for (i = 0; i <= awkp->setcnt; i++)
		p[i] = awkp->tmpset[i];
	/* the new state accepts if it contains the FINAL position */
	if (awkp->setvec[f->accept])
		f->out[f->curstat] = 1;
	else
		f->out[f->curstat] = 0;
	return f->curstat;
}

/*
 * Set up state 2 (the start state) of f and return the state matching
 * should begin in, which is the state reached from state 2 on the HAT
 * pseudo-character.
 *
 * State 2 is the follow set of position 0.  Position 0 is the ALL leaf
 * that mkdfa() puts in front of the expression, since penter() numbers
 * leaves left to right.  For anchored automata, position 0 is removed
 * again so the match cannot start later in the input.
 */
static int
makeinit(awk_t *awkp, fa_t *f, int anchor)
{
	int	i;
	int	k;

	resize_state(awkp, f, 2);
	f->curstat = 2;
	f->out[2] = 0;
	/* k = number of positions in the follow set of position 0 */
	k = *(f->re[0].lfollow);
	XFREE(f->posns[2]);			
	if ((f->posns[2] = calloc(1, (k+1)*sizeof(int))) == NULL)
		overflo(awkp, "out of space in makeinit");
	/* copy the count (index 0) and the k positions */
	for (i=0; i <= k; i++) {
		(f->posns[2])[i] = (f->re[0].lfollow)[i];
	}
	/*
	 * Follow sets are stored in descending order (see cfoll), so if the
	 * accepting position is present it is the first entry.
	 */
	if ((f->posns[2])[1] == f->accept)
		f->out[2] = 1;
	for (i=0; i < NCHARS; i++)
		f->gototab[2][i] = 0;
	/* consume the implicit beginning-of-input marker */
	f->curstat = cgoto(awkp, f, 2, HAT);
	if (anchor) {
		/* the last entry of the descending list is dropped by shrinking the count */
		*f->posns[2] = k-1;	/* leave out position 0 */
		/* posns[0] was allocated with k ints in mkdfa(): count plus k-1 positions */
		for (i=0; i < k; i++) {
			(f->posns[0])[i] = (f->posns[2])[i];
		}

		f->out[0] = f->out[2];
		if (f->curstat != 2) {
			resize_state(awkp, f, f->curstat);
			/* NOTE(review): presumably drops position 0 from the post-HAT state as well */
			--(*f->posns[f->curstat]);
		}
	}
	return f->curstat;
}

/*
 * Walk the parse tree rooted at p, giving every leaf the next free
 * position number (stored in INFO, counted in awkp->poscnt) and making
 * every child point back at its parent.
 */
static void
penter(awk_t *awkp, awknode_t *p)
{
	awknode_t *lhs;
	awknode_t *rhs;

	switch (TYPE(p)) {
	default:	/* can't happen */
		FATAL(awkp, "can't happen: unknown type %d in penter", TYPE(p));
		break;
	case CAT:
	case OR:
		lhs = LEFT(p);
		rhs = RIGHT(p);
		penter(awkp, lhs);
		penter(awkp, rhs);
		PARENT(lhs) = p;
		PARENT(rhs) = p;
		break;
	UNARY
		lhs = LEFT(p);
		penter(awkp, lhs);
		PARENT(lhs) = p;
		break;
	ELEAF
	LEAF
		/* leaves are numbered in the order they are visited */
		INFO(p) = awkp->poscnt++;
		break;
	}
}

/*
 * Release the parse tree rooted at p: children first, then the node.
 * Only the nodes are released; whatever a leaf's RIGHT field refers to
 * is left alone.
 */
static void
freetr(awk_t *awkp, awknode_t *p)
{
	switch (TYPE(p)) {
	case CAT:
	case OR:
		freetr(awkp, LEFT(p));
		freetr(awkp, RIGHT(p));
		break;
	UNARY
		freetr(awkp, LEFT(p));
		break;
	ELEAF
	LEAF
		/* no children */
		break;
	default:	/* can't happen */
		FATAL(awkp, "can't happen: unknown type %d in freetr", TYPE(p));
		return;
	}
	XFREE(p);
}

/* in the parsing of regular expressions, metacharacters like . have */
/* to be seen literally;  \056 is not a metacharacter. */

/*
 * Evaluate the hexadecimal digits at *pp and return their value.
 * At most two digits (one 8-bit byte) are consumed; *pp is advanced past
 * them.  With no hex digit at *pp the result is 0 and *pp is unchanged.
 */
static int
hexstr(const uint8_t **pp)
{
	const uint8_t *cur = *pp;
	int value = 0;
	int taken = 0;

	while (taken < 2 && isxdigit(*cur)) {
		int ch = *cur;

		if (isdigit(ch)) {
			value = value * 16 + (ch - '0');
		} else if ('a' <= ch && ch <= 'f') {
			value = value * 16 + (ch - 'a' + 10);
		} else if ('A' <= ch && ch <= 'F') {
			value = value * 16 + (ch - 'A' + 10);
		}
		cur++;
		taken++;
	}
	*pp = cur;
	return value;
}

#define ISOCTDIGIT(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */

/*
 * Decode whatever follows a backslash.
 * On entry *pp points just past the backslash; on return it points past
 * the characters that were consumed.
 *
 * Recognised: \t \n \f \r \b \\, \x followed by up to two hex digits
 * (0 if there are none), and one to three octal digits.  Any other
 * character stands for itself.
 *
 * If the backslash was the last character of the string, nothing is
 * consumed and a literal backslash is returned.  *pp is left on the
 * terminating NUL so that callers never scan past the end of the string.
 */
static int
quoted(const uint8_t **pp)
{
	const uint8_t *p = *pp;
	int c;

	c = *p;
	if (c == '\0') {
		/* trailing backslash: stay on the terminator */
		return '\\';
	}
	p++;

	switch (c) {
	case 't':
		c = '\t';
		break;
	case 'n':
		c = '\n';
		break;
	case 'f':
		c = '\f';
		break;
	case 'r':
		c = '\r';
		break;
	case 'b':
		c = '\b';
		break;
	case '\\':
		break;
	case 'x':	/* hexadecimal goo follows */
		c = hexstr(&p);	/* yields 0 if no hex digit follows */
		break;
	default:
		if (ISOCTDIGIT(c)) {	/* \d \dd \ddd */
			int n = c - '0';

			if (ISOCTDIGIT(*p)) {
				n = 8 * n + *p++ - '0';
				if (ISOCTDIGIT(*p))
					n = 8 * n + *p++ - '0';
			}
			c = n;
		}
		/* otherwise the character stands for itself */
		break;
	}
	*pp = p;
	return c;
}

/*
 * add a character class
 *
 * argp is the raw text of a bracket expression as collected by relex().
 * Escape sequences are decoded via quoted() and ranges such as a-z are
 * expanded, giving an explicit list of member characters in
 * awkp->cclenterbuf.  The input string is released with XFREE and a copy
 * of the expanded list made by awklib_tostring() is returned.
 */
static char *
cclenter(awk_t *awkp, const char *argp)
{
	int	i;
	int	c;
	int	c2;
	const uint8_t *p = (const uint8_t *) argp;
	const uint8_t *op;
	uint8_t	*bp;

	/* remember the start of the input for the debug output and the XFREE below */
	op = p;
	if (awkp->cclenterbuf == NULL && (awkp->cclenterbuf = malloc(awkp->cclenterbufsz)) == NULL)
		FATAL(awkp, "out of space for character class [%.10s...] 1", p);
	bp = awkp->cclenterbuf;
	/* i counts the characters written so far; bp is the write position */
	for (i = 0; (c = *p++) != 0; ) {
		if (c == '\\') {
			c = quoted(&p);
		} else if (c == '-' && i > 0 && bp[-1] != 0) {
			/* a '-' that is neither first nor last denotes a range */
			if (*p != 0) {
				c = bp[-1];
				c2 = *p++;
				if (c2 == '\\')
					c2 = quoted(&p);
				if (c > c2) {	/* empty; ignore */
					/* also take back the range's start character */
					bp--;
					i--;
					continue;
				}
				/* the start character is already stored; append the rest up to c2 */
				while (c < c2) {
					if (!awklib_adjbuf(awkp, &awkp->cclenterbuf, &awkp->cclenterbufsz, (int)(bp - awkp->cclenterbuf+2), 100, &bp, "cclenter1"))
						FATAL(awkp, "out of space for character class [%.10s...] 2", p);
					*bp++ = ++c;
					i++;
				}
				continue;
			}
		}
		/* ordinary (or decoded) character; a trailing '-' also ends up here */
		if (!awklib_adjbuf(awkp, &awkp->cclenterbuf, &awkp->cclenterbufsz, (int)(bp - awkp->cclenterbuf+2), 100, &bp, "cclenter2"))
			FATAL(awkp, "out of space for character class [%.10s...] 3", p);
		*bp++ = c;
		i++;
	}
	*bp = 0;
	DPRINTF(awkp, ("cclenter: in = |%s|, out = |%s|\n", op, awkp->cclenterbuf) );
	/* the input string is released here */
	XFREE(op);
	return (char *) awklib_tostring(awkp, (char *) awkp->cclenterbuf);
}

/*
 * Report a resource failure while compiling or running a regular
 * expression.  s names the place that failed; at most 30 characters of
 * it are shown in the FATAL message.
 */
static void
overflo(awk_t *awkp, const char *s)
{
	FATAL(awkp, "regular expression too big: %.30s...", s);
}

/*
 * For every leaf below v, record its type and value in f->re[] and store
 * its follow set in f->re[leaf].lfollow as { count, positions in
 * descending order }.
 */
static void
cfoll(awk_t *awkp, fa_t *f, awknode_t *v)
{
	int	 pos;
	int	 idx;
	int	 fill;
	int	*set;

	switch (TYPE(v)) {
	UNARY
		cfoll(awkp, f, LEFT(v));
		return;
	case CAT:
	case OR:
		cfoll(awkp, f, LEFT(v));
		cfoll(awkp, f, RIGHT(v));
		return;
	ELEAF
	LEAF
		break;
	default:	/* can't happen */
		FATAL(awkp, "can't happen: unknown type %d in cfoll", TYPE(v));
		return;
	}

	/* v is a leaf */
	pos = INFO(v);
	f->re[pos].ltype = TYPE(v);
	f->re[pos].lval.np = RIGHT(v);

	while (f->accept >= awkp->maxsetvec) {	/* guessing here! */
		awkp->maxsetvec *= 4;
		awkp->setvec = realloc(awkp->setvec, awkp->maxsetvec * sizeof(int));
		awkp->tmpset = realloc(awkp->tmpset, awkp->maxsetvec * sizeof(int));
		if (awkp->setvec == NULL || awkp->tmpset == NULL) {
			overflo(awkp, "out of space in cfoll()");
		}
	}

	/* start from an empty set */
	idx = 0;
	while (idx <= f->accept) {
		awkp->setvec[idx] = 0;
		idx++;
	}
	awkp->setcnt = 0;
	follow(awkp, v);	/* computes setvec and setcnt */

	set = calloc(1, (awkp->setcnt+1)*sizeof(int));
	if (set == NULL) {
		overflo(awkp, "out of space building follow set");
	}
	f->re[pos].lfollow = set;

	/* flatten the flags into a counted list, highest position first */
	set[0] = awkp->setcnt;
	fill = 1;
	for (idx = f->accept; idx >= 0; idx--) {
		if (awkp->setvec[idx] == 1) {
			set[fill++] = idx;
		}
	}
}

/*
 * Add to awkp->setvec the leaves at which a match of the subtree p can
 * begin, counting newly added ones in awkp->setcnt.
 * Returns 0 if p matches the empty string, 1 if it does not.
 */
static int
first(awk_t *awkp, awknode_t *p)
{
	int	pos;
	int	rhs_empty;
	int	lhs_empty;

	switch (TYPE(p)) {
	case STAR:
	case QUEST:
		/* optional: collect the operand's leaves, but always nullable */
		(void) first(awkp, LEFT(p));
		return 0;
	case PLUS:
		return (first(awkp, LEFT(p)) == 0) ? 0 : 1;
	case CAT:
		/* the right side only matters when the left side can be empty */
		if (first(awkp, LEFT(p)) != 0) {
			return 1;
		}
		return (first(awkp, RIGHT(p)) == 0) ? 0 : 1;
	case OR:
		/* both branches contribute; right is visited first */
		rhs_empty = (first(awkp, RIGHT(p)) == 0);
		lhs_empty = (first(awkp, LEFT(p)) == 0);
		return (lhs_empty || rhs_empty) ? 0 : 1;
	ELEAF
	LEAF
		pos = INFO(p);	/* look for high-water mark of subscripts */
		while (awkp->setcnt >= awkp->maxsetvec || pos >= awkp->maxsetvec) {	/* guessing here! */
			awkp->maxsetvec *= 4;
			awkp->setvec = realloc(awkp->setvec, awkp->maxsetvec * sizeof(int));
			awkp->tmpset = realloc(awkp->tmpset, awkp->maxsetvec * sizeof(int));
			if (awkp->setvec == NULL || awkp->tmpset == NULL) {
				overflo(awkp, "out of space in first()");
			}
		}
		if (TYPE(p) == EMPTYRE) {
			/* the empty regexp contributes no position */
			awkp->setvec[pos] = 0;
			return 0;
		}
		if (awkp->setvec[pos] != 1) {
			awkp->setvec[pos] = 1;
			awkp->setcnt++;
		}
		if (TYPE(p) == CCL && (*(char *) RIGHT(p)) == '\0') {
			return 0;		/* empty CCL */
		}
		return 1;
	default:
		break;
	}
	FATAL(awkp, "can't happen: unknown type %d in first", TYPE(p));	/* can't happen */
	return -1;
}

/*
 * Add to awkp->setvec the leaves that can come directly after v.
 * Climbs from v towards the root and stops at a FINAL node, at a
 * concatenation whose right side cannot be empty, or at a parent of
 * unknown type.
 */
static void
follow(awk_t *awkp, awknode_t *v)
{
	awknode_t *up;

	while (TYPE(v) != FINAL) {
		up = PARENT(v);
		switch (TYPE(up)) {
		case STAR:
		case PLUS:
			/* a repetition may be followed by itself */
			first(awkp, v);
			break;
		case OR:
		case QUEST:
			break;
		case CAT:
			/*
			 * Left operand: the right operand follows, and only if
			 * that can be empty does the search continue upwards.
			 * Right operand: continue upwards unconditionally.
			 */
			if (v == LEFT(up) && first(awkp, RIGHT(up)) != 0) {
				return;
			}
			break;
		default:
			return;
		}
		v = up;
	}
}

/*
 * Return 1 if character code c occurs in the NUL-terminated string sarg,
 * 0 otherwise.  The bytes of sarg are compared as unsigned values; the
 * terminator itself never matches.
 */
static int
member(int c, const char *sarg)
{
	const uint8_t *scan;

	for (scan = (const uint8_t *) sarg; *scan != 0; scan++) {
		if (*scan == c) {
			return 1;
		}
	}
	return 0;
}

/*
 * Run the automaton f over the string p0 and return 1 as soon as an
 * accepting state is reached, 0 if the whole string (terminator included)
 * is consumed without reaching one.  Missing transitions are computed on
 * demand with cgoto().
 */
int
awklib_match(awk_t *awkp, fa_t *f, const char *p0)
{
	const uint8_t	*cur = (const uint8_t *) p0;
	int		 state = f->initstat;
	int		 cached;

	assert (state < f->state_count);

	if (f->out[state]) {
		return 1;
	}
	for (;;) {
		/* assert(*cur < NCHARS); */
		cached = f->gototab[state][*cur];
		state = (cached != 0) ? cached : cgoto(awkp, f, state, *cur);

		assert (state < f->state_count);

		if (f->out[state]) {
			return 1;
		}
		/* the terminating NUL is fed to the automaton too, then we stop */
		if (*cur++ == 0) {
			break;
		}
	}
	return 0;
}

/*
 * longest match, for sub
 *
 * Tries every start position of p0 in turn, including the position of the
 * terminating NUL.  On success returns 1 with awkp->patbeg set to the
 * start of the match and awkp->patlen to its length; returns 0 if nothing
 * matches.
 */
int
awklib_pmatch(awk_t *awkp, fa_t *f, const char *p0)
{
	int s, ns;
	uint8_t *p = (uint8_t *) p0;
	uint8_t *q;

	s = f->initstat;
	assert(s < f->state_count);
	awkp->patbeg = p;
	awkp->patlen = -1;
	/* outer loop: p is the candidate start of the match */
	do {
		q = p;
		/* inner loop: q scans forward from p */
		do {
			/* each accepting state seen overwrites patlen, so the longest wins */
			if (f->out[s])		/* final state */
				awkp->patlen = q-p;
			/* assert(*q < NCHARS); */
			/* use the cached transition, or compute it on demand */
			if ((ns = f->gototab[s][*q]) != 0)
				s = ns;
			else
				s = cgoto(awkp, f, s, *q);

			assert(s < f->state_count);

			/* state 1 has an empty position set (see mkdfa) */
			if (s == 1) {	/* no transition */
				if (awkp->patlen >= 0) {
					awkp->patbeg = p;
					return(1);
				}
				else
					goto nextin;	/* no match */
			}
		} while (*q++ != 0);
		/* the terminating NUL was consumed as well; q is now one past it */
		if (f->out[s])
			awkp->patlen = q-p-1;	/* don't count $ */
		if (awkp->patlen >= 0) {
			awkp->patbeg = p;
			return(1);
		}
nextin:
		/* later start positions begin in state 2, not in f->initstat */
		s = 2;
	} while (*p++ != 0);
	return (0);
}

/*
 * non-empty match, for sub
 *
 * Like awklib_pmatch(), but a match of length 0 does not count and the
 * position of the terminating NUL is not tried as a start.  On success
 * returns 1 with awkp->patbeg and awkp->patlen set; otherwise returns 0.
 * awkp->patbeg is only written on success.
 */
int
awklib_nematch(awk_t *awkp, fa_t *f, const char *p0)
{
	int s, ns;
	uint8_t *p = (uint8_t *) p0;
	uint8_t *q;

	s = f->initstat;
	assert(s < f->state_count);

	awkp->patlen = -1;
	/* outer loop: p is the candidate start of the match */
	while (*p) {
		q = p;
		/* inner loop: q scans forward from p */
		do {
			/* each accepting state seen overwrites patlen, so the longest wins */
			if (f->out[s])		/* final state */
				awkp->patlen = q-p;
			/* assert(*q < NCHARS); */
			/* use the cached transition, or compute it on demand */
			if ((ns = f->gototab[s][*q]) != 0)
				s = ns;
			else
				s = cgoto(awkp, f, s, *q);

			assert(s < f->state_count);

			/* state 1 has an empty position set (see mkdfa) */
			if (s == 1) {	/* no transition */
				if (awkp->patlen > 0) {
					awkp->patbeg = p;
					return(1);
				} else
					goto nnextin;	/* no nonempty match */
			}
		} while (*q++ != 0);
		/* the terminating NUL was consumed as well; q is now one past it */
		if (f->out[s])
			awkp->patlen = q-p-1;	/* don't count $ */
		if (awkp->patlen > 0 ) {
			awkp->patbeg = p;
			return(1);
		}
nnextin:
		/* later start positions begin in state 2, not in f->initstat */
		s = 2;
		p++;
	}
	return (0);
}

/*
 * Top-level parse of a regular expression:
 *	regexp := primary, extended by concat(), extended by alt()
 * The current token is expected in awkp->rtok.
 */
static awknode_t *
regexp(awk_t *awkp)
{
	awknode_t *tree;

	tree = primary(awkp);
	tree = concat(awkp, tree);
	return alt(awkp, tree);
}

/*
 * Parse one primary: a single character, '.', a bracket expression, an
 * anchor or a parenthesised sub-expression, followed by any postfix
 * operators (handled by unary()).
 * The current token is in awkp->rtok; on return it is the first token
 * after the primary.
 */
static awknode_t *
primary(awk_t *awkp)
{
	awknode_t	*leaf;
	int		 tok = awkp->rtok;

	switch (tok) {
	case CHAR:
		/* the value must be picked up before relex() overwrites rlxval */
		leaf = awklib_op2(awkp, CHAR, NIL, awklib_itonp(awkp->rlxval));
		break;
	case CCL:
	case NCCL:
		leaf = awklib_op2(awkp, tok, NIL, (awknode_t *) cclenter(awkp, (char *) awkp->rlxstr));
		break;
	case ALL:
	case EMPTYRE:
		awkp->rtok = relex(awkp);
		return unary(awkp, awklib_op2(awkp, ALL, NIL, NIL));
	case DOT:
		awkp->rtok = relex(awkp);
		return unary(awkp, awklib_op2(awkp, DOT, NIL, NIL));
	case '^':
		/* ^ is represented as the pseudo-character HAT */
		awkp->rtok = relex(awkp);
		return unary(awkp, awklib_op2(awkp, CHAR, NIL, awklib_itonp(HAT)));
	case '$':
		/* $ is represented as a CHAR leaf with a null value */
		awkp->rtok = relex(awkp);
		return unary(awkp, awklib_op2(awkp, CHAR, NIL, NIL));
	case '(':
		awkp->rtok = relex(awkp);
		if (awkp->rtok == ')') {	/* special pleading for () */
			awkp->rtok = relex(awkp);
			return unary(awkp, awklib_op2(awkp, CCL, NIL, (awknode_t *) awklib_tostring(awkp, "")));
		}
		leaf = regexp(awkp);
		if (awkp->rtok == ')') {
			break;
		}
		FATAL(awkp, "syntax error in regular expression %s at %s", awkp->lastre, awkp->prestr);
		/* FALLTHROUGH */
	default:
		FATAL(awkp, "illegal primary in regular expression %s at %s", awkp->lastre, awkp->prestr);
		return 0;	/*NOTREACHED*/
	}

	/* common tail: step past the token just handled, then apply * + ? */
	awkp->rtok = relex(awkp);
	return unary(awkp, leaf);
}

/*
 * Extend np with every primary that follows by concatenation: as long as
 * the current token can start a primary, parse it and join it to the
 * tree built so far with a CAT node.  Returns the resulting tree.
 */
static awknode_t *
concat(awk_t *awkp, awknode_t *np)
{
	for (;;) {
		switch (awkp->rtok) {
		case CHAR:
		case DOT:
		case ALL:
		case EMPTYRE:
		case CCL:
		case NCCL:
		case '$':
		case '(':
			np = awklib_op2(awkp, CAT, np, primary(awkp));
			continue;
		default:
			return np;
		}
	}
}

/*
 * Extend np with every alternative that follows: for each OR token, parse
 * the next concatenation and join it to the tree built so far with an OR
 * node.  Returns the resulting tree.
 */
static awknode_t *
alt(awk_t *awkp, awknode_t *np)
{
	while (awkp->rtok == OR) {
		awkp->rtok = relex(awkp);
		np = awklib_op2(awkp, OR, np, concat(awkp, primary(awkp)));
	}
	return np;
}

/*
 * Apply postfix operators to np: every STAR, PLUS or QUEST token that
 * follows wraps the tree built so far in a node of that type.
 * Returns the resulting tree.
 */
static awknode_t *
unary(awk_t *awkp, awknode_t *np)
{
	int op;

	for (;;) {
		op = awkp->rtok;
		if (op != STAR && op != PLUS && op != QUEST) {
			return np;
		}
		awkp->rtok = relex(awkp);
		np = awklib_op2(awkp, op, np, NIL);
	}
}

/*
 * Character class definitions conformant to the POSIX locale as
 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
 * and operating character sets are both ASCII (ISO646) or supersets
 * thereof.
 *
 * Note that to avoid overflowing the temporary buffer used in
 * relex(), the expanded character class (prior to range expansion)
 * must be less than twice the size of their full name.
 */

/* Because isblank doesn't show up in any of the header files on any
 * system i use, it's defined here.  if some other locale has a richer
 * definition of "blank", define HAS_ISBLANK and provide your own
 * version.
 * the parentheses here are an attempt to find a path through the maze
 * of macro definition and/or function and/or version provided.  thanks
 * to nelson beebe for the suggestion; let's see if it works everywhere.
 */

#ifndef HAS_ISBLANK

/*
 * Fallback isblank(): true for space and horizontal tab only.
 * The name is parenthesised so that a macro of the same name is not
 * expanded here.
 */
int (isblank)(int c)
{
	switch (c) {
	case ' ':
	case '\t':
		return 1;
	default:
		return 0;
	}
}

#endif

/*
 * Table of the POSIX character class names accepted inside bracket
 * expressions ([:name:]), consulted by relex().  The entry with a NULL
 * name ends the table.
 */
static const struct charclass {
	const char *cc_name;	/* class name without the [: :] delimiters */
	int cc_namelen;		/* length of cc_name */
	int (*cc_func)(int);	/* predicate deciding membership of a character */
} charclasses[] = {
	{ "alnum",	5,	isalnum },
	{ "alpha",	5,	isalpha },
	{ "blank",	5,	isblank },
	{ "cntrl",	5,	iscntrl },
	{ "digit",	5,	isdigit },
	{ "graph",	5,	isgraph },
	{ "lower",	5,	islower },
	{ "print",	5,	isprint },
	{ "punct",	5,	ispunct },
	{ "space",	5,	isspace },
	{ "upper",	5,	isupper },
	{ "xdigit",	6,	isxdigit },
	{ NULL,		0,	NULL },
};

/*
 * lexical analyzer for reparse
 *
 * Reads the next token from awkp->prestr and returns its code:
 *	OR STAR PLUS QUEST DOT	for | * + ? .
 *	'^' '$' '(' ')'		for those characters themselves
 *	CHAR			any other character or an escape sequence;
 *				the value is left in awkp->rlxval
 *	CCL / NCCL		a bracket expression / a negated one; its raw
 *				text is left in awkp->rlxstr for cclenter()
 *	'\0'			end of the expression (prestr stays on it)
 *
 * Inside a bracket expression, POSIX class names such as [:alpha:] are
 * replaced by the characters they stand for.  cclenter() treats a
 * backslash in that text as the start of an escape sequence, so a
 * backslash produced by a class (punct, graph, print) is written as \\
 * here; otherwise it would be dropped from the class and swallow the
 * character after it.
 */
static int
relex(awk_t *awkp)
{
	int c, n;
	int cflag;
	uint8_t *bp;
	const struct charclass *cc;
	int i;

	switch (c = *awkp->prestr++) {
	case '|': return OR;
	case '*': return STAR;
	case '+': return PLUS;
	case '?': return QUEST;
	case '.': return DOT;
	case '\0': awkp->prestr--; return '\0';
	case '^':
	case '$':
	case '(':
	case ')':
		return c;
	case '\\':
		awkp->rlxval = quoted((const uint8_t **)&awkp->prestr);
		return CHAR;
	default:
		awkp->rlxval = c;
		return CHAR;
	case '[':
		if (awkp->relexbuf == NULL && (awkp->relexbuf = malloc(awkp->relexbufsz)) == NULL)
			FATAL(awkp, "out of space in reg expr %.10s..", awkp->lastre);
		bp = awkp->relexbuf;
		/* a leading ^ negates the class */
		if (*awkp->prestr == '^') {
			cflag = 1;
			awkp->prestr++;
		}
		else
			cflag = 0;
		n = 2 * strlen((const char *) awkp->prestr)+1;
		if (!awklib_adjbuf(awkp, &awkp->relexbuf, &awkp->relexbufsz, n, n, &bp, "relex1"))
			FATAL(awkp, "out of space for reg expr %.10s...", awkp->lastre);
		for (; ; ) {
			if ((c = *awkp->prestr++) == '\\') {
				/* keep escapes as they are; cclenter() decodes them */
				*bp++ = '\\';
				if ((c = *awkp->prestr++) == '\0')
					FATAL(awkp, "nonterminated character class %.20s...", awkp->lastre);
				*bp++ = c;
			/* } else if (c == '\n') { */
			/* 	FATAL(awkp, "newline in character class %.20s...", awkp->lastre); */
			} else if (c == '[' && *awkp->prestr == ':') {
				/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
				for (cc = charclasses; cc->cc_name; cc++)
					if (strncmp((const char *) awkp->prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
						break;
				if (cc->cc_name != NULL && awkp->prestr[1 + cc->cc_namelen] == ':' &&
				    awkp->prestr[2 + cc->cc_namelen] == ']') {
					awkp->prestr += cc->cc_namelen + 3;
					for (i = 1; i < NCHARS; i++) {
						/* +2: room for the character plus a possible escaping backslash */
						if (!awklib_adjbuf(awkp, &awkp->relexbuf, &awkp->relexbufsz, (int)(bp - awkp->relexbuf+2), 100, &bp, "relex2"))
						    FATAL(awkp, "out of space for reg expr %.10s...", awkp->lastre);
						if (cc->cc_func(i)) {
							/* escape backslash so cclenter() keeps it as a member */
							if (i == '\\') {
								*bp++ = '\\';
								n++;
							}
							*bp++ = i;
							n++;
						}
					}
				} else
					*bp++ = c;
			} else if (c == '\0') {
				FATAL(awkp, "nonterminated character class %.20s", awkp->lastre);
			} else if (bp == awkp->relexbuf) {	/* 1st char is special */
				/* so a ']' in first place is an ordinary member */
				*bp++ = c;
			} else if (c == ']') {
				*bp++ = 0;
				awkp->rlxstr = (uint8_t *) awklib_tostring(awkp, (char *) awkp->relexbuf);
				if (cflag == 0)
					return CCL;
				else
					return NCCL;
			} else
				*bp++ = c;
		}
	}
}


/*
 * Release a finite automaton and everything it owns: the per-state
 * transition rows and position lists, the per-position follow sets, the
 * expanded text of CCL/NCCL leaves, the saved source string and finally
 * the tables and the automaton itself.  A NULL f is ignored.
 */
static void
freefa(fa_t *f)
{
	int state;
	int pos;

	if (f == NULL) {
		return;
	}

	state = 0;
	while (state < f->state_count) {
		XFREE(f->gototab[state]);
		XFREE(f->posns[state]);
		state++;
	}

	pos = 0;
	while (pos <= f->accept) {
		XFREE(f->re[pos].lfollow);
		switch (f->re[pos].ltype) {
		case CCL:
		case NCCL:
			/* these leaves carry an allocated character list */
			XFREE(f->re[pos].lval.np);
			break;
		default:
			break;
		}
		pos++;
	}

	XFREE(f->restr);
	XFREE(f->out);
	XFREE(f->posns);
	XFREE(f->gototab);
	XFREE(f);
}

/*
 * Return an automaton for the regular expression s (anchor: 1 for an
 * anchored match, else 0).
 *
 * While the program is being compiled a fresh automaton is always built
 * and not cached.  Otherwise awkp->fatab is searched for an automaton
 * with the same text and anchoring; on a miss a new one is built and
 * stored, replacing the least recently used entry once the table holds
 * NFA automata.
 */
fa_t *
awklib_makedfa(awk_t *awkp, const char *s, int anchor)
{
	fa_t	*dfa;
	fa_t	*cached;
	int	 slot;
	int	 victim;
	int	 oldest;

	if (awkp->setvec == NULL) {	/* first time through any RE */
		awkp->maxsetvec = MAXLIN;
		awkp->setvec = malloc(awkp->maxsetvec * sizeof(int));
		awkp->tmpset = malloc(awkp->maxsetvec * sizeof(int));
		if (awkp->setvec == NULL || awkp->tmpset == NULL) {
			overflo(awkp, "out of space initializing awklib_makedfa");
		}
	}

	if (awkp->stage == AWKLIB_COMPILING) {
		/* a constant for sure */
		return mkdfa(awkp, s, anchor);
	}

	/* is it there already? */
	for (slot = 0; slot < awkp->nfatab; slot++) {
		cached = awkp->fatab[slot];
		if (cached->anchor != anchor) {
			continue;
		}
		if (strcmp((const char *) cached->restr, s) != 0) {
			continue;
		}
		cached->use = awkp->make_dfa_now++;
		return cached;
	}

	dfa = mkdfa(awkp, s, anchor);

	if (awkp->nfatab < NFA) {	/* room for another */
		dfa->use = awkp->make_dfa_now++;
		awkp->fatab[awkp->nfatab] = dfa;
		awkp->nfatab++;
		return dfa;
	}

	/* table full: replace least-recently used */
	victim = 0;
	oldest = awkp->fatab[0]->use;
	for (slot = 1; slot < awkp->nfatab; slot++) {
		if (awkp->fatab[slot]->use < oldest) {
			oldest = awkp->fatab[slot]->use;
			victim = slot;
		}
	}
	freefa(awkp->fatab[victim]);
	awkp->fatab[victim] = dfa;
	dfa->use = awkp->make_dfa_now++;
	return dfa;
}
