/*------------------------------------------------------------*
 | preprocess.c                                               |
 | copyright 1999,  Andrew Sumner (andrew_sumner@bigfoot.com) |
 |                                                            |
 | This is a source file for the awka package, a translator   |
 | of the AWK programming language to ANSI C.                 |
 |                                                            |
 | This program is free software; you can redistribute it     |
 | and/or modify it under the terms of the GNU General Public |
 | License as published by the Free Software Foundation;      |
 | either version 2 of the License, or any later version.     |
 |                                                            |
 | This program is distributed in the hope that it will be    |
 | useful, but WITHOUT ANY WARRANTY; without even the implied |
 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR    |
 | PURPOSE.  See the GNU General Public License for more      |
 | details.                                                   |
 |                                                            |
 | You should have received a copy of the GNU General Public  |
 | License along with this program; if not, write to the      |
 | Free Software Foundation, Inc., 675 Mass Ave, Cambridge,   |
 | MA 02139, USA.                                             |
 *------------------------------------------------------------*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <limits.h>

#include <stdarg.h>
#include "awka.h"
#include "../config.h"
#include "mem.h"
#include "memory.h"
#include "awka_exe.h"

/* Globals defined in other translation units.  Comments describe only how
   the code visible in this file uses them. */
extern struct ivar_idx ivar[];            /* indexed by the result of findivar(); .vname is read */
extern int indent;                        /* not referenced by the functions visible here */
extern int split_req, split_max, mode, dol0_used;  /* not referenced by the functions visible here */
extern int var_allc, var_used;            /* allocated slots / slots in use in varname[] */
extern int litr_allc, litr_used;          /* allocated / used slots in litrname[] and litr_val[] */
extern int lits_allc, lits_used;          /* allocated / used slots in litsname[] and lits_val[] */
extern int litd_allc, litd_used;          /* allocated / used slots in litdname[] and litd_val[] */
extern char **varname, **litrname, **litsname, **litdname;  /* generated C identifiers */
extern char **litr_val, **lits_val, **litd_val;  /* literal text matching each lit?name[] entry */
extern FILE *outfp;                       /* stream preprocess() writes generated declarations to */
extern int awka_main;                     /* not referenced by the functions visible here */
extern char *awka_main_func;              /* not referenced by the functions visible here */
extern int cur_func;                      /* not referenced by the functions visible here */
extern int al_count;                      /* not referenced by the functions visible here */
extern int array_sort;                    /* not referenced by the functions visible here */
extern int range_no;                      /* not referenced by the functions visible here */
extern int max_call_args;                 /* raised by preprocess() to the largest atoi(arg) seen on a _CALL */

/* preprocess() treats a return of -1 as "not found" and any other value as
   an index into ivar[]. */
int findivar(char *);

/*
 * is_ext_builtin - report whether `name` appears in the ext_funcs table.
 *
 * The table is walked from its first entry up to (not including) the entry
 * whose name pointer is NULL; each entry name is compared with strcmp().
 *
 * name:    NUL-terminated string to look up (must not be NULL, it is passed
 *          straight to strcmp()).
 * returns: 1 if an entry with exactly this name exists, 0 otherwise.
 *
 * Defined with a prototype rather than an old-style parameter list so that
 * calls are type-checked, matching the other definitions in this file.
 */
int
is_ext_builtin(char *name)
{
  struct a_sc *fp;

  for (fp = ext_funcs; fp->name; fp++)
  {
    if (!strcmp(fp->name, name))
      return 1;
  }

  return 0;
}

/*
 * moveprog - copy one program entry over another, member by member.
 *
 * k: index in progcode[] of the entry to copy from.
 * i: index in progcode[] of the entry to overwrite.
 *
 * Only the members listed below are transferred.  Any other member of the
 * destination entry (for example ljumpfrom and foreverloop, which
 * preprocess() sets) keeps whatever value it already had.
 */
void
moveprog(int k, int i)
{
  /* one assignment per member: destination [i] takes the value of source [k] */
#define MOVEPROG_FIELD(member) (progcode[i].member = progcode[k].member)

  MOVEPROG_FIELD(op);
  MOVEPROG_FIELD(pop);
  MOVEPROG_FIELD(func);
  MOVEPROG_FIELD(val);
  MOVEPROG_FIELD(arg);
  MOVEPROG_FIELD(minst);
  MOVEPROG_FIELD(inst);
  MOVEPROG_FIELD(line);
  MOVEPROG_FIELD(jumpfrom);
  MOVEPROG_FIELD(jumpto);
  MOVEPROG_FIELD(earliest);
  MOVEPROG_FIELD(done);
  MOVEPROG_FIELD(endloop);
  MOVEPROG_FIELD(doloop);
  MOVEPROG_FIELD(context);
  MOVEPROG_FIELD(label);
  MOVEPROG_FIELD(varidx);
  MOVEPROG_FIELD(ftype);

#undef MOVEPROG_FIELD
}

/*
 * preprocess - prepare the parsed program in progcode[0..prog_no-1] for
 * translation and write the global declarations of the generated C file
 * to outfp.
 *
 * Stages, in order:
 *   1. One scan over every entry, switching on its opcode:
 *        - records whether BEGIN / MAIN / END sections exist;
 *        - rewrites variable references to C identifiers (builtin names
 *          from ivar[], user names suffixed with "_awk") and emits an
 *          "a_VAR *name = NULL;" line for each new user variable;
 *        - interns regex, double and string literals into the litr / litd /
 *          lits tables and replaces the operand with the generated name;
 *        - resolves jump operands (instruction numbers held in .minst)
 *          into progcode[] indexes and sets loop / label markers;
 *        - strips a trailing '(' from call and function names, collects
 *          the names of user defined functions and tracks max_call_args.
 *   2. A second scan deciding, per entry, whether a call refers to a user
 *      defined function or to an extended builtin from ext_funcs.
 *   3. A backwards scan relocating each _RANGE opcode to the end of its
 *      patterns.
 *   4. Emission of declarations for literals, user functions and the
 *      BEGIN / MAIN / END sections.
 *
 * Takes no arguments and returns nothing; all input and output goes
 * through file-scope globals.  Parse problems are reported through
 * awka_error(); the code following those calls reads as if awka_error()
 * does not return - TODO confirm.
 */
void
preprocess()
{
  int cur = 0;
  int i, j, j2, j3, k, p;
  char **functions, *x, *q;
  int func_no = 0, func_allc = 10;
  struct a_sc *ebp;

  /* functions[] collects the names of user defined functions (func_no used
     out of func_allc); varname[] is the global list of variable names. */
  functions = (char **) malloc( func_allc * sizeof(char *) );
  var_allc = 10;
  varname = (char **) malloc( var_allc * sizeof(char *) );

  /* PREPROCESSING - identify variables & code jumps */
  for (cur=0; cur<prog_no; cur++)
  {
    switch (progcode[cur].op)
    {
      case _END:
        end_used = TRUE;
        break;

      case _BEGIN:
        begin_used = TRUE;
        break;

      case _MAIN:
        main_used = TRUE;
        break;

      case _PUSHI:
        /* "@fs_shadow" is left untouched */
        if (!strcmp(progcode[cur].val, "@fs_shadow")) break;
        /* operands starting with '$' are retargeted to F_PUSHI */
        if (progcode[cur].val[0] == '$')
        {
          progcode[cur].op = F_PUSHI;
          progcode[cur].func = code[F_PUSHI-1].func;
          break;
        }
        /* fall through */
      case A_PUSHA:
      case AE_PUSHA:
      case AE_PUSHI:
        /* everything arriving here except a plain _PUSHI is registered in
           the array list */
        if (progcode[cur].op != _PUSHI)
          add2arraylist(progcode[cur].val);
        /* fall through */
      case _PUSHA:
        if ((i = findivar(progcode[cur].val)) != -1)
        {
          /* name is known to findivar(): substitute the C expression from
             ivar[]; ARGV becomes a call to awka_argv() unless the opcode is
             AE_PUSHI */
          if (!strcmp(ivar[i].vname, "a_bivar[a_ARGV]") && progcode[cur].op != AE_PUSHI)
          {
            progcode[cur].val = (char *) malloc(15);
            strcpy(progcode[cur].val, "awka_argv()");
          }
          else
          {
            progcode[cur].val = (char *) malloc(strlen(ivar[i].vname)+1);
            strcpy(progcode[cur].val, ivar[i].vname);
          }
          break;
        }
        /* adding _awk on end avoids conflicts with system defined names */
        x = (char *) malloc(strlen(progcode[cur].val) + 6);
        sprintf(x, "%s_awk", progcode[cur].val);
        free(progcode[cur].val);
        progcode[cur].val = x;

        /* look the suffixed name up among the variables seen so far */
        for (i=0; i<var_used; i++)
          if (!strcmp(varname[i], progcode[cur].val))
            break;

        if (i == var_used)
        {
          /* first sighting: grow varname[] in steps of 10 (done as soon as
             the table becomes full, so slot i always exists), record the
             name and declare the variable in the output */
          if (++var_used == var_allc)
          {
            var_allc += 10;
            varname = (char **) realloc(varname, var_allc * sizeof(char *));
          }
          varname[i] = progcode[cur].val;
          fprintf(outfp, "a_VAR *%s = NULL;\n",varname[i]);
        }
        break;

      case _MATCH0:
      case _MATCH1:
      case _PUSHC:
        /* only entries whose val starts with '0' carry a literal in .arg
           that gets interned here */
        if (progcode[cur].val[0] != '0')
          break;

        for (i=0; i<litr_used; i++)
          if (!strcmp(litr_val[i], progcode[cur].arg))
            break;

        if (i == litr_used)
        {
          /* new literal: make room (tables grow by 20 entries) */
          if (litr_allc == 0)
          {
            litr_allc = 20;
            litrname = (char **) malloc(20 * sizeof(char *));
            litr_val = (char **) malloc(20 * sizeof(char *));
          }
          else if (litr_used == litr_allc)
          {
            litr_allc += 20;
            litrname = (char **) realloc(litrname, litr_allc * sizeof(char *));
            litr_val = (char **) realloc(litr_val, litr_allc * sizeof(char *));
          }

          litrname[i] = (char *) malloc(20);
          sprintf(litrname[i], "_litr%d_awka", i);
          /* the table takes over the original arg string */
          litr_val[i] = progcode[cur].arg;
          litr_used++;
        }
        /* the entry now refers to the literal by its generated name */
        progcode[cur].arg = (char *) malloc(20);
        strcpy(progcode[cur].arg, litrname[i]);
        break;


      case _PUSHD:
        /* intern the literal held in val; the table keeps its own copy */
        for (i=0; i<litd_used; i++)
          if (!strcmp(litd_val[i], progcode[cur].val))
            break;

        if (i == litd_used)
        {
          if (litd_allc == 0)
          {
            litd_allc = 20;
            litdname = (char **) malloc(20 * sizeof(char *));
            litd_val = (char **) malloc(20 * sizeof(char *));
          }
          else if (litd_used == litd_allc)
          {
            litd_allc += 20;
            litdname = (char **) realloc(litdname, litd_allc * sizeof(char *));
            litd_val = (char **) realloc(litd_val, litd_allc * sizeof(char *));
          }

          litdname[i] = (char *) malloc(20);
          sprintf(litdname[i], "_litd%d_awka", i);
          litd_val[i] = (char *) malloc(strlen(progcode[cur].val)+1);
          strcpy(litd_val[i], progcode[cur].val);
          litd_used++;
        }

        progcode[cur].val = (char *) malloc(20);
        strcpy(progcode[cur].val, litdname[i]);
        break;

      case _PUSHS:
        /* intern the literal held in val; the table keeps its own copy */
        for (i=0; i<lits_used; i++)
          if (!strcmp(lits_val[i], progcode[cur].val))
            break;

        if (i == lits_used)
        {
          if (lits_allc == 0)
          {
            lits_allc = 20;
            litsname = (char **) malloc(20 * sizeof(char *));
            lits_val = (char **) malloc(20 * sizeof(char *));
          }
          else if (lits_used == lits_allc)
          {
            lits_allc += 20;
            litsname = (char **) realloc(litsname, lits_allc * sizeof(char *));
            lits_val = (char **) realloc(lits_val, lits_allc * sizeof(char *));
          }

          litsname[i] = (char *) malloc(20);
          sprintf(litsname[i], "_lits%d_awka", i);
          lits_val[i] = (char *) malloc(strlen(progcode[cur].val)+3);
          strcpy(lits_val[i], progcode[cur].val);
          lits_used++;
        }

        progcode[cur].val = (char *) malloc(20);
        strcpy(progcode[cur].val, litsname[i]);
        break;

      case _LJNZ:
      case _LJZ:
        /* val holds the target instruction number; search forwards or
           backwards for the entry whose minst equals it, giving up once
           minst leaves the span between this entry and the target */
        j = atoi(progcode[cur].val);
        progcode[cur].jumpto = -1;
        if (j > progcode[cur].minst)
        {
          for (i=cur+1; i<prog_no; i++)
          {
            if (progcode[i].minst > j ||
                progcode[i].minst < progcode[cur].minst)
              break;
            if (progcode[i].minst == j)
            {
              progcode[i].ljumpfrom = cur;
              progcode[cur].jumpto = i;
              break;
            }
          }
        }
        else
        {
          for (i=cur-1; i>=0; i--)
          {
            if (progcode[i].minst < j ||
                progcode[i].minst > progcode[cur].minst)
              break;
            if (progcode[i].minst == j)
            {
              progcode[i].ljumpfrom = cur;
              progcode[cur].jumpto = i;
              break;
            }
          }
        }
        if (progcode[cur].jumpto == -1)
          awka_error("parse error: lj[n]z target not found, line %d.\n",progcode[cur].line);
        break;

      case _JMP:
        j = atoi(progcode[cur].val);
        progcode[cur].jumpto = -1;
        if (j > progcode[cur].minst)
        {
          /* forward jump: pair it with the first _JNZ at or beyond the
             target instruction and point that _JNZ back at the entry
             following this jump */
          for (i=cur+1; i<prog_no; i++)
          {
            if (progcode[i].minst < progcode[cur].minst)
              break;
            if (progcode[i].minst >= j && progcode[i].op == _JNZ)
            {
              progcode[i].jumpfrom = cur;
              progcode[i].jumpto = cur+1;
              progcode[cur].jumpto = i;
              break;
            }
          }
        }
        else
        {
          /* backward (or self) jump: the search starts at this entry, so a
             jump to itself is found and marked foreverloop = 2; any other
             target is marked foreverloop = 1 and this entry's endloop count
             is bumped */
          for (i=cur; i>=0; i--)
          {
            if (progcode[i].minst > progcode[cur].minst)
              break;
            if (progcode[i].minst == j)
            {
              progcode[i].jumpfrom = cur;
              progcode[i].jumpto = cur+1;
              progcode[cur].jumpto = i;
              if (i == cur)
                progcode[i].foreverloop = 2;
              else
              {
                progcode[i].foreverloop = 1;
                progcode[cur].endloop++;
              }
              break;
            }
          }
        }
        if (progcode[cur].jumpto == -1 && progcode[cur].op == _JMP)
          awka_error("parse error: jmp target not found, line %d.\n",progcode[cur].line);
        break;

      case SET_ALOOP:
      case _JZ:
      case _QMARK:
        /* locate the target entry; unless this is a _QMARK the target's
           endloop count is incremented */
        j = atoi(progcode[cur].val);
        progcode[cur].jumpto = -1;
        if (j > progcode[cur].minst)
        {
          for (i=cur+1; i<prog_no; i++)
          {
            if (progcode[i].minst > j ||
                progcode[i].minst < progcode[cur].minst)
              break;
            if (progcode[i].minst == j)
            {
              progcode[cur].jumpto = i;
              if (progcode[cur].op != _QMARK)
                progcode[i].endloop++;
              break;
            }
          }
        }
        else
        {
          for (i=cur-1; i>=0; i--)
          {
            if (progcode[i].minst < j ||
                progcode[i].minst > progcode[cur].minst)
              break;
            if (progcode[i].minst == j)
            {
              progcode[cur].jumpto = i;
              if (progcode[cur].op != _QMARK)
                progcode[i].endloop++;
              break;
            }
          }
        }
        if (progcode[cur].jumpto == -1)
          awka_error("parse error: set_al/jz/qmark target not found, line %d.\n",progcode[cur].line);
        if (progcode[cur].op != SET_ALOOP)
        {
          if (progcode[cur].op == _QMARK)
          {
            /* i is the target found above.  The entry just before it holds
               a second target in its val: find that one (index p), shift
               entries i..p-1 down by one slot and turn the freed slot p-1
               into a _COLON */
            j = atoi(progcode[i-1].val);
            k = progcode[i-1].minst;
            for (p=i; p<prog_no; p++)
            {
              if (progcode[p].minst > j ||
                  progcode[p].minst < k)
                break;
              if (progcode[p].minst == j)
              {
                for (k=i; k<p; k++)
                  moveprog(k, k-1);
                k--;
                progcode[k].op = _COLON;
                progcode[k].func = code[_COLON-1].func;
                break;
              }
            }
          }
          else if (progcode[i-1].op == _ELSE)
          {
            /* wimping out of a proper method */
            progcode[i-1].op = _GOTO;
            progcode[i-1].func = code[_GOTO-1].func;
          }
          /* only SET_ALOOP keeps its resolved jumpto */
          progcode[cur].jumpto = -1;
        }
        break;

      case _JNZ:
        /* a while or a do/while statement */
        j = atoi(progcode[cur].val);
        /* skipped when an earlier _JMP has already filled in both jumpto
           and jumpfrom for this entry */
        if (progcode[cur].jumpto == -1 || progcode[cur].jumpfrom == -1)
        {
          progcode[cur].jumpto = -1;
          if (j > progcode[cur].minst)
          {
            for (i=cur+1; i<prog_no; i++)
            {
              if (progcode[i].minst > j ||
                  progcode[i].minst < progcode[cur].minst)
                break;
              if (progcode[i].minst == j)
              {
                progcode[cur].jumpto = i;
                break;
              }
            }
          }
          else
          {
            for (i=cur-1; i>=0; i--)
            {
              if (progcode[i].minst < j ||
                  progcode[i].minst > progcode[cur].minst)
                break;
              if (progcode[i].minst == j)
              {
                progcode[cur].jumpto = i;
                break;
              }
            }
          }
          if (progcode[cur].jumpto == -1)
            awka_error("parse error: jnz target not found, line %d.\n",progcode[cur].line);
          /* progcode[cur].jumpto = -1; */
          if (cur > 0)
            if (progcode[cur-1].op == _JMP)
              break;  /* while loop */
          /* otherwise the target entry is flagged as the top of a do loop */
          progcode[i].doloop = TRUE;
        }
        break;

      case _GOTO:
        /* flag the target entry as needing a label; a missing target is
           not reported here */
        j = atoi(progcode[cur].val);
        if (j > progcode[cur].minst)
        {
          for (i=cur+1; i<prog_no; i++)
          {
            if (progcode[i].minst > j ||
                progcode[i].minst < progcode[cur].minst)
              break;
            if (progcode[i].minst == j)
            {
              progcode[i].label = TRUE;
              break;
            }
          }
        }
        else
        {
          for (i=cur-1; i>=0; i--)
          {
            if (progcode[i].minst < j ||
                progcode[i].minst > progcode[cur].minst)
              break;
            if (progcode[i].minst == j)
            {
              progcode[i].label = TRUE;
              break;
            }
          }
        }
        break;

      case _CALL:
        /* drop a trailing '(' from the callee name and track the largest
           argument count (held as text in arg) */
        i = strlen(progcode[cur].val);
        if (progcode[cur].val[i-1] == '(')
          progcode[cur].val[i-1] = '\0';
        j = atoi(progcode[cur].arg);
        max_call_args = (max_call_args > j ? max_call_args : j);
        break;

      case _FUNCTION:
        /* drop a trailing '(' and remember each function name once */
        i = strlen(progcode[cur].val);
        if (progcode[cur].val[i-1] == '(')
          progcode[cur].val[i-1] = '\0';
        for (i=0; i<func_no; i++)
          if (!strcmp(functions[i], progcode[cur].val))
            break;

        if (i == func_no)
        {
          if (func_no == func_allc)
          {
            func_allc *= 2;
            functions = (char **) realloc(functions, func_allc * sizeof(char *));
          }
          functions[i] = (char *) malloc(strlen(progcode[cur].val)+1);
          strcpy(functions[i], progcode[cur].val);
          func_no++;
        }
        break;
    }

  }

  /* another pass to decide whether calls are to functions or to 
     extended builtins. */
  for (cur=0; cur<prog_no; cur++)
  {
    /* find extended builtins & see if they're locals */
    for (ebp = ext_funcs; ebp->name != NULL; ebp++)
      if (ebp->op == progcode[cur].op)
        break;

    if (ebp->name)
    {
      for (i=0; i<func_no; i++)
        if (!strcmp(functions[i], ebp->name))
          break;

      if (i < func_no)
      {
        /* a user function has the same name as this extended builtin:
           turn the entry into an ordinary call */
        progcode[cur].func = awka_call;
        progcode[cur].op = _CALL;
        progcode[cur].pop = FALSE;
        /* functions[i] is freed before this function returns, so the
           entry must own a private copy of the name rather than alias it */
        progcode[cur].val = (char *) malloc(strlen(functions[i])+1);
        strcpy(progcode[cur].val, functions[i]);
        /* NOTE(review): .inst is compared with the opcode _PUSHINT here;
           .op may have been intended - confirm before changing */
        if (cur == 0 || progcode[cur-1].inst != _PUSHINT)
        {
          progcode[cur].arg = (char *) malloc(20);
          sprintf(progcode[cur].arg, "%d", _a_bi_vararg[progcode[cur].varidx].min_args);
        }
        progcode[cur].varidx = -1;
      }
      continue;
    }

    /* find locals & see if they're extended builtins */
    if (progcode[cur].op == _CALL)
    {
      for (i=0; i<func_no; i++)
        if (!strcmp(functions[i], progcode[cur].val))
          break;

      if (i == func_no)
      {
        /* local definition doesn't exist - is it an extended func? */
        for (ebp = ext_funcs; ebp->name != NULL; ebp++)
          if (!strcmp(progcode[cur].val, ebp->name))
          {
            progcode[cur].op = ebp->op;
            progcode[cur].varidx = code[ebp->op-1].varidx;
            progcode[cur].pop = code[ebp->op-1].pop;
            progcode[cur].func = code[ebp->op-1].func;
            break;
          }
      }
    }

  }

  /* set up range patterns */
  /* Walk backwards over the valid entries only: the last one is
     progcode[prog_no-1]. */
  for (cur=prog_no-1; cur>=0; cur--)
  {
    if (progcode[cur].op == _RANGE)
    {
      /* val holds three space separated numbers; the 2nd and 3rd are
         instruction numbers resolved below */
      j = atoi(progcode[cur].val);
      k = progcode[cur].endloop;
      q = progcode[cur].val;
      while (*q && *q != ' ') q++;
      if (!(*q))
        awka_error("parse error: range value not set correctly, line %d.\n",progcode[cur].line);
      j2 = atoi(++q);
      while (*q && *q != ' ') q++;
      if (!(*q))
        awka_error("parse error: range value not set correctly, line %d.\n",progcode[cur].line);
      /* q rests on the separating space, which atoi() skips */
      j3 = atoi(q);

      /* find end of patterns */
      for (i=cur+1; i<prog_no; i++)
      {
        if (progcode[i].minst > j2)
          awka_error("parse error: can't find range target (1), line %d.\n", progcode[cur].line);
        if (progcode[i].minst == j2)
          break;
      }
      if (i == prog_no)
        awka_error("parse error: can't find range target (2), line %d.\n", progcode[cur].line);
      /* from here on j2 is the index of the last pattern entry */
      j2 = i-1;

      /* find jumpto opcode */
      for (i=j2+1; i<prog_no; i++)
      {
        if (progcode[i].minst > j3)
          awka_error("parse error: can't find range target (3), line %d.\n", progcode[cur].line);
        if (progcode[i].minst == j3)
          break;
      }
      if (i == prog_no)
        awka_error("parse error: can't find range target (4), line %d.\n", progcode[cur].line);
      /* from here on j3 is the index of the jump target */
      j3 = i;
      progcode[i].label = TRUE;
      /* the entry after the range opcode inherits its label and minst
         before it is shifted into the range opcode's slot */
      progcode[cur+1].label = progcode[cur].label;
      progcode[cur+1].minst = progcode[cur].minst;

      /* move range opcode to end of patterns */
      for (i=cur+1; i<=j2; i++)
        moveprog(i, i-1);
      /* slot j2 is now free and is rebuilt as the range opcode */
      progcode[--i].op = _RANGE;
      progcode[i].val = (char *) malloc(20);
      sprintf(progcode[i].val, "%d", progcode[j3].minst);
      progcode[i].arg = NULL;
      progcode[i].func = awka_range;
      progcode[i].jumpto = j3;
      progcode[i].endloop = k;
    }
  }

  fprintf(outfp, "\nstruct gvar_struct *_gvar;\n");

  /* one declaration line per literal class, listing every interned name */
  if (litd_used)
  {
    fprintf(outfp, "a_VAR *_litd0_awka=NULL");
    for (i=1; i<litd_used; i++)
      fprintf(outfp, ", *_litd%d_awka=NULL",i);
    fprintf(outfp, ";\n");
  }

  if (lits_used)
  {
    fprintf(outfp, "a_VAR *_lits0_awka=NULL");
    for (i=1; i<lits_used; i++)
      fprintf(outfp, ", *_lits%d_awka=NULL",i);
    fprintf(outfp, ";\n");
  }

  if (litr_used)
  {
    fprintf(outfp, "a_VAR *_litr0_awka=NULL");
    for (i=1; i<litr_used; i++)
      fprintf(outfp, ", *_litr%d_awka=NULL",i);
    fprintf(outfp, ";\n");
  }

  /* prototypes for user functions; the name list is released as it is
     written out */
  for (i=0; i<func_no; i++)
  {
    fprintf(outfp, "a_VAR * %s_fn(a_VARARG *);\n",functions[i]);
    free(functions[i]);
  }
  if (begin_used)
    fprintf(outfp, "void BEGIN();\n");
  if (main_used)
    fprintf(outfp, "void MAIN();\n");
  if (end_used)
    fprintf(outfp, "void END();\n");
  free(functions);
}

