/*
 * 	Random Access Machine.
 * 	Expression parser.
 *
 * 	Copyright (C) 2002, 2003  Dmitry Rutsky	<rutsky@school.ioffe.rssi.ru>
 * 	
 * 	This program is free software; you can redistribute it and/or modify
 * 	it under the terms of the GNU General Public License as published by
 * 	the Free Software Foundation; either version 2 of the License, or
 * 	(at your option) any later version.
 *
 * 	This program is distributed in the hope that it will be useful,
 * 	but WITHOUT ANY WARRANTY; without even the implied warranty of
 * 	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * 	GNU General Public License for more details.
 *
 * 	You should have received a copy of the GNU General Public License
 * 	along with this program; if not, write to the 
 * 	Free Software Foundation, Inc.,
 * 	59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 */
#include <string.h>
#include <ctype.h>
#include <math.h>
#include <stdarg.h>

#include "ram.h"

/* 	This module is very similar to the one described in Bjarne Stroustrup's
 * 	"The C++ Programming Language" book, but I have written it from
 * 	scratch and commented here many of the things I didn't understand
 * 	when I first learned this technique.
 * 	There are other improvements, especially in the get_token function.
 *
 * 	For now, it is needed only for examining data in the debugger.
 * 	However, it's easy to allow arithmetic expressions in RAM programs
 * 	too, so it is included in a separate library.
 */

//	Things are quite simple
// Kinds of tokens produced by get_token.  Single-character tokens use the
// character itself as their value, so get_token can store a matched
// character directly as the token type and the values stay readable in a
// debugger; the remaining tokens use mnemonic letters.
typedef enum
{
   // Special tokens.
   END = '\0',
   ERROR = 'E',
   NUMBER = 'N',

   // Arithmetic operators.
   PLUS = '+',
   MINUS = '-',
   MULTIPLY = '*',
   DIVIDE = '/',
   IN_POWER_OF = '^',
   EXCLAMATION = '!',

   // Grouping and separators.
   LEFT_BRACKET = '(',
   RIGHT_BRACKET = ')',
   LEFT_SQUARE_BRACKET = '[',
   RIGHT_SQUARE_BRACKET = ']',
   SEMICOLON = ';',

   // Comparisons; the two-character ones get a mnemonic letter.
   LESS_THAN = '<',
   GREATER_THAN = '>',
   LESS_OR_EQUAL = 'l',
   GREATER_OR_EQUAL = 'g',
   EQUAL_TO = '=',
   NOT_EQUAL_TO = 'n',

   // Logical operators.  NOT has the same value as IN_POWER_OF: the parser
   // treats '^' as NOT at the start of a primary expression and as the
   // power operator after one.
   AND = '&',
   OR = '|',
   NOT = '^'
}
TokenType;

// A single lexical token as filled in by get_token.
typedef struct
{
   TokenType type;	// kind of the token
   mpz_t value;		// numeric value; get_token sets it for NUMBER tokens
}
Token;

// Read the next token from the string `*s' into `t' and advance `*s'
// past it.
//
// Leading whitespace is skipped.  For a NUMBER token the decimal value is
// stored into (t -> value), which should be already initialized by the
// caller.  If nothing matches, an error is reported, the type is set to
// ERROR and `*s' is left pointing at the offending character.
//
// While a number is parsed the character following its digits is
// temporarily overwritten with a terminator and then restored, so the
// string must be writable.
//
// Returns the token type, which is also stored in (t -> type).
static TokenType get_token (char **s, Token *t)
{
   // The <ctype.h> functions need a value representable as unsigned char;
   // a plain `char' may be negative, hence the casts.
   while (isspace ((unsigned char) **s))
      (*s) ++;

   if (**s == '\0')
   {
      (t -> type) = END;
      return (t -> type);
   }

   if (isdigit ((unsigned char) **s))
   {
      char *number = *s;
      char saved;

      do
         (*s) ++;
      while (isdigit ((unsigned char) **s));

      // Terminate the string right after the digits, so that mpz_set_str
      // sees nothing but the number itself.  The `number' string then
      // consists only of digits, so there is no need to check the
      // mpz_set_str status for possible errors.
      saved = **s;
      (**s) = '\0';
      mpz_set_str ((t -> value), number, 10);
      (**s) = saved;

      (t -> type) = NUMBER;
      return (t -> type);
   }

   // Single-character tokens: their type is the character itself.
   // (**s is not '\0' here, so strchr cannot match the terminator.)
   if (strchr ("+-*/()[]^;&|=", **s))
   {
      (t -> type) = (TokenType) **s;
      (*s) ++;

      return (t -> type);
   }

   // `<', `>' and `!' form a different token when followed by `='.
   if (**s == '<' || **s == '>' || **s == '!')
   {
      const char first = **s;
      const int with_equal = (*(*s + 1) == '=');

      if (first == '<')
         (t -> type) = with_equal ? LESS_OR_EQUAL : LESS_THAN;
      else if (first == '>')
         (t -> type) = with_equal ? GREATER_OR_EQUAL : GREATER_THAN;
      else
         (t -> type) = with_equal ? NOT_EQUAL_TO : EXCLAMATION;

      (*s) += with_equal ? 2 : 1;

      return (t -> type);
   }

   (t -> type) = ERROR;
   err_error ("no token match `%c'.", **s);

   return (t -> type);
}

// Report a syntax error at the position `expression' points to.
// Leading whitespace is skipped; if anything is left, it is quoted in the
// message, otherwise the error is reported as being near the end of the
// expression.
static void report_syntax_error (const char *expression)
{
   // isspace needs a value representable as unsigned char; a plain `char'
   // may be negative, hence the cast.
   while (isspace ((unsigned char) *expression))
      expression ++;

   if (*expression != '\0')
      err_error ("syntax error before `%s'.", expression);
   else
      err_error ("syntax error near the end of the expression.");
}

//
//	The following syntax is used for expression evaluation:
//	
//	Primary expression --- one of
//		NUMBER;
//		MINUS thirdary_expression;
//		^ fifthary_expression;
//		LEFT_SQUARE_BRACKET toplevel_expression RIGHT_SQUARE_BRACKET;
//		LEFT_BRACKET toplevel_expression RIGHT_BRACKET.
//
//	Secondary expression --- one of
//		primary_expression;
//		secondary_expression IN_POWER_OF primary_expression;
//		secondary_expression EXCLAMATION.
//
//	Thirdary expression --- one of
//		secondary_expression;
//		thirdary_expression * secondary_expression;
//		thirdary_expression / secondary_expression.
//
//	Fourthary expression --- one of
//		thirdary_expression;
//		fourthary_expression + thirdary_expression;
//		fourthary_expression - thirdary_expression.
//
//	Fifthary expression --- one of
//		fourthary_expression;
//		fifthary_expression > fourthary_expression;
//		fifthary_expression < fourthary_expression;
//		fifthary_expression >= fourthary_expression;
//		fifthary_expression <= fourthary_expression;
//		fifthary_expression = fourthary_expression;
//		fifthary_expression != fourthary_expression.
//
//	Toplevel expression --- one of
//		fifthary_expression;
//		toplevel_expression & fifthary_expression;
//		toplevel_expression | fifthary_expression.
//
//	The whole expression is just
//		toplevel_expression END.
//

// In these functions a nonzero `get' indicates that `token' does not yet
// hold the current token of the expression, so the function has to obtain
// it first.
// These functions return `true' or `false', indicating whether everything
// was successful.
// `token' should point to an initialized structure, and `value' should
// point to an initialized mpz_t where the result will be stored.

// Forward declarations: primary_expression calls these three functions,
// which are defined after it.
static int toplevel_expression (RAM *machine, char **expression, mpz_t *value,
				Token *token, int get);
static int thirdary_expression (RAM *machine, char **expression, mpz_t *value,
				Token *token, int get);
static int fifthary_expression (RAM *machine, char **expression, mpz_t *value,
				Token *token, int get);

// Parse a primary expression and store its value into `*value':
//   - a NUMBER;
//   - `(' toplevel_expression `)';
//   - `[' toplevel_expression `]', the contents of the register with the
//     given index (requires a non-NULL `machine'; a register that cannot
//     be obtained produces a warning and reads as 0);
//   - `-' thirdary_expression, arithmetical negation;
//   - `^' fifthary_expression, logical negation (1 if the operand is zero,
//     0 otherwise).
//
// If `get' is nonzero, the current token is fetched first.  On success
// `token' holds the token following the primary expression and 1 is
// returned; on failure an error is reported and 0 is returned.
static int primary_expression (RAM *machine, char **expression, mpz_t *value,
				Token *token, int get)
{
   if (get)
      get_token (expression, token);

   if ((token -> type) == NUMBER)	// The most frequently happening case
      mpz_set (*value, (token -> value));
   else if ((token -> type) == LEFT_BRACKET)
   {
      // A failure inside the brackets must not be taken for a success just
      // because the closing bracket happens to be the current token.
      if (!toplevel_expression (machine, expression, value, token, 1))
         return 0;

      if ((token -> type) != RIGHT_BRACKET)
      {
         err_error ("syntax error: ')' expected.");

         return 0;
      }
   }
   else if ((token -> type) == LEFT_SQUARE_BRACKET)
   {
      mpz_t index;
      int ok = 0;

      mpz_init (index);

      if (!toplevel_expression (machine, expression, &index, token, 1))
         ;	// The error has been reported already.
      else if ((token -> type) != RIGHT_SQUARE_BRACKET)
         err_error ("syntax error: ']' expected.");
      else if (!machine)
         err_error ("there is no machine to examine registers.");
      else if (mpz_sgn (index) < 0)
         err_error ("invalid register index %Zd.", index);
      else
      {
         mpz_t *r = ram_try_to_get_register ((machine -> memory), &index);

         if (!r)
         {
            err_warning ("register %Zd is not initialized.", index);
            mpz_set_ui (*value, 0);
         }
         else
            mpz_set (*value, *r);

         ok = 1;
      }

      mpz_clear (index);

      if (!ok)
         return 0;
   }
   else if ((token -> type) == MINUS)
   {
      if (!thirdary_expression (machine, expression, value, token, 1))
         return 0;

      mpz_neg (*value, *value);

      // The operand parser has already advanced to the following token.
      return 1;
   }
   else if ((token -> type) == NOT)
   {
      if (!fifthary_expression (machine, expression, value, token, 1))
         return 0;

      mpz_set_ui (*value, ! mpz_sgn (*value));

      // The operand parser has already advanced to the following token.
      return 1;
   }
   else
   {
      // get_token has already reported an ERROR token.
      if ((token -> type) != ERROR)
         report_syntax_error (*expression);

      return 0;
   }

   // Step over the number or the closing bracket.
   get_token (expression, token);

   return 1;
}

// Raise `*value' to the power `*exponent', storing the result in `*value'.
// Returns 1 on success; otherwise returns 0, leaving `*value' untouched.
// An error is reported on failure, except that the "too large" message is
// printed only if verbosity_is_enough (-1).
static int raise_to_power (mpz_t *value, mpz_t *exponent)
{
   if (mpz_sgn (*exponent) < 0)
   {
      err_error (
        "cannot raise in negative power using integer arithmetic.");
      return 0;
   }

   // Trivial case: a zero base is left as it is
   // (note that this makes 0 ^ 0 equal to 0).
   if (! mpz_sgn (*value))
      return 1;

   // Other trivial case: a base of 1 or -1 never grows, so any exponent is
   // acceptable here, even one too large for the general case below.
   // Only the sign can change.
   if (!mpz_cmpabs_ui (*value, 1))
   {
      if (mpz_sgn (*value) < 0 && mpz_even_p (*exponent))
         mpz_neg (*value, *value);

      return 1;
   }

   // Refuse to calculate unreasonably large results: the result has no more
   // than N * exponent bits, where N is greater by 1 than the bit size of
   // the base, and that product has to fit into an unsigned int.
   {
      mpz_t bits;
      int fits;

      mpz_init (bits);
      mpz_mul_ui (bits, *exponent, mpz_sizeinbase (*value, 2) + 1);
      fits = mpz_fits_uint_p (bits);

      if (!fits && verbosity_is_enough (-1))
      {
         // mpz_sizeinbase returns a size_t; convert it to match `%d'.
         int size = (int) mpz_sizeinbase (bits, 10);

         err_error (
"the calculation requires a number to be of a size\n"
"represented by a number having more than %d digits in decimal representation,"
"\nthat is out of the possible range.", size);
      }

      mpz_clear (bits);

      if (!fits)
         return 0;
   }

   // The exponent is not greater than the product checked above, so it
   // fits into an unsigned long.  mpz_pow_ui handles negative bases itself.
   mpz_pow_ui (*value, *value, mpz_get_ui (*exponent));

   return 1;
}

// Parse a secondary expression: a primary expression followed by any number
// of `^ primary_expression' (raising in power) and `!' (factorial)
// operators, applied from left to right, so 2 ^ 3 ^ 2 is (2 ^ 3) ^ 2.
//
// If `get' is nonzero, the current token is fetched first.  On success the
// result is stored in `*value', `token' holds the token following the
// expression and 1 is returned; on failure 0 is returned.
static int secondary_expression (RAM *machine, char **expression, mpz_t *value,
				Token *token, int get)
{
   if (!primary_expression (machine, expression, value, token, get))
      return 0;

   while ((token -> type) == IN_POWER_OF || (token -> type) == EXCLAMATION)
   {
      if ((token -> type) == IN_POWER_OF)
      {
         mpz_t right;
         int ok;

         mpz_init (right);

         // primary_expression leaves `token' at the token following the
         // exponent, so there is nothing to fetch afterwards.
         ok = primary_expression (machine, expression, &right, token, 1)
              && raise_to_power (value, &right);

         mpz_clear (right);

         if (!ok)
            return 0;
      }
      else
      {
         if (mpz_sgn (*value) < 0)
         {
            err_error ("integer factorial of %Zd is not defined.", *value);

            return 0;
         }

         if (! mpz_fits_uint_p (*value))
         {
            err_error ("factorial of %Zd is too big to be calculated.", *value);

            return 0;
         }

         mpz_fac_ui (*value, mpz_get_ui (*value));

         // Step over the `!'.
         get_token (expression, token);
      }
   }

   return 1;
}

// Parse a thirdary expression: secondary expressions joined by `*' and `/',
// evaluated from left to right.  Division truncates the quotient towards
// zero (mpz_tdiv_q).
//
// If `get' is nonzero, the current token is fetched first.  On success the
// result is stored in `*value', `token' holds the token following the
// expression and 1 is returned; on failure (including division by zero)
// an error is reported and 0 is returned.
static int thirdary_expression (RAM *machine, char **expression, mpz_t *value,
				Token *token, int get)
{
   if (!secondary_expression (machine, expression, value, token, get))
      return 0;

   while ((token -> type) == MULTIPLY || (token -> type) == DIVIDE)
   {
      mpz_t right;
      int mul = ((token -> type) == MULTIPLY);

      mpz_init (right);

      if (! secondary_expression (machine, expression, &right, token, 1))
      {
         mpz_clear (right);
         return 0;
      }

      // Only a zero divisor is an error; multiplying by zero is fine.
      if (!mul && ! mpz_sgn (right))
      {
         mpz_clear (right);
         err_error ("division by zero.");
         return 0;
      }

      if (mul)
         mpz_mul (*value, *value, right);
      else
         mpz_tdiv_q (*value, *value, right);

      mpz_clear (right);
   }

   return 1;
}

// Parse a fourthary expression: thirdary expressions joined by `+' and `-',
// evaluated from left to right.
//
// If `get' is nonzero, the current token is fetched first.  The result is
// accumulated in `*value'.  Returns 1 on success and 0 as soon as one of
// the operands fails to parse.
static int fourthary_expression (RAM *machine, char **expression, mpz_t *value,
				Token *token, int get)
{
   int ok = thirdary_expression (machine, expression, value, token, get);

   while (ok && ((token -> type) == PLUS || (token -> type) == MINUS))
   {
      // Remember the operator: parsing the operand replaces the token.
      const int subtract = ((token -> type) == MINUS);
      mpz_t operand;

      mpz_init (operand);

      ok = thirdary_expression (machine, expression, &operand, token, 1);
      if (ok)
      {
         if (subtract)
            mpz_sub (*value, *value, operand);
         else
            mpz_add (*value, *value, operand);
      }

      mpz_clear (operand);
   }

   return ok != 0;
}

// Parse a fifthary expression: fourthary expressions joined by the
// comparison operators `<', `<=', `>', `>=', `=' and `!=', evaluated from
// left to right.  Each comparison replaces `*value' with 1 if the relation
// holds and with 0 otherwise.
//
// If `get' is nonzero, the current token is fetched first.  On success
// `token' holds the token following the expression and 1 is returned;
// on failure 0 is returned.
static int fifthary_expression (RAM *machine, char **expression, mpz_t *value,
				Token *token, int get)
{
   if (!fourthary_expression (machine, expression, value, token, get))
      return 0;

   while ((token -> type) == LESS_THAN || (token -> type) == LESS_OR_EQUAL ||
          (token -> type) == GREATER_THAN ||
          (token -> type) == GREATER_OR_EQUAL ||
          (token -> type) == EQUAL_TO || (token -> type) == NOT_EQUAL_TO)
   {
      mpz_t right;
      // Remember the operator: parsing the operand replaces the token.
      TokenType type = (token -> type);

      mpz_init (right);

      if (! fourthary_expression (machine, expression, &right, token, 1))
      {
         mpz_clear (right);
         return 0;
      }

      {
         // Negative, zero or positive; not necessarily -1, 0 or 1.
         int relation = mpz_cmp (*value, right);

         switch (type)
         {
         case LESS_THAN:
            mpz_set_ui (*value, relation < 0);
            break;
         case LESS_OR_EQUAL:
            mpz_set_ui (*value, relation <= 0);
            break;
         case GREATER_THAN:
            mpz_set_ui (*value, relation > 0);
            break;
         case GREATER_OR_EQUAL:
            mpz_set_ui (*value, relation >= 0);
            break;
         case EQUAL_TO:
            mpz_set_ui (*value, relation == 0);
            break;
         case NOT_EQUAL_TO:
            // `relation' itself may be negative, and mpz_set_ui takes an
            // unsigned argument, so reduce it to 0 or 1 first.
            mpz_set_ui (*value, relation != 0);
            break;
         default:
            err_programming ("unhandled case in fifthary_expression");
            break;
         }
      }

      mpz_clear (right);
   }

   return 1;
}

// Parse a toplevel expression: fifthary expressions joined by the logical
// operators `&' and `|', evaluated from left to right.  An operand counts
// as true when it is nonzero; each operation replaces `*value' with 1 or 0.
// Both operands are always parsed and evaluated.
//
// If `get' is nonzero, the current token is fetched first.  Returns 1 on
// success and 0 as soon as one of the operands fails to parse.
static int toplevel_expression (RAM *machine, char **expression, mpz_t *value,
				Token *token, int get)
{
   int ok = fifthary_expression (machine, expression, value, token, get);

   while (ok && ((token -> type) == AND || (token -> type) == OR))
   {
      // Remember the operator: parsing the operand replaces the token.
      const int conjunction = ((token -> type) == AND);
      mpz_t operand;

      mpz_init (operand);

      ok = fifthary_expression (machine, expression, &operand, token, 1);
      if (ok)
      {
         const int left_true = (mpz_sgn (*value) != 0);
         const int right_true = (mpz_sgn (operand) != 0);

         if (conjunction)
            mpz_set_ui (*value, left_true && right_true);
         else
            mpz_set_ui (*value, left_true || right_true);
      }

      mpz_clear (operand);
   }

   return ok != 0;
}

// Evaluate the whole string `expression' as one toplevel expression.
//
// `machine' can be NULL, then registers are disabled.
// `value' should point to an initialized mpz_t variable where the result
// will be stored.
// An empty expression, as well as anything left after the expression,
// is reported as an error.
// Function returns expression evaluation status: nonzero on success,
// 0 on failure.
int ram_evaluate_expression (RAM *machine, char *expression, mpz_t *value)
{
   Token token;
   int status = 0;

   mpz_init (token.value);

   if (get_token (&expression, &token) == END)
      err_error ("empty expression.");
   else
   {
      // The first token is fetched already, hence `get' is 0.
      status = toplevel_expression (machine, &expression, value, &token, 0);

      // The expression has to extend up to the end of the string.
      if (status && (token.type != END))
      {
         // get_token has already reported an ERROR token.
         if (token.type != ERROR)
            report_syntax_error (expression);

         status = 0;
      }
   }

   mpz_clear (token.value);

   return status;
}

// Evaluate `expression' and display the result on `output'.
//
// The expression may consist of several toplevel expressions separated by
// `;'.  For each of them that evaluates successfully, the string `prefix'
// is printed by gmp_vfprintf with the arguments you give after `output',
// followed by the value; "; " is printed after each `;' separator.
// Evaluation stops at the first expression that fails.
//
// `machine' is passed on to the expression parser.
// Return the evaluation status of the last evaluated expression.
int ram_display_expression (RAM *machine, char *prefix,
				char *expression, FILE *output, ...)
{
   Token token;
   mpz_t value;
   int result, keep = 1;

   va_list args;

   va_start (args, output);

   mpz_init (value);
   mpz_init (token.value);

   get_token (&expression, &token);
   do
   {
      result = toplevel_expression (machine, &expression, &value, &token, 0);
      if (result)
      {
         // A va_list must not be reused after it has been passed to a
         // function consuming it, and the prefix may be printed several
         // times, so give gmp_vfprintf a fresh copy on each iteration.
         va_list prefix_args;

         va_copy (prefix_args, args);
         gmp_vfprintf (output, prefix, prefix_args);
         va_end (prefix_args);

         gmp_fprintf (output, "%Zd", value);

         if (token.type == SEMICOLON)
         {
            fprintf (output, "; ");

            // A trailing `;' ends the list.
            get_token (&expression, &token);
            if (token.type == END)
               keep = 0;
         }
         else
            keep = 0;
      }
   }
   while (result && keep);

   mpz_clear (token.value);
   mpz_clear (value);

   va_end (args);

   return result;
}
