{-
    Kaya - My favourite toy language.
    Copyright (C) 2004, 2005, 2006 Edwin Brady

    This file is distributed under the terms of the GNU General
    Public Licence. See COPYING for licence.
-}

module AbsSyntax where

-- Abstract syntax, parse trees, type/function contexts.

import Portability
import Control.Monad

data Const = Num Int
	   | Ch Char
	   | Bo Bool
	   | Re Double
	   | Str String
	   | Exc String Int
	   | Empty
   deriving (Eq, Ord)

instance Show Const where
    show (Num i) = show i
    show (Ch ch) = show ch
    show (Bo False) = "false"
    show (Bo True) = "true"
    show (Re d) = show d
    show (Str str) = show str
    show (Exc str i) = "exception(" ++ show str ++ ", " ++ show i ++ ")"
    show Empty = "()"

data Name = UN String
	  | MN (String,Int)
	  | NS Name Name -- Name in a namespace
          | OP Op -- operator to overload
	  | None
   deriving (Eq, Ord)

type LongName = [Name]

instance Show Name where
    show n = "K" ++ show' n -- dlsym can't cope with underscores in front!
       where show' (UN s) = "_D_" ++ quotename s
             show' (NS s n) = "_ns_"++ show' s ++ "_" ++ show' n
             show' (MN (s,i)) = "_my_" ++ quotename s ++ show i
             show' (OP op) = "_op_" ++ opName op
             show' None = "_DNAME__"

quotename [] = ""
quotename ('_':cs) = "_UN_"++quotename cs
quotename ('\'':cs) = "_PR_"++quotename cs
quotename ('?':cs) = "_QU_"++quotename cs
-- ghc6.6 doesn't like pound signs in this context.
-- quotename ('':cs) = "_PO_"++quotename cs
quotename ('$':cs) = "_DO_"++quotename cs
quotename ('#':cs) = "_HA_"++quotename cs
quotename ('@':cs) = "_AT_"++quotename cs
quotename (c:cs) = c:(quotename cs)

showuser (UN s) = s
showuser (MN (s,i)) = "<"++show s ++ show i++">"
showuser (NS s n) = showuser s ++ "::" ++ showuser n
showuser (OP op) = "operator" ++ show op
showuser None = error "Please don't do that"

showlist [] = ""
showlist [x] = show x
showlist (x:xs) = show x ++ ", " ++ showlist xs

data PrimType = Number
	      | Character
	      | Boolean
	      | RealNum
	      | StringType
	      | Pointer
	      | Exception
	      | Void
   deriving Eq

instance Show PrimType where
  show Number = "Int"
  show Character = "Char"
  show Boolean = "Bool"
  show RealNum = "Float"
  show StringType = "String"
  show Pointer = "Ptr"
  show Exception = "%Exception"
  show Void = "Void"

data Type = Prim PrimType
	  | Fn [Maybe Raw] [Type] Type -- Defaults, arg types, return type
	  | Array Type
--	  | User Name [Type] -- User type, parametrised over types
-- Want this instead, so that we can have type variables of kind other than
-- *:
          | User Name
          | TyApp Type [Type]
--	  | Syn Name -- Type synonym - rapidly expanded out by typechecker
	  | TyVar Name
	  | UnknownType -- We don't know what the type is yet.

-- Amount of space a closure of this type would need for arguments
argSpace (Fn _ xs _) = length xs
argSpace _ = 0

instance Eq Type where
    (==) (Prim t1) (Prim t2) = t1==t2
    (==) (Fn _ ts t) (Fn _ ts2 t2) = ts==ts2 && t==t2
    (==) (Array t1) (Array t2) = t1==t2
    (==) (User n1) (User n2) = n1==n2
    (==) (TyApp n1 t1) (TyApp n2 t2) = n1==n2 && t1==t2
--    (==) (Syn n1) (Syn n2) = n1==n2
    (==) (TyVar n1) (TyVar n2) = n1==n2
    (==) UnknownType UnknownType = True
    (==) _ _ = False

instance Show Type where
    show (Prim t) = show t
    show (Array t) = "[" ++ show t ++ "]"
    show (Fn ns ts t) = show t ++ "(" ++ showlist ts ++ ")"
    show (User n) = showuser n
    show (TyApp n tys) = show n ++ showargs tys
	    where showargs [] = ""
		  showargs (x:xs) = "<" ++ sa' (x:xs) ++ "> "
		  sa' [] = ""
		  sa' [x] = show x
		  sa' (x:xs) = show x ++ ", " ++ sa' xs
    show (TyVar (UN n)) = n
    show (TyVar (MN ("FV",n))) = "f" ++ show n
    show (TyVar (MN ("CLOS",n))) = "c" ++ show n
    show (TyVar (MN ("CLOSRET",n))) = "c" ++ show n
    show (TyVar (MN (_,n))) = "t" ++ show n
    show (TyVar n) = "<" ++ show n ++ ">"
--    show (Syn (UN n)) = n
    show (UnknownType) = "Unknown"

data InputType = Program String
	       | Shebang
	       | Module
               | SharedLib -- for the REPL
  deriving Eq

instance Show InputType where
    show (Program str) = str
    show Shebang = "script"
    show Module = "module"
    show SharedLib = "library"

data ArgType = Var | Copy
  deriving (Show, Eq)

-- Raw expressions
-- This really should be split up, at least so that things which syntactically
-- can't happen can't be represented. For example, RLambda can only occure
-- right at the top. But some other time.
data Raw = RVar String Int Name
	 | RQVar String Int Name -- Quoted (ie non evaluated) name
	 | RConst String Int Const
	 | RLambda String Int [ArgType] [(Name,Type)] Raw
	 | RClosure String Int [(Name,Type)] Raw
	 | RBind String Int Name Type Raw Raw
	 | RDeclare String Int (Name,Bool) Type Raw
	 | RReturn String Int Raw
	 | RVoidReturn String Int
	 | RAssign String Int RAssign Raw
	 | RAssignOp String Int Op RAssign Raw
	 | RSeq String Int Raw Raw
	 | RApply String Int Raw [Raw]
	 | RPartial String Int Raw [Raw]
	 | RForeign String Int Type Name [Raw]
	 | RWhile String Int Raw Raw
	 | RDoWhile String Int Raw Raw
	 | RFor String Int RAssign (Maybe Name) Raw Raw
	 | RTryCatch String Int Raw Raw Name Raw
	 | RNewTryCatch String Int Raw [RCatch]
	 | RThrow String Int Raw
	 | RExcept String Int Raw Raw
	 | RBreak String Int
	 | RPrint String Int Raw
	 | RInfix String Int Op Raw Raw 
	 | RUnary String Int UnOp Raw
	 | RCoerce String Int Type Raw
	 | RCase String Int Raw [RCaseAlt]
         | RMatch String Int Raw [MatchAlt]
	 | RIf String Int Raw Raw Raw
	 | RIndex String Int Raw Raw
	 | RField String Int Raw Name
	 | RArrayInit String Int [Raw]
	 | RVMPtr String Int
         | RLength String Int Raw -- Get directly at a string's length
         | RUnderscore String Int -- match anything pattern
	 | RNoop String Int
	 | RMetavar String Int Int
   deriving (Show, Eq)

data RCatch = RCatch String Int
                     -- Left, exception name and arguments
                     -- Right, catch all, variable to hold exception
                     (Either (Name, [Name]) Name)
                     Raw -- Handler
   deriving (Show, Eq)

data RCaseAlt = RAlt String Int Name [Name] Raw
              | RConstAlt String Int Const Raw
              | RArrayAlt String Int [Name] Raw
              | RDefault String Int Raw
   deriving (Show, Eq)

data MatchAlt = MAlt String Int 
                     [Raw] -- patterns 
                     Raw -- result
   deriving (Show, Eq)

data LCClause = LCGet String Int Name Raw
              | LCFilter String Int Raw
   deriving (Show, Eq)

data RAssign = RAName String Int Name
	     | RAIndex String Int RAssign Raw
	     | RAField String Int RAssign Name
   deriving (Show, Eq)

-- Foreign declaration (doc string at the end)
data Foreign = ForeignDecl String Int [FOpt] Type 
                           [(Name,Type)] Name Name String

data Op = Plus | Minus | Times | Divide | Modulo | Power | Equal | NEqual |
	  OpLT | OpGT | OpLE | OpGE | OpAnd | OpOr | OpXOR | BAnd | BOr |
	  OpShLeft | OpShRight | OpAndBool | OpOrBool
   deriving (Eq, Ord)

instance Show Op where
    show Plus = "+"
    show Minus = "-"
    show Times = "*"
    show Divide = "/"
    show Modulo = "%"
    show Power = "**"
    show Equal = "=="
    show NEqual = "!="
    show OpLT = "<"
    show OpGT = ">"
    show OpLE = "<="
    show OpGE = ">="
    show OpAnd = "&"
    show OpOr = "|"
    show OpXOR = "^"
    show BAnd = "&&" -- FIXME: BAnd seems unnecessary
    show BOr = "||" -- FIXME: BOr seems unnecessary
    show OpShLeft = "<<"
    show OpShRight = ">>"
    show OpAndBool = "&&"
    show OpOrBool = "||"

opName Plus = "plus"
opName Minus = "minus"
opName Times = "times"
opName Divide = "div"
opName Modulo = "mod"
opName Power = "pow"
opName Equal = "eq"
opName NEqual = "neq"
opName OpLT = "lt"
opName OpGT = "gt"
opName OpLE = "le"
opName OpGE = "ge"
opName OpAnd = "and"
opName OpOr = "or"
opName OpXOR = "xor"
opName BAnd = "band" -- FIXME: BAnd seems unnecessary
opName BOr = "bor" -- FIXME: BOr seems unnecessary
opName OpShLeft = "shl"
opName OpShRight = "shr"
opName OpAndBool = "and"
opName OpOrBool = "or"

boolops = [Equal, NEqual, OpLT, OpGT, OpLE, OpGE, BAnd, BOr]

data UnOp = Not | Neg
   deriving Eq

instance Show UnOp where
    show Not = "!"
    show Neg = "-"

{-
data RGlob = RGlob [RGDecl]
   deriving Show

data RGDecl = RGDecl Type Name
   deriving Show
-}

-- Type checked expressions
-- As for Raw expressions, this really ought to be split up.
data Expr n = Global n String Int 
                      -- Global function definition, disambiguation, arity 
	    | Loc Int -- Local variable
	    | GVar Int -- Module global variable
 	    | GConst Const
	    | Lambda [ArgType] [(n,Type)] (Expr n)
	    | Closure [(n,Type)] Type (Expr n) -- remember return type
	    | Bind n Type (Expr n) (Expr n)
-- Bool indicates whether n is used *only* in this block (default False)
	    | Declare String Int (n, Bool) Type (Expr n) 
	    | Return (Expr n)
	    | VoidReturn
	    | Assign (Assign n) (Expr n)
	    | AssignOp Op (Assign n) (Expr n)
            | AssignApp (Assign n) (Expr n)
	    | Seq (Expr n) (Expr n)
	    | Apply (Expr n) [Expr n]
            | ConApply (Expr n) [Expr n]
	    | Partial (Expr n) [Expr n] Int
	    | Foreign Type n [(Expr n,Type)]
	    | While (Expr n) (Expr n)
	    | DoWhile (Expr n) (Expr n)
	      -- ints here are for counting through the loop
	    | For Int (Maybe n) Int (Assign n) (Expr n) (Expr n) 
	    | TryCatch (Expr n) (Expr n) (Expr n) (Expr n)
	    | NewTryCatch (Expr n) [Catch n]
	    | Throw (Expr n)
	    | Except (Expr n) (Expr n)
            | NewExcept [Expr n]
	    | Break String Int
	    | InferPrint (Expr n) Type String Int
	    | PrintNum (Expr n)
	    | PrintStr (Expr n)
	    | PrintExc (Expr n)
	    | Infix Op (Expr n) (Expr n)
	    | RealInfix Op (Expr n) (Expr n)
	    | InferInfix Op (Expr n) (Expr n) (Type,Type,Type) String Int
	    | CmpExcept Op (Expr n) (Expr n)
	    | CmpStr Op (Expr n) (Expr n)
	    | Append (Expr n) (Expr n)
            | AppendChain [Expr n]
	    | Unary UnOp (Expr n)
	    | RealUnary UnOp (Expr n)
	    | InferUnary UnOp (Expr n) (Type,Type) String Int
	    | Coerce Type Type (Expr n)
	    | InferCoerce Type Type (Expr n) String Int
	    | Case (Expr n) [CaseAlt n]
	    | If (Expr n) (Expr n) (Expr n)
	    | Index (Expr n) (Expr n)
	    | Field (Expr n) Name Int Int -- name, argument and tag
	    | ArrayInit [Expr n]
	    | VMPtr
            | Length (Expr n)
	    | Error String
	    | Noop
	    | NoInit
	    | Metavar String Int Int -- Hole for optimisations
	    | Annotation Annotation (Expr n) -- Annotation expression
  deriving (Show, Eq)

data Catch n = Catch -- Left, exception name and arguments
                     -- Right, catch all, variable to hold exception
                     (Either (n, [Expr n]) (Expr n))
                     (Expr n) -- Handler
   deriving (Show, Eq)

data CaseAlt n = Alt Int Int [Expr n] (Expr n) -- tag, total
               | ConstAlt PrimType Const (Expr n)
               | ArrayAlt [Expr n] (Expr n)
               | Default (Expr n) -- if nothing else matches
   deriving (Show, Eq)

data Annotation = Line String Int -- File, Line number
		| FnBody String String Int -- Function, File, Line 
		| LamBody String -- Function
                | DynCheck Type -- Runtime check on enclosed term
   deriving (Show, Eq)

instance Eq n => Ord (CaseAlt n) where
    compare (Alt x _ _ _) (Alt y _ _ _) = compare x y
    compare (ArrayAlt xs _) (ArrayAlt ys _) = compare (length xs) (length ys)
    compare (ConstAlt _ x _) (ConstAlt _ y _) = compare x y

    compare (Alt _ _ _ _) (Default _) = LT
    compare (ArrayAlt _ _) (Default _) = LT
    compare (ConstAlt _ _ _) (Default _) = LT
    compare (Default _) (Default _) = EQ
    compare (Default _) (Alt _ _ _ _) = GT
    compare (Default _) (ConstAlt _ _ _) = GT
    compare _ _ = EQ -- others don't matter


data Assign n = AName Int -- Local name
	      | AGlob Int -- Module global name
	      | AIndex (Assign n) (Expr n)
	      | AField (Assign n) Name Int Int -- Name, argument and tag
  deriving (Show, Eq)

{-
data Glob n = Glob [GDecl n]
   deriving Show

data GDecl n = GDecl Type n
   deriving Show
-}

-- Function bindings
--type FBind = (Name, Type, Expr Name, [FOpt])

{- Flags telling the compiler how to deal with various kinds of function.
Public, Pure, Deprecated are user specified, others depend on how
the function is defined, imported, etc. -}
data FOpt = Public | Pure | NoArgs | Inline | Export | Generated
	  | DefaultDef -- can override definition
          | StartupFn | DeprecatedFn | Constructor
          | Repeatable -- can repeat definition if type is the same
  deriving (Show, Eq)

data DOpt = DPublic | DAbstract | DExport
  deriving (Show, Eq)

-- String, Int -> Filename, line number (in general)

data Decl = FunBind (String, Int, Name, Type, [FOpt], Binder (Expr Name)) 
                    String -- Documentation string
                    Type -- Type before normalisation
	  | Glob (Name, Type, Int, Maybe (Expr Name)) 
                -- Global variable; name, type, id, initial value
	  | Imported String -- Imported module
	  | Linker String -- Extra library to link
	  | TySyn (String, Int, Name, [Name], Type, Bool) -- bool is whether to export
	  | CInclude String
          | FMName String -- Name to include in function map, for generating consistent fnids
	  | DataDecl String Int [DOpt] Name [Type] [ConDecl] String
	  | AbstractDecl Name [Type] -- Abstract data type, and parameters
          | ExceptDecl String Int Name [Type] String
  deriving Show

instance Eq Decl where
    (==) (FunBind (_,_,x1,t1,_,b1) _ _) (FunBind (_,_,x2,t2,_,b2) _ _) 
	= (x1==x2) && (t1==t2)
--    (==) (FunBind (_,_,x1,t1,_,b1)) (FunBind (_,_,x2,t2,_,b2)) 
--	= (x1==x2) && (t1==t2) && (b1==b2)
    (==) (Glob x) (Glob y) = x==y
    (==) (Imported x) (Imported y) = x==y
    (==) (Linker x) (Linker y) = x==y
    (==) (TySyn x) (TySyn y) = x==y
    (==) (CInclude x) (CInclude y) = x==y
    (==) (DataDecl _ _ _ n ty c _) (DataDecl _ _ _ n2 ty2 c2 _) 
	= (n==n2) && (ty==ty2) && (c==c2)
    (==) (AbstractDecl n ty) (AbstractDecl n2 ty2) = (n==n2) && (ty==ty2)
    (==) _ _ = False

-- Programs
type Program = [Decl]

getFunID :: Name -> Program -> Int
getFunID fn ds = gf' 0 ds
 where gf' _ [] = -1
       gf' i ((FunBind (_,_,n,_,_,_) _ _):xs) | fn == n = i
       gf' i (x:xs) = gf' (i+1) xs

data Binder a = Defined a  -- Defined function
	      | DataCon Int Int Bool -- Tagged constructor, with arity
		   -- Bool says whether or not to compile code for it
              | ExceptionFn Name Int Bool
                    -- Exception with name and number of arguments
                    -- Bool says whether defined in the current module
	      | Unbound -- Not bound/defined elsewhere
              | ExtInlinable a -- External definition (so unbound) but
                               -- can be inline in modules which import it
  deriving (Show, Eq)

data ParseResult = PR { inputtype :: InputType,
			parsemodulename :: Name,
			parsedefinitions :: [RawDecl],
                        moduledocstring :: String }
  deriving Show

addToPT :: Result ParseResult -> Result [RawDecl] -> Result ParseResult
addToPT (Success (PR i n rs mdstr)) (Success rs') = Success (PR i n (rs'++rs) mdstr)
addToPT f _ = f

data RawDecl = FB (String,Int,Name,Type,[FOpt], Binder Raw) String
	     | GlobDecl String Int (Name, Type, Maybe Raw)
	     | CInc String
             | FMN String
	     | Imp String
	     | SearchImport String -- First pass only, when chasing modules
	     | Link String
	     | TSyn (String,Int,Name,[Type],Type, Bool) -- bool for export
	     | DDecl String Int [DOpt] Name [Type] [ConDecl] String
	     | ADecl Name [Type]
	     | CDecl Name Type Int Int
             | ExcDecl String Int Name [Type] String
             | ExtExc String Int Name [Type]
  deriving Show

-- A constructor declaration is a name and its type --- data constructors
-- are a special kind of function.
-- The list of names is the list of field names.
data ConDecl = Con Name Type [Name] Bool -- Bool is whether to compile code
  deriving (Show, Eq)

getConName (Con n _ _ _) = n

-- Local contexts
--type Context = [(Name,(Type,[FOpt]))]

-- Global variables (Int is an id to refer to it by)
type GContext = [(Name,(Type,Int))]

-- Defined types
--type Types = [(Name,TypeInfo)]
data TypeInfo = UserData [Type] -- User defined type
	      | Syn [Name] Type -- Type synonym
	      | Abstract -- Abstract type
	      | Private -- Private type
       deriving (Show, Eq)

-- Field names and which data type they belong to, 
-- mapping to the field type, which argument it projects to, and the tag.
type Fields = [((Name, Type), (Type, Int, Int))]

-- Constructor tags
type Tags = [(Name,(Int,Int))] -- map from constructor name to tag,number of
                               -- constructors in that type.

data Result r = Success r
              | Failure String String Int
    deriving (Show, Eq)

instance Monad Result where
    (Success r)   >>= k = k r
    (Failure err fn line) >>= k = Failure err fn line
    return              = Success
    fail s              = Failure s "(no file)" 0

instance MonadPlus Result where
    mzero = Failure "Error" "(no file)" 0
    mplus (Success x) _ = (Success x)
    mplus (Failure _ _ _) y = y

-- Substitute a term for a variable in a raw term

rawSubst :: Name -> Raw -> Raw -> Raw
rawSubst n t (RVar _ _ x) | n == x = t
rawSubst n t (RQVar _ _ x) | n == x = t
rawSubst n t (RClosure f l ts r) = RClosure f l ts (rawSubst n t r)
rawSubst n t (RBind f l nm ty r1 r2) 
   = RBind f l nm ty (rawSubst n t r1) (rawSubst n t r2)
rawSubst n t (RDeclare f l nb ty r)
   = RDeclare f l nb ty (rawSubst n t r)
rawSubst n t (RReturn f l v) = RReturn f l (rawSubst n t v)
rawSubst n t (RAssign f l a r) = RAssign f l (rawASubst n t a)
                                             (rawSubst n t r)
rawSubst n t (RAssignOp f l op a r) = RAssignOp f l op (rawASubst n t a)
                                                       (rawSubst n t r)
rawSubst n t (RSeq f l r1 r2) 
   = RSeq f l (rawSubst n t r1) (rawSubst n t r2)
rawSubst n t (RApply f l fn args)
   = RApply f l (rawSubst n t fn) (map (rawSubst n t) args)
rawSubst n t (RPartial f l fn args)
   = RPartial f l (rawSubst n t fn) (map (rawSubst n t) args)
rawSubst n t (RForeign f l ty nm args)
   = RForeign f l ty nm (map (rawSubst n t) args)
rawSubst n t (RWhile f l r1 r2) 
   = RWhile f l (rawSubst n t r1) (rawSubst n t r2)
rawSubst n t (RDoWhile f l r1 r2) 
   = RDoWhile f l (rawSubst n t r1) (rawSubst n t r2)
rawSubst n t (RFor f l ra nm r1 r2)
   = RFor f l (rawASubst n t ra) nm (rawSubst n t r1) (rawSubst n t r2)
rawSubst n t (RTryCatch f l r1 r2 nm r3)
   = RTryCatch f l (rawSubst n t r1) (rawSubst n t r2) nm (rawSubst n t r3)
rawSubst n t (RNewTryCatch f l r rcs)
   = RNewTryCatch f l (rawSubst n t r) (map (rawCSubst n t) rcs)
rawSubst n t (RThrow f l v) = RThrow f l (rawSubst n t v)
rawSubst n t (RExcept f l r1 r2) 
   = RExcept f l (rawSubst n t r1) (rawSubst n t r2)
rawSubst n t (RPrint f l v) = RPrint f l (rawSubst n t v)
rawSubst n t (RInfix f l op r1 r2) 
   = RInfix f l op (rawSubst n t r1) (rawSubst n t r2)
rawSubst n t (RUnary f l op r1) 
   = RUnary f l op (rawSubst n t r1)
rawSubst n t (RCoerce f l ty r1) 
   = RCoerce f l ty (rawSubst n t r1)
rawSubst n t (RMatch f l sc alts) 
   = RMatch f l (rawSubst n t sc) (map (rawMSubst n t) alts)
rawSubst n t (RCase f l sc alts) 
   = RCase f l (rawSubst n t sc) (map (rawCaseSubst n t) alts)
rawSubst n t (RIf f l r1 r2 r3)
   = RIf f l (rawSubst n t r1) (rawSubst n t r2) (rawSubst n t r3)
rawSubst n t (RIndex f l r1 r2)
   = RIndex f l (rawSubst n t r1) (rawSubst n t r2)
rawSubst n t (RField f l r1 nm)
   = RField f l (rawSubst n t r1) nm
rawSubst n t (RArrayInit f l rs)
   = RArrayInit f l (map (rawSubst n t) rs)
rawSubst n t (RLength f l r)
   = RLength f l (rawSubst n t r)
rawSubst _ _ r = r

rawCSubst n t (RCatch f l c h) = RCatch f l c (rawSubst n t h)

rawMSubst n t (MAlt f l pats res) = MAlt f l pats (rawSubst n t res)

rawCaseSubst n t (RAlt f l nm ns res) = RAlt f l nm ns (rawSubst n t res)
rawCaseSubst n t (RConstAlt f l c res) = RConstAlt f l c (rawSubst n t res)
rawCaseSubst n t (RArrayAlt f l ns res) = RArrayAlt f l ns (rawSubst n t res)
rawCaseSubst n t (RDefault f l res) = RDefault f l (rawSubst n t res)

rawASubst n t assign@(RAName f l nm) = assign
rawASubst n t (RAIndex f l a r) = RAIndex f l (rawASubst n t a)
                                              (rawSubst n t r)
rawASubst n t (RAField f l a nm) = RAField f l (rawASubst n t a) nm


