{- This source file is part of mmisar - a tool for converting
Metamath proofs to Isabelle/Isar.
Copyright 2006 Slawomir Kolodynski

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
-}

-- | this module defines a parser for Metamath ZF formulas. 
-- This is my homegrown parsing that should not be taken
-- as an example to follow. I will rewrite it to use Parsec one day.

module ParseMM
    where

import Utils
import Data.List
import Char
import Monad
import Maybe
import MMIDataTypes

{-
"ph -> A e. CC"
"A e. RR -> A e. CC"
"E. x ( A + x ) = 0"
"A e. CC -> E. x e. CC ( A + x ) = 0"
"E. x e. CC ( A + x ) = 0"
"E! x e. A ph <-> ( E. x e. A ph /\\ A. x e. A A. y e. A ( ( ph /\\ ps ) -> x = y ) )"
"( A e. CC /\\ A =/= 0 ) -> E. x e. CC ( A x. x ) = 1"
"A <_ B \\/ B <_ A"
"B < A <-> -. A <_ B"
-}


-- | table of infixes for logical operations
logLogLogInfixTable = ["->", "<->", "/\\", "\\/"]

-- | table of set and logical prefixes
-- prefixTable = ["-.", "-u"]

-- table of infixes for operations that take sets and return logical values
-- Note we are cheating here with the "R" infix. 
setSetLogInfixTable = ["e.","=/=", "=", "(_", "<_", "<", "R"]


-- table of infixes that take sets and return a set
setSetSetInfixTable = ["+", "-", "x.", "/"]

-- table of quantifiers
quantTable = ["A.", "E.", "E!"]

-- | tests if we have spurious parantheses around an expression
-- returns the expression with the spurious parantheses removed
-- note this is always called on an argument processed with breakIntoZeroNest
-- so we can assume we don't have situation like [["(", "a", "+", "b", ")", "+", "c"]]"
-- If the only string in the list starts with "(", the this block is the whole string
-- example 
-- testParens $ breakIntoZeroNest $ words "( ( phi /\\ psi ) )" == Just ["phi", "/\\", "psi"]
testParens :: [[String]] -> Maybe [String]
testParens ["(":x] = if null x then error ("trying to process a single " ++ "(")
                     else Just (init x)
testParens _ = Nothing

-- tests if the given list of list of strings represents an infix with the given string
-- this is the same for set and logical infixes
-- if so, returns a pair of its operands
-- testInfix "and" [ ["ph"], ["and"], ["a", "or", "b"] ] == Just ( ["ph"], ["a", "or", "b"] )
-- testInfix "/\\"  $ breakIntoZeroNest $ words "ph /\\  a -> ( c -> d ) -> e " ==
-- Just ( ["ph"]], ["a", "->", "(", "c -> d", ")", "->", "e"] )
-- testInfix "and" [ ["ph"], ["or"], ["a", "and", b"] ] == Nothing
testInfix :: String -> [[String]] -> Maybe ([String], [String])
testInfix s lsn = if length (snd sls) > 1 then 
                     Just (concat $ fst sls, concat $ tail $ snd sls)
                  else Nothing
                     where sls = break (==[s]) lsn

-- tests if the given list of list of strings represents a variable
{-testvar :: [[String]] -> Maybe String
testvar lsn = if (length lsn == 1) && ( length (head lsn) == 1 ) then 
                 Just ( head (head lsn) )
              else Nothing-}

testvar :: [[String]] -> Maybe [String]
testvar lsn = if (length lsn == 1) && ( length (head lsn) == 1 ) then 
                 Just (head lsn)
              else Nothing


-- | tests if a given list of lists of strings is a numeral
testNum :: [[String]] -> Maybe [String]
testNum lsn = if (length lsn == 1) && ( length x == 1  ) && ( all isDigit (head x) )
              then Just x 
              else Nothing
                 where x = head lsn

-- tests if the list of lists of string represents a quantifier expression
-- ex. testQuant $ breakIntoZeroNest $ words "E! x ph" == Just ("E!","x",["ph"])
-- testQuant "E." $ breakIntoZeroNest $ words "E. x ( A + x ) = 0" == Just ("x",["A","+","x"],["="],["0"]])
testQuant :: String -> [[String]] -> Maybe (String, [String])
testQuant q lsn = if length lsn > 2 && fstWord == q && length svar == 1 && (lsn !! 2) /= ["e."]
                then Just (head svar, concat $ drop 2 lsn)
                else Nothing
                   where 
                   fstWord = head $ head lsn
                   svar = lsn !! 1

-- | Tests if the list of lists of strings represents a quantifier expression
-- with a range. Returns a triple of arguments needed to build the corresponding logical 
-- expression. Examples:
-- testQuantRange "E." $ breakIntoZeroNest $ words "E. x e. A ph" ==
-- Just (x,["A"],["ph"]) 
-- testQuantRange "E." $ breakIntoZeroNest $ words "E. x e. CC ( A + x ) = 0"
-- Just (x, ["CC"], ["(", "A", "+", "x", ")", "=", "0"])
testQuantRange :: String -> [[String]] -> Maybe (String, [String], [String])
testQuantRange q lsn = if length lsn > 4 && fstWord == q && length svar == 1 && (lsn !! 2) == ["e."] 
                       then Just (head svar, srange, concat $ drop 4 lsn)
                       else Nothing
                       where 
                          fstWord = head $ head lsn
                          svar = lsn!!1
                          srange = lsn!!3

-- | tests for negation
testPrefix :: String -> [[String]] -> Maybe [String]
testPrefix p lsn = if length lsn > 1 && head lsn == [p] then Just (concat $ tail lsn)
              else Nothing

-- | checks for if expression like in  if ( A e. RR , A , 0 )
testIf :: [[String]] -> Maybe ([String], [String], [String])
testIf lsn = if length lsn == 2 && head lsn == ["if"] && head ifarg == "(" && last ifarg == ")" &&
                length seplsn == 3
                then Just (concat $ head seplsn, concat (seplsn!!1), concat (seplsn!!2) )
             else Nothing
             where 
                ifarg = lsn!!1
                seplsn = separate [","] $ breakIntoZeroNest $ tail $ init ifarg

-- | testSquare tests if the expression is a square. The "show " commands translate A\cdotA as A ^ 2, 
-- although the web interface does not show this.
testSquare :: [[String]] -> Maybe [String]
testSquare lsn = if length lsn == 3 && lsn!!1 == ["^"] && lsn!!2 == ["2"]
                    then Just (head lsn)
                 else Nothing

-- | tests if the the expression may say the something is a set for example NN e. V
-- needs to be matched before the e. infix
testIsASet :: [[String]] -> Maybe [String]
testIsASet lsn = if length lsn > 2 && lsn!!1 == ["e."] && lsn!!2 == ["V"] then Just (head lsn)
                 else Nothing

-- | tests if the expression may be a set comprehension
testSetCompr :: [[String]] -> Maybe ([String],[String])
testSetCompr lsn = if length lsn == 1 && length ls > 4 && head ls == "{" && ls!!2 == "|" && last ls ==  "}" 
                      then Just ([ls!!1], init $ drop 3 ls)
                   else Nothing
                      where ls = head lsn

-- | tests if the expression may be a set comprehension with range
-- we assume here that the range will not be another set comprehension
-- if this is not always the case then we will have to modify this
testSetComprRange :: [[String]] -> Maybe ([String],[String],[String])
testSetComprRange lsn = if length lsn == 1 && length ls > 6 && head ls == "{" && (ls!!2) == "e." && 
                           last ls ==  "}" && length reminder > 2
                           then Just ([ls!!1], srange, init $ tail reminder)
                        else Nothing
                           where
                              ls = head lsn
                              (srange,reminder) = break (=="|") (drop 3 ls)

-- converts a pair of Maybe lists of strings into a Maybe logical expression
-- of the type given by the first string in the triple. The second and third are the
-- operands of the infix
str2LLLinfix :: String -> ([String],[String]) -> Maybe LogExpr
str2LLLinfix inf (s1,s2) = do
      p1 <- str2LogExpr s1
      p2 <- str2LogExpr s2
      return (InfixLLL inf p1 p2)

-- converts a pair of Maybe lists of strings into a Maybe infix of logical expressions
-- inf - the infix, like "e.", ">" etc
-- (s1,s2) - arguments of the infix
str2SSLinfix :: String -> ([String],[String]) -> Maybe LogExpr
str2SSLinfix inf (s1,s2) = do
      p1 <- str2SetExpr s1
      p2 <- str2SetExpr s2
      return (InfixSSL inf p1 p2)

-- converts a pair of Maybe lists of strings into a Maybe set expression
-- given by the first argument
str2SSSinfix :: String -> ([String],[String]) -> Maybe SetExpr
str2SSSinfix inf (s1,s2) = do
      p1 <- str2SetExpr s1
      p2 <- str2SetExpr s2
      return (InfixSSS inf p1 p2)

-- | converts a pair into a Maybe logical
-- expression representing a quantifier with range. The quantifier is 
-- given in the first argument
str2Quant :: String             -- ^ quantifier is that, may be "E.", "E!" or "A."
          -> (String, [String]) -- ^ the first el. of the pair gives the variable, the second is the log formula
          -> Maybe LogExpr
str2Quant q (s1, ls2) = do
      loge <- str2LogExpr ls2
      return (Quant q (Svar s1) loge)

-- | converts he pair in the second argument into a Maybe logical
-- expression representing a quantifier with range. 
str2QuantRange :: String                   -- ^ quantifier : "E.", "E!" or "A." ?
          -> (String, [String], [String] ) -- ^ (variable, quantifier range, log formula)
          -> Maybe LogExpr
str2QuantRange q (s1, ls2, ls3) = do
      srange <- str2SetExpr ls2
      loge <-  str2LogExpr ls3
      return ( QuantRange q (Svar s1) srange loge )

-- | converts a list of strings into a negation of a logical expression
str2Not :: [String]                        -- ^ the argument of a negation
        -> Maybe LogExpr
str2Not ls = do
      loge <- str2LogExpr ls
      return (Negation loge)

-- | converts a list of strings into a negatve of a set expression
str2Neg :: [String]                        -- ^ the argument of the negative
        -> Maybe SetExpr
str2Neg ls = do
      sete <- str2SetExpr ls 
      return (Negative sete)

-- | converts a triple of lists of strings into an if expression
str2if :: ([String], [String], [String]) -> Maybe SetExpr
str2if (ls1, ls2, ls3) = do
   loge <- str2LogExpr ls1
   sete1 <- str2SetExpr ls2
   sete2 <- str2SetExpr ls3
   return (If loge sete1 sete2)

-- | converts an argument that comes from testing that an expression says
-- something is a set to a corresponding logical expression
str2isASet :: [String] -> Maybe LogExpr
str2isASet ls = do
   sete <- str2SetExpr ls
   return (IsASet sete)

-- | converts a pair of arguments into a set comprehension
str2SetCompr :: ([String],[String]) -> Maybe SetExpr
str2SetCompr (ls1,ls2) = do
   svar <- str2SetExpr ls1
   loge <- str2LogExpr ls2
   return (SetCompr svar loge)

-- | converts a triple of arguments into a set comprehension with range
str2SetComprRange :: ([String],[String], [String]) -> Maybe SetExpr
str2SetComprRange (ls1,ls2,ls3) = do
   svar <- str2SetExpr ls1
   sete <- str2SetExpr ls2
   loge <- str2LogExpr ls3
   return (SetComprRange svar sete loge)


-- | tries to convert a list of strings into a logical expression with 
-- one argument
parseLoge :: ([[String]] -> Maybe [String]) -> ([String] -> Maybe LogExpr) -> [[String]] -> Maybe LogExpr
parseLoge test str2log lsn = test lsn >>= str2log 

-- tries to parse logical infix
parseLLLinfix :: String -> [[String]] -> Maybe LogExpr
parseLLLinfix inf lsn =  testInfix inf lsn >>= str2LLLinfix inf

--tries to parse infix that takes sets and has logical value
parseSSLinfix :: String -> [[String]] -> Maybe LogExpr
parseSSLinfix inf lsn =  testInfix inf lsn >>= str2SSLinfix inf

-- tries to parse a quantifier expression
parseQuant :: String -> [[String]] -> Maybe LogExpr
parseQuant q lsn = testQuant q lsn >>= str2Quant q

-- | tries to parse a quantifier expression with range
parseQuantRange :: String -> [[String]] -> Maybe LogExpr
parseQuantRange q lsn = testQuantRange q lsn >>= str2QuantRange q

-- tries to parse infix that takes sets and results in sets
parseSSSinfix :: String -> [[String]] -> Maybe SetExpr
parseSSSinfix inf lsn =  testInfix inf lsn >>= str2SSSinfix inf

-- | tries to parse a set expresion with one argument
parseSet :: ([[String]] -> Maybe [String]) -> ([String] -> Maybe SetExpr) -> [[String]] -> Maybe SetExpr
parseSet test str2set lsn = test lsn >>= str2set

-- | tries to parse an if expression
parseIf :: [[String]] -> Maybe SetExpr
parseIf lsn = testIf lsn >>= str2if

{-- | tries to parse a square
parseSquare :: [[String]] -> Maybe SetExpr
parseSquare lsn = testSquare lsn >>= strSq2prod-}

-- | tries to parse a set comprehension
parseSetCompr :: [[String]] -> Maybe SetExpr
parseSetCompr lsn = testSetCompr lsn >>= str2SetCompr

-- | tries to parse a set comprehension with range
parseSetComprRange :: [[String]] -> Maybe SetExpr
parseSetComprRange lsn = testSetComprRange lsn >>= str2SetComprRange

-- tries to convert a string to a logical expression
str2LogExpr :: [String] -> Maybe LogExpr
str2LogExpr ls = ( parseLoge testParens str2LogExpr lsn ) `mplus`
                 ( msum $ ap ( ap [parseLLLinfix] logLogLogInfixTable )  [lsn] ) `mplus` 
                 ( parseLoge (testPrefix "-.") str2Not lsn ) `mplus`
                 ( parseLoge testIsASet str2isASet lsn) `mplus`
                 ( msum $ ap ( ap [parseQuant, parseQuantRange] quantTable )  [lsn] ) `mplus`
                 ( msum $ ap ( ap [parseSSLinfix] setSetLogInfixTable )  [lsn] ) `mplus`
                 ( parseLoge testvar (Just . Lvar . head) lsn )
                 where
                    lsn = breakIntoZeroNest ls

-- tries to convert a string to a set expression
str2SetExpr :: [String] -> Maybe SetExpr
str2SetExpr ls = (parseSet testParens str2SetExpr lsn) `mplus`
                 ( msum $ ap ( ( ap [parseSSSinfix] setSetSetInfixTable ) )  [lsn] ) `mplus`
                 ( parseSetCompr lsn) `mplus` ( parseSetComprRange lsn) `mplus`
                 ( parseSet (testPrefix "-u") str2Neg lsn) `mplus`
                 ( parseIf lsn) `mplus`
                 ( parseSet testNum (Just . Snum . head) lsn ) `mplus`
                 ( parseSet testvar (Just . Svar . head) lsn )
                 where
                    lsn = breakIntoZeroNest ls

{--------------------------------------------------------------}
{------ helpful operations on strings--------------------------}
{--------------------------------------------------------------}

-- spanZeroNestLev breaks a list of strings into first level substrings
-- examples
-- spanZeroNestLev (0, [], words "( qt rq ) ab cd ( ef gh )" ) ==
-- (0,["qt","rq"],["ab","cd","(","ef","gh",")"])
-- spanZeroNestLev (0, [], words "ij ( qt rq ) ab cd ( ef gh )" ) ==
-- (0,["ij"],["(","qt","rq",")","ab","cd","(","ef","gh",")"])
spanZeroNestLev :: [String] -> [String] -> (Int, [String], [String]) -> (Int, [String], [String])
spanZeroNestLev open close (n, xs, xe) | null xe          = (n, xs, [])
                            | n == 0 && (not $ null xs)      = (0, xs, xe)
                            | head xe `elem` open             = spanZeroNestLev open close (n+1, xs ++ [head xe], tail xe)
                            | head xe `elem` close             = spanZeroNestLev open close (n-1, xs ++ [head xe], tail xe)
                            | otherwise        = spanZeroNestLev open close (n, xs ++ [head xe], tail xe)

-- a helper function for unfoldr
getNextBlock :: [String] -> Maybe ([String], [String])
getNextBlock [] = Nothing
getNextBlock ls = Just (snd3 tr, thrd3 tr)
             where tr = spanZeroNestLev ["(", "{"] [")", "}"] (0,[],ls)

-- breaks a formula into tokens at the zero nesting level
-- breakIntoZeroNest $ words "ij ( qt rq ) ab cd ( ef gh )" ==
-- [ ["ij"], ["qt", "rq"], ["ab"], ["ef", "gh"] ]
breakIntoZeroNest :: [String] -> [[String]]
breakIntoZeroNest = unfoldr getNextBlock

-- a safe tail function
stail :: [a] -> [a]
stail x = if null x then [] else tail x

-- | a helper function for unfoldr
getNextField :: Eq a => a -> [a] -> Maybe ([a],[a])
getNextField _ [] = Nothing
getNextField sep x = Just (fst sp, stail $ snd sp)
                     where sp = span (/= sep) x

-- | separate separates a list into fields acording to a separator. Note that it ignores a separator that
-- is the last element of the list (does not create an empty field if it is the last one
separate :: Eq a => a -> [a] -> [[a]]
separate sep = unfoldr (getNextField sep)
