;;; tts.el -- translates TeX notation to s-exp notation

;; Copyright (C) 2005 Joe Corneli <jcorneli@math.utexas.edu>

;; Time-stamp: <jac -- Sun Jun 26 10:16:34 CDT 2005>

;; This file is not part of GNU Emacs, but it is distributed under
;; the same terms as GNU Emacs.

;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 2, or (at your
;; option) any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING.  If not, write to the
;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;;; Commentary

;;; Code:

(require 'cl)
(require 'tts-symbols)
(require 'tts-assist)
(require 'tts-greedy)
(require 'tts-continuation)

(defun tokenize-string (str)
  "Given a TeX expression, output the tokens it is made up of."
  ;; This is an alternative to `with-temp-buffer' that helps
  ;; with debugging:
;  (save-window-excursion
;    (pop-to-buffer "*scratch*")
  (with-temp-buffer
    (let (tokens)
      (delete-region (point-min) (point-max))
      (insert str)
      (goto-char (point-min))
      (while (not (eobp))
        (cond 
         ;; skip past whitespace
         ((looking-at "\\s-")
          (forward-char))
         ;; word separators
         ((looking-at (regexp-opt
                       '("[" "]" "(" ")"
                         "{" "}" "," "."
                         "+" "-" "*" "/"
                         "=" ":" "^" "_")))
          (setq tokens (cons (match-string-no-properties 0)
                             tokens))
          (forward-char))
         ((looking-at "\\\\")
          (setq tokens (cons (tts-next-word-token) tokens)))
         ((looking-at "[0-9]+")
          (setq tokens (cons (match-string 0) tokens))
          (goto-char (match-end 0)))
         (t
          (forward-char 1)
          (setq tokens (cons (buffer-substring-no-properties 
                              (point)
                              (1- (point)))
                             tokens)))))
      (reverse tokens))))

;; Note that some math expressions that might appear to be word tokens
;; are actually a concatenation of words: xyz means "multiply x, y,
;; and z together.
(defun tts-next-word-token ()
  (let ((beg (point)))
    ;; this treatment will obviously have to be
    ;; more complicated when we start dealing with
    ;; more complicated expressions.
    (when (looking-at "\\\\")
      (forward-char 1))
    (while (not (or (eobp)
                    (looking-at (regexp-opt
                                 '("[" "]" "(" ")"
                                   "{" "}" "," "."
                                   "+" "-" "*" "/"
                                   "\n" "\t" " " "\\"
                                   "=" ":" "^" "_")))))
      (forward-char 1))
    (buffer-substring-no-properties beg (point))))

(defun parser-1 (tokens &optional current)
  "Returns a list of objects recovered from TOKENS.
If present, CURRENT should specify the operator that is the head
of a run of objects we are working on at present; `parser-1'
should add all the objects that are being acted upon by this
operator.  If no operator is given as current, just find one
object.  The list of objects that is found is returned together
with a list of all remaining tokens."
  (let (elements just-a-symbol parens)
    (cond ((null tokens)
           (setq elements nil))
          ;; probably the order of different operator
          ;; cases here can be what determines the order
          ;; of operations.  (For this to work, `parser-1'
          ;; needs to be called with the "current" operator.)
          ((member (car tokens) '("(" "{" "["))
           (let ((chunk (parse-parens tokens nil nil)))
             ;; it should clearly just return the tokens that
             ;; it hasn't parsed yet.
             (setq elements (first chunk)
                   tokens (second chunk)
                   parens 'parens)))
          ;; just a symbol
          ((just-a-symbol-p (car tokens))
;           (setq just-a-symbol t)
           ;; allow some special translation for special symbols
           (setq elements  (or (number (car tokens))
                               (tts-greek (car tokens))
                               (intern (car tokens)))
;                 current '*~
                 tokens (cdr tokens))))
    ;; we may continue on!
    (when (and elements tokens)
      (parser-1-continuation))
    (list elements tokens parens)))

(defun parser (tokens tree waiting &optional parens)
  "Grind through the TOKENS and put them into the parse TREE.
Any tokens that are WAITING to be inserted will be inserted in
the appropriate spot.  If PARENS is non-nil, then WAITING is
enclosed in a pair of parentheses."
  (cond ((null tokens)
         (setq tree waiting))
        ((and (not (cdr tokens))
              (not waiting))
         (parser-one-token-case))
        ((member (car tokens) '("(" "{" "["))
         (parser-paren-case))
        ((operator (car tokens))
         (parser-operator-case))
        ;; this case is a little weird
        ((and (equal (car tokens) "-") 
              (number (second tokens))
              (not waiting))
         (parser-negative-number-case))
        ((and (modifier (car tokens)) 
              (not waiting))
         (parser-modifier-case))
        ((greedy-operator (car tokens))
         (parser-greedy-operator-case))
        ((equal (car tokens) ":")
         (parser-function-property-case))
        ((quantifier (car tokens))
         (parser-quantifier-case))
        ((equal (car tokens) "\\sum")
         (parser-summation-case))
        (t
         (parser-default-case)))
  tree)

(defun basic-waiting-builder (waiting parens new-parens new)
;  (consp waiting)
;  (consp new)
  (cond
   ;; function application case: 
   ;; YES: f(x), i.e., waiting=f parens=nil new-parens=t new=x
   ;; NO: (f)(x)
   ;;     (1-x)(1+x), i.e., waiting=(- x 1) new=(+ x 1)
   ;; ADD: f_1(x), i.e., waiting=(sub f 1) new=x
   ;; ADD: f_1(x-1), i.e., waiting=(sub f 1) new=(- x 1)
   ;;
   ;; To accomplish this, we distinguish between parenthesized
   ;; _sub/super-scripted_ objects, and other parethesized objects.
   ;; (Don't allow the latter sort.)
   ;; Note: this approach will not work on expressions like
   ;;  $(f-g)(x)$, which would need some _semantic_ parsing...
   ;; Note: we also must _disallow_ script-only operands
   ((and waiting
         (not (and parens
                   (not (eq parens 'script))))
         (and new-parens
              (not (eq new-parens 'script))))
    (list waiting new))
   ;; FLOW implicit multiplication cases: abc, (1-x)xy, (1-x)(1+x)(2-x)
   ;; note that the flow is different in the case (ab)H, where we have
   ;; waiting=(*~ a b) new=H parens='group
   ;; and in the case abc, where we have
   ;; waiting=(*~ a b) new=c parens='implicit
   ((and waiting
         (consp waiting)
         (eq (car waiting) '*~)
         (not (eq parens 'group))
         (not (eq parens 'function)))
    (append waiting new))
   ;; BASIC implicit multiplication cases: ab, (1-x)x, (1-x)(1+x)
   ;; ... we could probably even swing x(1-x) and skip f(1-x),
   ;;  but for now, temporarily, we'll rule out any parenthesized
   ;; second piece
   ;; YES: xy
   ;;      (1-x)x
   ;;      (1-x)(1+x)
   ;; NO: f(1-x)
   ;;
   ;; To ADD: x(x-1) [a little trickier - above, require that if
   ;; the symbol is a member of the argument, it can
   ;; be the car of a subexpression, but nothing else.]
   ((and waiting
         (or (or (consp waiting)
                 parens)
             (not new-parens)))
    (list '*~ waiting new))
   (t
    new)))

;; I think that this function may need to be revised so that it
;; trashes whatever tokens it is finished processing?
;;
;; Note: I'm not so sure that the `waiting' value returned by this
;; function is all that helpful.
;;
;; function application, $f_1(x)$, should be dealt with here,
;; not as a special case.  Note that in my first draft
;; of this function [pre Mon Apr 18 23:02:38 2005],
;; WAITING never appears on the right-hand-side of a rule.
;; We can fix that up by adding the function application rule here.
;;
;; It is a very strange thing to have the TREE leave `parser',
;; and I think that is probably a mistake.  Should check the
;; usage here.
(defun parse-parens (tokens tree old-waiting)
  "Takes TOKENS, TREE, and WAITING from the parser; first token is a paren.
Finds matching paren, and parses tuples and order of operations
groupings as appropriate to each.  Returns the result of its parse 
of whatever is in between the parens and a list of any left over tokens.
If given, PAREN is the leading paren."
  (let* ((paren-count 1)
         (tok (cdr tokens))
         (open-paren (car tokens))
         (close-paren (tts-matching-paren open-paren))
         waiting)
    ;; find the matching paren!  (A fun game for children's parties.)
    (while (and (> paren-count 0)
                tok)
      ;; accomodate several paren types.
      (cond ((equal (car tok) open-paren)
             (setq paren-count (1+ paren-count)))
            ((equal (car tok) close-paren)
             (setq paren-count (1- paren-count))))
      (setq tok (cdr tok)))
    ;; cases: first, did we find a match?
    (cond 
     ((> paren-count 0) (error "Paren mismatch"))
     (t 
      (let ((intervening (cdr (butlast tokens (1+ (length tok))))))
        ;; I don't know if this should be done for {'s or other
        ;; weird parens...
        (if (member "," intervening)
            ;; it looks like we have a tuple. We identify the
            ;; top-level objects, and run the parser on each of them.
            ;; if there is actually only *one* top-level object, then
            ;; we didn't have a tuple after all.  So we have to count.
            (let ((objects (objects-inside-tuple intervening)))
              (if (eq (length objects) 1)
                  ;; as if we really didn't see that ","
                  (setq waiting (parser intervening
                                        tree
                                        nil)
                        tokens tok)
                ;; tuple case
                (setq waiting (append (list 'tuple)
                                      (mapcar #'(lambda (elt)
                                                  (parser elt nil nil))
                                              objects))
                      tokens tok)))
          ;; we just have an order-of-operations grouping
          (setq waiting (parser intervening
                                tree
                                nil)
                tokens tok)))))
    ;; deal with function application, e.g.
    ;; $f(x) or $f_1(E)$.
    ;;
    ;; (Note that this is one of the many places in
    ;; this code where we deal with function application - so
    ;; I'm not completly sure which one is "the one"... or
    ;; if maybe, ironically, we need several.)
;    (when old-waiting
;      (setq waiting (append (list old-waiting)
;                            (list waiting))))
    (list waiting tokens)))

;; This combines elements of (the original) `parser-1' and
;; `parse-parens'.
;;
;; Note: Currently needs testing/debugging for the "{" case.
(defun parse-sub/sup-script (tokens)
  (if (equal (car tokens) "{")
      ;; look for a matching "}", and return whatever's
      ;; between them
      ;;
      ;; Note that we have to modify the `parse-parens' return value
      ;; to make it match the format that `parser-1' expects.  It
      ;; might be better to agree on a universal format for the return
      ;; value of functions in use in this code.
      (let ((retval (parse-parens tokens nil nil)))
        (list (car retval) (cadr retval)))
    ;; otherwise, we just parse the first token and return the result
    ;; of that.
    (let ((one-object (parser-1 (list (car tokens)))))
      (list (car one-object) (cdr tokens)))))

;; I'm not totally sure this works when there's a "remainder"
;; but it does work in the simple case of no remainder.
(defun parse-function-def (tokens waiting)
  (let* ( ;; peel off the ":", grab the "domain"
         (second-bit (parser-1 (cdr tokens)))
         ;; peel off the "\rightarrow", grab the "range"
         (third-bit (parser-1 (cdr (second second-bit)))))
    (setq waiting (append (list 'map)
                          (list waiting)
                          (list (car second-bit))
                          (list (car third-bit)))
          tokens (cadr third-bit))
    (list waiting tokens)))

(defun parse-summation (tokens)
  (let (summands
        for
        from
        to
        (tok (cdr tokens)))
    (when (equal (car tok) "_")
      (let ((sub (parse-sub/sup-script (cdr tok))))
        ;; here, actually, we should extract the "over"
        ;; variable.  Note: I don't know if it is better to
        ;; write 
        ;; (sum k :from 1 :to n :of (pow k 2))
        ;; or
        ;; (sum (pow k 2) :for k :from 1 :to n)
        ;; The first one has the order that people are
        ;; familiar with from LaTeX, even though the second one
        ;; is a bit more natural sounding.  I guess that we'll
        ;; go with the first one for now...
        (setq for (second (caar sub))
              from (third (caar sub))
              tok (cadr sub))))
    (when (equal (car tok) "^")
      (let ((sup (parse-sub/sup-script (cdr tok))))
        (setq to (caar sup)
              tok (cadr sup))))
    (list 'sum (parser tok nil nil) ':for for ':from from ':to to)))

(defun parse-quantifier (tokens)
  "The TOKENS represent an assertion headed by a quantifier."
  (let ((retval (list (quantifier (car tokens))))
        (tok (cdr tokens)))
    (while (and (not (equal (car tok) "\\in"))
                (not (equal (car tok) ":")))
      (if (equal (car tok) ",")
          ;;  we typically just have *variables* here -- but it seems
          ;; that there's there's no particular reason not to be more
          ;; general, for example, we might want to be able to write
          ;; confusing things like $\forall f(x) \in X : P(y)$
          (setq tok (cdr tok))
        (let ((next-thing (parser-1 tok)))
          (setq retval (cons (first next-thing) retval)
                tok (second next-thing)))))
    ;; deal with the (optional) $\in$ token, and if it is present,
    ;; Use `parser-1' to read off the space that the quantifier
    ;; is being applied to.
    (when (equal (car tok) "\\in")
      (setq retval (cons ':in retval)
            tok (cdr tok))
      (let ((next-thing (parser-1 tok)))
        (setq retval (cons (first next-thing) retval)
              tok (second next-thing))))
    ;; we take the cdr to get rid of the superfluous ":".  Later, we
    ;; should only take the cdr if there really is a colon present.
    (when (equal (car tok) ":")
      (setq retval (cons (parser (cdr tok) nil nil) 
                         (cons ':st retval))))
    (reverse retval)))

(defun objects-inside-tuple (tokens)
  "TOKENS is the list of tokens found between parens forming a tuple.
I.e. it comes from a comma-separated list of things.  We are to return
a list of the tokens that make up those things, i.e., separate the list
at the commas."
  (let (retval inner-list)
    (while tokens
      (let ((head (car tokens)))
        (cond ((equal head "(")
               ;; parens are grouping symbols, between which
               ;; there might be more commas.
               (let ((end (position ")" tokens :test #'equal)))
                 (if end
                     (dotimes (i end)
                       (setq inner-list
                             (cons (nth i tokens) inner-list)))
                   (error "Paren mismatch"))
                 (setq tokens (nthcdr end tokens))))
              ((equal head ",")
               (setq tokens (cdr tokens))
               (when inner-list
                 (setq retval (cons (reverse inner-list) retval))
                 (setq inner-list nil)))
              (t
               (setq inner-list (cons (car tokens) inner-list))
               (setq tokens (cdr tokens))))))
    ;; pick up the last object, if it is a single token
    (when inner-list
      (setq retval (cons (reverse inner-list) retval)))
    (reverse retval)))

(defun tts-parse-region (&optional beg end)
  "Parse a buffer substring, optionally demarcated by BEG and END.
Otherwise, the current region."
  (interactive)
  (let ((str (tts-parse-string
              (buffer-substring-no-properties
               (or beg (region-beginning))
               (or end (region-end))))))
        (when (interactive-p)
          (message (format "Result: %s" str)))
    str))

(defun tts-parse-string (str)
  (parser (tokenize-string str) nil nil))

(defun tts-parse (str)
  (interactive "MString: ")
  (message (format "%s" (tts-parse-string str))))

(provide 'tts)
;;; tts.el ends here.