;;; x001

;; Copyright (C) 2007, 2009, 2010, 2011 Thien-Thi Nguyen
;; Portions Copyright (C) 1999, 2000 Thierry Bzecourt
;;
;; This is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; This software is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this package; see the file COPYING.  If not, write to the
;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;; Boston, MA  02110-1301  USA.

(use-modules
 ((mixp expat) #:prefix E: #:select (make-xml-encoding
                                     parser-create
                                     hset!
                                     set-base
                                     get-base
                                     parse
                                     set-param-entity-parsing
                                     error-symbol
                                     get-locus
                                     error-string))
 ((mixp utils) #:prefix U: #:select (parse-data
                                     utf8->latin1
                                     utf8->ucs2
                                     xml->tree)))

(define testfile
  (let ((srcdir (getenv "srcdir")))
    (lambda (filename)
      (in-vicinity srcdir filename))))

(define (fs s . args)
  (apply simple-format #f s args))

(define verbose? (getenv "VERBOSE"))

(define (fso s . args)
  (and verbose? (apply simple-format #t s args)))

(define (fse s . args)
  (apply simple-format (current-error-port) s args))

(define (accumulator)
  (let* ((ls (list #f))
         (tp ls))
    (lambda stuff
      (cond ((null? stuff) (cdr ls))
            (else (set-cdr! tp stuff)
                  (set! tp (last-pair tp)))))))

(define memcheck? (getenv "MEMCHECK"))

(cond (memcheck? (gc) (gc) (gc) (sleep 5)))

(define (hset!/p parser . plist)
  (E:hset! parser (let loop ((alist '()) (ls plist))
                    (if (null? ls)
                        alist
                        (loop (acons (car ls) (cadr ls) alist)
                              (cddr ls))))))

(define (bad-parse? symbol)
  (eq? 'XML_STATUS_ERROR symbol))

;; A sample XML document used in several tests
(define my-document
  "<?xml version='1.0' encoding='ISO-8859-1' standalone='yes'?>
<!DOCTYPE doc [<!ENTITY program.name 'gexpat'>
               <!NOTATION SomeNotation PUBLIC 'nota'>]>
<!-- Comment for the sample document -->
<doc> <title> A sample XML document for &program.name; </title>

<chapter name='Chapter 1' id='1'>Contents of Chapter 1.<br/> </chapter>

<![CDATA[ba l> dsq>]]>

<chapter name='Chapter 2' id='2'>Contents of Chapter 2.</chapter>

</doc>")

(define (err s . args)
  (apply fse s args)
  (newline (current-error-port)))

(define trace (if (getenv "TRACE")
                  (lambda (s . args)
                    (apply simple-format #t s args)
                    (newline))
                  noop))

(define count (make-object-property))

(define test-counter 0)
(define test-failed (accumulator))
(set! (count test-failed) 0)

(define (failed++ desc failed s . args)
  (test-failed (list test-counter desc))
  (set! (count test-failed) (1+ (count test-failed)))
  (apply err (string-append "FAIL[~S]: ~A - ~A. " s)
         test-counter desc (failed) args))

(define (good desc . failed)
  (fso "PASS[~A]: ~A~A~%"
       test-counter desc
       (if (null? failed)
           ""
           (fs " - ~A" (car failed)))))

(define-macro (check expression desc failed-msg)
  `(let ((where (fs "evaluating: ~S" ',expression))
         (thunk (lambda () ,expression))
         (failed (lambda () ,failed-msg)))
     (define (bad s . args)
       (apply failed++ ,desc failed s args))
     (set! test-counter (1+ test-counter))
     (let ((res (catch #t thunk (lambda x
                                  (if (and (not (null? x))
                                           (pair? (car x)))
                                      (caar x)
                                      x)))))
       (cond ((eq? res #t)
              (good ,desc))
             ((eq? res #f)
              (bad "Result #f from ~A" where))
             (else
              (bad "Exception `~S' received while ~A" res where))))))

(define-macro (check-exc expression exc desc failed-msg)
  `(let ((during (fs "while evaluating: ~S" ',expression))
         (thunk (lambda () ,expression))
         (failed (lambda () ,failed-msg)))
     (define (bad s . args)
       (apply failed++ ,desc failed s args))
     (set! test-counter (1+ test-counter))
     (let ((res (catch #t thunk (lambda args (car args)))))
       (cond ((memq res '(#t #f))
              (bad "Missing exception `~S' ~A" ,exc during))
             ((eq? res ,exc)
              (good ,desc (failed)))
             (else
              (bad "Received unexpected answer `~S' ~A" res during))))))

(define TESTS '())

(define-macro (test description . body)
  `(set! TESTS (append! TESTS (list (lambda () ,description ,@body)))))

(define indent "")
(define (indent-increase)
  (set! indent (string-append indent "    ")))
(define (indent-decrease)
  (set! indent (substring indent 0 (- (string-length indent) 4))))

(test "Test handlers"
  ;; create the parser, and set the handlers
  (let* ((my-parser (E:parser-create))
         (v (E:get-locus my-parser)))
    (hset!/p
     my-parser

     'element-start
     (lambda (name atts)
       (E:get-locus my-parser v)
       (trace "~A ELEMENT START '~A' - attributes ~S @~A"
              indent name atts v)
       (indent-increase))

     'element-end
     (lambda (name)
       (indent-decrease)
       (trace "~A ELEMENT END '~A'"
              indent name))

     'character-data
     (lambda (value)
       (trace "~A CHARACTER DATA '~A' "
              indent value))

     'comment
     (lambda (value)
       (trace "~A COMMENT '~A' "
              indent value))

     'cdata-section-start
     (lambda ()
       (trace "~A CDATA START "
              indent)
       (indent-increase))

     'cdata-section-end
     (lambda ()
       (indent-decrease)
       (trace "~A CDATA END "
              indent)))

    ;; parse the document, and print the errors, if any
    (let ((res-parse (E:parse my-parser my-document #t)))
      (check (not (bad-parse? res-parse))
             "parse"
             (fs "Parser error: ~A(~A)"
                 (E:error-symbol my-parser)
                 (E:error-string
                  (E:error-symbol my-parser)))))))

(test "Try to parse a not well-formed document"
  (let ((bad-xml "<doc>dfssfd</do>")
        (badness 13)) ;;; -------^
    (trace "Parsing a not well-formed document with E:parse...")
    (let ((p (E:parser-create)))
      (hset!/p p 'character-data (lambda (c)
                                   '()))
      (let ((res (E:parse p bad-xml #t)))
        (check (bad-parse? res)
               "Parse bad XML"
               "Parser did not return an error on not well-formed document")
        (if (equal? res 0)
            (let ((es (E:error-symbol p))
                  (locus (E:get-locus p)))
              (check (eqv? es 'XML_ERROR_TAG_MISMATCH)
                     "Expect an 'XML_ERROR_TAG_MISMATCH error"
                     (fs "Expecting ~A received ~A"
                         'XML_ERROR_TAG_MISMATCH
                         es))
              (let ((want (vector 1 badness 0 badness)))
                (check (equal? want locus)
                       "Locus of badness"
                       (fs "Should be ~S but got ~S" want locus)))))))
    (trace "Parsing a not well-formed document with U:parse-data...")
    (check-exc (call-with-input-string bad-xml U:parse-data)
               'XML_ERROR_TAG_MISMATCH
               "Expect an 'XML_ERROR_TAG_MISMATCH exception"
               "Parser did not return the expected exception on not well-formed document")))

(test "Test unparsed entities and notations"
  (let ((xml-doc "<!DOCTYPE doc [
<!NOTATION vrml PUBLIC 'VRML 2'>
<!ENTITY Antarctica SYSTEM 'http://www.antarctica.net' NDATA vrml>
<!ATTLIST World src ENTITY #REQUIRED>]>
<doc>
 <World src='Antarctica' />
</doc>")
        (my-parser (E:parser-create))
        (acc (accumulator)))
    (hset!/p my-parser
             'notation-decl
             (lambda (notation-name base system-id public-id)
               (acc `(,notation-name
                      ,public-id)))
             'unparsed-entity-decl
             (lambda (entity-name base system-id public-id n-name)
               (acc `(,entity-name
                      ,system-id))))
    (let ((res-parse (E:parse my-parser xml-doc #t)))
      (check (not (bad-parse? res-parse))
             "unparsed entities"
             (fs "Parser error: ~A"
                 (E:error-string
                  (E:error-symbol my-parser))))
      (check (equal? (acc)
                     '(("vrml" "VRML 2")
                       ("Antarctica" "http://www.antarctica.net")))
             "unparsed entities"
             (fs "Handler error: composed the following user data: ~A"
                 (acc))))))

(test "Test the error string reporting"
  (let ((msg (E:error-string 'XML_ERROR_UNCLOSED_TOKEN)))
    (check (> (string-length msg) 1)
           "error string not empty"
           (fs "Returned message was `~A'" msg))))

(test "Create and print an encoding"
  (let* ((map (make-vector 256 67))
         (convert (lambda (s) (trace "convert") 68))
         (release (lambda () (trace "release")))
         (encoding (E:make-xml-encoding map convert release)))
    (trace "~S" encoding)
    #t))

(test "Use a document with characters which have a wrong encoding"
  ;; Wrong encoding in the document should raise an error.
  (let* ((doc "<?xml version='1.0' encoding='US-ASCII'
standalone='yes'?> <body>Les Franais ne sont pas ascii</body>")
         (my-parser (E:parser-create))
         (res-parse (E:parse my-parser doc #t)))
    (check (bad-parse? res-parse)
           "Expect an encoding error"
           "Error, expecting an encoding error")
    (if (bad-parse? res-parse)
        (let ((es (E:error-symbol my-parser)))
          (check (eqv? es 'XML_ERROR_INVALID_TOKEN)
                 "Ok, got an INVALID TOKEN error as expected"
                 (fs "Error: expecting ~A, got ~A"
                     'XML_ERROR_INVALID_TOKEN
                     es))))))

';;; DISABLED.  TODO: unknown encoding handlers are not easy to test.
(test "Test parsing of unknown encoding"
  (let ((my-parser (E:parser-create "thb-encoding")))
    (hset!/p my-parser
             'unknown-encoding
             (lambda (name)
               (if (string=? name "thb-encoding")
                   (let ((map (make-vector 256 67))
                         (convert (lambda (s) (trace "convert") 68))
                         (release (lambda () (trace "release"))))
                     (begin
                       (trace "Recognized encoding ~S" name)
                       (E:make-xml-encoding map convert release))))))
    (let ((res-parse (E:parse my-parser my-document #t)))
      (check (not (bad-parse? res-parse))
             "Unknown encoding handler"
             (fs "Parser error: ~A"
                 (E:error-string
                  (E:error-symbol my-parser)))))))

(test "Use the base"
  (let ((my-parser (E:parser-create))
        (my-base "My base"))
    (hset!/p my-parser
             'notation-decl
             (lambda (notation-name base system-id public-id)
               (trace " NOTATION: ~A=~S, ~A=~S, ~A=~S, ~A=~S "
                      'notation-name notation-name
                      'base base
                      'system-id system-id
                      'public-id public-id)
               (check (equal? my-base base)
                      "in handler"
                      (fs "expected ~S but got ~S" my-base base))))
    (let ((base (E:get-base my-parser)))
      (check (not base)
             "initially #f"
             (fs "expected ~S but got ~S" #f base)))
    (E:set-base my-parser my-base)
    (let ((base (E:get-base my-parser)))
      (check (equal? my-base base)
             "from E:get-base"
             (fs "expected ~S but got ~S" my-base base)))
    (let ((res-parse (E:parse my-parser my-document #t)))
      (check (not (bad-parse? res-parse))
             "parse w/ base"
             (fs "Parser error: ~A"
                 (E:error-string (E:error-symbol my-parser)))))))

(test "Test the encoding conversion functions"
  (let ((from "an ASCII string"))
    (check (equal? (U:utf8->latin1 from) from)
           "U:utf8->latin1 on ASCII"
           (fs "error with ascii string ~A != ~A~%"
               (U:utf8->latin1 from) from)))
  (let ((from "Bézecourt")(to "Bzecourt"))
    (check (equal? (U:utf8->latin1 from) to)
           "U:utf8->latin1 on Latin1"
           "error with Latin1 string~%"))
  ;; From rfc-2279
  (let ((from (map integer->char '(#x41 #xE2 #x89 #xA2 #xCE #x91 #x2E)))
        (to (map (lambda (c) (map integer->char c))
                 '((#x00 #x41) (#x22 #x62) (#x03 #x91) (#x00 #x2E)))))
    (check (equal? (U:utf8->ucs2 from) to)
           "U:utf8->ucs2"
           "error with rfc-2279 sample #1~%"))
  (let ((from (map integer->char
                   '(#xED #x95 #x9C #xEA #xB5 #xAD #xEC #x96 #xB4)))
        (to (map (lambda (c) (map integer->char c))
                 '((#xD5 #x5C) (#xAD #x6D) (#xC5 #xB4)))))
    (check (equal? (U:utf8->ucs2 from) to)
           "U:utf8->ucs2"
           "error with rfc-2279 sample #2~%"))
  (let ((from (map integer->char
                   '(#xE6 #x97 #xA5 #xE6 #x9C #xAC #xE8 #xAA #x9E)))
        (to (map (lambda (c) (map integer->char c))
                 '((#x65 #xE5) (#x67 #x2C) (#x8A #x9E)))))
    (check (equal? (U:utf8->ucs2 from) to)
           "U:utf8->ucs2"
           "error with rfc-2279 sample #3~%"))
  (check-exc (let ((from (map integer->char '(#xFF #xFF))))
               (U:utf8->ucs2 from))
             'invalid-utf8
             "U:utf8->ucs2 on invalid input"
             "An error was expected with invalid UTF-8 input (1)")
  (check-exc (U:utf8->ucs2 (map integer->char '(#xC0 #x80)))
             'invalid-utf8
             "U:utf8->ucs2 on invalid input"
             "An error was expected with invalid UTF-8 input (2)")
  (check-exc (U:utf8->ucs2 (map integer->char '(#xED #xA1 #x8C #xED #xBE #xB4)))
             'no-ucs2
             "U:utf8->ucs2 on invalid input"
             "An error was expected with invalid UTF-8 input (3)"))

(test "Test the XML to tree conversion"
  (let ((document "<foo><bar>Some text</bar><void/></foo>"))
    (let ((xml-tree (call-with-input-string document
                                            U:xml->tree))
          (expected '((element ("foo" ())
                        (element ("bar" ())
                          (character-data "Some text"))
                        (element ("void" ()))))))
      (check (equal? xml-tree expected)
             "xml->tree"
             (fs "tree built from XML different from expected tree ~A~%!=~%~A"
                 xml-tree expected))))
  (let* ((file (testfile "REC-xml-20081126.xml"))
         (xml-tree (call-with-input-file file U:xml->tree)))
    (define (nth n ls)
      (list-ref ls n))
    (check (equal? (nth 1 (nth 37 (nth 22 (nth 15 (nth 1 xml-tree)))))
                   '("div2" (("id" . "sec-predefined-ent"))))
           "check an element in the tree"
           "failed")))

(test "Test the DTD parsing feature"
  (let ((my-document
         (fs "<!DOCTYPE numbers SYSTEM ~S>
<numbers>&numbers;</numbers>" (testfile "test.dtd")))
        (my-parser (E:parser-create))
        (str ""))
    ;; Make sure sub.inc is available in cwd.
    (let* ((sub "sub.inc")
           (here (in-vicinity "." sub)))
      (or (file-exists? here)
          (copy-file (testfile sub) here)))
    (E:set-param-entity-parsing
     my-parser 'XML_PARAM_ENTITY_PARSING_ALWAYS)
    (hset!/p my-parser
             'character-data
             (lambda (value)
               (set! str (string-append str value)))
             'external-entity-ref
             (lambda (context base system-id public-id)
               (open-file system-id "r")))
    (let ((res-parse (E:parse my-parser
                              my-document
                              #t)))
      (check (not (bad-parse? res-parse))
             "parse"
             (fs "Parser error: ~A(~A)"
                 (E:error-symbol my-parser)
                 (E:error-string (E:error-symbol my-parser))))
      (check (equal? str "0, 2, 4, 6, 8, 1, 3, 5, 7, 9")
             "check the entity expansion"
             "failed"))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; Main program : executes test functions, and catches the errors
;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(let ((errors-list '()))
  (for-each (lambda (fn)
              (let* ((failed-count-before (count test-failed))
                     (name (procedure-documentation fn)))
                (fso "* ~A~%" name)
                (fn)
                (if (= (count test-failed) failed-count-before)
                    (fso "=> ok")
                    (fso "=> not ok: ~A tests failed"
                         (- (count test-failed)
                            failed-count-before)))
                (fso "~%~%")))
            TESTS)
  (if (zero? (count test-failed))
      (fso "All tests are successful~%")
      (begin
        (fso "The following tests failed:~%")
        (for-each (lambda (t)
                    (fso "\t~A: ~A~%" (car t) (cadr t)))
                  (test-failed))
        (exit #f))))

(cond (memcheck? (gc) (sleep 5)
                 (gc) (sleep 5)
                 (gc) (sleep 5)
                 (gc) (sleep 5)))

;;; Local variables:
;;; mode: scheme
;;; coding: iso-latin-1
;;; End:

;;; x001 ends here
