;; lexer.scm: Lexical analyzer for SCSS
;; Copyright (C) 2009 Julian Graham

;; SCSS is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program.  If not, see <http://www.gnu.org/licenses/>.

(define (make-css21-lexer strm)
  (define line 0)
  (define num-lines 0)
  (define column 0)
  (define offset 0)  
  (define lines (list))

  (define (lexer:read-char strm)
    (let ((c (read-char strm)))
      (or (eof-object? c)
	  (begin
	    (if (char=? c #\nl)
		(begin
		  (set! line (+ line 1))
		  (if (> line num-lines)
		      (begin (if (null? lines)
				 (set! lines (list column))
				 (append! lines (list column)))
			     (set! num-lines (+ num-lines 1))))
		  (set! column 0))
		(set! column (+ column 1)))
	    (set! offset (+ offset 1))))
      c))
  (define (lexer:unread-char c strm)
    (set! offset (- offset 1))
    (if (char? c)
	(begin 
	  (if (char=? c #\nl)
	      (begin (set! line (- line 1))
		     (set! column (list-ref lines line))))
	  (unread-char c strm))))

  (define (next-internal? strm str case-sensitive?)
    (define (unread c) (lexer:unread-char c strm))
    (define pred? (if case-sensitive? char=? char-ci=?))
    (define (rewind match) (for-each unread match) #f)
    (let loop ((chrs (string->list str))
	       (match (list)))
      (or (null? chrs)
	  (let ((c (lexer:read-char strm)))
	    (cond ((eof-object? c) (rewind match))
		  ((pred? c (car chrs)) (loop (cdr chrs) (cons c match)))
		  (else (lexer:unread-char c strm) (rewind match)))))))

  (define (next? strm str) (next-internal? strm str #t))
  (define (next-ci? strm str) (next-internal? strm str #f))
  
  (define (read-number strm seen-dot?)
    (let ((c (lexer:read-char strm)))
      (cond ((eof-object? c) '())
	    ((char-numeric? c) (cons c (read-number strm seen-dot?)))
	    ((char=? c #\.) (if seen-dot? 
				(begin (lexer:unread-char c strm) '())
				(cons c (read-number strm #t))))
	    (else (lexer:unread-char c strm) '()))))
  
  (define (consume-whitespace)
    (let ((c (peek-char strm)))
      (or (eof-object? c)
	  (and (char-whitespace? c)
	       (lexer:read-char strm)
	       (consume-whitespace)))))

  (define (consume-comments)
    (let ((c (lexer:read-char strm)))
      (cond ((eof-object? c))
	    ((and (char=? c #\*) (char=? (peek-char strm) #\/)) 
	     (lexer:read-char strm))
	    (else (consume-comments)))))

  (define (consume-whitespace-and-comments)
    (let loop ((c (lexer:read-char strm)))
      (cond ((eof-object? c) #f)
 	    ((char-whitespace? c) 
	     (consume-whitespace) 
	     (loop (lexer:read-char strm)))
 	    ((and (char=? c #\/) (char=? (peek-char strm) #\*))
 	     (lexer:read-char strm)
 	     (consume-comments) 
 	     (loop (lexer:read-char strm)))
 	    (else (lexer:unread-char c strm)))))
  
  (define (read-string strm type)
    (let ((c (lexer:read-char strm)))
      (cond ((eof-object? c) '())
	    ((char=? c #\\)
	     (let ((pc (peek-char strm)))
	       (cond ((eqv? pc type)
		      (lexer:read-char strm) 
		      (cons* c type (read-string strm type)))
		     ((eqv? pc #\nl) 
		      (lexer:read-char strm) 
		      (read-string strm type))
		     (else (cons c (read-string strm type))))))
	    ((char=? c type) (list type))
	    (else (cons c (read-string strm type))))))

  (define (read-hex strm)
    (define (read-hex-inner count)
      (let ((c (peek-char strm)))
	(cond ((or (= count 6) (eof-object? c)) '())
	      ((and (> count 0) (char-whitespace? c)) 
	       (lexer:read-char strm) (list c))
	      ((hex? c) 
	       (lexer:read-char strm) 
	       (cons c (read-hex-inner (+ count 1))))
	      (else '()))))
    (let ((h (read-hex-inner 0))) 
      (if (null? h) #f (begin (consume-whitespace) h))))
  
  (define (hex? chr)
    (or (char-numeric? chr) (and (char-ci>=? chr #\A) (char-ci<=? chr #\F))))
  (define (read-escape strm)
    (let ((c (peek-char strm)))
      (if (hex? c) (read-hex strm) (begin (lexer:read-char strm) (list c)))))

  (define (read-url strm)
    (define (read-url-inner)
      (let ((c (lexer:read-char strm)))
	(cond ((eqv? c #\)) (lexer:unread-char c strm) '())
	      ((or (memv c '(#\! #\# #\$ #\% #\&))
		   (and (char-ci>=? c #\*) (char-ci<=? #\~)))
	       (cons c (read-url-inner)))
	      ((eqv? c #\\) (append (cons c (read-escape strm))
				    (read-url-inner)))
	      (else (lexer:unread-char c strm) '()))))
    (read-url-inner))

  (define (ident-start-char? chr)
    (or (char-alphabetic? chr) 
	(char=? chr #\_)
	(> (char->integer chr) 127)))
  (define (ident-rest-char? chr)
    (or (ident-start-char? chr)
	(char=? chr #\-)
	(char-numeric? chr)))

  (define (read-ident c strm)
    (define (read-start c)
      (cond ((eof-object? c) '())
	    ((ident-start-char? c) (list c))
	    ((char=? c #\\) (append (cons c (read-escape strm))))
	    (else '())))
    (define (read-rest)
      (let ((c (lexer:read-char strm)))
	(cond ((eof-object? c) '())
	      ((ident-rest-char? c) (cons c (read-rest)))
	      ((char=? c #\\) (append (cons c (read-escape strm)) 
				      (read-rest)))
	      (else (lexer:unread-char c strm) '()))))
    (if (and (not (eof-object? c)) (char=? c #\-))
	(let* ((cc (lexer:read-char strm))
	       (s (read-start cc)))
	  (if (null? s)
	      (begin (lexer:unread-char cc strm) #f)
	      (cons c (append s (read-rest)))))
	(let ((s (read-start c)))
	  (if (null? s) #f (append s (read-rest))))))

  (define (make-token t)
    (let ((ct (cdr t)))
      (make-lexical-token 
       (car t) 
       (make-source-location "foo" line column offset (string-length ct))
       ct)))

  (lambda ()
    (let ((c (lexer:read-char strm)))
      (cond ((eof-object? c) '*eoi*)
	    ((char-whitespace? c) 
	     (consume-whitespace)
	     (consume-whitespace-and-comments)
	     (make-token '(S . " ")))
	    ((and (eqv? c #\<) (next? strm "!--")) (make-token '(CD . "!--")))
	    ((and (eqv? c #\-) (next? strm "->")) (make-token '(CD . "->")))
	    ((and (eqv? c #\~) (next? strm "=")) 
	     (make-token '(INCLUDES . "~=")))
	    ((and (eqv? c #\|) (next? strm "=")) 
	     (make-token '(DASHMATCH . "|=")))
	    ((eqv? c #\*) (make-token '(STAR . "*")))
	    
	    ((or (and (eqv? c #\.) (char-numeric? (peek-char strm)))
		 (and (char-numeric? c)))
	     (let* ((n (read-number strm (eqv? c #\.)))
		    (o (list->string (cons c n))))
	       (cond 
		((next-ci? strm "em") 
		 (make-token (cons 'EMS (string-append o "em"))))
		((next-ci? strm "ex") 
		 (make-token (cons 'EXS (string-append o "ex"))))
		((next-ci? strm "px") 
		 (make-token (cons 'LENGTH (string-append o "px"))))
		((next-ci? strm "cm") 
		 (make-token (cons 'LENGTH (string-append o "cm"))))
		((next-ci? strm "mm") 
		 (make-token (cons 'LENGTH (string-append o "mm")))) 
		((next-ci? strm "in") 
		 (make-token (cons 'LENGTH (string-append o "in"))))
		((next-ci? strm "pt") 
		 (make-token (cons 'LENGTH (string-append o "pt"))))
		((next-ci? strm "pc") 
		 (make-token (cons 'LENGTH (string-append o "pc"))))
		((next-ci? strm "deg") 
		 (make-token (cons 'ANGLE (string-append o "deg"))))
		((next-ci? strm "rad") 
		 (make-token (cons 'ANGLE (string-append o "rad")))) 
		((next-ci? strm "grad") 
		 (make-token (cons 'ANGLE (string-append o "grad"))))
		((next-ci? strm "ms") 
		 (make-token (cons 'TIME (string-append o "ms"))))
		((next-ci? strm "s") 
		 (make-token (cons 'TIME (string-append o "s"))))
		((next-ci? strm "hz") 
		 (make-token (cons 'FREQUENCY (string-append o "hz"))))
		((next-ci? strm "khz")
		 (make-token (cons 'FREQUENCY (string-append o "khz"))))
		((next-ci? strm "%") 
		 (make-token (cons 'PERCENTAGE (string-append o "%"))))
		(else (let ((c (lexer:read-char strm)))
			(or (and=> (read-ident c strm)
				   (lambda (x) 
				     (make-token (cons 'DIMENSION 
						       (string-append 
							o (list->string x))))))
			    (begin (lexer:unread-char c strm)
				   (make-token (cons 'NUMBER o)))))))))
	    ((eqv? c #\.) (make-token '(DOT . ".")))
	    ((eqv? c #\;) (make-token '(SEMICOLON . ";")))
	    ((eqv? c #\:) (make-token '(COLON . ":")))
	    ((eqv? c #\=) (make-token '(EQUALS . "=")))
	    ((eqv? c #\[) (make-token '(LBRACK . "[")))
	    ((eqv? c #\]) (make-token '(RBRACK . "]")))
	    ((eqv? c #\)) (make-token '(RPAREN . ")")))
	    ((eqv? c #\}) (make-token '(RBRACE . "}")))
	    ((eqv? c #\{) (make-token '(LBRACE . "{")))
	    ((eqv? c #\+) (make-token '(PLUS . "+")))
	    ((eqv? c #\>) (make-token '(GREATER . ">")))
	    ((eqv? c #\,) (make-token '(COMMA . ",")))
	    ((eqv? c #\/) 
	     (let ((cc (peek-char strm)))
	       (if (and (char? cc) (char=? cc #\*))
		   (begin (lexer:read-char strm)
			  (consume-comments)
			  (make-token '(S . " ")))
		   (make-token '(DIV . "/")))))
	    ((eqv? c #\%) (make-token '(PERCENT . "%")))
	    ((or (eqv? c #\') (eqv? c #\")) 
	     (let ((s (read-string strm c)))
	       (if (or (null? s) (not (char=? (car (last-pair s)) c)))
		   (make-token (cons 'INVALID (list->string (cons c s))))
		   (make-token (cons 'STRING (list->string (cons c s)))))))

	    ;; Identifiers can start with a hyphen, as long as it's not 
	    ;; followed by a digit.

	    ((read-ident c strm) => 
	     (lambda (ident)
	       (let ((i (list->string ident))
		     (cc (peek-char strm)))
		 (cond ((and (not (eof-object? cc)) (char=? cc #\())
			(begin (lexer:read-char strm) 
			       (if (string-ci=? i "url")
				   (begin 
				     (consume-whitespace)
				     (let ((r 
					    (let ((c (lexer:read-char strm)))
					      (if (or (eqv? c #\') 
						      (eqv? c #\"))
						  (list->string 
						   (cons c (read-string 
							    strm c)))
						  (begin
						    (lexer:unread-char c strm)
						    (list->string 
						     (read-url strm)))))))
				       (consume-whitespace)
				       (lexer:read-char strm)
				       (make-token (cons 'URI 
							 (string-append "url(" 
									r 
									")")))))
				   (make-token (cons 'FUNCTION 
						     (string-append i "("))))))
		       (else (make-token (cons 'IDENT i)))))))
	    ((eqv? c #\-) (make-token '(MINUS . "-")))
	    ((eqv? c #\#)
	     (let ((ident (read-ident (lexer:read-char strm) strm)))
	       (if ident
		   (if (and (<= (length ident) 6) (every hex? ident))
		       (make-token (cons 'HEX (list->string ident)))
		       (make-token (cons 'IDSEL (list->string ident))))
		   (or (and=> (read-hex strm)
			      (lambda (x) 
				(make-token (cons 'HEX (list->string x)))))
		       (make-token '(HASH . "#"))))))
	    ((eqv? c #\@)
	     (cond ((next-ci? strm "import") 
		    (make-token '(IMPORT_SYM . "import")))
		   ((next-ci? strm "page") 
		    (make-token '(PAGE_SYM . "page")))
		   ((next-ci? strm "media") 
		    (make-token '(MEDIA_SYM . "media")))
		   ((next-ci? strm "charset") 
		    (make-token '(CHARSET_SYM . "charset")))
		   (else (make-token '(BAD_AT_KEYWORD . "@")))))
	    
	    ((eqv? c #\!)
	     (consume-whitespace-and-comments)
	     (if (next? strm "important") 
		 (make-token '(IMPORTANT_SYM . "!important"))
		 (make-token (cons 'DELIM c))))
	    (else (make-token (cons 'DELIM c)))))))
