;; parser.scm: Non-LALR parser implementation for SCSS
;; Copyright (C) 2009 Julian Graham

;; SCSS is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program.  If not, see <http://www.gnu.org/licenses/>.

(define (make-css21-parser lexer)
  (define current-token #f)
  (define current-category #f)
  (define current-value #f)

  (define token-stack (list))

  (define (get-token) 
    (define (set-data token)
      (set! current-token token)
      (if (eq? token '*eoi*)
	  (begin
	    (set! current-category '*eoi*)
	    (set! current-value '*eoi*))
	  (begin
	    (set! current-category (lexical-token-category current-token))
	    (set! current-value (lexical-token-value current-token)))))
    (if (null? token-stack) 
	(set-data (lexer))
	(begin (set-data (car token-stack))
	       (set! token-stack (cdr token-stack)))))

  (define (unget-token token)
    (set! current-token #f)
    (set! current-category #f)
    (set! current-value #f)

    (set! token-stack (cons token token-stack)))

  (define (discard-maybe-sgml)
    (get-token)
    (case current-category     
      ((S) (discard-maybe-sgml))
      ((CD) (discard-maybe-sgml))
      (else (unget-token current-token))))

  (define (discard-maybe-space)
    (get-token)
    (if (eq? current-category 'S) 
	(discard-maybe-space) 
	(unget-token current-token)))

  (define (block-aware-discard markers)
    (define block-nesting 0)
    (define (block-aware-discard-inner markers)
      (get-token)
      (cond ((eq? current-category '*eoi*) #f)
	    ((and (= block-nesting 0) (memq current-category markers)) #f)
	    ((eq? current-category 'LBRACE)
	     (set! block-nesting (+ block-nesting 1))
	     (block-aware-discard-inner markers))
	    ((eq? current-category 'RBRACE)
	     (if (> block-nesting 0)
		 (begin (set! block-nesting (- block-nesting 1))
			(or (and (= block-nesting 0) (memq 'RBRACE markers)) 
			    (block-aware-discard-inner markers)))
		 (begin #f)))
	    (else (block-aware-discard-inner markers))))
    (block-aware-discard-inner markers))

  (define (parse-maybe-charset)
    (get-token)
    (if (eq? current-category 'CHARSET_SYM)
	(parse-charset)
	(begin (unget-token current-token) '())))

  (define (parse-media-list lst)
    (get-token)
    (if (eq? current-category 'IDENT)
	(let* ((medium (string->symbol current-value))
	       (next (if lst 
			 (if (list? lst) 
			     (append lst (list medium))
			     (cons lst (list medium)))
			 medium)))
	  (discard-maybe-space)
	  (get-token)
	  (if (eq? current-category 'COMMA)		      
	      (parse-media-list next)
	      next))
	(begin (unget-token current-token) lst)))

  (define (parse-import)
    (define (finish-import val)
      (discard-maybe-space)
      (let ((media-list (parse-media-list #f)))
	(get-token)
	(if (eq? current-category 'SEMICOLON)
	    (if media-list 
		(list '@import val media-list)
		(list '@import val))
	    (block-aware-discard '(SEMICOLON)))))
    (discard-maybe-space)
    (get-token)
    (case current-category
      ((STRING) (finish-import current-value))
      ((URI) (finish-import current-value))
      (else (block-aware-discard '(SEMICOLON)))))

  (define (parse-import-list)
    (get-token)
    (case current-category
      ((IMPORT_SYM) 
       (let ((parsed-import (parse-import)))
	 (discard-maybe-sgml)
	 (cons parsed-import (parse-import-list))))
      ((BAD_AT_KEYWORD)
       (block-aware-discard '(SEMICOLON))
       (discard-maybe-sgml)
       (parse-import-list))
      (else (unget-token current-token) '())))

  (define (parse-rule-list lst)
    (define (parse-rule)
      (define (parse-valid-rule)
	(define (parse-media)
	  (define (parse-block-rule-list lst)
	    (define (parse-block-rule)
	      (get-token)
	      (case current-category
		((PAGE_SYM) (parse-page) #f)
		((IMPORT_SYM) (parse-import) #f)
		((BAD_AT_KEYWORD) #f)
		(else (unget-token current-token) (parse-ruleset))))

	    (let* ((rule (parse-block-rule))
		   (next (if rule 
			     (if lst (append lst (list rule)) (list rule))
			     lst)))
	      (discard-maybe-sgml)
	      (get-token)
	      (cond ((eq? current-category '*eoi*) next)
		    ((eq? current-category 'RBRACE) 
		     (unget-token current-token) 
		     next)
		    (else (unget-token current-token)
			  (parse-block-rule-list next)))))

	  (define (discard-save-block)
	    (get-token)
	    (case current-category
	      ((RBRACE) #t)
	      ((*eoi*) #f)
	      (else (discard-save-block))))

	  (discard-maybe-space)
	  (get-token)
	  (let ((media-list (and (not (eq? current-category 'LBRACE))
				 (begin (unget-token current-token)
					(parse-media-list #f))))
		(ruleset (begin (discard-maybe-space)
				(parse-block-rule-list #f))))
	    (discard-save-block)
	    (and ruleset
		 (if media-list 
		     (cons* '@media media-list ruleset) 
		     ruleset))))
	  
	(define (parse-page)
	  (discard-until-eof-or- semicolon? invalid-block?) 
	  '())
	(define (parse-ruleset)
	  (define (parse-selector-list lst first?)	    
	    (define (parse-selector lst tail)
	      (define (parse-simple-selector)
		(define (parse-specifier-list lst)
		  (define (parse-specifier)
		    (define (parse-attr)
		      (get-token)
		      (if (eq? current-category 'IDENT)
			  (let ((cv current-value)) 
			    (discard-maybe-space)
			    (get-token)
			    (cond 
			     ((eq? current-category 'RBRACK)
			      (string-append "[" cv "]"))
			     ((memq current-category 
				    '(EQUALS INCLUDES DASHMATCH))
			      (let ((op current-value))
				(discard-maybe-space)
				(get-token)
				(if (memq current-category '(STRING IDENT))
				    (let ((cvr current-value))
				      (discard-maybe-space)
				      (get-token)
				      (if (eq? current-category 'RBRACK)
					  (string-append "[" cv op cvr "]")
					  (begin (unget-token current-token) 
						 #f)))
				    (begin (unget-token current-token) #f))))
			     (else (unget-token current-token) #f)))
			  (begin (unget-token current-token) #f)))
			  
		    (get-token)
		    (let ((tok current-token))
		      (case current-category
			((IDSEL) (string-append "|#" current-value "|"))
			((DOT) (get-token) 
			       (if (eq? current-category 'IDENT) 
				   (string-append "." current-value)
				   (begin (unget-token current-token)
					  (unget-token tok)
					  #f)))
			((HEX) 
			 (and (not (char-numeric? (string-ref current-value 0)))
			      (string-append "/#" current-value "/")))
			((LBRACK) (discard-maybe-space) (parse-attr))
			((COLON) (let ((t current-token))
				   (get-token)
				   (if (eq? current-category 'IDENT)
				       (string-append ":" current-value)
				       (begin (unget-token current-token) 
					      (unget-token t)
					      #f))))
			(else (unget-token current-token) #f))))
		  (let ((specifier (parse-specifier)))
		    (if specifier
			(parse-specifier-list (append lst (list specifier)))
			(begin 
			  (get-token)
			  (and (memq current-category '(S LBRACE COMMA))
			       (begin (unget-token current-token) lst))))))
			    
		(get-token)
		(let ((selector (if (memq current-category '(IDENT STAR))
				    current-value
				    (begin (unget-token current-token) #f)))
		      (specifiers (parse-specifier-list '())))
		  (cond ((null? specifiers) (and=> selector string->symbol))
			(specifiers (add-specifiers selector specifiers))
			(else #f))))
	      
	      (let ((simple-selector (parse-simple-selector)))
		(discard-maybe-space)
		(if simple-selector
		    (begin
		      (get-token)
		      (if (memq current-category '(PLUS GREATER))
			  (let ((t (list (string->symbol current-value)
					 simple-selector)))
			    (discard-maybe-space)
			    (parse-selector 
			     lst (if tail (append tail (list t)) (list t))))
			  (begin 
			    (unget-token current-token)
			    (let* ((t (if tail
					  (fold-right (lambda (x y) 
							(append x (list y)))
						      simple-selector 
						      tail)
					  simple-selector))
				   (lst (cond ((and (list? lst)
						   (eq? (car lst) '//))
					      (append lst (list t)))
					     (lst (list '// lst t))
					     (else t))))
			      (parse-selector lst #f)))))
		    (begin
		      (get-token)
		      (let ((cc current-category))
			(unget-token current-token)
			(and (memq cc '(COMMA LBRACE))
			     (not tail)
			     lst))))))

	    (if lst 
		(begin 
		  (get-token)
		  (if (eq? current-category 'COMMA)
		      (begin
			(discard-maybe-space)
			(let* ((sel (parse-selector #f #f))
			       (a (if sel 
				      (append (if first? (list lst) lst) 
					      (list sel))
				      lst)))
			  (discard-maybe-sgml) 
			  (parse-selector-list a #f)))
		      (begin (unget-token current-token) lst)))
		(let ((selector (parse-selector #f #f)))
		  (and selector (parse-selector-list selector #t)))))

	  (define (parse-declaration-list lst)
	    (define (parse-declaration)
	      (define (parse-property-and-colon)
		(get-token)
		(if (eq? current-category 'IDENT)
		    (let ((val current-value))
		      (discard-maybe-space)
		      (get-token)
		      (if (eq? current-category 'COLON)
			  (begin (discard-maybe-space) val)
			  (begin (unget-token current-token) #f)))
		    (begin (unget-token current-token) #f)))

	      (define (parse-expr lst nesting)
		(get-token)
		(let* ((unary-operator
			(if (memq current-category '(PLUS MINUS))
			    (let ((v current-value)) (get-token) v)
			    #f))
		       (term
			(if (memq current-category
				  '(NUMBER
				    PERCENTAGE
				    LENGTH
				    EMS
				    EXS
				    ANGLE
				    TIME
				    FREQ))
			    (let ((cv current-value))
			      (discard-maybe-space)
			      (if unary-operator 
				  (string-append unary-operator cv)
				  cv))
			    (if unary-operator 
				(begin (unget-token current-token) #f)
				(case current-category
				  ((STRING) 
				   (let ((v current-value)) 
				     (discard-maybe-space) v))
				  ((IDENT)
				   (let ((v current-value)) 
				     (discard-maybe-space) v))
				  ((DIMEN)
				   (let ((v current-value)) 
				     (discard-maybe-space) v))
				  ((URI)
				   (let ((v current-value)) 
				     (discard-maybe-space) v))
				  ((hexcolor) '())
				  ((FUNCTION) 
				   (let ((v current-value))
				     (discard-maybe-space)
				     (let ((expr (string-trim-both
						  (parse-expr 
						   "" (+ nesting 1)))))
				       (get-token)
				       (if (and (> (string-length expr) 0)
						(memq current-category 
						      '(*eoi* RPAREN)))
					   (string-append v expr ")")
					   (begin (unget-token current-token)
						  #f)))))
				  ((PERCENT) (discard-maybe-space))
				  (else (unget-token current-token) #f))))))
		  (if term
		      (begin
			(get-token)
			(if (memq current-category '(COMMA DIV))
			    (let ((cv current-value))
			      (discard-maybe-space)
			      (parse-expr (string-append lst term cv)
					  nesting))
			    (begin
			      (unget-token current-token)
			      (parse-expr (string-append lst term " ") 
					  nesting))))
		      (begin
			(discard-maybe-space)
			(get-token)
			(let ((c current-category))
			  (unget-token current-token)
			  (case c 
			    ((SEMICOLON) lst)
			    ((RBRACE) lst)
			    ((IMPORTANT_SYM) lst)
			    ((RPAREN) (if (> nesting 0) lst ""))
			    ((*eoi*) lst)
			    (else "")))))))

	      (let* ((property (and=> 
				(and=> 
				 (and=> (parse-property-and-colon)
					replace-escapes)
				 string-downcase)
				string->symbol))
		     (expr (and property
				(let ((expr (string-trim-both 
					     (parse-expr "" 0))))
				  (and (> (string-length expr) 0)
				       (replace-escapes expr)))))
		     (prio (begin (get-token) 
				  (or (eq? current-category 'IMPORTANT_SYM)
				      (begin (unget-token current-token) #f)))))
		(and property
		     expr
		     (validate-property property expr)
		     (if prio 
			 (list '! (list property expr))
			 (list property expr)))))
	    
	    (let* ((declaration (parse-declaration))
		   (next (if (null? lst) 
			     (if declaration (list declaration) lst)
			     (if declaration 
				 (append lst (list declaration))
				 lst))))
	      (block-aware-discard '(SEMICOLON))
	      (if (eq? current-category 'SEMICOLON)
		  (begin
		    (discard-maybe-space)
		    (parse-declaration-list next))
		  next)))

	  (let ((selectors (parse-selector-list #f #t)))
	    (get-token)
	    (if (eq? current-category 'LBRACE)
		(begin (discard-maybe-space)
		       (let ((declarations (parse-declaration-list '())))
			 (and (not (null? declarations))
			      selectors
			      (cons selectors declarations))))
		(begin 
		       (block-aware-discard '(LBRACE))
		       (block-aware-discard '(RBRACE))
		       #f))))
	  	
	(case current-category
	  ((MEDIA_SYM) (parse-media))
	  ((PAGE_SYM) (parse-page))
	  (else (unget-token current-token) (parse-ruleset))))
      
      (get-token)
      (cond  ((eq? current-category '*eoi*) #f)
	     ((member current-category
		      '(MEDIA_SYM 
			PAGE_SYM 
			IDENT 
			STAR 
			IDSEL 
			HEX 
			DOT 
			LBRACK 
			COLON))
	      (parse-valid-rule))
	     ((eq? current-category 'IMPORT_SYM) 
	      (parse-import) (discard-maybe-sgml) #f)
	     ((eq? current-category 'BAD_AT_KEYWORD)
	      (block-aware-discard '(SEMICOLON RBRACE)) #f)
	     (else (block-aware-discard '(LBRACE))
		   (block-aware-discard '(RBRACE))
		   #f)))
	    
    (let* ((rule (parse-rule))
	   (next (if rule (append lst (list rule)) lst)))
      (get-token)
      (if (eq? current-category '*eoi*) 
	  next
	  (begin 
	    (unget-token current-token)
	    (discard-maybe-sgml)
	    (parse-rule-list next)))))

  (lambda ()
    (let ((charset (parse-maybe-charset)))
      (discard-maybe-sgml)
      (let ((imports (parse-import-list))
	    (rules (parse-rule-list '())))
	(append '(css) imports rules)))))
