;;; -*- Mode: Emacs-Lisp; Syntax: Common-lisp; Base: 10; -*-
;;; File: ll1-sdts.el
;;; Author: Heinz Schmidt (hws@ICSI.Berkeley.EDU)
;;; Copyright (C) International Computer Science Institute, 1990, 1991
;;;
;;; COPYRIGHT NOTICE: This code is provided "AS IS" WITHOUT ANY WARRANTY.
;;; It is subject to the terms of the GNU EMACS GENERAL PUBLIC LICENSE
;;; described in a file COPYING in the GNU EMACS distribution or to be obtained
;;; from Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139
;;;*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
;;;* FUNCTION: Syntax directed translation schemes (SDTS) for
;;;*           interactive conversion between languages; or for transforming
;;;*           high-level programming styles to lower-level languages that
;;;*           do not have macros or where we do not want to use macros.
;;;*
;;;*           defparse defines input (parsing) translation schemes 
;;;*                    concrete repr -> abstract syntax trees.
;;;*           These SDTS are defined by rules mapping the result of LL(1)
;;;*           parsing to lists.  The input SDTS constructs an abstract syntax
;;;*           tree bottom-up on return from recognition steps in a recursive
;;;*           descent (top-down) parsing. The rules are compiled functions
;;;*           rather than table driven making for fast parsers that need a
;;;*           little space (for the function code).
;;;*
;;;* HISTORY:
;;;* Last edited: May 24 00:26 1991 (hws)
;;;*  Apr 14 11:36 1991 (hws): uses backquote instead of mini-cl-macros now
;;;* Created: Fri Dec 21 13:55:01 1990 (hws)
;;;*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

(provide 'll1-sdts)
(require 'backquote)
(require 'mini-cl)

;;;----------------------------------------------------------------------------
;;;
;;; AUXILIARIES -- Naming
;;;

(defun what-name (scheme type) (intern (format "%s-%s" scheme type)))
;;;(ASSERT (eq (what-name 'check 'module) 'check-module))

;;;----------------------------------------------------------------------------
;;;
;;; LEXICAL LEVEL --  We rely on the mode syntax to simplify the lexical level.
;;;

;;; LL(1) literal
(defun may-see (s)
  (skip-layout)
  (when (looking-at s)
	(forward-char (length s))
	t))

;;; LL(0) literal
(defun must-see (s)
  (or (may-see s) (error (format "Expected token %s!" s))))

;;; TOKEN
(defun token-to (end trafo)
  (if (null trafo) (setq trafo (function intern)))
  (skip-layout)
  (let ((theend (save-excursion (funcall end) (point))))
    (cond ((= (point) (point-max)) ':EOF)
	  ((= (point) theend) nil)
	  (t (funcall trafo (buffer-substring (point) theend))))))

(defun this-token (&optional class trafo) 
  (if (null class) (setq class 'forward-symbol))
  (token-to class trafo))
(defun next-token (&optional class trafo) 
  (if (null class) (setq class 'forward-symbol))
  (if (null trafo) (setq trafo (function intern)))
  (prog1 (this-token class trafo) (funcall class)))

;;; SYMBOLS
(defun forward-symbol ()
  (while (and (not (= (point) (point-max)))
	      (memq (char-syntax (char-after (point))) '(?w ?_)))
    (forward-char 1)))

(defun forward-string ()
  (and (looking-at "\"")
       (forward-sexp)))

;;; LAYOUT
(defvar preserve-comment t		;optionally preserve top-level comments
  "*Whether or not comments are collected when reading and translating
 input from an Emacs buffer.")

(defun skip-white-space () (re-search-forward "[ \t\n]*"))
(defun skip-layout ()
  (let (done comment p end)
    (while (not done)
      (skip-white-space)
      (if (looking-at comment-start)
	  (let ((p (point))
		(end (progn (end-of-line) (point))))
	    (forward-char 1)
	    (when preserve-comment
		  (setq comment (buffer-substring p end))
		  (if (consp preserve-comment)
		      (push comment preserve-comment)
		    (setq preserve-comment 
			  (list comment preserve-comment)))))
	(setq done t)))))

;;; LL(0) 
(defmacro must-be (form)
  ( ` (or (, form) (error (format "Failure for %s!" '(, form))))))

;;;----------------------------------------------------------------------------
;;;
;;; PARSING
;;;

;; Parser functions are compiled rules of syntax-directed translation 
;; scheme (sdts). The underlying grammar must be LL(1). No checks are made!

(defmacro defparse (name &rest rules)
  "Expands to a parser predicate for the nonterminal NAME. The mapping from
concrete representation to abstract syntax (input translation scheme) is 
defined by rules of the form  ( input-elm* -> sexp ).

The input elements are expanded to a LL (1) parser for NAME. The first input
element is tried, if this succeeds, subsequent input elements must be present.
If the first input element fails a subsequent rule for NAME is tried or
else the parser for NAME fails.

Simple input elements are of the form

 \"chars\"   -- a literal token
  name     -- any token recognized by (next-token) and bound to name, see also 
              'lex' below.
 (name)    -- nonterminal NAME, the parse tree is bound to NAME, (no multiple 
              occurrences in one rule!), see also below.

Complex input elements are of the form

 (lex var class tr)     -- a token recognized by (next-token class tr)
    CLASS defines the extent of the token by moving point but must not have any
    other sideeffect. If point is not moved the token is considered not present.
    For instance 'forward-word is the class of words.  TR translates the token
    string to the token to be returned.  CLASS defaults to 'forward-symbol and TR
    defaults to 'intern.

 (var name)             -- nonterminal NAME, the result is bound to VAR.

 (* var name \"x\" &optional min max)  -- recognizes a list of constructs for 
    the nonterminal NAME.  The elements of the list are separated by X. X may be
    empty (\"\"). If MIN and/or MAX are given, the list must contain at least MIN
    (at most MAX, respectively) elements.  The list is bound to var.

SEXP defines a translation from the concrete representation to an abstract
syntax tree.  When the input elements succeed, ie. when the corresponding
parser for one rule succeeds, it binds the abstract syntax subtrees or tokens
(atoms) to the variables occuring in the input elements, evaluates SEXP
with this binding and then returns the result as the tree of the current
nonterminal NAME.

The input pointer will be after the concrete input for NAME when the parser
succeeds but before any whitespace and/or comments following the construct.
Whitespace and comments are skipped using the function skip-layout BEFORE
recognizing each token.

NAME does not have any sideeffects when the parser fails on a the first input
elements of all rules.  Failing (after a sideeffect) in a subsequent element of
a rule is considered an error and usually leads to a meaningful error message
leaving point at the place of the error."  
  (let* ((pname (what-name 'parse name)) 
	 (result (gensym))
	 (rules-code 
	  (mapcar (function 
		   (lambda (rule)
		     (apply (function rule-code)
                            result
			    (reverse rule))))
		  rules))) 
    ( ` (defun (, pname) ()
	 (interactive)			;just for testing 	 
	 (let ((, result))
            (or (,@ rules-code))
            (, result))))))

(defun rule-code (result translation ignore &rest reverse-input)
  (let* ((input (reverse reverse-input))
	 (first (car input))
	 (rest (cdr input))
	 vars
	 (first-code 
	  (let ((code (input-code first t)))
	    (setq vars (append vars (cdr code)))
	    (car code)))
	 (rest-code (mapcar (function 
			     (lambda (item)
			       (let ((code (input-code item)))
				 (setq vars (append vars (cdr code)))
				 (car code))))
			    rest)))
    (if vars 
	( ` (let (, vars) (when (, first-code) 
			    (,@ rest-code) 
			    (setq (, result) (, translation)) t)))
      ( ` (when (, first-code) (,@ rest-code) (setq (, result) (, translation)) t))
      )))

(defun input-code (item &optional is-first)
  (let (code var class vars)
    (setq code
	  (cond ((stringp item) 
		 (if is-first ( ` (may-see (, item))) ( ` (must-see (, item)))))
		((symbolp item) (push item vars);; LL(0)
		 ( ` (setq (, item) (maybe-mandatory (, is-first) (next-token)))))
		((consp item)
		 (setq var (car item))
		 (cond ((eq var '*)
			(setq var (cadr item))
			(push var vars)
			( ` (setq (, var) (maybe-mandatory
					   (, is-first)
					   (, (apply (function iterator-code) 
						     (cddr item)))))))
		       ((eq var 'lex)
			(setq var (cadr item)) 
			(push var vars)
			( ` (setq (, var) 
				  (maybe-mandatory
				   (, is-first)
				   (next-token '(, (caddr item)) '(, (cadddr item)))))))
		       (t (push var vars)
			  (setq class 
				(if (= (length item) 2) (cadr item) var))
			  ( ` (setq (, var) 
				    (maybe-mandatory (, is-first)
						     ((, (what-name 'parse class)))))))))))
    (cons code vars)))

(defmacro maybe-mandatory (is-first parse-form)
  (if is-first parse-form
      ( ` (must-be (, parse-form)) )))
  
(defun iterator-code (class separator &optional min max)
  (let ((parser (what-name 'parse class)))
    ;;` (parse-list (function (, parser))  (, class) (, separator) (, (or min 0)) (, max))
    ( ` (parse-list (function (, parser))  '(, class) (, separator) (, (or min 0)) (, max))
      )))

;;; x*, [x], x(*,min,max): list, options, iterations

(defun parse-list (elt-parser class separator min &optional max)
  "Parses a list of elements acceptable to ELT-PARSER. 
CLASS is a symbol used in error messages. The elements are separated by SEPARATOR. 
At list MIN elements must be present. At most MAX elements are parsed.
MIN defaults to 0, and MAX to an arbitrary large number.
The function returns NIL on failure, ':empty-list for the empty list
and otherwise the list of elements returned by ELT-PARSER."
  (do (elt (i 1 (1+ i))
       collection done)
      (done (if (consp collection) ;; nonempty list
		(reverse collection)
	      collection))
      (setq elt (funcall elt-parser))
      (cond
       ((and (null elt) (= i 1))	;empty
	(setq done t)
	(if (= min 0) (setq collection ':empty-list))) ;empty acceptable else failure
       ((null elt) 
	(if (not (equal separator ""))
	    (error "Saw \"%s\" and expected another %s." separator class)
	  (setq done t)))
       ((<= i min)
	(push elt collection)
	(if (= i min)
	    (cond ((and max (= i max)) 
		   (setq done t collection (reverse collection))) ; over
		  ((may-see separator) t) ; maybe more of them 
		  (t (setq done t)))
	  ;; must-see separator
	  (if (not (may-see separator)) (error "Expected a %s list with at least %s elements."
					       class min))))
       ((and max (= i max))		; over
	(push elt collection) (setq done t))
       (t				;there is an elt
	(push elt collection)
	(if (not (may-see separator)) (setq done t))))))

