;\c	    Copyright (C) 1990 Pertti Kellomaki
;\c	 
;\c	 This file is part of Taurus, a parser generator producing Scheme
;\c	 
;\c	 Taurus is free software; you can redistribute it and/or modify
;\c	 it under the terms of the GNU General Public License as published by
;\c	 the Free Software Foundation; either version 1, or (at your option)
;\c	 any later version.
;\c	 
;\c	 Taurus is distributed in the hope that it will be useful,
;\c	 but WITHOUT ANY WARRANTY; without even the implied warranty of
;\c	 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;\c	 GNU General Public License for more details.
;\c	 
;\c	 You should have received a copy of the GNU General Public License
;\c	 along with Taurus; see the file COPYING.  If not, write to
;\c	 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
;
;\node The LL(1) Condition
;\comment  node-name,  next,  previous,  up
;\chapter{The LL(1) Condition}
;
;These procedures form the module
;
(module tll1)

;\node The Meaning of the LL(1) Condition
;\comment  node-name,  next,  previous,  up
;\section{The Meaning of the LL(1) Condition}
;
;The LL(1) condition means that it is possible to derive
;from the grammar a deterministic recursive descent parser that needs
;only one token lookahead. Deterministic here means, that in places
;where parsing can take more than one route, the lookahead token always
;determines which route to take.  For a more complete discussion, see
;for example [Waite1984], p.156.  There are two places, where
;nondeterminism can occur: alternatives and iterations.
;
;\node Checking the Grammar, , , 
;\comment  node-name,  next,  previous,  up
;\section{Checking the Grammar}
;
;The procedure \code{check-ll1} checks that the grammar satisfies the
;LL(1) condition by mapping \code{check-ll1-rule} over \code{grammar}.
;
;\findex{check-ll1}
(define (check-ll1 grammar)
  ;; Entry point of the LL(1) check.  The cache of DANGEROUS-FOLLOW
  ;; sets is cleared first, then every rule of GRAMMAR is handed to
  ;; check-ll1-rule.  The value is the list of the per-rule results.
  (define (check-one rule)
    (check-ll1-rule rule grammar))
  (set! *dangerous-follow-sets* '())
  (map check-one grammar))

;The procedure \code{check-ll1-rule} checks each grammar rule for
;nondeterminisms. 
;
;\findex{check-ll1-rule}
(define (check-ll1-rule rule grammar)
  ;; Check the defining expression of RULE, starting with an empty
  ;; follow set.  The name of the nonterminal defined by RULE is passed
  ;; along so that diagnostics can mention it.
  (let* ((body (rule-expr rule))
         (name (nonterminal-name (rule-nonterminal rule))))
    (check-ll1-expr body '() grammar name)))


;Any nondeterminisms are reported using \code{report-nondeterminism}:
;
;\findex{report-nondeterminism}
(define (report-nondeterminism nonterminal-name . args)
  ;; Write a diagnostic to stderr-port: "Within rule NAME:" followed by
  ;; every item of ARGS and a final newline.  An item that is a pair is
  ;; written element by element, each element preceded by a space so
  ;; that neighbouring elements do not run together; any other item is
  ;; written as it is.
  ;;
  ;; for-each is used instead of map because the output order matters
  ;; and map does not promise to visit the elements from left to right.
  (define (show item)
    (display item stderr-port))
  (show "Within rule ")
  (show nonterminal-name)
  (show ":")
  (for-each (lambda (item)
              (if (pair? item)
                  (for-each (lambda (elem)
                              (show " ")
                              (show elem))
                            item)
                  (show item)))
            args)
  (newline stderr-port))

;\node Detecting Common Symbols, , , 
;\comment  node-name,  next,  previous,  up
;\subsection{Detecting Common Symbols}
;
;Basically, checking for nondeterminism means looking for common
;symbols in some sets. This is done with \code{have-common-symbols?}.
;
;\findex{have-common-symbols?}
(define (have-common-symbols? sets)
  ;; True when some element of the list SETS occurs in it more than
  ;; once.  The elements are compared with member exactly as given;
  ;; nested lists are NOT flattened, so a caller holding several sets
  ;; has to concatenate them into one list of symbols first.
  ;;
  ;; The list is scanned in reverse order.  On success the value is the
  ;; tail returned by member, otherwise #f.
  (let scan ((candidates (reverse sets)))
    (if (null? candidates)
        #f
        (or (member (car candidates) (cdr candidates))
            (scan (cdr candidates))))))
	     

;\node Checking Expressions, , , 
;\comment  node-name,  next,  previous,  up
;\subsection{Checking Expressions}
;
;Each type of expression needs its own special handling.
;
;\findex{check-ll1-expr}
(define (check-ll1-expr expr follow-set grammar current-nonterminal-name)
  ;; Dispatch on the kind of EXPR.  Terminals, actions and empty are
  ;; accepted without calling any checker; every other kind is passed,
  ;; together with the unchanged remaining arguments, to the checker for
  ;; that kind.  An expression of no known kind is reported through
  ;; taurus-error.
  (define (dispatch checker)
    (checker expr follow-set grammar current-nonterminal-name))
  (cond ((terminal? expr))
        ((action? expr))
        ((empty? expr))
        ((nonterminal? expr) (dispatch check-ll1-nonterminal))
        ((sequence? expr) (dispatch check-ll1-sequence))
        ((alternative? expr) (dispatch check-ll1-alternative))
        ((zero-iteration? expr) (dispatch check-ll1-iteration))
        ((nonzero-iteration? expr) (dispatch check-ll1-iteration))
        (else (taurus-error "check-ll1-expr: bad expression "
                            expr))))


;\node Nondeterminism In Sequences, , , 
;\comment  node-name,  next,  previous,  up
;\subsection{Nondeterminism In Sequences}
;
;Sequences themselves are always deterministic, because there is no
;choosing involved. Thus, only the elements of a sequence need to be
;checked for nondeterminism. Checking is done from right to left,
;because the \code{follow-set} is accumulated at the same time.
;
;\findex{check-ll1-sequence}
(define (check-ll1-sequence expr follow-set grammar current-nonterminal-name)
  ;; Check every element of the sequence EXPR, scanning from right to
  ;; left so that the follow set of each element can be built up on the
  ;; way.
  ;;
  ;; The follow set handed to the next element on the left depends on
  ;; whether the element just checked can reduce to empty:
  ;;  - if it can, its FIRST symbols (without empty) are added to the
  ;;    follow set collected so far;
  ;;  - if it cannot, nothing to the right of it can ever be seen as
  ;;    lookahead, so the follow set is replaced by its FIRST set.
  ;; Accumulating unconditionally would make, for example, the sequence
  ;; (iter* a) b a look ambiguous although it is not.
  ;;
  ;; remove is called as (remove item list), the same way as elsewhere
  ;; in this module.
  (let loop ((elements (reverse (sequence-elements expr)))
             (follow-set follow-set))
    (cond ((not (null? elements))
           (check-ll1-expr (car elements) follow-set grammar current-nonterminal-name)
           (loop (cdr elements)
                 (let ((firsts (first-set (car elements) grammar)))
                   (if (member (make-empty) firsts)
                       (set-union (remove (make-empty) firsts)
                                  follow-set)
                       firsts)))))))
    

;\node Nondeterminism In Alternatives, , , 
;\comment  node-name,  next,  previous,  up
;\subsection{Nondeterminism In Alternatives}
;
;If the FIRST sets of the choices in an alternative expression have
;terminal symbols in common, nondeterminism arises. There is no way for the
;parser automaton to decide, which alternative to choose. When
;generating the parser, the nondeterminism is reported.
;
;If one of the FIRST sets includes \code{empty}, the \dfn{FOLLOW} set
;{}(the set of terminal symbols that can follow the expression) must also
;be considered, because the symbols in it can also appear in
;the lookahead at that time.
;
;
;\findex{check-ll1-alternative}
(define (check-ll1-alternative expr follow-set grammar current-nonterminal-name)
  ;; Report the two kinds of nondeterminism an alternative can show:
  ;;  1. two choices can start with the same symbol;
  ;;  2. some choice can reduce to empty and a symbol of FOLLOW-SET can
  ;;     also start one of the choices.
  ;;
  ;; have-common-symbols? looks for duplicates in ONE list and does not
  ;; flatten nested lists, so the FIRST sets of the choices are
  ;; concatenated into a single list of symbols before it is called.
  (define (clashes-with-follow? symbols)
    ;; Does any symbol other than empty occur in FOLLOW-SET as well?
    (cond ((null? symbols) #f)
          ((equal? (car symbols) (make-empty))
           (clashes-with-follow? (cdr symbols)))
          ((member (car symbols) follow-set) #t)
          (else (clashes-with-follow? (cdr symbols)))))
  (let* ((first-sets (map (lambda (choice)
                            (first-set choice grammar))
                          (alternative-choices expr)))
         (first-symbols (apply append first-sets))
         (includes-empty (member (make-empty) first-symbols)))
    (if (have-common-symbols? first-symbols)
        (report-nondeterminism
         current-nonterminal-name
         "Choices of an alternative expression have common starting symbols."
         "Expression:"
         #\newline
         expr)
        #f)
    (if (and includes-empty
             (clashes-with-follow? first-symbols))
        (report-nondeterminism
         current-nonterminal-name
         "A choice in an alternative expression may reduce to empty, \n and the follow set and some of the choices have common starting symbols."
         expr)
        #f)))

;\node Nondeterminism In Iterations, , , 
;\comment  node-name,  next,  previous,  up
;\subsection{Nondeterminism In Iterations}
;
;Nondeterminism in iterations can be caused by two separate causes:
;ambiguity in termination of an iteration and iteration of an empty
;expression.
;
;Iteration is analogous to looping, so it can be reduced to self
;recursive nonterminals much the same way loops can be reduced to
;recursive function calls. From the reduced form it is easy to see how
;nondeterminism arises. If we convert
;\begin{example}
;{}(FOO (iter+ EXPR))
;\end{example}
;to
;\begin{example}
;{}(FOO EXPR (alt FOO empty))
;\end{example}
;\noindent
;{}(iterating \code{EXPR} one or more times is converted to
;expecting \code{EXPR} once and then expecting either \code{empty}, ie.
;just leaving the expression, or expecting \code{FOO} again), the
;potential nondeterminism is readily seen. If FIRST set of \code{EXPR} and
;the next expression have terminal symbols in common, nondeterminism arises.
;This leads to the same situation as in the previous section.
;
;Another problem with iterations appears when the iterand can reduce to
;\code{empty}, because the iteration could be continued forever. This
;kind of nondeterminism is easy to find, because it does not depend on
;the context of the expression.
;
;\findex{check-ll1-iteration}
(define (check-ll1-iteration expr follow-set grammar current-nonterminal-name)
  ;; Report the two kinds of nondeterminism an iteration can show:
  ;;  - the iterand can reduce to empty;
  ;;  - a symbol that can start the iterand is also in FOLLOW-SET, so
  ;;    the lookahead cannot tell whether to iterate once more or to
  ;;    leave the iteration.
  ;;
  ;; The second test is a real intersection test.  Looking for
  ;; duplicates in the concatenation of both lists would also fire when
  ;; FOLLOW-SET alone mentions a symbol twice, which says nothing about
  ;; the iteration.
  (define (any-in-follow-set? symbols)
    (cond ((null? symbols) #f)
          ((member (car symbols) follow-set) #t)
          (else (any-in-follow-set? (cdr symbols)))))
  (let ((set (first-set (iteration-iterand expr) grammar)))
    (cond ((member (make-empty) set)
           (report-nondeterminism
            current-nonterminal-name
            "Iteration of empty expression."
            expr))
          ((any-in-follow-set? set)
           (report-nondeterminism
            current-nonterminal-name
            "Ambiguity in ending iteration."
            expr)))))

;\node Handling of Nonterminals, , , 
;\comment  node-name,  next,  previous,  up
;\subsection{Handling of Nonterminals}
;
;Nondeterminism can arise even if all the expressions in the grammar
;are internally deterministic. This is caused by the interaction of
;rules. Whether an expression is deterministic or not depends on the
;context where it appears, i.e. its FOLLOW set.
;
;The problem is, that the FOLLOW set is different in each place
;where the nonterminal appears. The expression defining the nonterminal
;must thus be checked many times. If the expression were to be checked
;every time it appeared in a rule, it would result in a lot of
;duplicated effort.  This problem is avoided by introducing a new set,
;the \dfn{DANGEROUS-FOLLOW} set. This set includes all the terminal
;symbols that can cause nondeterminism if present in the FOLLOW set.
;
;To check that a nonterminal does not cause nondeterminism in the given
;context, its DANGEROUS-FOLLOW set is compared to the current
;\code{follow-set}. If there are common symbols, nondeterminism arises.
;
;NOTE: it is not adequate just to tell about the nondeterminism, its
;cause should be analyzed!
;
;\findex{check-ll1-nonterminal}
(define (check-ll1-nonterminal expr follow-set grammar current-nonterminal-name)
  ;; Report nondeterminism when a symbol of FOLLOW-SET (empty is
  ;; ignored) also belongs to the DANGEROUS-FOLLOW set of the
  ;; nonterminal EXPR.  The report lists the names of the symbols the
  ;; two sets share.
  ;;
  ;; The test is a real intersection test.  Looking for duplicates in
  ;; the concatenation of both lists would also fire when one of the
  ;; lists alone mentions a symbol twice, giving a report with no
  ;; common symbols in it.  Both sets are computed once and reused for
  ;; the test and for the message.
  (let* ((followers (remove (make-empty) follow-set))
         (dangerous (dangerous-follow-set expr grammar))
         (clash? (let scan ((symbols followers))
                   (cond ((null? symbols) #f)
                         ((member (car symbols) dangerous) #t)
                         (else (scan (cdr symbols)))))))
    (if clash?
        (report-nondeterminism
         current-nonterminal-name
         "Nonterminal "
         (nonterminal-name expr)
         " causes nondeterminism. "
         " Common symbols are:"
         (map terminal-name (set-intersection followers dangerous))))))

;\node Computing DANGEROUS-FOLLOW sets, , , 
;\comment  node-name,  next,  previous,  up
;\subsection{Computing DANGEROUS-FOLLOW sets}
;
;The DANGEROUS-FOLLOW set is computed by looking for alternative
;expressions and iterations whose FOLLOW set \emph{within the rule}
;includes \code{empty}. These are the potential places where
;nondeterminism might occur.  Each time a nonterminal appears in an
;expression, its DANGEROUS-FOLLOW set is compared with the current FOLLOW
;set. If there are terminal symbols in common, nondeterminism is reported
;and a more detailed examination is made in order to find the exact
;expression that caused the nondeterminism. The computed sets are kept
;in \code{*dangerous-follow-sets*}.
;
;\findex{*dangerous-follow-sets*}
;Association list used as a cache: each entry pairs a nonterminal with
;its DANGEROUS-FOLLOW set. It starts out empty, is reset by check-ll1
;and is extended by dangerous-follow-set.
(define *dangerous-follow-sets* '())

;\findex{dangerous-follow-set}
(define (dangerous-follow-set nonterminal grammar)
  ;; Return the DANGEROUS-FOLLOW set of NONTERMINAL, computing and
  ;; caching it in *dangerous-follow-sets* on the first request.
  ;;
  ;; The cache entry is registered with an empty set BEFORE the rule
  ;; body is examined.  A request for the same nonterminal made while
  ;; its own rule is being examined therefore finds that entry and
  ;; returns instead of recursing again.  The entry is filled in once
  ;; the computation is complete.
  (let ((cached (assoc nonterminal *dangerous-follow-sets*)))
    (if cached
        (cdr cached)
        (let ((entry (cons nonterminal '())))
          (set! *dangerous-follow-sets*
                (cons entry *dangerous-follow-sets*))
          (set-cdr! entry
                    (dangerous-follow-set-expr
                     (rule-expr (grammar-rule-for nonterminal grammar))
                     grammar))
          (cdr entry)))))

;\subsubsection{DANGEROUS-FOLLOW Set of an Expression}
;
;Each kind of expression needs different 
;treatment.
;
;\findex{dangerous-follow-set-expr}
(define (dangerous-follow-set-expr expr grammar)
  ;; Dispatch on the kind of EXPR.  Terminals, actions and empty have an
  ;; empty DANGEROUS-FOLLOW set; every other kind is delegated to the
  ;; procedure for that kind.  An expression of no known kind is
  ;; reported through taurus-error.
  (cond ((terminal? expr) '())
        ((action? expr) '())
        ((empty? expr) '())
        ((nonterminal? expr)
         (dangerous-follow-set expr grammar))
        ((sequence? expr)
         (dangerous-follow-set-sequence expr grammar))
        ((alternative? expr)
         (dangerous-follow-set-alternative expr grammar))
        ((zero-iteration? expr)
         (dangerous-follow-set-iteration expr grammar))
        ((nonzero-iteration? expr)
         (dangerous-follow-set-iteration expr grammar))
        (else (taurus-error "dangerous-follow-set-expr: bad expression "
                            expr))))
  

;\subsubsection{DANGEROUS-FOLLOW Set of a Sequence}
;
;The DANGEROUS-FOLLOW set of a sequence is collected with a right to
;left scan of the elements. The DANGEROUS-FOLLOW sets of the elements
;are added to the DANGEROUS-FOLLOW set of the sequence until the FIRST
;set of the element does not include \code{empty}. This element acts as a
;``fence'' that prevents the elements to its left from causing
;nondeterminism via rule interaction.
;
;\findex{dangerous-follow-set-sequence}
(define (dangerous-follow-set-sequence expr grammar)
  ;; Collect the DANGEROUS-FOLLOW set of the sequence EXPR with a right
  ;; to left scan of its elements.  The set of every visited element is
  ;; added to the result.  The scan goes on to the next element on the
  ;; left only while the element just visited can reduce to empty; the
  ;; first element that cannot is still included and then ends the
  ;; scan.
  ;;
  ;; The emptiness test is made on the current ELEMENT.  Testing the
  ;; FIRST set of the whole sequence instead would give the same answer
  ;; for every element and so could never stop in the middle.
  (let loop ((elements (reverse (sequence-elements expr)))
             (result '()))
    (if (null? elements)
        result
        (let* ((element (car elements))
               (collected (set-union
                           (dangerous-follow-set-expr element grammar)
                           result)))
          (if (member (make-empty) (first-set element grammar))
              (loop (cdr elements) collected)
              collected)))))

;\subsubsection{DANGEROUS-FOLLOW Set of an Alternative Expression}
;
;The DANGEROUS-FOLLOW set of an alternative expression includes the set
;union of DANGEROUS-FOLLOW sets of its choices.  If one of the choices
;in the expression may reduce to \code{empty}, the DANGEROUS-FOLLOW set
;includes also the set union of the FIRST sets of the choices.
;
;\findex{dangerous-follow-set-alternative}
(define (dangerous-follow-set-alternative expr grammar)
  ;; The result is the union of the DANGEROUS-FOLLOW sets of all
  ;; choices.  When the FIRST set of the whole alternative contains
  ;; empty, the union of the FIRST sets of the choices is added too.
  (define (union-over-choices set-of)
    ;; Fold set-union over (set-of choice) for every choice of EXPR.
    (let loop ((remaining (alternative-choices expr))
               (acc '()))
      (if (null? remaining)
          acc
          (loop (cdr remaining)
                (set-union (set-of (car remaining)) acc)))))
  (define (choice-df-sets)
    (union-over-choices
     (lambda (choice)
       (dangerous-follow-set-expr choice grammar))))
  (define (choice-first-sets)
    (union-over-choices
     (lambda (choice)
       (first-set choice grammar))))

  (if (member (make-empty) (first-set expr grammar))
      (set-union (choice-df-sets)
                 (choice-first-sets))
      (choice-df-sets)))

;\subsubsection{DANGEROUS-FOLLOW Set of an Iteration}
;
;The DANGEROUS-FOLLOW set of an iteration is the set union of its FIRST
;set and the DANGEROUS-FOLLOW set of the iterand, because the
;termination of iteration is based on looking at the FIRST set.
;
;\findex{dangerous-follow-set-iteration}
(define (dangerous-follow-set-iteration expr grammar)
  ;; Union of the FIRST set of the iteration EXPR itself and the
  ;; DANGEROUS-FOLLOW set of its iterand.
  (let* ((own-first (first-set expr grammar))
         (iterand-df (dangerous-follow-set-expr
                      (iteration-iterand expr)
                      grammar)))
    (set-union own-first iterand-df)))
