#! /usr/local/bin/tclsh
#
# Copyright (c) 1994
#	The Regents of the University of California.  All rights reserved.
#
# This code is derived from software contributed to Berkeley by
# Alistair G. Crooks.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. All advertising materials mentioning features or use of this software
#    must display the following acknowledgement:
#	This product includes software developed by the University of
#	California, Berkeley and its contributors.
# 4. Neither the name of the University nor the names of its contributors
#    may be used to endorse or promote products derived from this software
#    without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

set copyright {
@(#) Copyright (c) 1989, 1993, 1994
The Regents of the University of California.  All rights reserved.
}

#
# This is a small program to produce a (very slow) lexer in tcl
# This is not meant to be (and isn't) the pinnacle of efficiency - for
# that use real flex, and then access yylex through the C interface
# This is meant for prototyping applications that require an input
# language.
#
# Written by: Alistair G. Crooks (agc@uts.amdahl.com)
# Tuesday December 13th 1994
#
# tcllex 5.03

set header {#! /usr/local/bin/tclsh

# This scanner was produced by tcllex 5.03, by 
# Alistair G. Crooks (agc@uts.amdahl.com)
# It's inefficient
# If you want efficiency, use flex, and tcl's C interface

# this is why it's so slow...
set TCLLEX_SCANNER 1

# exportable global tcl variables
set yyleng 0
set yytext ""
set yylineno 1
set yyin stdin

# these variables are global, but shouldn't be exported
set yy_ch_buf ""
set yy_ch_buf_p 0
set yy_ch_buf_c 0
set yy_filec 0
set yy_state normal

# regular expressions and actions go here
}

set tail {
# start of function definitions

# echo something on standard output
proc ECHO { s } {
	puts -nonewline stdout "$s"
}

# procedure to read a file's input
proc YY_INPUT { } {
	global yy_ch_buf yy_ch_buf_p yy_ch_buf_c yyin
	if { [eof $yyin] } {
		return 0
	}
	set yy_ch_buf [read $yyin]
	set yy_ch_buf_c [string length $yy_ch_buf]
	set yy_ch_buf_p 0
	return 1
}

# procedure to set the start state for the lexer
# this is analogous to flex's BEGIN macro
proc yy_set_state { state } {
	global yy_state
	set yy_state $state
}

# start using file 'newfp'
proc yy_push_file { newfp } {
	global yy_filev yy_filec yyin yy_ch_buf yy_ch_buf_p yy_ch_buf_c
	set yy_filev($yy_filec:yyin) $yyin
	set yy_filev($yy_filec:yy_ch_buf) $yy_ch_buf
	set yy_filev($yy_filec:yy_ch_buf_p) $yy_ch_buf_p
	set yy_filev($yy_filec:yy_ch_buf_c) $yy_ch_buf_c
	incr yy_filec
	set yyin $newfp
	set yy_ch_buf ""
	set yy_ch_buf_p 0
	set yy_ch_buf_c 0
}

# finished with file - pop it off the stack
proc yy_pop_file {} {
	global yy_filev yy_filec yyin yy_ch_buf yy_ch_buf_p yy_ch_buf_c
	if { $yy_filec == 0 } {
		return 0
	}
	incr yy_filec -1
	set yyin $yy_filev($yy_filec:yyin)
	set yy_ch_buf $yy_filev($yy_filec:yy_ch_buf)
	set yy_ch_buf_p $yy_filev($yy_filec:yy_ch_buf_p)
	set yy_ch_buf_c $yy_filev($yy_filec:yy_ch_buf_c)
	return 1
}

# This is Karl Lehenbauer's marvellous 'static' procedure
# used to provide the same functionality as a static declaration
# within a function in a C program
proc static { args } {
	set procName [lindex [info level -1] 0]
	foreach varName $args {
		uplevel 1 "upvar #0 staticvars($procName:$varName) $varName"
	}
}

# procedure to produce a token and return it to caller
# A side effect is that yytext is filled with the character
# representation of the token.
#
# The lexer uses tcl's regexp command to locate the tokens.
# This is slow. You have been warned.
#
# We look through all the regexps for one lexer state
# If there's only one match, then that's the one returned.
# If there is more than one match, the longer one (in characters)
# is returned. If more than one match is the same length,
# the first rule given in the input language is selected.
#
proc yylex {} {
	# global variables
	global yy_ch_buf yy_ch_buf_p yy_ch_buf_c yy_state 
	global yy_regv yy_actv yy_rulev yy_rulec
	global yytext yyleng yylineno

	# static variables
	static yy_line_donev yy_bolnv yy_eolnv

	# we need some input from somewhere
	if { $yy_ch_buf_c == 0 && ![YY_INPUT] && ![yy_pop_file] } {
		set yytext ""
		set yyleng 0
		return "EOF"
	}
	while 1 {
		# have we exhausted the input?
		if { $yy_ch_buf_p > $yy_ch_buf_c && ![yy_pop_file] } {
			set yytext ""
			set yyleng 0
			return "EOF"
		}
		# another slow part?
		set text [string range $yy_ch_buf $yy_ch_buf_p end]
		set eoln [string first "\n" $text]
		if { $eoln < 0 } {
			set eoln end
		} else {
			incr eoln -1
		}
		set short_text [string range $text 0 $eoln]
		set best ""
		set got 0
		# check we've set up the array for beginning of line regexps
		if { ![info exists yy_line_donev($yy_state)] } {
			foreach re $yy_rulev($yy_state) {
				if { [string index $yy_regv($yy_state:$re) 0] == "^" } {
					set yy_bolnv($yy_state:$re) 1
				} else {
					set yy_bolnv($yy_state:$re) 0
				}
				if { [string index $yy_regv($yy_state:$re) [expr [string length $yy_regv($yy_state:$re)]-1]] == "\$" } {
					set yy_eolnv($yy_state:$re) 1
				} else {
					set yy_eolnv($yy_state:$re) 0
				}
			}
			set yy_line_donev($yy_state) 1
		}
		# file the score array with matches of the regular expression
		foreach re $yy_rulev($yy_state) {
			# because of the way text is set up, we'll get
			# false matches on ^ boln regexps. This next statement
			# tries to avoid that
			if { $yy_bolnv($yy_state:$re) &&
			     $yy_ch_buf_p > 0 &&
			     [string index $yy_ch_buf [expr $yy_ch_buf_p-1]] != "\n" } {
				continue
			}
			if { $yy_eolnv($yy_state:$re) } {
				set local_text $short_text
			} else {
				set local_text $text
			}
			if { [regexp -indices -- $yy_regv($yy_state:$re) $local_text score($re)] } {
				if { [lindex $score($re) 0] == 0} {
					if { $best == "" } {
						set best $re
					}
					incr got
				} else {
					unset score($re)
				}
			}
		}
		# have we got any match at all?
		if { $best == "" } {
			# No match at all
			# echo it and go on to next character
			ECHO "[string range $yy_ch_buf $yy_ch_buf_p $yy_ch_buf_p]"
			incr yy_ch_buf_p
			continue
		}
		# pick (the first) longest one
		set blen -1
		foreach re [lsort -integer [array names score]] {
			set slen [expr [lindex $score($re) 1]-[lindex $score($re) 0]]
			if { $blen < $slen } {
				set blen $slen
				set best $re
			}
		}
		set yyleng [expr $blen+1]
		set yytext [string range $text 0 $blen]
		incr yy_ch_buf_p $yyleng
		# do action for $best
		for { } { $best <= $yy_rulec } { incr best } {
			if { [info exists yy_actv($yy_state:$best)] } {
				eval $yy_actv($yy_state:$best)
				break
			}
		}
		# flush the score array as we're going round again
		foreach i [array names score] {
			unset score($i)
		}
	}
}
}

# expand tabs and newlines in the regular expression
proc expandre { re } {
	global _yy_dot_match_newlines_

	if { $_yy_dot_match_newlines_ } {
		set re1 $re
	} else {
		regsub -all {([^\\])\.} $re "\\1\[^\\n\]" re1
	}
	regsub -all {\\t} $re1 "\t" re2
	regsub -all {\\n} $re2 "\n" re3
	return $re3
}

# count the number of inc and dec chars in a line
# don't bother about quoted chars or strings for now - XXX
proc balance { inc dec line } {
	set off 0
	set cnt 0
	while 1 {
		set substr [string range $line $off end]
		set first [string first $inc $substr]
		switch -- $first {
		-1	{
				break
			}
		0	{
				incr cnt
				break
			}
		default	{
				incr cnt
				set off [expr $off+$first+1]
			}
		}
	}
	set off 0
	while 1 {
		set substr [string range $line $off end]
		set first [string first $dec $substr]
		switch -- $first {
		-1	{
				break
			}
		0	{
				incr cnt -1
				break
			}
		default	{
				incr cnt -1
				set off [expr $off+$first+1]
			}
		}
	}
	return $cnt
}

set optind 0
set optarg ""

proc getopt { argv optstr } {
	global optind optarg

	set optarg ""
	set arg [lindex $argv $optind]
	if { [string index $arg 0] != "-" } {
		return EOF
	}
	incr optind
	if { [string length $arg] <= 1 } {
		puts stderr "Bad option `$ret'"
		return EOF
	}
	set ret [string index $arg 1]
	set ind [string first $ret $optstr]
	if { $ind < 0 } {
		puts stderr "Not an option `$ret'"
		return EOF
	}
	if { [string index $optstr [expr $ind+1]] != ":" } {
		# a simple argument - return it
		return $ret
	}
	if { [string length $arg] > 2 } {
		# -carg form
		set optarg [string range $arg 2 end]
		return $ret
	}
	if { $optind >= [llength $argv] } {
		puts stderr "No arg to $ret"
		return EOF
	}
	set optarg [lindex $argv $optind]
	if { [string index $optarg 0] == "-" } {
		puts stderr "`$ret' takes an argument"
		return EOF
	}
	incr optind
	return $ret
}

# check that we've got the right arguments
if { [llength $argv] < 1 } {
	puts stderr "Usage: $argv0 language-description"
	exit 1
}

# default behaviour is for '.' to match a newline character
set _yy_dot_match_newlines_ 1

# check to see if we need to be in flex compatibility mode
while 1 {
	set opt [getopt $argv "C"]
	switch $opt {
	C	{
			set _yy_dot_match_newlines_ 0
		}
	EOF	{
			break
		}
	}
}

# open output file
set fpout [open "yylex.tcl" "w"]
puts $fpout $header

set fpin [open [lindex $argv $optind]]
# write the regexps and actions here
set rule 1
set rulev(normal) ""
set state_list { normal }
while { [gets $fpin line] >= 0 } {
	# first look for a start state
	if { [regexp {^<(.*)>([^	]+)	(.*)} $line a state re act] } {
		# check re for backslashes
		puts $fpout "set yy_regv($state:$rule) \{[expandre $re]\}"
		if { ![regexp {^[ 	]*\|[ 	]*$} $act] } {
			puts $fpout "set yy_actv($state:$rule) $act"
		}
		if { ![info exists rulev($state)] } {
			set rulev($state) "$rule "
			append state_list "$state "
		} else {
			append rulev($state) "$rule "
		}
		incr rule
		set blockc [balance "\{" "\}" $act]
		while { $blockc > 0 } {
			if { [gets $fpin line] <= 0 } {
				puts stderr "EOF in action of `$re'"
				exit 1
			}
			puts $fpout "$line"
			incr blockc [balance "\{" "\}" $line]
		}
	} elseif { [regexp {^([^	]+)	(.*)} $line a re act] } {
		# check re for backslashes
		puts $fpout "set yy_regv(normal:$rule) \{[expandre $re]\}"
		if { ![regexp {^[ 	]*\|[ 	]*$} $act] } {
			puts $fpout "set yy_actv(normal:$rule) $act"
		}
		append rulev(normal) "$rule "
		incr rule
		set blockc [balance "\{" "\}" $act]
		while { $blockc > 0 } {
			if { [gets $fpin line] <= 0 } {
				puts stderr "EOF in action of `$re'"
				exit 1
			}
			puts $fpout "$line"
			incr blockc [balance "\{" "\}" $line]
		}
	}
}
set rulec 0
foreach state $state_list {
	puts $fpout "set yy_rulev($state) \{ $rulev($state)\}"
	incr rulec [llength $rulev($state)]
}
puts $fpout "set yy_rulec $rulec"
close $fpin

puts $fpout $tail
close $fpout

exit 0
