head	1.19;
access;
symbols;
locks
	root:1.19; strict;
comment	@# @;


1.19
date	94.12.15.14.08.11;	author root;	state Exp;
branches;
next	1.18;

1.18
date	94.12.15.13.54.11;	author root;	state Exp;
branches;
next	1.17;

1.17
date	94.12.15.12.09.57;	author root;	state Exp;
branches;
next	1.16;

1.16
date	94.12.15.10.29.57;	author root;	state Exp;
branches;
next	1.15;

1.15
date	94.12.15.10.13.43;	author root;	state Exp;
branches;
next	1.14;

1.14
date	94.12.15.09.53.42;	author root;	state Exp;
branches;
next	1.13;

1.13
date	94.12.15.09.26.47;	author root;	state Exp;
branches;
next	1.12;

1.12
date	94.12.14.16.14.56;	author root;	state Exp;
branches;
next	1.11;

1.11
date	94.12.14.16.06.23;	author root;	state Exp;
branches;
next	1.10;

1.10
date	94.12.14.15.41.04;	author root;	state Exp;
branches;
next	1.9;

1.9
date	94.12.14.15.24.25;	author root;	state Exp;
branches;
next	1.8;

1.8
date	94.12.14.15.15.31;	author root;	state Exp;
branches;
next	1.7;

1.7
date	94.12.14.14.18.58;	author root;	state Exp;
branches;
next	1.6;

1.6
date	94.12.14.13.18.54;	author root;	state Exp;
branches;
next	1.5;

1.5
date	94.12.14.12.59.28;	author root;	state Exp;
branches;
next	1.4;

1.4
date	94.12.14.12.31.16;	author root;	state Exp;
branches;
next	1.3;

1.3
date	94.12.14.12.26.51;	author root;	state Exp;
branches;
next	1.2;

1.2
date	94.12.14.11.33.33;	author root;	state Exp;
branches;
next	1.1;

1.1
date	94.12.14.10.21.02;	author root;	state Exp;
branches;
next	;


desc
@@


1.19
log
@added copyright message
@
text
@#! /usr/local/bin/tclsh
#
# Copyright (c) 1994
#	The Regents of the University of California.  All rights reserved.
#
# This code is derived from software contributed to Berkeley by
# Alistair G. Crooks.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. All advertising materials mentioning features or use of this software
#    must display the following acknowledgement:
#	This product includes software developed by the University of
#	California, Berkeley and its contributors.
# 4. Neither the name of the University nor the names of its contributors
#    may be used to endorse or promote products derived from this software
#    without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

set copyright {
@@(#) Copyright (c) 1989, 1993, 1994
The Regents of the University of California.  All rights reserved.
}

#
# This is a small program to produce a (very slow) lexer in tcl
# This is not meant to be (and isn't) the pinnacle of efficiency - for
# that use real flex, and then access yylex through the C interface
# This is meant for prototyping applications that require an input
# language.
#
# Written by: Alistair G. Crooks (agc@@uts.amdahl.com)
# Tuesday December 13th 1994
#
# tcllex 4.50

set header {#! /usr/local/bin/tclsh

# This scanner was produced by tcllex 4.50, by 
# Alistair G. Crooks (agc@@uts.amdahl.com)
# It's inefficient
# If you want efficiency, use flex, and tcl's C interface

# this is why it's so slow...
set TCLLEX_SCANNER 1

# exportable global tcl variables
set yyleng 0
set yytext ""
set yylineno 1
set yyin stdin

# these variables are global, but shouldn't be exported
set yy_ch_buf ""
set yy_ch_buf_p 0
set yy_ch_buf_c 0
set yy_filec 0
set yy_state normal

# regular expressions and actions go here
}

set tail {
# start of function definitions

# echo something on standard output
proc ECHO { s } {
	puts -nonewline stdout "$s"
}

# procedure to read a file's input
proc YY_INPUT { } {
	global yy_ch_buf yy_ch_buf_p yy_ch_buf_c yyin
	if { [eof $yyin] } {
		return 0
	}
	set yy_ch_buf [read $yyin]
	set yy_ch_buf_c [string length $yy_ch_buf]
	set yy_ch_buf_p 0
	return 1
}

# procedure to set the start state for the lexer
# this is analogous to flex's BEGIN macro
proc yy_set_state { state } {
	global yy_state
	set yy_state $state
}

# start using file 'newfp'
proc yy_push_file { newfp } {
	global yy_filev yy_filec yyin yy_ch_buf yy_ch_buf_p yy_ch_buf_c
	set yy_filev($yy_filec:yyin) $yyin
	set yy_filev($yy_filec:yy_ch_buf) $yy_ch_buf
	set yy_filev($yy_filec:yy_ch_buf_p) $yy_ch_buf_p
	set yy_filev($yy_filec:yy_ch_buf_c) $yy_ch_buf_c
	incr yy_filec
	set yyin $newfp
	set yy_ch_buf ""
	set yy_ch_buf_p 0
	set yy_ch_buf_c 0
}

# finished with file - pop it off the stack
proc yy_pop_file {} {
	global yy_filev yy_filec yyin yy_ch_buf yy_ch_buf_p yy_ch_buf_c
	if { $yy_filec == 0 } {
		return 0
	}
	incr yy_filec -1
	set yyin $yy_filev($yy_filec:yyin)
	set yy_ch_buf $yy_filev($yy_filec:yy_ch_buf)
	set yy_ch_buf_p $yy_filev($yy_filec:yy_ch_buf_p)
	set yy_ch_buf_c $yy_filev($yy_filec:yy_ch_buf_c)
	return 1
}

# This is Karl Lehenbauer's marvellous 'static' procedure
# used to provide the same functionality as a static declaration
# within a function in a C program
proc static { args } {
	set procName [lindex [info level -1] 0]
	foreach varName $args {
		uplevel 1 "upvar #0 staticvars($procName:$varName) $varName"
	}
}

# procedure to produce a token and return it to caller
# A side effect is that yytext is filled with the character
# representation of the token.
#
# The lexer uses tcl's regexp command to locate the tokens.
# This is slow. You have been warned.
#
# We look through all the regexps for one lexer state
# If there's only one match, then that's the one returned.
# If there is more than one match, the longer one (in characters)
# is returned. If more than one match is the same length,
# the first rule given in the input language is selected.
#
proc yylex {} {
	# global variables
	global yy_ch_buf yy_ch_buf_p yy_ch_buf_c yy_state 
	global yy_regv yy_actv yy_rulev yy_rulec
	global yytext yyleng yylineno

	# static variables
	static yy_line_donev yy_bolnv yy_eolnv

	# we need some input from somewhere
	if { $yy_ch_buf_c == 0 && ![YY_INPUT] && ![yy_pop_file] } {
		set yytext ""
		set yyleng 0
		return "EOF"
	}
	while 1 {
		# have we exhausted the input?
		if { $yy_ch_buf_p > $yy_ch_buf_c && ![yy_pop_file] } {
			set yytext ""
			set yyleng 0
			return "EOF"
		}
		# another slow part?
		set text [string range $yy_ch_buf $yy_ch_buf_p end]
		set eoln [string first "\n" $text]
		if { $eoln < 0 } {
			set eoln end
		} else {
			incr eoln -1
		}
		set short_text [string range $text 0 $eoln]
		set best ""
		set got 0
		# check we've set up the array for beginning of line regexps
		if { ![info exists yy_line_donev($yy_state)] } {
			foreach re $yy_rulev($yy_state) {
				if { [string index $yy_regv($yy_state:$re) 0] == "^" } {
					set yy_bolnv($yy_state:$re) 1
				} else {
					set yy_bolnv($yy_state:$re) 0
				}
				if { [string index $yy_regv($yy_state:$re) [expr [string length $yy_regv($yy_state:$re)]-1]] == "\$" } {
					set yy_eolnv($yy_state:$re) 1
				} else {
					set yy_eolnv($yy_state:$re) 0
				}
			}
			set yy_line_donev($yy_state) 1
		}
		# file the score array with matches of the regular expression
		foreach re $yy_rulev($yy_state) {
			# because of the way text is set up, we'll get
			# false matches on ^ boln regexps. This next statement
			# tries to avoid that
			if { $yy_bolnv($yy_state:$re) &&
			     $yy_ch_buf_p > 0 &&
			     [string index $yy_ch_buf [expr $yy_ch_buf_p-1]] != "\n" } {
				continue
			}
			if { $yy_eolnv($yy_state:$re) } {
				set local_text $short_text
			} else {
				set local_text $text
			}
			if { [regexp -indices -- $yy_regv($yy_state:$re) $local_text score($re)] } {
				if { [lindex $score($re) 0] == 0} {
					if { $best == "" } {
						set best $re
					}
					incr got
				} else {
					unset score($re)
				}
			}
		}
		# have we got any match at all?
		if { $best == "" } {
			# No match at all
			# echo it and go on to next character
			ECHO "[string range $yy_ch_buf $yy_ch_buf_p $yy_ch_buf_p]"
			incr yy_ch_buf_p
			continue
		}
		# pick (the first) longest one
		set blen -1
		foreach re [lsort -integer [array names score]] {
			set slen [expr [lindex $score($re) 1]-[lindex $score($re) 0]]
			if { $blen < $slen } {
				set blen $slen
				set best $re
			}
		}
		set yyleng [expr $blen+1]
		set yytext [string range $text 0 $blen]
		incr yy_ch_buf_p $yyleng
		# do action for $best
		for { } { $best < $yy_rulec } { incr best } {
			if { [info exists yy_actv($yy_state:$best)] } {
				eval $yy_actv($yy_state:$best)
				break
			}
		}
		# flush the score array as we're going round again
		foreach i [array names score] {
			unset score($i)
		}
	}
}
}

# expand tabs and newlines in the regular expression
proc expandre { re } {
	global _yy_dot_match_newlines_

	if { $_yy_dot_match_newlines_ } {
		set re1 $re
	} else {
		regsub -all {([^\\])\.} $re "\\1\[^\\n\]" re1
	}
	regsub -all {\\t} $re1 "\t" re2
	regsub -all {\\n} $re2 "\n" re3
	return $re3
}

# count the number of inc and dec chars in a line
# don't bother about quoted chars or strings for now
proc balance { inc dec line } {
	set off 0
	set cnt 0
	while 1 {
		set substr [string range $line $off end]
		set first [string first $inc $substr]
		if { $first >= 0 } {
			incr cnt
			set off [expr $first+1]
			continue
		}
		break
	}
	set off 0
	while 1 {
		set substr [string range $line $off end]
		set first [string first $dec $substr]
		if { $first >= 0 } {
			incr cnt -1
			set off [expr $first+1]
			continue
		}
		break
	}
	return $cnt
}

set optind 0
set optarg ""

proc getopt { argv optstr } {
	global optind optarg

	set optarg ""
	set arg [lindex $argv $optind]
	if { [string index $arg 0] != "-" } {
		return EOF
	}
	incr optind
	if { [string length $arg] <= 1 } {
		puts stderr "Bad option `$ret'"
		return EOF
	}
	set ret [string index $arg 1]
	set ind [string first $ret $optstr]
	if { $ind < 0 } {
		puts stderr "Not an option `$ret'"
		return EOF
	}
	if { [string index $optstr [expr $ind+1]] != ":" } {
		# a simple argument - return it
		return $ret
	}
	if { [string length $arg] > 2 } {
		# -carg form
		set optarg [string range $arg 2 end]
		return $ret
	}
	if { $optind >= [llength $argv] } {
		puts stderr "No arg to $ret"
		return EOF
	}
	set optarg [lindex $argv $optind]
	if { [string index $optarg 0] == "-" } {
		puts stderr "`$ret' takes an argument"
		return EOF
	}
	incr optind
	return $ret
}

# check that we've got the right arguments
if { [llength $argv] < 1 } {
	puts stderr "Usage: $argv0 language-description"
	exit 1
}

# default behaviour is for '.' to match a newline character
set _yy_dot_match_newlines_ 1

# check to see if we need to be in flex compatibility mode
while 1 {
	set opt [getopt $argv "C"]
	switch $opt {
	C	{
			set _yy_dot_match_newlines_ 0
		}
	EOF	{
			break
		}
	}
}

# open output file
set fpout [open "yylex.tcl" "w"]
puts $fpout $header

set fpin [open [lindex $argv $optind]]
# write the regexps and actions here
set rule 1
set rulev(normal) ""
set state_list { normal }
while { [gets $fpin line] >= 0 } {
	# first look for a start state
	if { [regexp {^<(.*)>([^	]+)	(.*)} $line a state re act] } {
		# check re for backslashes
		puts $fpout "set yy_regv($state:$rule) \{[expandre $re]\}"
		if { ![regexp {^[ 	]*\|[ 	]*$} $act] } {
			puts $fpout "set yy_actv($state:$rule) $act"
		}
		if { ![info exists rulev($state)] } {
			set rulev($state) "$rule "
			append state_list "$state "
		} else {
			append rulev($state) "$rule "
		}
		incr rule
		set blockc [balance "\{" "\}" $act]
		while { $blockc > 0 } {
			if { [gets $fpin line] <= 0 } {
				puts stderr "EOF in action of `$re'"
				exit 1
			}
			puts $fpout "$line"
			incr blockc [balance "\{" "\}" $line]
		}
	} elseif { [regexp {^([^	]+)	(.*)} $line a re act] } {
		# check re for backslashes
		puts $fpout "set yy_regv(normal:$rule) \{[expandre $re]\}"
		if { ![regexp {^[ 	]*\|[ 	]*$} $act] } {
			puts $fpout "set yy_actv(normal:$rule) $act"
		}
		append rulev(normal) "$rule "
		incr rule
		set blockc [balance "\{" "\}" $act]
		while { $blockc > 0 } {
			if { [gets $fpin line] <= 0 } {
				puts stderr "EOF in action of `$re'"
				exit 1
			}
			puts $fpout "$line"
			incr blockc [balance "\{" "\}" $line]
		}
	}
}
set rulec 0
foreach state $state_list {
	puts $fpout "set yy_rulev($state) \{ $rulev($state)\}"
	incr rulec [llength $rulev($state)]
}
puts $fpout "set yy_rulec $rulec"
close $fpin

puts $fpout $tail
close $fpout

exit 0
@


1.18
log
@added recognition of '$' regexp
@
text
@d3 40
@


1.17
log
@added getopt, -C for flex compatibility, and dot_match_newlines option
@
text
@d125 1
a125 1
	static yy_boln_donev yy_bolnv
d142 7
d152 1
a152 1
		if { ![info exists yy_boln_donev($yy_state)] } {
d159 5
d165 1
a165 1
			set yy_boln_donev($yy_state) 1
d177 6
a182 1
			if { [regexp -indices -- $yy_regv($yy_state:$re) $text score($re)] } {
@


1.16
log
@made .* in input language behave like [^\n]* - lex compatibility
@
text
@d213 7
a219 1
	regsub -all {([^\\])\.} $re "\\1\[^\\n\]" re1
d254 44
d304 16
d324 1
a324 1
set fpin [open [lindex $argv 0]]
@


1.15
log
@made it a bit more efficient
@
text
@d213 4
a216 3
	regsub -all {\\t} $re "\t" ret
	regsub -all {\\n} $ret "\n" re
	return $re
@


1.14
log
@made what variables I could to be static, and added Karl's static proc
@
text
@d213 2
a214 6
	while { [regexp {(.*)\\t(.*)} $re a first next] } {
		set re "$first\t$next"
	}
	while { [regexp {(.*)\\n(.*)} $re a first next] } {
		set re "$first\n$next"
	}
@


1.13
log
@fixed loophole relating to boln regexps (^.*) etc
@
text
@d95 10
d119 1
d123 4
a126 1
	global yy_boln_donev yy_bolnv
@


1.12
log
@small cleanups
@
text
@d112 1
d130 11
d143 8
d210 1
a210 1
proc count { inc dec line } {
d267 1
a267 1
		set blockc [count "\{" "\}" $act]
d274 1
a274 1
			incr blockc [count "\{" "\}" $line]
d284 1
a284 1
		set blockc [count "\{" "\}" $act]
d291 1
a291 1
			incr blockc [count "\{" "\}" $line]
@


1.11
log
@made the right version number in tcllex
@
text
@d217 1
a217 3
set fpout [open "yylex.tcl" "w"]
puts $fpout $header

d222 4
@


1.10
log
@added alternate actions
@
text
@d12 1
a12 1
# tcllex 1.0
d16 1
a16 1
# This scanner was produced by tcllex 1.0, by 
@


1.9
log
@fixed bug whereby it wasn't picking up the first rule when > 1 match
@
text
@d110 1
a110 1
	global yy_regv yy_actv yy_rulev
d163 6
a168 1
		eval $yy_actv($yy_state:$best)
d235 3
a237 1
		puts $fpout "set yy_actv($state:$rule) $act"
d257 3
a259 1
		puts $fpout "set yy_actv(normal:$rule) $act"
d273 1
d276 1
d278 1
@


1.8
log
@bug that stopped blank lines in lang description
@
text
@d152 1
a152 1
		foreach re [array names score] {
@


1.7
log
@tarted up with comments
@
text
@d225 1
a225 1
while { [gets $fpin line] > 0 } {
@


1.6
log
@cleaned up comments
@
text
@d24 1
a27 1

d30 1
d42 2
d48 1
d60 2
d67 1
d81 1
d95 13
d112 1
d119 1
d125 1
d129 1
d142 1
d150 1
a150 1
		# pick longest one
d164 1
@


1.5
log
@got start state stuff working OK
@
text
@d11 2
d16 8
d199 1
d238 1
a238 1
	puts $fpout "set yy_rulev($state) \{ $rulev($state) \}"
@


1.4
log
@cleanup a bit
@
text
@d79 1
a79 1
	global yy_regv yy_actv
d95 2
a96 5
		foreach re [array names yy_regv] {
			if { ![string match "$yy_state:*" $re] } {
				continue
			}
			if { [regexp -indices -- $yy_regv($re) $text score($re)] } {
d127 1
a127 1
		eval $yy_actv($best)
d186 2
d189 1
a189 1
	if { [regexp {^([^	]+)	(.*)} $line a re act] } {
d191 8
a198 2
		puts $fpout "set yy_regv(normal:$rule) \{[expandre $re]\}"
		puts $fpout "set yy_actv(normal:$rule) $act"
d209 1
a209 1
	} elseif { [regexp {^<(.*)>([^	]+)	(.*)} $line a state re act] } {
d211 3
a213 2
		puts $fpout "set yy_regv($state:$rule) \{[expandre $re]\}"
		puts $fpout "set yy_actv($state:$rule) $act"
d225 3
@


1.3
log
@added multi-line actions
@
text
@d4 1
a4 1
# This is not meant (and isn't) the pinnacle of efficiency - for
d150 1
@


1.2
log
@fixed bug whereby scores not blanked out
@
text
@d2 9
d149 28
d180 5
d187 2
a188 1
for { set rule 1 } { [gets $fpin line] > 0 } { } {
d194 10
a203 2
	}
	if { [regexp {^<(.*)>([^	]+)	(.*)} $line a state re act] } {
d208 9
d223 2
@


1.1
log
@Initial revision
@
text
@d7 1
d71 1
a71 1
	global yytext yyleng
d122 3
d129 5
a133 1
proc expand { re } {
d148 1
a148 1
		puts $fpout "set yy_regv(normal:$rule) \{[expand $re]\}"
d150 6
@
