/*
 * regex - Regular expression pattern matching
 *         and replacement
 *
 *
 * By:  Ozan S. Yigit (oz)
 *      Dept. of Computer Science
 *      York University
 *
 *
 * These routines are the PUBLIC DOMAIN equivalents 
 * of regex routines as found in 4.nBSD UN*X, with minor
 * extensions.
 *
 * These routines are derived from various implementations
 * found in software tools books, and Conroy's grep. They
 * are NOT derived from licensed/restricted software.
 * For more interesting/academic/complicated implementations,
 * see Henry Spencer's regexp routines, or GNU Emacs pattern
 * matching module.
 *
 */


/*
 * Khoros: $Id$
 */

#if !defined(__lint) && !defined(__CODECENTER__)
static char rcsid[] = "Khoros: $Id$";
#endif

/*
 * $Log$
 */

/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>> <<<<<<<<<<<<<<<<<<<<<<<<<<
   >>>>
   >>>>            Regular Expression Routines
   >>>>
   >>>>  Private:
   >>>>   Static:
   >>>>		    pmatch()
   >>>>		    dfadump()
   >>>>   Public:
   >>>>             kre_comp()
   >>>>             kre_exec()
   >>>>             kre_subs()
   >>>>
   >>>>>>>>>>>>>>>>>>>>>>>>>>>>> <<<<<<<<<<<<<<<<<<<<<<<<<< */

#include <bootstrap.h>
#include <kutils/ksignal.h>
#include "kimakedef.h"

/*
 * Size limits.  Both are expression macros, so they are parenthesized
 * to stay correct when used inside a larger expression.
 */
#define MAXDFA  (1024*3)	/* bytes available for the compiled pattern */
#define MAXTAG  128		/* number of group slots, group 0 included  */

/* values of `sta': was the last kre_comp() successful? */
#define OKP     1
#define NOP     0

/*
 * Opcodes stored in dfa[].  Operands follow the opcode byte:
 *   CHR/ICHR chr, CCL/NCL <BITBLK bytes>, BOT/EOT num, REF num,
 *   CLO min max <one item> END.
 */
#define CHR     1	/* literal character                     */
#define ANY     2	/* any character except newline / NUL    */
#define CCL     3	/* character class                       */
#define NCL     4	/* negated character class               */
#define BOL     5	/* beginning of line                     */
#define EOL     6	/* end of line                           */
#define BOT     7	/* beginning of tagged group             */
#define EOT     8	/* end of tagged group                   */
#define WB	9	/* word boundary                         */
#define NWB	10	/* not a word boundary                   */
#define REF     11	/* back reference to a tagged group      */
#define CLO     12	/* closure (repetition)                  */

#define END     0	/* end of automaton / end of closure     */

#define ICHR	13 /* Case Insensitive Char */
#define BOS	14 /* Beginning Of String */
#define EOS	15 /* End Of String */
#define OR      16 /* Or Symbol */

/*
 * The following defines are not meant
 * to be changeable. They are for readability
 * only.
 *
 */
#define MAXCHR	128
#define CHRBIT	8
#define BITBLK	(MAXCHR/CHRBIT)	/* bytes in one class bit table */
#define BLKIND	0170		/* mask selecting the byte index bits */
#define BITIND	07		/* mask selecting the bit-in-byte bits */

#define ASCIIB	0177

typedef /*unsigned*/ char CHAR;

static int  tagstk[MAXTAG];             /* subpat tag stack..*/
static CHAR dfa[MAXDFA];		/* automaton..       */
static int  sta = NOP;               	/* status of lastpat */

static CHAR bittab[BITBLK];		/* bit table for CCL */

/*
 * things that make life nice....
 */
static int  case_insensitive = FALSE;

/*
 * chset - add character c to the class bit table (bittab) being built.
 *
 * Only the low seven bits of c select the bit that is set.  When
 * case_insensitive is on and c is a letter, the bit for the letter of
 * the opposite case is set as well.
 *
 * Callers pass plain `char' values, which may be negative; the ctype
 * functions are only defined for unsigned char values (or EOF), so the
 * value is converted before it is classified.
 */
static void
chset(register int c) 
{ 
	bittab[((c)&BLKIND)>>3] |= 1<<((c)&BITIND);

	if (case_insensitive) {
		int uc = (unsigned char) c;

		if (isalpha(uc)) {
			int other = islower(uc) ? toupper(uc) : tolower(uc);

			bittab[((other)&BLKIND)>>3] |= 1<<((other)&BITIND);
		}
	}
}

/*
 * Helpers used by the pattern compiler:
 *   badpat(x) - mark the automaton as unusable (dfa[0] = END) and
 *               return the error string x from the enclosing function.
 *   store(x)  - append one byte to the automaton through the local
 *               pointer `mp' of the enclosing function.
 */
#define badpat(x)	return(*dfa = END, x)
#define store(x)	*mp++ = x
 
/*
 * character classification table for word boundary
 * operators WB. the reason for not using 
 * ctype macros is that we can let the user add into 
 * our own table. see kre_modw. This table is not in
 * the bitset form, since we may wish to extend it
 * in the future for other character classifications. 
 *
 *	TRUE for 0-9 A-Z a-z _
 */
static char chrtyp[MAXCHR] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 	/*   0 -   9 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 	/*  10 -  19 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 	/*  20 -  29 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 	/*  30 -  39 */
	0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 	/*  40 -  49: '0' '1' */
	1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 	/*  50 -  59: '2'..'9' */
	0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 	/*  60 -  69: 'A'..'E' */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 	/*  70 -  79 */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 	/*  80 -  89 */
	1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 	/*  90 -  99: 'Z', '_', 'a'..'c' */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 	/* 100 - 109 */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 	/* 110 - 119 */
	1, 1, 1, 0, 0, 0, 0, 0		/* 120 - 127: 'x' 'y' 'z' */
	};

/*
 * inascii(x)   - keep only the low seven bits of x.
 * iswordc(x)   - non-zero when x (reduced to seven bits) is a word
 *                character according to chrtyp[].
 * isinset(x,y) - non-zero when the bit for character y is set in the
 *                class bit table that x points to.
 */
#define inascii(x)	(0177&(x))
#define iswordc(x) 	chrtyp[inascii(x)]
#define isinset(x,y) 	((x)[((y)&BLKIND)>>3] & (1<<((y)&BITIND)))

/*
 * skip values for CLO XXX to skip past the closure.
 * In pmatch the skip is added to a pointer that already stands on the
 * `min' byte, i.e. just after the CLO opcode itself.
 */

#define ANYDFA  1		/* ANY                             */
#define CHRDFA  2		/* CHR chr || ICHR chr             */
#define GRPDFA  2		/* BOT num || EOT num              */
#define CLODFA	3		/* CLO min max ... END             */
#define NCLDFA	17		/* CCL 16bytes || NCL 16bytes      */
#define ANYSKIP	(CLODFA+ANYDFA) /* CLO min max ANY END ...         */
#define CHRSKIP	(CLODFA+CHRDFA)	/* CLO min max CHR chr END ...     */
#define CCLSKIP (CLODFA+NCLDFA)	/* CLO min max CCL 16bytes END ... */

/*
 * grpmkr[g][k] is the dfa position of the k-th OR opcode, or of the
 * closing EOT, of group g; filled in by kre_comp and walked by grpskip.
 */
static CHAR *grpmkr[MAXTAG][1024];
static char *bol;			/* start of the string given to kre_exec */
static CHAR *tagstrt[MAXTAG];           /* subpat tag start..*/
static char *bopat[MAXTAG];		/* start of text matched by each group */
static char *eopat[MAXTAG];		/* end of text matched by each group   */

static char *clolpstore[1024];	/* store lp ptr for CLO backtrack */

/*
 * grpskip - measure the distance from ap to the end of the current
 *           alternative of group `num'.
 *
 * Starting at ap, hop through the marker positions recorded in
 * grpmkr[num][], beginning with entry *cnt, until the pointer stands
 * on the "EOT num" that closes the group, on an OR opcode (only when
 * orstop is non-zero), or on an END byte.  *cnt is updated to the next
 * unused marker index so a later call continues where this one
 * stopped.  Returns the number of dfa bytes between the starting
 * position and the stopping position.
 */
static int grpskip(CHAR *ap, int num, int orstop, int *cnt)
{
	CHAR *const start = ap;
	int idx;

	for (idx = *cnt; *ap != 0; ap = grpmkr[num][idx++]) {
		if (*ap == EOT && ap[1] == num)
			break;
		if (orstop && *ap == OR)
			break;
	}

	*cnt = idx;
	return((int) (ap - start));
}

/*-----------------------------------------------------------
|
|  Routine Name: pmatch - internal routine for the hard part
|
|       Purpose: This code is mostly snarfed from an early
| 	grep written by David Conroy. The backref and
| 	tag stuff, and various other mods are by oZ.
|
|	special cases: (dfa[n], dfa[n+1])
|		CLO ANY
|			We KNOW ".*" will match ANYTHING
|			up to the end of line. Thus, go to
|			the end of line straight, without
|			calling pmatch recursively. As in
|			the other closure cases, the remaining
|			pattern must be matched by moving
|			backwards on the string recursively,
|			to find a match for xy (x is ".*" and 
|			y is the remaining pattern) where
|			the match satisfies the LONGEST match
|			for x followed by a match for y.
|		CLO CHR
|			We can again scan the string forward
|			for the single char without recursion, 
|			and at the point of failure, we execute 
|			the remaining dfa recursively, as
|			described above.
|
|	At the end of a successful match, bopat[n] and eopat[n]
|	are set to the beginning and end of subpatterns matched
|	by tagged expressions (n = 1 to 127).	
|
|	Notes:
|		- A character class (CCL/NCL) never matches the
|		  terminating NUL of the string.
|		- Characters are converted to unsigned char before
|		  being handed to the ctype routines.
|		- A closure over a group records at most as many
|		  iterations as clolpstore[] can hold; clolpstore[]
|		  is shared by all recursion levels.
|		- A back reference to a group with no recorded
|		  text matches the empty string.
|
|         Input: lp - position in the subject string to match at
|                ap - position in the compiled automaton to run from
|        Output:
|       Returns: pointer just past the matched text, or NULL when
|		 the automaton does not match at lp
|    Written By: Ozan S. Yigit (oz)
|          Date: 1993
| Modifications: Integrated into Khoros 2.0 (MY) 7/93
|
------------------------------------------------------------*/

static char *pmatch(
   register char *lp,
   register CHAR *ap)
{
	register char *e;		/* extra pointer for CLO     */
	register char *bp;		/* beginning of subpat..     */
	register char *ep;		/* ending of subpat..	     */
	register int op, c, n;
	register int c1;
	int clocnt, max, min, clogrp = -1, curgrp = 0, i;
	/* number of iteration start points clolpstore[] can hold */
	const int clomax = (int) (sizeof(clolpstore) / sizeof(clolpstore[0]));

	char *are, *tmp, *tmp1;		/* to save the line ptr. */

	while ((op = (int) *ap++) != END)
		switch(op) {

		case CHR:
			if (*lp++ != *ap++)
				return(NULL);
			break;
		case ICHR:
			{
				char ch1 = *lp++, ch2 = *ap++;

				if (toupper((unsigned char) ch1) !=
				    toupper((unsigned char) ch2))
					return(NULL);
			}
			break;
		case ANY:
			if (*lp == '\n' || !*lp)
				return(NULL);
			lp++;
			break;
		case CCL:
			c = (int) *lp++;
			/* the string terminator is never a class member */
			if (c == 0 || !isinset(ap,c))
				return(NULL);
			ap += BITBLK;
			break;
		case NCL:
			c = (int) *lp++;
			/* do not let a negated class swallow the terminator */
			if (c == 0 || isinset(ap,c))
				return(NULL);
			ap += BITBLK;
			break;
		case BOL:
			if (lp != bol && lp[-1] != '\n')
				return(NULL);
			break;
		case EOL:
			if (*lp != '\n' && *lp)
				return(NULL);
		        else if (*lp)
				lp++;
			break;
		case BOS:
			if (lp != bol)
				return(NULL);
			break;
		case EOS:
		        if (*lp)
				return(NULL);
			break;
		case BOT:
			/* read the group number, then step back onto BOT */
			curgrp = *ap--;
			bopat[curgrp] = lp;
			tmp = NULL;
			i = 0;
			/*
			 * Try every alternative of the group and keep
			 * the one that consumes the most text.
			 */
			do {
				if (*ap == OR)
					ap++;		/* skip OR       */
				else
					ap += 2;	/* skip BOT num  */
				tmp1 = pmatch(lp, ap);
				if (tmp == NULL || tmp1 > tmp)
				   tmp = tmp1;
				   
				ap += grpskip(ap, curgrp, TRUE, &i);
			} while (*ap != EOT);

			if (tmp == NULL)
				return(NULL);

			eopat[curgrp] = lp = tmp;
			ap += 2;			/* skip EOT num  */
			break;

		case OR:
		case EOT:
			/* end of the alternative being tried by BOT */
			return(lp);

 		case WB:
			if ((!(lp != bol && iswordc(lp[-1])) && iswordc(*lp))||
			    ((lp != bol && iswordc(lp[-1])) && !iswordc(*lp)))
				break;
			return(NULL);
		case NWB:
			if (!((!(lp != bol && iswordc(lp[-1])) && iswordc(*lp))
			 || ((lp != bol && iswordc(lp[-1])) && !iswordc(*lp))))
				break;
			return(NULL);
		case REF:
			n = (int) *ap++;
			bp = bopat[n];
			ep = eopat[n];
			/* a group with no recorded text matches nothing */
			if (bp == NULL || ep == NULL)
				break;
			while (bp < ep)
				if (*bp++ != *lp++)
					return(NULL);
			break;
		case CLO:
			are = lp;
			clocnt = 0;
			min = (int) *ap;
			max = (int) *(ap+1);	/* 0 means no upper limit */
			switch(*(ap+2)) {

			case ANY:
				while ((max == 0 || clocnt < max) &&
				       (*lp != '\n' && *lp)) {
					lp++;
					clocnt++;
				}
				n = ANYSKIP;
				break;
			case CHR:
				c = (int) *(ap+3);
				while ((max == 0 || clocnt < max) &&
				       (*lp && c == *lp)) {
					lp++;
					clocnt++;
				}
				n = CHRSKIP;
				break;
			case ICHR:
				c = (int) toupper((unsigned char) *(ap+3));
				while((max == 0 || clocnt < max) && *lp) {
					if (islower((unsigned char) *lp))
						c1 = toupper((unsigned char) *lp);
					else
						c1 = *lp;
					if (c != c1)
					  break;
					lp++;
					clocnt++;
				}
				n = CHRSKIP;
				break;
			case CCL:
			case NCL:
				while ((max == 0 || clocnt < max) &&
				     (*lp && (e = pmatch(lp, ap+2)) != NULL)) {
					lp = e;
					clocnt++;
				}
				n = CCLSKIP;
				break;
			case BOT:
				clogrp = *(ap+3);
				/*
				 * clocnt < clomax keeps the saved start
				 * points inside clolpstore[].
				 */
				while ((max == 0 || clocnt < max) &&
				       clocnt < clomax &&
				     (*lp && (e = pmatch(lp, ap+2)) != NULL)) {
					/* save old lp on stack here */
					clolpstore[clocnt] = lp;
					lp = e;
					clocnt++;
				}
				/* find n value that skips the group in ap */
				i = 0;
				n = grpskip(ap+GRPDFA+2, *(ap+GRPDFA+1), FALSE,
					    &i) + GRPDFA + GRPDFA + CLODFA;
				break;
			default:
				fprintf(stderr, "closure: bad dfa.\n");
				return(NULL);
			}

			ap += n;

			if (clogrp != -1) {
				bopat[clogrp] = clolpstore[0];
				eopat[clogrp] = lp;
			}
			/*
			 * Longest match first: try the rest of the
			 * pattern, giving back one iteration at a time.
			 */
			while (clocnt >= min && lp >= are) {
				if ((e = pmatch(lp, ap)) != NULL)
					return(e);
				--clocnt;
				if (clogrp == -1)
					--lp;
				else {
					/* no saved iteration left to undo */
					if (clocnt < 0)
						break;
					lp = clolpstore[clocnt];
					bopat[clogrp] = clolpstore[0];
					eopat[clogrp] = lp;
				}
			}
			return(NULL);
		default:
			fprintf(stderr, "re_exec: bad dfa.\n");
			return(NULL);
		}
	return(lp);
}

/************************************************************
*
*  Routine Name: kre_comp - compile a regular expression
*
*       Purpose: Compiles a regular expression.
* !   .             Match any single character except newline
* !
* !   ^             If this is the first character of the 
* !                 regular expression, it matches the beginning
* !                 of the line.
* !  
* !   $             If this is the last character of the 
* !                 regular expression, it matches the end of
* !                 the line. 
* !
* ! [...] or [^..]  Matches any one character contained within 
* !                 the brackets. If the first character after 
* !                 the '[' is the ']', then it is included in 
* !                 the characters to match.  If the first 
* !                 character after the '[' is a '^', then it 
* !                 will match all characters NOT included in 
* !                 the [].  The '-' will indicate a range of 
* !                 characters.  For example, [a-z] specifies 
* !                 all characters between and including the 
* !                 ascii values 'a' and 'z'.  If the '-' 
* !                 follows the '[' or is right before the ']' 
* !                 then it is interpreted literally.  There are
* !                 special symbols that can be used as short
* !                 hand:  \\w will expand to '0-9a-z_A-Z', \\d
* !                 expands to '0-9', and \\s expands to ' \\t\\n\\r\\f'
* !
* !   {n,m}	    Match between n and m times the DFA directly
* !   {n,}          before this range syntax.  Thus, 'a{2,10}'
* !   {n}           will match a minimum of 2 a's and a maximum
* !                 of 10.  The {n,} syntax tells the parser to
* !                 match n or more times, and the {n} syntax
* !                 tells it to match exactly n times.
* !                 
* !   *             Match the preceding character or range 
* !                 of characters 0 or more times.  This is
* !                 equivalent to the range syntax {0,}
* !
* !   +             Match the preceding character or range 
* !                 of characters 1 or more times. This is
* !                 equivalent to the range syntax {1,}
* !
* !   ?             Match the preceding character or range
* !                 of characters 0 or 1 times.  This is
* !                 equivalent to the range syntax {0,1}
* !
* !   |             This symbol is used to indicate where to
* !                 separate two sub regular expressions for a
* !                 logical OR operation.
* !             
* !   (..)          Group boundaries.  This pattern indicates an area
* !                 of a memory tagged region of the regular
* !                 expression that can be used to match the exact
* !                 same pattern later in the regular expression via
* !                 a reference, or used in kre_subs to bring in this
* !                 part of the matched string.  It can also be used
* !                 to indicate areas where the or symbol '|' should
* !                 be applied.  Note, only 127 groups are allowed.
* !
* !   \\b            Word boundary.  This pattern will match the
* !                 empty char before the start and after the end
* !                 of a word.  By default, a word character
* !                 contains 0-9a-z_A-Z.  This can be modified by
* !                 the kre_modw routine.
* !
* !   \\B            Non-word boundary.  This pattern will match
* !                 the empty character between two characters
* !                 in a word.
* !
* !   \\1-\\127       These symbols are used to reference the 1st
* !                 through 127th () region.
* !
* !   \\h            Matches the ASCII backspace character '\\b'
* !
* !   \\A            If it is the first character of the regular
* !                 expression, it matches the empty character
* !                 at the beginning of the string.
* !
* !   \\Z            If it is the last character of the regular
* !                 expression, it matches the empty character
* !                 at the end of the string.
* !   
* !   \\c@-\\cZ       These symbols are translated into control-@
* !                 through control-Z.  Any other values, and the
* !                 \\c part is ignored.
* !
* !   \\d            Same as [0-9].
* !
* !   \\D            Same as [^0-9].
* !
* !   \\s            Same as [ \\t\\n\\r\\f].
* !
* !   \\S            Same as [^ \\t\\n\\r\\f].
* !
* !   \\w            Same as [a-zA-Z_0-9].
* !
* !   \\W            Same as [^a-zA-Z_0-9].
* !
* !   \\Q..\\E        A section enclosed in these symbols is
* !                 taken literally.  Inside these sections, meta
* !                 characters and special symbols have no meaning.
* !                 If a \\E needs to appear in one of these
* !                 sections, the \\ must be escaped with \\.
* !
* !   \\              This escapes the meaning of a special
* !                 character.
*         Input: pat - regular expression pattern to be compiled
*        Output: 
*       Returns: NULL on success, a string indicating the error otherwise
*    Written By: Ozan S. Yigit (oz)
*          Date: 1993
*      Verified: See regex testsuites in $BOOTSTRAP/testsuite/kutils/03.kregex
* Modifications: Integrated into Khoros 2.0 (MY) 7/93.
*
*                Extended to support \\d\\s\\w\\Q\\E\\c?\\A\\Z.  \\< and \\>
*		 were modified to be \\b and \\B. (SJ) 11/94.
*
*   Declaration: char *kre_comp(
*		 !   char *pat)
*
*************************************************************/

/*
 * kre_comp - compile `pat' into the static automaton dfa[].
 *
 * Returns NULL (0) on success, otherwise a string describing the
 * error; on error dfa[0] is set to END so that kre_exec fails.  A NULL
 * or empty pattern re-uses the previously compiled one if there was a
 * successful compile.
 *
 * The automaton always starts with "BOT 0" and ends with "EOT 0 END",
 * so the whole pattern is group 0.  For every group the dfa positions
 * of its OR opcodes and of its closing EOT are recorded in grpmkr[].
 *
 * Input checks performed here:
 *   - a pattern ending in a lone backslash is rejected;
 *   - {n,m} counts must fit in one CHAR (at most 127);
 *   - the number of '|' per group is limited by the size of grpmkr[];
 *   - back reference numbers are not accumulated past MAXTAG;
 *   - characters are converted to unsigned char before being handed
 *     to the ctype routines.
 */
char *kre_comp(
   char *pat)
{
	register char *p, *e;           /* pattern pointer   */
	register CHAR *mp=dfa;          /* dfa pointer       */
	register CHAR *lp;              /* saved pointer..   */
	register CHAR *sp=dfa;          /* another one..     */

	register int tagi = 0;          /* tag stack index   */
	register int tagc = 1;          /* actual tag count  */

	register int n,n1;
	int quote_mode = FALSE;
	int c1, c2;
	int grpcnt[MAXTAG],i,j;
	int g;                          /* group owning a marker */
	/* marker slots available per group in grpmkr[] */
	const int maxmkr = (int) (sizeof(grpmkr[0]) / sizeof(grpmkr[0][0]));
	/* largest repeat count that fits in one (signed) CHAR */
	const int maxrep = 127;
		
	if (!pat || !*pat) {
		if (sta) {
			return(0);
		}
		else
			badpat("No previous regular expression");
	}
	sta = NOP;

	grpcnt[0] = 0;
	store(BOT);
	store(0);
	for (p = pat; *p; p++) {
		if (quote_mode == FALSE && (*p == '\\' && *(p+1) == 'Q')) {
			quote_mode = TRUE;
			p++;
			continue;
		}
		/* keep room for the largest item plus the trailer */
		if (mp > &dfa[MAXDFA - CCLSKIP]) {
			badpat("DFA array overfilled");
		}
		if (quote_mode == TRUE) {
			if (*p == '\\' && *(p+1) == 'E') {
				quote_mode = FALSE;
				p++;
				continue;
			}
			lp = mp;
			if (case_insensitive && isalpha((unsigned char) *p))
				store(ICHR);
			else
				store(CHR);
			store(*p);
			sp = lp;
			continue;
		}
		lp = mp;
		switch(*p) {

		case '.':               /* match any char..  */
			store(ANY);
			break;

		case '^':               /* match beginning.. */
			if (p == pat)
				store(BOL);
			else {
				store(CHR);
				store(*p);
			}
			break;

		case '|':               /* or pattern.. */
			g = tagstk[tagi];
			/* one slot stays free for the group's EOT marker */
			if (grpcnt[g] >= maxmkr - 1)
				badpat("Too many alternatives");
			grpmkr[g][grpcnt[g]++] = mp;
			store(OR);
			break;

		case '$':               /* match endofline.. */
			if (!*(p+1))
				store(EOL);
			else {
				store(CHR);
				store(*p);
			}
			break;

		case '[':               /* match char class..*/

			if (*++p == '^') {
				store(NCL);
				p++;
			}
			else
				store(CCL);

			if (*p == '-')		/* real dash */
				chset(*p++);
			if (*p == ']')		/* real brac */
				chset(*p++);
			while (*p && *p != ']') {
				if (*p == '-' && *(p+1) && *(p+1) != ']') {
					p++;
					c1 = *(p-2) + 1;
					c2 = (int) *p++;
					while (c1 <= c2)
						chset(c1++);
				}
				else if (*p == '\\' && *(p+1)) {
					p++;
					switch(*p) {
					case 'w':
						n1 = case_insensitive;
						case_insensitive = TRUE;
						for (c1 = 'a'; c1 <= 'z'; c1++)
							chset(c1);
						case_insensitive = n1;
						chset('_');
						/* fall through: \w includes the digits */
					case 'd':
						for (c1 = '0'; c1 <= '9'; c1++)
							chset(c1);
						break;
					case 's':
						chset(' ');
						chset('\t');
						chset('\n');
						chset('\r');
						chset('\f');
						break;
					default:
						chset(*p);
					}
					p++;
				}
				else
					chset(*p++);
			}
			if (!*p)
				badpat("Missing ]");

			/* copy the bit table out and clear it for next time */
			for (n = 0; n < BITBLK; bittab[n++] = (char) 0)
				store(bittab[n]);
	
			break;

		case '*':               /* match 0 or more.. */
		case '+':               /* match 1 or more.. */
		case '?':
		case '{':
			if (p == pat)
				badpat("Empty closure");
			lp = sp;                /* previous opcode */
			if (*lp == CLO)         /* equivalence..   */
				break;
			switch(*lp) {

			case EOT:
				/*
				 * The closure header is inserted in front
				 * of the group, so every marker of this
				 * group and of the groups inside it moves
				 * by three bytes.
				 */
				for (j = (int) *(lp+1); j < tagc; j++)
				   for (i = 0; i < grpcnt[j]; i++)
				      grpmkr[j][i] += 3;
				lp = tagstrt[(int) *(lp+1)];
				break;
			case BOT:
			case BOL:
			case BOS:
			case WB:
			case NWB:
			case REF:
				badpat("Illegal closure");
			default:
				break;
			}

			/*
			 * Make room: three bytes for "CLO min max" in
			 * front of the item and one END after it.
			 */
			store(END);
			store(END);
			store(END);
			store(END);
			sp = mp;
			while (--mp > lp + 2)
				*mp = mp[-3];
			mp -= 2;
			store(CLO);
			switch (*p) {
			case '+':
				store(1);
				store(0);
				break;
			case '*':
				store(0);
				store(0);
				break;
			case '?':
				store(0);
				store(1);
				break;
			case '{':
				e = p+1;
				while (*e && *e != '}')
					e++;
				if (!*e)
					badpat("Illegal closure range values");
				c1 = 0;
				p++;
				while (p < e && isdigit((unsigned char) *p)) {
					c1 = c1*10 + (*p - '0');
					if (c1 > maxrep)
						badpat("Closure range value too large");
					p++;
				}
				if (p == e)
					c2 = c1;
				else if (*p != ',')
					badpat("Illegal char in closure range");
				else {
					p++;
					c2 = 0;
					while (p < e && isdigit((unsigned char) *p)) {
						c2 = c2*10 + (*p - '0');
						if (c2 > maxrep)
							badpat("Closure range value too large");
						p++;
					}
				}

				if ( p != e )
					badpat("Illegal char in closure range");
				if (c2 < c1 && c2 != 0)
					badpat("Illegal range: min is bigger than max");
				store((char) c1);
				store((char) c2);
			
				break;
			}
			mp = sp;
			break;

		case '(':
			if (tagc < MAXTAG) {
				grpcnt[tagc] = 0;
				tagstk[++tagi] = tagc;
				tagstrt[tagc] = mp;
				store(BOT);
				store(tagc++);
			}
			else
				badpat("Too many () pairs");
			break;

		case ')':
			if (*sp == BOT)
				badpat("Null pattern inside ()");
			if (tagi > 0) {
				g = tagstk[tagi];
				grpmkr[g][grpcnt[g]++] = mp;
				store(EOT);
				store(tagstk[tagi--]);
			}
			else
				badpat("Unmatched )");
			break;

		case '\\':              /* tags, backrefs .. */
			switch(*++p) {

			case '\0':
				/* nothing follows the backslash */
				badpat("Trailing backslash");
			case 'b':
				if (*sp == WB || *sp == NWB)
					badpat("Null pattern between \\b\\B");
				store(WB);
				break;
			case 'B':
				if (*sp == NWB || *sp == WB)
					badpat("Null pattern inside \\B\\b");
				store(NWB);
				break;
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
			case '8':
			case '9':
				n = *p-'0';
				while (isdigit((unsigned char) p[1]))
				{
					/* stop growing once out of range */
					if (n < MAXTAG)
						n = n*10 + p[1]-'0';
					p++;
				}

				if (tagi > 0 && tagstk[tagi] == n)
					badpat("Cyclical reference");
				if (tagc > n) {
					store(REF);
					store(n);
				}
				else
					badpat("Undetermined reference");
				break;
			case 'h':
				store(CHR);
				store('\b');
				break;
			case 'n':
				store(CHR);
				store('\n');
				break;
			case 'f':
				store(CHR);
				store('\f');
				break;
			case 'r':
				store(CHR);
				store('\r');
				break;
			case 't':
				store(CHR);
				store('\t');
				break;
                        case 'A':
				store(BOS);
				break;
                        case 'Z':
				store(EOS);
				break;
			case 'e':
				store(CHR);
				store('\033');
				break;
			case 'v':
				store(CHR);
				store('\013');
				break;
                        case 'c':
				/* \c@ .. \c_ become control characters */
				if ((*(p+1) > 63) && (*(p+1) < 96)) {
					p++;
					store(CHR);
					store(*p-64);
				}
				else {
					if (case_insensitive &&
					    isalpha((unsigned char) *p))
						store(ICHR);
					else
						store(CHR);
					store(*p);
				}
				break;
                        case 'd':
                        case 'D':
				if (*p == 'D')
                                	store(NCL);
				else
                                	store(CCL);
                                for (c1 = '0'; c1 <= '9'; c1++)
                                        chset(c1);
                                for (n = 0; n < BITBLK; bittab[n++] = (char) 0)
                                        store(bittab[n]);
                                break;
                        case 's':
                        case 'S':
				if (*p == 'S')
                                	store(NCL);
				else
                                	store(CCL);
                                chset('\t');
                                chset('\n');
                                chset('\r');
                                chset('\f');
                                chset(' ');
                                for (n = 0; n < BITBLK; bittab[n++] = (char) 0)
                                        store(bittab[n]);
                                break;
                        case 'w':
                        case 'W':
				if (*p == 'W')
                                	store(NCL);
				else
                                	store(CCL);
				n1 = case_insensitive;
                                case_insensitive = TRUE;
                                for (c1 = 'a'; c1 <= 'z'; c1++)
                                        chset(c1);
                                case_insensitive = n1;
                                chset('_');
                                for (c1 = '0'; c1 <= '9'; c1++)
                                        chset(c1);
                                for (n = 0; n < BITBLK; bittab[n++] = (char) 0)
                                        store(bittab[n]);
                                break;
			default:
				store(CHR);
				store(*p);
			}
			break;

		default :               /* an ordinary char  */
			if (case_insensitive && isalpha((unsigned char) *p))
				store(ICHR);
			else
				store(CHR);
			store(*p);
			break;
		}
		sp = lp;
	}
	if (tagi > 0)
		badpat("Unmatched (");
	grpmkr[0][grpcnt[0]] = mp;
	store(EOT);
	store(0);
	store(END);
	sta = OKP;
	return(0);
}


/************************************************************
*
*  Routine Name: kre_exec - execute dfa to find a match.
*
*       Purpose: Execute DFA to match a pattern.
*
*	special cases: (dfa[0])	
*		BOS
*			Match only once, starting from the
*			beginning of the string.
*		CHR
*			First locate the character without
*			calling pmatch, and if found, call
*			pmatch for the remaining string.
*		END
*			re_comp failed, poor luser did not
*			check for it. Fail fast.
*
*	If a match is found, bopat[0] and eopat[0] are set
*	to the beginning and the end of the matched fragment,
*	respectively.
*
*         Input: lp - string to exec the DFA on
*       Returns: TRUE (1) on success, FALSE (0) otherwise
*    Written By: Ozan S. Yigit (oz)
*          Date: 1993
*  Restrictions: kre_comp or kre_icomp must have been called
*		 previously to calling this routine.
* Modifications: Integrated into Khoros 2.0 (MY) 7/93
*
*   Declaration: int kre_exec(
*		 !    char *lp)
*
*************************************************************/

int kre_exec(
   register char *lp)
{
	register char *ep = 0;
	register CHAR *ap = dfa;
	int i;

	if ((bol = lp) == NULL)
	{
	   return(FALSE);
	}

	for (i = 0; i < sizeof(bopat); i++)
	   bopat[0] = 0;
	switch(*ap) {
	default:			/* regular matching all the way. */
		while (*lp) {
			if ((ep = pmatch(lp,ap)) != NULL)
				break;
			lp++;
		}
		break;
	case END:			/* munged automaton. fail always */
		return(FALSE);
	}
	if (!ep) {
		return(FALSE);
	}

	bopat[0] = lp;
	eopat[0] = ep;
	return(TRUE);
}


/************************************************************
*
*  Routine Name: kre_subs - substitute the matched portions of the src in
*			    dst
*
*       Purpose: Substitutes the matched portions of the source 
*                string into the destination string.
*
*	         ! '&'      - substitute the entire matched pattern.
*	         ! '\\digit' - substitute a subpattern, with the given
*		 !            tag number. Tags are numbered from 1 to
*		 !            127. If the particular tagged subpattern
*		 !            does not exist, null is substituted.
*		 Other symbols can be used to modify the substitution
*		 patterns:
*
*		 !     \\Q..\\E - Ignore all special characters between the
*		 !              \\Q and \\E symbols
*		 !     \\l     - Convert next character to lower case.
*                !     \\L..\\E - Convert all text between \\L and \\E to lower
*		 !              case
*		 !     \\u     - Convert next character to upper case.
*                !     \\U..\\E - Convert all text between \\U and \\E to upper
*                !              case
*
*         Input: src - source string
*        Output: dst - destination string
*       Returns: TRUE (1) on success, FALSE (0) otherwise
*
*  Restrictions: kre_exec must be called before this routine will do any
*		 substitutions.
*    Written By: Ozan S. Yigit (oz)
*          Date: 1993
*      Verified:
*  Side Effects:
* Modifications: Integrated into Khoros 2.0 (MY) 7/93
*
*   Declaration: int kre_subs(
*		 !    char *src,
*		 !    char *dst)
*
*************************************************************/

int kre_subs(
   register char *src,
   register char *dst)
{
	register char c;
	register int  pin;
	register char *bp;
	register char *ep;
        int pin_break = FALSE;
	int lower_next = FALSE;
	int upper_next = FALSE;
	int lower_mode = FALSE;
	int upper_mode = FALSE;
	int quote_mode = FALSE;

	if (!*src || !bopat[0])
		return(FALSE);

        pin = 0;
	while ((c = *src++) != '\0') {
		if (quote_mode == TRUE) {
			if (c == '\\' && *src == 'E') {
				src++;
				quote_mode = FALSE;
				continue;
			}
			if (c == '\\' && *src == '\\') {
				src++;
			}
			if (islower(c) &&
			    (upper_next == TRUE || upper_mode == TRUE)) {
				*dst++ = toupper(c);
				upper_next = FALSE;
			}
			else if (isupper(c) &&
				 (lower_next == TRUE || lower_mode == TRUE)) {
				*dst++ = tolower(c);
				lower_next = FALSE;
			}
			else
				*dst++ = c;
			continue;
		}
		switch(c) {

		case '&':
			pin = 0;
			break;

		case '\\':
			c = *src++;
			switch(c) {
			case 'l':
				lower_next = TRUE;
				continue;
			case 'u':
				upper_next = TRUE;
				continue;
			case 'L':
				lower_mode = TRUE;
				upper_mode = FALSE;
				continue;
			case 'U':
				upper_mode = TRUE;
				lower_mode = FALSE;
				continue;
			case 'Q':
				quote_mode = TRUE;
				continue;
			case 'E':
				lower_mode = FALSE;
				upper_mode = FALSE;
				quote_mode = FALSE;
				continue;
			case '0': case '1': case '2': case '3': case '4':
			case '5': case '6': case '7': case '8': case '9':
				pin = c - '0';
				while (isdigit(*src)) {
					c = *src++;
					pin = pin*10 + c - '0';
				}
				pin_break = TRUE;
				break;
			}
			if (pin_break)
			{
				pin_break = FALSE;
				break;
			}
		default:
			if (islower(c) &&
			    (upper_next == TRUE || upper_mode == TRUE)) {
				*dst++ = toupper(c);
				upper_next = FALSE;
			}
			else if (isupper(c) &&
				 (lower_next == TRUE || lower_mode == TRUE)) {
				*dst++ = tolower(c);
				lower_next = FALSE;
			}
			else
				*dst++ = c;
			continue;
		}

		if ((bp = bopat[pin]) != NULL && (ep = eopat[pin]) != NULL) {
			while (*bp && bp < ep) {
				c = *bp++;
				if (islower(c) && (upper_next == TRUE ||
				    upper_mode == TRUE)) {
					*dst++ = toupper(c);
					upper_next = FALSE;
				}
				else if (isupper(c) && (lower_next == TRUE ||
					 lower_mode == TRUE)) {
					*dst++ = tolower(c);
					lower_next = FALSE;
				}
				else
					*dst++ = c;
			}
			if (bp < ep)
				return(FALSE);
		}
	}
	*dst = (char) 0;
	return(TRUE);
}
