/* automata.d/src file kmp.c */
/*
Into this file we put all the code which refers exclusively to labelled
graphs of type |KMP|, that is, all code which has some knowledge of
the magic pocket of a vertex in a graph of that type.
The file is divided into two sections: all those
functions which refer explicitly to the magic pocket of a vertex, and need
to know the details of its structure, are in the first part; functions
specific to |KMP|s which do not need to know those details are in the second.
*/
#include <stdio.h>
#include "defs.h"
#include "list.h"
#include "word.h"
#include "input.h"
#include "lg.h"
#include "kb.h"

extern lg * being_refined;
extern lg * to_add;
extern lg * archive;
static int KMPstruct_sgn PARMS((vindex, vindex));
static KMPstruct *KMPstruct_create PARMS((void));
static void KMPstruct_kill PARMS((vindex));
static void KMPstruct_print PARMS((FILE*,vindex));
static int KMPstruct_get_heap_index PARMS((vindex));
static void KMPstruct_set_heap_index PARMS((vindex, int));
static void prune PARMS ((VOID));
static void merge PARMS (( lg *lgp, vindex keep, vindex discard)); 
static void lift_arrow PARMS ((lg *lgp, vindex vertex_to_delete, gen g));

/*
Functions which need to know the details of the magic pocket.
*/
/* Compare two vertices by priority.
\Pre |v1| and |v2| are pointers
to initialized |vertex|'s, and each |vertex| is assumed to be a vertex with
a |KMPstruct| for its |mp| (magic pocket).
\Returns +1 if the arguments are in their desired order, that is, if the
priority of the second |vindex| is larger; -1 if they are in the opposite
order, that is, if it is smaller.
If the priorities are equal, the intention is that the word lengths are
examined and the shorter word comes first, with 0 returned only if the
priorities are equal and the words are equal. The code below does not
implement this tie-break: it returns 0 whenever the priorities are equal.
*/
/* Order two vertices by priority alone: +1 when |v1| has the smaller
priority, -1 when it has the larger one, 0 when the priorities are equal.
Both arguments must be non-null.
*/
static int
KMPstruct_sgn(v1,v2)
	vindex  v1,  v2;
{
	assert(v1);
	assert(v2);
	if (get_priority(v1) < get_priority(v2))
		return 1;
	if (get_priority(v1) > get_priority(v2))
		return -1;
	/* equal priorities: no further tie-break is applied here */
	return 0;
}

/* The next procedure is used for printing out structures while
debugging.
*/
/* Debugging aid: write a selection of the fields of the |KMPstruct| held in
the magic pocket of |v| to |wfile|. Nothing is written if the pocket is
empty. |v| must belong to a graph whose function table is |KMP|.
*/
static void
KMPstruct_print(wfile,v)
	FILE * wfile;
	vindex v;
{
	KMPstruct *Wts = (KMPstruct *)(v->mp);
	assert((v->fntab) == KMP);
	if (Wts) {
		fprintf(wfile,
			"\nlhs=%d, lhs_tree=%d, fails_to_reducible=%d, ",
					Wts->status.lhs,Wts->status.lhs_tree,
					Wts->status.fails_to_reducible);
		/* trailing ", " keeps this field apart from the next one */
		fprintf(wfile, "\ninv_lhs=%d, inv_lhs_tree=%d, ",
				Wts->status.inv_lhs,Wts->status.inv_lhs_tree);
		fprintf(wfile,"under_consideration=%d",
					Wts->status.under_consideration);
		/* |failure| is presumably a |vindex| (a pointer) - confirm in
		lg.h. A pointer must be printed with "%p" and passed as
		|void *|; "%x" expects an |unsigned int|. */
		fprintf(wfile,"\npriority=%d heap_index=%d failure=%p",
			Wts->priority, Wts->heap_index, (void *)Wts->failure);
	}
}

/* This returns a pointer to a new |KMPstruct|.
The conditional compilations protect against a change in the
|#define|'s.
*/
/* Allocate a |KMPstruct| with |vzalloc1()| and return it.
Each of |failure|, |back| and |heap_index| is assigned explicitly only when
the corresponding "invalid" constant is non-zero; otherwise the value left by
the allocator is relied upon (presumably zero - confirm |vzalloc1()|).
*/
static KMPstruct *
KMPstruct_create()
{
	KMPstruct * fresh;

	fresh = vzalloc1(KMPstruct);
#if (UNDEFINED != 0)
	fresh->failure = UNDEFINED;
#endif
#if (INVALID_GEN != 0)
	fresh->back = INVALID_GEN;
#endif
#if (INVALID_HEAP_INDEX != 0)
	fresh->heap_index = INVALID_HEAP_INDEX;
#endif
	return fresh;
}

/* Release the magic pocket of |v| and leave the pocket pointer null, so
that the vertex no longer refers to freed storage.
*/
static void
KMPstruct_kill(v)
	vindex v;
{
	assert(KMP == (v->fntab));
	Free_dp(v->mp);
	v->mp = 0;
}

/* Read the |heap_index| field stored in the magic pocket of |v|.
The pocket must be non-null.
*/
static int
KMPstruct_get_heap_index(v)
	vindex v;
{
	return ((KMPstruct *)(v->mp))->heap_index;
}

/* Store |i| in the |heap_index| field of the magic pocket of |v|.
The pocket must be non-null.
*/
static void
KMPstruct_set_heap_index(v,i)
	vindex v;
	int i;
{
	((KMPstruct *)(v->mp))->heap_index = i;
}

/* Here is the function table which allows us to perform various standard
operations on the magic pocket pointers associated to a vertex.
*/
/* The initializer is positional, so the order of the entries has to match
the order of the members of |mp_fntab| (declared outside this file -
presumably in lg.h; confirm before reordering).
*/
mp_fntab KMP_fntab =
{
	KMPstruct_sgn,			/* compare two vertices by priority */
	(V2DP)KMPstruct_create,		/* allocate a new |KMPstruct| */
	KMPstruct_kill,			/* free the pocket of a vertex */
	KMPstruct_print,		/* debugging print of a pocket */
	KMPstruct_get_heap_index,	/* read the |heap_index| field */
	KMPstruct_set_heap_index	/* write the |heap_index| field */
};

boolean
get_status(v,st)
	vindex v;
	int st;
{
	KMPstruct *Wts = (KMPstruct *)(v->mp);
	boolean ans = FALSE;
	vindex w = UNDEFINED;
	assert((v->fntab) == KMP);
	switch(st) {
		case LHS :
			ans = Wts->status.lhs;
		break;
		case INV_LHS :
			ans = Wts->status.inv_lhs;
		break; 
		case REDUCIBLE :
			ans = ((w = archive_image(v)) != UNDEFINED &&
					w != get_rhs(w));
		break;
		case FAILS_TO_REDUCIBLE :
			ans = Wts->status.fails_to_reducible;
		break;
		case LHS_TREE :
			ans = Wts->status.lhs_tree;
		break;
		case INV_LHS_TREE :
			ans = Wts->status.inv_lhs_tree;
		break;
		case CONSIDERED :
			ans = Wts->status.considered;
		break;
		case INV_CONSIDERED :
			ans = Wts->status.inv_considered;
		break;
		case CONSIDERED_TREE :
			ans = Wts->status.considered_tree;
		break;
		case INV_CONSIDERED_TREE :
			ans = Wts->status.inv_considered_tree;
		break;
		case UNDER_CONSIDERATION :
			ans = Wts->status.under_consideration;
		break;
		case UNDER_INV_CON :
			ans = Wts->status.under_inv_con;
		break;
		case ON_PQ :
			ans = (Wts->heap_index != INVALID_HEAP_INDEX);
		break;
		case OVERLAP :
			ans = Wts->status.overlap;
		break;
		case WORD_DIFF :
			ans = Wts->status.wdiff;
		break;
		default :
			fprintf(stderr,"\nCase %d not defined",st);
			assert(FALSE);

	}
	return ans;
}

/* Set the status flag of |v| selected by |st| to |b|, which must be TRUE or
FALSE. Only flags which are stored in the |status| member of the |KMPstruct|
can be set; any other value of |st| prints a message on |stderr| and trips
an assertion.
*/
void
set_status(v,st,b)
	vindex v;
	int st;
	boolean b;
{
	KMPstruct *pocket = (KMPstruct *)(v->mp);

	assert(b == TRUE || b == FALSE);
	assert((v->fntab) == KMP);
	if (st == LHS)
		pocket->status.lhs = b;
	else if (st == INV_LHS)
		pocket->status.inv_lhs = b;
	else if (st == FAILS_TO_REDUCIBLE)
		pocket->status.fails_to_reducible = b;
	else if (st == LHS_TREE)
		pocket->status.lhs_tree = b;
	else if (st == INV_LHS_TREE)
		pocket->status.inv_lhs_tree = b;
	else if (st == CONSIDERED)
		pocket->status.considered = b;
	else if (st == CONSIDERED_TREE)
		pocket->status.considered_tree = b;
	else if (st == INV_CONSIDERED)
		pocket->status.inv_considered = b;
	else if (st == INV_CONSIDERED_TREE)
		pocket->status.inv_considered_tree = b;
	else if (st == UNDER_CONSIDERATION)
		pocket->status.under_consideration = b;
	else if (st == UNDER_INV_CON)
		pocket->status.under_inv_con = b;
	else if (st == OVERLAP)
		pocket->status.overlap = b;
	else if (st == WORD_DIFF)
		pocket->status.wdiff = b;
	else {
		fprintf(stderr,"\nCase %d not defined",st);
		assert(FALSE);
	}
}


/* This is |#define|'d in lg.webh. Here is a version that I used when
faced with a very sticky bug.
*/
#ifdef BAD
/* Checking version of |set_next|: store |w| as the |nextc| field of |v|
and, when |w| is non-null, verify letter by letter that the word of |w| does
not come before the word of |v|. Both words are read with |vindex2word()|
and consumed from the front; the check stops at the first position where
the letter of |w| is strictly larger.
*/
void
set_next(v,w)
	vindex v,w;
{
	KMPstruct * Wts = (KMPstruct *)((v)->mp);
	assert(((v)->fntab) == KMP);
	/* the second check is about |w|, which may legitimately be null */
	assert(w == 0 || ((w)->fntab) == KMP);
	Wts->nextc = w;
	if (w != 0) {
		word wv, ww;
		gen gv, gw;
		word_init(&wv);
		word_init(&ww);
		vindex2word(v,&wv);
		vindex2word(w,&ww);
		while(word_delget_first(&wv,&gv)){
			/* The call has a side effect (it fills |gw|), so it
			must not live inside |assert()|, which vanishes
			when assertions are disabled. */
			int got = word_delget_first(&ww,&gw);
			assert(got);
			if (!got) break; /*|ww| ran out: |gw| is not valid*/
			assert(gw >= gv);
			if (gw > gv) break;
		}
		word_clear(&wv);
		word_clear(&ww);
	}
}
#endif

/*
Functions specific to |KMP|s which do not need to understand the
specific details of the magic pocket.

This procedure takes a labelled graph of type |KMP| and calculates 
the fine structure of the magic pocket for the vertices that were already 
there.
New information, which should have been there and wasn't, is recorded
in the labelled graph |to_add|, as one goes through the vertices of
|*being_refined| one by one. One needs to go through in two passes.
The reason for this is that we can only really calculate if a KB rule
should be inserted or not AFTER we have computed whether the vertex
fails to a |mpplhs|. We compute the failure function in the first
pass, and in the second pass we first set |fails_to_mpplhs| and then
look for a KB rule.
\Pre The labelled graph pointed to by |being_refined| has no fine structure. It
is initialized and may already have a substantial number of vertices.
\Post If changes have been made to the labelled graph, then it is
returned without any fine structure. If no changes have been made,
then all fine structure has been computed.
\Returns Nothing: |refine()| is declared |void|, so it does not report whether a loop is added to |to_add|.
*/
/* Refine the labelled graph |*being_refined|, which must have type
|KMP_WTREE|, in two breadth first passes, and then call |prune()|.
The first pass calls |comp_failure()| on every vertex.
The second pass looks at each vertex |s| which is reducible or fails to a
reducible vertex, and whose image |archive_s| has |CONSIDERED_TREE| set.
For such a vertex it locates two vertices |small| and |big| below
|archive_s|, calls |reject()| on each |CONSIDERED| vertex other than
|archive_s| on the |get_next()| chain from |small| to |big|, and finally
unlinks from that chain the vertices for which |CON_LEAF()| is false.
The function takes no arguments and returns nothing; it works on the globals
|being_refined| and |to_add| (the latter is only checked to be non-null here).
*/
void
refine()
{
	vindex s = UNDEFINED;
	vindex archive_s = UNDEFINED;
	bfs_traverser tlg;
	assert(being_refined); /*This is the address of the labelled graph we
		are going to refine.*/
	assert(to_add);
	assert(get_type(being_refined) == KMP_WTREE);
	bfs_init(&tlg,being_refined);
	while (bfs_next(&tlg,&s))  /*first pass: compute failure pointers */
		comp_failure(s);
	bfs_clear(&tlg);
	bfs_init(&tlg,being_refined);
	while (bfs_next(&tlg,&s))  {/*second pass: throw out some
					left hand sides */
		gen g = INVALID_GEN;
		archive_s = archive_image(s);
		/* NOTE(review): |archive_s| is passed to |get_status()|, which
		dereferences it; presumably every vertex reaching this test has
		a defined archive image - confirm. */
		if ((does_fail_to_reducible(s) || is_reducible(s)) &&
					get_status(archive_s,CONSIDERED_TREE)) {
			vindex t, big, small;/*these vertices are in |archive|*/
			vindex prevs = UNDEFINED;
			t = big = archive_s;
	/*Find the biggest and the smallest considered left hand sides which
	contain the word corresponding to |archive_s| as a left hand side.*/
	/*Descend from |archive_s|, at each level taking the tree edge with the
	largest generator whose target has |CONSIDERED_TREE| set. When such an
	edge is found, |t == big| again and the outer loop repeats one level
	down. When none is found, |t| is left holding |get(big,1)|.
	NOTE(review): the outer loop then stops only if |get(big,1) != big|,
	i.e. presumably there is no |1|-labelled loop at |big| - confirm.*/
			while (t == big) {
				for (g = num_gens; g > 0; g--) {
					t = get(big,g);
		if (t != 0 &&  backg(t) == g && get_status(t,CONSIDERED_TREE)){
						big = t;
						break;/*from for loop*/
					}
				}
			}
			assert(get_status(big,CONSIDERED));
			t = small = archive_s;
	/*Same descent, but taking the smallest generator at each level. Here
	|t| is left holding |get(small,num_gens)| when no edge is found.*/
			while (t == small) {
				for (g = 1; g <= num_gens; g++) {
					t = get(small,g);
		if (t != 0 &&  backg(t) == g && get_status(t,CONSIDERED_TREE)){
						small = t;
						break;/*from for loop*/
					}
				}
			}
			assert(get_status(small,CONSIDERED));

/*Run through the linked list, rejecting the bad left hand sides*/
			t = small;
			while(TRUE) {
		/*|archive_s| is the subword that we started off with*/
				if (t != archive_s && get_status(t,CONSIDERED))
				(void)reject(t);
				if (t == big)
					break;
				t = get_next(t);
			}
	/*now we throw away the bad left hand sides, except for |small|,
	which may have a pointer into it which we don't know about*/
			prevs = small;
	/*A vertex whose |get_next()| is itself marks the end of the list.*/
			while (prevs != big && get_next(prevs) != prevs){
				t = get_next(prevs);
			/*definitely one along from |prevs|*/
		/*|t| may equal |big|, but is no further along the list.*/
				if (!CON_LEAF(t)) {
			/*suck list in towards |prevs|, deleting as we go*/
					vindex temp = get_next(t);
					/*|temp| may be beyond |big|, 
						but it doesn't matter*/
					assert( t != small);
					set_next(t,(vindex)UNDEFINED);
					if (t == temp) {
					/*|t| was at the end of the list*/
						assert(t == big);
						set_next(prevs,prevs);
					/*make |prevs| the end of the list*/
					}
					else set_next(prevs,temp);
				}
				else prevs = t;
				if (t == big)
					break;
			}
		}
	}
	bfs_clear(&tlg);
	prune();
}


/* Remove all dross from a labelled graph of |KMP| type.
*/
/* Delete from |*being_refined| every vertex, other than the basepoint,
which has neither |LHS_TREE| nor |INV_LHS_TREE| set, and then recompute the
failure pointer of every vertex reached by a tree edge.
*/
static void
prune()
{
	list doomed;
	bfs_traverser walk;
	vindex v = UNDEFINED;
	gen g = INVALID_GEN;

	list_init(&doomed,VINDEX,FIFO);
	bfs_init(&walk,being_refined);
	/*first pass: only collect the vertices to go, so that the breadth
	first search is not upset by deletions under its feet*/
	while (bfs_next(&walk,&v)) {
		if (!is_basepoint(v) && !get_status(v,LHS_TREE)
					&& !get_status(v,INV_LHS_TREE))
			(void)list_insert(&doomed,(dp)&v);
	}
	while(list_delget_first(&doomed,(dp)&v))
		lg_vertex_kill(being_refined,v);
	bfs_clear(&walk);
	/*second pass: the failure pointers are probably now wrong, so
	recompute them for the target of each tree edge*/
	bfs_init(&walk,being_refined);
	while(bfs_next(&walk,&v)) {
		FOR_EACH_GENERATOR(g) {
			vindex child = get(v,g);
			if (child != UNDEFINED &&  backg(child) == g)
				comp_failure(child);
		}
	}
	bfs_clear(&walk);
	list_clear(&doomed);
}


/* Set the flag |status| of |v| to |b| and keep the matching "tree" flag
consistent on the path from |v| towards the basepoint.
|status| selects the tree flag: |LHS|, |INV_LHS|, |CONSIDERED|,
|INV_CONSIDERED| and |RHS| map to the corresponding |..._TREE| constant.
When |b| is TRUE the tree flag is switched on along the path until a vertex
is met where it is already on. When |b| is FALSE the tree flag is switched
off along the path until a vertex is met which still has a tree child
carrying the flag. In both cases the walk also stops at a vertex which
itself has |status| set, at the basepoint, or where there is no back
generator.
An unrecognised |status| trips an assertion; if assertions are disabled the
function returns without touching anything, rather than continuing with an
uninitialized tree selector.
NOTE(review): |get_status()| and |set_status()| in this file have no case
for |RHS| or |RHS_TREE|, so the |RHS| branch would end in their default
branch - confirm whether it is still used.
*/
void
set_status_on_tree(v,status,b)
	vindex v;
	int status;
	boolean b;
{
	vindex w=v;
	boolean notb;
	int which_tree;
	notb = ((b)? FALSE : TRUE);
	switch(status) {
		case LHS:
			which_tree = LHS_TREE;
		break;
		case INV_LHS:
			which_tree = INV_LHS_TREE;
		break;
		case CONSIDERED:
			which_tree = CONSIDERED_TREE;
		break;
		case INV_CONSIDERED:
			which_tree = INV_CONSIDERED_TREE;
		break;
		case RHS:
			which_tree = RHS_TREE;
		break;
		default :
			assert(FALSE);
			return; /*never use |which_tree| uninitialized*/
	}
	set_status(w,status,FALSE); 
		/* to get the loop going, this will be adjusted at the end */
while (get_status(w,which_tree) == notb && get_status(w,status)==FALSE) {
		gen g;
		if (notb){
			/*switching off: stop where another branch of the
			tree still needs this vertex*/
			boolean tree_here = FALSE;
			FOR_EACH_GENERATOR(g) {
				vindex t = get(w,g);
				if (t == UNDEFINED) continue;
		else if (g == backg(t) && get_status(t,which_tree)==TRUE) {
					tree_here = TRUE;
			/*another branch comes in at this point*/
					break; /*from FOR loop*/
				}
			}
			if (tree_here)
				/*the sawn off branch meets the trunk here*/
				break;
		}
		set_status(w,which_tree,b);
		g = backg(w);
		if (is_basepoint(w) || g == INVALID_GEN)
				/*avoid possible trouble if in a coarse graph*/
			break;
		w = get(w,inv(g)); /*one step towards the basepoint*/
		assert( w != UNDEFINED );
	} /*have now joined up with previously defined tree, or else reached
			the basepoint */
	set_status(v,status,b);
}



/* It is assumed that |t| is not the basepoint, and that all vertices
smaller than |t| have had their failure functions set.
We do not define the
failure function on vertices which lie beyond
a left hand side on the tree. The reason is that the failure function
is going to be used for reduction, and there is no point in having it
if the string includes a left hand side.
*/
void
comp_failure(t)
	vindex t;
{
	vindex f = UNDEFINED;
	vindex fg = UNDEFINED;
	gen g;
	set_failure(t,t); /*make sure we don't inherit faulty information*/
	if (!is_basepoint(t)) {
		g = backg(t);
		f = get(t,inv(g)); /*one step towards the basepoint*/
		assert(f != UNDEFINED);
		while (!is_basepoint(f)) {
			f = get_failure(f);
			if ((fg = get(f,g)) != UNDEFINED && backg(fg) == g) {
				set_failure(t,fg);
				break;
			}
		}
		if (get_failure(t) == t) { /*arrived at basepoint disappointed*/
			assert(is_basepoint(f));
			set_failure(t,f);
		}
		if (is_reducible(get_failure(t)) ||
					does_fail_to_reducible(get_failure(t))) 
			set_status(t,FAILS_TO_REDUCIBLE,TRUE);
	}
}

/* A special create function for |to_add|.
*/
/* Allocate the global graph |to_add|, initialize it as a |KMP| graph of
type |KMP_WTREE| on |num_gens| generators, and make the basepoint of
|archive| the archive image of its basepoint.
*/
void
to_add_create()
{
	to_add = vzalloc1(lg);
	lg_init(to_add, KMP, KMP_WTREE, num_gens);
	set_archive_image(to_add->basepoint, archive->basepoint);
}

/* The |KMP|s pointed to by |remain| and |disappear| are
amalgamated.
Some of the magic pocket information which used to be recorded in
these labelled graphs is now irrelevant, and so we start by discarding it.
\Pre |remain| and |disappear| point to |KMP|s.
In |*remain| all magic pockets are empty; in |*disappear| some may not be.
\Post All the vertices in |*disappear| have been absorbed into
|*remain|, and the magic pockets of |*remain| remain empty.
*/
/* Helper for |kill_and_devour()|: unless |lgp| already has type |MF_WTREE|,
call |set_failure(v,v)| on each of its vertices and mark it as |MF_WTREE|.
*/
static void
kmp_reset_to_mf_wtree(lgp)
	lg *lgp;
{
	vindex v = UNDEFINED;
	bfs_traverser t;
	if (get_type(lgp) == MF_WTREE)
		return;
	bfs_init(&t,lgp);
	while (bfs_next(&t,&v))
		set_failure(v,v);
	bfs_clear(&t);
	set_type(lgp,MF_WTREE);
}

/* Absorb the graph |*disappear| into |*remain|.
The vertex count of |remain| is increased by that of |disappear|, both
graphs are put into |MF_WTREE| form, and the two basepoints are identified
with |coin()|. After that |*disappear| is cleared and its storage is freed.
\Pre |disappear| is non-trivial and its |discards| list is empty.
\Post The storage pointed to by |disappear| has been freed. The argument is
passed by value, so the caller's own pointer is NOT reset and must not be
used again.
*/
void
kill_and_devour(remain,disappear)
	lg *remain;
	lg *disappear;
{
	vindex w = disappear->basepoint; /*saved before it is detached below*/
	assert(non_trivial(disappear));
	set_num_verts(remain, get_num_verts(remain) + get_num_verts(disappear));
	kmp_reset_to_mf_wtree(remain);
	kmp_reset_to_mf_wtree(disappear);
	/*detach the vertices from |disappear| so that clearing it below
	does not touch what |remain| has taken over*/
	disappear->basepoint = UNDEFINED;
	assert(list_empty(&(disappear->discards)));
	(void)coin(remain,remain->basepoint,w);
	disappear->num_verts = 0;
	lg_clear(disappear);
	Free_dp((dp)disappear);
}

/* Identify two vertices, and make all other identifications which are
implied by this.
We get rid of each vertex |v| on the list of |lgp->discards| one at a time,
first lifting each arrow from |v| (and its inverse), and then lifting all
the characteristics of |v| itself.
\Pre |lgp->discards| is an initialized list of (addresses of) vertices,
which have to be discarded after all relevant information is lifted from them.
\Post All appropriate information associated with |discard| has been
transferred to |keep|, and |discard| has been thrown away (provided
|discard != keep|). |lgp->discards| is an empty list. 

*/
boolean
coin(lgp,keep,discard)
	lg *lgp;
	vindex keep, discard;
{ 
	vindex v;
	gen g;
	boolean ans = FALSE;
	assert(lgp->fntab==KMP);
	merge(lgp,keep,discard);
	while (list_delget_first(&(lgp->discards),(data_ptr)&v)) {
		assert(get_parent(v));
		if (v == lgp->basepoint) {
			lgp->basepoint = root(v);
			set_back_ptr(lgp->basepoint,BACK_FROM_BASE);
		}
		assert(is_basepoint(lgp->basepoint));
		FOR_EACH_GENERATOR(g)
			lift_arrow(lgp,v,g);
		lift_vertex_characteristics(v);
		lg_vertex_kill(lgp,v);
		ans = TRUE;
	}
	return ans;
}

/* Two vertices of the labelled graph, which might previously have
represented distinct vertices of the labelled graph we are aiming for,
are from now on to be regarded as the same.
This procedure does not actually arrange the disposal of one of the
vertices.
It only arranges for future disposal.
\Pre |discard| and |keep| point to vertices in a labelled graph.
\Post |discard| is placed on the list |lgp->discards|, unless it was
there already. Some path halving may be done.

Ch: |lgp|, |discard|, |lgp->discards|.
*/
/* Record that |keep| and |discard| are the same vertex.
Both are replaced by their roots. If the roots differ, the root of |discard|
is given the root of |keep| as parent and is queued on |lgp->discards|;
nothing is deleted here. |lgp| must have type |MF_WTREE|.
*/
static void
merge(lgp, keep,discard) 
	lg *lgp;
	vindex discard, keep; 
{
	vindex keep_root, discard_root;

	assert(lgp);
	assert(keep != UNDEFINED);
	assert(discard != UNDEFINED);
	assert(get_type(lgp) == MF_WTREE);
	keep_root = root(keep);
	discard_root = root(discard);
	if (discard_root == keep_root)
		return; /*already identified*/
	set_parent(discard_root,keep_root);
	(void)list_insert(&(lgp->discards),(data_ptr)&discard_root);
}



/* We have an arrow from |vtd| (vertex to delete) and we have to lift
it to the root of the tree in the fast merge-find algorithm.
We must make sure that all arrows continue to be double headed.
That is to say, if we have an arrow g \colon u \rightarrow v, then
we must also have an arrow inv(g) \colon v \rightarrow u.
We must also make sure that, in degenerate cases, we do not create
arrows pointing to |vtd|, the vertex to be discarded, and that we do
not add |vtd| back onto the list |lgp->discards|. Actually, adding |vtd|
back onto the list is not a danger, because the only routine that adds
things onto this list is |merge()|, and this routine only does so to a
vindex which is not yet on the list (the parent pointer is null before
|merge()| changes the parent pointer and adds the vindex to the list).

The main idea is to merge the image of |vtd| under |g| (which we call
|target|) with the image
of the root of |vtd| under |g|.

\Pre In the fast merge find structure, there are no parent pointers
pointing into |vtd|.

The reason for this is that elimination of vertices is done by taking
vindexes off |lgp->discards| one by one on a fifo basis.
Once |vtd| is placed on |lgp->discards|, all further parent pointers which
would have been made to point at |vtd| would in fact be pointed
instead at |root(vtd)|. So, although |vtd| may be put onto |lgp->discards|
while a parent to a number of other vindexes due for discarding, all
these vertices will be dealt with before we get to |vtd|, and no new
pointers into |vtd| will be created after it joins the fifo queue.
*/
/* Move the arrow labelled |g| which leaves |vtd| (the vertex to delete) so
that it leaves |root(vtd)| instead, keeping every arrow paired with its
inverse. If the root already has a |g| arrow, the two targets are merged.
If the arrow is a loop at |vtd|, the loop is re-created at the root, or the
root is merged with the vertex its existing |g| or |inv(g)| arrow points to.
Nothing is done if |vtd| has no |g| arrow.
\Pre |lgp| has type |MF_WTREE|, |vtd| is not its own parent, and
|1 <= g <= lgp->degree|.
*/
static void
lift_arrow(lgp,vtd,g)
	lg *lgp;
	vindex vtd;
	gen g;
{
	vindex target;
	assert(lgp);
	assert(vtd != UNDEFINED);
	assert(get_type(lgp) == MF_WTREE);
	assert(get_parent(vtd) != vtd);
	assert(1 <= g && g <= (lgp->degree));
	/*only look the arrow up once |vtd| and |g| have been checked*/
	target = get(vtd,g);
	if (target != UNDEFINED) { /*if this condition is not satisfied,
						we do not need to do anything */
		vindex root_vtd=root(vtd);
		vindex rg = get(root_vtd,g);
		/*now, keep all arrows double headed*/
		unset(vtd,g);
		unset(target,inv(g));
		if (target == vtd) {
			/*the arrow was a loop at |vtd|, so |target| cannot
			be used as the image of a new arrow*/
			vindex rig = get(root_vtd,inv(g));
			if (rg != UNDEFINED)
				merge(lgp,root_vtd,rg);
			else if (rig != UNDEFINED)
				merge(lgp,root_vtd,rig);
			else { /*now, keep all arrows double headed*/
				set(root_vtd,g,root_vtd);
				set(root_vtd,inv(g),root_vtd);
			}
		}
		else { /*|target| can be used as the image of new arrows,
				because it is not equal to |vtd|*/
			if (rg != UNDEFINED) {
				/*the root already had a |g| arrow: identify
				its target with |target| and remove it*/
				merge(lgp,target,rg);
				unset(root_vtd,g);
				unset(rg,inv(g));
			}
			set(root_vtd,g,target);
			set(target,inv(g),root_vtd);
		}
	}
}



/* Return the root of the merge-find tree containing |v|, that is, the
vertex reached by following parent pointers until one points to itself.
On the way, each vertex visited whose grandparent differs from its parent
is re-pointed at its grandparent (path halving).
*/
vindex
root(v)
	vindex v;
{
	vindex up, upup;

	assert(v != UNDEFINED);
	for (;;) {
		up = get_parent(v);
		if (up == v)
			return v;	/*|v| is its own parent*/
		upup = get_parent(up);
		if (upup == up)
			return up;	/*the parent is the root*/
		set_parent(v, upup);	/*path halving*/
		v = upup;
	}
}
