/* -u num freq : for unlearning
   -ur num freq: for unlearning internal resistance
   [-s] -n     : for generating multiple hypotheses.
   [-snc]       : Add size unit but don't clamp it.
   -r	       : for generating training instances for reinforcing
	       : specific connections
   -rr	       : for generating training instances for reinforcing
	       : internal resistance
   -z          : Generates zone information for letters in zone 1 or 3
   -nl -z      : Generates only zones (No letter symbols).
   -l2	       : Generate word name also to train L2 layer as follows
		 -> cat { c1 a2 t3 }
Eg  cat ->  z3_3
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Size in bytes of the buffer main() reads each input word into. */
#define MAX_WORD 30
/* Truth values are plain ints; FALSE and TRUE below are the two values used. */
#define boolean int
/* Value strcmp() returns when the two strings are equal. */
#define MATCH 0
#define FALSE 0
#define TRUE 1
/* Number of parts a word is divided into when letter positions are
   quantized for the zone (-z) output. */
#define X_PARTS 8
/* Running counts of vowels and consonants. is_vowel() increments one of
   them when called with updte == TRUE; main() resets both to 0 for each
   word it converts. */
int num_vowel,num_cons;
/* This program takes arbitrary text as input and splits each word
into its constituent symbols in the format required by mlhn. The
result is written to standard output so it can be piped to mlhn.
Eg

cat -> { c1 a2 t3 }

If invocation flag is '-s', then it also includes a size unit while
doing the conversion and clamps it to 1.

cat -> { c1 a2 t3 ##3*1 }

-u 2 1
cot -> 1 { c1 o2*0 } 
       1 { t3 o2*0 }
-r 2 1
cat -> 1 { c1 a2 } 
       1 { t3 a2 }
*/

/* The 'map' stores the confusion matrix between pairs of letters.
   The confusion matrix is directed. It is currently boolean (0, 255)
   but will be generalised to store probabilities. Each row indicates
   the hypotheses to be generated for a particular character.
   Rows and columns are both indexed by (letter - 'A'). */
int map[26][26]; /* Initially all 0's */

/* Per-letter shape table filled by init_shape() from "letter_shape".
   For each row:
     [0] = number of "as"  entries read for the letter,
     [1] = number of "ds"  entries,
     [2] = number of "mho" entries,
     [3] = number of "uho" entries,
     [4] = the numeric entry on the line (added to the "nasds" total
           printed per word).
   The first 26 rows are for lower case, the next 26 for upper case. */
int shape[52][5];/* Initially all 0's */

/* Loads the confusion matrix from the file "letter_map" in the current
   directory.

   The file is a sequence of whitespace-separated tokens.  Each record is
       <row-letter> <letter> <letter> ... 0
   and sets map[row][letter] = 255 for every listed letter.  Only the
   first character of each token is significant.

   Differences from a naive reader, all of them defensive:
     - a missing/unreadable file is reported and the program exits
       instead of dereferencing a NULL stream;
     - tokens are read with a field width so an over-long token cannot
       overrun the buffer;
     - a record whose terminating "0" is missing (file ends early) ends
       the read instead of looping forever;
     - tokens whose first character is not 'A'..'Z' are ignored rather
       than indexing outside 'map'.

   Returns 0. */
int init(void)
{
    FILE *fp;
    char sym[16];
    int row_no, letter_no;

    fp = fopen("letter_map", "r");
    if (fp == NULL) {
        perror("letter_map");
        exit(EXIT_FAILURE);
    }

    /* Outer loop: one iteration per record; the first token names the row. */
    while (fscanf(fp, "%15s", sym) == 1) {
        row_no = sym[0] - 'A';

        /* Inner loop: hypotheses for this row, up to the "0" terminator
           (strcmp() == 0 means equal) or the end of the file. */
        while (fscanf(fp, "%15s", sym) == 1 && strcmp(sym, "0") != 0) {
            letter_no = sym[0] - 'A';
            if (row_no >= 0 && row_no < 26 &&
                letter_no >= 0 && letter_no < 26)
                map[row_no][letter_no] = 255;
        }
    }

    fclose(fp);
    return 0;
}

/* Prints every hypothesis recorded in 'map' for the letter 'sym'.

   sym : the observed letter; the row used is (sym - 'A').
   pos : position of the letter in the word; it is appended in decimal
         to each hypothesis, e.g. "B3 ".

   Each hypothesis is written to standard output followed by one space.
   A 'sym' outside 'A'..'Z' has no row in 'map', so nothing is printed
   for it (previously this indexed outside the array).  The symbol is
   formatted directly with printf, so the position may have any number
   of digits.

   Returns 0. */
int print_from_map(sym,pos)
char sym;
int pos;
{
    int row_no, i;

    row_no = sym - 'A';
    if (row_no < 0 || row_no >= 26)
        return 0;

    for (i = 0; i < 26; i++)
        if (map[row_no][i] == 255)
            printf("%c%d ", 'A' + i, pos);

    return 0;
}

/* Reads the file "letter_shape" (current directory) into the array
   'shape'.  The file must have one record per letter in the order
   a-z, A-Z.  Each record is
       <label> <entry> <entry> ... 0
   where the label is read and ignored (the row is determined purely by
   the record's position in the file) and each entry is one of
       "as", "ds", "mho", "uho"  -> increments shape[row][0..3]
       anything else             -> shape[row][4] = atoi(entry)

   Defensive behaviour:
     - a missing/unreadable file is reported and the program exits
       instead of dereferencing a NULL stream;
     - tokens are read with a field width so they cannot overrun 'sym';
     - a record with no terminating "0" before end-of-file ends the read
       instead of looping forever;
     - records beyond the number of rows in 'shape' are not read.

   Returns 0. */
int init_shape(void)
{
    FILE *fp;
    char sym[16];
    int row_no;
    int max_rows = (int)(sizeof shape / sizeof shape[0]);

    fp = fopen("letter_shape", "r");
    if (fp == NULL) {
        perror("letter_shape");
        exit(EXIT_FAILURE);
    }

    row_no = 0;
    /* The first token of each record is only a label; its value is unused. */
    while (row_no < max_rows && fscanf(fp, "%15s", sym) == 1) {
        while (fscanf(fp, "%15s", sym) == 1 && strcmp(sym, "0") != MATCH) {
            if (strcmp(sym, "as") == MATCH)
                shape[row_no][0]++;
            else if (strcmp(sym, "ds") == MATCH)
                shape[row_no][1]++;
            else if (strcmp(sym, "mho") == MATCH)
                shape[row_no][2]++;
            else if (strcmp(sym, "uho") == MATCH)
                shape[row_no][3]++;
            else
                shape[row_no][4] = atoi(sym);
        }
        row_no++;
    }

    fclose(fp);
    return 0;
}

/* Prints 'count' shape symbols of the form <tag><pos><n> followed by a
   space, with n running from 'num' to 'num + count - 1'. */
static void print_shape_feature(const char *tag, int pos, int num, int count)
{
    int i;

    for (i = 0; i < count; i++)
        printf("%s%d%d ", tag, pos, num + i);
}

/* Prints the shape info 'as', 'ds', 'mho' or 'uho' for the letter 'sym'
   and appends pos and num to each symbol.

   sym       : the letter; 'a'..'z' use rows 0..25 of 'shape',
               'A'..'Z' use rows 26..51.
   pos       : quantized position of the letter (word split into 8 parts).
   num       : repetition number of this symbol in the same quantum; when
               a letter has several features of one kind they are numbered
               num, num+1, ...
   num_asdsp : running total; shape[row][4] for this letter is added to it.

   A 'sym' that is not a letter has no row in 'shape'; nothing is printed
   and *num_asdsp is left unchanged (previously this indexed outside the
   array).

   Returns 0. */
int print_shape(sym,pos,num, num_asdsp )
char sym;
int pos,num,*num_asdsp;
{
    int row_no;

    if ((sym >= 'a') && (sym <= 'z'))
        row_no = sym - 'a';
    else if ((sym >= 'A') && (sym <= 'Z'))
        row_no = (sym - 'A') + 26;
    else
        return 0;

    (*num_asdsp) += shape[row_no][4];

    print_shape_feature("as",  pos, num, shape[row_no][0]);
    print_shape_feature("ds",  pos, num, shape[row_no][1]);
    print_shape_feature("mho", pos, num, shape[row_no][2]);
    print_shape_feature("uho", pos, num, shape[row_no][3]);

    return 0;
}

boolean is_vowel(let,updte)
char let[];
boolean updte;
/* Classifies let[0]: returns TRUE when it is one of a, e, i, o, u in
either case, FALSE for anything else. When updte is TRUE the matching
global counter is bumped as well: num_vowel for a vowel, num_cons
otherwise.
*/
{
    boolean vowel;

    switch (let[0]) {
    case 'a': case 'e': case 'i': case 'o': case 'u':
    case 'A': case 'E': case 'I': case 'O': case 'U':
        vowel = TRUE;
        break;
    default:
        vowel = FALSE;
        break;
    }

    if (updte) {
        if (vowel)
            num_vowel++;
        else
            num_cons++;
    }

    return(vowel);
}

/* Emits the training instances used by -u, -r, -ur and -rr for one word.

   wrd        : the input word.
   target_pos : 1-based position of the letter being unlearned/reinforced;
                the caller guarantees 1 <= target_pos <= strlen(wrd).
   freq_no    : frequency printed in front of every instance.
   clamp_zero : when TRUE the target symbol is suffixed with "*0".
   unit_only  : when TRUE (-ur / -rr) a single instance containing only
                the target symbol is printed.

   Otherwise one instance is printed pairing the target with every other
   letter of the word that is not '_', followed by one instance pairing
   the target with the size unit (the word length). */
static void print_connection_instances(const char *wrd, int target_pos,
                                       int freq_no, boolean clamp_zero,
                                       boolean unit_only)
{
    const char *clamp = clamp_zero ? "*0" : "";
    char target = wrd[target_pos - 1];
    int len = (int)strlen(wrd);
    int i;

    if (unit_only) {
        printf("%d { %c%d%s } \n", freq_no, target, target_pos, clamp);
        return;
    }

    for (i = 0; i < len; i++) {
        if (((i + 1) != target_pos) && (wrd[i] != '_'))
            printf("%d { %c%d %c%d%s } \n",
                   freq_no, wrd[i], i + 1, target, target_pos, clamp);
    }

    /* Now the size unit */
    printf("%d { %d %c%d%s } \n", freq_no, len, target, target_pos, clamp);
}

/* Reads whitespace-separated words from standard input and writes one
   training instance (or, for -u/-r/-ur/-rr, a set of instances) per word
   to standard output.

   Flags (argv[1], optionally followed by a second flag after -s, -snc
   or -nl):
     -f freq        prefix every instance with 'freq'
     -u  pos freq   unlearning instances for the letter at 'pos'
     -r  pos freq   reinforcing instances for the letter at 'pos'
     -ur pos freq   single-symbol instance (internal resistance)
     -rr pos freq   single-symbol instance (internal resistance)
     -s / -snc      add a size unit ("##<len>*1" / "##<len>")
     -nl            do not print the letter symbols
     -n             print hypotheses from the confusion map instead
     -l2            prefix each instance with "-> <word>"
     -z             also print zone/shape symbols and the "nasds" total
     -p n           drop the letter at position n (0 = last letter)
     -v n           drop the first n vowels
     -c n           drop the first n consonants

   Robustness notes:
     - words are read with a field width of MAX_WORD-1, so an over-long
       word is split rather than overrunning the buffer;
     - all numeric options default to 0 when their argument is missing;
     - for -u/-r/-ur/-rr, words shorter than 'pos' (or a 'pos' < 1) are
       skipped instead of reading outside the word;
     - the zone quantum state is reset for every word;
     - "-p 0" drops the last letter of each word, not the position that
       happened to be last in the first word read;
     - symbols are formatted directly with printf, so multi-digit
       positions and totals cannot overflow a scratch buffer. */
int main(int argc, char *argv[])
{
    char wrd[MAX_WORD], let[2];
    char word_fmt[16];
    int i, len, drop_pos;
    int pos_no = 0, freq_no = 0, vowel_no = 0, cons_no = 0, unlearn_no = 0;
    int flag_pos, num_pos;
    boolean add_size, add_size_no_clamp, freq, poserr, vowelerr, conserr,
            not_to_print, unlearn, reinforce, noise, shape_flag, no_letter,
            l2_format, reinforce_R, unlearn_R;
    /* Required for quanta */
    int quant, prev_quant, num_in_quant, num_asds;

    conserr = FALSE;
    vowelerr = FALSE;
    freq = FALSE;
    poserr = FALSE;
    add_size = FALSE;
    add_size_no_clamp = FALSE;
    unlearn = FALSE;
    reinforce = FALSE;
    unlearn_R = FALSE;
    reinforce_R = FALSE;
    noise = FALSE;
    shape_flag = FALSE;
    no_letter = FALSE;
    l2_format = FALSE;

    /* First check if any flags at all */
    if (argc > 1) {
        /* Check if '-f' */
        if ((argc > 2) && (strcmp(argv[1], "-f") == MATCH)) {
            freq = TRUE;
            freq_no = atoi(argv[2]);
        }
        else if ((argc > 3) &&
                 ((strcmp(argv[1], "-u") == MATCH) ||
                  (strcmp(argv[1], "-r") == MATCH) ||
                  (strcmp(argv[1], "-ur") == MATCH) ||
                  (strcmp(argv[1], "-rr") == MATCH))) {
            /* All four share the same "<flag> pos freq" argument layout. */
            unlearn     = (strcmp(argv[1], "-u") == MATCH);
            reinforce   = (strcmp(argv[1], "-r") == MATCH);
            unlearn_R   = (strcmp(argv[1], "-ur") == MATCH);
            reinforce_R = (strcmp(argv[1], "-rr") == MATCH);
            unlearn_no = atoi(argv[2]);
            freq_no = atoi(argv[3]);
            freq = TRUE;
        }
        else {
            /* Else first check if '-s', '-snc' or '-nl'; any of these may
               be followed by a second flag. */
            if (strcmp(argv[1], "-s") == MATCH) {
                add_size = TRUE; flag_pos = 2; num_pos = 3;
            }
            else if (strcmp(argv[1], "-snc") == MATCH) {
                add_size_no_clamp = TRUE; flag_pos = 2; num_pos = 3;
            }
            else if (strcmp(argv[1], "-nl") == MATCH) {
                no_letter = TRUE; flag_pos = 2; num_pos = 3;
            }
            else {
                flag_pos = 1; num_pos = 2;
            }

            if (argc > flag_pos) {
                if (strcmp(argv[flag_pos], "-n") == MATCH) {
                    init();
                    noise = TRUE;
                }

                if (strcmp(argv[flag_pos], "-l2") == MATCH) {
                    l2_format = TRUE;
                }

                if (strcmp(argv[flag_pos], "-z") == MATCH) {
                    init_shape();
                    shape_flag = TRUE;
                }

                if (strcmp(argv[flag_pos], "-p") == MATCH) {
                    poserr = TRUE;
                    if (argc > num_pos) pos_no = atoi(argv[num_pos]);
                }

                if (strcmp(argv[flag_pos], "-v") == MATCH) {
                    vowelerr = TRUE;
                    if (argc > num_pos) vowel_no = atoi(argv[num_pos]);
                }

                if (strcmp(argv[flag_pos], "-c") == MATCH) {
                    conserr = TRUE;
                    if (argc > num_pos) cons_no = atoi(argv[num_pos]);
                }
            }
        }
    }

    /* Bound the read to the buffer: "%<MAX_WORD-1>s". */
    sprintf(word_fmt, "%%%ds", MAX_WORD - 1);

    while (scanf(word_fmt, wrd) == 1) {
        len = (int)strlen(wrd);

        /* First check if -u, -r, -ur or -rr was used - They are handled
           completely differently */
        if (reinforce || unlearn || reinforce_R || unlearn_R) {
            /* The target position must lie inside this word. */
            if ((unlearn_no < 1) || (unlearn_no > len)) continue;
            /* If letter at position is '_', then no unlearning */
            if (wrd[unlearn_no - 1] == '_') continue;

            /* NOTE(review): "*0" is appended for -u and -rr but not for
               -ur.  Going by the flag names one would expect -u and -ur;
               the existing behaviour is kept unchanged - confirm the
               intent before altering it. */
            print_connection_instances(wrd, unlearn_no, freq_no,
                                       (unlearn || reinforce_R),
                                       (reinforce_R || unlearn_R));
            continue;
        }

        num_asds = num_vowel = num_cons = 0;
        /* Used only with -z.  Quanta are numbered from 1, so 0 never
           matches the first letter of a word. */
        prev_quant = 0;
        num_in_quant = 0;

        if (l2_format) {
            printf("-> %s ", wrd);
        }
        if (freq) printf("%d ", freq_no);
        printf("{ ");
        if (shape_flag) printf("%s ", wrd);

        /* Position 0 means "the last letter of this word". */
        drop_pos = (pos_no == 0) ? len : pos_no;

        for (i = 0; i < len; i++) {
            let[0] = wrd[i];
            let[1] = '\0';
            /* The order of these tests matters: is_vowel(..,TRUE) updates
               the vowel/consonant counters as a side effect. */
            not_to_print = (poserr && ((i + 1) == drop_pos)) ||
                           (vowelerr && (num_vowel < vowel_no) && is_vowel(let, TRUE)) ||
                           (conserr && (num_cons < cons_no) && !is_vowel(let, TRUE));

            if (not_to_print) continue;

            if (noise) {
                print_from_map(wrd[i], (i + 1));
            }
            else {
                if (!no_letter) printf("%c%d ", wrd[i], i + 1);
                if (shape_flag) {
                    quant = ((i + 1) * X_PARTS) / len + 1;
                    if (quant == prev_quant) {
                        num_in_quant++;
                    }
                    else {
                        num_in_quant = 1;
                        prev_quant = quant;
                    }
                    print_shape(wrd[i], quant, num_in_quant, &num_asds);
                }
            }
#ifdef TRANSIT
            if ((i < 5) && (i < (len - 1))) {
                /* Transition symbol: this letter and the next one, each
                   followed by its position. */
                let[0] = wrd[i + 1];
                let[1] = '\0';
                not_to_print = (poserr && ((i + 2) == drop_pos)) ||
                               (vowelerr && (num_vowel < vowel_no) && is_vowel(let, FALSE)) ||
                               (conserr && (num_cons < cons_no) && !is_vowel(let, FALSE));
                if (!not_to_print)
                    printf("%c%d%c%d ", wrd[i], i + 1, wrd[i + 1], i + 2);
            }
#endif
        }

        if (add_size || add_size_no_clamp) {
            printf("##%d%s ", len, add_size ? "*1" : "");
        }
        if (shape_flag) {
            printf("nasds%d ", num_asds);
        }
        printf("} \n");
    }

    return 0;
}
