/* gc.c - generates the required mappings */

# ifndef lint
static char *Rcsid = "@(#)$Header$";
# endif
#include	"iso646.h"
  
  /*
   * $Header$
   *
   * $Log$
   */
  
#include	<stdio.h>
#include	<ctype.h>
#include	<unistd.h>	/* 941114/TW */
#include	<string.h>	/* 941114/TW */
#include	"charset.h"
  
#define	AMAX	2200
#define	MAXCOMBTABS	30
  
  extern char *newstr();
extern int getopt();
extern char *xalloc();

/*
 * File-scope state shared by every routine in this file.
 */

/* Default file names used by main(). */
char *fsets = "charsets.def";       /* charset definitions, parsed by main() */
char *fdefs = "chardefs.txt";       /* character mnemonic source */
char *fcpl = "charsets.cpl";        /* compiled charsets */
char *fcm = "iso10646.cm";          /* mnemonic listing written while reading fdefs */
char *fpc = "chars.req";            /* opened read-only as pc, scanned by gwrite() */
char *cmerror = "notfound.cm";      /* opened for writing as cme */
char *cmdir = "cm/";                /* prefix of the per-charset charmap files (-p) */
char cmname ARRAY(LSIZE);           /* cmdir + current charset name */
char id ARRAY(30), comment ARRAY(240);

/* Characters that may appear in a two-character mnemonic; the position of a
   character in this string determines its weight in ord1/ord2. */
char *refset =
  " !\"%&'()*+,-./0123456789:;<=>?ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz";
INT16S		ord1 ARRAY(C256), ord2 ARRAY(C256);	/* weights of 1st/2nd mnemonic char, see getcid() */
INT16S		ot1 ARRAY(C256), ot2 ARRAY(C256);	/* tables written to the compiled file by main() */

/* f: current input, g: compiled output, h: fcm listing,
   cm: current charmap, pc: fpc, cme: cmerror */
FILE		*f,*g,*h,*cm,*pc,*cme;
CHAR8U		*outtab8;
INT16S		*outtab, *defc, *intab, *combtabs, *combtab;
/* ref was declared CHAR8U *ref [C256], i.e. an array of pointers, although
   main() fills it with fgets() and writes it out as C256 bytes; it must be a
   plain byte buffer (same slip as codc below). */
CHAR8U		ref ARRAY(C256), cmd ARRAY(LSIZE), *t;
CHAR8U		s ARRAY(LSIZE);		/* current input token / line */
char		*longnames ARRAY(AMAX),*descnames ARRAY(AMAX*2);
/* codc below was declared char * codc [20]. Assumes char codc [20]. 941114 / TW */
char		codc ARRAY(20);
char		*chsnames ARRAY(AMAX);  /* list of character set names */
INT32S		chsdata ARRAY(AMAX); /* offset to chs data */
int		i,mx,bigids,a,cidnum,tabl,cod,cod2,codr1,codr2,
  chsets,chsnum,optp,aliases,
  codc1,codc2,dupl,comb1,comb2,resch,donotread,
  ecma,line,group,plane,row,cell,num,spos;
CHAR8U		defname ARRAY(LSIZE), aliasname ARRAY(LSIZE), *p;
CHARTAB		c;			/* header of the charset currently being built */
struct chdbhdr  *chdr;			/* header of the compiled file */

void
  defchsname(chsn) char* chsn;
{
  char *p; int i;
  for (p= chsn; *p; p++) if (islower (*p)) *p= toupper (*p);
  
  for (i=0; i < chsets && strcmp(chsn,*(chsnames+i)) ; i++);
  if (i < chsets)
    printf ("Warning: charset %s occurs more than once\n",chsn);
  else {
    *(chsnames+chsets) = newstr(chsn);
    chsnum= chsets; chsets++;
    *(chsdata+chsnum) = c.fileoffset = ftell (g);
  }
}


void
  gwrite() {
    
    char s ARRAY(3),cname ARRAY(80);
    int cerr;
    cerr= 0;
    c.outbytes= (c.bits > 8 || c.combtabs) ? 2 : 1;
    c.createout= ! (c.outbytes == 2 || dupl);
    if (c.bits <= 8) {
      /* cannot handle big charsets */
      fwrite (&c,sizeof(CHARTAB),1,g);
      if (c.outbytes == 2) {
	
	fwrite (outtab,tabl,2,g);
	if (c.bits > 8) fwrite (intab,128*128,2,g);
	else fwrite (intab,C256,2,g);
	if (c.combtabs) fwrite(combtabs,C256,c.combtabs*2,g);
      }
      else if (c.bits <= 8) {
	
	if (dupl) {
	  
	  for (i= 0;i<tabl;i++) *(outtab8+i)= *(outtab+i);
	  fwrite (outtab8,tabl,1,g); 
	}
	
	fwrite (intab,C256,2,g);
      }
      
    }
    
    if (optp) {
      
      rewind (pc);
      fprintf(cm,"<NUL> /d000\n");
      while (fscanf(pc,"%s %s\n",s,cname) diff EOF) {
	
	if (strlen (s) == 1) {
	  *(s+1)= ' '; *(s+2)= '\0'; }
	
      }
      
      fprintf (cm,"CHARMAP end\n");
      fclose (cm);
      if (c.bits < 8 && (cerr > 0 || dupl)) unlink(cmname);
    }
    
  }


/*
 * getcombtab - return the combining table belonging to character comb1,
 * creating it on first use.
 *
 * The intab will have a value for comb1 showing that there is
 * a combtab for this character, with number showing which number:
 * an intab entry >= chdr->begcombtabs means "table number
 * (entry - begcombtabs)"; each table is C256 entries inside combtabs.
 *
 * Side effects: may update intab, c.combtabs and the global loop index i.
 */
INT16S *
  getcombtab(comb1) int comb1;
{
  INT16S *slot;

  slot = intab + *(outtab+comb1);
  if (*slot < chdr->begcombtabs) {
    /* NOTE(review): only reported; a table beyond MAXCOMBTABS would still
       be written past the end of combtabs - confirm desired handling. */
    if (c.combtabs >= MAXCOMBTABS)
      printf("too many combining characters\n");
    *slot = chdr->begcombtabs + c.combtabs;
    /* clear the new table (the test used to read i>C256, so the loop
       never ran and the table kept stale contents) */
    for (i = 0; i < C256; i++) *(combtabs + c.combtabs*C256 + i) = 0;
    c.combtabs++;
  }

  return combtabs + (*slot - chdr->begcombtabs) * C256;
}


/*
 * writestringarray - write siz strings from arr to stream f.
 *
 * Each string is stored as one length byte (strlen + 1, i.e. including the
 * terminating NUL) followed by the string and its NUL.
 * NOTE(review): the length is truncated to a single byte, so strings of
 * 255 or more characters get a wrong length prefix.
 *
 * Uses a local counter so the file-wide loop index is left untouched.
 */
void
  writestringarray(f,arr,siz) FILE* f; char **arr; int siz;
{
  int idx;

  for (idx = 0; idx < siz; idx++) {
    size_t len = strlen (*(arr+idx)) + 1;
    unsigned char lenbyte = (unsigned char) len;

    fwrite (&lenbyte,1,1,f);
    fwrite (*(arr+idx),len,1,f);
  }
}


/*
 * getdox - read the next token from f into s and convert it to a number.
 * A leading 'x'/'X' selects hexadecimal, a leading 'o'/'O' octal, anything
 * else is read as decimal.  When the conversion fails the value returned
 * is the result of the fscanf() call itself.
 */
int getdox()	/* was 'implicit int' 941114/TW */
{
  int value;
  char *tok;

  value = fscanf (f,"%s",s);
  tok = (char *) s;

  switch (*s) {
  case 'x':
  case 'X':
    sscanf (tok+1,"%x",&value);
    break;
  case 'o':
  case 'O':
    sscanf (tok+1,"%o",&value);
    break;
  default:
    sscanf (tok,"%d",&value);
    break;
  }

  return value;
}


/*
 * getcid - map a two-character mnemonic to its character id: the ord1
 * weight of the first character plus the ord2 weight of the second.
 *
 * The characters are taken as unsigned bytes so that values of 0x80 and
 * above cannot produce a negative table index where plain char is signed.
 * Assumes ord1/ord2 have an entry for every byte value (C256) - confirm.
 */
int
  getcid(s) char *s;
{
  unsigned char first, second;

  first = (unsigned char) *s;
  second = (unsigned char) *(s+1);
  return *(ord1+first) + *(ord2+second);
}


/*
 * getmnem - look up a mnemonic and return its character id.
 *
 * Mnemonics shorter than three characters are resolved with getcid(); a
 * one-character mnemonic is first padded in place with a blank, so s must
 * have room for three bytes.  Longer mnemonics are searched for in
 * longnames and yield tabl + index, or 0 when they are unknown.
 * An undefined mnemonic is reported on stdout.
 */
int
  getmnem(s) char *s;
{
  int pos, len;

  len = strlen(s);
  if (len == 1) {
    s[1] = ' ';
    s[2] = '\0';
  }

  if (len < 3) {
    int shortid;

    shortid = getcid(s);
    /* NOTE(review): the message below also advances cod, which the
       long-mnemonic message does not - confirm this is intended. */
    if (defc[shortid] == 0)
      printf ("charset %s char %s %d not defined\n",defname,s,cod2*10000+cod++);
    return shortid;
  }

  pos = 0;
  while (pos < bigids && strcmp(s,longnames[pos]) != 0)
    pos++;
  if (pos < bigids)
    return tabl+pos;

  printf ("charset %s char %s %d not defined\n",
	  defname,s,cod2*10000+cod);
  return 0;
}


char stresc ARRAY(80); 	/* string to contain result */
/*
 * putesc - build the charmap symbol "<name>" for mnemonic s1 in stresc.
 *
 * Blanks are dropped and the characters '/' and '>' are preceded by the
 * escape character '/'.  Returns stresc, which is overwritten by the next
 * call.  Input that would not fit is truncated instead of being written
 * past the end of stresc.
 */
char *
  putesc(s1)
char *s1;
{

  register char *src,*dst,ch;
  char *limit;
  int need;

  src = s1; dst = stresc;
  limit = stresc + sizeof stresc - 2;	/* keep room for '>' and NUL */
  *dst++ = '<';
  while ((ch = *src++) != 0)
    {

      if (ch == ' ') continue;
      need = (ch == '/' || ch == '>') ? 2 : 1;
      if (dst + need > limit) break;
      if (need == 2) *dst++ = '/';
      *dst++ = ch;
    }

  *dst++ = '>';
  *dst = '\0';
  return stresc;
}


/*
 * key - return 1 when the current input token s is exactly the keyword w,
 * 0 otherwise.
 */
int
  key(w)
char *w;
{
  int differs;

  differs = strcmp(s,w);
  return differs == 0;
}


/*
 * fopener - fopen() that does not return on failure: when the file cannot
 * be opened a message is written to stderr and the program exits with
 * status 1.  Otherwise the open stream is returned.
 */
FILE *
  fopener (file,mode)
char *file, *mode;
{
  FILE *stream;

  if ((stream = fopen (file,mode)) == (FILE *)NULL) {
    fprintf (stderr,"*** Error: unable to open %s\n", file);
    exit (1);
  }

  return stream;
}


/*
 * main - compile the character set definitions.
 *
 * Pass 1 reads fdefs (the mnemonic source): every mnemonic is recorded in
 * defc (short, two-character mnemonics by getcid() index, long ones in
 * longnames at tabl + n) together with its description, and a listing is
 * written to fcm.
 * Pass 2 reads fsets token by token: "&keyword" tokens are commands that
 * start or modify a charset, every other token is a mnemonic assigned to
 * the next code position.  Each finished charset is written to fcpl by
 * gwrite(); finally the directory of charset names is appended and the
 * file header is rewritten with the final offsets.
 *
 * Option -p additionally produces one charmap file per charset in cmdir.
 *
 * NOTE(review): longnames, descnames and defc are indexed without a
 * capacity check (only mx is compared against AMAX, as a warning).
 */
void
  main (argc, argv) 
int argc; char **argv; {

  int ic,errflg=0;

  mx= 0; line= 0; optp= 0; chsnum= 0; chsets= 0; aliases= 0;

  while ((ic = getopt(argc, argv, "p")) != EOF) {
    switch (ic & 0377) {
    case 'p': optp++; break; /* make charmaps */
    default: errflg++; break; /* unknown option: show usage below */
    }
  }

  if (errflg) {
    fprintf(stderr, "Usage: gc -p \n");
    exit(1);
  }

  /* a = number of mnemonic characters + 1; tabl = number of possible
     two-character mnemonic ids */
  a= strlen(refset)+1; tabl= a*a;
  defc= (INT16S *) xalloc((unsigned int)(tabl+AMAX)*2);
  outtab8=(CHAR8U *) xalloc((unsigned int)128*128);
  outtab= (INT16S *) xalloc((unsigned int)128*128*2);
  intab= (INT16S *) xalloc((unsigned int)128*128*2);
  combtabs= (INT16S *) xalloc((unsigned int)MAXCOMBTABS*C256*2);
  chdr= (struct chdbhdr *) xalloc((unsigned int)sizeof(struct chdbhdr));
  f= fopener (fdefs,"rt");
  h= fopener (fcm,"wt");

  /* weights used by getcid(): position in refset for the first character,
     a * position for the second */
  for (i=0; i< C256; i++) *(ord1+i)= *(ord2+i) = 0;
  t= (CHAR8U *) refset; i=1;
  while (*t) {
    *(ord1+*t) = i; *(ord2+*t)= a*i++; t++; }

  /* ot1/ot2 hold the same weights, indexed by the id of "<char><blank>" */
  t= (CHAR8U *) refset; i=1;
  while (*t != '\0') {
    char ch2[3]; /* 941114 / TW */
    ch2[0]=ch2[1]=' ';ch2[2]=0;
    *ch2 = *t;
    *(ot1+getcid(ch2)) = i;
    *(ot2+getcid(ch2))= a*i;
    i++; t++;
  }

  for (i=0; i< tabl+AMAX; i++) {

    defc[i]= 0;
  }

  chdr->begcombtabs= tabl+AMAX; chdr->outsize= tabl; chdr->basechrs= a;

  *(defc+getcid("??")) = 1;

  /* ---- pass 1: mnemonic definitions ---- */
  while (fgets ((char *) s,LSIZE,f)) {

    sscanf((char *) s,"%s %n",id,&spos);
    line++;
    /* plain warning; perror() would append an unrelated errno text */
    if (mx > AMAX) fprintf (stderr,"Warning: too many chars\n");
    if (*s diff ' ' && strlen (s) > 1) {

      if (*(s+2) == ' ' && strlen (s) > 2) {

	/* short (two-character) mnemonic */
	cidnum= getcid(s); mx++;
	if (*(defc+cidnum)) 
	  printf ("Warning: char %2.2s occurs in line %d and %d\n",
		  s,*(defc+cidnum),line);
      }
      else {

	/* long mnemonic: compare with every long name seen so far
	   (the scan used to stop one entry early) */
	for (i=0; i < bigids; i++)
	  if (strcmp(id,*(longnames+i)) == 0) 
	    printf ("Warning: char %s occurs in line %d and %d\n",
		    id,*(defc+tabl+i),line); 
	*(longnames+bigids) = newstr(id); 
	cidnum= tabl+bigids; bigids++;
      }

      /* defc remembers the defining line, descnames the description */
      *(defc+cidnum) = line;
      *(descnames+line) = newstr(s+spos);
      fprintf (h,"%s\t__%.2x__%.2x\t%s",id,row,cell,s+spos); 
      cell++;
    }
    else if (strlen (s) >3 ) {

      /* indented line: positioning command "<name> <number>" */
      sscanf ((char *) s," %s %d", cmd,&num);
      for (p=cmd; *p; p++) if (isupper (*p)) *p= tolower (*p);
      /* printf ("command %s %d\n",cmd,num); */
      if (strcmp (cmd,"group") == 0) group=num;
      else if (strcmp (cmd,"plane") == 0) plane=num;
      else if (strcmp (cmd,"row") == 0) row=num;
      else if (strcmp (cmd,"cell") == 0) cell=num;
    }

  }

  printf ("%d short mnems and %d long mnems, = %d mnems defined\n",
	  mx,bigids,mx+bigids);
  fclose (f); fclose (h);

  /* ---- pass 2: charset definitions ---- */
  f= fopener (fsets,"rt");
  cme= fopener (cmerror,"wt");
  pc= fopener (fpc,"rt");

  g= fopener (fcpl,"wb");
  strcpy(chdr->label,"CHARSETS");
  chdr->version= 19930120;
  /* provisional header; rewritten with the final offsets at the end */
  fwrite (chdr,sizeof(*chdr),1,g);
  fwrite (ot1,C256,2,g);
  fwrite (ot2,C256,2,g);
  chdr->offlong = ftell (g);
  writestringarray(g,longnames,bigids);
  chdr->offchs = ftell (g);
  chdr->sizlong = chdr->offchs - chdr->offlong;

  /* donotread is set when the token already in s has not been handled */
  donotread= 0;
  while (donotread || fscanf (f,"%s",s) diff EOF) {

    donotread= 0;
    if (*s == '&' && *(s+1) diff '\0') {

      if (key("&referenceset")) {

	fgets ((char *) ref+32,40,f); /* skip rest of line */
	fgets ((char *) ref+32,40,f);
	fgets ((char *) ref+64,40,f);
	fgets ((char *) ref+96,40,f);
	for (i=0; i< 32; i++) *(ref+i)= 0;
	for (i=33; i< C256; i++)
	  if (*(ref+i) <= *(ref+32)) *(ref+i)= 0;
	fwrite (ref,1,C256,g);
      }
      else if (key("&charset")) {

	/* flush the previous charset, then reset the per-charset state */
	if (chsets) gwrite();
	fscanf (f,"%s",defname);
	defchsname(defname);
	cod2= cod= 0; ecma= 0; c.bits= 8; dupl= 0;
	c.c0esc= c.c1esc= c.g0esc= c.g1esc= c.g2esc= c.g3esc= 0;
	c.combtabs= 0;
	for (i= 0; i < tabl+AMAX; i++) *(outtab+i) = 0;
	for (i=0; i< C256; i++) *(intab+i)= 0;
	if (optp) {
	  strcpy(cmname,cmdir);
	  strcat(cmname,defname);
	  cm= fopener (cmname,"wt");
	  fprintf(cm,"<code_set_name> %s\n",defname);
	  fprintf(cm,"<comment_char> %%\n");
	  fprintf(cm,"<escape_char> /\n");
	}

      }
      else if (key("&alias")) {

	fscanf (f,"%s",aliasname);
	defchsname(aliasname);
	aliases++;
	if (optp) fprintf(cm,"%% alias %s\n",aliasname);
      }
      else if (key("&rem")) {

	fgets (comment,240,f);
	if (optp) fprintf(cm,"%% %s\n",comment);
      }

      else if (key("&g0esc")) c.g0esc= getdox();
      else if (key("&g1esc")) c.g1esc= getdox();
      else if (key("&g2esc")) c.g2esc= getdox();
      else if (key("&g3esc")) c.g3esc= getdox();
      else if (key("&c0esc")) c.c0esc= getdox();
      else if (key("&c1esc")) c.c1esc= getdox();
      else if (key("&ecma")) ecma= getdox(); /* obsolete */
      else if (key("&code")) {

	if (optp) if (cod == 0) fprintf(cm,"CHARMAP\n");
	cod= getdox();
      }
      else if (key("&code2")) {

	cod2= getdox()+32;
	cod= getdox()+32;
      }
      else if (key("&codex")) {

	fscanf (f,"%s",s);
	strcpy(codc, s);
	codr1= getdox(); codr2= getdox();
	codc1= getdox(); codc2= getdox();
	if (optp) for (i=codr1; i<=codr2; i++)
	  fprintf(cm,"<%s%2.2d%2.2d>...<%s%2.2d%2.2d>  /d%3.3d/d%3.3d\n",
		  codc,i,codc1,codc,i,codc2,i+32,codc1+32);
      }
      else if (key("&bits")) {

	c.bits= getdox();
	if (c.bits > 8)
	  for (i=0; i< 128*128; i++) *(intab+i)= 0;
      }
      else if (key("&duplicate")) {

	dupl= getdox();
	if (dupl <0 || dupl >= C256 || ! *(intab+dupl))
	  printf ("Warning: charset %s has duplicate %d but no original\n",defname,dupl);
	fscanf (f,"%s",s);
	if (strlen (s) == 1) {
	  *(s+1)= ' '; *(s+2)= '\0'; }

	cidnum= getcid(s);
	if (*(defc+cidnum) == 0)
	  printf ("Warning: charset %s duplicate %d %s unknown\n",defname,dupl,s);
	/* map the duplicate character to the original's code; this used
	   to store through the stale loop index i */
	*(outtab+cidnum) = dupl;
      }
      else if (key("&comb2")) {

	/* "&comb2 <base> { <second> <result> }" up to the next command */
	fscanf (f,"%s",s); comb1= getmnem(s);
	if (comb1 && *(outtab+comb1) == 0) 
	  printf ("Warning: charset %s comb1 %s not in charset\n",defname,s);
	else combtab= getcombtab(comb1); 
	fscanf (f,"%s",s);
	while (comb1 && *s diff '&') {

	  comb2= getmnem(s);
	  if (comb2 && *(outtab+comb2) == 0)
	    printf ("Warning: charset %s comb2 %s not in charset\n",defname,s);
	  fscanf (f,"%s",s); resch= getmnem(s);
	  if (resch && *(outtab+resch))
	    printf ("Warning: charset %s result %s already defined\n",defname,s);
	  if (resch) {

	    *(outtab+resch)= (char)*(outtab+comb1)*C256+*(outtab+comb2);
	    *(combtab+*(outtab+comb2))= resch;
	  }

	  fscanf (f,"%s",s);
	}

	/* s already holds the token that ended the list */
	donotread= 1;
      }
      /* commands */
    }
    else if ( *(defc+(cidnum=getmnem(s))) && ! key("??") && ! key("__") ) {

      /* defined mnemonic: assign it to the current code position */
      if (*(outtab+cidnum))
	printf ("Warning: charset %s char %s occurs more than once %d %d\n",
		defname,s,*(outtab+cidnum),cod2*10000+cod);
      if (optp) {

	fprintf (cm,"%-8s ",putesc(s));
	if (c.bits > 8) fprintf (cm,"/d%.3d",cod2);
	fprintf (cm,"/d%.3d   %s",cod,*(descnames+*(defc+cidnum)));
      }

      *(intab+cod2*128+cod)= cidnum;
      *(outtab+cidnum) = cod2*C256+cod;
      cod++;
    }
    else cod++;	/* undefined or placeholder: leave the position empty */
  }

  fclose (f);
  gwrite ();

  /* append the charset offset table and name directory, then rewrite the
     header now that all offsets are known */
  printf("%d charsets and %d aliases defined\n",chsets-aliases,aliases);
  chdr->offdat = ftell (g);
  chdr->sizchs = chdr->offdat - chdr->offchs;
  fwrite (chsdata,4,chsets,g);
  chdr->offdir = ftell (g);
  chdr->sizdat = chdr->offdir - chdr->offdat;
  writestringarray(g,chsnames,chsets);
  chdr->offend = ftell (g);
  chdr->sizdir = chdr->offend - chdr->offdir;
  fseek (g,0L,SEEK_SET);
  fwrite (chdr,1,sizeof(*chdr),g);
  fseek (g,0L,SEEK_END);
  fclose (g);
  exit (0);
}




