/* compress.c */

/* Compression routines */
#include "copyright.h"
#include "config.h"
#include "db.h"

/* bigrams.h included after constant definitions */

/* These use a pathetically simple encoding that takes advantage of the */
/* eighth bit on a char; if you are using an international character set, */
/* they may need substantial patching. */

#define BUFFER_LEN 16384	/* nice big buffer: size in bytes of each output buffer below */

#define TOKEN_BIT 0x80		/* if on, it's a token */
#define TOKEN_MASK 0x7f		/* for stripping out token value */
#define NUM_TOKENS (128)	/* number of tokens[] entries loaded by init_compress() */
#define MAX_CHAR (128)		/* size of token_table in each dimension */

#include "bigrams.h"

/* token_table[a][b] is the token byte that stands for the character pair
 * (a, b), or 0 when that pair has no token.  Filled in by init_compress().
 */
static char token_table[MAX_CHAR][MAX_CHAR];
/* Nonzero once init_compress() has populated token_table. */
static int table_initialized = 0;

/* Build the pair -> token lookup table from tokens[] and mark it ready. */
static void init_compress()
{
  int first;
  int second;
  int tok;

  /* Start from a clean slate: 0 means "this pair has no token". */
  first = 0;
  while (first < MAX_CHAR) {
    second = 0;
    while (second < MAX_CHAR) {
      token_table[first][second] = 0;
      second++;
    }
    first++;
  }

  /* Register each token under the two characters it stands for; the
   * stored value is the token's index with TOKEN_BIT switched on.
   */
  for (tok = 0; tok < NUM_TOKENS; tok++) {
    first = tokens[tok][0];
    second = tokens[tok][1];
    token_table[first][second] = tok | TOKEN_BIT;
  }

  table_initialized = 1;
}

/* Return 1 if any character of the NUL-terminated string s has TOKEN_BIT
 * set, otherwise 0.
 */
static int compressed(s)
    const char *s;
{
  const char *cursor;

  for (cursor = s; *cursor != '\0'; cursor++) {
    if ((*cursor & TOKEN_BIT) != 0)
      return 1;
  }

  return 0;
}

/* Compress the NUL-terminated string s by replacing known character pairs
 * with single token bytes (bytes that have TOKEN_BIT set).
 *
 * Returns s itself when it already contains a byte with TOKEN_BIT set.
 * Otherwise returns a pointer to a static buffer that is overwritten by the
 * next call, so copy the result if it must be kept.  Output that would not
 * fit in BUFFER_LEN bytes (including the terminating NUL) is truncated.
 */
const char *compress(s)
    const char *s;
{
  static char buf[BUFFER_LEN];
  char *to = buf;
  char *const limit = buf + BUFFER_LEN - 1;	/* last slot, kept for the NUL */
  char token;

  if (!table_initialized)
    init_compress();

  if (compressed(s))
    return s;			/* already compressed */

  /* tokenize while at least two characters remain and there is room */
  while (s[0] && s[1] && to < limit) {
    /* compressed() returned 0, so both bytes are below MAX_CHAR; the
     * casts just keep the subscripts non-negative by construction.
     */
    token = token_table[(unsigned char) s[0]][(unsigned char) s[1]];
    if (token) {
      *to++ = token;
      s += 2;
    } else {
      *to++ = *s++;
    }
  }

  /* copy the last unpaired character (if any and if it fits) */
  if (*s && to < limit)
    *to++ = *s;

  *to = '\0';

  return buf;
}

/* Expand the token bytes in the NUL-terminated string s back into the
 * character pairs they stand for.
 *
 * The result lives in a static buffer that is overwritten by the next
 * call.  To avoid generating memory problems, this function should be
 * used with something of the format
 * char tbuf1[BUFFER_LEN];
 * strcpy(tbuf1, uncompress(a->value));
 * if you are using something of type char *buff, use the
 * safe_uncompress function instead.
 *
 * Output that would not fit in BUFFER_LEN bytes (including the
 * terminating NUL) is truncated.
 */
const char *uncompress(s)
    const char *s;
{
  static char buf[BUFFER_LEN];
  char *to = buf;
  char *const limit = buf + BUFFER_LEN - 1;	/* last slot, kept for the NUL */
  const char *token;

  for (; *s; s++) {
    if (*s & TOKEN_BIT) {
      /* a token expands to two characters; never emit half a pair */
      if (limit - to < 2)
	break;
      token = tokens[*s & TOKEN_MASK];
      *to++ = token[0];
      *to++ = token[1];
    } else {
      if (to >= limit)
	break;
      *to++ = *s;
    }
  }

  *to = '\0';

  return buf;
}

/* Expand the token bytes in the NUL-terminated string s into a freshly
 * malloc'd buffer of BUFFER_LEN bytes.
 *
 * this function should be used when you're doing something like
 * char *attrib = safe_uncompress(a->value);
 * NEVER use it with something like
 * char tbuf1[BUFFER_LEN]; strcpy(tbuf1, safe_uncompress(a->value));
 * or you will create a horrendous memory leak.
 *
 * The caller owns the returned buffer and must free() it.  Returns NULL
 * if the allocation fails.  Output that would not fit in BUFFER_LEN bytes
 * (including the terminating NUL) is truncated.
 */
char *safe_uncompress(s)
    const char *s;
{
  char *to;
  char *limit;
  const char *token;
  char *buf;

  buf = (char *) malloc((unsigned)sizeof(char) * BUFFER_LEN);
  if (buf == NULL)
    return NULL;

  limit = buf + BUFFER_LEN - 1;	/* last slot, kept for the NUL */

  for (to = buf; *s; s++) {
    if (*s & TOKEN_BIT) {
      /* a token expands to two characters; never emit half a pair */
      if (limit - to < 2)
	break;
      token = tokens[*s & TOKEN_MASK];
      *to++ = token[0];
      *to++ = token[1];
    } else {
      if (to >= limit)
	break;
      *to++ = *s;
    }
  }

  *to = '\0';

  return buf;
}
