/*
 * database.c: (ndbm) database interface for the cache
 *
 * For every document, the database maintains a set of attributes,
 * its outgoing and (as far as they are known) incoming links, and the
 * name of the file that holds its contents.
 *
 * $Log: database.c,v $
 * Revision 0.14  1994/05/17  13:39:05  reinpost
 * no changes
 *
 * Revision 0.14  1994/05/17  13:39:05  reinpost
 * no changes
 *
 * Revision 0.12  1994/04/01  15:07:08  reinpost
 * *** empty log message ***
 *
 * Revision 0.12  1994/04/01  15:07:08  reinpost
 * *** empty log message ***
 *
 * Revision 0.10  1994/03/11  13:38:41  reinpost
 * one bug fix
 *
 * Revision 0.10  1994/03/11  13:38:41  reinpost
 * one bug fix
 *
 * Revision 0.8  1994/02/25  20:24:23  reinpost
 * database.c and nodatabase.c were merged
 * database.c HAS NOT BEEN TESTED!
 * (the nodatabase.c part, now in here, works)
 *
 * Revision 0.8  1994/02/25  20:24:23  reinpost
 * database.c and nodatabase.c were merged
 * database.c HAS NOT BEEN TESTED!
 * (the nodatabase.c part, now in here, works)
 *
 * Revision 0.6  1994/02/17  21:24:56  reinpost
 * *** empty log message ***
 *
 * Revision 0.5  1994/02/17  10:21:42  reinpost
 * *** empty log message ***
 *
 * Revision 0.3  1994/02/01  19:45:09  reinpost
 * doesn't work, likely to change in a major way
 *
 */

/* there are two implementations: one uses ndbm, the other doesn't */

#if USE_NDBM

/* RCS $Id$ keyword string for this file (ndbm implementation only); */
/* nothing in the visible code reads it */
static char rcsid[] =
  "$Id: database.c,v 0.14 1994/05/17 13:39:05 reinpost Exp $";

#include <sys/fcntlcom.h>
#include <string.h>
#include <stdio.h>

#include "system.h"

#include "constants.h"
#include "config.h"
#include "error.h"
#include "log.h"
#include "util.h"

/* filenames for dbm files; be_open() prefixes them with db_dir */
static char url2file_name[] = URL2FILE_NAME;
static char file2url_name[] = FILE2URL_NAME;

/* handles of the two databases; NULL until be_open() has opened them */
/* url2file maps a URL to the filename of its contents, file2url maps  */
/* such a filename back to its URL                                      */
static DBM *file2url = (DBM *)NULL;
static DBM *url2file = (DBM *)NULL;
/* these two are supposed to be inverses at all times */
/* at the moment, we don't actually check this anywhere */

/*
#define string2datum(s) (datum)(s,strlen(s)+1)
#define datum2string(s) (s ? s.dptr : (char *)NULL)
*/

/* string2datum(d,s): make datum d refer to the NUL-terminated string s; */
/*   the terminating '\0' is counted in d.dsize, the characters are not  */
/*   copied.  s is evaluated twice, so it must have no side effects      */
/* datum2string(s,d): set the pointer s to the data of datum d; no copy  */
/* both expand to a single statement (do ... while (0)), so they are     */
/* safe as the body of an if/else, and their arguments are parenthesized */
#define string2datum(d,s) \
  do { (d).dptr = (s); (d).dsize = strlen(s)+1; } while (0)
#define datum2string(s,d) \
  do { (s) = (d).dptr; } while (0)

/* string generation */

/* the digit symbols set_next_string() uses to write a number out:     */
/* 10 digits, 26 upper case and 26 lower case letters, so the radix is */
/* 62 and names that differ only in case are different names           */
static char alphas[] =
   "0123456789QWERTYUIOPASDFGHJKLZXCVBNMqwertyuiopasdfghjklzxcvbnm";

/* the number next_number() starts from when no number file can be found */
#define FIRST_NUMBER 0

static void put_this_number (int n)
/* stores n as the current number: writes it as one decimal line to the */
/* file db_dir/CASFILE_NAME, replacing whatever the file held before    */
/* any failure (path too long, file can't be opened, write or close     */
/* fails) is logged with log_error() and then reported through die()    */
{
  char fullname[MAX_STRING_LEN+1];
  char err[2*MAX_STRING_LEN+1];
  FILE *cas_file = (FILE *)NULL;
  int len;
  int ok;

  log_if_debug(">","put_this_number");

  fullname[0] = '\0';
  len = snprintf(fullname,sizeof fullname,"%s/%s",db_dir,CASFILE_NAME);
  ok = (len >= 0 && len <= MAX_STRING_LEN);  /* path fits, not truncated */

  if (ok)
  {
    cas_file = fopen(fullname,"w");
    ok = (cas_file != (FILE *)NULL);
  }
  if (ok)
  {
    /* "%d\n" yields at least two characters; fewer means a failed write */
    if (fprintf(cas_file,"%d\n",n) <= 1) ok = 0;
    /* the stream is closed exactly once, and only if it was opened;    */
    /* buffered output is flushed here, so a failing fclose counts too  */
    if (fclose(cas_file) == EOF) ok = 0;
  }

  if (!ok)
  {
    /* fatal problem */
    snprintf(err,sizeof err,
      "current number could not be written (to file %s)",fullname);
    log_error(err);
    die(CAS,"problems generating a new filename");
  }
  log_if_debug("<","put_this_number");
}

static int next_number()
/* returns the current number and stores its successor                  */
/* the number is kept as one decimal line in db_dir/CASFILE_NAME; if    */
/* that file can't be stat-ed it is assumed not to exist, and the       */
/* result is FIRST_NUMBER                                               */
/* a file that can't be opened, or that holds anything but a single     */
/* non-negative number, is logged and reported through die(); negative  */
/* numbers are refused because the result is used to index alphas[]     */
{
  int nn = FIRST_NUMBER;  /* the next number; stays at this if no file */

  char fullname[MAX_STRING_LEN+1];
  char err[2*MAX_STRING_LEN+1];
  FILE *cas_file;
  struct stat finfo;
  int len;

  log_if_debug(">","next_number");

  fullname[0] = '\0';
  len = snprintf(fullname,sizeof fullname,"%s/%s",db_dir,CASFILE_NAME);
  if (len < 0 || len > MAX_STRING_LEN)
  {
    /* the path did not fit; don't go on with a truncated filename */
    log_error("current number could not be read (filename too long)");
    die(CAS,"problems generating a new filename");
  }

  if (stat(fullname,&finfo))
  /* couldn't stat file - assume it doesn't exist */
  {
    log_if_debug("no current number in",fullname);
    /* nn already holds the initial number */
  }
  else if (!(cas_file = fopen(fullname,"r")))
  {
    snprintf(err,sizeof err,
      "current number could not be read (couldn't open file %s)",
        fullname);
    log_error(err);
    die(CAS,"problems generating a new filename");
  }
  else
  /* the file is open for reading now */
  {
    /* exactly one number, followed by nothing but white space */
    int well_formed =
      (fscanf(cas_file,"%d\n",&nn) == 1) && feof(cas_file) && nn >= 0;

    fclose(cas_file);
    if (!well_formed)
    {
      snprintf(err,sizeof err,
        "current number could not be read (incorrect format in file %s)",
          fullname);
      log_error(err);
      die(CAS,"problems generating a new filename");
    }
  }

  log_if_debug("in next_number","nn has been set");

  put_this_number(nn+1);
  log_if_debug("<","next_number");
  return(nn);
}

static void set_next_string(char *ns)
/* writes the next alphanumeric string into ns                          */
/* the string is the value of next_number(), written in radix           */
/* strlen(alphas) with the characters of alphas[] as digits, most       */
/* significant digit first                                              */
/* nothing is allocated: ns is the caller's buffer and must have room   */
/* for the digits and the terminating '\0'                              */
/* alphas[] holds both cases of each letter, so case sensitivity is     */
/* assumed                                                              */
{
  char reversed[MAX_STRING_LEN+1];  /* digits, least significant first */
  int ndigits = 0;
  int out = 0;
  int value = next_number();
  int base = strlen(alphas);

  /* peel off the low digits for as long as more than one digit is left */
  for (; value > base-1; value /= base)
    reversed[ndigits++] = alphas[value % base];

  /* what remains of value is the most significant digit */
  reversed[ndigits++] = alphas[value];

  /* copy the digits out in the opposite order */
  while (ndigits > 0)
    ns[out++] = reversed[--ndigits];
  ns[out] = '\0';

  log_if_debug("the next new string:",ns);
}

/* cfn: content file names, filenames to store document contents */

static void new_cfn(char *cfn, char *url)
/* puts a new filename (no extension, no directory path) into cfn       */
/* the name is one letter followed by the string set_next_string()      */
/* produces: 'q' if the URL contains a '?', 'f' otherwise               */
/* this assumes a case sensitive file system!                           */
{
  char suffix[MAX_STRING_LEN+1];  /* the generated part of the name */

  /* the prefix letter tells query results from other documents */
  cfn[0] = (rind(url,'?') != -1) ? 'q' : 'f';
  cfn[1] = '\0';

  set_next_string(suffix);
  /* the generated part must be under 7 characters to work under DOS etc. */
  strcat(cfn,suffix);
}

/* full file names */

static void cfn_to_filename (char *file, char *cfn, char *url)
/* converts the basic filename cfn by adding an extension and a full path */
/* result, written to file: cache_root "/" cfn [extension]                */
/* simple implementation: use a single directory; use the url's extension */
/* - a URL containing a '?' is a query and gets the extension ".qry"      */
/* - otherwise the extension is everything from the rightmost dot on,     */
/*   provided that dot lies in the last path component; a dot followed    */
/*   by a slash (e.g. one in the host name) would put a '/' into the      */
/*   filename, so it is not taken as the start of an extension            */
/* - an empty URL or one ending with a slash gets no extension            */

/* we use this heuristic to stand a faint chance that the extension matches */
/* the document type.  This is almost certain to fail for query results     */
{
  int is_query = (rind(url,'?') != -1); /* crude, incorrect? */
  int ext_start = rind(url,'.');
  int last_slash = rind(url,'/');
  size_t url_len = strlen(url);

  /* the length test comes first, so url[url_len-1] is never url[-1] */
  if (url_len == 0 || url[url_len-1] == '/' || ext_start < last_slash)
    ext_start = -1;

  strcpy(file,cache_root);
  strcat(file,"/");
  strcat(file,cfn);
  if (is_query)
    strcat(file,".qry");
  else if (ext_start > -1)
    strcat(file,&url[ext_start]);
}

static void be_open(DBM **dbm, char *file)
/* make sure the dbm database is open, if not, try to open it as file   */
/* *dbm is the handle: if it is non-NULL nothing is done; otherwise the */
/* database db_dir/file is opened read/write, created if necessary      */
/* (mode 0644), and its handle is stored in *dbm                        */
/* exit with an error message if the database can't be opened           */
{
  char fullfile[MAX_STRING_LEN+1];  /* the filename, without extension */
  char msg[2*MAX_STRING_LEN+1];     /* has room for fullfile plus text */

  if (*dbm) return;  /* already open */

  snprintf(fullfile,sizeof fullfile,"%s/%s",db_dir,file);

  *dbm = dbm_open(fullfile,O_RDWR|O_CREAT,0644);
  if (!*dbm)
  {
    /* the log gets the full path, the message for die() the bare name */
    snprintf(msg,sizeof msg,"database '%s' could not be opened",fullfile);
    log_error(msg);
    snprintf(msg,sizeof msg,"database '%s' could not be opened",file);
    die(DB,msg);
  }
}

/* routines for export */

void quit_database()
/* close all open dbm databases */
{
  if (!url2file) dbm_close(url2file);
  if (!file2url) dbm_close(file2url);
}


int is_present(char *url)
/* 1 if the contents of the URL are present in cache, 0 otherwise */
/* simply checks whether a file has been assigned to the URL */
/* maybe needs to check the existence of the file etc. ? */
{
  datum file_d, url_d;

  be_open(&url2file,url2file_name);
  string2datum(file_d,url);
  url_d = dbm_fetch(url2file,file_d);
  datum2string(url,url_d);
  return(!!url);
}

void url_to_file(char *file,char *url)
/* produces a filename for the URL contents - doesn't copy the result */
/* the filename is a full path; the URL contains a query string, if given */
/* newly chosen if the URL is not in cache */
{
  datum url_d, file_d;
  char *value;
  string2datum(url_d,url);

  be_open(&url2file,url2file_name);

  log_if_debug("url:",url_d.dptr);

  file_d = dbm_fetch(url2file,url_d);
  datum2string(value,file_d);

  log_if_debug("value:",value?value:"no value");

  if (!value)
  {
    datum file_d;
    char new_filename[MAX_STRING_LEN+1];
    char cfn[MAX_STRING_LEN+1];
    new_cfn(cfn,url);
    cfn_to_filename(new_filename,cfn,url);
    string2datum(file_d,new_filename);

    log_if_debug("new_filename just before stored:",file_d.dptr);

    if (0 > dbm_store(url2file,url_d,file_d,DBM_REPLACE))
    {
      char msg[2*MAX_STRING_LEN];
      sprintf(msg,
      "filename for url '%s' could not be stored in database '%s'",
        url,url2file_name);
      die(DB,msg);
    }
    be_open(&file2url,file2url_name);
    if (0 > dbm_store(file2url,file_d,url_d,DBM_REPLACE))
    {
      char msg[2*MAX_STRING_LEN];
      sprintf(msg,
      "filename for url '%s' could not be stored in database '%s'",
        url,file2url_name);
      dbm_delete(url2file,url_d);/* try to maintain consistency */
      die(DB,msg);
    }
    value = new_filename;
  }
  strcpy(file,value);
}

void url_to_file_tr(char* file_tr, char *url)
/* copies into file_tr the filename for the URL as url_to_file() gives   */
/* it, with the directory TR_SUBDIR inserted before the last component:  */
/* dir/name becomes dir/TR_SUBDIR/name                                   */
/* dies if the filename for the URL contains no slash                    */
{
  char file[MAX_STRING_LEN+1];  /* assume it will do */
  int on_slash;

  url_to_file(file,url);

  if ((on_slash = rind(file,'/')) == -1)
    die(DIE_BUG,"incorrect conversion from URL to filename");
    /* there MUST be a slash in the resulting filename */

  file[on_slash] = '\0'; /* split string into two separate ones */
  /* %s needs the address of the second part, not its first character */
  sprintf(file_tr,"%s/%s/%s",file,TR_SUBDIR,&file[on_slash+1]);
}

void file_to_url(char *url,char *file)
/* produces a URL (w/ query string), given a filename with document contents */
/* doesn't check if the file exists */
/* is supposed to be the inverse of url_to_file at all times */
/* the result is not a copy */
/* yields (char *)NULL if the file has no URL */
{
  datum file_d;
  be_open(&file2url,file2url_name);
  string2datum(file_d,file);
  datum2string(url,dbm_fetch(file2url,file_d));
}

#else /* if not USE_NDBM */

#include <sys/stat.h>
#include <stdio.h>

#include "system.h"

#include "constants.h"
#include "config.h"
#include "util.h"
#include "error.h"
#include "log.h"  /* for log_if_debug" */

void url_to_file(char* file, char *url)
/* writes the filename for the URL into file: cache_root, a slash, and */
/* the URL as rewritten by escape_url_with_slashes()                   */
/* url itself is left alone; the escaping is done on a local copy      */
{
  char escaped[HUGE_STRING_LEN+1];

  strcpy(escaped,url);
  escape_url_with_slashes(escaped);

  /* put the pieces together: cache_root/escaped */
  strcpy(file,cache_root);
  strcat(file,"/");
  strcat(file,escaped);
}

void url_to_file_tr(char* file_tr, char *url)
/* like url_to_file(), but the file is placed in the subdirectory      */
/* TR_SUBDIR of cache_root: cache_root/TR_SUBDIR/<escaped URL>         */
{
  char escaped[HUGE_STRING_LEN+1];

  /* escape a local copy, url itself stays as it is */
  strcpy(escaped,url);
  escape_url_with_slashes(escaped);

  strcpy(file_tr,cache_root);
  strcat(file_tr,"/");
  strcat(file_tr,TR_SUBDIR);
  strcat(file_tr,"/");
  strcat(file_tr,escaped);
}

void file_to_url(char *url, char *file)
/* reverse of url_to_file(file,url): copies the part of file after its */
/* last slash into url and passes it through unescape_url()            */
{
  int last_slash = rind(file,'/');  /* -1 if file contains no slash */

  /* skip the path; without a slash the whole of file is the stem */
  strcpy(url,file + (last_slash+1));
  unescape_url(url);
}

int is_present(char *url)
/* 1 if the file that url_to_file() names for the URL can be stat-ed, */
/* 0 otherwise                                                        */
{
  char file[HUGE_STRING_LEN];
  struct stat finfo;

  url_to_file(file,url);

  /* stat returns 0 iff no error occurred */
  if (stat(file,&finfo) != 0)
    return(0);
  return(1);
}

void quit_database()
/* nothing to do: this implementation has no database to close */
{
}

#endif /* USE_NDBM */

/* the rest is valid for both implementations */

/* code for iterating through all cached files				 */
/* in fact, it iterates through cache_root and cache_root/tr and returns */
/* all regular files in these directories				 */
/* set_first_cached_file() returns 0 if no such file exists,		 */
/* set_next_cached_file() returns 0 if both directories have been done	 */
/* they return 1 otherwise.  file is assumed to be large enough		 */

/* state of the iteration, shared by set_first_cached_file() and         */
/* set_next_cached_file()                                                */
/* the_dirname: the directory being read, cache_root or its TR_SUBDIR    */
/* the_dir: the directory stream; NULL before the first call and after   */
/*   the iteration has ended (note: not static, so visible to the linker) */
static char the_dirname[MAX_STRING_LEN+1];
DIR *the_dir = (DIR *)NULL;

int set_next_cached_file(char *file)
/* copies the full name of the next regular file into file and returns 1, */
/* or returns 0, leaving file alone, if both directories have been done   */
/* the_dirname is read first; when it is cache_root and is exhausted,     */
/* cache_root/TR_SUBDIR is read next                                      */
/* entries that can't be stat-ed or aren't regular files (such as . and   */
/* ..) are skipped in a loop, so that the name that is finally found      */
/* always ends up in the caller's buffer                                  */
/* a directory that can't be opened is reported through die()             */
{
  struct DIR_TYPE *dp;
  struct stat finfo;
  char this_file[MAX_STRING_LEN+1];
  int len;

  for (;;)
  {
    if (!the_dir) the_dir = opendir(the_dirname);
    if (!the_dir)
    {
      die(DIE_DIR,the_dirname);
      return(0);  /* in case die() returns: there is nothing to read */
    }

    dp = readdir(the_dir);
    if (!dp)  /* no more files in this directory */
    {
      /* the stream is closed exactly once, and forgotten at once */
      closedir(the_dir);
      the_dir = (DIR *)NULL;

      if (strcmp(the_dirname,cache_root))
      {
	/* at end: this was the second directory */
	return(0);
      }

      /* enter the second directory; it is opened at the top of the loop */
      snprintf(the_dirname,sizeof the_dirname,"%s/%s",cache_root,TR_SUBDIR);
      continue;
    }
    /* now dp points to a file */

    len = snprintf(this_file,sizeof this_file,"%s/%s",
      the_dirname,dp->d_name);
    if (len < 0 || len > MAX_STRING_LEN)
    {
      /* the name doesn't fit; a truncated name would be some other file */
      continue;
    }

    if ((stat(this_file,&finfo) != -1) && S_ISREG(finfo.st_mode))
    {
      /* assume the file is one we're interested in - no check for readability */
      strcpy(file,this_file);
      return(1);
    }
    /* file could not be stat-ed or is not a regular file: try the next */
  }
}

int set_first_cached_file(char *file)
/* (re)starts the iteration at cache_root and yields its first file:   */
/* the result is that of set_next_cached_file(file)                    */
{
  /* drop a directory stream that an earlier iteration left open */
  if (the_dir != (DIR *)NULL)
  {
    closedir(the_dir);
  }
  the_dir = (DIR *)NULL;  /* superfluous if nothing was open */

  strcpy(the_dirname,cache_root);
  return(set_next_cached_file(file));
}
