/* getline.c */

/* Source code for the RLaB getline function.
 *
 * Syntax: getline ( "filename" )
 *
 * Description:
 *
 * SEE THE GETLINE HELPFILE FOR UP-TO-DATE DESCRITPION
 */

/*
 * Much of the getline scanner is code that came from mawk,
 * by Mike Brennan (GNU Copylefted). I have un-mercifily chopped,
 * sliced, and diced it to meet RLaB's needs, so any bugs are
 * mine. The original logic is Mike's.
 */

/*  This file is a part of RLaB ("Our"-LaB)
    Copyright (C) 1992, 1993, 1994  Ian R. Searle

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

    See the file ./COPYING
 ***********************************************************************/

#include "rlab.h"
#include "symbol.h"
#include "mem.h"
#include "listnode.h"
#include "btree.h"
#include "bltin.h"
#include "scalar.h"
#include "matrix.h"
#include "r_string.h"
#include "util.h"
#include "scan.h"
#include "print.h"

#include <stdio.h>
#include <errno.h>

#ifdef titan
extern double strtod ();
#endif

/*
 * The getline function will be a simple C-scanner, that reads a line
 * and splits into tokens. It would be nice in the future to be able to
 * change the token delimiter (say to a comma), or be able to read fixed
 * field data (ala FORTRAN).
 *
 * The token types getline will recognize are NUMBER and STRING.
 * For now getline will skip blanks and tabs.
 * Everything other than NUMBER will be a string.
 * Getline should recognize quotes (") and get everthing between them.
 */

static int next _PROTO ((FILE * fn, char *chr));
static int getline_scanner _PROTO ((FILE * fn));
static int collect_decimal _PROTO ((FILE * fn, int c));
static int collect_string _PROTO ((FILE * fn));

/*
 * Union for getline() scanner
 */

static union _uscan
{
  double dval;
  char *str;
}
uscan;

/* Char array for holding tmp strings during scanning */
#define MAX_STRING_BUFF  512
static char string_buff[MAX_STRING_BUFF];

#define NUMBER      100
#define UNEXPECTED  -1
#define BAD_DECIMAL -2

char scan_code[256] =
{
  0, 34, 34, 34, 34, 34, 34, 34, 34, 1, 2, 1, 1, 1, 34, 34,
  34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
  1, 27, 23, 25, 33, 15, 10, 34, 17, 18, 13, 11, 30, 12, 31, 14,
  22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 8, 3, 28, 26, 29, 7,
  34, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 19, 24, 20, 16, 21,
  34, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 5, 9, 6, 32, 34,
  34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
  34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
  34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
  34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
  34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
  34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
  34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
  34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34
};

/*
 * RLaB builtin function interface for getline().
 */
void
Getline (return_ptr, n_args, d_arg)
     VPTR *return_ptr;
     int n_args;
     Datum *d_arg;
{
  char istr[10], *string;
  int i, token_type;
  FILE *fn;
  Btree *btree;
  ListNode *FN, *lnode;

  /* Check n_args */
  if (n_args != 1)
    error_1 ("getline: 1 argument allowed", 0);

  /* get file name from argument list */
  FN = bltin_get_string ("getline", d_arg, 1);
  string = string_GetString (e_data (FN));

  if ((fn = get_file_ds (string, "r", 0)) == 0)
  {
    warning_1 (string, "cannot open for read");
    *return_ptr = (VPTR) scalar_Create ((double) 0.0);
    return;
  }

  /* Create the list */
  btree = btree_Create ();

  /* Call scanner until newline is returned */

  i = 1;
  while ((token_type = getline_scanner (fn)) != '\n'
	 && token_type != 0)
  {
    sprintf (istr, "%d", i++);
    if (token_type < 0)		/* Scanner error */
    {
      warning_1 ("scanner error, index skipped", (char *) 0);
    }
    else if (token_type == NUMBER)
    {
      lnode = install (btree, cpstr (istr), SCALAR, scalar_Create (uscan.dval));
      scalar_SetName (e_data (lnode), cpstr (istr));
    }
    else if (token_type == STRING)
    {
      lnode = install (btree, cpstr (istr), STRING, string_Create (uscan.str));
      string_SetName (e_data (lnode), cpstr (istr));
    }
  }

  /* Now check for blank line */

  if (i == 1 && token_type == '\n')
  {
    /* Add a NULL string to the list */
    lnode = install (btree, cpstr ("1"), STRING, string_Create (0));
    string_SetName (e_data (lnode), cpstr ("1"));
  }

  *return_ptr = (VPTR) btree;
  remove_tmp_destroy (FN);
}

/*
 * Scanner code.
 */

static int
getline_scanner (fn)
     FILE *fn;
{
  char c;

reswitch:

  switch (next (fn, &c))
  {
  case 0:			/* EOF */
    return (0);

  case SC_SPACE:
    goto reswitch;

  case SC_NL:
    return ('\n');

  case SC_MINUS:		/* Numbers */
  case SC_PLUS:
  case SC_DIGIT:
  case SC_DOT:
    return (collect_decimal (fn, c));

  case SC_DQUOTE:		/* Double quoted strings */
    return (collect_string (fn));

  default:			/* Anything else is a whitespace delim string */
    {
      char *p = (char *) string_buff + 1;
      string_buff[0] = c;

      while (next (fn, p++) != 0 &&
	     scan_code[(int) p[-1]] != SC_SPACE &&
	     scan_code[(int) p[-1]] != SC_NL)
	;

      ungetc (*--p, fn);
      *p = '\0';
      uscan.str = cpstr (string_buff);
      return (STRING);
    }

  }
}

/* **************************************************************
 * Scanner support functions.
 * ************************************************************** */

static int
next (fn, chr)
     FILE *fn;
     char *chr;
{
  int c;
  c = getc (fn);
  if (c == EOF)
  {
    *chr = 0;
    return (0);
  }
  else
  {
    *chr = (char) c;
    return (scan_code[c]);
  }
}

/*
 * Collect a decimal constant in string_buff.
 * If the number turns out to be a string, then
 * return a string (not an error).
 */

static int
collect_decimal (fn, c)
     FILE *fn;
     int c;
{
  double d;
  char *p = (char *) string_buff + 1;
  char *endp;

  string_buff[0] = c;

  if (c == '+' || c == '-')
    next (fn, p++);

  if (p[-1] == '.')
  {
    if (next (fn, p++) == 0)
      return (0);
    else if (scan_code[(int)p[-1]] != SC_DIGIT)
      goto string;
  }
  else
  {
    while (next (fn, p++) == SC_DIGIT)
      ;
    if (scan_code[(int)p[-1]] == 0)
      goto finish;
    else if (p[-1] != '.')
    {
      ungetc (p[-1], fn);
      p--;
    }
  }

  /* get rest of digits after decimal point */
  while (next (fn, p++) == SC_DIGIT)
    ;
  if (scan_code[(int)p[-1]] == 0)
    goto finish;

  /* check for exponent */
  if (p[-1] != 'e' && p[-1] != 'E')
  {
    if (scan_code[(int)p[-1]] != SC_SPACE && scan_code[(int)p[-1]] != SC_NL)
      goto string;
    ungetc (p[-1], fn);
    *--p = 0;
  }
  else if (next (fn, p++) != SC_DIGIT
	   && p[-1] != '-' && p[-1] != '+')
  {
    goto string;
  }
  else if (scan_code[(int)p[-1]] == 0)
  {
    goto finish;
  }
  else
  {
    /* get the rest of the exponent */
    while (next (fn, p++) == SC_DIGIT)
      ;
    ungetc (p[-1], fn);
    *--p = 0;
  }

finish:

  errno = 0;			/* check for overflow/underflow */
  d = strtod (string_buff, (char **) &endp);

  if (errno)
    error_1 ("decinal over/under flow problem", (char *) 0);

  if (endp < (p - 1))
    goto string;

  uscan.dval = d;
  return (NUMBER);

string:

  /* Not a number, collect a string. */
  while (next (fn, p++) != 0 &&
	 scan_code[(int) p[-1]] != SC_SPACE &&
	 scan_code[(int) p[-1]] != SC_NL)
    ;
  ungetc (*--p, fn);
  *p = '\0';
  uscan.str = cpstr (string_buff);
  return (STRING);
}

/*
 * Collect a doubly quoted string in string_buff
 * Stuff the result into the scanner union, and
 * return the token type.
 */

extern char *rm_escape _PROTO ((char *s));

static int
collect_string (fn)
     FILE *fn;
{
  char *p = (char *) string_buff;
  char c;
  int e_flag = 0;		/* on if have an escape char */

  while (1)
    switch (next (fn, p++))
    {
    case 0:			/* EOF, unterminated string */
      error_1 ("runaway string constant", (char *) 0);

    case SC_DQUOTE:		/* done */
      *--p = 0;
      goto out;

    case SC_NL:
      p[-1] = 0;
      /* fall thru */

    case SC_ESCAPE:
      if (next (fn, &c) == '\n')
      {
	p--;
      }
      else
      {
	if (c == 0)
	{
	  error_1 ("runaway string constant", 0);
	}
	else
	{
	  *p++ = c;
	  e_flag = 1;
	}
      }
      break;

    default:
      break;
    }

out:
  uscan.str = cpstr (e_flag ? rm_escape (string_buff) : string_buff);
  return STRING;
}

/* **************************************************************
 * The string-split function (strsplt)
 * ************************************************************** */

void
Strsplt (return_ptr, n_args, d_arg)
     VPTR *return_ptr;
     int n_args;
     Datum *d_arg;
{
  int i;
  ListNode *S;
  Matrix *m;
  String *str;

  /* Check n_args */
  if (n_args != 1)
    error_1 ("strsplt: 1 argument allowed", 0);

  S = bltin_get_string ("strsplt", d_arg, 1);
  str = (String *) e_data (S);

  /* Create string matrix to hold split results */
  m = matrix_CreateS (1, string_GetLength (str));

  for (i = 0; i < MNC (m); i++)
  {
    m->val.ms[i] = (char *) MALLOC (2 * sizeof (char));
    m->val.ms[i][0] = str->string[i];
    m->val.ms[i][1] = '\0';
  }

  *return_ptr = (VPTR) m;
  remove_tmp_destroy (S);
}
