/*
 * smart date parsing code
 */

#include <sys/types.h>
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <time.h>

/*
 * token types
 *
 * Every type occupies its own bit, so the set of types a token might
 * still be can be held in a single int and narrowed with & and ~.
 * MODIFIER is defined but not referenced by the code visible in this file.
 */

#define YEAR     (1 << 0)
#define MONTH    (1 << 1)
#define MDAY     (1 << 2)
#define WDAY     (1 << 3)
#define MODIFIER (1 << 4)

/*
 * three-letter month abbreviations, indexed by month number minus one
 * (0 = "jan" ... 11 = "dec").  Each entry is NUL-terminated.
 * The table is only ever read, so it is const.
 */
static const char int2month[12][4] = {
    "jan", "feb", "mar", "apr", "may", "jun",
    "jul", "aug", "sep", "oct", "nov", "dec"
};
    
/*
 * list of keywords and their associated types
 *
 * Several spellings may map to the same value.  The table is read-only:
 * both the array and the strings it points at are const.
 */

static const struct keyword {
    const char *str;		/* spelling of the keyword */
    int token_type;		/* MONTH or WDAY */
    int value;			/* month 1..12, or weekday 0..6 (sunday = 0) */
} keywords[] = {
    { "january",   MONTH, 1 },
    { "jan",       MONTH, 1 },
    { "february",  MONTH, 2 },
    { "feb",       MONTH, 2 },
    { "feburary",  MONTH, 2 },	/* frequent misspelling */
    { "march",     MONTH, 3 },
    { "mar",       MONTH, 3 },
    { "april",     MONTH, 4 },
    { "apr",       MONTH, 4 },
    { "may",       MONTH, 5 },
    { "june",      MONTH, 6 },
    { "jun",       MONTH, 6 },
    { "july",      MONTH, 7 },
    { "jul",       MONTH, 7 },
    { "august",    MONTH, 8 },
    { "aug",       MONTH, 8 },
    { "september", MONTH, 9 },
    { "sep",       MONTH, 9 },
    { "sept",      MONTH, 9 },
    { "october",   MONTH, 10 },
    { "oct",       MONTH, 10 },
    { "november",  MONTH, 11 },
    { "nov",       MONTH, 11 },
    { "december",  MONTH, 12 },
    { "dec",       MONTH, 12 },
    /*
     * weekday names are not checked against the rest of the date; they
     * are listed so that they are recognized and ignored when typed
     */
    { "sunday",    WDAY,  0 },
    { "sun",       WDAY,  0 },
    { "monday",    WDAY,  1 },
    { "mon",       WDAY,  1 },
    { "tuesday",   WDAY,  2 },
    { "tue",       WDAY,  2 },
    { "wednesday", WDAY,  3 },
    { "wed",       WDAY,  3 },
    { "thursday",  WDAY,  4 },
    { "thu",       WDAY,  4 },
    { "friday",    WDAY,  5 },
    { "fri",       WDAY,  5 },
    { "saturday",  WDAY,  6 },
    { "sat",       WDAY,  6 },
};

/*
 * find a date keyword in the list.
 *
 * k is compared against every entry of keywords[] without regard to
 * case.  Returns the index of the first matching entry, or -1 if no
 * entry matches.
 */

static int
find_keyword (const char *k)
{
    const int count = (int) (sizeof (keywords) / sizeof (*keywords));
    int i;

    for (i = 0; i < count; ++i) {
        if (strcasecmp (keywords[i].str, k) == 0)
            return i;
    }
    return -1;
}

/*
 * private storage for a tokenized date.
 * the input is cut into tokens, which are kept in this fixed-size table;
 * ntokens counts how many have been added.
 */

struct token {
    char *str;			/* text of the token */
    int token_set;		/* bitmask of the types this might still be */
    int value;			/* year, day of month, etc. */
};

static struct token tokens[10];
static int ntokens;		/* static storage, so it starts at zero */


/*
 * when we identify something that we know is (e.g.) a "year",
 * call this routine.
 *
 * n     index of the token that has been identified
 * type  the token type it has been identified as (YEAR, MONTH, ...)
 *
 * Token n's set becomes exactly `type'; the bits of `type' are removed
 * from the set of every other token, so no other token can be taken for
 * the same element.  If n is not a valid index, the bits are simply
 * removed from all tokens.
 */

static void
gotit (int n, int type)
{
    int i;

    for (i = 0; i < ntokens; ++i) {
        if (i == n)
            tokens[i].token_set = type;
        else
            tokens[i].token_set &= ~type;
    }
}


/*
 * call this to add a new token to the list
 *
 * s       start of the token text (need not be NUL-terminated)
 * length  number of characters of s that make up the token
 *
 * A NUL-terminated copy of the text is allocated and stored in the next
 * free slot of tokens[], with every type bit set ("could be anything").
 *
 * Returns 0 on success and -1 if the token could not be stored.  On
 * failure ntokens is left equal to the table size, so a caller that
 * ignores the return value can still detect the problem by comparing
 * ntokens against the size of tokens[].
 */

static int
new_token (const char *s, int length)
{
    /* declared locally: this file does not include <stdlib.h> */
    extern void *malloc (size_t);
    const int capacity = (int) (sizeof (tokens) / sizeof (*tokens));
    char *ptr;

    /* check for room first so nothing is allocated that cannot be stored */
    if (ntokens >= capacity) {
        ntokens = capacity;
        return -1;
    }

    ptr = (length < 0) ? NULL : malloc ((size_t) length + 1);
    if (ptr == NULL) {
        fprintf (stderr, "out of memory\n");
        /* mark the table as overflowed so the parse is rejected */
        ntokens = capacity;
        return -1;
    }
    memcpy (ptr, s, (size_t) length);
    ptr[length] = '\0';

    tokens[ntokens].str = ptr;
    tokens[ntokens].token_set = ~0;
    ++ntokens;
    return 0;
}

/*
 * parse a string as a date.  Print it out when we're done.
 *
 * (you could also return the date as an integer...or whatever)
 */

int
parse_date (buf, outdate)
char *buf;
char *outdate;
{
    char *s = buf;
    int old_state = 0;
    int new_state;
    char *s0 = s;
    int i, x;
    int sum, onion;
    int nfound = 0;		/* number of tokens identified */
    int mask;
    int month, mday, year;
    time_t now;
    struct tm *tm;
    char *tmpoutdate;

    /*
     * get current time of day, so we know how to handle something
     * like "March 1" by itself...whether it's the current year
     * or next year.
     */

#ifdef DEBUG
    printf("in parse_date, buf: %s\n", buf);
#endif
    time (&now);
    tm = localtime (&now);
    if ((strcmp(buf,"now") == 0) ||
        (strcmp(buf,"today") == 0) ) {
        tmpoutdate = ctime(&now);
#ifdef DEBUG
    printf("now: %s\n", tmpoutdate);
#endif
        strncpy(outdate,tmpoutdate+4,7);
        strncat(outdate,tmpoutdate+20,4);
        outdate[12] = '\0';
#ifdef DEBUG
    printf("modified now: %s\n", outdate);
#endif
        return;
    }
    ntokens = 0;
    while (*s) {
	if (isalpha (*s)) {
	    s0 = s;
	    while (isalpha (*s))
		++s;
	    new_token (s0, s - s0);
	}
	else if (isdigit (*s)) {
	    s0 = s;
	    while (isdigit (*s))
		++s;
	    new_token (s0, s - s0);
	}
	else
	    ++s;
    }	    
    if (ntokens >= sizeof (tokens)/sizeof(*tokens)) {
	fprintf (stderr, "too many tokens\n");
	return -1;
    }

    /*
     * scan through list looking for tokens we recognize
     */

    for (i = 0; i < ntokens; ++i) {
	if (isalpha (*tokens[i].str)) {
	    if ((x = find_keyword (tokens[i].str)) >= 0) {
		gotit (i, keywords[x].token_type);
		tokens[i].value = keywords[x].value;
	    }
	}
	else {
	    x = atoi (tokens[i].str);
	    if (x >= 100) {
		/* 100 = earliest year we can deal with */
		gotit (i, YEAR);
		tokens[i].value = x;
	    }
	    else if (31 < x && x <= 99) {
		/* years from "31" to "99" are assumed to be 20th century */
		gotit (i, YEAR);
		tokens[i].value = x + 1900;
	    }
	    else if (x == 0) {
		/* year "00" is 2000 */
		gotit (i, YEAR);
		tokens[i].value = 2000;
	    }
	    else if (12 < x && x <= 31) {
		/* assume a # between 12 and 31 is a day of the month */
		gotit (i, MDAY);
		tokens[i].value = x;
	    }
	    else if (0 < x && x <= 12) {
		/* could be either an mday or month */
		tokens[i].token_set &= MDAY | MONTH;
		tokens[i].value = x;
		/*
		 * note: this logic fails after 2000, but the
		 * world ends on July 5, 1998 anyway
		 */
	    }
	}
    }

    /*
     * clean up month/mday ambiguity:
     * if year comes first, month comes first. (European yy.mm.dd)
     * if year comes last, month comes first. (American mm/dd/yy)
     * ...so month always comes first.
     */
    for (i = 0; i < ntokens; ++i) {
	if (tokens[i].token_set == (MDAY | MONTH)) {
	    gotit (i, MONTH);
	    break;
	}
    }


    /*
     * see if there are any remaining ambiguities (can this happen?)
     */
    sum = 0;
    onion = 0;			/* was "union", but that's reserved in C */
    for (i = 0; i < ntokens; ++i) {
	sum += tokens[i].token_set;
	onion |= tokens[i].token_set;
    }
    if (sum != onion) {
	fprintf (stderr, "ambiguous date\n");
	return -1;
    }

    /*
     * scan the token list and set appropriate variables
     */

    mask = 0;
    for (i = 0; i < ntokens; ++i) {
	mask |= (tokens[i].token_set & (MONTH | MDAY | YEAR));
	switch (tokens[i].token_set) {
	case MONTH:
	    month = tokens[i].value;
	    break;
	case MDAY:
	    mday = tokens[i].value;
	    break;
	case YEAR:
	    year = tokens[i].value;
	    break;
	}
    }

    /*
     * deal with unspecified fields.
     * mask has the set of things that were specified.
     *
     * month day year
     *                      no information (error)
     *            x         assume jan 1
     *        x             error
     *        x   x         error
     *   x                  assume 1st of the next month with that name
     *   x        x         assume 1st of the month
     *   x    x             assume current year
     *   x    x   x	    everything is specified
     */
    
    switch (mask) {
    case YEAR:
	month = 1;		/* Jan 1 */
	mday = 1;
	break;
    case MDAY:
    case MDAY|YEAR:
	fprintf (stderr, "missing month\n");
	break;
    case MONTH:
	/* find next month with this name */
	year = (month < tm->tm_mon + 1) ?
	    (tm->tm_year + 1901) : (tm->tm_year + 1900);
	mday = 1;
	break;
    case MONTH|YEAR:
	mday = 1;
	break;
    case MONTH|MDAY:
	/* find next month with this name */
	year = (month < tm->tm_mon + 1) ?
	    (tm->tm_year + 1901) : (tm->tm_year + 1900);
	break;
    case MONTH|MDAY|YEAR:
	break;
    }
#ifdef DEBUG
    printf("end of parse_date, month %d, mday %d, year %d\n",
           month, mday, year);
#endif
    if ( (month < 1) || (month > 12) )
        month = 1;
    sprintf (outdate, "%s %d %d", int2month[month-1], mday, year);
}
