/*
 *	(c) Copyright 1990, Kim Fabricius Storm.  All rights reserved.
 *
 *	Digest article handling
 */

#include "config.h"
#include "news.h"
#include "debug.h"

#ifdef DG_TEST

/*
 * TEST(fmt, x, y): debug trace, printed only when the DG_TEST bit is
 * set in Debug.  Always takes exactly two arguments after the format.
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as a
 * single statement; a bare "if (...) printf(...)" would capture the
 * "else" of an enclosing if/else written around a TEST(...) call.
 */
#define TEST(fmt, x, y) \
    do { if (Debug & DG_TEST) printf(fmt, x, y); } while (0)

#else

/* tracing compiled out: TEST(...) expands to nothing */
#define TEST(fmt, x, y)

#endif

/*
 * Bit OR'ed into a character before comparison so that both cases of
 * an ASCII letter compare equal (040 is the ASCII upper/lower case bit).
 */
#define UNIFY 040

/*
 * Tail of the word "digest"; is_digest() matches the leading 'd'/'D'
 * itself and then compares the following characters against this.
 * Not const: init_digest_parsing() ORs UNIFY into it in place.
 */
static char digest_pattern[] = "igest";

/*
 * Prepare digest_pattern for the case-folded comparison in is_digest()
 * by setting the UNIFY bit in every character of the pattern.
 */
init_digest_parsing()
{
    register char *p = digest_pattern;

    while (*p != '\0') {
	*p = *p | UNIFY;
	p++;
    }
}


/*
 * Return 1 if the current subject (news.ng_subj) contains the word
 * "digest" in any mixture of upper and lower case, 0 otherwise
 * (including when there is no subject at all).
 */
is_digest()
{
    register char *scan, *tail, *pat;
    register char ch;

    scan = news.ng_subj;
    if (scan == NULL) return 0;

    for (; (ch = *scan) != NUL; scan++) {
	/* look for a 'd' or 'D' that could start the word */
	if ((ch | UNIFY) != ('d' | UNIFY)) continue;

	/* compare what follows it against the rest of the word */
	tail = scan + 1;
	pat = digest_pattern;
	while (*pat != NUL && (*tail | UNIFY) == *pat) {
	    tail++;
	    pat++;
	}

	/* whole pattern consumed => match */
	if (*pat == NUL) return 1;
    }

    return 0;
}


/*
 * expect that f is positioned at header of an article
 */

/*
 * Non-zero while the article being read is enclosed in ^A^A^A^A
 * delimiter lines.  Set by dg_hdr_field() when it sees the opening
 * delimiter; cleared by skip_digest_body() at the closing delimiter
 * or at end of file.
 */
static int is_mmdf_folder = 0;

/*
 * Read the next sub-article of a digest from f.
 *
 * Records the header start in digest.dg_hpos and the body start in
 * digest.dg_fpos, then lets skip_digest_body() locate the end of the
 * body.  Articles for which skip_digest_body() returns a negative
 * value are skipped and the following header is parsed instead.
 *
 * Returns -1 if no usable header is found, otherwise the non-negative
 * result of skip_digest_body().
 */
get_digest_article(f, hdrbuf)
FILE *f;
news_header_buffer hdrbuf;
{
    int status;

    digest.dg_hpos = ftell(f);
    TEST("GET DIGEST hp=%ld\n", digest.dg_hpos, 0);

    for (;;) {
	if (!parse_digest_header(f, 0, hdrbuf)) return -1;

	digest.dg_fpos = ftell(f);
	TEST("END HEADER hp=%ld fp=%ld\n", digest.dg_hpos, digest.dg_fpos);

	status = skip_digest_body(f);
	if (status >= 0) break;
    }

    TEST("END BODY lp=%ld next=%ld\n", digest.dg_lpos, ftell(f));

    return status;
}

/*
 * skip_digest_body() keeps the file offset and the class of the most
 * recently read lines in two circular arrays of BACKUP_LINES entries,
 * so that it can step backwards from the point where a header or the
 * end of file was detected.  The LN_ values are the line classes; they
 * are distinct bits so that several classes can be tested with one mask.
 */
#define BACKUP_LINES	 50	/* remember class + offset for parsed lines */

#define	LN_BLANK	0x01	/* blank line */
#define	LN_DASHED	0x02	/* dash line */
#define	LN_HEADER	0x04	/* (possible) header line */
#define	LN_ASTERISK	0x08	/* asterisk line (near end) */
#define	LN_END_OF	0x10	/* End of ... line */
#define	LN_TEXT		0x20	/* unclassified line */


/*
 * skip until a 'Subject: ' (or End of digest) line is found,
 * then back up to the start of the header
 */

/*
 * Tuning parameters:
 *
 *	MIN_HEADER_LINES:	number of known header lines that must
 *				be found in a block to identify a new
 *				header
 *
 *	MAX_BLANKS_DASH		max no of blanks on a 'dash line'
 *
 *	MIN_DASHES		min no of dashes on a 'dash line'
 *
 *	MAX_BLANKS_ASTERISK	max no of blanks on an 'asterisk line'
 *
 *	MIN_ASTERISKS		min no of asterisks on an 'asterisk line'
 *
 *	MAX_BLANKS_END_OF	max no of blanks before "End of "
 */

/*
 * Note: skip_digest_body() compares the number of leading dashes and
 * asterisks with '>' against MIN_DASHES and MIN_ASTERISKS, so a line
 * needs strictly more than these counts to be classified.
 */
#define	MIN_HEADER_LINES	2
#define	MAX_BLANKS_DASH		3
#define	MIN_DASHES		16
#define	MAX_BLANKS_ASTERISK	1
#define	MIN_ASTERISKS		10
#define	MAX_BLANKS_END_OF	1

/*
 * dg_hdr_field is defined "static" further down.  It must be declared
 * with internal linkage at file scope before its first use: a plain
 * block-scope declaration would give it external linkage and conflict
 * with the later static definition.
 */
static char **dg_hdr_field();

/*
 * Skip the body of the current digest sub-article.
 *
 * Reads f line by line, classifying every line (LN_...) and remembering
 * class and file offset of the last BACKUP_LINES lines in two circular
 * arrays.  The body ends when
 *   - MIN_HEADER_LINES recognized header lines are found in one run of
 *     unindented lines (the start of the next article), or
 *   - a ^A^A^A^A delimiter line is read, or
 *   - end of file is reached.
 * The function then steps backwards over trailing blank/dashed (and, at
 * end of file, asterisk/"End of") lines to find where the body really
 * ends.
 *
 * On return digest.dg_lines holds the line count of the body and
 * digest.dg_lpos the offset of its end.  When a following header was
 * found, f is positioned at the first line of that header.
 *
 * Returns:	 1  another article follows
 *		 0  end of file, no article follows
 *		-1  the article was empty and has been skipped
 */
skip_digest_body(f)
register FILE *f;
{
    off_t  backup_p[BACKUP_LINES];
    int	   line_type[BACKUP_LINES];
    register int backup_index, backup_count;
    int    more_header_lines, end_or_asterisks, blanks;
    int    scan_count, scan_index, scan_lines;
    char   line[1024];
    register char *cp;

    /* step backup_index one slot backwards in the circular arrays */
#define	decrease_index()	\
    if (--backup_index < 0) backup_index = BACKUP_LINES - 1

    backup_index = -1;
    backup_count = 0;
    end_or_asterisks = 0;	/* no of asterisk/"End of" lines seen */

    digest.dg_lines = 0;


    /* any line that is not a header candidate ends the current run */
 next_line:
    more_header_lines = 0;

    /* entered directly to keep the count of header lines in this run */
 next_possible_header_line:
    digest.dg_lines++;

    if (++backup_index == BACKUP_LINES) backup_index = 0;
    if (backup_count < BACKUP_LINES) backup_count++;

    /* the slot is filled in before reading, so EOF gets a slot too */
    backup_p[backup_index] = ftell(f);
    line_type[backup_index] = LN_TEXT;

    if (fgets(line, sizeof line, f) == NULL) {
	TEST("end_of_file, bc=%d, lines=%d\n", backup_count, digest.dg_lines);

	if (is_mmdf_folder) {
	    digest.dg_lpos = backup_p[backup_index];
	    is_mmdf_folder = 0;
	    return 0;
	}

	/* end of file => look for "****" or "End of" line */

	if (end_or_asterisks) {
	    /*
	     * Scan backwards on copies of the counters and commit only
	     * if a marker line is still inside the backup window.  If
	     * the marker has scrolled out of the window, the state is
	     * left untouched so that the trailing-line trimming below
	     * still runs and sets dg_lpos.
	     */
	    scan_count = backup_count;
	    scan_index = backup_index;
	    scan_lines = 0;
	    while (--scan_count >= 0) {
		scan_lines++;
		if (--scan_index < 0) scan_index = BACKUP_LINES - 1;
		if (line_type[scan_index] & (LN_ASTERISK | LN_END_OF)) {
		    backup_count = scan_count;
		    backup_index = scan_index;
		    digest.dg_lines -= scan_lines;
		    break;
		}
	    }
	}

	if (digest.dg_lines == 0) return 0;

	/* drop trailing marker, blank and dashed lines from the body */
	while (--backup_count >= 0) {
	    --digest.dg_lines;
	    digest.dg_lpos = backup_p[backup_index];
	    decrease_index();
	    if ((line_type[backup_index] &
		(LN_ASTERISK | LN_END_OF | LN_BLANK | LN_DASHED)) == 0)
		break;
	}

	return 0;	/* no article follows */
    }

    TEST("\n>>%-.50s ==>>", line, 0);

    /* ^A^A^A^A delimiter line: the body ends here */
    if (line[0] == '\001' && strcmp(line, "\001\001\001\001\n") == 0) {
	digest.dg_lpos = backup_p[backup_index];
	/* not inside a delimited article: re-read the line as a header */
	if (!is_mmdf_folder) fseek(f, digest.dg_lpos, 0);
	--digest.dg_lines;
	is_mmdf_folder = 0;
	return (digest.dg_lines <= 0) ? -1 : 1;
    }

    /* inside a delimited article only the delimiter can end the body */
    if (is_mmdf_folder) goto next_line;

    for (cp = line; *cp && isascii(*cp) && isspace(*cp); cp++);

    if (*cp == NUL) {
	TEST("BLANK", 0, 0);
	line_type[backup_index] = LN_BLANK;
	goto next_line;
    }

    blanks = cp - line;		/* width of the leading white space */

    if (*cp == '-') {
	if (blanks > MAX_BLANKS_DASH) goto next_line;

	/* more than MIN_DASHES dashes, then only dashes/white space */
	while (*cp == '-') cp++;
	if (cp - line - blanks > MIN_DASHES) {
	    while (*cp && (*cp == '-' || (isascii(*cp) && isspace(*cp)))) cp++;
	    if (*cp == NUL) {
		TEST("DASHED", 0, 0);

		line_type[backup_index] = LN_DASHED;
	    }

	}
	goto next_line;
    }

    if (*cp == '*') {
	if (blanks > MAX_BLANKS_ASTERISK) goto next_line;

	/* more than MIN_ASTERISKS asterisks, then only '*'/white space */
	while (*cp == '*') cp++;
	if (cp - line - blanks > MIN_ASTERISKS) {
	    while (*cp && (*cp == '*' || (isascii(*cp) && isspace(*cp)))) cp++;
	    if (*cp == NUL) {
		TEST("ASTERISK", 0, 0);
		line_type[backup_index] = LN_ASTERISK;
		end_or_asterisks++;
	    }
	}
	goto next_line;
    }

    if (blanks <= MAX_BLANKS_END_OF &&
	*cp == 'E' && strncmp(cp, "End of ", 7) == 0) {
	TEST("END_OF_", 0, 0);
	line_type[backup_index] = LN_END_OF;
	end_or_asterisks++;
	goto next_line;
    }

    /* only unindented lines can be header lines */
    if (blanks == 0) {
	if (dg_hdr_field(line, 0)) {
	    TEST("HEADER", 0, 0);

	    line_type[backup_index] = LN_HEADER;
	    if (++more_header_lines < MIN_HEADER_LINES)
		goto next_possible_header_line;

	    /* found block with MIN_HEADER_LINES */

	    /* search for beginning of header */

	    TEST("\nSearch for start of header\n", 0, 0);

	    /*
	     * step back over header and unclassified lines, leaving f
	     * at the earliest of them (third fseek argument 0 means
	     * "from start of file")
	     */
	    for (;;) {
		fseek(f, backup_p[backup_index], 0);
		--digest.dg_lines;
		if (--backup_count == 0) break;
		decrease_index();
		if ((line_type[backup_index] & (LN_HEADER | LN_TEXT)) == 0)
		    break;
	    }

	    if (digest.dg_lines == 0) {
		TEST("Skipped empty article\n", 0, 0);
		return -1;
	    }

	    /* move the end of the body back over blank/dashed lines */
	    for (;;) {
		digest.dg_lpos = backup_p[backup_index];
		if (--backup_count < 0) break;
		decrease_index();
		if ((line_type[backup_index] & (LN_BLANK | LN_DASHED)) == 0)
		    break;
		--digest.dg_lines;
	    }

	    return (digest.dg_lines == 0) ? -1 : 1;
	}
	/* unindented but unknown: does not break a run of header lines */
	goto next_possible_header_line;
    }

    goto next_line;
}


/*
 * Parse the header of a digest sub-article starting at the current
 * position of f.
 *
 * All digest header fields are cleared first; parse_header() is then
 * called with dg_hdr_field as the field recognizer, passing all and
 * hdrbuf through unchanged.
 *
 * Returns 1 if a From or a Subject field was found, 0 otherwise.
 */
parse_digest_header(f, all, hdrbuf)
FILE *f;
int all;
news_header_buffer hdrbuf;
{
    extern char *parse_header(), **dg_hdr_field();

    digest.dg_to = NULL;
    digest.dg_subj = NULL;
    digest.dg_from = NULL;
    digest.dg_date = NULL;

    parse_header(f, dg_hdr_field, all, hdrbuf);

    if (digest.dg_from) return 1;
    if (digest.dg_subj) return 1;
    return 0;
}


/*
 * Recognize a digest header line.
 *
 *	lp	the line to examine
 *	all	when zero, only Date, From, Subject and Title are
 *		recognized; when non-zero, Re (stored as subject) and
 *		To are recognized as well
 *
 * The first letter of the field name is accepted in either case, the
 * rest must match exactly and be followed by a white space character.
 * "Subject" is also accepted without the colon.
 *
 * A line consisting of ^A^A^A^A and a newline, seen while not already
 * inside such a delimited article, sets is_mmdf_folder and moves
 * digest.dg_hpos past the 5 characters of that line; it is not
 * reported as a header field.
 *
 * Returns the address of the digest field the line belongs to, or NULL
 * if the line is not a recognized header line.
 */
static char **dg_hdr_field(lp, all)
register char *lp;
int all;
{

/*
 * check(name, lgt, field): name is the field name without its first
 * letter, lgt its length.  The name is compared first, so that lp[lgt]
 * is only looked at when the line is known to hold at least lgt
 * characters; testing lp[lgt] first would read beyond the end of a
 * shorter line.
 */
#define check(name, lgt, field) \
    if (strncmp(name, lp, lgt) == 0 && \
	isascii(lp[lgt]) && isspace(lp[lgt])) {\
	TEST("MATCH: field ", 0, 0); \
	return &digest.field; \
    }


    TEST("\nPARSE[%.20s] ==>> ", lp, 0);

    /* dispatch on the first letter; lp then points at the second one */
    switch (*lp++) {

     case '\001':
	if (!is_mmdf_folder && strncmp(lp, "\001\001\001\n", 4) == 0) {
	    is_mmdf_folder = 1;
	    digest.dg_hpos += 5;
	    return NULL;
	}
	break;

     case 'D':
     case 'd':
	check("ate:",	4, dg_date);
	break;

     case 'F':
     case 'f':
	check("rom:",	4, dg_from);
	break;

     case 'R':
     case 'r':
	if (!all) break;
	check("e:",	2, dg_subj);
	break;

     case 'S':
     case 's':
	check("ubject:", 7, dg_subj);
	check("ubject",	6, dg_subj);
	break;

     case 'T':
     case 't':
	check("itle:",	5, dg_subj);
	if (!all) break;
	check("o:",	2, dg_to);
	break;

     default:
	break;
    }

#undef check
    TEST("NOT MATCHED ", 0, 0);

    return NULL;
}
