/* import.c  1994 may 19  [gh]
+-----------------------------------------------------------------------------
| Abstract:
|    Functions to parse MIME headers and decode MIME messages.
|
| History:
|    2.0 94 may 19 [gh] Release of version 2.0
|    1.1 94 feb 01 [gh] Improved documentation and MIME support.
|    1.0 94 jan 03 [gh] Wrote first version.
|
| Authorship:
|    Copyright (c) 1994 Gisle Hannemyr.
|    Permission is granted to hack, make and distribute copies of this program
|    as long as this copyright notice is not removed.
|    Flames, bug reports, comments and improvements to:
|       snail: Gisle Hannemyr, Brageveien 3A, 0452 Oslo, Norway
|       email: Inet: gisle@oslonett.no
+---------------------------------------------------------------------------*/

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "mimelite.h"

/*---( globals )------------------------------------------------------------*/

static int BPos;	/* Number of 6-bit values currently held in BBuf.   */
static unsigned char BBuf[4];	/* Pending BASE64 values; decodebase64() turns them into octets. */

static int InHeadP;	/* T if state is parsing header. */
static int EndP;	/* T once a '=' pad was seen in BASE64 input; later BASE64 data is junk. */
static int TempEncd;	/* Encoding that ml_unmimeline applies to body lines. */

/*---( import )-------------------------------------------------------------*/

/*
| Abs: Map a (lower case) charset name to a CS_ code.
| Des: Only the leading characters are compared, so anything that merely
|      starts with a known name (e.g. "iso-8859-15") is reported as that
|      name.
| Par: token = text starting at the charset name
| Ret: CS_IR002 for "us-ascii", CS_ISOL1 for "iso-8859-1", else CS_UNKWN.
*/
static int parsecharset(unsigned char *token)
{
    const char *name = (const char *)token;
    int cset = CS_UNKWN;

    if (strncmp(name, "us-ascii", 8) == 0) {
	cset = CS_IR002;
    } else if (strncmp(name, "iso-8859-1", 10) == 0) {
	cset = CS_ISOL1;
    }
    return(cset);
} /* parsecharset */


/*
| Abs: Copy a token into a buffer.
| Des: Don't bother about delimiters, etc.  It is up to whoever uses the token
|      to make sense of the syntax.  The token ends at the first white space
|      character or at the end of the string.  At most max-1 characters are
|      copied and the result is always NUL terminated directly after the
|      last character copied, so no stale buffer contents follow a short
|      token.
| Par: dd  = buffer to copy token into (nothing is done if NULL)
|      ss  = token  to copy (NULL is treated as an empty token)
|      max = max size of buffer, including the terminator (nothing is done
|            if max <= 0)
*/
static void copytoken(unsigned char *dd, unsigned char *ss, int max)
{
    int ii = 0;

    if (!dd || (max <= 0)) return;	/* no room even for the terminator */

    if (ss) {
	/* Stop one short of max so the terminator always fits. */
	while (ss[ii] && !isspace(ss[ii]) && (ii < max - 1)) {
	    dd[ii] = ss[ii];
	    ii++;
	} /* while */
    } /* if (have token) */
    dd[ii] = '\0';
} /* copytoken */


/*
| Abs: Decode the BASE64 group held in BBuf and empty the buffer.
| Des: The low six bits of each of the four BBuf entries are packed into
|      three octets.  All three octets are always computed; the caller
|      decides how many of them are meaningful.  BPos is reset to 0.
| Ret: Pointer to a static three octet buffer (overwritten by the next call).
*/
static unsigned char *decodebase64(void)
{
    static unsigned char octets[3];
    unsigned long group;

    group = ((unsigned long)(BBuf[0] & 0x3F) << 18)
	  | ((unsigned long)(BBuf[1] & 0x3F) << 12)
	  | ((unsigned long)(BBuf[2] & 0x3F) <<  6)
	  |  (unsigned long)(BBuf[3] & 0x3F);

    octets[0] = (unsigned char)((group >> 16) & 0xFF);
    octets[1] = (unsigned char)((group >>  8) & 0xFF);
    octets[2] = (unsigned char)( group        & 0xFF);

    BPos = 0;
    return(octets);
} /* decodebase64 */



/*
| Abs: Value of one hexadecimal digit.
| Par: cc = character to convert
| Ret: 0..15 for [0-9A-Fa-f], -1 if cc is not a hexadecimal digit.
*/
static int qphexval(int cc)
{
    if ((cc >= '0') && (cc <= '9')) return(cc - '0');
    if ((cc >= 'A') && (cc <= 'F')) return(cc - 'A' + 10);
    if ((cc >= 'a') && (cc <= 'f')) return(cc - 'a' + 10);
    return(-1);
} /* qphexval */


/*
| Abs: Decode a quoted-printable or BASE64 encoded run of text.
| Des: Decoding may be done in place (dest == src); the output is never
|      longer than the input.
|      Quoted-printable: "=XX" (either case of hex digit) becomes one octet.
|      A '=' that is the last character, or is followed by CR or LF, ends
|      the decoding (soft line break).  A '=' that is not followed by two
|      hexadecimal digits is copied unchanged.  Nothing at or beyond endbuf
|      is ever examined.
|      BASE64: characters outside the alphabet are ignored, '-' ends the
|      decoding, and '=' sets EndP.  Once EndP is set, later calls store
|      nothing, set *junkp and return dest.
| Par: dest     = destination
|      src      = where to start decoding from
|      endbuf   = where to stop decoding (or NULL if end of buffer).
|      encoding = content-transfer-encoding
|      junkp    = scanning junk (reference); set to 1 when BASE64 input is
|                 seen after the end of the encoded data, otherwise untouched
|      uscore	= true if we are to decode underscores to spaces; when true,
|                 a '-' in BASE64 input is not reported as junk
| Ret: Pointer to new destination (the terminating NUL just written).  For
|      CE_UNCODED and for encodings not handled here nothing is written
|      and endbuf is returned.
*/
static unsigned char *decode(unsigned char *dest, unsigned char *src,
	unsigned char *endbuf, int encoding, int *junkp, int uscore)
{
    int cc, hi, lo, show;
    unsigned char *ss;

    if (encoding == CE_UNCODED) return(endbuf);

    /* else */

    if (encoding == CE_QUOTEDP) {
	while (*src && (src != endbuf)) {
	    if (*src == '=') {
		/* '=' at the very end or before a line end: soft break. */
		if ((src + 1 == endbuf) || !src[1]) break;
		if (('\n' == src[1]) || ('\r' == src[1])) break;
		hi = qphexval(src[1]);
		/* The second digit must also lie before endbuf. */
		lo = (src + 2 == endbuf) ? -1 : qphexval(src[2]);
		if ((hi >= 0) && (lo >= 0)) {
		    *dest = hi * 0x10 + lo;
		    src += 2;			/* now at second digit   */
		} else {
		    *dest = '=';		/* malformed: keep as is */
		}
	    } else if (uscore && *src == '_') {
		*dest = '\040';
	    } else *dest = *src;
	    dest++; src++;
	} /* while */
	*dest = '\0';
	return(dest);
    } /* if (quoted printable) */

    /* else */

    if (encoding == CE_BASE064) {
	if (EndP) {
	    *junkp = 1;
	    return(dest);
	} /* if */
	BPos = 0;
	while (*src && (src != endbuf)) {
	    cc = *src++;
	    if	    ((cc >= 'A') && (cc <= 'Z')) cc = cc - 'A';
	    else if ((cc >= 'a') && (cc <= 'z')) cc = cc - 'a' + 26;
	    else if ((cc >= '0') && (cc <= '9')) cc = cc - '0' + 52;
	    else if  (cc == '/')		 cc = 63;
	    else if  (cc == '+')		 cc = 62;
	    else if  (cc == '=') { EndP = 1;	 cc = -1; }
	    else if  (cc == '-') {			/* end    */
		if (!uscore) *junkp = 1;		/* junk?  */
		break;
	    } else cc = -1;				/* ignore */

	    if (cc >= 0) {
		BBuf[BPos++] = cc;
		if (BPos == 4) {
		    ss = decodebase64();
		    for (cc = 0; cc < 3; cc++) *dest++ = ss[cc];
		} /* if (got buffer) */
	    } /* if (significant) */
	} /* while */

	/* Flush a partial group: k pending values yield k-1 octets. */
	show = BPos;
	if (show) show--;
	ss = decodebase64();
	for (cc = 0; cc < show; cc++) *dest++ = ss[cc];

	*dest = '\0';
	return(dest);
    } /* if (base64) */

    return(endbuf);

} /* decode */


/*
| Abs: Remove MIME codes in heading fields.
| Cby: ml_unmimeline
| Des: RFC 1522
*/
static void decodhead(unsigned char *buf)
{
    int encoding, jj;
    /* int charset; */
    unsigned char cc;
    unsigned char *ss, *dd, *zz;

    if (!buf) return;

    dd = ss = buf;
    /* printf("[%s", buf); /* DB */
    while (*ss) {
	if ((ss[0] == '=') && (ss[1] == '?')) {
	    if (ss[3] == '\0') {
	        dd[0] = '=';
	        dd[1] = '?';
	        dd[2] = '\0';
	        break;
	    } /* if (bogus) give up */

	    ss += 2; /* position source at first char. after lead in	    */
	    /* charset = parsecharset(ss); * UNUSED coz assuming ISO works  */
	    ss = (unsigned char *)strchr(ss, '?'); /* ss at encoding '?'    */
	    if (!ss) break;			   /* if (bogus) give up    */
	    ss++;				   /* ss at cte char	    */
	    if (!*ss) break;			   /* if (no cte) give up   */
	    cc = tolower(*ss);
	    if      (cc == 'b') encoding = CE_BASE064;
	    else if (cc == 'q') encoding = CE_QUOTEDP;
	    else		encoding = CE_NOTIMPL;
	    ss = (unsigned char *)strchr(ss, '?'); /* ss at body '?'	    */
	    if (!ss) break;			   /* if (bogus) give up    */
	    ss++;				   /* ss at first body char */
	    if (!*ss) break;			   /* if (no body) give up  */
	    zz = (unsigned char *)strchr(ss, '?'); /* where to stop scan    */
	    if (!zz) return;			   /* if (bogus) give up    */
	    dd = decode(dd,ss,zz,encoding,&jj,1);  /* dd is new dest	    */
	    zz++;				   /* zz is at final '='   */
	    if (*zz != '=') break;		   /* if (bogus) give up    */
	    ss = zz;				   /* ss is at final '='    */
	    /* printf("\ndd:[%s]<-ss:[%s]\n", dd, ss);  /* DB */
	} else {
	    *dd++ = *ss;
	}  /* if (encoded) decode; else copy; */
	ss++;					   /* advance to next char  */
    } /* while */
    *dd++ = '\0';				   /* terminate ASCIZ	    */
} /* decodhead */


/*
| Abs: Test whether a header line starts with a given field name.
| Par: line = header line, in any case
|      name = field name in lower case, including the trailing ':'
| Ret: 1 if line starts with name (ignoring case), else 0.
*/
static int headfieldp(const unsigned char *line, const char *name)
{
    while (*name) {
	if (tolower(*line) != *name) return(0);
	line++; name++;
    } /* while */
    return(1);
} /* headfieldp */


/*
| Abs: Parse headers to determine MIME version, etc.
| Imp: Variable hline is used to hold one line from the header. This is
|      canonized, in place, by folding it to lower case and stripping away
|      quotes.  Only mime-version, content-transfer-encoding and
|      content-type lines are canonized; every other line is left exactly
|      as it was.
| Sef: Sets the following: MimeVers, ContType, CSubType, Encoding and Charset
|      (and PrivCTyp, PrivSTyp, PrivCSet for names starting with 'x').
|      MimeVers is left unchanged if no version number can be read.
| Par: hline = the  line from the message header to parse
*/
static void parsehead(unsigned char *hline)
{
    int nn, majv, minv;
    unsigned char *ss, *dd, *tt;
    char *hh;

    if (!headfieldp(hline, "mime-version:") &&
	!headfieldp(hline, "content-transfer-encoding:") &&
	!headfieldp(hline, "content-type:")) return; /* This line is boring */

    dd = ss = hline;
    while (*ss) {
	if (*ss != '"') *dd++ = tolower(*ss);
	ss++;
    } /* canonize line */
    *dd = '\0';

    hh = (char *)hline;
    if (!strncmp(hh, "mime-version:", 13)) {
	majv = minv = 0;
	nn = sscanf(hh+13, "%d.%d", &majv, &minv);
	/* A missing minor number counts as 0; no number at all: ignore. */
	if (nn >= 1) MimeVers = majv*100 + minv;
    } else if (!strncmp(hh, "content-transfer-encoding:", 26)) {
	dd = hline+26;
	while (isspace(*dd)) dd++;
	if (!strncmp((char *)dd, "quoted-printable", 16)) Encoding = CE_QUOTEDP;
	else if (!strncmp((char *)dd, "base64", 6))	  Encoding = CE_BASE064;
	else if (!strncmp((char *)dd, "binary", 6))	  Encoding = CE_BINCODE;
	else if ((*dd == '7') || (*dd == '8'))		  Encoding = CE_UNCODED;
	else						  Encoding = CE_NOTIMPL;
    } else if (!strncmp(hh, "content-type:", 13)) {
	dd = hline+13;
	while (isspace(*dd)) dd++;
	if (!strncmp((char *)dd, "text", 4)) {
	    ContType = CT_ASCTEXT;
	} else if (*dd == 'x') {
	    ContType = CT_PRIVATE;
	    copytoken(PrivCTyp, dd, PRIVSIZ);
	} else {
	    ContType = CT_NOTIMPL;
	}
	tt = (unsigned char *)strchr((char *)dd, '/');
	if (tt && (ContType < CT_NOTIMPL)) {
	    tt++;
	    if (!strncmp((char *)tt, "plain", 5)) {
		CSubType = ST_PLAINTX;
	    } else if (*tt == 'x') {
		CSubType = ST_PRIVATE;
		copytoken(PrivSTyp, tt, PRIVSIZ);
	    } else {
		/* NOTE(review): a CT_ constant is stored in CSubType here; */
		/* possibly an ST_ constant was intended - confirm against  */
		/* mimelite.h before changing.				    */
		CSubType = CT_NOTIMPL;
	    }
	} else {
	    CSubType = ST_UNKNOWN;
	}
	dd = (unsigned char *)strstr((char *)dd, "charset=");
	if (dd) {
	    dd += 8;
	    if (*dd == 'x') {
		Charset = CS_PRIVT;
		copytoken(PrivCSet, dd, PRIVSIZ);
	    } else {
		Charset = parsecharset(dd);
	    }
	} /* if (charset) */
    } /* if (mime-version) else if (c-t-e) else if (content-type) */

} /* parsehead */


/*
| Abs: Reset state machine for new message.
| Par: cset  = default character set to assume for messages without a MIME
|              header.  The value CS_IR002 is RFC-822 conformant, but the
|              value CS_ISOL1 will do no harm and will give much more robust
|              behaviour in most western european locations (some news
|              articles use CS_ISOL1 and omit the MIME headers).
|      encod = CE_PRSHEAD if the message has a header, so the
|              content-transfer-encoding is determined by parsing the
|              header.  Otherwise a message body without header is being
|              decoded, and this parameter establishes the encoding to use.
| Des: With a header, ContType is set to CT_UNKNOWN so we can detect RFC-1049
|      content-type headers.  Without a header, the message is taken to be
|      plain text and folding is initialised right away.
*/
void ml_unmimsetup(int cset, int encod)
{
    EndP    = 0;
    Charset = cset;

    if (encod != CE_PRSHEAD) {
	/* Body only: fix everything now, there is no header to parse. */
	InHeadP  = 0;
	MimeVers = 100;
	ContType = CT_ASCTEXT;
	CSubType = ST_PLAINTX;
	Encoding = encod;
	TempEncd = Encoding;
	ml_foldinit(Charset, CS_IGNOR);
	return;
    } /* if (no header) */

    /* Header follows: start from RFC-822 defaults. */
    InHeadP  = 1;
    MimeVers = MV_R0822;
    ContType = CT_UNKNOWN;
    CSubType = ST_UNKNOWN;
    Encoding = CE_UNCODED;
} /* ml_unmimsetup */

/*
| Abs: End of header predicate.
| Par: buf = NUL terminated line
| Ret: TRUE if the line starts with a line end (LF, or CR directly followed
|      by LF), i.e. it is a null line, else false.
*/
static int eohp(unsigned char *buf)
{
    unsigned char *pp = buf;

    if (*pp == '\r') pp++;		/* optional CR before the LF */
    return(*pp == '\n');
} /* eohp */


/*
| Abs: Process one line of the buffer.
| Par: buf = NUL terminated line; it is rewritten in place
| Ret: 0: nothing special
|      1: line is null line separating header from body
|      2: found junk trailing BASE64 encoding
| Des: Header lines are parsed for MIME information, have their encoded
|      words decoded and are folded.  Body lines are decoded with the
|      current body encoding and folded; once junk is reported the rest of
|      the body is treated as uncoded.
|      The mimelite library doesn't really handle RFC-1049 content types, but
|      it assumes that something _with_ a content-type header, but _without_
|      a mime-version header must be RFC-1049 and sets MimeVers accordingly.
|      The rest is up to you.
*/
int ml_unmimeline(unsigned char *buf)
{
    int trailing = 0;

    if (InHeadP) {
	if (!eohp(buf)) {			/* ordinary header line */
	    parsehead(buf);
	    decodhead(buf);
	    ml_foldbuff(buf);
	    return(0);
	} /* if (header line) */

	/* Null line: the header is complete, settle the final state. */
	InHeadP = 0;
	if (MimeVers < 0) {			/* NOT MIME */
	    if (ContType == CT_UNKNOWN) {
		/* No content-type seen: plain RFC-822 text.  MimeVers, */
		/* Encoding and Charset keep their ml_unmimsetup values. */
		ContType = CT_ASCTEXT;
		CSubType = ST_PLAINTX;
	    } else {
		MimeVers = MV_R1049;		/* RFC1049 */
	    }
	} /* if (NOT MIME) */
	TempEncd = Encoding;
	ml_foldinit(Charset, CS_IGNOR);
	return(1);
    } /* if (header) */

    /* Body line. */
    decode(buf, buf, NULL, TempEncd, &trailing, 0);
    ml_foldbuff(buf);
    if (!trailing) return(0);

    TempEncd = CE_UNCODED;
    return(2);

} /* ml_unmimeline */

/* EOF */
