%{

/*
** Copyright Heikki Suonsivu 1989
**
** Kent, you owe me a beer, if you use it and meet me some day. No other
** limitations. You can use, sell, put your name in it, print it out and
** eat the printout, or use it any other way. I take no responsibility
** for the consequences.
**
** This software is Copyright (c) 1989, 1990, 1991 by Kent Landfield.
**
** Permission is hereby granted to copy, distribute or otherwise 
** use any part of this package as long as you do not try to make 
** money from it or pretend that you wrote it.  This copyright 
** notice must be maintained in any copy made.
**
*/
#if !defined(lint) && !defined(SABER)
static char SID[] = "@(#)match.y	2.2 2/23/91";
#endif

#define YYDEBUG 1
#include <stdio.h>
#include <sys/types.h>
#include <ctype.h>
#include "rkive.h"
#include "article.h"

/* Streams defined elsewhere in the program: yylex() writes its debug
   trace and the "too long" diagnostic to logfp, yyerror() reports parse
   failures on errfp. */
extern FILE *logfp;
extern FILE *errfp;
/* Debug switch tested by yylex() before every trace message. */
extern int yydebug;
/* Result of the last non-empty expression; assigned by the top-level
   grammar rule from the value of its `exp'. */
int parser_return_value;

%}

/* Semantic value carried by tokens and non-terminals. */
%union {
  int integer;	/* truth value of an `exp' (see the %type declaration) */
  char *string;	/* text of a STRING token, as stored by yylex() */
  char *header;	/* Must be different for parser, no "" around header names.
		   Value of a HEADER token, as stored by yylex(). */
}

/* Terminals that carry a value: yylex() fills in yylval before returning
   either of them. */
%token <string> STRING
%token <header> HEADER
/* Operator precedence, lowest first (yacc convention).  OR and AND are
   declared on the same line, so they share a single precedence level and
   both associate to the left. */
%left OR AND
%left NOT
%left INCLUDES GLOB_MATCHES
/* Every `exp' evaluates to an integer truth value. */
%type <integer> exp
  
/* Tries to understand some expressions. */
%%

/* Top-level rule.  For an empty input the action executes `return 1'
   directly, so parser_return_value is NOT assigned on that path; for
   anything else the value of the whole expression is stored in
   parser_return_value. */
expression:	/* Empty */	{ return 1; } /* Empty always matches */
|	exp	{ parser_return_value = $1; }
  
/* Boolean combinators over integer truth values, followed by the three
   primitive tests.  substr() and match_str() supply the primitive results. */
exp:	exp OR exp	{ $$ = $1 || $3; }
|	exp AND exp	{ $$ = $1 && $3; }
|	NOT exp	{ $$ = ! $2; }
|	'(' exp ')'	{ $$ = $2; }
|	HEADER INCLUDES STRING	{ $<integer>$ = substr($1, $3); }
/* NOTE(review): here the STRING is the first argument and the header value
   the second, i.e. the roles are swapped relative to the rule above --
   confirm against substr()'s parameter order that this is intended. */
|	STRING INCLUDES HEADER	{ $<integer>$ = substr($1, $3); }
|	HEADER GLOB_MATCHES STRING { $<integer>$ = match_str($1, $3); }
%%

/* One entry of a lookup table used by yylex(): a lower-case word and the
   integer it maps to.  Tables are terminated by an entry whose keyword is
   NULL. */
typedef struct {
  char *keyword;	/* word as it appears (lower-cased) in the expression */
  int key;		/* token code (keys[]) or header index (headers[]) */
} KEYWORD;

KEYWORD keys[] = {
  "and", AND,
  "or", OR,
  "includes", INCLUDES,
  "glob-matches", GLOB_MATCHES,
  "not", NOT,
  NULL, 0
  };

KEYWORD headers[] = {
  "from", FROM,
  "path", PATH,
  "newsgroup", NEWSGROUP,
  "subject", SUBJECT,
  "message-id", MSG_ID,
  "reply-to", REPLY_TO,
  "references", REFERENCES,
  "date", DATE,
  "expire", EXPIRE,
  "control", CONTROL,
  "sender", SENDER,
  "followup-to", FOLLOWUP_TO,
  "distribution", DISTRIBUTION,
  "organization", ORGANIZATION,
  "numlines", NUMLINES,
  "keywords", KEYWORDS,
  "summary", SUMMARY,
  "approved", APPROVED,
  "supersedes", SUPERSEDES,
  "xref", XREF,
  "posting-number", POSTING_NUMBER,
  "submitted-by", SUBMITTED_BY,
  "archive-name", ARCH_NAME,
  "articleid", ARTICLEID,
  "patch-to", PATCH_TO,
  "environment", ENVIRONMENT,
  NULL, 0
  };

/*
** Global hand-off buffer: copy the matching expression here, then call
** yyparse.  When yylex() finds it non-empty it copies the text into its
** own private buffer and clears the first byte here to mark the
** expression as taken.
*/

char inputstring[MAXMATCHLEN];

/*
** Lexical analyser for the match-expression grammar.
**
** On the first call after a new expression has been placed in the global
** `inputstring', the text is copied into a private buffer and
** `inputstring' is emptied; subsequent calls continue scanning that
** private copy.
**
** Returns:
**    STRING   for a "quoted string" (backslashes are kept in the value)
**             or for an unquoted word that is neither a keyword nor a
**             header name; yylval.string points at a static buffer that
**             is overwritten by the next call.
**    HEADER   for a word found in headers[]; yylval.header is taken from
**             header.header[] at that entry's key.
**    a keyword token (AND, OR, NOT, INCLUDES, GLOB_MATCHES) for a word
**             found in keys[].
**    '(' or ')' for a parenthesis.
**    0        at end of input, and also for a character that cannot
**             start any token.
**
** Exits the program if the expression does not fit the private buffer.
*/

int yylex()
{
  static char input_buffer[MAXMATCHLEN];
  static char string_buffer[MAXMATCHLEN];
  /* Start on the (zero-filled) private buffer so that a call made before
  ** any expression was supplied sees end of input instead of
  ** dereferencing a null pointer.
  */
  static char *p = input_buffer;
  register char *string;
  register KEYWORD *keyword;
  
  if (*inputstring) {
      if (strlen(inputstring) >= MAXMATCHLEN) {
          (void) fprintf(logfp, "Match string too long, max %d characters",
		       MAXMATCHLEN);
          exit (1);
      }
	  
      (void) strcpy(input_buffer, inputstring);
      p = input_buffer;
      *inputstring = 0;	/* We got it */
  }

  /* Skip whitespace separating tokens.  The <ctype.h> classifiers need an
  ** unsigned char value; a plain char may be negative.
  */
  
  while (*p && isspace((unsigned char) *p)) p++;
  
  if (!*p) {
      if (yydebug) 
          (void) fprintf(logfp, "yylex: return eof\n");
      return 0; /* Eof */
  }

  if (*p == '"') {
      string = string_buffer;
      
      /* Collect the string, try to be intelligent with escaped '"'.
      ** The backslash itself is kept in the collected text.
      ** Trailing " is not needed. 
      */
      
      for (p++; *p && *p != '"'; p++) {
	if (*p == '\\') {
	    *string++ = *p++;
	    if (!*p)
	        break;	/* Backslash was the last character: stay on the
			   terminator instead of stepping over it */
	    *string++ = *p; /* Take the next char, whatever it is */
	}
	else
	  *string++ = *p;
      }

      /* Discard the trailing '"' only when there is one; for an
      ** unterminated string the cursor must remain on the terminator so
      ** that the next call reports end of input.
      */
      if (*p)
          p++;
      
      *string = 0;

      yylval.string = string_buffer;
      if (yydebug) 
         (void) fprintf(logfp, "yylex: return string <%s>\n", string_buffer);
      return STRING;
  }
  else if (isalpha((unsigned char) *p)) {
      string = string_buffer;

      /* Collect keyword/header: everything up to the next whitespace,
      ** folded to lower case.
      */

      for (; *p && !isspace((unsigned char) *p); p++)
	*string++ = isupper((unsigned char) *p)
	              ? (char) tolower((unsigned char) *p) : *p;
      
      *string = 0;
      
      /* Any of the operands? */
  
      for (keyword = keys; keyword->keyword; keyword++) {
	if (!strcmp(keyword->keyword, string_buffer)) {
	    if (yydebug)
                (void) fprintf(logfp, "yylex: return keyword %d (%s)\n",
				 keyword->key, keyword->keyword);
	    return keyword->key;
	}
      }
      
      /* A header? */
      
      for (keyword = headers; keyword->keyword; keyword++)
	if (!strcmp(keyword->keyword, string_buffer)) {
	    yylval.header = header.header[keyword->key];
	    if (yydebug) 
                (void) fprintf(logfp, "yylex: return header %s <%s>\n",
				 keyword->keyword, yylval.header);
	    return HEADER;
	}

      /* Nope, maybe its a string without "". Note, it converts
	 the string to lower-case. */

      yylval.string = string_buffer;
      if (yydebug) 
          (void) fprintf(logfp,"yylex: return unquoted string <%s>\n",string_buffer);
      return STRING;
  }
  else if (*p == '(' || *p == ')')
    return *p++;	/* Parentheses are their own token codes */
  else {
      if (yydebug) 
          (void) fprintf(logfp, "yylex: Bad character '%c'\n", *p);
      return 0; /* Eof, could be something else, but I'm too tired now */
  }
    /* Never here: every branch above returns */
}
      
/*
** Error hook called by the generated parser with a description of the
** failure in `s'.  Reports it on errfp and terminates the program with
** exit status 1, so it never actually returns to the parser.
*/
int yyerror(s)
     char *s;
{
  (void) fprintf(errfp,"Cannot parse match pattern: %s\n",s);
  exit (1);
  return 0;	/* NOTREACHED - satisfies the declared int return type */
}

/*
** int match_str(string, format)
**
** Search the "string" to see if it matches the format specified.
** The whole string must be matched by the whole format.
** 
** The characters `[', `]', `*', `?', `^', `-', and `\' are considered
** special metacharacters and have the following meanings...
**    `*'   matches any set of characters (including none),
**    `?'   matches any one character,
**   [..]   matches any character specified in the brackets,
**   [^..]  matches any character that is *not* specified in the brackets.
**          The use of the brackets is supposed to work in much the same
**          manner as the shell in that you can use [a-z] notation as well.
** 
** If there is a need to match one of the special metacharacters
** the character must be prefixed with a backslash (`\') to negate its 
** special meaning.
** 
** This function returns 1 if there is a match and 0 otherwise.  A
** malformed format (a `[' without its closing `]', or a backslash with
** nothing after it) never matches.
** 
*/ 

int match_str(string, format)
char *string;
char *format;
{
   register char *s = string;
   register char *f = format;
   register char ch;
   register char nextch;
   register int negate;
   register int matched;

   while ((ch = *f++)) {
       switch (ch) {
           case '*':
                  /*
                  ** Fold a run of `*' and `?' into one wildcard; every
                  ** `?' in the run still has to consume one character.
                  */
     
                  while ((ch = *f++) == '?' || ch == '*') {
                     if (ch == '?' && *s++ == 0)
                        return 0;
                  }
     
                  if (ch == 0)   /* at the end of the format ? */
                     return 1;
     
                  /*
                  ** nextch is the literal the remainder must start
                  ** with; it is used to skip hopeless positions cheaply.
                  */
                  if (ch == '\\') 
                      nextch = *f;
                  else
                      nextch = ch;
     
                  /*
                  ** Now let's match against what is left, trying every
                  ** remaining position of the string in turn.  A bracket
                  ** set cannot be pre-screened, so always recurse for it.
                  */
                  for (;;) {
                     if ((ch == '[' || *s == nextch)) {
                         if (match_str (s, f - 1))
                            return 1;
                     }
                     if (*s++ == 0)
                        return 0;
                  }
                  /* NOTREACHED */
     
           case '?':
              if (!*s)
                  return 0;
              else
                  ++s;
              break;

           case '\\':
                 /*
                 ** A backslash with nothing after it cannot match;
                 ** checking first keeps f from running past the
                 ** terminator of the format.
                 */
                 if (*f == 0)
                     return 0;
                 if (*f++ != *s++) 
                     return 0;
                 break;
     
           case '[':
                 /*
                 ** A set always consumes exactly one character, so it
                 ** cannot match at the end of the string (not even a
                 ** negated set).
                 */
                 if (*s == 0)
                     return 0;
                 nextch = *s++;
     
                 if (*f == '^') {
                     negate = 1;
                     f++;
                 }
                 else
                     negate = 0;
     
                 matched = 0;

                 /*
                 ** Each pass handles one element of the set: a single
                 ** character (sp == rp) or a range sp-rp.  On exit ch is
                 ** the closing `]' and f points just past it.
                 */
                 for(ch = *f++;;) {
                    register char sp = ch, rp = ch;

                    if (!ch)
                        return (0);     /* no closing `]' */
     
                    if (ch == '\\') {
                       sp = *f;
                       if (!sp)
                           return (0);  /* dangling backslash */
                       f++;
                       rp = sp;
                    }
     
                    ch = *f++;
     
                    if (ch == '-') {
                       rp = *f++;
                       if (rp == '\\')
                          rp = *f++;
                       if (!rp)
                          return (0);
                       ch = *f++;
                    }
                    if (nextch >= sp && nextch <= rp) {
                       /*
                       ** Ignore the rest of [...] previously matched,
                       ** stepping over escaped characters so that `\]'
                       ** is not taken for the end of the set.
                       */
                       while (ch != ']') {
                          if (!ch) 
                              return (0);
                          if (ch == '\\') {
                              if (!*f)
                                  return (0);
                              f++;
                          }
                          ch = *f++;
                       }
                       matched = 1;
                    }
                    if (ch == ']')
                       break;
                 }

                 /*
                 ** A plain set needs a hit, a negated set needs a miss.
                 */
                 if (matched == negate) 
                    return 0;
                 break;
              
           default:
              if (ch != *s++)
                  return 0;
              break;
       }
   }
   return(*s ? 0 : 1); 
}
