/*
    The WebDruid - a web server log analysis program

    Copyright (C) 2003-2004  Fabien Chevalier (fabien@juliana-multimedia.com)

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version, and provided that the above
    copyright and permission notice is included with all distributed
    copies of this or derived software.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/

#include "config.h"

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <zlib.h>

#include "webdruid.h"
#include "lang.h"
#include "parser.h"
#include "logs_mgmt.h"

/*
 * Log File struct
 *
   It contains:
     - the last record read from this file
     - two file pointers: 'file' is the one in use
       when gz_log = 0, 'gz_file' otherwise
     - the name of the file
     - gz_* : some variables used in case of a gzip'ed log
     - nr_records : current line in the log file
 */

/*********************************
         STRUCTURES
*********************************/

#define GZ_BUFSIZE 16384                      /* our_gzgets buffer size   */

/*
 * One entry of the log file pool.
 *
 * gz_file is a plain gzFile: that is the handle type gzopen() returns and
 * gzread()/gzrewind()/gzclose() expect.  gzFile is already a pointer type
 * in zlib, so no extra level of indirection is wanted here.
 */
struct log_file {
   struct log_file   *next;                 /* next entry in the list   */
   struct log_struct cur_record;            /* last record parsed       */
   char              *filename;             /* heap copy of the name    */
   FILE              *file;                 /* stream if gz_log == 0    */
   gzFile            gz_file;               /* handle if gz_log != 0    */
   char              *gz_f_buf;             /* our_gzgets buffer        */
   char              *gz_f_cp;              /* pointer into the buffer  */
   int               gz_f_end;              /* count to end of buffer   */
   int               gz_log;                /* non-zero: gzip'ed log    */
   unsigned long     nr_records;            /* lines read from the file */
};

/*********************************
         GLOBAL VARIABLES
*********************************/

/*
 * Counters maintained by this file: they are zeroed by the first-call
 * initialisation in logs_next_record() and incremented while lines are
 * read (jump_next_record) and rejected (logs_next_record).
 */
unsigned long total_rec    ;                /* Total Records Processed     */
unsigned long total_ignore ;                /* Total Records Ignored       */
unsigned long total_bad    ;                /* Total Bad Records           */

/*********************************
         LOCAL VARIABLES
*********************************/

/*
 * List of log files being processed.
 * logs_add_file()/logs_add_stdin() push new entries on top of it;
 * insert_log() keeps it ordered by the timestamp of each entry's
 * current record.
 */

struct log_file *active_files = 0;

/*
 * List of log files from which all records
 * have been read (filled by logs_next_record(), emptied by
 * logs_rewind()).
 */

struct log_file *read_files = 0;

/*
 * This boolean variable is non-zero as long as no record
 * has been read from the pool yet: logs_next_record() performs
 * its one-time initialisation when it is set, then clears it;
 * logs_rewind() sets it again.
 */

int first_time = 1;

/*
 * Log file record buffer: holds the raw text of the line most recently
 * read by jump_next_record().  It is shared by all log files.
 * BUFSIZE is not defined in this file.
 */

char buffer[BUFSIZE];

/*********************************
         LOCAL FUNCTIONS
*********************************/

/* fgets()-like line reader for gzip'ed logs, built on gzread() */
static char *our_gzgets(struct log_file *, char *, int);
/* links plog into active_files according to its current record's timestamp */
static void insert_log(struct log_file *plog);
/* reads the next line of 'log' into the global buffer; 0 = ok, 1 = no more data */
static int  jump_next_record(struct log_file *log);
/* when verbose, prints msg on stderr prefixed with the file name and line number */
static void print_warning(const char * msg, struct log_file *log);
/* closes the file and releases the entry together with its buffers */
static void free_log_struct(struct log_file *log);

/*******************************************/
/* LOGS_ADD_FILE - add file to the pool    */
/*******************************************/

int logs_add_file(const char * filename)
{
   struct log_file *plog;

   /* Check if gzip'ed log file & open file*/
   if (!strcmp((filename + strlen(filename) - 3), ".gz"))
   {
      gzFile gz_file = gzopen(filename,"rb");
      if (gz_file == Z_NULL)
      {
         /* Error: Can't open log file ... */
         fprintf(stderr, "%s %s\n",_("Error: Can't open log file"), filename);
         return 1;
      }
      plog = malloc(sizeof(struct log_file));
      plog->gz_log = 1;
      plog->gz_file = gz_file;
      plog->gz_f_buf = malloc(GZ_BUFSIZE);
      plog->gz_f_cp = plog->gz_f_buf + GZ_BUFSIZE;
      plog->gz_f_end = 0;

   }
   else
   {
      FILE *file = fopen(filename,"r");
      if (file == 0)
      {
         /* Error: Can't open log file ... */
         fprintf(stderr, "%s %s\n",_("Error: Can't open log file"), filename);
         return 1;
      }
      plog = malloc(sizeof(struct log_file));
      plog->gz_log = 0;
      plog->gz_f_buf = 0; /* we will free it later, we don't forget to zero it ! */
      plog->file = file;
   }

   /* Add struct log_file to list of open files */

   plog->filename = strdup(filename);
   plog->nr_records = 0;

   plog->next = active_files;
   active_files = plog;

   if (verbose>1)
   {
      printf("%s %s (",_("Using logfile"), filename);
      if (plog->gz_log) printf("gzip-");
      switch (log_type)
      {
         /* display log file type hint */
         case LOG_CLF:   printf("clf)\n");   break;
         case LOG_W3C: printf("w3c)\n"); break;
      }
   }

   return 0;
}

/*******************************************/
/* LOGS_ADD_STDIN - adds STDIN to the pool */
/*******************************************/

int logs_add_stdin()
{
   struct log_file *plog;

   /* Add struct log_file to list of open files */
   plog = malloc(sizeof(struct log_file));

   plog->file = stdin;
   plog->gz_file = 0;
   plog->filename = strdup("STDIN");

   plog->next = active_files;
   active_files = plog;

   if (verbose>1)
   {
      printf("%s %s (",_("Using log from"), "STDIN");
      if (plog->gz_log) printf("gzip-");
      switch (log_type)
      {
         /* display log file type hint */
         case LOG_CLF:   printf("clf)\n");   break;
         case LOG_W3C:   printf("w3c)\n");   break;
      }
   }

   return 0;
}

/*******************************************/
/* LOGS_NR_FILES - # of files in the pool  */
/*******************************************/

/*
 * Returns the number of entries currently linked in the list of
 * active files (files moved to the read list are not counted).
 */
int logs_nr_files()
{
   int count = 0;
   struct log_file *entry = active_files;

   /* walk the singly linked list up to its terminating null link */
   while (entry != 0)
   {
      count++;
      entry = entry->next;
   }

   return count;
}

/*******************************************/
/* LOGS_NEXT_RECORD                        */
/*******************************************/

/*
 * Accounts for a line of 'log' (still in the global buffer) that
 * parse_record() rejected: empty lines and '#' lines are counted as
 * ignored, anything else as bad.  A warning is printed in both cases.
 */
static void count_rejected_record(struct log_file *log)
{
   /* Check if it's a W3C header or IIS Null-Character line */
   if ((buffer[0]=='\0') || (buffer[0]=='#'))
   {
      total_ignore++;
      print_warning(_("Skipping garbage w3c record"), log);
   }
   else
   {
      /* really bad record... */
      total_bad++;
      print_warning(_("Skipping bad record"), log);
   }
}

/*
 * Fills *precord with the oldest pending record in the pool.
 *
 * Every file on the active list holds, in cur_record, its next record
 * that has not been handed out yet; the list is ordered so that the
 * oldest of them is on top.
 *
 * First call (first_time set): the counters are zeroed, the first
 * parsable record of every file is read and the files are ordered.
 * Files without any parsable record are closed and dropped.
 *
 * Later calls: the file on top of the list (whose record was handed
 * out by the previous call) is advanced to its next parsable record
 * and re-inserted in order.  When it has no more lines it is moved to
 * the list of read files.
 *
 * Returns 0 when *precord has been filled, 1 when there is no record
 * left (precord is not written in that case).
 */
int logs_next_record(struct log_struct * precord)
{
   if(first_time == 0)
   {
      /* Advance the head file.  The loop replaces one level of recursion
         per rejected line, and simply does nothing once the pool is empty. */
      while(active_files != 0)
      {
         struct log_file *plog = active_files;

         if(jump_next_record(plog) != 0)
         {
            /* We are at the end of file, add it to the list of read files */
            active_files = plog->next;

            plog->next = read_files;
            read_files = plog;

            /* The new head, if any, still holds a record that has not been
               handed out: fall through and return it.  Reading another line
               from it here would silently drop that record. */
            break;
         }

         /* now we try to parse the record */
         if(parse_record(buffer, &plog->cur_record) == 0)
         {
            /* We take the current log file out of the list, and then re-add it on the right place */
            active_files = plog->next;
            insert_log(plog);
            break;
         }

         /* unusable line: account for it and try the next one */
         count_rejected_record(plog);
      }
   }
   else
   {
      /* first time initialization */
      struct log_file *current_log = active_files;
      first_time = 0;
      active_files = 0;

      total_rec = 0;
      total_ignore = 0;
      total_bad = 0;

      /* let's reorder log files */
      while(current_log != 0)
      {
         /* backup the link now: insert_log() and free_log_struct() change it */
         struct log_file *next_log = current_log->next;
         int found_good_rec = 0;

         /* Read lines until the first one that parses, or end of file */
         for(;;)
         {
            if(jump_next_record(current_log) != 0)
            {
               if (verbose) fprintf(stderr,_("Could not read any record from file %s.\n"), current_log->filename);
               break;
            }

            if(parse_record(buffer, &current_log->cur_record) == 0)
            {
               found_good_rec = 1;
               break;
            }

            /* As first record, check if stupid Netscape header stuff      */
            if ( strncmp(buffer,"format=",7) == 0 )
            {
               /* Skipping Netscape header record */
               if (verbose>1) printf("%s\n",_("Skipping Netscape header record"));
               /* count it as ignored... */
               total_ignore++;
            }
            else
               count_rejected_record(current_log);
         }

         if(found_good_rec)
         {
            /* We have the first record... insert the file in the right place in the list */
            insert_log(current_log);
         }
         else
         {
            /* nothing in this file, just remove it from the pool */
            free_log_struct(current_log);
         }

         current_log = next_log; /* jump to next element */
      }
   } /* first_time */

   if(active_files)
   {
      /* The oldest pending record is on the top of the list */
      memcpy(precord, &active_files->cur_record, sizeof(struct log_struct));
      return 0;
   }

   return 1;
}

/*******************************************/
/* LOGS_REWIND                             */
/*******************************************/

/*
 * Rewinds every file of the pool, so that the next call to
 * logs_next_record gives us the first record again.
 *
 * It is meant to be called once all files have been read, but files
 * that are still on the active list are rewound as well instead of
 * being lost.  Per-file line counters and the gzip read buffers are
 * reset so that the second pass starts from a clean state.
 *
 * NOTE(review): rewind() presumably cannot restart a non-seekable
 * stream such as a piped STDIN -- confirm how callers combine this
 * with logs_add_stdin().
 */

void logs_rewind()
{
   struct log_file *current_log;

   /* collect the files not fully read yet: overwriting active_files
      below would otherwise drop them without closing them */
   while(active_files != 0)
   {
      current_log = active_files;
      active_files = current_log->next;

      current_log->next = read_files;
      read_files = current_log;
   }

   /* rewinding all files */
   for(current_log = read_files; current_log != 0; current_log = current_log->next)
   {
      if(current_log->gz_log)
      {
         gzrewind(current_log->gz_file);
         /* mark the buffer as empty so that our_gzgets() reloads it */
         current_log->gz_f_cp = current_log->gz_f_buf;
         current_log->gz_f_end = 0;
      }
      else
         rewind(current_log->file);

      /* line numbers printed by print_warning() restart with the file */
      current_log->nr_records = 0;
   }

   first_time = 1;
   active_files = read_files;
   read_files = 0;
}

/*******************************************/
/* LOGS_CLOSE                              */
/*******************************************/

/*
 * Close the log files on the list of read files and free their
 * internal data structs.  The list is left empty, so calling
 * logs_close() or logs_rewind() afterwards does not touch freed
 * memory.
 *
 * NOTE(review): entries still on the active list (files not read to
 * the end) are not released here -- confirm callers always drain the
 * pool before closing it.
 */

void logs_close()
{
   struct log_file *current_log, *next_log;

   /* detach the list first: the global must never point to freed entries */
   current_log = read_files;
   read_files = 0;

   while(current_log != 0)
   {
      next_log = current_log->next;
      free_log_struct(current_log);
      current_log = next_log;
   }
}

/*********************************************/
/* OUR_GZGETS - enhanced gzgets for log only */
/*********************************************/

/*
 * fgets()-like reader for gzip'ed logs.
 *
 * Copies characters of plog into buf until a newline has been copied
 * (it is kept), size-1 characters have been stored, or the compressed
 * stream ends; buf is always NUL-terminated when it is returned.
 * Data is fetched from zlib in chunks of GZ_BUFSIZE bytes.
 *
 * Returns buf, or a null pointer when no character could be read
 * (end of stream or read error) or when buf/size are unusable.
 * Like fgets(), a last line lacking its newline is still returned.
 */
static char *our_gzgets(struct log_file *plog, char *buf, int size)
{
   char *out_cp = buf;      /* point to output */

   /* with size <= 0 the countdown below would never reach zero */
   if (buf == NULL || size <= 0) return NULL;

   while (1)
   {
      /* load?  (all buffered characters consumed, or nothing buffered yet) */
      if ((plog->gz_f_cp - plog->gz_f_buf) >= plog->gz_f_end)
      {
         plog->gz_f_end = gzread(plog->gz_file, plog->gz_f_buf, GZ_BUFSIZE);
         if (plog->gz_f_end <= 0)
         {
            /* end of data: hand out what was gathered so far, if anything */
            if (out_cp == buf) return NULL;
            *out_cp='\0';
            return buf;
         }
         plog->gz_f_cp = plog->gz_f_buf;
      }

      if (--size)                   /* more? */
      {
         *out_cp++ = *plog->gz_f_cp;
         if (*plog->gz_f_cp++ == '\n')
         {
            *out_cp='\0';
            return buf;
         }
      }
      else
      {
         /* output full: the pending character stays buffered for the next call */
         *out_cp='\0';
         return buf;
      }
   }
}

/*********************************************/
/* JUMP_NEXT_RECORD                          */
/*********************************************/

/*
 * Reads up to BUFSIZE-1 characters of the next line of 'log' into the
 * global buffer, through zlib or stdio depending on the kind of file.
 * Returns a null pointer when nothing could be read.
 */
static char *read_log_chunk(struct log_file *log)
{
   if(log->gz_log)
      return our_gzgets(log, buffer, BUFSIZE);

   return fgets(buffer, BUFSIZE, log->file);
}

/*
 * This function, given a log file, fills the buffer
 * with the next record.
 * A record too long for the buffer is not returned: a warning is
 * printed, it is counted as bad, the rest of its line is discarded
 * and the following record is read instead.
 *
 * Every line read bumps total_rec and the line counter of the file.
 *
 * Returns 0 when the buffer holds a record, 1 when no more lines can
 * be read from the file.
 */

static int jump_next_record(struct log_file *log)
{
   for(;;)
   {
      size_t len;

      if(read_log_chunk(log) == NULL)
         return 1;

      total_rec++;
      log->nr_records++;

      /* A full buffer means a truncated line only when it does not end
         with the newline: a line that fits exactly is a valid record. */
      len = strlen(buffer);
      if (len < (size_t)(BUFSIZE-1) || buffer[len-1] == '\n')
         return 0;

      print_warning(_("Error: Skipping oversized log record"), log);

      total_bad++;                     /* bump bad record counter      */

      /* get the rest of the record */
      while (read_log_chunk(log) != NULL)
      {
         len = strlen(buffer);
         if (len < (size_t)(BUFSIZE-1) || buffer[len-1] == '\n')
         {
            /* last piece of the oversized line */
            if (debug_mode && verbose) fprintf(stderr,"%s\n",buffer);
            break;
         }
         if (debug_mode && verbose) fprintf(stderr,"%s",buffer);
      }

      /* go get next record if any */
   }
}

/*********************************************/
/* INSERT_LOG                                */
/*********************************************/

/*
 * Insert log in the right place in the list of logs
 * so that the log file with the youngest current
 * record is on top of the list.
 *
 */

static void insert_log(struct log_file *plog)
{
   /*
      current_logs_seconds is made to sort entries.
      To avoid an integer overflow, we substract 1990
      to the year -- we assume nobody will process logs
      that old...
      Question : did CLF log format even exist at this time ? :-)
   */

   unsigned long current_log_seconds = ((((((plog->cur_record.year - 1990) *12 + plog->cur_record.month) * 31 + plog->cur_record.day) * 24)
                                         + plog->cur_record.hour) *60 + plog->cur_record.min) * 60 + plog->cur_record.sec;
   unsigned long cur_seconds;
   struct log_file *cur, *prev;

   cur = active_files;
   prev = 0;
   for(; cur != 0; prev = cur, cur = cur->next)
   {
      cur_seconds = ((((((cur->cur_record.year - 1990) *12 + cur->cur_record.month) * 31 + cur->cur_record.day) * 24)
                     + cur->cur_record.hour) *60 + cur->cur_record.min) * 60 + cur->cur_record.sec;
      if(current_log_seconds <= cur_seconds)
         break;
   }
   if(prev == 0)
   {
      /* Add on the top of the list */
      plog->next = active_files;
      active_files = plog;
   }
   else
   {
      plog->next = cur;
      prev->next = plog;
   }
}

/*********************************************/
/* FREE_LOG_STRUCT -- Does what it says      */
/*********************************************/

/*
 * Closes the file behind 'plog' with the API matching the way it is
 * read (stdio or zlib), then releases the entry and the memory it
 * owns.  The entry is not unlinked from any list: that is up to the
 * caller.
 */
static void free_log_struct(struct log_file *plog)
{
   if(!plog->gz_log)
      fclose(plog->file);
   else
      gzclose(plog->gz_file);

   /* gz_f_buf is null for plain files; free() accepts that */
   free(plog->gz_f_buf);
   free(plog->filename);
   free(plog);
}

/*********************************************/
/* PRINT_WARNING -- does what it says        */
/*********************************************/

/*
 * Prints 'msg' on stderr, prefixed with the name of 'log' and its
 * current line number.  Nothing is printed unless verbose is set.
 * In debug mode the content of the global record buffer follows the
 * message; otherwise the line is simply terminated.
 */
static void print_warning(const char * msg, struct log_file *log)
{
   if (!verbose)
      return;

   fprintf(stderr,_("Log file %s, line %lu: %s"), log->filename, log->nr_records, msg);

   if (!debug_mode)
   {
      fputc('\n', stderr);
      return;
   }

   /* show the offending line as well */
   fprintf(stderr,":\n%s",buffer);
}
