/*  Analysis of ftpd access logs

    Feed an existing analysis to STDIN; a reformatted summary is written
    to STDOUT.
*/


/* File used by GetIp2DomData() and DumpIp2DomData() to load and save the
   Ip2Dom table; the name is relative, so it is resolved against the
   current working directory. */
#define DOMAINFILE "Ip2Domains.data"
/* Text database handed to parse_nets() by main(); each accepted line is
   parsed as a network number followed by a domain name. */
#define NETDOMAINFILE "/usr/local/etc/netdomains"


#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <ctype.h>
#include <netdb.h>

/* Hand-written, unprototyped declarations of the library routines whose
   pointer return value is used in this file.
   NOTE(review): other library routines called in this file (memset,
   memcpy, strcmp, strncmp, strncpy, qsort, free, exit) have no
   declaration among the headers included above. */
extern void *malloc();
extern char *strrchr();
extern char *strchr();

int files = 0;
double totmb = 0.0;
int finfiles = 0;
double finmb = 0.0;
int forfiles = 0;
double formb = 0.0;
int dirs = 0;
double totdirmb = 0.0;
int findirs = 0;
double findirmb = 0.0;
int fordirs = 0;
double fordirmb = 0.0;


char *TotalParser = "%*s %*s %*s %*s %*s %*s %*s %*s %*s %d %*s %f %*s %*s %d %f";
char *FileParser  = "%*s %d %d %s"; /* *fil, size, finnflg, host */
char line[10240];

/* Size of the Domain[] key buffers, including the terminating NUL. */
#define DOMAINSIZE 46
/* One slot of the per-domain accounting hash table (DomainArray).
   A slot whose Domain[0] is 0 is treated as empty. */
typedef struct DomEntry {
	char	Domain[DOMAINSIZE];	/* Key, lower-cased by AccountDomain() */
	double	mbcount;		/* Sum of the `usage' values accounted;
					   LargestDomains() scales it by
					   1/(1024*1024) for printing */
	int	filecnt;		/* Number of AccountDomain() calls */
      } DomEntry;
/* One slot of the network -> domain hash table (Ip2NetDomArray).
   A slot whose name[0] is 0 is treated as empty. */
typedef struct Ip2NetDom {
	struct in_addr addr;	/* Network */
	char	name[60];	/* Domain it is in */
      } Ip2NetDom;
/* One slot of the host address -> name hash table (Ip2DomArray), which is
   also the record format read from and written to DOMAINFILE. */
typedef struct Ip2Dom {
	struct in_addr addr;
	char	name[60];
	int	validmap;	/* OR-ed with the flag given to AddI2Dentry() */
	int	attempts;	/* Attempts to resolv mapping... */
	int	used;		/* Zeroed by GetIp2DomData(), incremented by
				   FindI2Dentry() on every hit */
      } Ip2Dom;


/* Per-domain accounting table; allocated by AccountDomain() on first use. */
static DomEntry *DomainArray  = NULL;
/* Slot count shared by all three hash tables in this file. */
static int	 DomArraySize = 31547; /* A prime number, for the HASH arrays. */
/* Incremented by AccountDomain() whenever it fills a new DomainArray slot. */
static int	 DAD_Entries  = 0;
/* Host address -> name table; (re)allocated by GetIp2DomData(). */
static Ip2Dom	*Ip2DomArray  = NULL;
/* Network -> domain table; allocated by AddI2NDentry() on first use. */
static Ip2NetDom *Ip2NetDomArray = NULL;

/* Declared early because the error paths above its definition call it. */
extern void DumpIp2DomData();

/*
 * calloc -- file-local zeroing allocator taking a single byte count.
 *
 * Returns `size' zero-filled bytes obtained from malloc(), or NULL when
 * malloc() fails.
 *
 * The byte count is declared size_t because every caller in this file
 * passes a `sizeof(...) * count' expression.  With an old-style
 * definition no conversion happens at the call site, so the parameter
 * type has to match what the callers actually pass.
 *
 * NOTE(review): this definition has the same name as the standard
 * library's two-argument calloc(nmemb, size).  Renaming it would mean
 * touching every caller, so it is left alone here; <stdlib.h> must not
 * be included in this file while this definition exists, because its
 * calloc prototype conflicts with this one.
 */
void *
calloc(size)
     size_t size;
{
	void *P = malloc(size);

	if (P != NULL)
	  memset(P,0,size);
	return P;
}


/*
 * AddI2NDentry -- store one network -> domain mapping in Ip2NetDomArray.
 *
 * I2NDe  entry to copy into the table; its addr.s_addr is the hash key.
 *
 * The table is allocated on the first call.  Collisions are resolved by
 * linear probing: the entry lands in the first slot that is empty
 * (name[0] == 0) or that already holds the same address, which is then
 * overwritten.  When every slot has been probed without success, or the
 * table cannot be allocated, the function reports on stderr and exits
 * with status 99 (calling DumpIp2DomData() first in the table-full case).
 */
void
AddI2NDentry(I2NDe)
     Ip2NetDom *I2NDe;
{
	register long hash = (I2NDe->addr.s_addr) % DomArraySize;
	int cnt = DomArraySize;

	if( Ip2NetDomArray == NULL ) {
	  /* Size the table by its own element type. */
	  Ip2NetDomArray = calloc(sizeof(Ip2NetDom) * DomArraySize);
	  if( Ip2NetDomArray == NULL ) {
	    fprintf(stderr,"Out of memory allocating Ip2NetDom hash table (%d entries)\n",DomArraySize);
	    exit(99);
	  }
	}

#ifdef	DEBUG0
	/* inet_ntoa() takes the struct by value, not a pointer to it. */
	fprintf(stderr,"NetDomains: IP: '%s', Name: '%s', hash: %ld\n",
		inet_ntoa(I2NDe->addr),I2NDe->name,hash);
#endif
	/* --cnt is only evaluated while the slot is occupied by another
	   address, so cnt reaches 0 exactly when all slots were tried. */
	while ((0 != Ip2NetDomArray[hash].name[0]) &&
	       (I2NDe->addr.s_addr != Ip2NetDomArray[hash].addr.s_addr) &&
	       ( --cnt ))
	  hash = (hash + 1) % DomArraySize;

	if( cnt == 0 ) {
	  fprintf(stderr,"Ip2NetDom hash table is FULL!  Enlarge DomArraySize! (now=%d)\n",DomArraySize);
	  DumpIp2DomData();
	  exit(99);
	}
	memcpy( & Ip2NetDomArray[hash], I2NDe, sizeof(Ip2NetDom));
}

/*
 * FindI2NDentry -- look up the domain registered for an address's network.
 *
 * addr  address in network byte order (it is passed through ntohl()).
 *
 * The address is first reduced to its classful network number (class
 * A/B/C mask; other addresses are used unmasked), then searched for in
 * Ip2NetDomArray with the same linear probing AddI2NDentry() uses.
 *
 * Returns a pointer to the name stored in the table, or NULL when the
 * network is not present.  NULL is also returned when the table was
 * never allocated, e.g. because parse_nets() added no entry.
 */
char *
FindI2NDentry(addr)
     struct in_addr *addr;
{
	register long hash;
	long hash0;
	int cnt = DomArraySize;
	struct in_addr Laddr;
	unsigned long Addr,amask;
	char *rc;

	/* Nothing was ever registered: there is no table to probe. */
	if( Ip2NetDomArray == NULL )
	  return NULL;

	amask = 0xFFFFFFFF;
	Addr = ntohl(addr->s_addr);
	if (IN_CLASSA(Addr))
	  amask = IN_CLASSA_NET;
	else if (IN_CLASSB(Addr))
	  amask = IN_CLASSB_NET;
	else if (IN_CLASSC(Addr))
	  amask = IN_CLASSC_NET;
	Laddr.s_addr = htonl(Addr & amask);
	hash = (Laddr.s_addr) % DomArraySize;
	hash0 = hash;

	while((0 != Ip2NetDomArray[hash].name[0]) &&
	      (Laddr.s_addr != Ip2NetDomArray[hash].addr.s_addr) &&
	      ( --cnt )) {
	  hash = (hash + 1) % DomArraySize;
	}

	/* A hit needs an occupied slot holding the same network; an empty
	   slot (all zero) must not match a masked address of 0. */
	rc = NULL;
	if ((Ip2NetDomArray[hash].name[0] != 0) &&
	    (Laddr.s_addr == Ip2NetDomArray[hash].addr.s_addr))
	  rc = Ip2NetDomArray[hash].name;
#ifdef DEBUG0
	/* Two calls: inet_ntoa() returns a buffer that the next call
	   overwrites, so both addresses cannot share one fprintf(). */
	fprintf(stderr,"FindI2NDentry: after masking IP: '%s', hash: %ld, ",
		inet_ntoa(Laddr),hash0);
	fprintf(stderr,"returns: '%s' '%s'\n",
		inet_ntoa(Ip2NetDomArray[hash].addr),rc ? rc : "(null)");
#endif
	return rc;
}

/*
 * inet_addr -- file-local parser for dotted-decimal IP numbers.
 *
 * str  text starting with "a", "a.b", "a.b.c" or "a.b.c.d"; parsing stops
 *      at the first character that cannot continue the number.
 *
 * Returns the address in HOST byte order (the callers in this file apply
 * htonl() themselves), limited to 32 bits.  Every component followed by a
 * dot fills the next byte from the top; the last component is OR-ed into
 * the low-order bits unmasked, so "a.b" yields a<<24 | b.
 *
 * No error value is produced: text without leading digits yields 0.
 *
 * NOTE(review): this shares its name with the library inet_addr(), which
 * returns network byte order; the two are not interchangeable.
 */
unsigned long inet_addr(str)
     char *str;
{
	unsigned long addr = 0;
	unsigned long part;	/* unsigned: 200 << 24 overflows an int */
	char *s = str;
	int	shifts = 24;

	for (;;) {
	  part = 0;
	  while(isdigit((unsigned char)*s))
	    part = part*10 + (unsigned long)(*(s++) - '0');
	  if( (*s == '.') && (shifts > 0) ) {
	    addr |= (part & 0xFF) << shifts;
	    shifts -= 8;
	    ++s;
	    if (!isdigit((unsigned char)*s)) break;
	  } else {
	    /* Last component, including the fourth one of a full
	       dotted quad. */
	    addr |= part;
	    break;
	  }
	}
	return addr & 0xFFFFFFFFUL;
}


/*
 * parse_nets -- load the network -> domain database.
 *
 * fname  path of a text file.  Lines starting with '#' or ';' and empty
 *        lines are skipped; every other line is read as a dotted network
 *        number followed by whitespace and a domain name (at most 59
 *        characters are kept).
 *
 * Each accepted line is registered with AddI2NDentry() under its classful
 * network number.  A line is rejected, with a message on stderr, when any
 * octet inside the network part is 0 or 255, or when no domain name
 * follows the number.
 *
 * Returns the number of entries registered; 0 when the file cannot be
 * opened.
 */
int
parse_nets(fname)
     char *fname;
{
	FILE *FD;
	char line[132]; /* Hopefully enough */
	char *s;
	unsigned long addr,amask,atest,octet;
	int shift;
	int rc,linecnt = 0,netcnt = 0;
	Ip2NetDom	I2ND;

	FD = fopen(fname,"r");
	if( FD == NULL ) {
	  fprintf(stderr,"Failed to fopen NETWORK -> Domain database\n");
	  return 0;
	}
	while( fgets(line,sizeof line,FD) != NULL ) {
	  ++linecnt;
	  if (*line == 0) break;
	  if ((*line == '#') || (*line == ';') || (*line == '\n')) continue;
	  s = strchr(line,'\n');
	  if (s) *s = 0;		/* Snub \n */
	  addr = inet_addr(line) & 0xFFFFFFFFUL;	/* Parse network addr */
	  /* Classful mask selecting the network part */
	  amask = 0xFFFFFFFF;
	  if (IN_CLASSA(addr))
	    amask = IN_CLASSA_NET;
	  else if (IN_CLASSB(addr))
	    amask = IN_CLASSB_NET;
	  else if (IN_CLASSC(addr))
	    amask = IN_CLASSC_NET;
	  /* Flag, in the lowest bit of its own byte, every octet that is
	     0x00 or 0xFF; only flags inside the network part matter. */
	  atest = 0;
	  for (shift = 24; shift >= 0; shift -= 8) {
	    octet = (addr >> shift) & 0xFF;
	    if ((octet == 0x00) || (octet == 0xFF))
	      atest |= 1UL << shift;
	  }
	  if (atest & amask) {
	    fprintf(stderr,"Network table entry on line %d has bad network IP number\n: '%s'\n",linecnt,line);
	    continue;
	  }
	  /* Store the network number only: FindI2NDentry() masks every
	     address before searching, so host bits left in the key would
	     make the entry unreachable. */
	  I2ND.addr.s_addr = htonl(addr & amask);
	  rc = sscanf(line,"%*s %59s",I2ND.name);
	  if (rc != 1) {
	    fprintf(stderr,"Didn't get domain name from Network dbase entry line %d\n: '%s'\n",linecnt,line);
	    continue;
	  }
	  AddI2NDentry(&I2ND);
	  ++netcnt;
	}
	fclose(FD);
	return netcnt;
}


/*
 * AddI2Dentry -- store one host address -> name mapping in Ip2DomArray.
 *
 * I2De      entry to copy into the table; its addr.s_addr is the hash
 *           key.  Its validmap field is modified in place (see below).
 * validmap  flag bits OR-ed into I2De->validmap before the copy.
 *
 * The slot is found by linear probing: the first one that is empty
 * (name[0] == 0) or already holds the same address, which is then
 * overwritten.  When every slot has been probed without success the
 * function reports on stderr, calls DumpIp2DomData() and exits with
 * status 99.
 *
 * Ip2DomArray must already be allocated; this function does not check.
 */
void
AddI2Dentry(I2De,validmap)
     Ip2Dom *I2De;
     int validmap;
{
	register long hash = (I2De->addr.s_addr) % DomArraySize;
	int cnt = DomArraySize;

	while((0 != Ip2DomArray[hash].name[0]) &&
	      (I2De->addr.s_addr != Ip2DomArray[hash].addr.s_addr) &&
	      ( --cnt ))
	  hash = (hash + 1) % DomArraySize;

	if( cnt == 0 ) {
	  fprintf(stderr,"Ip2Dom hash table is FULL!  Enlarge DomArraySize! (now=%d)\n",DomArraySize);
	  DumpIp2DomData();
	  exit(99);
	}
	I2De->validmap |= validmap;
	/* The destination is the ADDRESS of the slot. */
	memcpy( & Ip2DomArray[hash],I2De,sizeof(Ip2Dom));
}

char *
FindI2Dentry(addr)
     struct in_addr *addr;
{
	register long hash = (addr->s_addr) % DomArraySize;
	int cnt = DomArraySize;

	while((0 != Ip2DomArray[hash].name[0]) && 
	      (addr->s_addr != Ip2DomArray[hash].addr.s_addr) &&
	      ( --cnt ))
	  hash = (hash + 1) % DomArraySize;

	if( cnt == 0 ) {
	  fprintf(stderr,"Ip2Dom hash table is FULL!  Enlarge DomArraySize! (now=%d)\n",DomArraySize);
	  DumpIp2DomData();
	  exit(99);
	}
	if( Ip2DomArray[hash].name[0] == 0 ) return NULL;
	++Ip2DomArray[hash].used;
	return Ip2DomArray[hash].name;
}


/*
 * GetIp2DomData -- (re)create Ip2DomArray and fill it from DOMAINFILE.
 *
 * Any previous table is freed and a zeroed one is allocated.  If
 * DOMAINFILE can be opened, its first line is read as the slot count the
 * file was written with:
 *   - equal to DomArraySize: the whole table image is read in one go; a
 *     short read is fatal (message on stderr, exit status 99);
 *   - a different number: the records are read one at a time and
 *     re-hashed through AddI2Dentry();
 *   - no number: the file is ignored.
 * Finally every slot's `used' counter is reset to 0.
 *
 * A missing or unopenable file just leaves the table empty.  If the table
 * cannot be allocated, a message is printed and Ip2DomArray stays NULL,
 * which DumpIp2DomData() treats as "nothing to save".
 *
 * NOTE(review): the file is opened "r+" although it is only read here, so
 * a file that is not writable is treated like a missing one -- confirm
 * whether that is intended.
 */
void
GetIp2DomData()
{
	FILE *DF;
	int  Entries,rc,i;
	Ip2Dom	I2D;
	char line[80];

	if( Ip2DomArray != NULL ) free(Ip2DomArray);
	Ip2DomArray = calloc(sizeof(Ip2Dom) * DomArraySize);
	if( Ip2DomArray == NULL ) {
	  fprintf(stderr,"Out of memory allocating Ip2Dom hash table (%d entries)\n",DomArraySize);
	  return;
	}

	DF = fopen(DOMAINFILE,"r+");
	if( DF == NULL ) return;

	*line = 0;	/* so a failed fgets() leaves nothing to parse */
	fgets(line,sizeof(line),DF);

	rc = sscanf(line,"%d",&Entries);
	if( (rc != 1) || (Entries != DomArraySize)) {
	  /* For reason or another didn't get it... */
	  if( rc == 1 ) { /* Size mismatch */
	    while(1) {
	      rc = fread(&I2D,sizeof(Ip2Dom),1,DF);
	      if( rc != 1 ) break;
	      AddI2Dentry(&I2D,1);
	    }
	  }
	} else {
	  rc = fread(Ip2DomArray,sizeof(Ip2Dom),DomArraySize,DF); /* FAST! */
	  if( rc != DomArraySize ) {
	    fprintf(stderr,"Fault at reading in '%s'\n",DOMAINFILE);
	    exit(99);
	  }
	}
	fclose(DF);
	for(i=0;i<DomArraySize;++i)
	  Ip2DomArray[i].used = 0;
}

/*
 * DumpIp2DomData -- save Ip2DomArray to DOMAINFILE.
 *
 * Writes DomArraySize as a decimal text line followed by the raw table
 * image, which is the layout GetIp2DomData() reads back.  Does nothing
 * when the table was never allocated.
 *
 * This is also called from the fatal-error paths, so failures here are
 * only reported on stderr; the function always returns normally.
 *
 * NOTE(review): the original carried a "Flush unused ones!" remark at
 * this point, but no pruning of unused entries is implemented.
 */
void
DumpIp2DomData()
{
	FILE *DF;

	if( Ip2DomArray == NULL ) return;

	DF = fopen(DOMAINFILE,"w");
	if( DF == NULL ) {
	  fprintf(stderr,"Failed to open '%s' for writing\n",DOMAINFILE);
	  return;
	}
	fprintf(DF,"%d\n",DomArraySize);
	if( fwrite(Ip2DomArray,sizeof(Ip2Dom),DomArraySize,DF) != (size_t)DomArraySize )
	  fprintf(stderr,"Fault at writing out '%s'\n",DOMAINFILE);
	if( fclose(DF) != 0 )
	  fprintf(stderr,"Fault at closing '%s'\n",DOMAINFILE);
}


/*
 * AccountDomain -- add one transfer to the per-domain statistics.
 *
 * Domain  NUL-terminated key; it is lower-cased IN PLACE before use.
 * usage   amount added to the entry's mbcount.
 *
 * DomainArray is allocated on the first call.  The hash is the sum of the
 * key's bytes modulo DomArraySize; collisions are resolved by linear
 * probing.  A new entry stores at most DOMAINSIZE-1 characters of the key.
 * The entry's filecnt is incremented by one on every call.
 *
 * If the table is full, or cannot be allocated, the function reports on
 * stderr, calls DumpIp2DomData() and exits with status 99.
 */
void
AccountDomain(Domain,usage)
     char *Domain;
     int usage;
{
	/* Walk the key as unsigned bytes: a negative char must reach
	   neither the <ctype.h> routines nor the hash sum, where it could
	   produce a negative table index. */
	register unsigned char *s;
	register long c,hash;

	for(s = (unsigned char *)Domain; *s; ++s) {
	  if( isupper(*s) )
	    *s = tolower(*s);
	}

	if(DomainArray == NULL) {
	  DomainArray = calloc(DomArraySize * sizeof( DomEntry ));
	  if(DomainArray == NULL) {
	    fprintf(stderr,"\nOut of memory allocating domain HASH table (%d entries)\n",DomArraySize);
	    DumpIp2DomData();
	    exit(99);
	  }
	}
	c = 0;
	for(s = (unsigned char *)Domain; *s; ++s)
	  c += *s;  /* So small, doesn't overflow yet... */
	hash = c % DomArraySize;

	if ((DomainArray[hash].Domain[0] != 0) &&
	    (strcmp(DomainArray[hash].Domain,Domain) != 0)) {
	  /* Not ours */
	  c = DomArraySize; /* Use as countdown.. */
	  while(c) {
	    /* Next hash... */
	    hash = (hash + 1) % DomArraySize;
	    if ((DomainArray[hash].Domain[0] == 0) ||
		(strcmp(DomainArray[hash].Domain,Domain) == 0))
	      break;
	    --c;
	  }
	  if( c == 0 ) {
	    fprintf(stderr,"\nHASH table overflow!  Need larger prime for table!  Was: %d\n",DomArraySize);
	    DumpIp2DomData();
	    exit(99);
	  }
	}
	if( DomainArray[hash].Domain[0] == 0 ) { /* NEW! */
	  strncpy(DomainArray[hash].Domain,
		  Domain,sizeof(DomainArray[hash].Domain)-1);
	  DomainArray[hash].Domain[sizeof(DomainArray[hash].Domain)-1] = 0;
	  DAD_Entries += 1;
	  DomainArray[hash].mbcount = 0.0;
	  DomainArray[hash].filecnt =   0;
	}
	DomainArray[hash].filecnt +=    1;
	DomainArray[hash].mbcount += (double) usage;
}

/*
 * compare_DomEntry -- qsort() comparison ordering DomEntry records by
 * DESCENDING mbcount.
 *
 * Returns 1 when a's mbcount is smaller than b's, -1 when it is larger,
 * and 0 otherwise.
 */
int compare_DomEntry(a,b)
     register DomEntry *a,*b;
{
	register double lhs = a->mbcount;
	register double rhs = b->mbcount;

	/* (rhs > lhs) contributes +1, (rhs < lhs) contributes -1. */
	return (rhs > lhs) - (rhs < lhs);
}


/*
 * LargestDomains -- print the per-domain table, largest volume first.
 *
 * outfile  stream the table is written to.
 * totmb    total volume the "share" percentages are relative to; it is
 *          divided into mbcount * 1/(1024*1024).
 *
 * Works on a sorted copy, so DomainArray itself keeps its hash order.
 * One line is written per occupied slot: scaled volume, file count,
 * domain, share of totmb and the running cumulative share.
 *
 * Prints nothing when no domain has been accounted yet.  If the scratch
 * copy cannot be allocated a message goes to stderr and nothing is
 * printed.
 */
void
LargestDomains(outfile,totmb)
     FILE *outfile;
     double totmb;
{
	DomEntry *DA;
	int i;
	double uu = 1.0/(1024.0*1024.0);
	register double part,cumul = 0.0;

	/* DomainArray stays NULL until AccountDomain() has run once. */
	if ((DomArraySize == 0) || (DomainArray == NULL))
	  return;

	DA = calloc( (DomArraySize+1) * sizeof(DomEntry) );
	if (DA == NULL) {
	  fprintf(stderr,"Out of memory sorting domain table (%d entries)\n",DomArraySize);
	  return;
	}
	memcpy(DA,DomainArray, DomArraySize * sizeof(DomEntry) );
	qsort(DA,DomArraySize,sizeof(DomEntry),compare_DomEntry);
	for(i = 0; i < DomArraySize; ++i )
	  if( DA[i].Domain[0] != 0 ) {
	    part = 100.0*(DA[i].mbcount*uu / totmb);
	    cumul += part;
	    fprintf(outfile,"  %9.3f  %9d  %-16s  %5.2f%%  %5.2f%%\n",
		    DA[i].mbcount*uu,DA[i].filecnt,DA[i].Domain,part,cumul);
	  }
	free(DA);
}


/*
 * register_domain -- work out which domain a transfer belongs to and
 * account it with AccountDomain().
 *
 * hostp  host field of the log line, modified in place: either a name, or
 *        a bracketed number such as "[128.214.6.100]".
 * size   amount handed on to AccountDomain() as `usage'.
 * line   the whole input line; not used.
 *
 * Key selection:
 *   - name: the text after its last '.', if that starts with a letter,
 *     otherwise the whole name;
 *   - bracketed number: the domain FindI2NDentry() knows for its network,
 *     reduced the same way to the text after the last '.'; when the
 *     network is unknown, the dotted classful network number itself.
 * The key is truncated to DOMAINSIZE-1 characters.
 *
 * Reverse lookups of unknown addresses (gethostbyaddr() plus caching via
 * AddI2Dentry()) were already disabled with "#if 0" in the original; that
 * dead code has been dropped here.
 */
void
register_domain(hostp,size,line)
     char *hostp;
     int size;
     char *line;
{
	char Domain[DOMAINSIZE];
	unsigned long addr = 0;
	struct in_addr IA;
	char *s,*ss,*d;

	/* Every strncpy() below copies at most sizeof(Domain)-1 bytes, so
	   this terminator survives. */
	Domain[sizeof(Domain)-1] = 0;

	if( *hostp == '[' ) {
	  s = ++hostp;
	  while((*s != 0) && (*s != ']')) ++s;
	  if( *s == ']' ) *s = 0;
	  if( (addr = inet_addr( hostp )) == (unsigned long)-1)
	    strncpy(Domain,"[BAD]",sizeof Domain);
	  else {
	    /* Just networks... */
	    IA.s_addr = htonl(addr);
	    s = FindI2NDentry(&IA);
#ifdef DEBUG
	    fprintf(stderr,"Searching for '%s', got: '%s'\n",hostp-1,s ? s : "(null)");
#endif
	    if( NULL == s ) {
	      /* Unknown network: use its classful network number */
	      if( IN_CLASSA(addr) )
		addr &= IN_CLASSA_NET;
	      else if( IN_CLASSB(addr) )
		addr &= IN_CLASSB_NET;
	      else if( IN_CLASSC(addr) )
		addr &= IN_CLASSC_NET;

	      IA.s_addr = htonl(addr);
	      /* inet_ntoa() takes the struct by value, not a pointer. */
	      s = inet_ntoa( IA );
	    }
	    strncpy(Domain,s,sizeof(Domain)-1);
	    ss = strrchr(Domain,'.');
	    if ((ss != NULL) && isalpha((unsigned char)ss[1])) {
	      /* Slide the last label to the front.  Source and target
		 overlap, which strncpy() does not allow; a forward copy
		 is safe because the source lies behind the target. */
	      ++ss;
	      d = Domain;
	      while ((*d++ = *ss++) != 0)
		;
	    }
	  }
	}
	else {   /* if (*hostp != '[')... */
	  s = strrchr(hostp,'.');
	  if (s) ++s;
	  if ((s != NULL) && isalpha((unsigned char)*s))
	    strncpy(Domain,s,sizeof(Domain)-1);
	  else
	    strncpy(Domain,hostp,sizeof(Domain)-1);
	}
	/* Domain[]  contains toplevel domain/network number/... */
	AccountDomain(Domain,size);
}


/*
 * main -- read an analysis from stdin, print the summary on stdout.
 *
 * Input lines are handled by their first characters:
 *   " TotAls:"      added to the overall totals;
 *   " TotFinnish:"  added to the Finnish totals;
 *   "/..."          per-file line: file name, size, a flag field and a
 *                   host field that may be enclosed in single quotes; the
 *                   size is accounted to the host's domain via
 *                   register_domain().
 * Everything else is ignored.  A totals line that does not yield all four
 * numbers is fatal (exit status 3).
 *
 * After the input is exhausted the totals, the non-Finnish remainder with
 * its percentages, and the per-domain table are printed.
 *
 * Returns 0.
 */
int
main()
{
	int rc;
	int i,di,size;
	float f,df;
	char *s, *sizep, *hostp, *rcp;

	parse_nets(NETDOMAINFILE);
	GetIp2DomData();

	while(!feof(stdin) && !ferror(stdin)) {
	  *line = 0;
	  rcp = fgets(line,sizeof(line),stdin);
	  if( rcp == NULL ) {
	    break;
	  }
	  if( *line == ' ' ) {
	    if( strncmp(" TotAls:",line,8)==0 ) {
	      /* Totals counters */
	      rc = sscanf(line,TotalParser,&i,&f,&di,&df);
	      if(rc != 4) {
		fprintf(stderr,"Failed to parse FOUR values from input line. rc=%d\n",rc);
		DumpIp2DomData();
		exit(3);
	      }
	      files += i;
	      totmb += f;
	      dirs  += di;
	      totdirmb += df;
	    } else if (strncmp(" TotFinnish:",line,12)==0 ) {
	      /* Finnish share... */
	      rc = sscanf(line,TotalParser,&i,&f,&di,&df);
	      if(rc != 4) {
		fprintf(stderr,"Failed to parse FOUR values from input line. rc=%d\n",rc);
		DumpIp2DomData();
		exit(3);
	      }
	      finfiles += i;
	      finmb += f;
	      findirs += di;
	      findirmb += df;
	    }
	  }
	  if ( *line != '/' ) continue; /* Not for us... */
	  /* Count by COUNTRIES(Domains) */
	  s = line;
	  while((*s != 0) && (*s != ' ')) ++s; /* File name */
	  while((*s != 0) && (*s == ' ')) ++s; /* Space after it */
	  sizep = s;
	  size  = 0;	/* stays 0 when the field is not a number */
	  rc = sscanf(sizep,"%d",&size);
	  while((*s != 0) && (*s != ' ')) ++s; /* Size.. */
	  while((*s != 0) && (*s == ' ')) ++s;
	  while((*s != 0) && (*s != ' ')) ++s; /* FinnFlg.. (not used) */
	  while((*s != 0) && (*s == ' ')) ++s;

	  if( *s == '\'' ) ++s;  /* Points to begin of host name. */
	  hostp = s;
	  while((*s != 0 ) && (*s != '\'')) ++s;
	  if( *s == '\'' ) *s = 0;
	  /* hostp  points to '[' in case of number only,
	     or reversed host name. */
	  register_domain(hostp,size,line);
	}

	/* Foreign = everything that is not Finnish. */
	forfiles = files - finfiles;
	formb = totmb - finmb;
	fordirs = dirs - findirs;
	fordirmb = totdirmb - findirmb;
	printf("Tot: %8d files, %9.1f MB, Dirs: %8d, %9.1f MB\n",
	       files,totmb,dirs,totdirmb);
	printf("For: %8d files, %9.1f MB, Dirs: %8d, %9.1f MB\n                             (%4.1f%%/%4.1f%%/%4.1f%%/%4.1f%%)\n\n",
	       forfiles,formb,fordirs,fordirmb,
	       (forfiles*100.0)/files,(formb*100.0)/totmb,
	       (fordirs*100.0)/dirs,(fordirmb*100.0)/totdirmb);
	printf(" %10s  %9s  %-16s   %5s  %s\n","Volume MB","Files","Top domain","Share","Cumulative");

	DumpIp2DomData();
	LargestDomains(stdout,totmb);
	return 0;
}
