/*+	$Header: /nocol/src/pingmon/RCS/poll_sites.c,v 1.16 1992/06/18 21:14:17 aggarwal Exp $
 *
 */

/* Copyright 1992 JvNCnet, Princeton */

/*+
**
** FUNCTION
**
**	This function pings all the sites and determines their status.
**
**	It reads the site info from the output file and after pinging it,
**	writes the new status back in the file. It uses raw i/o.
**
**	If the status remains DOWN during the next test of the
**	site, it raises the severity (it polls 7 sites a minute)
**
**	Also, if previous status is same as present, it does not
**	change the time of test (thus the time is the time that the 
**	site has been at the current status - useful for any other
**	program that needs to know status of the sites down).
**
**      Does one pass over the entire file, returns -1 if error.
**
**/

/*
 *	$Log: poll_sites.c,v $
 * Revision 1.16  1992/06/18  21:14:17  aggarwal
 * Added code for 'multiping'. Also added macros for increasing
 * severity and sending signal to watchdog.
 *
 * Revision 1.15  1992/05/13  16:09:15  aggarwal
 * Changed the nocol.h struct so that addr is now a char[] instead of
 * inet_addr. Altered this file to match change.
 *
 * Revision 1.12  90/06/04  14:28:51  aggarwal
 * Moved get_ppid() to the poll_sites module. This was done so that
 * pingmon rescans the watchdog.pid file just before sending out the
 * SIGUSR1 signal (didn't make sense to send out a signal to a process
 * which might have restarted since the pid was first read).
 * 
 * Revision 1.11  90/04/16  18:11:09  aggarwal
 * Now sends a signal to the designated program (watchdog ?) the moment
 * a site goes to critical state (signal SIGUSR1)
 * 
 * Revision 1.9  90/03/09  12:53:22  aggarwal
 * Major changes so that it can read in SIGNAL progname to send a
 * signal to another process when a site goes to CRITICAL the first
 * time.
 * 
 * Revision 1.7  89/12/19  10:43:54  network
 * The location of 'ping' should be explicitly defined or else
 * the path should include '/etc' before calling the program
 * else the shell does not find 'ping' and all the sites show
 * down.
 * 
 * Revision 1.6  89/11/27  17:08:10  aggarwal
 * Added macro 'ESC_SEVERITY' to escalate the severity.
 * Shifted around the 'case DOWN' statements a bit since the
 * state was being changed before the 'if' test making it
 * kinda redundant.
 * 
 * Revision 1.1  89/11/08  12:09:55  aggarwal
 * Initial revision
 * 
 */

/*+ 
** INCLUDE FILES
**/

#include "pingmon.h"			/* program specific defines	*/

#include <errno.h>
#include <signal.h>
#include <string.h>
#include <sys/file.h>

/*
 * Macro to escalate the severity of a site by one level.
 * Escalation is done by subtracting one from the level, and the result
 * never goes past E_CRITICAL (a level already at E_CRITICAL is returned
 * unchanged).
 * Change E_CRITICAL to E_ERROR if you don't want this program to put
 * events in the critical state.
 *
 * The argument is fully parenthesized so that any expression can be
 * passed, but it is evaluated twice: do not pass an expression with
 * side effects.
 */
#define ESC_SEVERITY(sev) \
    (((sev) == E_CRITICAL) ? E_CRITICAL : ((sev) - 1))

/*
 * The level transition at which a signal is sent to the 'watchdog' program.
 * This is evaluated before the severity in the structure is escalated, so
 * if you want a signal to be sent when the severity changes from
 * WARN -> ERROR, then check for sev == WARN. Don't make this true when
 * 'sev' is already CRITICAL: the state will not transition any further and
 * you don't want to send a signal on every pass, only when the state first
 * changed.
 *
 * Yields 1 if a signal should be sent, 0 otherwise. The argument is fully
 * parenthesized but evaluated twice: do not pass an expression with side
 * effects.
 */
#define AT_SIGNAL_LEVEL(sev) \
    ((((sev) <= E_ERROR) && ((sev) != E_CRITICAL)) ? 1 : 0)


/*
 * Values for 'finish_status' in poll_sites(): the reason why reading the
 * event file stopped. While 'finish_status' is still zero the polling loop
 * keeps going; only REACHED_EOF counts as a successful end of the pass.
 */
#define REACHED_EOF 1
#define READ_ERROR  2

poll_sites(fdout)
     int fdout;				/* Descriptors to open files	*/
{
    extern int debug;			/* Enable debug (in pingmon.h)	*/
    static FILE *p_cmd;			/* for creating the ping cmd	*/
    static int batchsize = BATCHSIZE ;	/* num of sites to ping at a time */
    EVENT v[BATCHSIZE];			/* described in nocol.h		*/
    char line[BUFSIZ];		 	/* to create the ping command	*/
    struct tm *ltime ;    
    time_t locclock ;			/* careful: don't use 'long'	*/
    long status;		       	/* site status			*/
    int recv, bufsize;			/* recieved response		*/
    int sigpid;				/* PID of program to get signal	*/
    int i, numsites ;			/* actual number of sites read in */
    int finish_status = 0;		/* why we stopped */
    int send_signal = 0 ;	    	/* if need to send SIG to watchdog */

    if ( lseek(fdout, (off_t)0, L_SET) == -1)	/* rewind the file	*/
    {
	perror (prognm);
	return (-1);
    }

    while (!finish_status)		/* until end of all sites... */
    {
	char sites[BUFSIZ] ;		/* for list of sites to ping */
	*sites = '\0' ;

	/* try to read in as many sites as we can, up to batchsize */
	for (numsites = 0; numsites < batchsize; numsites++)
	{
	    bufsize = read(fdout, &v[numsites], sizeof(EVENT));
	    if (bufsize != sizeof(EVENT))
	    {
		finish_status = bufsize ? READ_ERROR : REACHED_EOF;
		break;
	    } 
	    else
	      strcat(strcat(sites, " "), v[numsites].site.addr);
	}	/* end for */

	if (!numsites) 		/* means something bad happened, or EOF */
	  continue;		/* ...next while statement, finish_stat set */

	/*
	 * The ping command for 'multiping' is different from the standard.
	 * Hence the 'ifdef'. It allows for the -t and -q options.
	 */

#ifdef MULTIPING
	/*
	 * multiping -q (quiet) -c <pkt count> -s <pkt size> -t (tabular) sites
	 *
	 * For tabular output, data is separated by a line, so use the
	 * sed command to chop off top portion.
	 */
	/*
	 * the output from ping looks something like this:
	 *
	 *   PING 128.121.50.145 (128.121.50.145): 56 data bytes
	 *   PING 128.121.50.147 (128.121.50.147): 56 data bytes
	 *   PING 128.121.50.140 (128.121.50.140): 56 data bytes
	 *   
	 *   -=-=- PING statistics -=-=-
	 *                                         Number of Packets
	 *   Remote Site                     Sent    Rcvd    Rptd   Lost
	 *   -----------------------------  ------  ------  ------  ----
	 *   128.121.50.145                     10      10       0    0%
	 *   128.121.50.147                     10      10       0    0%
	 *   128.121.50.140                     10      10       0    0%
	 *   -----------------------------  ------  ------  ------  ----
	 *   TOTALS                             30      30       0    0%
	 *
	 * (I've cut off the right part of the screen to make it fit)
	 * the sed command below kills everything up to and including the
	 * first row of dashes ----
	 *
	 * The site name is printed as %30.30 (30 spaces)
	 */

	sprintf(pingcmd, "%s -qtc %d -s %d %s | %s\0",
		ping, NPACKETS, DATALEN, sites, "sed '1,/^-----/d'") ;

#else /* if not MULTIPING */

	/*
	 * A typical (standard) 'ping | tail -2' output looks like this:
	 *
	 * If you have a different style ping command format and output, then
	 * alter here.
	 *
	 *	 r2d2-vikas> /usr/etc/ping -s nisc.jvnc.net 1000  5 | tail -2
	 *	 5 packets transmitted, 5 packets received, 0% packet loss
	 *	 round-trip (ms)  min/avg/max = 4/4/5
	 */
	sprintf(pingcmd,"%s -s %s %d %d | %s\0",
		ping, sites, DATALEN, NPACKETS, "tail -2" );

#endif	/* ifdef MULTIPING */

	if (debug)
	  fprintf(stderr, "(debug) %s: PINGCMD is\t%s\n", prognm, pingcmd) ;

	if ((p_cmd = popen(pingcmd, "r")) == NULL)	/* open up the pipe */
	{
	    perror("poll_sites (popen)");
	    return(-1);
	}

	/*
	 * 'multiping' produces output lines in the order in which they
	 * appeared in the command line, so we can just step thru the v[]
	 * array.
	 * After reading each line, update the corresponding event struct
	 */

      for (i = 0; i < numsites; i++)
      {
	  int n ;
	  n = fgetline(p_cmd, line, sizeof (line));
#ifdef MULTIPING
	  /*
	   * The output after 30 characters has pkts sent and recieved
	   * Can't use '%*s' to skip over the sitename since sometimes
	   * the sitename gets truncated and we end up with two words.
	   * The '30' size is defined in the multiping program (yeah, so
	   * its a hack.)
	   */
	  sscanf(&line[30], "%*d %d", &recv);
	  status = (NPACKETS - recv) > PING_THRES ? 0 : 1 ;
#else
	  if ( n <= 40)		/* bad site address */
	  {
	      recv = 0 ;
	      status = 0 ;
	  }
	  else
	  {
	      sscanf (line, "%*d %*s %*s %d", &recv);
	      status = ((NPACKETS - recv) > PING_THRES) ? 0 : 1 ;
	  }
#endif	/* MULTIPING */

	  locclock = time((time_t *) NULL);
	  ltime = localtime((long *)&locclock);

	  switch (status)
	  {
	   case 1:				/* Site responded, is up */
	      v[i].var.value = recv ;		/* change the old status */
	      v[i].nocop = SETF_UPDOUN(v[i].nocop, n_UP) ;
	      v[i].severity = E_INFO ;
	      v[i].mon = ltime->tm_mon + 1;
	      v[i].day = ltime->tm_mday;
	      v[i].hour = ltime->tm_hour;
	      v[i].min = ltime->tm_min;
	      break;
	   case 0:			/* site down,  escalate the severity */
	      if (!(v[i].nocop & n_DOWN))	/* recent change of state.. */
	      {
		  v[i].mon = ltime->tm_mon + 1;
		  v[i].day = ltime->tm_mday;
		  v[i].hour = ltime->tm_hour;
		  v[i].min = ltime->tm_min;
	      }

	      /* If previously in lower severity state, then send signal */
	      if (AT_SIGNAL_LEVEL(v[i].severity))
		send_signal = 1 ;		/* ..state, set flag */

	      /* escalate severity level */
	      v[i].severity = ESC_SEVERITY(v[i].severity);
	      v[i].var.value = recv;		/* new value and state */
	      v[i].nocop = SETF_UPDOUN (v[i].nocop, n_DOWN);
	      break;
	  }		/* end switch */

      }	    	/* end for */

	if (pclose(p_cmd) < 0)		      /* close the pipe */
	  perror("poll_sites (pclose)");

      /* rewind the file and write out the whole array */
      lseek(fdout, -(off_t)(sizeof(EVENT) * numsites), L_INCR);
      write(fdout, (char *)v, sizeof(EVENT) * numsites);

	if (send_signal && (sigpid = get_ppid(sigtoprog)) > 0)
	{
	    kill(sigpid, SIGUSR1);  /*..send out signal to watchdog */
	    if (debug)
	      fprintf(stderr, 
		      "(debug) %s: sent SIGUSR1 to %d\n", prognm, sigpid);
	}
	
    }	/* end while (until end of all sites) */

    return (finish_status != REACHED_EOF) ? -1 : 1;

}	/* end poll_sites */


/*+ 		get_ppid()
** FUNCTION:
** 	Get the pid of the process which receives the SIGUSR1 signal.
** Simply opens the file whose name is passed to it and reads the pid
** from that file. Returns zero if it gets an invalid value, else it
** returns the pid of the process.
**/
/*
 * Read a process id from the file named by 'program' (the name is that
 * of a pid file, not of an executable).
 *
 * Returns the pid read from the start of the file, or 0 if 'program' is
 * NULL, the file cannot be opened or read, or it does not start with a
 * positive decimal number. Callers can therefore test the result with
 * '> 0' before handing it to kill().
 */
int
get_ppid(program)
     char *program;
{
    extern int debug;
    int fd, nread;
    int pid = 0;			/* stays 0 unless a valid pid is read */
    char buffer[MAXLINE];

    if (program == NULL)
    {
	if (debug)
	  fprintf(stderr, "(debug) %s: (get_ppid) no program name supplied\n",
		  prognm);
	return (0);
    }

    if ((fd = open(program, O_RDONLY)) < 0)
      return (0);

    /* leave room for the terminating NUL that sscanf() needs */
    nread = read(fd, buffer, sizeof(buffer) - 1);
    if (nread < 0)
    {
	if (debug)
	  fprintf(stderr,
		  "%s (read): %s- %s\n", prognm, program, strerror(errno));
    }
    else
    {
	buffer[nread] = '\0';
	if (sscanf(buffer, "%d", &pid) != 1 || pid < 0)
	  pid = 0;			/* empty or garbage file */
    }

    close(fd);
    return (pid);
}					/* end: get_ppid		*/
