#!/bin/csh  -f
#
# $Header: /home/vikas/netmgt/nocol/src/utility/RCS/keepalive_monitors,v 1.10 1994/06/17 15:19:55 vikas Exp $
#
# Make sure that the various nocol programs are doing their job and are
# not dead. Mails out the old errors file to the OPS folks.
#
# This script checks that every program listed in 'PROGRAMS1' is running
# on HOST1 and every program listed in 'PROGRAMS2' is running on HOST2
# (either list can be empty). Set these lists and hostnames, and set a
# mail address for OPSMAIL as well.
#
# Can run this from the crontab every 30 minutes.
#
# DEFINE THE VALUES OF 'TOP' 'OPSMAIL' 'PROGRAMS1-2' 'HOST1-2'
#
#	-Vikas Aggarwal,  vikas@navya.com, May 1994
#
umask 002

## Site-specific settings -- tweak these.
##
## TOP = "/nocol" ?  OPSMAIL = "ops@your.domain" ?
set TOP = "<TOP>"
set OPSMAIL = "<OPSMAIL>"			# mail on restarting

## Which monitors belong on which host.  Each PROGRAMSn is a
## blank-separated list of program names and may be left empty.
set HOST1 = "nocol.navya.com"
set PROGRAMS1 = "noclogd etherload ippingmon rpcpingmon hostmon"
set HOST2 = "host2.navya.com"
set PROGRAMS2 = "etherload.host2"

## Everything below is derived from TOP and rarely needs editing,
## except perhaps 'MAIL', whose location differs between systems.
##
set MAIL = /usr/ucb/mail
set ETC = "$TOP/etc"
set BIN = "$TOP/bin"
set path = ( $BIN /bin /etc /usr/bin /usr/ucb /usr/lib )

set HOST = `hostname`

##
# Which program should run on which host.
#
# Sets PROGRAMS to the list configured for the current host.  If the
# current host is neither HOST1 nor HOST2, a note is printed and the
# script exits with status 0; it also exits quietly when the selected
# list is empty.
#
# ${HOST} is quoted in the comparisons so that an empty result from
# `hostname` reaches the 'else' branch instead of aborting the script
# with a csh expression syntax error.
if ( "${HOST}" == "$HOST1" ) then
	set PROGRAMS = "$PROGRAMS1"
else if ( "${HOST}" == "$HOST2" ) then
	set PROGRAMS = "$PROGRAMS2"
else
	echo "Current host is not one of $HOST1 or $HOST2"
	exit 0
endif

if ( "${PROGRAMS}" == "")  exit 0

# The '-x $p' test below is relative, so work from the bin directory.
cd $BIN

## For every configured monitor: decide whether it is still alive and,
#  if it is not (and its binary is executable), mail the previous error
#  file to OPSMAIL and start it again in the background.
foreach p ( ${PROGRAMS} )
  set pidfile = "${ETC}/${p}.pid"
  set errfile = "${ETC}/${p}.error"
  set alive = 1			# assume running until shown otherwise

  if ( -e $pidfile ) then
	# Monitor keeps a pid file: look for that pid under the program's name.
	set pid = `head -1 $pidfile`
	set nmatch = `ps ${pid} | grep ${p} | wc -l`
	if ( $nmatch == 0 ) set alive = 0
  else
	# No pid file: search the full process list for the program's path.
	set procs = `ps -ax | egrep "${BIN}/${p}" | grep -v grep`
	if ( "$procs" == "" ) set alive = 0
  endif

  ## Restart only what is dead and can actually be executed.
  if ( $alive == 0 && -x $p ) then
    if ( $?prompt ) echo "Starting $p at `date` on `hostname`"

    # Mail the old error output first; the restart below overwrites the file.
    (echo "previous error file"; echo "---"; if ( -e $errfile ) cat $errfile) | ${MAIL} -s "NOCOL: keepalive restarting ${p} on ${HOST}" $OPSMAIL
    ( ${BIN}/${p} ) >&! $errfile &
  endif

end # of foreach()

####
