#!/usr/local/bin/perl
# $Id: monitor,v 1.2 2018/03/29 23:05:59 gps Exp $
########################################################################
# Monitor and notification program for:
#   1. disk space
#   2. computers
#   3. processes
#   4. program-specific info.
#
# History:
#   2018/03/29 MAA Modified 'monitor_prog()' to return more meaningful error(s).
#                  See http://www.perlmonks.org/?node_id=81640
#                  Other minor tweaks/comments.
#   2014/05/21 PNL Ported to Linux: changed some paths; changed monitor_alive
#                  to support Linux ping; added error check after running ps.
#   08/06/18 DSN   Changed open to specify mode as separate argument.
#   07/08/30 PNL   Changed listpat to allow full email addresses (including
#                  `@' and `.').
#   06/01/12 PNL   Changed monitor_proc to look at entire cmd field of ps
#                  output.  The proc RE in config file must match this entire
#                  cmd field.
#   99/03/01 DSN   Add BOOT_DELAY env var option, increase timestamp precision
#                  on logging messages to seconds and 4-digit year.
#   98/02/21 DSN   Fixed placement of ALARM_USER label.
#   98/01/10 DSN   Convert to perl5 modules, use strict.
#   97/12/09 DSN   Added optional PIDFILE variable to config file.
#   97/08/17 DSN   Converted to Solaris and SYSV command syntax.
#   95/03/05 DSN   Convert from `cmd` to explicit pipe from cmd.
#                  `cmd` appears to have a memory leak.
#   94/04/15 DSN   Implement factoring in user and action notification list.
#   94/04/15 DSN   Initial coding
#
########################################################################

use strict;
use sigtrap;
use FileHandle;
use Getopt::Std;

use vars qw ( $ps $df $ping $mail $pager );
use vars qw ( $DRUN $DTIME $DSORT $DCMD $DALARM $DDUMP $DPROC );
use vars qw ( $TDELIM $IDELIM $ADELIM );
use vars qw ( $ps_time $ps_valid $default_notify_mode $read_config_flag );
use vars qw ( $opt_h $opt_d $opt_l $opt_b $debug $logfile );
use vars qw ( $config_file $logging $cmdname $host $now );
use vars qw ( @monitor_list @ps );

########################################################################
# print_syntax - print syntax and exit.
#   $cmdname is the program path; only its basename is shown.
#   global variables: $config_file (shown as the default config file)
#   Does not return: exits with status 0.
########################################################################
sub print_syntax {
    my($cmdname) = @_;

    $cmdname = $1 if ($cmdname =~ m|^.*/([^/]+)$|);
    # print (not printf): the text is not a format string.
    print <<"END_SYNTAX";
$cmdname - Monitor processes, computers, disks, and other progs.
Syntax: $cmdname [-h] [-b] [-d n] [-l logfile] [config_file]
  where:
    -b            Boot option.  Delay monitoring at boot time for BOOT_DELAY
                  seconds specified in config file.
    -d n          Debug value n.  Value can be the OR of:
                    1 = print scheduling info
                    2 = print time info
                    4 = print sorting info
                    8 = print out commands
                   16 = print alarm messages instead of sending them.
                   64 = display monitor_list structure
                  128 = print proc list when missing process.
    -h            Help - prints this help message.
    -l logfile    Log all alarms to the specified logfile.
    config_file   Configuration file.  Default file is:
                    $config_file
END_SYNTAX
    exit(0);
}

########################################################################
# Main procedure for monitor.
#   Each entry of @monitor_list is a $TDELIM-joined string:
#     run_at, interval, type, notify_interval, next_notify,
#     notify_on_clear, state, notify, <type-specific arguments...>
#   The list is kept sorted by run_at; the loop repeatedly takes the
#   first entry, sleeps until it is due, runs it and re-inserts it.
########################################################################
{
    ####################################################################
    # Initialization.
    ####################################################################
    # Read-only "constants", created by aliasing globs to literals.
    *ps     = \'/bin/ps -ef';
    *df     = \'/bin/df -k';
    *ping   = \'/bin/ping';
    *mail   = \'/bin/mail';
    *pager  = \'/usr/local/bin/pager';
    *DRUN   = \1;
    *DTIME  = \2;
    *DSORT  = \4;
    *DCMD   = \8;
    *DALARM = \16;
    *DDUMP  = \64;
    *DPROC  = \128;
    *TDELIM = \'!';
    *IDELIM = \',';
    *ADELIM = \':';
    $default_notify_mode = "email";
    $config_file = "monitor.config";
    $ps_valid = 10;
    $logging = 0;
    STDERR->autoflush(1);
    STDOUT->autoflush(1);

    my($monitor,$run_at,$interval,$type,$notify_interval,$next_notify,$notify_on_clear);
    my($state,$notify,@rest);
    my($sleep,$response);

    getopts ("hbd:l:");
    &print_syntax($0) if ($opt_h);
    $debug = $opt_d;
    $config_file = $ARGV[0] if ($#ARGV >= 0);
    $logfile = $opt_l;
    $cmdname = $0;
    $cmdname = $1 if ($cmdname =~ m|^.*/([^/]+)$|);
    chomp($host = `/bin/hostname`);
    $host = $1 if ($host =~ /^(\w+)\./);

    # NOTE - signal handlers must be installed AFTER dump.
    $SIG{'HUP'}  = \&hup_handler;
    $SIG{'ALRM'} = \&alrm_handler;
    $SIG{'TERM'} = \&term_handler;

  READ_CONFIG:
    # Close/open and select logfile if logfile was specified.
    if ($logging) {
        # Fall back to STDOUT first so that output is not sent to a
        # closed handle should the re-open below fail.
        select(STDOUT);
        close (LOG);
    }
    $logging = ($logfile ne "" && open(LOG, ">>", "$logfile"));
    select(LOG), $| = 1 if ($logging);

    @monitor_list = ();
    $now = time();
    printf ("Reading config file: %s at %s\n", $config_file, &date_string($now));
    &read_config ($config_file);
    $read_config_flag = 0;
    @monitor_list = sort(by_time @monitor_list);
    &update_pidfile();

    # At boot time, delay monitoring for specified time to allow system to
    # finish startup processing.
    if ($opt_b && $ENV{"BOOT_DELAY"} > 0) {
        my($boot_delay) = $ENV{"BOOT_DELAY"};
        $now = time();
        printf ("Boot delay of %s seconds at %s\n", $boot_delay, &date_string($now));
        sleep ($boot_delay);
        # Delay only once: a later HUP re-read comes back through
        # READ_CONFIG and must not stall monitoring again.
        $opt_b = 0;
    }

    # Always process the first element on the list.
    $now = time();
    printf ("Monitoring started at %s\n", &date_string($now));
    for(;;) {
        last if ($#monitor_list < 0);
        &dump_monitor_list() if ($debug & $DDUMP);

        $now = time();
        $monitor = shift(@monitor_list);
        ($run_at,$interval,$type,$notify_interval,$next_notify,$notify_on_clear,
         $state,$notify,@rest) = split(/$TDELIM/,$monitor);
        if (($sleep = $run_at - $now) > 0) {
            print "next time = $run_at, sleep for $sleep\n" if ($debug & $DSORT);
            sleep ($sleep);
        }
        # hup_handler interrupts the sleep; the list is rebuilt from the
        # config file, so the entry just shifted off is not lost.
        goto READ_CONFIG if ($read_config_flag);

        $now = time();
        $response = "";
        printf ("At %d (%s) run %s : %s\n", $now, &date_string($now), $type, $monitor)
            if ($debug & $DRUN);
        if    ($type =~ /disk/)  { $response = &monitor_disk(@rest); }
        elsif ($type =~ /prog/)  { $response = &monitor_prog(@rest); }
        elsif ($type =~ /proc/)  { $response = &monitor_proc(@rest); }
        elsif ($type =~ /alive/) { $response = &monitor_alive(@rest); }

        if ($response ne "") {
            # Alarm raised: alarm_notify stays silent until next_notify.
            &alarm_notify ($notify,$response,$type,$now,$next_notify);
            $next_notify = $now + $notify_interval if ($now >= $next_notify);
            $state = 1;
        } else {
            if ($state == 1) {
                # Alarm just cleared.  A next_notify in the future makes
                # alarm_notify silent when notify_on_clear is not set.
                $response = "Alarm cleared from $type " . join(' ',@rest);
                $next_notify = ($notify_on_clear) ? $now : $now + 1;
                &alarm_notify ($notify,$response,$type,$now,$next_notify);
            }
            $next_notify = $now;
            $state = 0
        }
        $run_at += $interval;
        $monitor = join($TDELIM,$run_at,$interval,$type,$notify_interval,$next_notify,
                        $notify_on_clear,$state,$notify,@rest);
        &insert_by_time($monitor);
    }
}

########################################################################
# read_config - Read configuration file
#   global variables: $now, %ENV
#   Lines of the form VAR=value are stored in %ENV; all other
#   non-blank, non-comment lines must be:
#     type target notify interval notify_interval notify_on_clear [params]
#   and are handed to the parse_* routine matching the type.
#   Dies if the file cannot be opened or a line is not recognised.
########################################################################
sub read_config {
    my ($config) = @_;
    my($line,$type,$notify,@args,$var,$val,$target);

    $now = time();
    open (CONFIG, "<", $config) || die "Unable to open config file $config: $!";
    while ($line = <CONFIG>) {
        # chomp, not chop: a last line without a newline keeps its final
        # character.
        chomp($line);
        next if ($line =~ /^\s*\#/);
        next if ($line =~ /^\s*$/);

        # Look for environment variable setting.
        ($var,$val) = $line =~ /^\s*(\w+)=(.*)/;
        if ($var ne "") {
            $ENV{$var} = $val;
            print ("$line", "\n");
            next;
        }

        # Look for config line.
        ($type,$target,$notify,@args) = $line =~
            /^\s*(\S+)\s+([^\"]\S*|\"[^\"]*\")\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s*(\S?.*)/;
        $notify = &parse_notify($notify);
        print (join($TDELIM,$type,$target,$notify,@args), "\n");# if ($debug & $DTIME);
        &parse_prog($target,$notify,@args),  next if ($type =~ /prog/);
        &parse_disk($target,$notify,@args),  next if ($type =~ /disk/);
        &parse_proc($target,$notify,@args),  next if ($type =~ /proc/);
        &parse_alive($target,$notify,@args), next if ($type =~ /alive/);
        die "invalid line in config: $line";
    }
    close(CONFIG);
}

########################################################################
# alarm_notify - handle alarm raised by monitor.
#   global variables: $host, $cmdname, %ENV, @ps, debug variables
#   $userlist is an $IDELIM-separated list of user$ADELIM action pairs.
#   Nothing is sent while $now is earlier than $next_notify.
#   The upper-cased action names the %ENV entry holding the command
#   the message is piped to; the user is passed as its argument.
########################################################################
sub alarm_notify {
    my($userlist,$response,$type,$now,$next_notify) = @_;
    my(@users,$datestr,$ulist,$user,$action,$cmd,$msg,$ACTION);

    if ($now < $next_notify) {
        print ("===> Silent alarm at $now for $userlist\: $response\n")
            if ($debug & $DALARM);
        return;
    }
    print ("===> Notify Alarm at $now for $userlist\: $response\n")
        if ($debug & $DALARM);
    @users = split(/$IDELIM/,$userlist);
    $datestr = &date_string($now);

    # Dump the cached ps output only when debug flag 128 is set.  (The
    # bare constant $DPROC is always true.)
    if ($type =~ /proc/i && ($debug & $DPROC)) {
        print ("proc list:\n");
        print (@ps);
    }

  ALARM_USER:
    foreach $ulist (@users) {
        ($user,$action) = split(/$ADELIM/,$ulist);
        $ACTION = ($action eq "") ? $ENV{DEFAULT_NOTIFY_MODE} : $action;
        $ACTION =~ tr/a-z/A-Z/;
        $ACTION = $ENV{$ACTION};
        if ($ACTION eq "") {
            # Invalid action - print error and skip to next user.
            $msg = "$datestr Invalid action $action for $user - $response\n";
            print ($msg);
            next ALARM_USER;
        }
        $cmd = "$ACTION $user";

        $msg = sprintf('%s (%s@%s) %s' . "\n", $datestr, $cmdname, $host, $response);
        print ("$cmd; $msg");
        # Debug flag 16: print alarm messages instead of sending them.
        next ALARM_USER if ($debug & $DALARM);

        if (open (CMD, "|-", $cmd)) {
            print (CMD $msg);
            close (CMD);
        } else {
            $msg = "Error sending previous alarm via $action to $user\n";
            print ($msg);
        }
    }
}

########################################################################
# parse_notify - parse a notify list
#   Notify list can consist of N1 or N1,...,Nk where
#     Ni = userlist | userlist:actionlist where
#       userlist = user | (user1,...,userN)
#       actionlist = action | (action1,...,actionN)
#   global variables: $ADELIM, $IDELIM, $default_notify_mode, %ENV
#   An environment variable name may be used in place of any
#   user:action string.  The value of the environment variable
#   will be used for the user:action value.
#   Returns the expanded list: every user paired with every action,
#   as user$ADELIM action entries joined by $IDELIM.
#   Dies on a list that cannot be parsed.
########################################################################
sub parse_notify {
    my($input) = @_;
    my($output,$inlist,$first,$rest,$ulist,$alist,$user,$action);
    my(@users,@actions,@out);
    my($listpat) = '(([\w\-\@\.]+)|\([\w\-\@\.]+(,[\w\-\@\.]+)*\))';

    $inlist = $input;
    # Allow environment variable substitution.
    $inlist = $ENV{$inlist} if ($ENV{$inlist} ne "");
    while ($inlist ne "") {
        # Pick off next ulist:al entry.  $1 is the whole entry; this
        # avoids $& and $', which slow down every regex in the program.
        if ($inlist !~ /^($listpat(:$listpat)?)/) {
            die "Invalid notify list: $input parsing: $inlist";
        }
        $first = $1;
        $rest = substr($inlist, length($first));
        substr($rest,0,1) = ""
            if (length($rest)>0 && substr($rest,0,1) eq ",");

        # Allow environment variable substitution.
        $first = $ENV{$first} if ($ENV{$first} ne "");

        # Split the entry into a user and action list.
        ($ulist,$alist) = split ($ADELIM,$first);
        $alist = $default_notify_mode if ($alist eq "");

        # Remove parentheses from the user and mech lists.
        $ulist = $1 if ($ulist =~ /^\((.+)\)$/);
        $alist = $1 if ($alist =~ /^\((.+)\)$/);
        @users = split ($IDELIM,$ulist);
        @actions = split ($IDELIM,$alist);
        foreach $user (@users) {
            foreach $action (@actions) {
                push(@out, $user . $ADELIM . $action);
            }
        }
        $inlist = $rest;
    }
    $output = join($IDELIM,@out);
    return ($output);
}

########################################################################
# parse_prog - parse prog line.
#   global variables: @monitor_list, $now, $TDELIM
#   Appends a "prog" entry, first due at $now, to @monitor_list.
#   Dies if prog or notify is empty or interval is not positive.
########################################################################
sub parse_prog {
    my($prog,$notify,$interval,$notify_interval,$notify_on_clear,$params)= @_;
    my($monitor,$next_notify,$state);

    die "Invalid prog line: $prog $notify $interval"
        if ($prog eq "" || $notify eq "" || $interval <= 0);
    $next_notify = $now;
    $state = 0;
    $monitor = join($TDELIM,$now,$interval,"prog",$notify_interval,
                    $next_notify,$notify_on_clear,$state,$notify,$prog,$params);
    push(@monitor_list,$monitor);
}

########################################################################
# parse_disk - parse disk line.
#   global variables: @monitor_list, $now, $TDELIM
#   Appends a "disk" entry, first due at $now, to @monitor_list.
#   Dies if disk, notify or params is empty or interval is not positive.
########################################################################
sub parse_disk {
    my($disk,$notify,$interval,$notify_interval,$notify_on_clear,$params) = @_;
    my($monitor,$next_notify,$state);

    die "Invalid disk line: $disk $notify $interval $params"
        if ($disk eq "" || $notify eq "" || $interval <= 0 || $params eq "");
    $next_notify = $now;
    $state = 0;
    $monitor = join($TDELIM,$now,$interval,"disk",$notify_interval,
                    $next_notify,$notify_on_clear,$state,$notify,$disk,$params);
    push(@monitor_list,$monitor);
}

########################################################################
# parse_proc - parse proc line.
#   global variables: @monitor_list, $now, $TDELIM
#   Appends a "proc" entry, first due at $now, to @monitor_list.
#   Dies if proc or notify is empty or interval is not positive.
########################################################################
sub parse_proc {
    my($proc,$notify,$interval,$notify_interval,$notify_on_clear,$params) = @_;
    my($monitor,$next_notify,$state);

    die "Invalid proc line: $proc $notify $interval $params"
        if ($proc eq "" || $notify eq "" || $interval <= 0);
    $next_notify = $now;
    $state = 0;
    $monitor = join($TDELIM,$now,$interval,"proc",$notify_interval,
                    $next_notify,$notify_on_clear,$state,$notify,$proc,$params);
    push(@monitor_list,$monitor);
}

########################################################################
# parse_alive - parse alive line.
#   global variables: @monitor_list, $now, $TDELIM
#   Appends an "alive" entry, first due at $now, to @monitor_list.
#   Dies if alive or notify is empty or interval is not positive.
########################################################################
sub parse_alive {
    my($alive,$notify,$interval,$notify_interval,$notify_on_clear,$count) = @_;
    my($monitor,$next_notify,$state);

    die "Invalid alive line: $alive $notify $interval $count"
        if ($alive eq "" || $notify eq "" || $interval <= 0);
    $next_notify = $now;
    $state = 0;
    $monitor = join($TDELIM,$now,$interval,"alive",$notify_interval,
                    $next_notify,$notify_on_clear,$state,$notify,$alive,$count);
    push(@monitor_list,$monitor);
}

########################################################################
# monitor_disk - monitor disk space.
#   global variables: $df, debugging variables
#   $free is the threshold: "NN%" alarms when the disk is at least
#   NN percent full; a plain number alarms when the free amount
#   reported by df is at or below it.
#   Returns "" when all is well, otherwise the alarm/error text.
########################################################################
sub monitor_disk {
    my($disk,$free) = @_;
    my($cmd,$dfree,$pfull,@result);

    $cmd = "$df $disk";
    print ("$cmd","\n") if ($debug & $DCMD);
    unless (open (PIPE, "-|", "$cmd")) {
        return ("Unable to run df: $!");
    }
    @result = <PIPE>;
    close (PIPE);
    return ("Error $? from df") if ($?);
    return ("Bad data for disk $disk") if ($#result < 1);

    # Drop the header line.
    shift (@result);
    if ($result[0] =~ /^\S+\s+\S+\s+\S+\s+(\S+)\s+(\S+)\s+/) {
        # device blocks used (avail) (capacity) mount
        $dfree = $1;
        $pfull = $2;
    } else {
        # Otherwise try the next line in the wrapped layout (presumably a
        # long device name on its own line, figures indented below it).
        # Both figures must be captured here as well; with a single
        # capture group the percent-full test below could never fire.
        shift(@result);
        if ($result[0] =~ /^\s+\S+\s+\S+\s+(\S+)\s+(\S+)\s+/) {
            $dfree = $1;
            $pfull = $2;
        } else {
            return ("Unknown data for disk $disk");
        }
    }

    if ($free =~ /\d+%$/) {
        # Strip the trailing '%' from both values before comparing.
        chop($free);
        chop($pfull);
        return ("disk $disk\: $pfull% full") if ($pfull >= $free);
    } else {
        return ("disk $disk\: $dfree free") if ($dfree <= $free);
    }
    return ("");
}

########################################################################
# monitor_proc - monitor whether a program is running.
#   global variables: $now, $ps, $ps_time, $ps_valid, @ps, $ADELIM,
#                     debugging variables
#   $userproc is "userRE:procRE"; both are matched, anchored at both
#   ends, against the user and command fields of the ps output.
#   The ps output is cached in @ps and re-read once it is more than
#   $ps_valid seconds old.
#   Returns "" if a matching process exists, otherwise the alarm text.
########################################################################
sub monitor_proc {
    my($userproc) = @_;
    my($cmd,$line,$ps_user,$ps_proc);
    my($user,$proc) = split(/$ADELIM/,$userproc);

    if ($ps_time + $ps_valid < $now) {
        $cmd = $ps;
        printf ("%s\n", $cmd) if ($debug & $DCMD);
        unless (open (PIPE, "-|", $cmd)) {
            return ("Unable to run ps: $!");
        }
        @ps = <PIPE>;
        close (PIPE);
        return ("Error $? from ps") if ($?);
        $ps_time = time();
    }

    foreach $line (@ps) {
        ($ps_user) = $line =~ /^\s*(\S*)/;
        next if (! ($ps_user =~ /^$user$/));
        # Skip the first 46 columns plus the rest of that field; what
        # follows is taken as the entire cmd field.
        ($ps_proc) = $line =~ /.{46}\S*\s+(\S+.*$)/;
        next if (! ($ps_proc =~ /^$proc$/));
        return ("");
    }
    return ("missing proc: $user\:$proc");
}

########################################################################
# monitor_prog - monitor output from specific program.
#   global variables: debugging variables
#   Expectations of a program that is run under monitor:
#     writes to STDOUT only when conditions are unsatisfactory;
#     if conditions are normal, it should write nothing to STDOUT
#     The STDOUT of the program will be sent as part of the page/email
#     anything written to STDERR will go to the terminal or where the
#     command line redirects it, e.g., piped to 'conlog'.
#   Exit status: should be 0 under most conditions;
#     use non-zero exit status only for failure to execute part of
#     the program.  This non-zero exit status will REPLACE
#     anything written to STDOUT in the page/email.
#   Returns "" when the program wrote nothing and exited 0, otherwise
#   the alarm/error text.  Dies if the pipe cannot be forked.
########################################################################
sub monitor_prog {
    my($prog,$params) = @_;
    my($cmd,$msg,@result);

    $cmd = "$prog $params";
    print ("$cmd","\n") if ($debug & $DCMD);
    open (PIPE, "-|", "$cmd") or die "cannot fork: $!";
    @result = <PIPE>;
    close (PIPE);

    # return error value - see perldoc -v '$?' for details
    if ($? == -1) {
        $msg = sprintf("Error %d from %s: failed to execute", $?, $prog);
        return ($msg);
    }
    elsif ($? & 127) {
        $msg = (sprintf "Error $? from $prog: child died with signal %d, %s coredump\n",
                ($? & 127), ($? & 128) ? 'with' : 'without');
        return ($msg);
    }
    elsif ( ($? >> 8 ) != 0 ) {
        # called external program run failure
        $msg = sprintf ("Error %d from %s: exited\n", $? >> 8, $prog);
        return ($msg);
    }

    # return ALARM msg
    $msg = join(' ', map { sprintf " %s", $_ } @result);
    # chomp, not chop: output whose last line has no newline must not
    # lose its final character.
    chomp $msg;
    return ($msg);
}

########################################################################
# monitor_alive - monitor machine.
#   global variables: $ping, debugging variables
#   Pings $computer up to $count times (at least once) and stops at
#   the first answer.
#   Returns "" if the machine answered, otherwise the alarm text.
########################################################################
sub monitor_alive {
    my($computer,$count) = @_;
    my($cmd,$result);

    # A missing, empty or non-positive count would skip the loop (or,
    # when negative, never end it); always try at least once.
    $count = int($count);
    $count = 1 if ($count < 1);

    $cmd = "$ping -c 1 $computer > /dev/null";
    print ("$cmd","\n") if ($debug & $DCMD);
    while ($count--) {
        system($cmd);
        return ("Failed to execute ping: $!\n") if ($? == -1);
        $result = $? >> 8;
        return ("") if ($result == 0);
    }
    return ("no answer from $computer") if ($result == 1);
    return ("no such host $computer") if ($result == 2);
    return ("unknown result from ping $computer: $result");
}

########################################################################
# insert_by_time - insert monitor line into monitor_list by
#                  sorted time order.
#   global variables: @monitor_list, $TDELIM, debugging variables
#   The entry is placed before the first entry with a later run
#   time, so entries with equal times keep their insertion order.
########################################################################
sub insert_by_time {
    my($monitor) = @_;
    my($run_at, $at, $i);

    print "insert_by_time: $monitor\n" if ($debug & $DSORT);
    $at = 0;
    ($run_at) = split(/$TDELIM/,$monitor,2);
    print "run_at = $run_at\n" if ($debug & $DSORT);
    for ($i=0;$i<@monitor_list;$i++) {
        ($at) = split(/$TDELIM/,$monitor_list[$i],2);
        if ($at > $run_at) {
            print "Insert $run_at before $at\n" if ($debug & $DSORT);
            splice(@monitor_list,$i,0,$monitor);
            return;
        }
    }
    print "Insert $run_at at end after $at\n" if ($debug & $DSORT);
    push(@monitor_list,$monitor);
}

########################################################################
# by_time - sort procedure to sort monitor entries by time.
#   global variables: $TDELIM, sort arguments (a,b)
#   Compares the leading run-time field of the two entries numerically.
########################################################################
sub by_time {
    my($aa) = split(/$TDELIM/,$a,2);
    my($bb) = split(/$TDELIM/,$b,2);

    return $aa <=> $bb;
}

########################################################################
# hup_handler - set read_config_flag when HUP signal received.
#   Sends ALRM to this process as well, to wake the main loop from
#   its sleep so the flag is noticed promptly.
########################################################################
sub hup_handler {
    $SIG{'HUP'} = \&hup_handler;    # reinstall myself
    ++$read_config_flag;
    print ("In hup_handler\n");
    kill 'ALRM', $$;
}

########################################################################
# alrm_handler - handle wakeup calls from hup handler.
#   Intentionally empty: it exists so that ALRM is caught here
#   rather than left to its default action.
########################################################################
sub alrm_handler {
}

########################################################################
# term_handler - report the shutdown and exit when TERM signal received.
########################################################################
sub term_handler {
    # Use the current time: $now is only refreshed by the main loop and
    # may lag behind by a whole sleep interval.
    printf ("Exiting %s at %s\n", $cmdname, &date_string(time()));
    exit(0);
}

########################################################################
# dump_monitor_list - dump the monitor_list (debugging purposes)
#   global variables: @monitor_list
########################################################################
sub dump_monitor_list {
    print "\n", "Monitor list:\n", join("\n",@monitor_list), "\n\n";
}

########################################################################
# date_string - create a date/time string from time value.
#   $timeval is a time in seconds since the epoch.
#   Returns local time formatted as "YYYY/MM/DD hh:mm:ss".
########################################################################
sub date_string {
    my($timeval) = @_;
    my($sec,$min,$hour,$mday,$mon,$year) = localtime($timeval);

    # localtime counts years from 1900 and months from 0.
    return(sprintf("%04d/%02d/%02d %02d:%02d:%02d",
                   $year+1900,$mon+1,$mday,$hour,$min,$sec));
}

########################################################################
# update_pidfile - update PIDFILE with PID if requested.
#   global variables: $now, %ENV
#   The file is named by the PIDFILE environment variable; nothing
#   is written when that is unset or empty.
#   Return: -1 on error, 0 on success
########################################################################
sub update_pidfile {
    my($pidfile, $datestr);

    $datestr = &date_string($now);
    if (($pidfile=$ENV{"PIDFILE"}) ne "") {
        if (! open (PIDFILE, ">", "$pidfile")) {
            print ("$datestr ERROR: Unable to open PIDFILE $pidfile\n");
            return (-1);
        }
        printf (PIDFILE "%d\n", $$);
        # Buffered write errors only surface at close.
        if (! close (PIDFILE)) {
            print ("$datestr ERROR: Unable to close PIDFILE $pidfile\n");
            return (-1);
        }
    }
    return (0);
}