#!/usr/bin/perl
# Copyright 2000 - 2004 George Shaffer
# Anyone may use or modify this code for any purpose PROVIDED
# that as long as it is recognizably derived from this code,
# that this copyright notice, remains intact and unchanged.
# No warrantees of any kind are expressed or implied.

# This process monitoring script analyzes the contents of wps.log
# files.  A wps.log file contains the output of two UNIX utilities,
# w and ps (run with the -ax option).  Basically these files list
# uptime and load averages, who's on the system, where they're
# logged in from and what they are running.  All currently executing
# processes are listed by the ps -ax command.

# Most of the real core of the program is contained in a matching
# text file that is read at the beginning of execution and which
# defines the hosts from which wps.log files are to be examined
# and what processes are normal and what processes are required,
# i.e. should always be executing on the specified machine.

use strict;
use warnings;

my $basedir = "m:\\alert";
my $logfile = "m:\\alert\\wpserr.log";

# NOTE(review): the path of the configuration file was lost from the
# copy of this script that was reviewed; only the file name chkproc.txt
# survives in the comments.  It is assumed to live in $basedir --
# confirm before deploying.
my $matchfile = "$basedir\\chkproc.txt";

# The %match hash contains a list of regular expressions which are
# used to match lines found in the wps.log files.  Each expression
# is identified and keyed on by a unique number.
my %match;

# %HOSTS hash contains list of host names to be examined and specifies
# whether the machine is required or optional.  Required machines
# should be up 7x24.  Optional machines may not always be on.
my %HOSTS;

# The %hosts hash is a hash of hashes.  The lowest level is a list of
# match expression IDs and an indicator that specifies if the matching
# processes are allowed ('a') or required ('r').  Required processes
# should always be running.  Allowed processes are processes known to
# be typical and not worth calling attention to.  The second level of
# hash are the host names.  %hosts matches required and allowed
# processes to specific hosts as different machines should have
# different process mixtures.  For example a web server (httpd)
# should be required on a web server and not allowed on a firewall.
# Likewise ipmon should be required on a BSD firewall and not be
# allowed on a web server.  The required kernel processes will be
# quite different on BSD and Linux.
my %hosts;

# Compiled form of the expressions in %match, keyed on the same IDs.
# %match keeps the original text because it is written to the log.
my %regex_for;

# Read the chkproc.txt file initializing the hashes described above.
# Three kinds of lines are recognized (lines starting with # are
# comments):
#   HOSTS=host-opt,host-opt,...      hosts to examine
#   HOST=host=id-flag,id-flag,...    expressions used by one host
#   id~expression                    a numbered regular expression
# NOTE: host names and IDs are split on '-', so a host name that
# itself contains a hyphen is not supported by this format.
open(my $match_fh, '<', $matchfile)
    or die "Can't open $matchfile: $!\n";
while (my $entry = <$match_fh>) {
    next if ($entry =~ /^#/);
    chomp $entry;

    # Initialize the %HOSTS hash.  This test must come before the
    # /^HOST/ test below, which would also match a HOSTS line.
    if ($entry =~ /^HOSTS/) {
        my (undef, $hostlist) = split /=/, $entry;
        $hostlist = "" unless defined $hostlist;
        foreach my $hostdata (split /,/, $hostlist) {
            my ($host, $opt) = split /-/, $hostdata;
            next unless (defined $host and length $host);
            # Ensure that each host is only defined once.
            die "Duplicate host $host\n" if (exists $HOSTS{$host});
            $HOSTS{$host} = defined $opt ? $opt : "";
        }
        next;
    }

    # Create a hash of hashes containing host names which contain
    # lists of match expressions which have indicators that the
    # processes matching the expression are allowed or required.
    if ($entry =~ /^HOST/) {
        my (undef, $host, $ids) = split /=/, $entry;
        next unless (defined $host and length $host);
        $ids = "" unless defined $ids;
        foreach my $proc (split /,/, $ids) {
            my ($id, $flag) = split /-/, $proc;
            next unless (defined $id and length $id);
            # Ensure that the same process number is not used more than
            # once with the same host.
            die "Host process redefined: $host $id\n"
                if (exists $hosts{$host}{$id});
            $hosts{$host}{$id} = defined $flag ? $flag : "";
        }
        next;
    }

    # Load a hash of match expressions keyed on unique numbers.  The
    # %hosts hash of hashes will be used to retrieve the expressions
    # as needed by number.  The split is limited to two fields so that
    # an expression may itself contain a '~'.
    if ($entry =~ /^[0-9]+/) {
        my ($nbr, $expr) = split /~/, $entry, 2;
        # Ensure that duplicate process IDs are not defined.
        die "Duplicate process ID = $nbr\n" if (exists $match{$nbr});
        $match{$nbr} = defined $expr ? $expr : "";
    }
}
close($match_fh);

# Ensure that every process ID defined for each host actually has
# a matching ID in the hash of regular expressions.  If a typo creates
# an invalid process ID for a host, then the expression will be null
# which if matched against a line will match any line thus allowing
# any process to go undetected.  Each expression in use is also
# compiled here, once, so that a malformed expression is reported at
# startup together with its ID.
foreach my $HOST (sort(keys %HOSTS)) {
    my $procs = $hosts{$HOST} || {};
    foreach my $key (sort(keys %$procs)) {
        my $expr = $match{$key};
        die "No expression for $HOST $key \n"
            if (!defined $expr or $expr eq "");
        next if (exists $regex_for{$key});
        my $compiled = eval { qr/$expr/ };
        die "Invalid expression for ID $key: $@" unless defined $compiled;
        $regex_for{$key} = $compiled;
    }
}

my $time = time_wait();

# Loop through the hosts identified in the %HOSTS hash.  Change into
# a directory for each and retrieve the appropriate wps.log file for
# analysis.  This loop never terminates; each pass ends by sleeping
# in time_wait() until the next log file is due.
while (1) {
    print "$time\n";
    my $errors = "";

    foreach my $HOST (sort(keys %HOSTS)) {
        my $wpsdir = "$basedir\\$HOST\\wps";
        die "Can't change to $wpsdir" unless chdir($wpsdir);

        my $infile = $time . ".log";
        my $in_fh;
        unless (open($in_fh, '<', $infile)) {
            # A missing log is only worth reporting for hosts that
            # are expected to be up all the time.
            $errors .= "$HOST - $time: Can't open $infile\n"
                if ($HOSTS{$HOST} eq "req");
            next;
        }

        my $procs = $hosts{$HOST} || {};
        my @ids   = sort(keys %$procs);
        my $err   = "";

        # Process the lines in a wps.log file.  Each line read is
        # saved to an array for further processing, then compared
        # to the match expressions for the current host.  If the
        # wps.log line matches one of the expressions, check the next
        # line.  If the wps.log line matches no expression for the
        # current host, log it as suspicious.
        my @lines;
        while (my $line = <$in_fh>) {
            push @lines, $line;
            my $matched = 0;
            foreach my $id (@ids) {
                if ($line =~ $regex_for{$id}) {
                    $matched = 1;
                    last;
                }
            }
            $err .= "$HOST Unknown-$time: $line" unless ($matched);
        }
        close($in_fh);

        # Using the array of saved wps.log lines, get all match
        # expressions that are required for the current host.  In
        # other words, get a list of processes that should always be
        # running on the current host and match it against the wps.log
        # lines.  If no line matches, log the expression as missing -
        # no current process matches it.
        foreach my $id (@ids) {
            next unless ($procs->{$id} eq "r");
            my $matched = 0;
            foreach my $line (@lines) {
                if ($line =~ $regex_for{$id}) {
                    $matched = 1;
                    last;
                }
            }
            $err .= "$HOST Missing-$time: $match{$id}\n" unless $matched;
        }

        $errors .= $err;
    }

    process_errors($errors) if ($errors);
    $time = time_wait();
}

# _next_run_epoch returns the epoch time of the next scheduled run:
# the earliest moment at or after $now that falls on a whole minute
# whose minute-of-hour is one of @run_minutes.
#   $now          epoch seconds to search from
#   @run_minutes  minutes of the hour (0-59) at which logs are created
# Dies if @run_minutes is empty or contains no usable minute.
sub _next_run_epoch {
    my ($now, @run_minutes) = @_;
    die "time_wait: no run minutes configured\n" unless @run_minutes;

    my %is_run_minute = map { ($_ + 0 => 1) } @run_minutes;

    # Round up to the next whole minute unless already exactly on one.
    my $sec       = (localtime($now))[0];
    my $candidate = $sec == 0 ? $now : $now + (60 - $sec);

    # Every minute of the hour is visited within 60 steps; the extra
    # margin only guards against unusual local clock changes.
    for (1 .. 120) {
        return $candidate if ($is_run_minute{ (localtime($candidate))[1] });
        $candidate += 60;
    }
    die "time_wait: no valid run minute (0-59) in: @run_minutes\n";
}

# time_wait has a hardcoded list of minutes at which the wps.log file will
# be created.  The computer(s) creating the wps.log file(s) and the one
# doing the analysis (this one) need to be time synchronized and the
# transfer needs to occur in a reliable small time interval.  The delay
# in seconds can be adjusted to allow for the closeness of synchronization
# and transfer time.
#
# The next run time is found by stepping forward from the current time
# one minute at a time, and the month, day, hour and minute of the file
# name are taken from localtime() of that run time, so hour, day, month,
# year and leap-year boundaries need no special handling.  The program
# sleeps until the run time plus the delay and then returns the expected
# file name as mmddHHMM to the calling process.
sub time_wait {
    my $delay    = 10;
    my @runtimes = (0,1,2,3,4,5,6,7,8,9);

    my $now = time();
    my ($sec, $min) = (localtime($now))[0, 1];
    print "$min $sec - ";

    my $target = _next_run_epoch($now, @runtimes);
    sleep($target - $now + $delay);

    my ($tmin, $thour, $tmday, $tmon) = (localtime($target))[1, 2, 3, 4];
    return sprintf("%02d%02d%02d%02d", $tmon + 1, $tmday, $thour, $tmin);
}

# process_errors appends any "errors" (suspicious or unusual conditions)
# found on each pass to the logfile.  It then checks running processes
# (this version is specific to NT and uses tlist) to see if any earlier
# version of the log is being displayed.  If so the display program
# (notepad because this is NT) is killed.
#
# The latest version of the logfile is then displayed to the terminal,
# unless an alert suppress file is in effect.
#   $errs   text to append to the log
sub process_errors {
    my $errs = shift;

    # Failure to write the log is reported but does not stop monitoring.
    if (open(my $err_fh, '>>', $logfile)) {
        print $err_fh $errs;
        close($err_fh) or warn "Can't close $logfile: $!\n";
    }
    else {
        warn "Can't append to $logfile: $!\n";
    }

    # Finding and killing previous versions of the error log display
    # prevents this program from consuming excessive system resources
    # if the computer this is running on is not attended for some
    # time at a time when unusual conditions are being encountered.
    my $nbr;
    my $tlist = `tlist`;
    $tlist = "" unless defined $tlist;
    foreach my $entry (split /\n/, $tlist) {
        # Only trust the capture when this particular line matched;
        # otherwise $1 may still hold a value from an earlier match.
        if ($entry =~ /\s?([0-9]+) notepad.exe\s+wpserr.log - Notepad/) {
            $nbr = $1;
            last;
        }
    }
    system("kill $nbr") if ($nbr);

    # If an alert suppress file exists, how old is it?  Its first line
    # holds a timeout in seconds; the file is removed once it is older
    # than that timeout, or older than 14400 seconds in any case.  A
    # missing or non-numeric timeout counts as 0.
    my $nwfile = "m:\\alert\\nowarn.txt";
    if (-e $nwfile) {
        my $timeout;
        if (open(my $nw_fh, '<', $nwfile)) {
            $timeout = <$nw_fh>;
            close($nw_fh);
        }
        $timeout = "" unless defined $timeout;
        $timeout = ($timeout =~ /^\s*([0-9]+)/) ? $1 : 0;

        my $mtime = (stat($nwfile))[9];
        if (defined $mtime) {
            my $age = time() - $mtime;
            unlink($nwfile) if ($age > $timeout or $age > 14400);
        }
    }
    system("start notepad $logfile") unless (-e $nwfile);
}