#!/usr/bin/perl
#
# Spong network monitoring script.  This runs various tests against IP based
# network services (like nntp, smtp, http, pop, etc...)  If it can not
# connect to a service, then it sends a message to the spong server machine
# about the problems.  This script currently checks the following:
#
#    * ping  (try connecting to it period...)
#    * ftp, pop3, smtp, http, nntp, imap (a suite of tcp based services)
#    * dns (if the Net::DNS module is installed)
#
# I'm not currently using the non-core Net:: tools since that would require an
# installation of a separate perl package, but I probably will at some point.
#
# History:
# (1) Ported bb-network.sh script to perl. (Ed Hill Feb 27, 1997)
# (2) Converted checks to new plugin mechanism (Stephen Johnson May 28, 1999)
#     Added user-configurable escalation mechanism
#
# $Id: spong-network.pl,v 1.41 2002/04/18 15:39:00 sljohnson Exp $

use Carp;
use lib "/usr/share/spong";

$0 = "spong-network";

use Sys::Hostname;
use Socket;
use IO::Socket;
use POSIX;
use Getopt::Long;

use Spong::Daemon;
use Spong::Status qw(status);
use Spong::Log;

# Check to see if the Time::HiRes module is available.  The import happens at
# run time, so code that wants the high resolution clock has to call
# main::time() explicitly (check_simple does this when $hires is set).
eval { require Time::HiRes; Time::HiRes->import( qw(time) ); };
$hires = $@ ? 0 : 1;

srand( time() ^ ($$ + ($$ << 15 )) );

$debug = $restart = $kill = 0;

if ( ! GetOptions("debug:i" => \$debuglevel, "restart" => \$restart,
          "kill" => \$kill, "nosleep|refresh" => \$nosleep,
          "nodaemonize" => \$nodaemonize) ) {
   &usage();
   exit 1;
}

# Initial debugging for preconfiguration debugging
Spong::Log::set_debug_context( 'debuglevel' => $debuglevel );

$me         = "/usr/bin/spong-network";
$conf_file  = $ARGV[0] || "/etc/spong/spong.conf";
$hosts_file = "/etc/spong/spong.hosts";

# Canonical name of this machine, lower-cased.  If the local host name does
# not resolve, fall back to the plain host name so that the host specific
# configuration file lookup in load_config_files() still has a name to use.
($HOST)     = gethostbyname(&Sys::Hostname::hostname());
$HOST       = &Sys::Hostname::hostname() unless defined $HOST;
$HOST       =~ tr/A-Z/a-z/;

&load_config_files(); # Loads the user specified configuration information
&init_logging();      # Initialize logging contexts
Spong::Daemon::Daemonize()  # Daemonize if not signalling or a one-shot
   unless ($nosleep || $restart || $kill || $debug || $nodaemonize );
&handle_signals();    # Set up handlers, and signal the current server if asked

# Find our SPONGSLEEP value: a per-program entry wins over the DEFAULT entry,
# which wins over a plain scalar setting; 300 seconds if nothing is set.

$SPONGSLEEP = $SPONGSLEEP{'spong-network'} || $SPONGSLEEP{'DEFAULT'} ||
              $SPONGSLEEP || 300;

&munge_services();  # Convert service list in %HOSTS to a form that we like

# Start with an empty plugin table.  (Assigning {} here would store a single
# stringified hash reference as a key instead of emptying the hash.)
%PLUGINS = ();
&config_funcs();

# List of "host service" strings that do_check() found red but has not yet
# seen fail $CRIT_WARN_LEVEL times; they are rechecked before the cycle ends.
my @bad_checks;

# Do the various network tests for each host.

# Start time of the current cycle; the next cycle is scheduled relative to it.
my $lastcheck = time();

while( 1 ) {
   @bad_checks = ();  # Clear out the bad check list

   # Main checking loop, check everything.
   foreach $host ( @HOSTS_LIST ) {
      &debug( "checking network services on $host" );

      if( $HOSTS{$host}{'skip_network_checks'} ) {
         &debug('skipping network checks');
         next;
      }

      my $stopafter = 0;
      my $set_clear = 0;
      foreach $_ ( split(/[\s,]+/,$HOSTS{$host}->{'services'}) ) {
         # ok...if the check has a : on the end, then stop if it
         # comes back negative
         if (/^(\w+)(:)$/) {
            $check = $1;
            $stopafter = 1;
         } else {
            $check = $_;
            $stopafter = 0;
         }

         # Show what we are doing in the process title
         $0 = "spong-network (checking $host/$check)";
         # Reset the critical error count
         $HOSTS{$host}->{'service'}->{$check}->{'count'} = 0;
         &debug( "checking $host/$check", 9 );

         &do_check($host,$check);

         # Get the last status of the check (as recorded by do_check)
         my $laststatus = $HOSTS{$host}{'service'}{$check}{'laststatus'}
                       || 'green';

         # NOTE(review): do_check only records 'red' once the failure count
         # reaches $CRIT_WARN_LEVEL, and the count was reset just above, so
         # this branch looks reachable only when $CRIT_WARN_LEVEL <= 1 --
         # confirm that this is intended.
         if ($stopafter && $laststatus eq 'red') {
            debug("Check $check failed for host $host," .
                  " skipping remaining tests", 3);
             last;
#            $set_clear = 1;   # Set remaining check to clear status
         }
      }
   }


   # Now recheck the services that were down, but need to be rechecked.
   # do_check() pushes a check back onto @bad_checks for as long as it keeps
   # failing below $CRIT_WARN_LEVEL, so this loop ends once every bad check
   # has either recovered or been reported.
   while ( @bad_checks ) {
      my @tmp_checks = @bad_checks;
      @bad_checks = ();
      foreach $tmp (@tmp_checks) {
         my ( $host, $check ) = split / /,$tmp;
         $0 = "spong-network (rechecking $host/$check)";
         &debug( "checking $host/$check", 9 );
         &do_check($host,$check);
      }

      sleep $RECHECKSLEEP;
   }
 
   # If we are supposed to stay alive, then calculate the time for the next
   # main time loop which is $lastcheck + $SPONGSLEEP, plus or minus a little
   # randomness so that things don't get in sync and pound the spong-server.
   # If that time is still in the future we sleep off the difference.
   # Otherwise (one-shot mode), just exit.

   if( $nosleep ) {
      last;
   } else {

      # Calculate the time for the next loop (SPONGSLEEP -5% .. +5%)
      my $nexttime = $lastcheck + int($SPONGSLEEP - (.05 * $SPONGSLEEP) + 
   		        rand(.1 * $SPONGSLEEP));
   
      if ( $nexttime > time() ) {
         my $sleep = $nexttime - time();
         &debug( "sleeping for $sleep seconds" );
         $0 = "spong-network (sleeping)";
         sleep $sleep;
      }
      $lastcheck = $nexttime;  # The scheduled time becomes the new reference
   }

}

# Only reached via the 'last' above, i.e. when $nosleep is set.
unlink( "/var/run/spong/spong-network.pid" ) unless $nosleep;
exit(0);


# This routine performs one check of a service on a host and determines what
# status code to send to the spong-server.
#
#   $host    - host to check (a key of %HOSTS)
#   $service - service to check (a key of %PLUGINS, without a trailing ':')
#
# The plugin registered in %PLUGINS is called with the host name and returns
# ( status, summary, message ).  A 'red' status is only reported once it has
# been seen $CRIT_WARN_LEVEL times in a row; until then the check is queued
# on @bad_checks for a recheck and no status is sent.  The per service
# failure count and last status are kept in $HOSTS{$host}{'service'}.

sub do_check {
   my( $host, $service ) = @_;

   # Per service bookkeeping record (created on first use)
   my $state = $HOSTS{$host}->{'service'}->{$service} ||= {};

   my $crit_count = defined $state->{'count'} ? $state->{'count'} : 0;
   my $laststatus = $state->{'laststatus'} || 'green';

   # Call the check function as referenced by the PLUGIN hash
   my( $status, $summary, $message );
   eval {
      ( $status, $summary, $message ) = &{$PLUGINS{$service}}($host);
   };
   if( $@ ) {
      &error("No check function defined for $service: $@");
      return;
   }

   # If current status is critical, increment the critical count counter.
   # While the counter is below $CRIT_WARN_LEVEL the report is deferred and
   # the check is queued for a recheck, otherwise the critical is passed on.
   # If current status is not red, reset the critical level counter.
   my $color;
   my $skip_status = 0;
   if( $status eq 'red' ) {
      $crit_count += 1;
      $color = 'red';
      if( $crit_count < $CRIT_WARN_LEVEL ) {
         $color = 'yellow' if $laststatus eq 'green';
         $skip_status = 1;
         $state->{'laststatus'} = 'yellow';
         push @bad_checks, "$host $service";
      } else {
         $state->{'laststatus'} = 'red';
      }
      $summary = "($crit_count/$CRIT_WARN_LEVEL) " . $summary
         if $CRIT_WARN_LEVEL > 1;
   } else {
      $crit_count = 0;
      $color = $status;
      $state->{'laststatus'} = 'green';
   }

   &debug("$status - $crit_count - $CRIT_WARN_LEVEL - $color - $summary");

   # Save the critical counter in the host for the service
   $state->{'count'} = $crit_count;

   if( $skip_status ) {
      &debug("Status change to down, deferring report until after rechecks",5);
      return;
   }

   # Report the status; check $@ right away so nothing can clobber it
   eval {
      &status( $SPONGSERVER, $host, $service, $color, $summary, $message );
   };
   &error("$@") if $@;

   # Do set_clear processing if necessary
   do_set_clear( $host, $service ) if $color eq "red";
   return;
}

# Do the set_clear process if the service for the host has the stop-after
# flag (a trailing ':') set in its 'services' entry: every service listed
# after it is reported as 'clear'.  Nothing happens if the service is listed
# without the flag.
#
#   $host    - host whose 'services' list is walked
#   $service - the service that was just reported red

sub do_set_clear {
   my( $host, $service ) = @_;

   my $set_clear = 0;
   foreach my $entry ( split(/[\s,]+/,$HOSTS{$host}->{'services'}) ) {

      # Split "name:" into the bare name and the stop-after flag
      my( $name, $stop_after ) = ( $entry, 0 );
      if( $entry =~ /^(\w+)(:)$/ ) {
         ( $name, $stop_after ) = ( $1, 1 );
      }

      # If our service
      if( $name eq $service ) {
         # Without the stop_after flag there is nothing to clear
         last unless $stop_after;
         # Has the stop_after flag, set flag and skip to next service
         $set_clear = 1;
         next;
      }

      # If set_clear, set service to clear status
      if( $set_clear ) {
         sleep 1;
         set_clear( $host, $name );
      }
   }

   return;
}


# Set a service for a host to 'clear' status, meaning it was not checked.
# This is used for skipping the remaining tests for a host when an earlier
# service carrying the stop-after flag has failed.
#
#   $host    - host the status is reported for
#   $service - service to mark as 'clear'
#
# Failures while sending the status are logged, not propagated.

sub set_clear {
   my( $host, $service ) = @_;

   my $summary = "Test skipped due. A prev test flagged as stop_after";
   my $message = "";

   eval {
      &status( $SPONGSERVER, $host, $service, 'clear', $summary, $message );
   };

   # Log the real error text ($@), not a literal "@"
   if( $@ ) {
      &error("set_clear: $@");
   }

   return;
}

# A generic tcp port checking routine.  You give this function a hostname, a
# port, a message to send (can be ""), a return regular expression string to
# check for, and the name of the service.  This will go out, connect to that
# port and check to make sure you get back expected results.
#
# One attempt is made per entry in the timeout list, taken from
# $TCP_SIMPLE_TIMEOUT{$service}, then $TCP_SIMPLE_TIMEOUT{'DEFAULT'}, then
# [3,5]; the first reply matching $check ends the loop.
#
# Returns ( color, summary, message ): color is "green" or "red", summary is
# a one line description and message is the raw data read by check_tcp.

sub check_simple {
   my( $host, $port, $send, $check, $service ) = @_;
   my( $color, $summary ) = ( "red", "" );
   my( $attempt, $start, $message, $diff, $errcd );

   my $timeouts = $TCP_SIMPLE_TIMEOUT{$service}
               || $TCP_SIMPLE_TIMEOUT{'DEFAULT'}
               || [3,5];

   foreach my $timeout ( @$timeouts ) {
      # main::time() is the Time::HiRes version when that module was loaded
      $start = $hires ? main::time() : time();
      ( $errcd, $message ) = &check_tcp( $host, $port, $send, $timeout );
      $diff  = ($hires ? main::time() : time()) - $start;

      $attempt++;
      if( defined $message && $message =~ /$check/ ) {
         $color = "green";
         last;
      }
      &debug("check_simple: $service - $host - timeout $timeout - attempt $attempt failed");
   }

   $diff = sprintf("%.3f", $diff || 0);

   if( $color eq "green" ) {
      $summary  = "$service ok - $diff second response time";
      $summary .= ", attempt $attempt" if $attempt != 1;
   } elsif( $errcd ) {
      $summary = "$service is down, $errcd";
   } else {
      # Connected without an error but the reply never matched $check;
      # say so instead of reporting a red status with an empty summary.
      $summary = "$service is down, expected response not received";
   }

   &debug( "$service - $host - $color, $summary" );
   return(  $color, $summary, $message );
}



# ---------------------------------------------------------------------------
# &check_tcp( HOST, PORT, DATA, TIMEOUT, MAXLEN )
#
# This function will make a connection to a port on a given host, send a
# message, and return what it gets back to the caller of this function.
#
#   HOST    - host name or dotted-quad address
#   PORT    - tcp port number
#   DATA    - string to send after connecting (may be "")
#   TIMEOUT - seconds allowed for the whole exchange (default 5)
#   MAXLEN  - stop reading once this many bytes were received (default 256)
#
# Always returns the list ( ERR, MSG ).  ERR is 0 on success, otherwise a
# true value describing the failure; MSG is whatever was read (maybe "").
# ---------------------------------------------------------------------------

sub check_tcp {
   my( $addr, $port, $data, $timeout, $maxlen ) = @_;
   my( $iaddr, $paddr, $proto, $sock );
   my( $err, $line, $msg ) = ( 0, "", "" );

   # Turn the host into a packed address; a dotted quad needs no lookup
   if( $addr =~ /^\s*((?:\d+\.){3}\d+)\s*$/ ) {
      $iaddr = inet_aton( $1 );
   } else {
      ( $iaddr ) = (gethostbyname($addr))[4];
   }
   return ( "can't resolve address $addr", "" ) unless defined $iaddr;

   $timeout = 5  if ( ! defined $timeout || $timeout <= 0);
   $maxlen = 256 if ( ! defined $maxlen  || $maxlen  <= 0);
   $data   = ""  if ( ! defined $data );

   $paddr = sockaddr_in( $port, $iaddr );
   $proto = getprotobyname( 'tcp' );

   # Set an alarm so that if we can't connect "immediately" it times out.
   # Poor man's exception handling in perl...

   eval {
      local $SIG{'ALRM'} = sub { die "Socket timed out"; };
      alarm($timeout);

      socket( $sock, PF_INET, SOCK_STREAM, $proto ) || die "socket: $!";
      connect( $sock, $paddr )                      || die "connect: $!";
      select((select($sock), $| = 1)[0]);
      print {$sock} $data;
      while( length($msg) < $maxlen ) {
         $line = "";
         # recv returns undef on error; an empty read means the peer closed
         last unless defined recv( $sock, $line, 256, 0 );
         last if length($line) == 0;
         $msg .= $line;
      }
      alarm(0);
      close( $sock ) || die "close: $!";
   };
   my $failure = $@;
   alarm(0);

   # The most specific pattern is tested last so that it wins
   if ( $failure =~ /^(.*) at/ )          { $err = $1; }
   if ( $failure =~ /timed out/ )         { $err = "check_tcp timed out"; }
   if ( $failure =~ /connect:(.*) at/ )   { $err = $1; }

   # $sock is lexical, so a socket left open by a timeout or a failed
   # connect is closed when it goes out of scope here.
   return ($err,$msg);
}


# ===========================================================================
# Utility functions, and signal handlers...
# ===========================================================================

# Print out the usage for the program on standard output.  $0 is interpolated
# so the text shows the name the program is currently running under.

sub usage {

   print <<"END_USAGE";
Usage:
   $0  [--nodaemonize] [--debug n] [--nosleep|--refresh] [config_file]
   $0  --kill | --restart

   --debug n
         Run in the foreground and print debugging output
   --nodaemonize
         Run without becoming a daemon
   --nosleep
   --refresh
         Run one cycle of checks in the foreground and exit
   --restart
         Signal a running spong-network to restart.
   --kill
         Signal a running spong-network to terminate.
   config_file
         Use the named file as the configuration file
END_USAGE

}

# Set up the debug and error logging contexts of the Spong::Log module from
# the command line debug level and the $SPONG_LOG_FILE / $SPONG_LOG_SYSLOG
# configuration settings.

sub init_logging {
   # A debug level that was given as 0 is promoted to level 1
   if( defined $debuglevel ) {
      if( $debuglevel == 0 ) { $debug = 1; }
      else                   { $debug = $debuglevel; }
   }

   Spong::Log::set_debug_context( 'debuglevel' => $debug );

   # Errors go to the log file and/or syslog, as enabled in the configuration
   my $use_syslog = $SPONG_LOG_SYSLOG ? 1 : 0;
   my $logfile    = "";
   $logfile = "/var/log/spong/spong-network.log" if $SPONG_LOG_FILE;

   my @error_context = (
      syslog   => $use_syslog,
      ident    => 'spong-network',
      logopt   => 'pid,cons',
      priority => 'ERR',
      filename => $logfile,
   );
   Spong::Log::set_error_context( @error_context );
}


# Load our configuration variables, including anything specific to the host
# that we are running on, then load the hosts file.
#
# Reads $conf_file, then "$conf_file.$HOST" or, failing that, the same name
# with only the first component of $HOST.  Evaluates $hosts_file (which sets
# %HOSTS) and fills @HOSTS_LIST with the host names in file order.  Dies if
# a file can not be loaded.

sub load_config_files {
   require $conf_file || die "Can't load $conf_file: $!";

   # Host specific additions: full host name first, short name "for lazy
   # typist" second; only the first one found is loaded.
   my $short = (split( /\./, $HOST ))[0];
   foreach my $extra ( "$conf_file.$HOST", "$conf_file.$short" ) {
      next unless -f $extra;
      require $extra || die "Can't load $extra: $!";
      last;
   }
   &debug( "configuration file(s) loaded" );

   # Read in the spong.hosts file.  We are a little nasty here in that we do
   # some junk to scan through the file so that we can maintain the order of
   # the hosts as they appear in the file.

   my( $evalme, $inhosts ) = ( "", 0 );
   open( my $hosts_fh, '<', $hosts_file ) || die "Can't load $hosts_file: $!";
   while( my $line = <$hosts_fh> ) {
      $evalme .= $line;
      if( $line =~ /^\s*%HOSTS\s*=\s*\(/ ) { $inhosts = 1; }
      if( $inhosts && $line =~ /^\s*[\'\"]?([^\s\'\"]+)[\'\"]?\s*\=\>\s*\{/ ) {
         push( @HOSTS_LIST, $1 );
      }
   }
   close( $hosts_fh );
   eval $evalme || die "Invalid spong.hosts file: $@";

   # The scan above is only a heuristic and keeps matching after the %HOSTS
   # definition ends, so drop names that are not hosts (and duplicates)
   # while keeping the file order.
   my %seen;
   @HOSTS_LIST = grep { exists $HOSTS{$_} && ! $seen{$_}++ } @HOSTS_LIST;

   # Fallback, if we didn't read things correctly...  After the filtering
   # above, equal counts mean the two sets of names are identical.  (sort
   # must not be used for this test: its scalar context result is undefined.)

   if( scalar( @HOSTS_LIST ) != scalar( keys %HOSTS ) ) {
      @HOSTS_LIST = sort keys %HOSTS;
   }
   &debug( "host file loaded" );
}

# This is part of the set up code, this sets up the signal handlers, and
# handles any command line arguments that are given to us that tell us to
# signal the currently running spong-network program.
#
# With --restart or --kill the running process is signalled and this process
# exits.  Otherwise, unless running one-shot (--nosleep), it makes sure no
# other instance is running and records our pid in the pid file.

sub handle_signals {
   my $pidfile = "/var/run/spong/spong-network.pid";

   # Clear out signal mask in case we inherit any blocked sigs

   my $sigset = POSIX::SigSet->new;
   sigprocmask(SIG_SETMASK, $sigset );

   # Set up some signal handlers to handle our death gracefully, and also
   # listen for the HUP signal, and if we see that, we re-exec ourself.

   $SIG{'TERM'} = \&exit_handler;
   $SIG{'QUIT'} = \&exit_handler;
   $SIG{'HUP'}  = \&hup_handler;
   $SIG{'PIPE'} = \&pipe_handler;

   # If the user gives us the --restart or --kill flags, then we signal the
   # currently running spong-network process, and tell it to either die, or
   # re-exec itself (which causes it to re-read its configuration files).

   if( $restart || $kill ) {
      open( my $pid_fh, '<', $pidfile ) || die "Can't find pid: $!";
      my $pid = <$pid_fh>;
      close( $pid_fh );

      # An empty or garbled pid would be taken as 0 by kill(), which signals
      # our whole process group, so insist on a positive number.
      unless( defined $pid && $pid =~ /^\s*(\d+)\s*$/ && $1 > 0 ) {
         die "Invalid pid in $pidfile\n";
      }
      $pid = $1;

      if( $restart ) {
         &debug( "telling pid $pid to restart" ); kill( 'HUP', $pid ); }
      if( $kill ) {
         &debug( "telling pid $pid to die" ); kill( 'QUIT', $pid ); }

      exit(0);
   }

   # A one-shot run neither checks for nor owns the pid file
   return if $nosleep;

   # Check to see if we are already running
   &already_running();

   # Write our pid to the pid file.  A failure is logged but not fatal.

   if( open( my $out_fh, '>', $pidfile ) ) {
      print {$out_fh} "$$\n";
      close( $out_fh ) || &error("Can't write $pidfile: $!");
   } else {
      &error("Can't write $pidfile: $!");
   }
   return;
}


# This routine checks to see if another instance of spong-network is already
# running.  If there is another instance, this instance will complain and
# exit with status 1.  A missing pid file, or one without a usable pid, is
# taken to mean that nothing is running.

sub already_running {
   my $pidfile = "/var/run/spong/spong-network.pid";

   # Nothing to check without a PID file
   return unless -f $pidfile;

   # Read the pid
   open( my $pid_fh, '<', $pidfile ) || die "Can't open pid: $!";
   my $pid = <$pid_fh>;
   close( $pid_fh );

   # An empty pid would make "kill 0, 0" probe our own process group, which
   # always succeeds, so only accept a positive number.
   return unless defined $pid && $pid =~ /^\s*(\d+)\s*$/ && $1 > 0;
   $pid = $1;

   # Signal 0 only tests whether the process can be signalled
   if ( kill 0,$pid ) {
      &error("Spong-network is already running as pid $pid");
      exit 1;
   }
   return;
}


# Output functions, one for debugging information, the other for errors.

# Pass a debugging message and an optional level (undef when not given)
# on to Spong::Log::debug.
sub debug {
   my( $text, $level ) = @_;
   Spong::Log::debug( $text, $level );
}
# Pass an error message on to Spong::Log::error.
sub error {
   my( $text ) = @_;
   Spong::Log::error( $text );
}


# Handle some signals...

# Signal handler installed for TERM and QUIT: log the signal, remove our pid
# file and exit.  Perl passes the signal name as the first argument.  A
# one-shot (--nosleep) run never wrote the pid file, so it leaves it alone.
sub exit_handler {
   my( $signame ) = @_;
   $signame = 'QUIT' unless defined $signame;

   &debug( "caught $signame signal, exiting..." );
   unlink "/var/run/spong/spong-network.pid" unless $nosleep;
   exit(0);
}

# Signal handler installed for HUP: re-exec ourself so that the configuration
# files are read again.  The new process is started with the same debug,
# daemonize and one-shot options and the same configuration file.
sub hup_handler {
   &debug( "caught HUP signal, restarting..." );

   # A one-shot (--nosleep) run never wrote the pid file, so leave it alone
   unlink "/var/run/spong/spong-network.pid" unless $nosleep;
   alarm(0);

   # Full argument vector, program name first.  It is handed to exec in the
   # indirect object form, so the program path is only used as argv[0] and
   # not, as a plain "exec $me, $me, ..." would do, passed on as the first
   # real argument (where it would be taken for the configuration file).
   my @argv = ( $me );
   push( @argv, "--debug", $debug ) if $debug;
   push( @argv, "--nodaemonize" )   if $nodaemonize;
   push( @argv, "--nosleep" )       if $nosleep;
   push( @argv, $conf_file )        if defined $conf_file;

   exec { $me } @argv or die "Couldn't exec $me after HUP";
}

# Signal handler installed for PIPE: call wait(), log which signal arrived
# and install this handler again for that signal.
sub pipe_handler {
   my( $signame ) = @_;

   wait();
   &debug( "caught $signame signal." );
   $SIG{$signame} = \&pipe_handler;
}

# Rewrite the 'services' entry of every host in @HOSTS_LIST into the form
# the rest of spong-network works with: a single space separated string with
# the meta-service names taken out.  "ping" is put in front of the list
# unless the host asked for "noping" or the list already mentions ping.
sub munge_services {

   for my $name ( @HOSTS_LIST ) {
      my $want_ping = 1;
      my $munged    = "";

      for my $item ( split(/[\s,]+/,$HOSTS{$name}->{'services'}) ) {
         # noping is a meta-service that says to skip the ping test
         if( $item eq "noping" ) {
            $want_ping = 0;
            next;
         }
         # Separate entries with one space, none before the first entry
         $munged .= ( $munged ? " " : "" ) . $item;
      }

      if( $want_ping && $munged !~ /ping/ ) {
         $munged = "ping $munged";
      }
      $HOSTS{$name}->{'services'} = $munged;
   }
}


# Load the plugin for every check named in a 'services' attribute in %HOSTS.
# Each distinct service name (with any ':' flag removed) is loaded once from
# Spong/Network/plugins/check_<name>.  A plugin that fails to load is logged
# and skipped.

sub config_funcs {

   # Consolidate all of the services to be checked into a unique list
   my %checks;
   foreach my $host ( @HOSTS_LIST ) {
      foreach my $check ( split(/\s+/,$HOSTS{$host}->{'services'}) ) {
         # Strip any ':' from service names
         $check =~ s/://g;
         # A leading separator yields an empty field; there is no plugin
         # to load for it
         next unless length $check;
         $checks{$check} = 1;
      }
   }

   foreach my $plugin ( keys(%checks) ) {
      &debug("Loading $plugin plugin");
      # Block eval: the plugin name is only ever used as part of a file
      # name and is never compiled as perl code
      eval { require "Spong/Network/plugins/check_$plugin"; };
      if ( $@ ) { &error("Could not load $plugin plugin: $@"); }
   }
}

