From 76167d46f33e918ee3848e2b9364a0707ba532e8 Mon Sep 17 00:00:00 2001 From: Johan Lundberg Date: Tue, 14 Apr 2015 13:49:06 +0200 Subject: Added nrpe configuration for sto-tug-kvm2 --- .../overlay/usr/lib/nagios/plugins/check_reboot | 37 ++ .../overlay/usr/lib/nagios/plugins/check_uptime.pl | 721 +++++++++++++++++++++ 2 files changed, 758 insertions(+) create mode 100755 sto-tug-kvm2.swamid.se/overlay/usr/lib/nagios/plugins/check_reboot create mode 100755 sto-tug-kvm2.swamid.se/overlay/usr/lib/nagios/plugins/check_uptime.pl (limited to 'sto-tug-kvm2.swamid.se/overlay/usr') diff --git a/sto-tug-kvm2.swamid.se/overlay/usr/lib/nagios/plugins/check_reboot b/sto-tug-kvm2.swamid.se/overlay/usr/lib/nagios/plugins/check_reboot new file mode 100755 index 0000000..4cb9df3 --- /dev/null +++ b/sto-tug-kvm2.swamid.se/overlay/usr/lib/nagios/plugins/check_reboot @@ -0,0 +1,37 @@ +#!/bin/bash +declare -rx PROGNAME=${0##*/} +declare -rx PROGPATH=${0%/*}/ + +function cleanup { + #if [ -e "$TMPFILE" ] ; then + #rm "$TMPFILE" + #fi + exit $1 +} + +if [ -r "${PROGPATH}utils.sh" ] ; then + source "${PROGPATH}utils.sh" +else + echo "Can't find utils.sh." + printf "Currently being run from %s\n" "$PROGPATH" + # since we couldn't define STATE_UNKNOWN since reading utils.sh failed, we use 3 here but everywhere else after this use cleanup $STATE + cleanup 3 +fi + +STATE=$STATE_UNKNOWN + + +if [ -f /var/run/reboot-required.pkgs ] +then + pkg=`cat /var/run/reboot-required.pkgs` +fi + +if [ -f /var/run/reboot-required ] +then + echo "Reboot WARNING: System reboot required by package $pkg" + cleanup $STATE_WARNING; +fi + echo "Reboot OK: No reboot required" + cleanup $STATE_OK; +cleanup $STATE; + diff --git a/sto-tug-kvm2.swamid.se/overlay/usr/lib/nagios/plugins/check_uptime.pl b/sto-tug-kvm2.swamid.se/overlay/usr/lib/nagios/plugins/check_uptime.pl new file mode 100755 index 0000000..dda05e4 --- /dev/null +++ b/sto-tug-kvm2.swamid.se/overlay/usr/lib/nagios/plugins/check_uptime.pl @@ -0,0 +1,721 @@ +#!/usr/bin/perl -w +# +# ============================== SUMMARY ===================================== +# +# Program : check_uptime.pl +# Version : 0.52 +# Date : June 19, 2012 +# Authors : William Leibzon - william@leibzon.org +# Licence : GPL - summary below, full text at http://www.fsf.org/licenses/gpl.txt +# +# =========================== PROGRAM LICENSE ================================= +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# +# ===================== INFORMATION ABOUT THIS PLUGIN ========================= +# +# This plugin returns uptime of the system returning data in text (readable) +# format as well as in minutes for performance graphing. The plugin can either +# run on local system unix system (that supports standard 'uptime' command +# or check remote system by SNMP. The plugin can report one CRITICAL or +# WARNING alert if system has been rebooted since last check. +# +# ====================== SETUP AND PLUGIN USE NOTES ========================= +# +# The plugin can either retrieve information from local system (when you +# run it through check_nrpe for example) or by SNMP from remote system. +# +# On local system it will execute standard unix 'uptime' and 'uname -a'. +# +# On a remote system it'll retrieve data from sysSystem for system type +# and use that to decide if further data should be retrieved from +# sysUptime (OID 1.3.6.1.2.1.1.3.0) for windows or +# hostUptime (OID 1.3.6.1.2.1.25.1.1.0) for unix system or +# snmpEngineTime (OID 1.3.6.1.6.3.10.2.1.3) for cisco switches +# +# For information on available options please execute it with --help i.e: +# check_uptime.pl --help +# +# As I dont have time for extensive documentation below is all very brief: +# +# 1. You can also specify warning and critical thresholds which will +# give warning or critical alert if system has been up for lees then +# specified number of minutes. Example: +# check_uptime.pl -w 5 +# Will give warning alert if system has been up for less then 5 minutes +# +# 2. For performance data results you can use '-f' option which will give +# total number of minutes the system has been up. +# +# 3. A special case is use of performance to feed data from previous run +# back into the plugin. This is used to cache results about what type +# of system it is (you can also directly specify this with -T option) +# and also means -w and -c threshold values are ignored and instead +# plugin will issue ONE alert (warning or critical) if system uptime +# changes from highier value to lower +# +# ============================ EXAMPLES ======================================= +# +# 1. Local server (use with NRPE or on nagios host), warning on < 5 minutes: +# +# define command { +# command_name check_uptime +# command_line $USER1$/check_uptime.pl -f -w 5 +# } +# +# 2. Local server (use with NRPE or on nagios host), +# one critical alert on reboot: +# +# define command { +# command_name check_uptime +# command_line $USER1$/check_uptime.pl -f -c -P "SERVICEPERFDATA$" +# } +# +# 3. Remote server SNMP v2, one warning alert on reboot, +# autodetect and cache type of server: +# +# define command { +# command_name check_snmp_uptime_v2 +# command_line $USER1$/check_uptime.pl -2 -f -w -H $HOSTADDRESS$ -C $_HOSTSNMP_COMMUNITY$ -P "$SERVICEPERFDATA$" +# } +# +# 4. Remote server SNMP v3, rest as above +# +#define command { +# command_name check_snmp_uptime_v3 +# command_line $USER1$/check_uptime.pl -f -w -H $HOSTADDRESS$ -l $_HOSTSNMP_V3_USER$ -x $_HOSTSNMP_V3_AUTH$ -X $_HOSTSNMP_V3_PRIV$ -L sha,aes -P "$SERVICEPERFDATA$" +# } +# +# 5. Example of service definition using above +# +# define service{ +# use std-service +# hostgroup_name all_snmp_hosts +# service_description SNMP Uptime +# max_check_attempts 1 +# check_command check_snmp_uptime +# } +# +# 6. And this is optional dependency definition for above which makes +# every SNMP service (service beloning to SNMP servicegroup) on +# same host dependent on this SNMP Uptime check. Then if SNMP +# daemon goes down you only receive one alert +# +# define servicedependency{ +# service_description SNMP Uptime +# dependent_servicegroup_name snmp +# } +# +# ============================= VERSION HISTORY ============================== +# +# 0.1 - sometime 2006 : Simple script for tracking local system uptime +# 0.2 - sometime 2008 : Update to get uptime by SNMP, its now alike my other plugins +# 0.3 - Nov 14, 2009 : Added getting system info line and using that to decide +# format of uptime line and how to process it. Added support +# for getting uptime with SNMP from windows systems. +# Added documentation header alike my other plugins. +# Planned to release it to public, but forgot. +# 0.4 - Dec 19, 2011 : Update to support SNMP v3, released to public +# 0.41 - Jan 13, 2012 : Added bug fix by Rom_UA posted as comment on Nagios Exchange +# Added version history you're reading right now. +# 0.42 - Feb 13, 2012 : Bug fix to not report WARNING if uptime is not correct output +# 0.5 - Feb 29, 2012 : Added support for "netswitch" engine type that retrieves +# snmpEngineTime. Added proper support for sysUpTime interpreting +# it as 1/100s of a second and converting to days,hours,minutes +# Changed internal processing structure, now reported uptime +# info text is based on uptime_minutes and not separate. +# 0.51 - Jun 05, 2012 : Bug fixed for case when when snmp system info is < 3 words. +# 0.52 - Jun 19, 2012 : For switches if snmpEngineTime OID is not available, +# the plugin will revert back to checking hostUptime and +# then sysUptime. Entire logic has in fact been changed +# to support trying more than just two OIDs. Also added +# support to specify filename to '-v' option for debug +# output to go to instead of console and for '--debug' +# option as an alias to '--verbose'. +# +# TODO: +# 0) Add '--extra-opts' to allow to read options from a file as specified +# at http://nagiosplugins.org/extra-opts. This is TODO for all my plugins +# 1) Add support for ">", "<" and other threshold qualifiers +# as done in check_snmp_temperature.pl or check_mysqld.pl +# 2) Support for more types, in particular network equipment such as cisco: [DONE] +# sysUpTime is a 32-bit counter in 1/100 of a second, it rolls over after 496 days +# snmpEngineTime (.1.3.6.1.6.3.10.2.1.3) returns the uptime in seconds and will not +# roll over, however some cisco switches (29xx) are buggy and it gets reset too. +# Routers running 12.0(3)T or higher can use the snmpEngineTime object from +# the SNMP-FRAMEWORK-MIB. This keeps track of seconds since SNMP engine started. +# 3) Add threshold into perfout as ';warn;crit' +# +# ========================== START OF PROGRAM CODE =========================== + +use strict; +use Getopt::Long; + +# Nagios specific +our $TIMEOUT; +our %ERRORS; +eval 'use utils qw(%ERRORS $TIMEOUT)'; +if ($@) { + $TIMEOUT = 10; + %ERRORS = ('OK'=>0,'WARNING'=>1,'CRITICAL'=>2,'UNKNOWN'=>3,'DEPENDENT'=>4); +} + +our $no_snmp=0; +eval 'use Net::SNMP'; +if ($@) { + $no_snmp=1; +} + +# Version +my $Version='0.52'; + +# SNMP OID +my $oid_sysSystem = '1.3.6.1.2.1.1.1.0'; # windows and some unix +my $oid_hostUptime = '1.3.6.1.2.1.25.1.1.0'; # hostUptime, usually unix systems +my $oid_sysUptime = '1.3.6.1.2.1.1.3.0'; # sysUpTime, windows +my $oid_engineTime = '1.3.6.1.6.3.10.2.1.3'; # SNMP-FRAMEWORK-MIB + +my @oid_uptime_types = ( ['', '', ''], # type 0 is reserved + [ 'local', '', ''], # type 1 is local + [ 'win', 'sysUpTime', $oid_sysUptime ], # type 2 is windows + [ 'unix-host', 'hostUpTime', $oid_hostUptime ], # type 3 is unix-host + [ 'unix-sys', 'sysUpTime', $oid_sysUptime ], # type 4 is unix-sys + [ 'net', 'engineTime', $oid_engineTime ]); # type 5 is netswitch + +# Not used, but perhaps later +my $oid_hrLoad = '1.3.6.1.2.1.25.3.3.1.2.1'; +my $oid_sysLoadInt1 = '1.3.6.1.4.1.2021.10.1.5.1'; +my $oid_sysLoadInt5 = '1.3.6.1.4.1.2021.10.1.5.2'; +my $oid_sysLoadInt15 = '1.3.6.1.4.1.2021.10.1.5.3'; + +# Standard options +my $o_host = undef; # hostname +my $o_timeout= undef; # Timeout (Default 10) +my $o_help= undef; # wan't some help ? +my $o_verb= undef; # verbose mode +my $o_version= undef; # print version +my $o_label= undef; # change label instead of printing uptime +my $o_perf= undef; # Output performance data (uptime in minutes) +my $o_prevperf= undef; # performance data given with $SERVICEPERFDATA$ macro +my $o_warn= undef; # WARNING alert if system has been up for < specified number of minutes +my $o_crit= undef; # CRITICAL alert if system has been up for < specified number of minutes +my $o_type= undef; # type of check (local, auto, unix, win) + +# Login and other options specific to SNMP +my $o_port = 161; # SNMP port +my $o_community = undef; # community +my $o_version2 = undef; # use snmp v2c +my $o_login= undef; # Login for snmpv3 +my $o_passwd= undef; # Pass for snmpv3 +my $v3protocols= undef; # V3 protocol list. +my $o_authproto= 'md5'; # Auth protocol +my $o_privproto= 'des'; # Priv protocol +my $o_privpass= undef; # priv password + +## Additional global variables +my %prev_perf= (); # array that is populated with previous performance data +my $check_type = 0; + +sub p_version { print "check_uptime version : $Version\n"; } + +sub print_usage { + print "Usage: $0 [-v [debugfilename]] [-T local|unix-host|unix-sys|win|net] [-H (-C ) [-2] | (-l login -x passwd [-X pass -L ,) [-p ]] [-w -s ] [-f] [-P ] [-t ] | [-V] [--label ]\n"; +} + +sub isnnum { # Return true if arg is not a number + my $num = shift; + if ( $num =~ /^(\d+\.?\d*)|(^\.\d+)$/ ) { return 0 ;} + return 1; +} + +sub div_mod { return int( $_[0]/$_[1]) , ($_[0] % $_[1]); } + +sub help { + print "\nUptime Plugin for Nagios (check_uptime) v. ",$Version,"\n"; + print "GPL licence, (c) 2008-2012 William Leibzon\n\n"; + print_usage(); + print <, + : Authentication protocol (md5|sha : default md5) + : Priv protocols (des|aes : default des) + -p, --port=PORT + SNMP port (Default 161) +EOT +} + +# For verbose output (updated 06/06/12 to write to debug file if specified) +sub verb { + my $t=shift; + if (defined($o_verb)) { + if ($o_verb eq "") { + print $t,"\n"; + } + else { + if (!open(DEBUGFILE, ">>$o_verb")) { + print $t, "\n"; + } + else { + print DEBUGFILE $t,"\n"; + close DEBUGFILE; + } + } + } +} + +# load previous performance data +sub process_perf { + my %pdh; + my ($nm,$dt); + foreach (split(' ',$_[0])) { + if (/(.*)=(.*)/) { + ($nm,$dt)=($1,$2); + verb("prev_perf: $nm = $dt"); + # in some of my plugins time_ is to profile how long execution takes for some part of plugin + # $pdh{$nm}=$dt if $nm !~ /^time_/; + $pdh{$nm}=$dt; + } + } + return %pdh; +} + +sub type_from_name { + my $type=shift; + for(my $i=1; $i \$o_verb, 'verbose:s' => \$o_verb, "debug:s" => \$o_verb, + 'h' => \$o_help, 'help' => \$o_help, + 'H:s' => \$o_host, 'hostname:s' => \$o_host, + 'p:i' => \$o_port, 'port:i' => \$o_port, + 'C:s' => \$o_community, 'community:s' => \$o_community, + '2' => \$o_version2, 'v2c' => \$o_version2, + 'l:s' => \$o_login, 'login:s' => \$o_login, + 'x:s' => \$o_passwd, 'passwd:s' => \$o_passwd, + 'X:s' => \$o_privpass, 'privpass:s' => \$o_privpass, + 'L:s' => \$v3protocols, 'protocols:s' => \$v3protocols, + 't:i' => \$o_timeout, 'timeout:i' => \$o_timeout, + 'V' => \$o_version, 'version' => \$o_version, + 'f' => \$o_perf, 'perfparse' => \$o_perf, + 'w:i' => \$o_warn, 'warning:i' => \$o_warn, + 'c:i' => \$o_crit, 'critical:i' => \$o_crit, + 'label:s' => \$o_label, + 'P:s' => \$o_prevperf, 'prev_perfdata:s' => \$o_prevperf, + 'T:s' => \$o_type, 'type:s' => \$o_type, + ); + if (defined ($o_help) ) { help(); exit $ERRORS{"UNKNOWN"}}; + if (defined($o_version)) { p_version(); exit $ERRORS{"UNKNOWN"}}; + + $o_type = "win" if defined($o_type) && $o_type eq 'windows'; + $o_type = "net" if defined($o_type) && $o_type eq 'netswitch'; + if (defined($o_type) && $o_type ne 'auto' && type_from_name($o_type)==-1) { + print "Invalid system type specified\n"; print_usage(); exit $ERRORS{"UNNKNOWN"}; + } + + if (!defined($o_community) && (!defined($o_login) || !defined($o_passwd)) ) { + $o_type='local' if !defined($o_type) || $o_type eq 'auto'; + if ($o_type ne 'local') { + print "Put snmp login info!\n"; print_usage(); exit $ERRORS{"UNKNOWN"} + } + if (defined($o_host)) { + print "Why are you specifying hostname without SNMP parameters?\n"; print_usage(); exit $ERRORS{"UNKNOWN"}; + } + } + else { + $o_type='auto' if !defined($o_type); + if ($o_type eq 'local' ) { + print "Why are you specifying SNMP login for local system???\n"; print_usage(); exit $ERRORS{"UNKNOWN"} + } + if (!defined($o_host)) { + print "Hostname required for SNMP check.\n"; print_usage(); exit $ERRORS{"UNKNOWN"}; + } + if ($no_snmp) { + print "Can't locate Net/SNMP.pm\n"; print_usage(); exit $ERRORS{"UNKNOWN"}; + } + } + + # check snmp information + if ((defined($o_login) || defined($o_passwd)) && (defined($o_community) || defined($o_version2)) ) + { print "Can't mix snmp v1,2c,3 protocols!\n"; print_usage(); exit $ERRORS{"UNKNOWN"}} + if (defined ($v3protocols)) { + if (!defined($o_login)) { print "Put snmp V3 login info with protocols!\n"; print_usage(); exit $ERRORS{"UNKNOWN"}} + my @v3proto=split(/,/,$v3protocols); + if ((defined ($v3proto[0])) && ($v3proto[0] ne "")) {$o_authproto=$v3proto[0]; } # Auth protocol + if (defined ($v3proto[1])) {$o_privproto=$v3proto[1]; } # Priv protocol + if ((defined ($v3proto[1])) && (!defined($o_privpass))) + { print "Put snmp V3 priv login info with priv protocols!\n"; print_usage(); exit $ERRORS{"UNKNOWN"}} + } + + if (defined($o_timeout) && (isnnum($o_timeout) || ($o_timeout < 2) || ($o_timeout > 60))) + { print "Timeout must be >1 and <60 !\n"; print_usage(); exit $ERRORS{"UNKNOWN"}} + if (!defined($o_timeout)) {$o_timeout=$TIMEOUT+5;} + + if (defined($o_prevperf)) { + if (defined($o_perf)) { + %prev_perf=process_perf($o_prevperf); + $check_type = $prev_perf{type} if $o_type eq 'auto' && exists($prev_perf{tye}) && exists($oid_uptime_types[$prev_perf{type}][0]); + } + else { + print "need -f option first \n"; print_usage(); exit $ERRORS{"UNKNOWN"}; + } + } + + if ($o_type eq 'auto') { + $check_type=0; + } + else { + $check_type = type_from_name($o_type); + } +} + +sub create_snmp_session { + my ($session,$error); + + if ( defined($o_login) && defined($o_passwd)) { + # SNMPv3 login + if (!defined ($o_privpass)) { + verb("SNMPv3 AuthNoPriv login : $o_login, $o_authproto"); + ($session, $error) = Net::SNMP->session( + -hostname => $o_host, + -version => '3', + -port => $o_port, + -username => $o_login, + -authpassword => $o_passwd, + -authprotocol => $o_authproto, + -timeout => $o_timeout + ); + } else { + verb("SNMPv3 AuthPriv login : $o_login, $o_authproto, $o_privproto"); + ($session, $error) = Net::SNMP->session( + -hostname => $o_host, + -version => '3', + -username => $o_login, + -port => $o_port, + -authpassword => $o_passwd, + -authprotocol => $o_authproto, + -privpassword => $o_privpass, + -privprotocol => $o_privproto, + -timeout => $o_timeout + ); + } + } else { + if (defined ($o_version2)) { + # SNMPv2c Login + verb("SNMP v2c login"); + ($session, $error) = Net::SNMP->session( + -hostname => $o_host, + -version => 2, + -community => $o_community, + -port => $o_port, + -timeout => $o_timeout + ); + } else { + # SNMPV1 login + verb("SNMP v1 login"); + ($session, $error) = Net::SNMP->session( + -hostname => $o_host, + -community => $o_community, + -port => $o_port, + -timeout => $o_timeout + ); + } + } + if (!defined($session)) { + printf("ERROR opening session: %s.\n", $error); + exit $ERRORS{"UNKNOWN"}; + } + + return $session; +} + +$SIG{'ALRM'} = sub { + print "Alarm timeout\n"; + exit $ERRORS{"UNKNOWN"}; +}; + +########## MAIN ####### +my $system_info=""; +my $uptime_info=undef; +my $uptime_minutes=undef; +my $perf_out=""; +my $status=0; +my $uptime_output; +my ($days, $hrs, $mins); + +check_options(); + +# Check gobal timeout if snmp screws up +if (defined($o_timeout)) { + verb("Alarm at $o_timeout + 5"); + alarm($o_timeout+5); +} + +if ($check_type==1) { # local + # Process unix uptime command output + $uptime_output=`uptime`; + verb("Local Uptime Result is: $uptime_output"); + if ($uptime_output =~ /(\d+)\s+days?,\s+(\d+)\:(\d+)/) { + ($days, $hrs, $mins) = ($1, $2, $3); + } + elsif ($uptime_output =~ /up\s+(\d+)\shours?\s+(\d+)/) { + ($days, $hrs, $mins) = (0, $1, $2); + } + elsif ($uptime_output =~ /up\s+(\d+)\:(\d+)/) { + ($days, $hrs, $mins) = (0, $1, $2); + } + elsif ($uptime_output =~ /up\s+(\d+)\s+min/) { + ($days, $hrs, $mins) = (0,0,$1); + } + elsif ($uptime_output =~ /up\s+(d+)s+days?,s+(d+)s+min/) { + ($days, $hrs, $mins) = ($1,0,$2); + } + else { + $uptime_info = "up ".$uptime_output; + } + if (defined($days) && defined($hrs) && defined($mins)) { + $uptime_minutes = $days*24*60+$hrs*60+$mins; + } + my @temp=split(' ',`uname -a`); + if (scalar(@temp)<3) { + $system_info=`uname -a`; + } + else { + $system_info=join(' ',$temp[0],$temp[1],$temp[2]); + } +} +else { + # SNMP connection + my $session=create_snmp_session(); + my $result=undef; + my $oid=""; + my $guessed_check_type=0; + + if ($check_type==0){ + $result = $session->get_request(-varbindlist=>[$oid_sysSystem]); + if (!defined($result)) { + printf("ERROR: Can not retrieve $oid_sysSystem table: %s.\n", $session->error); + $session->close; + exit $ERRORS{"UNKNOWN"}; + } + verb("$o_host SysInfo Result from OID $oid_sysSystem: $result->{$oid_sysSystem}"); + if ($result->{$oid_sysSystem} =~ /Windows/) { + $guessed_check_type=2; + verb('Guessing Type: 2 = windows'); + } + if ($result->{$oid_sysSystem} =~ /Cisco/) { + $guessed_check_type=5; + verb('Guessing Type: 5 = netswitch'); + } + if ($guessed_check_type==0) { + $guessed_check_type=3; # will try hostUptime first + } + $oid=$oid_uptime_types[$guessed_check_type][2]; + } + else { + $oid=$oid_uptime_types[$check_type][2]; + } + + do { + $result = $session->get_request(-varbindlist=>[$oid,$oid_sysSystem]); + if (!defined($result)) { + if ($check_type!=0) { + printf("ERROR: Can not retrieve uptime OID table $oid: %s.\n", $session->error); + $session->close; + exit $ERRORS{"UNKNOWN"}; + } + else { + if ($session->error =~ /noSuchName/) { + if ($guessed_check_type==4) { + verb("Received noSuchName error for sysUpTime OID $oid. Giving up."); + $guessed_check_type=0; + } + if ($guessed_check_type==3) { + verb("Received noSuchName error for hostUpTime OID $oid, will now try sysUpTime"); + $guessed_check_type=4; + } + else { + verb("Received noSuchName error for OID $oid, will now try hostUpTime"); + $guessed_check_type=3; + } + if ($guessed_check_type!=0) { + $oid=$oid_uptime_types[$guessed_check_type][2]; + } + } + else { + printf("ERROR: Can not retrieve uptime OID table $oid: %s.\n", $session->error); + $session->close; + exit $ERRORS{"UNKNOWN"}; + } + } + } + else { + if ($check_type==0) { + $check_type=$guessed_check_type; + } + } + } + while (!defined($result) && $guessed_check_type!=0); + + $session->close; + if ($check_type==0 && $guessed_check_type==0) { + printf("ERROR: Can not autodetermine proper uptime OID table. Giving up.\n"); + exit $ERRORS{"UNKNOWN"}; + } + + my ($days, $hrs, $mins); + $uptime_output=$result->{$oid}; + verb("$o_host Uptime Result from OID $oid: $uptime_output"); + + if ($uptime_output =~ /(\d+)\s+days?,\s+(\d+)\:(\d+)/) { + ($days, $hrs, $mins) = ($1, $2, $3); + } + elsif ($uptime_output =~ /(\d+)\s+hours?,\s+(\d+)\:(\d+)/) { + ($days, $hrs, $mins) = (0, $1, $2); + } + elsif ($uptime_output =~ /(\d+)\s+min/) { + ($days, $hrs, $mins) = (0, 0, $1); + } + if (defined($days) && defined($hrs) && defined($mins)) { + $uptime_minutes = $days*24*60+$hrs*60+$mins; + } + elsif ($uptime_output =~ /^(\d+)$/) { + my $upnum = $1; + if ($oid eq $oid_sysUptime) { + $uptime_minutes = $upnum/100/60; + } + elsif ($oid eq $oid_engineTime) { + $uptime_minutes = $upnum/60; + } + } + else { + $uptime_info = "up ".$uptime_output; + } + my @temp=split(' ',$result->{$oid_sysSystem}); + if (scalar(@temp)<3) { + $system_info=$result->{$oid_sysSystem}; + } + else { + $system_info=join(' ',$temp[0],$temp[1],$temp[2]); + } +} + +if (defined($uptime_minutes) && !defined($uptime_info)) { + ($hrs,$mins) = div_mod($uptime_minutes,60); + ($days,$hrs) = div_mod($hrs,24); + $uptime_info = "up "; + $uptime_info .= "$days days " if $days>0; + $uptime_info .= "$hrs hours " if $hrs>0; + $uptime_info .= "$mins minutes"; +} + +verb("System Type: $check_type (".$oid_uptime_types[$check_type][0].")"); +verb("System Info: $system_info") if $system_info; +verb("Uptime Text: $uptime_info") if defined($uptime_info); +verb("Uptime Minutes: $uptime_minutes") if defined($uptime_minutes); + +if (!defined($uptime_info)) { + $uptime_info = "Can not determine uptime"; + $status = 3; +} + +if (defined($o_perf)) { + $perf_out = "type=$check_type"; + $perf_out .= " uptime_minutes=$uptime_minutes" if defined($uptime_minutes); +} + +if (defined($uptime_minutes)) { + if (defined($o_prevperf)) { + $status = 1 if defined($o_warn) && exists($prev_perf{uptime_minutes}) && $prev_perf{uptime_minutes} > $uptime_minutes; + $status = 2 if defined($o_crit) && exists($prev_perf{uptime_minutes}) && $prev_perf{uptime_minutes} > $uptime_minutes; + } + else { + $status = 1 if defined($o_warn) && !isnnum($o_warn) && $o_warn >= $uptime_minutes; + $status = 2 if defined($o_crit) && !isnnum($o_crit) && $o_crit >= $uptime_minutes; + } +} +alarm(0); + +my $exit_status="UNKNOWN"; +$exit_status="OK" if $status==0; +$exit_status="WARNING" if $status==1; +$exit_status="CRITICAL" if $status==2; +$exit_status="UNKNOWN" if $status==3; +$exit_status="$o_label $exit_status" if defined($o_label); +print "$exit_status: $system_info"; +print " - $uptime_info"; +print " | ",$perf_out if $perf_out; +print "\n"; +exit $status; -- cgit v1.1 From a441b737ad5b17203123f26034c846af3dca4100 Mon Sep 17 00:00:00 2001 From: Johan Lundberg Date: Wed, 15 Apr 2015 16:15:33 +0200 Subject: Changed ping-check to be run with run-parts --- .../overlay/usr/local/etc/docker.d/30flog | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100755 sto-tug-kvm2.swamid.se/overlay/usr/local/etc/docker.d/30flog (limited to 'sto-tug-kvm2.swamid.se/overlay/usr') diff --git a/sto-tug-kvm2.swamid.se/overlay/usr/local/etc/docker.d/30flog b/sto-tug-kvm2.swamid.se/overlay/usr/local/etc/docker.d/30flog new file mode 100755 index 0000000..a90610d --- /dev/null +++ b/sto-tug-kvm2.swamid.se/overlay/usr/local/etc/docker.d/30flog @@ -0,0 +1,20 @@ +#!/bin/bash +# +# Wait for dependent flog docker containers to be registered in local DNS. +# + +PING_CHECK="/usr/local/bin/ping-check" + +logtag="flog_docker_pre-post[$ACTION]" +logger -t "${logtag}" "$NAME ($IMAGE), CID: '$CID'" + +if [ "x$ACTION" = "xpre-start" ]; then + if [ "x$NAME" = "xflog_app" ]; then + ${PING_CHECK} flog_db.docker + exit $? + if [ "x$NAME" = "xflog_nginx" ]; then + ${PING_CHECK} flog_app.docker + exit $? + fi + exit 0 +fi -- cgit v1.1 From a1f6be0730d25fd6f4ecff0d91bc75a8a250c0fe Mon Sep 17 00:00:00 2001 From: Johan Lundberg Date: Wed, 15 Apr 2015 16:29:46 +0200 Subject: Bash skillz lacking... --- sto-tug-kvm2.swamid.se/overlay/usr/local/etc/docker.d/30flog | 1 + 1 file changed, 1 insertion(+) (limited to 'sto-tug-kvm2.swamid.se/overlay/usr') diff --git a/sto-tug-kvm2.swamid.se/overlay/usr/local/etc/docker.d/30flog b/sto-tug-kvm2.swamid.se/overlay/usr/local/etc/docker.d/30flog index a90610d..2b477a2 100755 --- a/sto-tug-kvm2.swamid.se/overlay/usr/local/etc/docker.d/30flog +++ b/sto-tug-kvm2.swamid.se/overlay/usr/local/etc/docker.d/30flog @@ -12,6 +12,7 @@ if [ "x$ACTION" = "xpre-start" ]; then if [ "x$NAME" = "xflog_app" ]; then ${PING_CHECK} flog_db.docker exit $? + fi if [ "x$NAME" = "xflog_nginx" ]; then ${PING_CHECK} flog_app.docker exit $? -- cgit v1.1 From 5e239681e344d938ef86eeec35d5755d4f9b2aac Mon Sep 17 00:00:00 2001 From: Johan Lundberg Date: Mon, 20 Apr 2015 15:25:43 +0200 Subject: Added stop on first error to postgres backup script --- sto-tug-kvm2.swamid.se/overlay/usr/local/bin/postgres_backup | 1 + 1 file changed, 1 insertion(+) (limited to 'sto-tug-kvm2.swamid.se/overlay/usr') diff --git a/sto-tug-kvm2.swamid.se/overlay/usr/local/bin/postgres_backup b/sto-tug-kvm2.swamid.se/overlay/usr/local/bin/postgres_backup index ebf052c..a2b4986 100755 --- a/sto-tug-kvm2.swamid.se/overlay/usr/local/bin/postgres_backup +++ b/sto-tug-kvm2.swamid.se/overlay/usr/local/bin/postgres_backup @@ -2,6 +2,7 @@ # # Simplistic postgres backup # +set -e BACKUPROOT="/var/docker/postgresql_data/backup" DBCONTAINER="flog_db" -- cgit v1.1 From e9f06d2ec17be72874fa30f310f8f41bbd695530 Mon Sep 17 00:00:00 2001 From: Johan Lundberg Date: Thu, 23 Apr 2015 15:27:30 +0200 Subject: Moving postgres backup script in to container. --- .../overlay/usr/local/bin/postgres_backup | 32 ---------------------- 1 file changed, 32 deletions(-) delete mode 100755 sto-tug-kvm2.swamid.se/overlay/usr/local/bin/postgres_backup (limited to 'sto-tug-kvm2.swamid.se/overlay/usr') diff --git a/sto-tug-kvm2.swamid.se/overlay/usr/local/bin/postgres_backup b/sto-tug-kvm2.swamid.se/overlay/usr/local/bin/postgres_backup deleted file mode 100755 index a2b4986..0000000 --- a/sto-tug-kvm2.swamid.se/overlay/usr/local/bin/postgres_backup +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env bash -# -# Simplistic postgres backup -# -set -e - -BACKUPROOT="/var/docker/postgresql_data/backup" -DBCONTAINER="flog_db" - -if [ ! -d ${BACKUPROOT} ]; then - echo "$0: Directory ${BACKUPROOT} does not exist - aborting." - exit 1 -fi - -set -e - -# keep seven days worth of dumps -rm -rf ${BACKUPROOT}/postgres-dumpall-flogdb.gz.7 -test -f ${BACKUPROOT}/postgres-dumpall-flogdb.gz.6 && mv ${BACKUPROOT}/postgres-dumpall-flogdb.gz.6 ${BACKUPROOT}/postgres-dumpall-flogdb.gz.7 -test -f ${BACKUPROOT}/postgres-dumpall-flogdb.gz.5 && mv ${BACKUPROOT}/postgres-dumpall-flogdb.gz.5 ${BACKUPROOT}/postgres-dumpall-flogdb.gz.6 -test -f ${BACKUPROOT}/postgres-dumpall-flogdb.gz.4 && mv ${BACKUPROOT}/postgres-dumpall-flogdb.gz.4 ${BACKUPROOT}/postgres-dumpall-flogdb.gz.5 -test -f ${BACKUPROOT}/postgres-dumpall-flogdb.gz.3 && mv ${BACKUPROOT}/postgres-dumpall-flogdb.gz.3 ${BACKUPROOT}/postgres-dumpall-flogdb.gz.4 -test -f ${BACKUPROOT}/postgres-dumpall-flogdb.gz.2 && mv ${BACKUPROOT}/postgres-dumpall-flogdb.gz.2 ${BACKUPROOT}/postgres-dumpall-flogdb.gz.3 -test -f ${BACKUPROOT}/postgres-dumpall-flogdb.gz.1 && mv ${BACKUPROOT}/postgres-dumpall-flogdb.gz.1 ${BACKUPROOT}/postgres-dumpall-flogdb.gz.2 - -echo "Running postgres pg_dumpall..." - -cd ${BACKUPROOT} -/usr/bin/docker exec ${DBCONTAINER} sudo -u postgres /usr/bin/pg_dumpall | /bin/gzip > postgres-dumpall-flogdb.gz - -mv ${BACKUPROOT}/postgres-dumpall-flogdb.gz ${BACKUPROOT}/postgres-dumpall-flogdb.gz.1 - -- cgit v1.1