import snippets from gitlab
This commit is contained in:
333
check_iostat/check_iostat.sh
Normal file
333
check_iostat/check_iostat.sh
Normal file
@@ -0,0 +1,333 @@
|
||||
#!/bin/bash
|
||||
# set -x
|
||||
#----------check_iostat.sh-----------
|
||||
#
|
||||
# Version 0.0.2 - Jan/2009
|
||||
# Changes: added device verification
|
||||
#
|
||||
# by Thiago Varela - thiago@iplenix.com
|
||||
#
|
||||
# Version 0.0.3 - Dec/2011
|
||||
# Changes:
|
||||
# - changed values from bytes to mbytes
|
||||
# - fixed bug to get traffic data without comma but point
|
||||
# - current values are displayed now, not average values (first run of iostat)
|
||||
#
|
||||
# by Philipp Niedziela - pn@pn-it.com
|
||||
#
|
||||
# Version 0.0.4 - April/2014
|
||||
# Changes:
|
||||
# - Allow Empty warn/crit levels
|
||||
# - Can check I/O, WAIT Time, or Queue
|
||||
#
|
||||
# by Warren Turner
|
||||
#
|
||||
# Version 0.0.5 - Jun/2014
|
||||
# Changes:
|
||||
# - removed -y flag from call since iostat doesn't know about it any more (June 2014)
|
||||
# - only needed executions of iostat are done now (save cpu time whenever you can)
|
||||
# - fixed the obvious problems of missing input values (probably because of the now unimplemented "-y") with -x values
|
||||
# - made perfomance data optional (I like to have choice in the matter)
|
||||
#
|
||||
# by Frederic Krueger / fkrueger-dev-checkiostat@holics.at
|
||||
#
|
||||
# Version 0.0.6 - Jul/2014
|
||||
# Changes:
|
||||
# - Cleaned up argument checking, removed excess iostat calls, steamlined if statements and renamed variables to fit current use
|
||||
# - Fixed all inputs to match current iostat output (Ubuntu 12.04)
|
||||
# - Changed to take last ten seconds as default (more useful for nagios usage). Will go to "since last reboot" (previous behaviour) on -g flag.
|
||||
# - added extra comments/whitespace etc to make add readability
|
||||
#
|
||||
# by Ben Field / ben.field@concreteplatform.com
|
||||
#
|
||||
# Version 0.0.7 - Sep/2014
|
||||
# Changes:
|
||||
# - Fixed performance data for Wait check
|
||||
#
|
||||
# by Christian Westergard / christian.westergard@gmail.com
|
||||
#
|
||||
|
||||
iostat=`which iostat 2>/dev/null`
|
||||
bc=`which bc 2>/dev/null`
|
||||
|
||||
function help {
|
||||
echo -e "
|
||||
Usage:
|
||||
|
||||
-d =
|
||||
--Device to be checked. Example: \"-d sda\"
|
||||
|
||||
Run only one of i, q, W:
|
||||
|
||||
-i = IO Check Mode
|
||||
--Checks Total Transfers/sec, Read IO/Sec, Write IO/Sec,
|
||||
Bytes Read/Sec, Bytes Written/Sec
|
||||
--warning/critical = Total Transfers/sec,Read IO/Sec,Write
|
||||
IO/Sec,Bytes Read/Sec,Bytes Written/Sec
|
||||
|
||||
-q = Queue Mode
|
||||
--Checks Disk Queue Lengths
|
||||
--warning/critial = Average size of requests, Queue length of requests
|
||||
|
||||
-W = Wait Time Mode
|
||||
--Check the time for I/O requests issued to the device to be served.
|
||||
This includes the time spent by the requests in queue and the time
|
||||
spent servicing them.
|
||||
--warning/critical = Avg I/O Wait Time (ms), Avg Read Wait Time (ms),
|
||||
Avg Write Wait Time (ms), Avg Service Wait Time (ms), Avg CPU
|
||||
Utilization
|
||||
|
||||
-w,-c = pass warning and critical levels respectively. These are not
|
||||
required, but with out them, all queries will return as OK.
|
||||
|
||||
-p = Provide performance data for later graphing
|
||||
|
||||
-g = Since last reboot for system (more for debugging that nagios use!)
|
||||
|
||||
-h = This help
|
||||
"
|
||||
exit -1
|
||||
}
|
||||
|
||||
# Ensuring we have the needed tools:
|
||||
( [ ! -f $iostat ] || [ ! -f $bc ] ) && \
|
||||
( echo "ERROR: You must have iostat and bc installed in order to run this plugin\n\tuse: apt-get install systat bc\n" && exit -1 )
|
||||
|
||||
io=0
|
||||
queue=0
|
||||
waittime=0
|
||||
printperfdata=0
|
||||
STATE="OK"
|
||||
sampleTime=3
|
||||
samples=2i
|
||||
status=0
|
||||
|
||||
MSG=""
|
||||
PERFDATA=""
|
||||
|
||||
#------------Argument Set-------------
|
||||
while getopts "d:w:c:ipqWhg" OPT; do
|
||||
case $OPT in
|
||||
"d") disk=$OPTARG;;
|
||||
"w") warning=$OPTARG;;
|
||||
"c") critical=$OPTARG;;
|
||||
"i") io=1;;
|
||||
"p") printperfdata=1;;
|
||||
"q") queue=1;;
|
||||
"W") waittime=1;;
|
||||
"g") samples=1;;
|
||||
"h") echo "help:" && help;;
|
||||
\?) echo "Invalid option: -$OPTARG" >&2
|
||||
exit -1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Autofill if parameters are empty
|
||||
if [ -z "$disk" ]
|
||||
then disk=sda
|
||||
fi
|
||||
|
||||
#Checks that only one query type is run
|
||||
[[ `expr $io+$queue+$waittime` -ne "1" ]] && \
|
||||
echo "ERROR: select one and only one run mode" && help
|
||||
|
||||
#set warning and critical to insane value is empty, else set the individual values
|
||||
if [ -z "$warning" ]
|
||||
then warning=99999
|
||||
else
|
||||
#TPS with IO, Request size with queue
|
||||
warn_1=`echo $warning | cut -d, -f1`
|
||||
#Read/s with IO,Queue Length with queue
|
||||
warn_2=`echo $warning | cut -d, -f2`
|
||||
#Write/s with IO
|
||||
warn_3=`echo $warning | cut -d, -f3`
|
||||
#KB/s read with IO
|
||||
warn_4=`echo $warning | cut -d, -f4`
|
||||
#KB/s written with IO
|
||||
warn_5=`echo $warning | cut -d, -f5`
|
||||
#Crude hack due to integer expression later in the script
|
||||
warning=1
|
||||
fi
|
||||
|
||||
if [ -z "$critical" ]
|
||||
then critical=99999
|
||||
else
|
||||
#TPS with IO, Request size with queue
|
||||
crit_1=`echo $critical | cut -d, -f1`
|
||||
#Read/s with IO,Queue Length with queue
|
||||
crit_2=`echo $critical | cut -d, -f2`
|
||||
#Write/s with IO
|
||||
crit_3=`echo $critical | cut -d, -f3`
|
||||
#KB/s read with IO
|
||||
crit_4=`echo $critical | cut -d, -f4`
|
||||
#KB/s written with IO
|
||||
crit_5=`echo $critical | cut -d, -f5`
|
||||
#Crude hack due to integer expression later in the script
|
||||
critical=1
|
||||
fi
|
||||
#------------Argument Set End-------------
|
||||
|
||||
|
||||
#------------Parameter Check-------------
|
||||
#Checks for sane Disk name:
|
||||
if [ ! `echo $disk | grep ^\/dev\/` ]; then
|
||||
disk="/dev/$disk"
|
||||
fi
|
||||
[ ! -b "$disk" ] && echo "ERROR: Device incorrectly specified" && help
|
||||
|
||||
#Checks for sane warning/critical levels
|
||||
if ( [[ $warning -ne "99999" ]] || [[ $critical -ne "99999" ]] ); then
|
||||
if ( [ "`echo "$warn_1 > $crit_1" | bc`" == "1" ] || \
|
||||
[ "`echo "$warn_2 > $crit_2" | bc`" == "1" ] ); then
|
||||
echo "ERROR: critical levels must be higher than warning levels" && help
|
||||
elif ( [[ $io -eq "1" ]] || [[ $waittime -eq "1" ]] ); then
|
||||
# if ( [[ "$warn_3" -gt "$crit_3" ]] || [[ "$warn_4" -gt "$crit_4" ]] || [[ "$warn_5" -gt "$crit_5" ]] ); then
|
||||
if ( [ "`echo "$warn_3 > $crit_3" | bc`" == "1" ] || \
|
||||
[ "`echo "$warn_4 > $crit_4" | bc`" == "1" ] || \
|
||||
[ "`echo "$warn_5 > $crit_5" | bc`" == "1" ] ); then
|
||||
echo "ERROR: critical levels must be higher than warning levels" && help
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
#------------Parameter Check End-------------
|
||||
|
||||
|
||||
# iostat parameters:
|
||||
# -m: megabytes
|
||||
# -k: kilobytes
|
||||
# first run of iostat shows statistics since last reboot, second one shows current vaules of hdd
|
||||
# -d is the duration for second run, -x the rest
|
||||
|
||||
TMPX=`$iostat $disk -x -k -d $sampleTime $samples | sed '/^$/d' | tail -1`
|
||||
|
||||
#------------IO Test-------------
|
||||
if [ "$io" == "1" ]; then
|
||||
|
||||
TMPD=`$iostat $disk -k -d $sampleTime $samples | sed '/^$/d' | tail -1`
|
||||
#Requests per second:
|
||||
tps=`echo "$TMPD" | awk '{print $2}'`
|
||||
read_sec=`echo "$TMPX" | awk '{print $4}'`
|
||||
written_sec=`echo "$TMPX" | awk '{print $5}'`
|
||||
|
||||
#Kb per second:
|
||||
kbytes_read_sec=`echo "$TMPX" | awk '{print $6}'`
|
||||
kbytes_written_sec=`echo "$TMPX" | awk '{print $7}'`
|
||||
|
||||
# "Converting" values to float (string replace , with .)
|
||||
tps=${tps/,/.}
|
||||
read_sec=${read_sec/,/.}
|
||||
written_sec=${written_sec/,/.}
|
||||
kbytes_read_sec=${kbytes_read_sec/,/.}
|
||||
kbytes_written_sec=${kbytes_written_sec/,/.}
|
||||
|
||||
# Comparing the result and setting the correct level:
|
||||
if [ "$warning" -ne "99999" ]; then
|
||||
if ( [ "`echo "$tps >= $warn_1" | bc`" == "1" ] || [ "`echo "$read_sec >= $warn_2" | bc`" == "1" ] || \
|
||||
[ "`echo "$written_sec >= $warn_3" | bc`" == "1" ] || [ "`echo "$kbytes_read_sec >= $warn_4" | bc -q`" == "1" ] ||
|
||||
[ "`echo "$kbytes_written_sec >= $warn_5" | bc`" == "1" ] ); then
|
||||
STATE="WARNING"
|
||||
status=1
|
||||
fi
|
||||
fi
|
||||
if [ "$critical" -ne "99999" ]; then
|
||||
if ( [ "`echo "$tps >= $crit_1" | bc`" == "1" ] || [ "`echo "$read_sec >= $crit_2" | bc -q`" == "1" ] || \
|
||||
[ "`echo "$written_sec >= $crit_3" | bc`" == "1" ] || [ "`echo "$kbytes_read_sec >= $crit_4" | bc -q`" == "1" ] || \
|
||||
[ "`echo "$kbytes_written_sec >= $crit_5" | bc`" == "1" ] ); then
|
||||
STATE="CRITICAL"
|
||||
status=2
|
||||
fi
|
||||
fi
|
||||
# Printing the results:
|
||||
MSG="$STATE - I/O stats: Transfers/Sec=$tps Read Requests/Sec=$read_sec Write Requests/Sec=$written_sec KBytes Read/Sec=$kbytes_read_sec KBytes_Written/Sec=$kbytes_written_sec"
|
||||
PERFDATA=" | total_io_sec=$tps; read_io_sec=$read_sec; write_io_sec=$written_sec; kbytes_read_sec=$kbytes_read_sec; kbytes_written_sec=$kbytes_written_sec;"
|
||||
fi
|
||||
#------------IO Test End-------------
|
||||
|
||||
|
||||
#------------Queue Test-------------
|
||||
if [ "$queue" == "1" ]; then
|
||||
qsize=`echo "$TMPX" | awk '{print $8}'`
|
||||
qlength=`echo "$TMPX" | awk '{print $9}'`
|
||||
|
||||
# "Converting" values to float (string replace , with .)
|
||||
qsize=${qsize/,/.}
|
||||
qlength=${qlength/,/.}
|
||||
|
||||
# Comparing the result and setting the correct level:
|
||||
if [ "$warning" -ne "99999" ]; then
|
||||
if ( [ "`echo "$qsize >= $warn_1" | bc`" == "1" ] || [ "`echo "$qlength >= $warn_2" | bc`" == "1" ] ); then
|
||||
STATE="WARNING"
|
||||
status=1
|
||||
fi
|
||||
fi
|
||||
if [ "$critical" -ne "99999" ]; then
|
||||
if ( [ "`echo "$qsize >= $crit_1" | bc`" == "1" ] || [ "`echo "$qlength >= $crit_2" | bc`" == "1" ] ); then
|
||||
STATE="CRITICAL"
|
||||
status=2
|
||||
fi
|
||||
fi
|
||||
|
||||
# Printing the results:
|
||||
MSG="$STATE - Disk Queue Stats: Average Request Size=$qsize Average Queue Length=$qlength"
|
||||
PERFDATA=" | qsize=$qsize; queue_length=$qlength;"
|
||||
fi
|
||||
#------------Queue Test End-------------
|
||||
|
||||
|
||||
#------------Wait Time Test-------------
|
||||
#Parse values. Warning - svc time will soon be deprecated and these will need to be changed. Future parser could look at first line (labels) to suggest correct column to return
|
||||
if [ "$waittime" == "1" ]; then
|
||||
avgwait=`echo "$TMPX" | awk '{print $10}'`
|
||||
if [ `echo "$TMPX" | wc -w` == "14" ]; then
|
||||
avgrwait=`echo "$TMPX" | awk '{print $11}'`
|
||||
avgwwait=`echo "$TMPX" | awk '{print $12}'`
|
||||
avgsvctime=`echo "$TMPX" | awk '{print $13}'`
|
||||
avgcpuutil=`echo "$TMPX" | awk '{print $14}'`
|
||||
else
|
||||
avgrwait="N/A"
|
||||
avgwwait="N/A"
|
||||
avgsvctime=`echo "$TMPX" | awk '{print $11}'`
|
||||
avgcpuutil=`echo "$TMPX" | awk '{print $12}'`
|
||||
fi
|
||||
|
||||
|
||||
# "Converting" values to float (string replace , with .)
|
||||
avgwait=${avgwait/,/.}
|
||||
avgrwait=${avgrwait/,/.}
|
||||
avgwwait=${avgwwait/,/.}
|
||||
avgsvctime=${avgsvctime/,/.}
|
||||
avgcpuutil=${avgcpuutil/,/.}
|
||||
|
||||
# Comparing the result and setting the correct level:
|
||||
if [ "$warning" -ne "99999" ]; then
|
||||
if ( [ "`echo "$avgwait >= $warn_1" | bc`" == "1" ] || [ "`echo "$avgrwait >= $warn_2" | bc -q`" == "1" ] || \
|
||||
[ "`echo "$avgwwait >= $warn_3" | bc`" == "1" ] || [ "`echo "$avgsvctime >= $warn_4" | bc -q`" == "1" ] || \
|
||||
[ "`echo "$avgcpuutil >= \"$warn_5\"" | bc`" == "1" ] ); then
|
||||
STATE="WARNING"
|
||||
status=1
|
||||
fi
|
||||
fi
|
||||
if [ "$critical" -ne "99999" ]; then
|
||||
if ( [ "`echo "$avgwait >= $crit_1" | bc`" == "1" ] || [ "`echo "$avgrwait >= $crit_2" | bc -q`" == "1" ] || \
|
||||
[ "`echo "$avgwwait >= $crit_3" | bc`" == "1" ] || [ "`echo "$avgsvctime >= $crit_4" | bc -q`" == "1" ] || \
|
||||
[ "`echo "$avgcpuutil >= $crit_5" | bc`" == "1" ] ); then
|
||||
STATE="CRITICAL"
|
||||
status=2
|
||||
fi
|
||||
fi
|
||||
|
||||
# Printing the results:
|
||||
MSG="$STATE - Wait Time Stats: Avg I/O Wait Time (ms)=$avgwait Avg Read Wait Time (ms)=$avgrwait Avg Write Wait Time (ms)=$avgwwait Avg Service Wait Time (ms)=$avgsvctime Avg CPU Utilization=$avgcpuutil"
|
||||
PERFDATA=" | avg_io_waittime_ms=$avgwait; avg_r_waittime_ms=$avgrwait; avg_w_waittime_ms=$avgwwait; avg_service_waittime_ms=$avgsvctime; avg_cpu_utilization=$avgcpuutil;"
|
||||
fi
|
||||
#------------Wait Time End-------------
|
||||
|
||||
# now output the official result
|
||||
echo -n "$MSG"
|
||||
if [ "x$printperfdata" == "x1" ]; then
|
||||
echo -n "$PERFDATA";
|
||||
fi
|
||||
echo ""
|
||||
exit $status
|
||||
#----------/check_iostat.sh-----------
|
||||
3
check_iostat/readme.md
Normal file
3
check_iostat/readme.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# check_iostat.sh
|
||||
|
||||
Source: [https://raw.githubusercontent.com/fayetted/nagios-check_iostat/master/check_iostat.sh]
|
||||
Reference in New Issue
Block a user