Update dsmon.sh

This commit is contained in:
2025-06-14 13:06:05 -05:00
parent 03efb79e42
commit 1f40ba4400

216
dsmon.sh
View File

@@ -149,7 +149,8 @@ CHECK(){
for hostname in "${hostnames_sort[@]}"; do
until [ $(ls /tmp/dscheck.*.running 2>/dev/null | wc -l) -lt 12 ]; do tmp=tmp; done
# RUN_CHECK ${hostname} > /tmp/dscheck.${hostname}.results &
unbuffer bash -c "/usr/local/bin/dsmon runcheck '${hostname}'" > /tmp/dscheck.${hostname}.results &
# unbuffer bash -c "/usr/local/bin/dsmon runcheck '${hostname}'" > /tmp/dscheck.${hostname}.results &
RUN_CHECK "$hostname" "/tmp/dscheck.${hostname}.results" &
done
[ "${1}" != "report" ] && echo -e "${idsCL[LightGreen]}Done${idsCL[Default]}\n"
@@ -217,136 +218,139 @@ mv /tmp/dscheck.${hostname}.running /tmp/dscheck.${hostname}.done
}
# Print ${1} spaces on stdout; a zero, negative or missing count prints nothing.
# Used for column padding. Clamping at zero keeps over-long names from hanging
# the check: the former `until [ $c = $n ]` counters never terminated when the
# text was wider than its column.
_PAD_SPACES(){
  local count=${1:-0}
  if (( count > 0 )); then
    printf '%*s' "${count}" ''
  fi
  return 0
}

#######################################
# Run the free-space and mail-log check for one host and append the rendered
# report to a results file.
#
# Globals (read):
#   host_ids, host_name, host_ip, host_limits  - host configuration tables
#   idsCL, idsBG, idsST                        - colour/style escape tables
#   SSHCMD                                     - ssh command line (word-split on purpose)
# Helpers called (defined elsewhere in this file):
#   DIVIDER, CHECK_HOST, SENDNOTICE
# Arguments:
#   $1 - host key; indexes host_ids and names the /tmp/dscheck.<key>.* markers
#   $2 - file that all stdout of the check is appended to
# Side effects:
#   creates /tmp/dscheck.<key>.running for the duration of the check and
#   renames it to /tmp/dscheck.<key>.done when finished;
#   maintains /tmp/dsmon.mail-log.<ip>.errorsent to rate-limit mail-log notices.
# External tools: bc, jq, sort, cut, grep, awk, stat, date.
#######################################
RUN_CHECK(){
  local hostname=${1}
  local outfile="$2"
  local hostid MSG cw spc spcA spcB spcC spcD checkhost
  local hl hlname hllim hl_rest df_rows NUMDRIVES i ii
  local dname dname_short size_mb free_mb dtot dfree dfreeper
  local DRIVE limit WARNING_LEVEL CRITICAL_LEVEL D_WARNING_LEVEL D_CRITICAL_LEVEL
  local fs_status fs_priority fs_status_color notice_body
  local maillogsize mail_flag flag_age
  {
    touch "/tmp/dscheck.${hostname}.running"
    hostid=${host_ids[$hostname]}

    # Host banner, padded to the report width.
    MSG=" ${host_name[$hostid]} - (${host_ip[$hostid]})"
    cw=85
    spc=$(_PAD_SPACES $(( cw - ${#MSG} )))
    DIVIDER 1 . ${cw}
    echo -e "${idsBG[Blue]}${idsCL[White]}${MSG}${spc}${idsST[Reset]}"
    DIVIDER . . ${cw}

    checkhost=$(CHECK_HOST "${host_ip[$hostid]}")
    if [ "${checkhost}" != "false" ]; then
      # Per-drive limits are stored as whitespace-separated "name:limit" pairs.
      local -A host_limits_tmp=()
      for hl in ${host_limits[${hostid}]}; do
        hlname=${hl%%:*}
        hl_rest=${hl#*:}
        hllim=${hl_rest%%:*}
        host_limits_tmp[$hlname]=$hllim
      done

      # One "device size available" triple per filesystem, sizes in MiB ("123M").
      # SSHCMD carries its own arguments, so it must stay unquoted.
      # shellcheck disable=SC2086
      df_rows=$(${SSHCMD} "root@${host_ip[$hostid]}" df -BM | grep -vE '^Filesystem|tmpfs|cdrom|@|ram|loop|udev|veeamimage|localhost|shm|overlay|-volume|Music|Software|//|AFS|PlexData_VG' | awk '{ print $1 " " $2 " " $4 }')
      # Intentional word splitting: flatten the rows into one list of fields.
      # shellcheck disable=SC2206
      local -a DRIVEINFO=(${df_rows})

      NUMDRIVES=$(( ${#DRIVEINFO[@]} / 3 ))
      local -A DRIVEINFO_TOT=() DRIVEINFO_FREE=() DRIVEINFO_FREEPER=() DRIVEINFO_SHORTNAME=()
      local -a DRIVES=()

      for (( i = 0; i < NUMDRIVES; i++ )); do
        ii=$(( i * 3 ))
        dname=${DRIVEINFO[ii]}
        dname=${dname//\/dev\/mapper\//}
        dname=${dname//\/dev\//}
        # Display name: drop everything up to "vg-" and a trailing "_v".
        dname_short=${dname#*vg-}
        dname_short=${dname_short%*_v}
        [ "$dname_short" = "" ] && dname_short=$dname

        size_mb=${DRIVEINFO[ii+1]//M/}
        free_mb=${DRIVEINFO[ii+2]//M/}

        # Filesystems under 1 GiB are treated as system partitions and skipped.
        # (The original per-device-name branches tested an unset variable and
        # all reduced to this single size test.)
        if [ "${size_mb}" -lt 1024 ]; then
          systempartition=true
          continue
        fi

        # MiB -> GiB. The "M" suffix is stripped first instead of relying on
        # bc reading it as a digit and compensating with an extra /10.
        dtot=$(bc <<< "scale=2; ${size_mb}/1024")
        dfree=$(bc <<< "scale=2; ${free_mb}/1024")
        dfreeper=$(printf "%0.2f" "$(jq -n "${dfree}/${dtot}*100")")

        DRIVEINFO_TOT[$dname]=$dtot
        DRIVEINFO_FREE[$dname]=$dfree
        DRIVEINFO_FREEPER[$dname]=$dfreeper
        DRIVEINFO_SHORTNAME[$dname]=$dname_short
        DRIVES+=("$dname")
      done

      # Sort drive names; skipped when empty so no blank entry is produced.
      if (( ${#DRIVES[@]} > 0 )); then
        mapfile -t DRIVES < <(printf '%s\n' "${DRIVES[@]}" | sort)
      fi

      # Column header.
      spcA=$(_PAD_SPACES $(( 18 - 7 )))
      spcB=$(_PAD_SPACES $(( 10 - 4 )))
      spcC=$(_PAD_SPACES $(( 10 - 5 )))
      spcD=$(_PAD_SPACES $(( 14 - 3 )))
      echo -e "${idsST[Reset]}${idsCL[White]} DRIVE${spcA} FREE GB${spcB}FREE %${spcC}TOTAL GB${spcD}WARN % / CRIT %${idsCL[Default]}"

      for DRIVE in "${DRIVES[@]}"; do
        # Configured limit is a "percent used" threshold; default is 81.
        limit=${host_limits_tmp[$DRIVE]:-81}

        # Thresholds expressed as percent FREE: warn at 100-limit, critical at half of that.
        WARNING_LEVEL=$(printf "%0.2f" "$(jq -n "100-${limit}")")
        CRITICAL_LEVEL=$(printf "%0.2f" "$(jq -n "${WARNING_LEVEL}-${WARNING_LEVEL}/2")")

        spcA=$(_PAD_SPACES $(( 18 - ${#DRIVEINFO_SHORTNAME[$DRIVE]} )))
        spcB=$(_PAD_SPACES $(( 10 - ${#DRIVEINFO_FREE[$DRIVE]} )))
        spcC=$(_PAD_SPACES $(( 10 - ${#DRIVEINFO_FREEPER[$DRIVE]} )))
        spcD=$(_PAD_SPACES $(( 14 - ${#DRIVEINFO_TOT[$DRIVE]} )))

        if (( $(bc <<<"${DRIVEINFO_FREEPER[$DRIVE]} <= ${CRITICAL_LEVEL}") )); then
          fs_status='CRITICAL'
          fs_priority=2
          fs_status_color='RedBold'
        elif (( $(bc <<<"${DRIVEINFO_FREEPER[$DRIVE]} <= ${WARNING_LEVEL}") )); then
          fs_status='Warning'
          fs_priority=1
          fs_status_color='Yellow'
        elif (( $(bc <<<"${DRIVEINFO_FREE[$DRIVE]} < 5") )); then
          # Percentage is fine but fewer than 5 GB remain in absolute terms.
          fs_status='Warning'
          fs_priority=1
          fs_status_color='Magenta'
        else
          fs_status='Normal'
          fs_priority=0
          fs_status_color='Green'
        fi

        # Displayed thresholds are converted back to "percent used".
        D_WARNING_LEVEL=$(printf "%0.2f" "$(jq -n "100-${WARNING_LEVEL}")")
        D_CRITICAL_LEVEL=$(printf "%0.2f" "$(jq -n "100-${CRITICAL_LEVEL}")")
        echo -e "${idsCL[Cyan]} ${DRIVEINFO_SHORTNAME[$DRIVE]}${spcA}${idsCL[$fs_status_color]}${DRIVEINFO_FREE[$DRIVE]} GB${spcB}${DRIVEINFO_FREEPER[$DRIVE]}%${spcC}${DRIVEINFO_TOT[$DRIVE]} GB${idsST[Reset]}${spcD}${idsCL[Cyan]}( ${idsCL[Yellow]}${D_WARNING_LEVEL}%${idsCL[Cyan]} / ${idsCL[RedBold]}${D_CRITICAL_LEVEL}%${idsST[Reset]}${idsCL[Cyan]} )${idsCL[Default]}"

        # NOTE(review): ${1} is the host key here, so this only fires for a host
        # literally named "report". Presumably a run mode was meant; confirm
        # against the caller before changing.
        if [ "${fs_status}" != "Normal" ] && [ "${1}" == "report" ]; then
          notice_body="${host_name[$hostid]} - (${host_ip[$hostid]})"$'\n'"${DRIVE} : ${DRIVEINFO_FREE[$DRIVE]}GB out of ${DRIVEINFO_TOT[$DRIVE]}GB Free (${DRIVEINFO_FREEPER[$DRIVE]})percent"
          SENDNOTICE "Free Space ${fs_status}: '${host_name[$hostid]}'" "${notice_body}" "${fs_priority}"
        fi
      done

      # Mail-log size check: `du -hs` reports a "G" suffix once the log reaches gigabytes.
      mail_flag="/tmp/dsmon.mail-log.${host_ip[$hostid]}.errorsent"
      # shellcheck disable=SC2086
      maillogsize=$(${SSHCMD} "root@${host_ip[$hostid]}" du -hs /var/log/mail.log | cut -d'/' -f 1)
      maillogsize=${maillogsize//[[:blank:]]/}
      if [[ "${maillogsize}" = *'G'* ]]; then
        # Notify at most once per hour; the flag file's mtime records the last notice.
        flag_age=3601
        if [ -f "${mail_flag}" ]; then
          flag_age=$(( $(date +%s) - $(stat -c %Y "${mail_flag}") ))
        fi
        if (( flag_age > 3600 )); then
          SENDNOTICE "Mail Log Getting Big" "Mail log getting big (${maillogsize}) on ${hostname} [${host_ip[$hostid]}]" 1
          touch "${mail_flag}"
        fi
      else
        # Log is back under a gigabyte: clear the rate-limit flag.
        rm -f "${mail_flag}"
      fi
    else
      echo -e "${idsCL[Yellow]} ${host_name[$hostid]} is down${idsCL[Default]}"; echo
    fi

    # Signal completion to the scheduler that counts *.running markers.
    mv "/tmp/dscheck.${hostname}.running" "/tmp/dscheck.${hostname}.done"
  } >> "$outfile"
}
SETUPSSH(){