Files
NodeMgmt/inc/status.inc
2025-10-01 22:07:04 -05:00

1246 lines
66 KiB
Bash
Executable File

#!/usr/bin/env bash
#######################################
# Build the left-alignment padding for a label.
# Sets the global `spc` to (width - label length) spaces. When the label is
# as wide as, or wider than, the column the padding is empty (the previous
# counting loop never terminated in that case).
# Arguments:
#   $1 - column width
#   $2 - label text
#######################################
_STATUS_PAD(){
  local -i padlen=$(( ${1} - ${#2} ))
  (( padlen < 0 )) && padlen=0
  printf -v spc '%*s' "${padlen}" ''
}

#######################################
# Run the node status scan and print/notify the results.
#
# Arguments:
#   $1 - either a node-type key (matched upper-cased against NM_NODETYPES),
#        or the action itself. Actions handled here: "" / report / services /
#        dockers / check / all (service+docker scan), repl (replication;
#        "sync" is accepted as an alias), freespace.
#   $2 - the action when $1 is a node type. It is also compared against
#        "email" / "report" to tweak what is printed.
#
# Sections, in order:
#   1. decide whether a replication check is due (REPLRUN)
#   2. start REPLSTART / REPLCHECK background workers
#   3. start one STATUS_NODE worker per node type, check LOCAL_SERVICES,
#      then collect the workers' result files
#   4. evaluate replication marker files, notify, clean up test files
#   5. free-space scan (only for the "freespace" action)
#
# Variables are deliberately NOT made local: STATUS_NODE is launched from
# here and reads STATUS_ACTION, NTYPE, DV_LEN, SV_LEN and PRI_CW, and it
# relies on the integer attribute of cw/spc1/c declared below.
#
# Outputs: status report on stdout; appends to NM_LOGFILE and, for the
#          "report" action, to ${NM_LOGFOLDER}/status-check.scantimes.
#######################################
STATUS(){
  local repl_loc
  start=$(date +%s)
  log_start=$(date "+%Y-%m-%d %H:%M:%S")
  [ "${NM_SCAN_THREADS}" != "" ] && SCAN_THREADS=${NM_SCAN_THREADS} || SCAN_THREADS=4
  if [ "${1}" != "" ] && [ "${NM_NODETYPES[${1^^}]}" != "" ]; then
    # First argument names a single node type; the action is the second one.
    ntss=${1^^}
    ntypesel=(${ntss})
    STATUS_ACTION=${2}
  else
    ntss=""
    ntypesel=(${NM_NODE_TYPES[@]})
    [ "${1}" == "sync" ] && STATUS_ACTION=repl || STATUS_ACTION=${1}
  fi
  # while [ $# -gt 0 ]; do
  # case "$1" in
  # -site) DEL_SITE=${2};;
  # -ssl) DEL_SSL=${2};;
  #
  # esac
  # shift
  # done
  PRI_CW=40
  LD_CW=62
  DV_LEN=80
  SV_LEN=55
  # Keep these: STATUS_NODE computes "spc1=${cw}-${#...}" and depends on the
  # integer attribute being inherited from this function's scope.
  declare -i cw; declare -i spc1; declare -i c
  ############################
  ## REPLICATION CHECK SETUP
  ############################
  # REPLRUN=0
  # REPLRUN=1 when the last check (mtime of .replcheck) is older than
  # NM_REPL_CHECK_TIMEOUT minutes, or a previous run left .replcheck.fail
  # behind more than 60 seconds ago.
  ([ "${STATUS_ACTION}" != "services" ] && [ "${NM_DISABLE_REPL_CHECK}" != "true" ] && ([ ! -f ${NM_TMPFOLDER}/.replcheck ] || [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/.replcheck)) -ge $(expr ${NM_REPL_CHECK_TIMEOUT} \* 60) ])) && REPLRUN=1 || REPLRUN=0
  ([ -f ${NM_TMPFOLDER}/.replcheck.fail ] && [ ${REPLRUN} -eq 0 ] && [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/.replcheck.fail)) -ge 60 ]) && REPLRUN=1
  [ ${REPLRUN} -eq 1 ] && touch ${NM_TMPFOLDER}/.replcheck
  # if [ "${STATUS_ACTION}" == "report" ] || [ "${STATUS_ACTION}" == "repl" ] || [ "${STATUS_ACTION}" == "check" ] || [ "${STATUS_ACTION}" == "" ]; then
  if [ "${STATUS_ACTION}" == "repl" ] || ([ "${STATUS_ACTION}" == "report" ] && [ ${REPLRUN} -eq 1 ]) || ([ "${ntss}" != "" ] && [ "${NM_REPL_CHECK[${ntss}]}" != "" ] && [ "${STATUS_ACTION}" == "repl" ]); then
    touch ${NM_TMPFOLDER}/.replcheck
    if [ "${STATUS_ACTION}" != "check" ]; then
      MSG="Setting up replication checks"
      _STATUS_PAD ${LD_CW} "${MSG}"
      echo -en "${idsCL[LightCyan]}${MSG}${spc}: "
    fi
    for NTYPE in "${ntypesel[@]}"; do
      REPLSTART ${NTYPE} &
    done
    if [ "${STATUS_ACTION}" != "check" ]; then
      echo -e "${idsCL[LightGreen]}Done${idsCL[Default]}"
      MSG="Starting processes to collect/monitor replication checks"
      _STATUS_PAD ${LD_CW} "${MSG}"
      echo -en "${idsCL[LightCyan]}${MSG}${spc}: "
    fi
    for NTYPE in "${ntypesel[@]}"; do
      PH=${NTYPE}_HOSTS[0]
      PHA=${NTYPE}_HOSTS
      skip=0
      if [ -f ${NM_TMPFOLDER}/${!PH}.disable ] || [ -f ${NM_TMPFOLDER}/${!PH}.skip ] || [ -f ${NM_TMPFOLDER}/${!PH}.down ]; then
        if [ $(eval echo \${#${PHA}[@]}) -eq 2 ]; then
          skip=1
        else
          # Primary unavailable: fall back to the second host of THIS node
          # type (was ${NODETYPE}, which is never set inside STATUS).
          PH=${NTYPE}_HOSTS[1]
        fi
      fi
      if [ ${skip} -eq 0 ]; then
        PH_CMD="${SSHCMD} root@${!PH}"
        var=${NTYPE}_REPL_CHECK[@]
        if [ ! -z ${!var+x} ]; then
          var=${NTYPE}_HOSTS[@]
          for nip in "${!var}"; do
            if [[ "${RUN_NODE_IP}" != *"${nip}"* ]] && [ "${nip}" != "${!PH}" ] && [ ! -f ${NM_TMPFOLDER}/${!PH}.disable ] && [ ! -f ${NM_TMPFOLDER}/${!PH}.down ]; then
              if [ ! -f ${NM_FOLDER}/${!PH}.down ] && [ ! -f ${NM_TMPFOLDER}/${nip}.down ]; then
                var2=${NTYPE}_REPL_CHECK[@]
                for rcheck in "${!var2}"; do
                  # Throttle: wait until fewer than SCAN_THREADS checks are
                  # running (sleep instead of spinning on ls/wc).
                  until [ $(ls ${STATUSRUN_TMPFOLDER}/repl.*.running 2>/dev/null | wc -l) -lt ${SCAN_THREADS} ]; do sleep 0.2; done
                  # Redirections must precede '&'; after it they apply to
                  # an empty command and REPLCHECK is not silenced.
                  REPLCHECK "${rcheck}" "${nip}" "${PH_CMD}" "${STATUS_ACTION}" >/dev/null 2>&1 &
                done
              fi
            fi
          done
        fi
      fi
    done
    [ "${STATUS_ACTION}" != "check" ] && echo -e "${idsCL[LightGreen]}Done${idsCL[Default]}"
  fi
  if [ "${STATUS_ACTION}" == "" ] || [ "${STATUS_ACTION}" == "report" ] || [ "${STATUS_ACTION}" == "services" ] || [ "${STATUS_ACTION}" == "dockers" ] || [ "${STATUS_ACTION}" == "check" ] || [ "${STATUS_ACTION}" == "all" ]; then
    ############################
    ## SERVICE/DOCKER CHECK SETUP
    ############################
    MSG="Starting processes to collect node information"
    _STATUS_PAD ${LD_CW} "${MSG}"
    [ "${STATUS_ACTION}" != "check" ] && echo -en "${idsCL[LightCyan]}${MSG}${spc}: "
    for NTYPE in "${ntypesel[@]}"; do
      until [ $(ls ${STATUSRUN_TMPFOLDER}/status-check.*.running 2>/dev/null | wc -l) -lt ${SCAN_THREADS} ]; do sleep 0.2; done
      # Each worker writes its report to a per-type results file that is
      # printed further down once the matching .done marker shows up.
      STATUS_NODE ${NTYPE} > ${STATUSRUN_TMPFOLDER}/status-check.${NTYPE}.results 2>/dev/null &
    done
    [ "${STATUS_ACTION}" != "check" ] && echo -e "${idsCL[LightGreen]}Done${idsCL[Default]}"
    ########################
    ## LOCALHOST CHECK
    ########################
    if [ ! -z ${LOCAL_SERVICES+x} ] && [ "${STATUS_ACTION}" != "dockers" ] && [ "${ntss}" == "" ]; then
      # Two samples of the aggregate "cpu " line of /proc/stat, one second apart.
      cpu_usage=$(awk '{u=$2+$4; t=$2+$4+$5; if (NR==1){u1=u; t1=t;} else print ($2+$4-u1) * 100 / (t-t1) "%"; }' <(grep 'cpu ' /proc/stat) <(sleep 1;grep 'cpu ' /proc/stat) | sed -e 's/%//g')
      if [ "${STATUS_ACTION}" != "check" ]; then
        lip=${RUN_NODE_IP}
        echo -e "\n$(DIVIDER . . ${DV_LEN})\n${idsCL[Yellow]}${idsST[Bold]} LOCALHOST Service Status${idsST[Reset]}${idsCL[Default]}"
        echo -e "$(DIVIDER . . ${DV_LEN})"
        echo -en " ${idsST[Bold]}${idsCL[LightCyan]}${NODE_HOSTNAME} ${idsST[Reset]}${idsCL[LightCyan]}[${lip}]"
        if ([ "${STATUS_ACTION}" == "report" ] && [ "${2}" == "email" ]) || [ "${STATUS_ACTION}" != "report" ]; then
          uptime=$(uptime -p)
          echo -e "${idsCL[Yellow]}[CPU: $(IDS_NUMBER_FORMAT ${cpu_usage} 1)'%]${idsCL[White]} - ${uptime}${idsCL[Default]}"
        else
          echo -e "${idsCL[Default]}"
        fi
        DIVIDER . green $((DV_LEN - 10))
      fi
      # if [ "${STATUS_ACTION}" != "check" ]; then
      # lip=${RUN_NODE_IP}
      # uptime=`uptime -p`
      # echo -e " ${idsST[Bold]}${idsCL[LightCyan]}${NODE_HOSTNAME} (${lip})${idsST[Reset]}${idsCL[LightCyan]} - ${uptime} - localhost${idsCL[Default]}"
      # DIVIDER false green ${DV_LEN}
      # fi
      for srvc in "${LOCAL_SERVICES[@]}"; do
        if [ "${STATUS_ACTION}" != "check" ]; then
          if [ "${STATUS_ACTION}" != "report" ]; then
            _STATUS_PAD ${PRI_CW} "${NM_SERVICE_DESC[${srvc}]}"
          else
            spc=' '
          fi
          echo -en "${idsCL[White]} ${NM_SERVICE_DESC[${srvc}]}${spc}${idsCL[Default]}: "
        fi
        if [ "$(systemctl is-active ${srvc})" != "active" ]; then
          if [ ! -f ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.down ]; then
            # First time the service is seen down: notify and drop a marker.
            [ "${STATUS_ACTION}" != "check" ] && echo -e "${idsCL[Red]}Not Running${idsCL[Default]}"
            SENDNOTICE "${NODE_HOSTNAME}" "${NM_SERVICE_DESC[${srvc}]} is down" 1
            touch ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.down
            echo "$(date +%Y-%m-%d-%H-%M-%S) - LOCAL - ${NODE_HOSTNAME} - ${NM_SERVICE_DESC[${srvc}]} is down" >> ${NM_LOGFILE}
          else
            errtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.down))
            if [ ${errtime} -gt ${NM_RENOTIFY_TIMEOUT} ]; then
              # .errtime keeps the original outage start; .down is re-touched
              # so its mtime restarts the renotify window.
              [ ! -f ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.errtime ] && mv ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.down ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.errtime
              toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.errtime))
              [ "${STATUS_ACTION}" != "check" ] && echo -e "${idsCL[Red]}Not running for $(SHOW_TIME ${toterrtime})${idsCL[Default]}"
              touch ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.down
            fi
          fi
        else
          [ "${STATUS_ACTION}" != "check" ] && echo -e "${idsCL[LightGreen]}Running${idsCL[Default]}"
          if [ -f ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.down ]; then
            if [ -f ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.errtime ]; then
              toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.errtime))
            else
              toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.down))
            fi
            rm -f ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.*
            SENDNOTICE "${NODE_HOSTNAME}" "${NM_SERVICE_DESC[${srvc}]} is back UP! It was down for $(SHOW_TIME ${toterrtime})"
            echo "$(date +%Y-%m-%d-%H-%M-%S) - LOCAL - ${NODE_HOSTNAME} - ${NM_SERVICE_DESC[${srvc}]} is back up" >> ${NM_LOGFILE}
          fi
        fi
      done
    fi
    ###################################
    ## NODE SERVICE AND DOCKER CHECK
    ###################################
    # Poll once a second for status-check.<type>.done markers, print the
    # matching results file, and stop when every selected type was seen.
    nc_count=0; completed=false; scanstart=$(date +%s)
    until [ "${completed}" == "true" ]; do
      for nodestatus in ${STATUSRUN_TMPFOLDER}/status-check.*.done; do
        NTS=$(grep -oP '(?<=status-check.).*?(?=.done)' <<< "${nodestatus}")
        # An unmatched glob stays literal, which yields "*" here.
        if [ "${NTS}" != "*" ]; then
          nodestatus=$(cat ${STATUSRUN_TMPFOLDER}/status-check.${NTS}.results)
          [ "${nodestatus}" != "" ] && echo -e "${nodestatus}"
          rm -f ${STATUSRUN_TMPFOLDER}/status-check.${NTS}.done
          ((nc_count++))
          # echo "${NTS} == ${nc_count} == ${nodestatus}"
        fi
      done
      if [ $(($(date +%s)-scanstart)) -gt 600 ]; then
        # NOTE(review): "killall bash" terminates every bash process the
        # user may signal, not only this scan's workers - confirm intended.
        SENDNOTICE "NMG Service/Docker Scan Stuck" "NMG Service/Docker scan has been running for 10mins, klling all bash...." 1
        killall bash
        exit 1
      fi
      if [ ${nc_count} -eq ${#ntypesel[@]} ]; then
        completed=true
        break
      fi
      sleep 1s
    done
    # for NTYPE in "${ntypesel[@]}"; do
    # completed=false
    # until [ "${completed}" == "true" ]; do
    # if [ -f ${STATUSRUN_TMPFOLDER}/status-check.${NTYPE}.done ]; then
    # completed=true
    # echo -e "$(cat ${STATUSRUN_TMPFOLDER}/status-check.${NTYPE}.results)"
    # fi
    # done
    # done
  fi
  ########################
  ## REPLICATION CHECK
  ########################
  # if [ "${STATUS_ACTION}" == "report" ] || [ "${STATUS_ACTION}" == "repl" ] || [ "${STATUS_ACTION}" == "check" ] || [ "${STATUS_ACTION}" == "" ]; then
  # if [ "${STATUS_ACTION}" == "repl" ] || [ "${STATUS_ACTION}" == "" ] || ([ "${STATUS_ACTION}" == "report" ] && [ ${REPLRUN} -eq 1 ]); then
  if [ "${STATUS_ACTION}" == "repl" ] || ([ "${STATUS_ACTION}" == "report" ] && [ ${REPLRUN} -eq 1 ]) || ([ "${ntss}" != "" ] && [ "${NM_REPL_CHECK[${ntss}]}" != "" ] && [ "${STATUS_ACTION}" == "repl" ]); then
    if [ "${STATUS_ACTION}" != "check" ]; then
      echo -e "${idsST[Bold]}"; DIVIDER . . ${DV_LEN}
      echo -e "${idsCL[Yellow]} Replication Status Between the Primary and Secondary Nodes${idsCL[Default]}"
      DIVIDER . . ${DV_LEN}; echo -en "${idsST[Reset]}"
    fi
    for NTYPE in "${ntypesel[@]}"; do
      PH=${NTYPE}_HOSTS[0]
      PHA=${NTYPE}_HOSTS
      skip=0
      if [ -f ${NM_TMPFOLDER}/${!PH}.disable ] || [ -f ${NM_TMPFOLDER}/${!PH}.down ] || [ -f ${STATUSRUN_TMPFOLDER}/repl.${!PH}.skip ]; then
        if [ $(eval echo \${#${PHA}[@]}) -eq 2 ]; then
          skip=1
        else
          PH=${NTYPE}_HOSTS[1]
        fi
      fi
      PH_CMD="${SSHCMD} root@${!PH}"
      var=${NTYPE}_REPL_CHECK[@]
      if [ ! -z ${!var+x} ]; then
        var=${NTYPE}_HOSTS[@]
        for nip in "${!var}"; do
          if [[ "${RUN_NODE_IP}" != *"${nip}"* ]] && [ "${nip}" != "${!PH}" ] && [ ! -f ${STATUSRUN_TMPFOLDER}/repl.${!PH}.skip ]; then
            if [ "${STATUS_ACTION}" != "check" ]; then
              echo -e " ${idsCL[LightCyan]}${NM_HOSTNAMES[${!PH}]}[${!PH}] <--> ${idsST[Bold]}${NM_HOSTNAMES[${nip}]}[${nip}]${idsST[Reset]}${idsCL[Default]}"
              DIVIDER false green $((DV_LEN - 10))
            fi
            if [ ! -f ${NM_FOLDER}/${!PH}.down ] && [ ! -f ${NM_TMPFOLDER}/${nip}.down ]; then
              var2=${NTYPE}_REPL_CHECK[@]
              for rcheck in "${!var2}"; do
                if [ "${STATUS_ACTION}" != "check" ]; then
                  if [ "${STATUS_ACTION}" != "report" ]; then
                    _STATUS_PAD ${PRI_CW} "${NM_REPL_DESC[${rcheck}]}"
                  else
                    spc=' '
                  fi
                  echo -en " ${idsCL[White]}${NM_REPL_DESC[${rcheck}]}${spc}${idsCL[Default]}: "
                fi
                # Wait for a result marker (.good / .timeout / .skip) for
                # this check+node pair, giving up after 10 minutes.
                checked=""; scanstart=$(date +%s)
                until [ "${checked}" != "" ]; do
                  if [ -f ${STATUSRUN_TMPFOLDER}/repl.${rcheck}.${nip}.good ]; then
                    checked=good
                  elif [ -f ${STATUSRUN_TMPFOLDER}/repl.${rcheck}.${nip}.timeout ]; then
                    checked=timeout
                  elif [ ${skip} -eq 1 ] || [ -f ${STATUSRUN_TMPFOLDER}/repl.${rcheck}.${nip}.skip ] || [ -f ${STATUSRUN_TMPFOLDER}/repl.${nip}.skip ] || [ -f ${STATUSRUN_TMPFOLDER}/repl.${!PH}.skip ]; then
                    checked=skip
                  fi
                  if [ $(($(date +%s)-scanstart)) -gt 600 ]; then
                    SENDNOTICE "NMG Scan Stuck" "NMG Status Replication Check has been running for 10mins, klling all bash...." 1
                    touch ${NM_TMPFOLDER}/.replcheck.fail
                    mv ${STATUSRUN_TMPFOLDER} ${STATUSRUN_TMPFOLDER}.stuck
                    killall bash
                    exit 1
                  fi
                  # Nothing yet: back off briefly instead of spinning.
                  [ "${checked}" == "" ] && sleep 0.2
                done
                if [ "${checked}" == "timeout" ]; then
                  [ "${STATUS_ACTION}" != "check" ] && echo -e "${idsCL[Red]}Timeout${idsCL[Default]}"
                  if [ ! -f ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.down ]; then
                    touch ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.down
                  else
                    # if [ ! -f ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.disable ] && ([[ "${nip}" != *"10.2."* ]] || [ ! -f ${PW_TMPFOLDER}/OFF-vMS-Host1.down ]) && ([ -f ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.sent ] && [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.sent)) -gt ${NM_RENOTIFY_TIMEOUT} ]) || ([ ! -f ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.sent ] && [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.down)) -gt 60 ]); then
                    # NOTE(review): && and || associate left to right, so the
                    # .repl.disable test only gates the "renotify" half; the
                    # "first notice after 60s" half fires even when disabled.
                    # Left unchanged - confirm whether that is intended.
                    if [ ! -f ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.disable ] && ([ -f ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.sent ] && [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.sent)) -gt ${NM_RENOTIFY_TIMEOUT} ]) || ([ ! -f ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.sent ] && [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.down)) -gt 60 ]); then
                      [ ! -f ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.errtime ] && mv ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.down ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.errtime
                      echo "$(date +%Y-%m-%d-%H-%M-%S) - ${nip} - ${NM_HOSTNAMES[${nip}]}[${nip}] - (${NM_REPL_DESC[${rcheck}]}) Replicated folder timeout, it is not syncing" >> ${NM_LOGFILE}
                      touch ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.down
                      if [ ${NM_ENABLE_RENOTIFY} -eq 1 ] || [ ! -f ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.sent ]; then
                        # The message body intentionally spans two lines.
                        SENDNOTICE "Repl-Timeout-'${NM_HOSTNAMES[${nip}]}[${nip}]'" "${NM_REPL_DESC[${rcheck}]} (${NM_REPL_CHECK_LOC[${rcheck}]})
It has been down for $(SHOW_TIME $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.errtime)))" 1
                        touch ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.sent
                      fi
                    fi
                  fi
                elif [ "${checked}" == "good" ]; then
                  [ "${STATUS_ACTION}" != "check" ] && echo -e "${idsCL[LightGreen]}Good${idsCL[Default]}"
                  if [ -f ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.down ]; then
                    if [ -f ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.sent ]; then
                      SENDNOTICE "Repl-'${NM_HOSTNAMES[${nip}]}[${nip}]'" "Replicated folder is back up!\n${NM_REPL_DESC[${rcheck}]} (${NM_REPL_CHECK_LOC[${rcheck}]})
It was down for $(SHOW_TIME $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.errtime)))"
                      echo "$(date +%Y-%m-%d-%H-%M-%S) - ${nip} - ${NM_HOSTNAMES[${nip}]}[${nip}] - (${NM_REPL_DESC[${rcheck}]}) Replicated folder is back up" >> ${NM_LOGFILE}
                    fi
                    rm -f ${NM_TMPFOLDER}/${nip}~${rcheck}.repl*
                  fi
                  # Remove sync-conflict copies on the node. The path is
                  # expanded HERE (the remote shell has no rcheck /
                  # NM_REPL_CHECK_LOC), and an empty path is skipped so find
                  # never falls back to the remote working directory.
                  repl_loc=${NM_REPL_CHECK_LOC[${rcheck}]}
                  [ "${repl_loc}" != "" ] && ${SSHCMD} root@${nip} "find ${repl_loc} -iname \"*.sync-conflict-*\" -exec rm {} \\;" &
                elif [ "${checked}" == "skip" ]; then
                  [ "${STATUS_ACTION}" != "check" ] && echo -e "${idsCL[LightCyan]}Skipped${idsCL[Default]}"
                fi
              done
            else
              if [ "${STATUS_ACTION}" != "check" ]; then
                if [ -f ${STATUSRUN_TMPFOLDER}/repl.${!PH}.skip ]; then
                  echo -e "${idsCL[Red]}${NM_HOSTNAMES[${!PH}]} (${!PH}) is skipped${idsCL[Default]}"
                elif [ -f ${NM_FOLDER}/${!PH}.down ]; then
                  echo -e "${idsCL[Red]}${NM_HOSTNAMES[${!PH}]} (${!PH}) is offline${idsCL[Default]}"
                elif [ -f ${NM_TMPFOLDER}/${nip}.down ]; then
                  echo -e "${idsCL[Red]}${NM_HOSTNAMES[${nip}]}[${nip}] is offline${idsCL[Default]}"
                fi
              fi
            fi
            [ "${STATUS_ACTION}" != "check" ] && echo
          fi
        done
      fi
    done
    ##########################
    # REMOVE REPL CHECK FILES
    ##########################
    [ "${STATUS_ACTION}" != "check" ] && echo -en "${idsCL[LightCyan]} Cleaning up status checks... ${idsCL[Default]}"
    for NTYPE in "${ntypesel[@]}"; do
      PH=${NTYPE}_HOSTS[0]
      PHA=${NTYPE}_HOSTS
      skip=0
      if [ -f ${NM_TMPFOLDER}/${!PH}.disable ] || [ -f ${NM_TMPFOLDER}/${!PH}.down ]; then
        if [ $(eval echo \${#${PHA}[@]}) -eq 2 ]; then
          skip=1
        else
          PH=${NTYPE}_HOSTS[1]
        fi
      fi
      if [ ! -f ${NM_FOLDER}/${!PH}.down ] && [ ${skip} -eq 0 ]; then
        PH_CMD="${SSHCMD} root@${!PH}"
        var=${NTYPE}_REPL_CHECK[@]
        if [ ! -z ${!var+x} ]; then
          if [ "${PH_CMD}" == "" ]; then
            rm -f ${NM_FOLDER}/.test.${STATUS_START//-/}.repl &
            for rcheck in "${!var}"; do
              rm -f ${NM_REPL_CHECK_LOC[${rcheck}]}/.test.${STATUS_START//-/}.repl &
            done
          else
            ${PH_CMD} rm -f ${NM_FOLDER}/.test.${STATUS_START//-/}.repl &
            for rcheck in "${!var}"; do
              ${PH_CMD} rm -f ${NM_REPL_CHECK_LOC[${rcheck}]}/.test.${STATUS_START//-/}.repl &
            done
          fi
        fi
      fi
    done
    # Every 3 hours (mtime of .replclean) sweep leftover .test.*.repl files
    # older than 20 minutes on the primary of every node type.
    if [ ! -f ${NM_TMPFOLDER}/.replclean ] || [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/.replclean)) -ge 10800 ]; then
      for NTYPE in "${NM_NODE_TYPES[@]}"; do
        PH=${NTYPE}_HOSTS[0]
        PHA=${NTYPE}_HOSTS
        skip=0
        if [ -f ${NM_TMPFOLDER}/${!PH}.disable ] || [ -f ${NM_TMPFOLDER}/${!PH}.down ]; then
          if [ $(eval echo \${#${PHA}[@]}) -eq 2 ]; then
            skip=1
          else
            PH=${NTYPE}_HOSTS[1]
          fi
        fi
        if [ ! -f ${NM_FOLDER}/${!PH}.down ] && [ ${skip} -eq 0 ]; then
          var=${NTYPE}_REPL_CHECK[@]
          if [ ! -z ${!var+x} ]; then
            PH_CMD="${SSHCMD} root@${!PH}"
            for rcheck in "${!var}"; do
              ${PH_CMD} rm -f ${NM_REPL_CHECK_LOC[${rcheck}]}/.test.2023*.repl
              if [ $(${PH_CMD} ls ${NM_REPL_CHECK_LOC[${rcheck}]}/.test.*.repl 2>/dev/null | wc -l) -gt 0 ]; then
                for replfile in $(${PH_CMD} ls ${NM_REPL_CHECK_LOC[${rcheck}]}/.test.*.repl); do
                  [ $(expr $(date +%s) - $(${PH_CMD} stat -c %Y ${replfile})) -ge 1200 ] && ${PH_CMD} rm -f ${replfile} &
                done
              fi
            done
          fi
        fi
      done
      REPLRUN=2
      touch ${NM_TMPFOLDER}/.replclean
    fi
    rm -f ${NM_FOLDER}/.test.${STATUS_START//-/}.repl
    [ "${STATUS_ACTION}" != "check" ] && echo -e "${idsCL[LightGreen]}Complete${idsCL[Default]}\n"
    touch ${NM_TMPFOLDER}/.replcheck
    rm -f ${NM_TMPFOLDER}/.replcheck.fail
  fi
  ########################
  ## FREE SPACE CHECK
  ########################
  #if [ "${STATUS_ACTION}" == "" ] || [ "${STATUS_ACTION}" == "report" ] || [ "${STATUS_ACTION}" == "freespace" ]; then
  if [ "${STATUS_ACTION}" == "freespace" ]; then
    for NTYPE in "${ntypesel[@]}"; do
      if [ "${STATUS_ACTION}" != "check" ]; then
        echo -en "${idsST[Bold]}"; DIVIDER . . ${DV_LEN}
        echo -e "${idsCL[Yellow]} ${NM_NODETYPES[$NTYPE]}-Node(s) Free Space Scan${idsCL[Default]}"
        DIVIDER . . ${DV_LEN}; echo -e "${idsST[Reset]}"
      fi
      var=${NTYPE}_HOSTS[@]
      for nip in "${!var}"; do
        # Was [[ $"{RNIP}" == ... ]]: $"..." is locale quoting, so the literal
        # text {RNIP} was compared and the localhost branch never ran. RNIP
        # is honoured when set; otherwise RUN_NODE_IP is used, as in the
        # other sections of this function.
        if [[ "${RNIP:-${RUN_NODE_IP}}" == *"${nip}"* ]]; then NCMD=''; LH='- localhost'
        else NCMD="${SSHCMD} root@${nip}"; LH=''
        fi
        if [ "${STATUS_ACTION}" != "check" ]; then
          echo -e " ${idsST[Bold]}${idsCL[LightCyan]}${NM_HOSTNAMES[${nip}]} [${nip}]${idsST[Reset]}${idsCL[LightCyan]} ${idsCL[LightYellow]}${LH}${idsCL[Default]}"
          DIVIDER false green $((DV_LEN - 10))
        fi
        [ "${STATUS_ACTION}" != "check" ] && [ "${2}" != "report" ] && echo -en " ${idsCL[LightCyan]}Getting drives from server ... ${idsCL[Default]}"
        declare -A partitions
        # Flattened triples: device, total size, available (all in MiB with
        # an "M" suffix), after filtering pseudo/ignored filesystems.
        DRIVEINFO=$(${SSHCMD} root@${nip} df -BM | grep -vE '^Filesystem|tmpfs|cdrom|@|ram|loop|udev|veeamimage|nvme|localhost|shm|mmcblk|overlay|-volume|Music|Software' | awk '{ print $1 " " $2 " " $4 }')
        DRIVEINFO=(${DRIVEINFO})
        if [ "${2}" != "report" ]; then
          # Move the cursor up and clear the "Getting drives" line.
          echo -en "\e[1A"
          echo -e "\e[0K\r"
        fi
        NUMDRIVES=$((${#DRIVEINFO[@]} / 3))
        for ((i = 0; i < NUMDRIVES; i++)); do
          ii=$((i * 3))
          # Last path component of the device, minus any "...vg-" prefix.
          pname=${DRIVEINFO[${ii}]##*/}
          pname=${pname#*vg-}
          # Only partitions larger than 1 GiB are reported.
          if [ ${DRIVEINFO[$((${ii}+1))]//M/} -gt 1024 ]; then
            freespace=${DRIVEINFO[$((${ii}+2))]//M/}
            if [ "${STATUS_ACTION}" != "check" ]; then
              _STATUS_PAD 20 "${pname}"
              echo -en " ${pname}$spc: "
            fi
            if [[ ${freespace} -gt 1024 ]]; then
              fsgb=$(bc <<< "scale=2; ${freespace}/1024")
              fsdsp="${fsgb} GB"
            else
              fsdsp="${freespace} MB"
            fi
            # The notices used ${partition}, which is never set; the name
            # computed above lives in pname.
            if [ "${freespace}" -le "1024" ]; then
              fs_status='error'
              fs_status_color='Red'
              SENDNOTICE "Free Space Critical: '${NM_HOSTNAMES[${nip}]}[${nip}]'" "${pname} : ${fsdsp} free" 1
            elif [ "${freespace}" -le "5120" ]; then
              fs_status='warn'
              fs_status_color='Yellow'
              SENDNOTICE "Free Space Warning: '${NM_HOSTNAMES[${nip}]}[${nip}]'" "${pname} : ${fsdsp} free"
            else
              fs_status=''
              fs_status_color='Green'
            fi
            [ "${STATUS_ACTION}" != "check" ] && echo -e "${idsCL[${fs_status_color}]}${fsdsp} ${idsCL[Default]}"
          fi
        done
        echo
      done
    done
  fi
  ########################
  if [ "${STATUS_ACTION}" != "check" ]; then
    echo
    if [ -z ${NMG_ACTION} ] || [ "${NMG_ACTION}" == "gui" ]; then
      DIVIDER true
      ENTER2CONTINUE
    fi
  fi
  end=$(date +%s); runtime=$((end-start)); echo -e "Runtime: ${runtime}\n"
  [ "${STATUS_ACTION}" == "report" ] && echo "${log_start}~${runtime}~${REPLRUN}" >> ${NM_LOGFOLDER}/status-check.scantimes
}
STATUS_NODE(){
NODETYPE=${1}
dockers=${NODETYPE}_DOCKERS_CHECK[@]
hosts=${NODETYPE}_HOSTS[@]
NTHOSTS=${NODETYPE}_HOSTS[@]
touch ${STATUSRUN_TMPFOLDER}/status-check.${NTYPE}.running
if ([[ ! -v ${NODETYPE}_DOCKERS_CHECK ]] && [ "${STATUS_ACTION}" == "dockers" ]) || ([ "${NM_SERVICES_CHECK[${NODETYPE}]}" == "" ] && [ "${STATUS_ACTION}" == "services" ]); then
GOFORCHECK=false;
else
GOFORCHECK=true;
fi
if [ ${GOFORCHECK} = true ]; then
if [ "${STATUS_ACTION}" != "check" ]; then
echo -e "${idsST[Bold]}"; DIVIDER . . ${DV_LEN}; DIVIDER . . ${DV_LEN}
if [ "${STATUS_ACTION}" == "dockers" ]; then
echo -e "${idsCL[LightYellow]} ${NM_NODETYPES[$NTYPE]}-Node Docker Status${idsCL[Default]}"
else
echo -e "${idsCL[LightYellow]} ${NM_NODETYPES[$NTYPE]}-Node Service Status${idsCL[Default]}"
fi
DIVIDER . lightYellow ${DV_LEN}; echo -en "${idsST[Reset]}"
fi
for nip in "${!NTHOSTS}"; do
if [ "${STATUS_ACTION}" != "check" ]; then
echo -en " ${idsST[Bold]}${idsCL[LightCyan]}${NM_HOSTNAMES[${nip}]} [${nip}]${idsST[Reset]}${idsCL[Default]}"
[ "${STATUS_ACTION}" != "report" ] && echo -en " - Verifying Host..."
fi
if [[ "${RUN_NODE_IP}" == *"${nip}"* ]]; then
NCMD=''; LH='- localhost'
else
NCMD="${SSHCMD} root@${nip}"; LH=''
fi
if [ "${NCMD}" != "" ] && [ ! -f ${STATUSRUN_TMPFOLDER}/.${nip}.* ]; then
checkhost=$(CHECK_HOST ${nip})
[ "${checkhost}" != "false" ] && checkhostssh=$(ssh -o BatchMode=yes -o ConnectTimeout=3 root@${nip} echo ok 2>&1)
elif [ -f ${STATUSRUN_TMPFOLDER}/.${nip}.up ]; then
checkhost=true
checkhostssh=ok
elif [ -f ${STATUSRUN_TMPFOLDER}/.${nip}.down ]; then
checkhost=false
checkhostssh=no
else
checkhost=true
checkhostssh=ok
fi
if [ "${checkhost}" != "false" ] && [ "${checkhostssh}" == "ok" ]; then
if [ ! -f ${STATUSRUN_TMPFOLDER}/.${nip}.up ]; then
############## ALERTING ON HIGH CPU USAGE ##############
# [ "${NODETYPE}" != "OFW" ] && cpu_usage=$(${NCMD} "/opt/idssys/defaults/get-data.sh cpu-usage") || cpu_usage=""
# if [ "${cpu_usage}" != "" ]; then
# if [ "${NM_HOST_CPULEVELS[${nip}]}" != "" ]; then
# cpu_warn=$(echo ${NM_HOST_CPULEVELS[${nip}]} | cut -d',' -f 1)
# cpu_crit=$(echo ${NM_HOST_CPULEVELS[${nip}]} | cut -d',' -f 2)
# else
# cpu_warn=75
# cpu_crit=85
# fi
# if [ $(ROUND_NUMBER ${cpu_usage}) -lt ${cpu_warn} ]; then
# CUFC="${idsCL[Green]}"
# if [ -f ${NM_TMPFOLDER}/${nip}.cpu_usage.warn ] || [ -f ${NM_TMPFOLDER}/${nip}.cpu_usage.crit ]; then
# if [ ! -f ${NM_TMPFOLDER}/${nip}.cpu_usage.norm ]; then
# echo "${cpu_usage}%" >| ${NM_TMPFOLDER}/${nip}.cpu_usage.norm
# elif [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.cpu_usage.norm)) -ge 300 ] && ([ ! -f ${NM_TMPFOLDER}/${nip}.cpu_usage.crit ] || [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.cpu_usage.crit)) -ge 300 ]) && ([ ! -f ${NM_TMPFOLDER}/${nip}.cpu_usage.warn ] || [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.cpu_usage.warn)) -ge 300 ]); then
# [ -f ${NM_TMPFOLDER}/${nip}.cpu_usage.sent ] && SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}] NORMAL CPU USAGE" "${NM_HOSTNAMES[${nip}]}[${nip}] has returned to normal cpu usage: ${cpu_usage}%"
# rm -f ${NM_TMPFOLDER}/${nip}.cpu_usage.*
# fi
# fi
# elif [ $(ROUND_NUMBER ${cpu_usage}) -lt ${cpu_crit} ]; then
# CUFC="${idsCL[LightYellow]}"
# if [ ! -f ${NM_TMPFOLDER}/${nip}.cpu_usage.warn ]; then
# echo "${cpu_usage}%" >| ${NM_TMPFOLDER}/${nip}.cpu_usage.warn
# elif [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.cpu_usage.warn)) -ge 300 ] && ([ ! -f ${NM_TMPFOLDER}/${nip}.cpu_usage.crit ] || [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.cpu_usage.crit)) -ge 300 ]) && ([ ! -f ${NM_TMPFOLDER}/${nip}.cpu_usage.norm ] || [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.cpu_usage.norm)) -ge 300 ]) && ([ ! -f ${NM_TMPFOLDER}/${nip}.cpu_usage.sent ] || [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.cpu_usage.sent)) -ge 3600 ]); then
# SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}] WARNING HIGH CPU USAGE" "${NM_HOSTNAMES[${nip}]}[${nip}] has high cpu usage: ${cpu_usage}%"
# echo "${cpu_usage}%" >| ${NM_TMPFOLDER}/${nip}.cpu_usage.warn
# touch ${NM_TMPFOLDER}/${nip}.cpu_usage.sent
# fi
# else
# CUFC="${idsCL[LightRed]}"
# if [ ! -f ${NM_TMPFOLDER}/${nip}.cpu_usage.crit ]; then
# echo "${cpu_usage}%" >| ${NM_TMPFOLDER}/${nip}.cpu_usage.crit
# elif [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.cpu_usage.crit)) -ge 300 ] && ([ ! -f ${NM_TMPFOLDER}/${nip}.cpu_usage.warn ] || [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.cpu_usage.warn)) -ge 300 ]) && ([ ! -f ${NM_TMPFOLDER}/${nip}.cpu_usage.norm ] || [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.cpu_usage.norm)) -ge 300 ]) && ([ ! -f ${NM_TMPFOLDER}/${nip}.cpu_usage.sent ] || [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.cpu_usage.sent)) -ge 3600 ]); then
# SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}] CRITICAL HIGH CPU USAGE" "${NM_HOSTNAMES[${nip}]}[${nip}] has CRITICALLY high cpu usage: ${cpu_usage}%" 1
# echo "${cpu_usage}%" >| ${NM_TMPFOLDER}/${nip}.cpu_usage.crit
# touch ${NM_TMPFOLDER}/${nip}.cpu_usage.sent
# fi
# fi
# fi
touch ${STATUSRUN_TMPFOLDER}/.${nip}.up
fi
if [ "${STATUS_ACTION}" != "check" ]; then
if ([ "${STATUS_ACTION}" == "report" ] && [ "${2}" == "email" ]) || [ "${STATUS_ACTION}" != "report" ]; then
if [ "${NODETYPE}" == "OFW" ]; then
uptime=$(${NCMD} uptime | awk '{print $3}' | cut -d, -f1)
if [ ${#uptime} -ge 6 ]; then
uptime="up $(echo $uptime | cut -d: -f1) days, $(echo $uptime | cut -d: -f2) hours, $(echo $uptime | cut -d: -f3) minutes"
elif [ ${#uptime} -ge 3 ]; then
uptime="up $(echo $uptime | cut -d: -f1) hours, $(echo $uptime | cut -d: -f2) minutes"
else
uptime="up ${uptime} minutes"
fi
else
uptime=$(${NCMD} uptime -p)
fi
[ "${STATUS_ACTION}" != "check" ] && echo -e "\r\033[K ${idsST[Bold]}${idsCL[LightCyan]}${NM_HOSTNAMES[${nip}]}${idsST[Reset]}${idsCL[LightCyan]} [${nip}]${CUFC}[CPU: `IDS_NUMBER_FORMAT ${cpu_usage} 1`'%]${idsCL[White]} - ${uptime}${idsCL[Default]}"
else
echo -e "${idsCL[Default]}"
fi
DIVIDER false lightCyan ${DV_LEN}
fi
if [ -f ${NM_TMPFOLDER}/${nip}.down ]; then
if [ -f ${NM_TMPFOLDER}/${nip}.sent ]; then
[ -f ${NM_TMPFOLDER}/${nip}.errtime ] && toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.errtime)) || toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.down))
SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}]-UP" "${NM_HOSTNAMES[${nip}]}[${nip}] is back UP! It was down for $(SHOW_TIME ${toterrtime})"
fi
rm -f ${NM_TMPFOLDER}/${nip}.*
fi
########################
## SERVICES CHECK
########################
if [ "${STATUS_ACTION}" != "dockers" ] && [ "${NM_SERVICES_CHECK[${NODETYPE}]}" != "" ]; then
echo -e "${idsCL[Green]} System Service(s) Status${idsCL[Default]}"
DIVIDER false green ${SV_LEN}
srvcs=${NODETYPE}_SERVICES_CHECK[@];
srvcstotest="$(join_by " " ${!srvcs})"
if [ "${NODETYPE}" != "OFW" ]; then
[ "${NCMD}" != "" ] && srvctst=(`${NCMD} "systemctl is-active ${srvcstotest}" | tr -d $'\r'`) || srvctst=(`systemctl is-active ${srvcstotest}`)
fi
sr=0
[ "${NODETYPE}" == "MM" ] && mmstop=$(${SSHCMD} root@${nip} "if [ -f /opt/idssys/mediamanager/.tmp/mm.stop ] || [ -f /opt/idssys/mediamanager/.tmp/mm.noservices ]; then echo 1; fi")
for srvc in "${!srvcs}"; do
if [ "${STATUS_ACTION}" != "report" ]; then
c=0; cw=${PRI_CW}; spc=''
spc1=${cw}-${#NM_SERVICE_DESC[${srvc}]}
until [ $c = ${spc1} ]; do spc="${spc} "; c=`expr $c + 1`; done
else
spc=' '
fi
tmp=${NM_SINGLESRVR_SERVICES[${NODETYPE}]}
SSCHK=true
if [ "$(GETSKIP ${nip} host)" == "true" ]; then
SSCHK=skip
elif [[ ${tmp[@]} =~ ${srvc} ]]; then
if [[ $(${NCMD} ip addr show $(ip route | awk '/default/ { print $5 }') | grep "inet" | awk '/inet/ {print $2}' | cut -d'/' -f1) != *${NM_SINGLESRVR_IP[${NODETYPE}]}* ]]; then
SSCHK=false
else
SSCHK=primary
fi
elif ([ "${nip}" == "10.2.1.51" ] && [ "${srvc}" == "keepalived" ]); then
SSCHK=disable
fi
if [ "${SSCHK}" == "true" ] && [ "${NODETYPE}" == "MM" ]; then
[ ${mmstop} -eq 1 ] && [[ "${NM_MEDIA_SERVICES}" = *"${srvc}"* ]] && SSCHK=skip
fi
([ "${SSCHK}" == "skip" ] || [ "${SSCHK}" == "false" ] || [ "${SSCHK}" == "disable" ]) && rm -f ${NM_TMPFOLDER}/${nip}~${srvc}.*
if [ ! -f ${NM_TMPFOLDER}/${nip}~${srvc}.disable ] && ([ ${SSCHK} == true ] || [ ${SSCHK} == primary ]); then
[ "${STATUS_ACTION}" != "check" ] && echo -en "${idsCL[White]} ${NM_SERVICE_DESC[${srvc}]}${spc}${idsCL[Default]}: "
if [ "${NODETYPE}" != "OFW" ]; then
srvctest=${srvctst[$sr]}
if [ "${srvctest}" != "active" ] && [ "${srvc}" == "mysql" ]; then
[ "$(${NCMD} systemctl is-active mariadb | tr -d $'\r')" == "active" ] && mysqlgo=true || mysqlgo=false
elif [ "${srvctest}" != "active" ] && [ "${srvc}" == "mariadb" ]; then
[ "$(${NCMD} systemctl is-active mysql | tr -d $'\r')" == "active" ] && mysqlgo=true || mysqlgo=false
elif [ "${srvctest}" == "active" ] && ([ "${srvc}" == "mysql" ] || [ "${srvc}" == "mariadb" ]); then
mysqlgo=true
fi
else
if [ "$(${SSHCMD} root@${nip} ps -U root | grep "offsite-power-check.sh start" | grep -v "grep" | awk '{print $1}')" != "" ]; then
srvctest=active
else
srvctest=notactive
fi
fi
if [ "${srvctest}" != "active" ] && [ "${mysqlgo}" != "true" ]; then
if [ ! -f ${NM_TMPFOLDER}/${nip}~${srvc}.down ]; then
[ "${STATUS_ACTION}" != "check" ] && echo -en "${idsCL[Red]}Not Running"
touch ${NM_TMPFOLDER}/${nip}~${srvc}.down
# SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}]" "${NM_SERVICE_DESC[${srvc}]} is down" 1
echo "$(date +%Y-%m-%d-%H-%M-%S) - ${nip} - ${NM_HOSTNAMES[${nip}]}[${nip}] - ${NM_SERVICE_DESC[${srvc}]} is down" >> ${NM_LOGFILE}
# [ "${srvc}" == "pdns" ] && ${NCMD} systemctl restart ${srvc} &
else
[ -f ${NM_TMPFOLDER}/${nip}~${srvc}.errtime ] && toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${srvc}.errtime)) || toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${srvc}.down))
[ "${STATUS_ACTION}" != "check" ] && echo -en "${idsCL[Red]}Not running for $(SHOW_TIME ${toterrtime})"
#if [ ! -f ${NM_TMPFOLDER}/${nip}~${srvc}.disable ] && ([ "${nip}" = "10.2.1.5" ] || [[ "${nip}" != *"10.2."* ]] || ([[ "${nip}" = *"10.2."* ]] && [ ! -f ${PW_TMPFOLDER}/OFF-vMS-Host1.down ])) && ([ -f ${NM_TMPFOLDER}/${nip}~${srvc}.sent ] && [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${srvc}.sent)) -gt ${NM_RENOTIFY_TIMEOUT} ]) || ([ ! -f ${NM_TMPFOLDER}/${nip}~${srvc}.sent ] && [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${srvc}.down)) -gt 60 ]); then
if [ ! -f ${NM_TMPFOLDER}/${nip}~${srvc}.disable ] && ([ -f ${NM_TMPFOLDER}/${nip}~${srvc}.sent ] && [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${srvc}.sent)) -gt ${NM_RENOTIFY_TIMEOUT} ]) || ([ ! -f ${NM_TMPFOLDER}/${nip}~${srvc}.sent ] && [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${srvc}.down)) -gt 60 ]); then
[ ! -f ${NM_TMPFOLDER}/${nip}~${srvc}.errtime ] && mv ${NM_TMPFOLDER}/${nip}~${srvc}.down ${NM_TMPFOLDER}/${nip}~${srvc}.errtime
touch ${NM_TMPFOLDER}/${nip}~${srvc}.down
if [ ${NM_ENABLE_RENOTIFY} -eq 1 ] || [ ! -f ${NM_TMPFOLDER}/${nip}~${srvc}.sent ]; then
SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}]" "'${srvc}' ${NM_SERVICE_DESC[${srvc}]} - is down" 1
touch ${NM_TMPFOLDER}/${nip}~${srvc}.sent
fi
fi
#########################################
############ SERVICE REPAIRS ############
if [ ! -f ${NM_TMPFOLDER}/${nip}~${srvc}.disable ] && [ ! -f ${NM_TMPFOLDER}/${nip}~${srvc}.fix ] && [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${srvc}.down)) -gt 90 ]; then
if [ "${srvc}" == "pihole-FTL" ]; then
${SSHCMD} root@${nip} 'pihole -r' & >/dev/null 2>&1
touch ${NM_TMPFOLDER}/${nip}~${srvc}.fix
fi
fi
#########################################
#########################################
fi
else
[ "${STATUS_ACTION}" != "check" ] && echo -en "${idsCL[LightGreen]}Running"
if [ -f ${NM_TMPFOLDER}/${nip}~${srvc}.down ]; then
if [ -f ${NM_TMPFOLDER}/${nip}~${srvc}.sent ]; then
[ -f ${NM_TMPFOLDER}/${nip}~${srvc}.errtime ] && toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${srvc}.errtime)) || toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${srvc}.down))
if [ -f ${NM_TMPFOLDER}/${nip}~${srvc}.fix ]; then
SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}]" "'${srvc}' ${NM_SERVICE_DESC[${srvc}]} - was FIXED! It was down for $(SHOW_TIME ${toterrtime})"
echo "$(date +%Y-%m-%d-%H-%M-%S) - ${nip} - ${NM_HOSTNAMES[${nip}]}[${nip}] - ${NM_SERVICE_DESC[${srvc}]} was FIXED, it was down for $(SHOW_TIME ${toterrtime})" >> ${NM_LOGFILE}
else
SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}]" "'${srvc}' ${NM_SERVICE_DESC[${srvc}]} - is back UP! It was down for $(SHOW_TIME ${toterrtime})"
echo "$(date +%Y-%m-%d-%H-%M-%S) - ${nip} - ${NM_HOSTNAMES[${nip}]}[${nip}] - ${NM_SERVICE_DESC[${srvc}]} is back up, it was down for $(SHOW_TIME ${toterrtime})" >> ${NM_LOGFILE}
fi
fi
fi
if [[ "${NM_SINGLESRVR_SERVICES[${NODETYPE}]}" = *"${srvc}"* ]]; then
for tnip in "${!NTHOSTS}"; do
rm -f ${NM_TMPFOLDER}/${tnip}~${srvc}.* &
done
else
rm -f ${NM_TMPFOLDER}/${nip}~${srvc}.* &
fi
fi
[ "${mysqlgo}" == "true" ] && unset mysqlgo
[ "${SSCHK}" == "primary" ] && echo -e "${idsCL[LightCyan]} - Primary Node${idsCL[Default]}" || echo -e "${idsCL[Default]}"
# elif [ -f ${NM_TMPFOLDER}/${nip}~${srvc}.disable ] || ([ "${nip}" != "10.2.1.5" ] && [[ "${nip}" = *"10.2."* ]] && [ -f ${PW_TMPFOLDER}/OFF-vMS-Host1.down ]) || ([ "${srvc}" == "keepalived" ] && [ "${nip}" == "10.2.1.51" ]); then
elif [ -f ${NM_TMPFOLDER}/${nip}~${srvc}.disable ] || [ "${SSCHK}" == "disable" ]; then
echo -e "${idsCL[White]} ${NM_SERVICE_DESC[${srvc}]}${spc}${idsCL[Default]}: ${idsCL[LightCyan]}Disabled${idsCL[Default]}"
elif [ "${SSCHK}" == "skip" ]; then
echo -e "${idsCL[White]} ${NM_SERVICE_DESC[${srvc}]}${spc}${idsCL[Default]}: ${idsCL[LightCyan]}Host Disabled${idsCL[Default]}"
else
echo -e "${idsCL[White]} ${NM_SERVICE_DESC[${srvc}]}${spc}${idsCL[Default]}: ${idsCL[LightCyan]}Disabled - Backup Node${idsCL[Default]}"
fi
sr=`expr $sr + 1`
done
fi
########################
## DOCKER CHECK
########################
if [[ -v ${NODETYPE}_DOCKERS_CHECK ]] && [ "${STATUS_ACTION}" != "services" ]; then
if [ "${STATUS_ACTION}" != "check" ]; then
[ "${NM_SERVICES_CHECK[${NODETYPE}]}" != "" ] && echo
echo -e "${idsCL[Green]} Docker Service(s) Status${idsCL[Default]}"
DIVIDER false green ${SV_LEN}
fi
for docker in "${!dockers}"; do
if [ "${STATUS_ACTION}" != "report" ]; then
c=0; cw=${PRI_CW}; spc=''
spc1=`expr ${cw} - ${#NM_DOCKER_DESC[${docker}]}`
until [ $c = ${spc1} ]; do spc="${spc} "; c=`expr $c + 1`; done
else
spc=' '
fi
tmp=${NM_SINGLESRVR_DOCKERS[${NODETYPE}]}
SDCHK=true
if ${SSHCMD} root@${nip} [ -f ${NM_TMPFOLDER}/.skip ]; then
SDCHK=skip
elif [[ ${tmp[@]} =~ ${docker} ]]; then
if [[ $(${NCMD} ip addr show $(ip route | awk '/default/ { print $5 }') | grep "inet" | awk '/inet/ {print $2}' | cut -d'/' -f1) != *${NM_SINGLESRVR_IP[${NODETYPE}]}* ]]; then
SDCHK=false
else
SDCHK=primary
fi
elif [ "${nip}" != "10.10.10.240" ] && [[ "${docker}" = "hcloud_"* ]]; then
SDCHK=disable
elif [ "${nip}" == "10.10.1.180" ] && ([ "${docker}" == "watchtower" ] || [ "${docker}" == "portainer_agent" ]); then
SDCHK=disable
fi
([ "${SDCHK}" == "skip" ] || [ "${SDCHK}" == "false" ]) && rm -f ${NM_TMPFOLDER}/${nip}~${docker}.*
if [ ! -f ${NM_TMPFOLDER}/${nip}~${docker}.disable ] && ([ ${SDCHK} == true ] || [ ${SDCHK} == primary ]); then
[ "${STATUS_ACTION}" != "check" ] && echo -en "${idsCL[White]} ${NM_DOCKER_DESC[${docker}]}$spc${idsCL[Default]}: "
# docker_go=0
# if [ "$(${NCMD} docker inspect -f {{.State.Health.Status}} ${docker} 2>/dev/null)" == "healthy" ]; then
# docker_go=1
# elif [ "$(${NCMD} docker inspect -f {{.State.Running}} ${docker})" == "true" ]; then
# docker_go=1
# fi
if [ "${NCMD}" == "" ]; then
if [ "$(docker inspect -f {{.State.Health.Status}} ${docker} 2>/dev/null)" == "healthy" ] || [ "$(docker inspect -f {{.State.Running}} ${docker} 2>/dev/null)" == "true" ]; then
docker_go=true
else
docker_go=false
fi
else
docker_go=$(${NCMD} "docker_name='${docker}'; if [ \"\$(docker inspect -f {{.State.Health.Status}} \${docker_name} 2>/dev/null)\" == \"healthy\" ] || [ \"\$(docker inspect -f {{.State.Running}} \${docker_name} 2>/dev/null)\" == \"true\" ]; then echo true; else echo false; fi")
fi
if [ "${docker_go}" == "false" ]; then
#if [ ! -f ${NM_TMPFOLDER}/${nip}~${docker}.disable ] && ([ "${nip}" = "10.2.1.5" ] || [[ "${nip}" != *"10.2."* ]] || ([[ "${nip}" = *"10.2."* ]] && [ ! -f ${PW_TMPFOLDER}/OFF-vMS-Host1.down ])); then
if [ ! -f ${NM_TMPFOLDER}/${nip}~${docker}.disable ]; then
if [ ! -f ${NM_TMPFOLDER}/${nip}~${docker}.down ]; then
touch ${NM_TMPFOLDER}/${nip}~${docker}.down
if [ "${NM_DOCKER_COMPOSE_LOC[${docker}]}" != "" ]; then
echo "$(date +%Y-%m-%d-%H-%M-%S) - ${nip} - ${NM_HOSTNAMES[${nip}]}[${nip}] - ${NM_DOCKER_DESC[${docker}]} (docker) is down, will try and fix in 1min" >> ${NM_LOGFILE}
echo -en "${idsCL[Red]}Not Running - will try and fix in 1min!"
else
echo "$(date +%Y-%m-%d-%H-%M-%S) - ${nip} - ${NM_HOSTNAMES[${nip}]}[${nip}] - ${NM_DOCKER_DESC[${docker}]} (docker) is down" >> ${NM_LOGFILE}
echo -en "${idsCL[Red]}Not Running"
fi
elif ([ ! -f ${NM_TMPFOLDER}/${nip}~${docker}.fix ] && [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${docker}.down)) -gt 50 ]) || ([ -f ${NM_TMPFOLDER}/${nip}~${docker}.fix ] && [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${docker}.fix)) -ge 3600 ]); then
[ ! -f ${NM_TMPFOLDER}/${nip}~${docker}.errtime ] && mv ${NM_TMPFOLDER}/${nip}~${docker}.down ${NM_TMPFOLDER}/${nip}~${docker}.errtime
if [ "$(${NCMD} docker ps -a | grep ${docker})" ]; then
${NCMD} docker start ${docker} >/dev/null 2>&1
echo "$(date +%Y-%m-%d-%H-%M-%S) - ${nip} - ${NM_HOSTNAMES[${nip}]}[${nip}] - ${NM_DOCKER_DESC[${docker}]} (docker) is not running, starting now" >> ${NM_LOGFILE}
elif [ "${NM_DOCKER_COMPOSE_LOC[${docker}]}" != "" ] || [ "${docker}" == "watchtower" ] || [ "${docker}" == "portainer_agent" ]; then
if [ "${docker}" == "watchtower" ]; then
${NCMD} runup watchtower >/dev/null 2>&1
elif [ "${docker}" == "portainer_agent" ]; then
${NCMD} docker run -d -p 9001:9001 --name portainer_agent --restart=always -v /var/run/docker.sock:/var/run/docker.sock -v /var/lib/docker/volumes:/var/lib/docker/volumes -v /:/host portainer/agent:latest >/dev/null 2>&1
else
${NCMD} docker compose -f ${NM_DOCKER_COMPOSE_LOC[${docker}]}/docker-compose.yml up -d >/dev/null 2>&1
fi
echo "$(date +%Y-%m-%d-%H-%M-%S) - ${nip} - ${NM_HOSTNAMES[${nip}]}[${nip}] - ${NM_DOCKER_DESC[${docker}]} (docker) is not found, creating and starting now" >> ${NM_LOGFILE}
fi
touch ${NM_TMPFOLDER}/${nip}~${docker}.fix
sleep 10s
if [ "$(${NCMD} docker ps -a -q -f name=${docker})" ]; then
[ "${STATUS_ACTION}" != "check" ] && echo -en "${idsCL[Green]}Running - Fixed"
[ -f ${NM_TMPFOLDER}/${nip}~${docker}.sent ] && SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}]" "'${docker}' ${NM_DOCKER_DESC[${docker}]} - is fixed"
rm -f ${NM_TMPFOLDER}/${nip}~${docker}.*
echo "$(date +%Y-%m-%d-%H-%M-%S) - ${nip} - ${NM_HOSTNAMES[${nip}]}[${nip}] - ${NM_DOCKER_DESC[${docker}]} (docker) is fixed" >> ${NM_LOGFILE}
else
if [ "${STATUS_ACTION}" != "check" ]; then
[ "${STATUS_ACTION}" != "check" ] && echo -en "${idsCL[Red]}Not Running - Could Not Fix!"
SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}]" "'${docker}' ${NM_DOCKER_DESC[${docker}]} - is down, could not fix" 1
touch ${NM_TMPFOLDER}/${nip}~${docker}.sent
touch ${NM_TMPFOLDER}/${nip}~${docker}.down
echo "$(date +%Y-%m-%d-%H-%M-%S) - ${nip} - ${NM_HOSTNAMES[${nip}]}[${nip}] - ${NM_DOCKER_DESC[${docker}]} (docker) is down, could not fix" >> ${NM_LOGFILE}
fi
fi
elif [ ! -f ${NM_TMPFOLDER}/${nip}~${docker}.disable ] && [ -f ${NM_TMPFOLDER}/${nip}~${docker}.fix ] && ([ ! -f ${NM_TMPFOLDER}/${nip}~${docker}.sent ] || ([ -f ${NM_TMPFOLDER}/${nip}~${docker}.sent ] && [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${docker}.sent)) -gt ${NM_RENOTIFY_TIMEOUT} ])); then
# elif [ ! -f ${NM_TMPFOLDER}/${nip}~${docker}.disable ] && ([ ! -f ${NM_TMPFOLDER}/${nip}~${docker}.sent ] || ([ -f ${NM_TMPFOLDER}/${nip}~${docker}.sent ] && [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${docker}.sent)) -gt ${NM_RENOTIFY_TIMEOUT} ])); then
toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${docker}.errtime))
[ "${STATUS_ACTION}" != "check" ] && echo -e "${idsCL[Red]}Not running for $(SHOW_TIME ${toterrtime})${idsCL[Default]}"
touch ${NM_TMPFOLDER}/${nip}~${docker}.down
if [ ${NM_ENABLE_RENOTIFY} -eq 1 ] || [ ! -f ${NM_TMPFOLDER}/${nip}~${docker}.sent ]; then
SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}]" "'${docker}' ${NM_SERVICE_DESC[${docker}]} - is down" 1
touch ${NM_TMPFOLDER}/${nip}~${docker}.sent
fi
echo -en "${idsCL[Red]}Not Running, could not fix earlier, FIX ME!"
fi
else
echo -en "${idsCL[Yellow]}Disabled - Skipping"
fi
else
if [ -f ${NM_TMPFOLDER}/${nip}~${docker}.fix ]; then
[ -f ${NM_TMPFOLDER}/${nip}~${docker}.sent ] && SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}]" "'${docker}' ${NM_DOCKER_DESC[${docker}]} - is fixed"
[ "${STATUS_ACTION}" != "check" ] && echo -en "${idsCL[Green]}Running - Fixed"
else
[ "${STATUS_ACTION}" != "check" ] && echo -en "${idsCL[LightGreen]}Running"
fi
if [[ "${NM_SINGLESRVR_DOCKERS[${NODETYPE}]}" = *"${docker}"* ]]; then
for tnip in "${!NTHOSTS}"; do
rm -f ${NM_TMPFOLDER}/${tnip}~${docker}.* &
done
else
rm -f ${NM_TMPFOLDER}/${nip}~${docker}.* &
fi
fi
[ "${SDCHK}" == "primary" ] && echo -e "${idsCL[LightCyan]} - Primary Node${idsCL[Default]}" || echo -e "${idsCL[Default]}"
elif [ -f ${NM_TMPFOLDER}/${nip}~${docker}.disable ] || [ "${SDCHK}" == "disable" ]; then
echo -e "${idsCL[White]} ${NM_DOCKER_DESC[${docker}]}${spc}${idsCL[Default]}: ${idsCL[LightCyan]}Disabled${idsCL[Default]}"
elif [ "${SDCHK}" == "skip" ]; then
echo -e "${idsCL[White]} ${NM_DOCKER_DESC[${docker}]}${spc}${idsCL[Default]}: ${idsCL[LightCyan]}Host Disabled${idsCL[Default]}"
else
echo -e "${idsCL[White]} ${NM_DOCKER_DESC[${docker}]}${spc}${idsCL[Default]}: ${idsCL[LightCyan]}Disabled - Backup Node${idsCL[Default]}"
fi
done
fi
else
if [ "${STATUS_ACTION}" != "check" ]; then
echo -e "\r\033[K ${idsST[Bold]}${idsCL[LightCyan]}${NM_HOSTNAMES[${nip}]} [${nip}]${idsST[Reset]}${idsCL[LightRed]} - Offline${idsCL[Default]}"
DIVIDER false lightCyan ${DV_LEN}
fi
[ ! -f ${STATUSRUN_TMPFOLDER}/.${nip}.down ] && touch ${STATUSRUN_TMPFOLDER}/.${nip}.down
rm -f ${NM_TMPFOLDER}/${nip}~*
([[ "${NM_IGNOREHOSTS_IFDOWN}" = *"${nip}"* ]] && [ ! -f ${NM_TMPFOLDER}/${nip}.disable ]) && touch ${NM_TMPFOLDER}/${nip}.disable && touch ${NM_TMPFOLDER}/${nip}.down
if [ ! -f ${NM_TMPFOLDER}/${nip}.down ]; then
# if [ ! -f ${NM_TMPFOLDER}/${nip}.disable ] && ([[ "${nip}" != *"10.2."* ]] || ([[ "${nip}" = *"10.2."* ]] && [ ! -f ${PW_TMPFOLDER}/OFF-vMS-Host1.down ])); then
[ ! -f ${NM_TMPFOLDER}/${nip}.disable ] && touch ${NM_TMPFOLDER}/${nip}.down
if [ "${STATUS_ACTION}" != "check" ]; then
if [ "${checkhost}" != "false" ]; then
echo -e "${idsCL[Red]} Node is online, but SSH is down!${idsCL[Default]}"
else
echo -e "${idsCL[Red]} Node is down! ${idsCL[Default]}"
fi
fi
# SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}]" "${NM_HOSTNAMES[${nip}]}[${nip}] is down" 1
[ "${checkhostssh}" != "ok" ] && echo "$(date +%Y-%m-%d-%H-%M-%S) - ${nip} - ${NM_HOSTNAMES[${nip}]}[${nip}] - Node is down" >> ${NM_LOGFILE}
else
#if [ ! -f ${NM_TMPFOLDER}/${nip}.disable ] && ([ "${nip}" = "10.2.1.5" ] || [[ "${nip}" != *"10.2."* ]] || ([[ "${nip}" = *"10.2."* ]] && [ ! -f ${PW_TMPFOLDER}/OFF-vMS-Host1.down ])) && [ ! -f ${NM_TMPFOLDER}/${nip}.errtime ] && [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.down)) -gt 60 ]; then
if [ ! -f ${NM_TMPFOLDER}/${nip}.disable ] && [ ! -f ${NM_TMPFOLDER}/${nip}.errtime ] && [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.down)) -gt 60 ]; then
mv ${NM_TMPFOLDER}/${nip}.down ${NM_TMPFOLDER}/${nip}.errtime
toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.errtime))
errtime=${toterrtime}
if [ "${checkhost}" != "false" ]; then
SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}]" "${NM_HOSTNAMES[${nip}]}[${nip}] is online, but SSH is down" 1
else
SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}]" "${NM_HOSTNAMES[${nip}]}[${nip}] is down" 1
fi
touch ${NM_TMPFOLDER}/${nip}.sent
else
errtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.down))
[ -f ${NM_TMPFOLDER}/${nip}.errtime ] && toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.errtime)) || toterrtime=${errtime}
fi
if [ "${STATUS_ACTION}" != "check" ]; then
if [ "${checkhost}" != "false" ]; then
echo -e "${idsCL[LightRed]} Node SSH has been down for $(SHOW_TIME ${toterrtime}) ${idsCL[LightYellow]}${LH}${idsCL[Default]}"
else
echo -e "${idsCL[LightRed]} Node has been down for $(SHOW_TIME ${toterrtime}) ${idsCL[LightYellow]}${LH}${idsCL[Default]}"
fi
fi
# if [ ${NM_ENABLE_RENOTIFY} -eq 1 ] && [ ! -f ${NM_TMPFOLDER}/${nip}.disable ] && ([ "${nip}" = "10.2.1.5" ] || [[ "${nip}" != *"10.2."* ]] || ([[ "${nip}" = *"10.2."* ]] && [ ! -f ${PW_TMPFOLDER}/OFF-vMS-Host1.down ])) && [ -f ${NM_TMPFOLDER}/${nip}.sent ] && [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.sent)) -gt ${NM_RENOTIFY_TIMEOUT} ]; then
if [ ${NM_ENABLE_RENOTIFY} -eq 1 ] && [ ! -f ${NM_TMPFOLDER}/${nip}.disable ] && [ -f ${NM_TMPFOLDER}/${nip}.sent ] && [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.sent)) -gt ${NM_RENOTIFY_TIMEOUT} ]; then
if [ "${checkhost}" != "false" ]; then
SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}]" "${NM_HOSTNAMES[${nip}]}[${nip}] SSH has been down for $(SHOW_TIME ${toterrtime})" 1
else
SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}]" "${NM_HOSTNAMES[${nip}]}[${nip}] has been down for $(SHOW_TIME ${toterrtime})" 1
fi
touch ${NM_TMPFOLDER}/${nip}.sent
fi
#if [ ! -f ${NM_TMPFOLDER}/${nip}.disable ] && ([[ "${nip}" != *"10.2."* ]] || ([[ "${nip}" = *"10.2."* ]] && [ ! -f ${PW_TMPFOLDER}/OFF-vMS-Host1.down ])); then
[ ! -f ${NM_TMPFOLDER}/${nip}.disable ] && touch ${NM_TMPFOLDER}/${nip}.down
fi
fi
[ "${STATUS_ACTION}" != "check" ] && echo
done
fi #GOFORCHECK
mv ${STATUSRUN_TMPFOLDER}/status-check.${NTYPE}.running ${STATUSRUN_TMPFOLDER}/status-check.${NTYPE}.done
}
#######################################
# Wait (up to about one minute) for the replication marker file of one
# replication check to show up on a node with the expected content, and record
# the outcome as a status file.
# Globals:
#   STATUSRUN_TMPFOLDER (read) - where repl.<rcheck>.<nip>.* status files go
#   NM_TMPFOLDER        (read) - holds <nip>.disable / <nip>.down flags
#   NM_REPL_CHECK_LOC   (read) - folder of the marker, keyed by check name
#   STATUS_START        (read) - run id; dashes are stripped for the file name
#   SSHCMD              (read) - ssh command prefix
#   rcheck, nip, PH_CMD, timeout, checked (written)
# Arguments:
#   $1 - replication check name (key of NM_REPL_CHECK_LOC)
#   $2 - IP of the node to verify
#   $3 - optional command prefix; when set, the comparison is run through it
#        instead of against the local copy of the marker
# Outputs:
#   Creates repl.<rcheck>.<nip>.running while working, then exactly one of
#   .skip (node disabled/down), .good (marker matched) or .timeout.
#######################################
REPLCHECK(){
  rcheck=${1}
  nip=${2}
  PH_CMD=${3}
  local state_base="${STATUSRUN_TMPFOLDER}/repl.${rcheck}.${nip}"
  local marker="${NM_REPL_CHECK_LOC[${rcheck}]}/.test.${STATUS_START//-/}.repl"
  touch "${state_base}.running"
  # Deadline as an epoch timestamp; replaced by the literal "true" on expiry.
  timeout=$(date --date='1 minutes' +%s)
  if [ -f "${NM_TMPFOLDER}/${nip}.disable" ] || [ -f "${NM_TMPFOLDER}/${nip}.down" ]; then
    rm -f "${state_base}.running"
    touch "${state_base}.skip"
  else
    # 'checked' holds the diff output; it becomes empty once the node's copy
    # of the marker is identical to the reference copy.
    checked=false
    until [ "${checked}" == "" ]; do
      if [ "${PH_CMD}" == "" ]; then
        # Compare the node's marker against the local copy.
        ${SSHCMD} root@${nip} [[ -f ${marker} ]] && checked=`${SSHCMD} root@${nip} "cat ${marker}" | diff - ${marker}`
      else
        # Same comparison, but executed through PH_CMD.
        ${SSHCMD} root@${nip} [[ -f ${marker} ]] && checked="`${PH_CMD} \"${SSHCMD} root@${nip} \"cat ${marker}\" | diff - ${marker}\"`"
      fi
      # A verified marker wins over the deadline: without this, a check that
      # succeeds in the iteration that crosses the deadline was reported as a
      # timeout.
      [ "${checked}" == "" ] && break
      if [ "$(date +%s)" -gt "${timeout}" ]; then
        timeout=true
        break
      fi
    done
    rm -f "${state_base}.running"
    if [ "${timeout}" == "true" ]; then
      touch "${state_base}.timeout"
    else
      touch "${state_base}.good"
    fi
  fi
}
#######################################
# Seed the replication test for one node type: pick a usable primary host and
# write a fresh marker file into every replication-check folder on it.
# Globals:
#   <NODETYPE>_HOSTS, <NODETYPE>_REPL_CHECK (read, via indirection)
#   NM_TMPFOLDER, STATUSRUN_TMPFOLDER, NM_FOLDER, NM_REPL_CHECK_LOC,
#   STATUS_START, SSHCMD (read)
#   NODETYPE, PH, PHA, skip, PH_CMD, rcheck, daterun (written)
# Arguments:
#   $1 - node type prefix (e.g. the "XX" of XX_HOSTS)
# Outputs:
#   Touches repl.<ip>.skip for every candidate primary that is disabled/down.
#   Remote side effects are started in the background and not waited for.
#######################################
REPLSTART(){
  NODETYPE=${1}
  PHA="${NODETYPE}_HOSTS"
  PH="${PHA}[0]"
  skip=0

  if [[ -f ${NM_TMPFOLDER}/${!PH}.disable || -f ${NM_TMPFOLDER}/${!PH}.down ]]; then
    touch ${STATUSRUN_TMPFOLDER}/repl.${!PH}.skip
    # Count the configured hosts through indirect expansion.
    local all_hosts_ref="${PHA}[@]"
    local -a all_hosts=("${!all_hosts_ref}")
    if (( ${#all_hosts[@]} == 2 )); then
      # With exactly two hosts there is no fallback once the first is out.
      skip=1
    else
      # Otherwise fall back to the second host in the list.
      PH="${PHA}[1]"
      if [[ -f ${NM_TMPFOLDER}/${!PH}.disable || -f ${NM_TMPFOLDER}/${!PH}.down ]]; then
        skip=1
        touch ${STATUSRUN_TMPFOLDER}/repl.${!PH}.skip
      fi
    fi
  fi

  # Nothing to do without a usable primary host.
  (( skip == 0 )) || return 0
  [[ -n "${!PH}" ]] || return 0

  PH_CMD="${SSHCMD} root@${!PH}"
  var="${NODETYPE}_REPL_CHECK[@]"
  # No replication checks configured for this node type.
  [[ -n ${!var+x} ]] || return 0

  for rcheck in "${!var}"; do
    # NOTE(review): the marker is created under NM_REPL_CHECK_LOC[rcheck] but
    # this cleanup targets NM_FOLDER - confirm that is intended.
    ${PH_CMD} rm -f ${NM_FOLDER}/.test.${STATUS_START//-/}.repl
    daterun=$(date +%Y-%m-%d-%H-%M-%S)
    # Write the timestamped marker and purge "*.sync-conflict-*" files, both
    # in the background.
    ${PH_CMD} "echo -e \"Replcation-Test\n${daterun}\" > ${NM_REPL_CHECK_LOC[${rcheck}]}/.test.${STATUS_START//-/}.repl" &
    ${PH_CMD} 'find '${NM_REPL_CHECK_LOC[${rcheck}]}' -iname "*.sync-conflict-*" -exec rm {} \;' &
  done
}
#######################################
# Delete every "*.sync-conflict-*" file from the replication-check folders of
# all reachable hosts, for one node type or for all of them.
# Globals:
#   NM_NODETYPES, NM_NODE_TYPES, NM_TMPFOLDER, NM_HOSTNAMES,
#   NM_REPL_CHECK_LOC, SSHCMD, idsCL (read)
#   <NTYPE>_REPL_CHECK, <NTYPE>_HOSTS (read, via indirection)
#   ntss, ntypesel, NTYPE, nip, rcheck, PH_CMD (written)
# Arguments:
#   $1 - optional node type (case-insensitive); anything not found in
#        NM_NODETYPES means "all node types"
# Outputs:
#   Progress messages on stdout; output of the remote find is discarded.
#######################################
REMOVE_CONFLICT_SYNC_FILES(){
  echo
  # Default to every node type, then narrow down when $1 names a known one.
  ntss=""
  ntypesel=(${NM_NODE_TYPES[@]})
  if [ "${1}" != "" ] && [ "${NM_NODETYPES[${1^^}]}" != "" ]; then
    ntss=${1^^}
    ntypesel=(${ntss})
  fi

  local checks_ref hosts_ref
  for NTYPE in "${ntypesel[@]}"; do
    checks_ref="${NTYPE}_REPL_CHECK[@]"
    # Node types without replication checks have nothing to clean.
    [[ -n ${!checks_ref+x} ]] || continue
    hosts_ref="${NTYPE}_HOSTS[@]"
    for nip in "${!hosts_ref}"; do
      if [ -f ${NM_TMPFOLDER}/${nip}.down ]; then
        # Host is flagged down: only emit the separator line.
        echo
        continue
      fi
      echo -e "${idsCL[LightYellow]}Removing all Syncthing 'sync-conflict' files from ${idsCL[LightGreen]}${NM_HOSTNAMES[${nip}]}${idsCL[LightYellow]} [${nip}]${idsCL[Default]}"
      PH_CMD="${SSHCMD} root@${nip}"
      for rcheck in "${!checks_ref}"; do
        echo -en "Removing files from ${idsCL[LightCyan]}${NM_REPL_CHECK_LOC[${rcheck}]}/* ${idsCL[Default]}... "
        ${PH_CMD} 'find '${NM_REPL_CHECK_LOC[${rcheck}]}' -iname "*.sync-conflict-*" -exec rm {} \;' >/dev/null 2>&1
        echo -e "${idsCL[Green]}Done${idsCL[Default]}"
      done
      echo
    done
  done
  echo
}
#######################################
# Print the spaces needed to fill a value up to a column width.
# Arguments: $1 - column width, $2 - text already printed in the column
# Outputs:   (width - length of text) spaces; nothing when the text is as
#            wide as, or wider than, the column.
#######################################
_SCANTIMES_PAD(){
  local fill=$(( ${1} - ${#2} ))
  [ ${fill} -gt 0 ] || fill=0
  printf '%*s' "${fill}" ''
}

#######################################
# Print the average of one scan-time bucket as a whole number.
# Arguments: $1 - summed scan times, $2 - number of scans in the bucket
# Outputs:   0 for an empty bucket, otherwise ROUND_NUMBER of sum/count
#######################################
_SCANTIMES_AVG(){
  if [ "${2}" -eq 0 ]; then
    echo 0
  else
    ROUND_NUMBER $(bc <<< "scale=2; ${1}/${2}") 0
  fi
}

#######################################
# Report average, longest and most recent scan durations read from
# ${NM_LOGFOLDER}/status-check.scantimes, and optionally list the individual
# scans. Each log line is "<date>~<seconds>~<run type>" where the run type is
# 0 (service check), 1 (with replication) or 2 (replication and clean), as
# used by the comparisons below.
# Globals:
#   NM_LOGFOLDER, NM_TMPFOLDER, idsCL (read)
#   LAST and the scantimes_*/scansfound_*/average_* work variables (written)
# Arguments:
#   -l|-last N        number of most recent scans to average (default 1440)
#   -h|-help|--help   print usage and exit the shell with status 0
# Outputs:
#   Report on stdout. With more than 20 scans the user is asked (one key
#   from stdin) which scans to list; otherwise all of them are listed.
#######################################
STATUS_SCANTIMES(){
  start=$(date +%s)
  scandefault=1440
  while [ $# -gt 0 ]; do
    case "${1}" in
      -l|-last) LAST=${2};;
      -h|-help|--help)
        echo -e "Usage: ${idsCL[Yellow]}[nodemgmt or nmg] scantimes {opt}${idsCL[Default]} {"
        width=15
        printf "%-${width}s- %s\n" " -l|-last" "(number of last scan times to average from, defaults to ${scandefault})"
        echo -e "}\n"
        exit 0;;
    esac
    shift
  done
  [ "${LAST}" == "" ] && LAST=${scandefault}
  echo -en "${idsCL[Yellow]}Pulling data and calculating average scan times for the last ${idsCL[LightYellow]}$(IDS_NUMBER_FORMAT ${LAST}) scans ${idsCL[Yellow]}... "

  local scanfile="${NM_LOGFOLDER}/status-check.scantimes"
  local scan scandate scantime scanrun bucket avg
  lastscantime=$(tail -n 1 "${scanfile}")
  lastscantime=$(echo "${lastscantime}" | cut -d '~' -f 2)
  scantimes=$(tail -n ${LAST} "${scanfile}")
  longest_scantime=0
  IFS=$'\n'
  read -rd '' -a scantimes <<<"${scantimes}"
  unset IFS

  # Sums and counters per run type; "_xtr" buckets hold scans of 60s or more.
  scantimes_norm=0; scansfound_norm=0
  scantimes_norm_xtr=0; scansfound_norm_xtr=0
  scantimes_repl=0; scansfound_repl=0
  scantimes_repl_xtr=0; scansfound_repl_xtr=0
  scantimes_replchk=0; scansfound_replchk=0
  scantimes_replchk_xtr=0; scansfound_replchk_xtr=0
  for scan in "${scantimes[@]}"; do
    # Split the record with the read builtin instead of forking cut per field.
    IFS='~' read -r scandate scantime scanrun _ <<< "${scan}"
    if [ "${scantime}" -gt "${longest_scantime}" ]; then
      longest_scantime=${scantime}
      longest_scandate=${scandate}
    fi
    if [ "${scanrun}" -eq 0 ]; then
      bucket=norm
    elif [ "${scanrun}" -eq 1 ]; then
      bucket=repl
    elif [ "${scanrun}" -eq 2 ]; then
      bucket=replchk
    else
      continue
    fi
    [ "${scantime}" -lt 60 ] || bucket="${bucket}_xtr"
    # Scan times are whole seconds (the integer tests above require it), so
    # shell arithmetic is sufficient for the running sums.
    (( scantimes_${bucket} += scantime ))
    (( scansfound_${bucket} += 1 ))
  done

  scantimes_total_norm=$(( scantimes_norm + scantimes_norm_xtr ))
  scantimes_total_repl=$(( scantimes_repl + scantimes_repl_xtr ))
  scantimes_total_replchk=$(( scantimes_replchk + scantimes_replchk_xtr ))
  scantimes_total=$(( scantimes_total_norm + scantimes_total_repl + scantimes_total_replchk ))
  scansfound_total_norm=$(( scansfound_norm + scansfound_norm_xtr ))
  scansfound_total_repl=$(( scansfound_repl + scansfound_repl_xtr ))
  scansfound_total_replchk=$(( scansfound_replchk + scansfound_replchk_xtr ))
  scansfound_total=$(( scansfound_total_norm + scansfound_total_repl + scansfound_total_replchk ))

  average_norm=$(_SCANTIMES_AVG ${scantimes_norm} ${scansfound_norm})
  average_norm_xtr=$(_SCANTIMES_AVG ${scantimes_norm_xtr} ${scansfound_norm_xtr})
  average_total_norm=$(_SCANTIMES_AVG ${scantimes_total_norm} ${scansfound_total_norm})
  average_repl=$(_SCANTIMES_AVG ${scantimes_repl} ${scansfound_repl})
  average_repl_xtr=$(_SCANTIMES_AVG ${scantimes_repl_xtr} ${scansfound_repl_xtr})
  average_total_repl=$(_SCANTIMES_AVG ${scantimes_total_repl} ${scansfound_total_repl})
  average_replchk=$(_SCANTIMES_AVG ${scantimes_replchk} ${scansfound_replchk})
  average_replchk_xtr=$(_SCANTIMES_AVG ${scantimes_replchk_xtr} ${scansfound_replchk_xtr})
  average_total_replchk=$(_SCANTIMES_AVG ${scantimes_total_replchk} ${scansfound_total_replchk})
  average_total=$(_SCANTIMES_AVG ${scantimes_total} ${scansfound_total})

  # Column width = widest average + 1. Every printed average takes part; the
  # earlier version left out the replication-and-clean values (and misspelled
  # one name), so a wide value there produced a negative pad length and an
  # endless padding loop.
  cw=0
  for avg in "${average_norm}" "${average_norm_xtr}" "${average_total_norm}" \
             "${average_repl}" "${average_repl_xtr}" "${average_total_repl}" \
             "${average_replchk}" "${average_replchk_xtr}" "${average_total_replchk}" \
             "${average_total}"; do
    [ ${#avg} -gt ${cw} ] && cw=${#avg}
  done
  cw=$(( cw + 1 ))

  average_norm_disp=$(SHOW_TIME ${average_norm} s); spcAND=$(_SCANTIMES_PAD ${cw} "${average_norm}")
  average_norm_xtr_disp=$(SHOW_TIME ${average_norm_xtr} s); spcANXD=$(_SCANTIMES_PAD ${cw} "${average_norm_xtr}")
  average_total_norm_disp=$(SHOW_TIME ${average_total_norm} s); spcATND=$(_SCANTIMES_PAD ${cw} "${average_total_norm}")
  average_repl_disp=$(SHOW_TIME ${average_repl} s); spcARD=$(_SCANTIMES_PAD ${cw} "${average_repl}")
  average_repl_xtr_disp=$(SHOW_TIME ${average_repl_xtr} s); spcARXD=$(_SCANTIMES_PAD ${cw} "${average_repl_xtr}")
  average_total_repl_disp=$(SHOW_TIME ${average_total_repl} s); spcATRD=$(_SCANTIMES_PAD ${cw} "${average_total_repl}")
  average_replchk_disp=$(SHOW_TIME ${average_replchk} s); spcARCD=$(_SCANTIMES_PAD ${cw} "${average_replchk}")
  average_replchk_xtr_disp=$(SHOW_TIME ${average_replchk_xtr} s); spcARCXD=$(_SCANTIMES_PAD ${cw} "${average_replchk_xtr}")
  average_total_replchk_disp=$(SHOW_TIME ${average_total_replchk} s); spcATRCD=$(_SCANTIMES_PAD ${cw} "${average_total_replchk}")
  average_total_disp=$(SHOW_TIME ${average_total} s); spcATD=$(_SCANTIMES_PAD ${cw} "${average_total}")
  lastscantime_disp=$(SHOW_TIME ${lastscantime} s)
  longest_scantime_disp=$(SHOW_TIME ${longest_scantime} s)

  # Modification times of the marker files give the time of the last runs.
  [ -f ${NM_TMPFOLDER}/.replcheck ] && last_replcheck=$(date -d @$(stat -c %Y ${NM_TMPFOLDER}/.replcheck) "+%Y-%m-%d %H:%M:%S") || last_replcheck=""
  [ -f ${NM_TMPFOLDER}/.replclean ] && last_replclean=$(date -d @$(stat -c %Y ${NM_TMPFOLDER}/.replclean) "+%Y-%m-%d %H:%M:%S") || last_replclean=""
  [ -f "${scanfile}" ] && last_scancheck=$(date -d @$(stat -c %Y "${scanfile}") "+%Y-%m-%d %H:%M:%S") || last_scancheck=""

  echo -en "\r\033[K"
  echo -e "${idsCL[LightCyan]}Scan Type Scans Average Scantime${idsCL[Default]}"
  DIVIDER . lightCyan 60
  echo -e "${idsCL[LightGreen]}Average for last $(IDS_NUMBER_FORMAT ${scansfound_total}) ${idsCL[Green]}(${average_total}s)${spcATD}${idsCL[LightGreen]}${average_total_disp}${idsCL[Default]}"
  DIVIDER . lightCyan 60
  echo -e "${idsCL[Green]} Service Checks $(IDS_NUMBER_FORMAT ${scansfound_total_norm}) ${idsCL[Green]}(${average_total_norm}s)${spcATND}${idsCL[LightGreen]}${average_total_norm_disp}${idsCL[Default]}"
  DIVIDER . lightCyan 60
  echo -e "${idsCL[Cyan]} - Normal $(IDS_NUMBER_FORMAT ${scansfound_norm}) ${idsCL[Cyan]}(${average_norm}s)${spcAND}${idsCL[LightCyan]}${average_norm_disp}${idsCL[Default]}"
  echo -e "${idsCL[Cyan]} - Long $(IDS_NUMBER_FORMAT ${scansfound_norm_xtr}) ${idsCL[Cyan]}(${average_norm_xtr}s)${spcANXD}${idsCL[LightCyan]}${average_norm_xtr_disp}${idsCL[Default]}"
  DIVIDER . lightCyan 60
  echo -e "${idsCL[Green]} w/Replication $(IDS_NUMBER_FORMAT ${scansfound_total_repl}) ${idsCL[Green]}(${average_total_repl}s)${spcATRD}${idsCL[LightGreen]}${average_total_repl_disp}${idsCL[Default]}"
  DIVIDER . lightCyan 60
  echo -e "${idsCL[Cyan]} - Normal $(IDS_NUMBER_FORMAT ${scansfound_repl}) ${idsCL[Cyan]}(${average_repl}s)${spcARD}${idsCL[LightCyan]}${average_repl_disp}${idsCL[Default]}"
  echo -e "${idsCL[Cyan]} - Long $(IDS_NUMBER_FORMAT ${scansfound_repl_xtr}) ${idsCL[Cyan]}(${average_repl_xtr}s)${spcARXD}${idsCL[LightCyan]}${average_repl_xtr_disp}${idsCL[Default]}"
  DIVIDER . lightCyan 60
  echo -e "${idsCL[Green]} w/Repl. & Clean $(IDS_NUMBER_FORMAT ${scansfound_total_replchk}) ${idsCL[Green]}(${average_total_replchk}s)${spcATRCD}${idsCL[LightGreen]}${average_total_replchk_disp}${idsCL[Default]}"
  DIVIDER . lightCyan 60
  echo -e "${idsCL[Cyan]} - Normal $(IDS_NUMBER_FORMAT ${scansfound_replchk}) ${idsCL[Cyan]}(${average_replchk}s)${spcARCD}${idsCL[LightCyan]}${average_replchk_disp}${idsCL[Default]}"
  echo -e "${idsCL[Cyan]} - Long $(IDS_NUMBER_FORMAT ${scansfound_replchk_xtr}) ${idsCL[Cyan]}(${average_replchk_xtr}s)${spcARCXD}${idsCL[LightCyan]}${average_replchk_xtr_disp}${idsCL[Default]}"
  echo
  echo -e "${idsCL[LightCyan]}Longest Scan Time : ${idsCL[Green]}(${longest_scantime}s) ${idsCL[LightGreen]}${longest_scantime_disp}${idsCL[Default]}"
  echo -e "${idsCL[LightCyan]}Longest Scan Date : ${idsCL[LightGreen]}${longest_scandate}${idsCL[Default]}"
  echo
  echo -e "${idsCL[LightCyan]}Last Scan Time : ${idsCL[Green]}(${lastscantime}s) ${idsCL[LightGreen]}${lastscantime_disp}${idsCL[Default]}"
  echo -e "${idsCL[LightCyan]}Last Service Scan : ${idsCL[LightGreen]}${last_scancheck}${idsCL[Default]}"
  echo -e "${idsCL[LightCyan]}Last w/Replication Scan : ${idsCL[LightGreen]}${last_replcheck}${idsCL[Default]}"
  echo -e "${idsCL[LightCyan]}Last w/Repl. & Clean : ${idsCL[LightGreen]}${last_replclean}${idsCL[Default]}"

  # gofor: 0 = list nothing, 1 = list the user's selection, 2 = list all.
  if [ ${scansfound_total} -gt 20 ]; then
    echo -en "\n${idsCL[LightCyan]}List out these '${scansfound_total}' scan times? (${idsCL[Yellow]}A${idsCL[LightCyan]})ll/(${idsCL[Yellow]}N${idsCL[LightCyan]})ormal/(${idsCL[Yellow]}R${idsCL[LightCyan]})epl/repl(${idsCL[Yellow]}C${idsCL[LightCyan]})lean/(${idsCL[Yellow]}L${idsCL[LightCyan]})ong [default=no] : ${idsCL[Default]}"
    read -n1 choice
    case "${choice^^}" in
      A|N|R|C|L)
        echo -en "\r\033[K"
        gofor=1;;
      *)
        gofor=0;;
    esac
  else
    gofor=2
    echo
  fi

  if [ ${gofor} -gt 0 ]; then
    lastscansnum=${scansfound_total}
    last_scantimes=$(tail -n ${lastscansnum} "${scanfile}")
    IFS=$'\n'
    read -rd '' -a last_scantimes <<<"${last_scantimes}"
    unset IFS
    echo -e "${idsCL[LightCyan]}Last ${lastscansnum} Scan Times\n${idsCL[Green]}------------------------------------------${idsCL[Default]}"
    local show
    for scan in "${last_scantimes[@]}"; do
      IFS='~' read -r scandate scantime scanrun _ <<< "${scan}"
      show=0
      if [ ${gofor} -eq 2 ]; then
        show=1
      else
        case "${choice^^}" in
          A) show=1;;
          N) [ "${scanrun}" -eq 0 ] && show=1;;
          R) [ "${scanrun}" -eq 1 ] && show=1;;
          C) [ "${scanrun}" -eq 2 ] && show=1;;
          L)
            # Same thresholds as the LONG RUN markers below. The two halves
            # are grouped explicitly: an ungrouped "a && b || c && d" is read
            # left to right, which hid normal scans between 61s and 70s.
            if { [ "${scanrun}" -eq 0 ] && [ "${scantime}" -gt 60 ]; } \
              || { [ "${scanrun}" -gt 0 ] && [ "${scantime}" -gt 70 ]; }; then
              show=1
            fi;;
        esac
      fi
      [ ${show} -eq 1 ] || continue
      echo -en "${idsCL[White]} ${scandate} (${scantime}s) ${idsCL[Yellow]}$(SHOW_TIME ${scantime} s) "
      [ "${scanrun}" -eq 1 ] && echo -en "${idsCL[LightCyan]}<-- Replication Run "
      [ "${scanrun}" -eq 2 ] && echo -en "${idsCL[Blue]}<-- Replication & Clean Run "
      [ "${scanrun}" -eq 0 ] && [ "${scantime}" -gt 60 ] && echo -en "${idsCL[LightRed]} <!!--[LONG RUN]--!!>"
      [ "${scanrun}" -gt 0 ] && [ "${scantime}" -gt 70 ] && echo -en "${idsCL[LightRed]} <!!--[LONG RUN]--!!>"
      echo -e "${idsCL[Default]}"
    done
  fi
  end=$(date +%s); runtime=$((end-start))
  echo -e "\nRuntime: ${runtime}\n"
}