#!/usr/bin/env bash
#
# inc/status.inc -- service, docker, replication and free-space status checks.
#
# Provenance: "+" side of patch ca01c1b887a29846b2918f409242d9620d4d03fa
# ("Update status.inc", David Schroeder, Wed, 10 May 2023 21:52:38 -0500).
# That patch appended a second copy of this file directly after REPLCHECK's
# closing brace ("}#!/usr/bin/env bash"), which left the function unterminated;
# each function is defined exactly once here.
#
# Helpers and data defined elsewhere and used here: SENDNOTICE, DIVIDER,
# SHOW_TIME, CHECK_HOST, join_by, ENTER2CONTINUE, NODE_TYPES, <TYPE>_HOSTS,
# <TYPE>_SERVICES_CHECK, <TYPE>_DOCKER, <TYPE>_REPL_CHECK, LOCAL_SERVICES,
# NM_SERVICES, NM_DOCKERS, NM_DOCKER_COMPOSE, NM_NODETYPES, REPL_CHECKS,
# REPL_DESC, idsCL/idsST/idsBG, FOLDER, TMPFOLDER, LOGFILE, RENOTIFY,
# NODE_HOSTNAME, WEB_HOSTS, action.

#######################################
# echo -e the arguments unless STATUS runs in the silent "check" mode.
# Arguments: optional -n, then the text (no text prints an empty line).
#######################################
_STATUS_SAY(){
  if [ "${ST_ACTION}" != "check" ]; then
    echo -e "$@"
  fi
}

#######################################
# Print " <label><padding>: " without a newline (nothing in "check" mode).
# Padding is clamped at zero, so a label longer than the column can no longer
# spin forever the way the old "until [ $c = ${spc1} ]" counter did.
# Arguments: $1 - column width, $2 - label text
# Globals:   ST_ACTION (read), spc (written, as the original code did)
#######################################
_STATUS_LABEL(){
  local width=${1} label=${2} fill
  if [ "${ST_ACTION}" = "check" ]; then
    return 0
  fi
  if [ "${ST_ACTION}" = "report" ]; then
    spc=' '
  else
    fill=$(( width - ${#label} ))
    if [ "${fill}" -lt 0 ]; then
      fill=0
    fi
    printf -v spc '%*s' "${fill}" ''
  fi
  echo -en " ${label}${spc}: "
}

#######################################
# Print the age in seconds of a file (now - mtime); 0 if it cannot be stat'ed.
# Arguments: $1 - path
#######################################
_STATUS_AGE(){
  local mtime
  if ! mtime=$(stat -c %Y "${1}" 2>/dev/null); then
    echo 0
    return 0
  fi
  echo $(( $(date +%s) - mtime ))
}

#######################################
# Print the total outage in seconds for a marker pair: the age of
# <base>.errtime when it exists, otherwise the age of <base>.down.
# Arguments: $1 - marker path without extension
#######################################
_STATUS_DOWNTIME(){
  if [ -f "${1}.errtime" ]; then
    _STATUS_AGE "${1}.errtime"
  else
    _STATUS_AGE "${1}.down"
  fi
}

#######################################
# Poll one secondary node until its copy of test.repl matches the reference
# copy, then drop a result marker that STATUS collects:
#   ${TMPFOLDER}/repl.<rcheck>.<nip>.good     copies are identical
#   ${TMPFOLDER}/repl.<rcheck>.<nip>.timeout  deadline passed first
# STATUS starts this in the background, once per (check, node) pair.
# Arguments:
#   $1 - key into REPL_CHECKS (the replicated folder)
#   $2 - address of the secondary node
#   $3 - command prefix that runs the diff on the primary ("" = run it here)
#   $4 - STATUS action; "check" gets a 2 minute deadline, anything else 1 minute
# Globals: REPL_CHECKS, TMPFOLDER (read)
#######################################
REPLCHECK(){
  local rcheck=${1} nip=${2} PH_CMD=${3}
  local probe="${REPL_CHECKS[${rcheck}]}/test.repl"
  local marker="${TMPFOLDER}/repl.${rcheck}.${nip}"
  local deadline checked=false timed_out=false

  if [ "${4}" != "check" ]; then
    deadline=$(date --date='1 minutes' +%s)
  else
    deadline=$(date --date='2 minutes' +%s)
  fi

  while :; do
    # Only diff once the file exists on the secondary; until then "checked"
    # keeps its previous non-empty value and the loop keeps polling.
    if ssh -q "root@${nip}" "[[ -f ${probe} ]]"; then
      if [ -z "${PH_CMD}" ]; then
        checked=$(ssh "root@${nip}" "cat ${probe}" | diff - "${probe}")
      else
        checked=$(${PH_CMD} "ssh root@${nip} cat ${probe} | diff - ${probe}")
      fi
    fi
    # Empty diff output means the copies match. Test this before the deadline
    # so a match on the final poll is not reported as a timeout.
    if [ -z "${checked}" ]; then
      break
    fi
    if [ "$(date +%s)" -gt "${deadline}" ]; then
      timed_out=true
      break
    fi
    sleep 1
  done

  if [ "${timed_out}" == "true" ]; then
    touch "${marker}.timeout"
  else
    touch "${marker}.good"
  fi
}

#######################################
# Run the cluster status checks and print / notify the results.
# Arguments:
#   $1 - action:
#          ""         services + dockers + replication, with output
#          report     same, with colour/style tables blanked
#          check      same, silent (notifications and log only)
#          services   service checks only
#          dockers    docker checks only
#          repl|sync  replication check only
#          freespace  free-space scan only
#   $2 - with "report": "email" also prints each node's uptime
# Globals: see the file header. Working variables (NCMD, nid, srvc, errtime,
#          toterrtime, ...) are deliberately left global, as before, because
#          code outside this view may read them.
# Outputs: status text on stdout; always ends with "runtime: <seconds>".
#######################################
STATUS(){
  local do_repl do_nodes node_mark svc_mark dkr_mark repl_mark

  start=$(date +%s)
  ST_ACTION=${1}
  if [ "${ST_ACTION}" = "report" ]; then
    # Replace the tables with one-element arrays holding an empty string so
    # every colour/style lookup below expands to nothing.
    unset idsCL idsBG idsST
    idsCL=('')
    idsBG=('')
    idsST=('')
  elif [ "${ST_ACTION}" = "sync" ]; then
    ST_ACTION=repl
  fi

  case "${ST_ACTION}" in
    ''|report|repl|check) do_repl=true ;;
    *) do_repl=false ;;
  esac
  case "${ST_ACTION}" in
    ''|report|services|dockers|check) do_nodes=true ;;
    *) do_nodes=false ;;
  esac

  # Addresses on eth0, looked up once. Nodes are matched by substring, as the
  # original did. NOTE(review): "10.0.0.1" therefore also matches "10.0.0.10".
  ST_LOCAL_IPS=$(/sbin/ip -o -4 addr list eth0 | awk '{print $4}' | cut -d/ -f1)

  ########################
  ## REPLICATION SETUP: write a fresh test.repl on each primary node
  ########################
  if [ "${do_repl}" = true ]; then
    _STATUS_SAY -n "${idsCL[LightCyan]}Setting up replication checks ... ${idsCL[Default]}"
    for NTYPE in "${NODE_TYPES[@]}"; do
      PH=${NTYPE}_HOSTS[0]
      # NOTE(review): node .down markers are created under ${TMPFOLDER} below,
      # but this test looks in ${FOLDER} -- confirm which one is intended.
      if [ ! -f "${FOLDER}/${!PH}.down" ]; then
        PH_CMD="ssh root@${!PH}"
        var=${NTYPE}_REPL_CHECK[@]
        if [ ! -z ${!var+x} ]; then
          for rcheck in "${!var}"; do
            ${PH_CMD} rm -f "${FOLDER}/test.repl"
            daterun=$(date +%Y-%m-%d-%H-%M-%S)
            ${PH_CMD} "echo -e \"Status-Check (${NODE_HOSTNAME})\n${daterun}\" > ${REPL_CHECKS[${rcheck}]}/test.repl" &
          done
        fi
      fi
    done
    if [ "${ST_ACTION}" != "check" ]; then
      echo -e "${idsCL[Green]}Complete${idsCL[Default]}"
      echo
    fi
  fi

  ########################
  ## SERVICE / DOCKER CHECKS
  ########################
  if [ "${do_nodes}" = true ]; then

    ## Services on this machine
    if [ ! -z ${LOCAL_SERVICES+x} ]; then
      if [ "${ST_ACTION}" != "check" ]; then
        lip=${ST_LOCAL_IPS}
        uptime=$(uptime -p)
        echo -e " ${idsST[Bold]}${idsCL[LightCyan]}${NODE_HOSTNAME} (${lip})${idsST[Reset]}${idsCL[LightCyan]} - ${uptime} - localhost${idsCL[Default]}"
        DIVIDER false green
      fi
      for srvc in "${LOCAL_SERVICES[@]}"; do
        _STATUS_LABEL 30 "${NM_SERVICES[${srvc}]}"
        svc_mark="${TMPFOLDER}/${NODE_HOSTNAME}-${srvc}"

        if [ "$(systemctl is-active ${srvc})" != "active" ]; then
          if [ ! -f "${svc_mark}.down" ]; then
            _STATUS_SAY "${idsCL[Red]}Not Running${idsCL[Default]}"
            SENDNOTICE "${NODE_HOSTNAME}" "${NM_SERVICES[${srvc}]} is down" 1
            touch "${svc_mark}.down"
            echo "$(date) - LOCAL - ${NODE_HOSTNAME} - ${NM_SERVICES[${srvc}]} is down" >> "${LOGFILE}"
          else
            errtime=$(_STATUS_AGE "${svc_mark}.down")
            if [ "${errtime}" -gt "${RENOTIFY}" ]; then
              # .errtime keeps the time of the first failure; .down is
              # re-touched so the RENOTIFY window starts again.
              if [ ! -f "${svc_mark}.errtime" ]; then
                mv "${svc_mark}.down" "${svc_mark}.errtime"
              fi
              touch "${svc_mark}.down"
            fi
            # Always finish the status line (it used to stay open, without a
            # newline, while the outage was younger than RENOTIFY).
            toterrtime=$(_STATUS_DOWNTIME "${svc_mark}")
            if [ "${ST_ACTION}" != "check" ]; then
              echo -e "${idsCL[Red]}Not running for $(SHOW_TIME ${toterrtime})${idsCL[Default]}"
            fi
          fi
        else
          _STATUS_SAY "${idsCL[Green]}Running${idsCL[Default]}"
          if [ -f "${svc_mark}.down" ]; then
            toterrtime=$(_STATUS_DOWNTIME "${svc_mark}")
            rm -f "${svc_mark}".*
            SENDNOTICE "${NODE_HOSTNAME}" "${NM_SERVICES[${srvc}]} is back UP! It was down for $(SHOW_TIME ${toterrtime})"
            echo "$(date) - LOCAL - ${NODE_HOSTNAME} - ${NM_SERVICES[${srvc}]} is back up" >> "${LOGFILE}"
          fi
        fi
      done
      _STATUS_SAY
    fi

    ## Services and dockers on every node of every node type
    for NTYPE in "${NODE_TYPES[@]}"; do
      nid=1
      dockers=${NTYPE}_DOCKER[@]
      hosts=${NTYPE}_HOSTS[@]

      # A "dockers" run skips node types that define no docker list.
      if [[ ! -v ${NTYPE}_DOCKER ]] && [ "${ST_ACTION}" == "dockers" ]; then
        GOFORCHECK=false
      else
        GOFORCHECK=true
      fi

      if [ "${GOFORCHECK}" = true ]; then
        if [ "${ST_ACTION}" != "check" ]; then
          echo -e "${idsST[Bold]}"; DIVIDER
          if [ "${ST_ACTION}" = "dockers" ]; then
            echo -e "${idsCL[Yellow]} ${NM_NODETYPES[$NTYPE]}-Node Docker Status${idsCL[Default]}"
          else
            echo -e "${idsCL[Yellow]} ${NM_NODETYPES[$NTYPE]}-Node Service Status${idsCL[Default]}"
          fi
          DIVIDER; echo -e "${idsST[Reset]}"
        fi

        for nip in "${!hosts}"; do
          _STATUS_SAY -n " ${idsST[Bold]}${idsCL[LightCyan]}${NM_NODETYPES[$NTYPE]}-Node${nid} (${nip})${idsST[Reset]}"
          node_mark="${TMPFOLDER}/${nip}"

          if [[ ${ST_LOCAL_IPS} == *"${nip}"* ]]; then
            NCMD=''; LH='- localhost'
            # Reset explicitly: the previous node's result used to leak into
            # the local node because CHECK_HOST is only run for remote nodes.
            checkhost=true
          else
            NCMD="ssh root@${nip}"; LH=''
            checkhost=$(CHECK_HOST ${nip})
          fi

          if [ "${checkhost}" != "false" ]; then
            if [ "${ST_ACTION}" != "check" ]; then
              if { [ "${ST_ACTION}" == "report" ] && [ "${2}" == "email" ]; } || [ "${ST_ACTION}" != "report" ]; then
                uptime=$(${NCMD} uptime -p)
                # NOTE(review): "LightYello" is the key as written in the
                # original; confirm it exists in idsCL.
                echo -e "${idsCL[LightCyan]} - ${uptime} ${idsCL[LightYello]}${LH}${idsCL[Default]}"
              else
                echo -e "${idsCL[Default]}"
              fi
              DIVIDER false green
            fi

            # The node answers again: close any open node-down incident.
            if [ -f "${node_mark}.down" ]; then
              toterrtime=$(_STATUS_DOWNTIME "${node_mark}")
              rm -f "${node_mark}".*
              SENDNOTICE "${NM_NODETYPES[$NTYPE]}-Node${nid}-${nip}-UP" "${NM_NODETYPES[$NTYPE]}-Node${nid} is back UP! It was down for $(SHOW_TIME ${toterrtime})"
            fi

            if [ "${ST_ACTION}" != "dockers" ]; then
              srvcs=${NTYPE}_SERVICES_CHECK[@]
              srvcstotest="$(join_by " " ${!srvcs})"
              # One is-active call for the whole list; the answers are read
              # back by position (sr) in the loop below.
              srvctst=($(${NCMD} systemctl is-active ${srvcstotest}))
              sr=0
              for srvc in "${!srvcs}"; do
                # gitea on a WEB node is skipped when the node's eth0
                # addresses do not contain WEB_HOSTS[0].
                if [ "${srvc}" == "gitea" ] && [ "${NTYPE}" == "WEB" ] \
                  && [[ $(${NCMD} /sbin/ip -o -4 addr list eth0 | awk '{print $4}' | cut -d/ -f1) != *${WEB_HOSTS[0]}* ]]; then
                  NOGOCHK=false
                else
                  NOGOCHK=true
                fi

                if [ "${NOGOCHK}" == true ]; then
                  _STATUS_LABEL 30 "${NM_SERVICES[${srvc}]}"
                  svc_mark="${TMPFOLDER}/${nip}-${srvc}"

                  # mysql and mariadb count as the same service: an inactive
                  # one is accepted when the other unit is active.
                  if [ "${srvctst[$sr]}" != "active" ] && [ "${srvc}" == "mysql" ]; then
                    if [ "$(${NCMD} systemctl is-active mariadb)" == "active" ]; then mysqlgo=true; else mysqlgo=false; fi
                  elif [ "${srvctst[$sr]}" != "active" ] && [ "${srvc}" = "mariadb" ]; then
                    if [ "$(${NCMD} systemctl is-active mysql)" == "active" ]; then mysqlgo=true; else mysqlgo=false; fi
                  elif [ "${srvctst[$sr]}" == "active" ] && { [ "${srvc}" == "mysql" ] || [ "${srvc}" == "mariadb" ]; }; then
                    mysqlgo=true
                  fi

                  if [ "${srvctst[$sr]}" != "active" ] && [ "${mysqlgo}" != "true" ]; then
                    if [ ! -f "${svc_mark}.down" ]; then
                      # First sighting is only logged; notices go out from the
                      # second pass on (60-180 s) and after each RENOTIFY.
                      _STATUS_SAY "${idsCL[Red]}Not Running${idsCL[Default]}"
                      touch "${svc_mark}.down"
                      echo "$(date) - ${nip} - ${NM_NODETYPES[$NTYPE]}-Node${nid} - ${NM_SERVICES[${srvc}]} is down" >> "${LOGFILE}"
                    else
                      errtime=$(_STATUS_AGE "${svc_mark}.down")
                      if [ "${errtime}" -gt "${RENOTIFY}" ]; then
                        if [ ! -f "${svc_mark}.errtime" ]; then
                          mv "${svc_mark}.down" "${svc_mark}.errtime"
                        fi
                        touch "${svc_mark}.down"
                        SENDNOTICE "${NM_NODETYPES[$NTYPE]}-Node${nid}-${nip}" "${NM_SERVICES[${srvc}]} is down" 1
                      elif [ "${errtime}" -gt 60 ] && [ "${errtime}" -lt 180 ]; then
                        SENDNOTICE "${NM_NODETYPES[$NTYPE]}-Node${nid}-${nip}" "${NM_SERVICES[${srvc}]} is down" 1
                      fi
                      # Computed on every pass so the line never shows a value
                      # left over from another service.
                      toterrtime=$(_STATUS_DOWNTIME "${svc_mark}")
                      if [ "${ST_ACTION}" != "check" ]; then
                        echo -e "${idsCL[Red]}Not running for $(SHOW_TIME ${toterrtime})${idsCL[Default]}"
                      fi
                    fi
                  else
                    _STATUS_SAY "${idsCL[Green]}Running${idsCL[Default]}"
                    if [ -f "${svc_mark}.down" ]; then
                      toterrtime=$(_STATUS_DOWNTIME "${svc_mark}")
                      SENDNOTICE "${NM_NODETYPES[$NTYPE]}-Node${nid}-${nip}" "${NM_SERVICES[${srvc}]} is back UP! It was down for $(SHOW_TIME ${toterrtime})"
                      echo "$(date) - ${nip} - ${NM_NODETYPES[$NTYPE]}-Node${nid} - ${NM_SERVICES[${srvc}]} is back up, it was down for $(SHOW_TIME ${toterrtime})" >> "${LOGFILE}"
                      rm -f "${svc_mark}".*
                    fi
                    if [ "${srvc}" == "gitea" ]; then
                      # NOTE(review): cleans ${FOLDER}, not ${TMPFOLDER}, as in
                      # the original -- confirm.
                      rm -f "${FOLDER}"/*-"${srvc}".*
                    fi
                  fi
                  if [ "${mysqlgo}" = "true" ]; then
                    unset mysqlgo
                  fi
                fi
                sr=$(( sr + 1 ))
              done
            fi

            if [[ -v ${NTYPE}_DOCKER ]] && [ "${ST_ACTION}" != "services" ]; then
              if [ "${ST_ACTION}" != "check" ]; then
                echo
                echo -e "${idsCL[Yellow]} Docker Service(s) Status${idsCL[Default]}"
                echo -e "${idsCL[Yellow]}---------------------------------------------${idsCL[Default]}"
              fi

              for docker in "${!dockers}"; do
                _STATUS_LABEL 30 "${NM_DOCKERS[${docker}]}"
                dkr_mark="${TMPFOLDER}/${nip}-${docker}"

                if [ ! "$(${NCMD} docker ps -q -f name=${docker})" ]; then
                  # Not running: remove an exited container, bring the compose
                  # project up again and re-test after a short wait.
                  if [ "$(${NCMD} docker ps -aq -f status=exited -f name=${docker})" ]; then
                    ${NCMD} docker rm ${docker} >/dev/null 2>&1
                  fi
                  ${NCMD} docker-compose -f ${NM_DOCKER_COMPOSE[${docker}]}/docker-compose.yml up -d >/dev/null 2>&1
                  sleep 10s
                  if [ "$(${NCMD} docker ps -q -f name=${docker})" ]; then
                    _STATUS_SAY "${idsCL[Green]}Running - Fixed${idsCL[Default]}"
                    SENDNOTICE "${NM_NODETYPES[$NTYPE]}-Node${nid}-${nip}" "${NM_DOCKERS[${docker}]} is fixed"
                    rm -f "${dkr_mark}.down"
                    rm -f "${dkr_mark}.errtime"
                    echo "$(date) - ${nip} - ${NM_NODETYPES[$NTYPE]}-Node${nid} - ${NM_DOCKERS[${docker}]} (docker) is fixed" >> "${LOGFILE}"
                  else
                    _STATUS_SAY "${idsCL[Red]}Not Running - Could Not Fix!${idsCL[Default]}"
                    # Bookkeeping and notices used to sit inside the non-check
                    # branch, so silent "check" runs never reported a docker
                    # that could not be restarted.
                    if [ ! -f "${dkr_mark}.down" ]; then
                      SENDNOTICE "${NM_NODETYPES[$NTYPE]}-Node${nid}-${nip}" "${NM_DOCKERS[${docker}]} is down" 1
                      touch "${dkr_mark}.down"
                      echo "$(date) - ${nip} - ${NM_NODETYPES[$NTYPE]}-Node${nid} - ${NM_DOCKERS[${docker}]} (docker) is down" >> "${LOGFILE}"
                    else
                      errtime=$(_STATUS_AGE "${dkr_mark}.down")
                      if [ "${errtime}" -gt "${RENOTIFY}" ]; then
                        if [ ! -f "${dkr_mark}.errtime" ]; then
                          mv "${dkr_mark}.down" "${dkr_mark}.errtime"
                        fi
                        toterrtime=$(_STATUS_AGE "${dkr_mark}.errtime")
                        if [ "${ST_ACTION}" != "check" ]; then
                          echo -e "${idsCL[Red]}Not running for $(SHOW_TIME ${toterrtime})${idsCL[Default]}"
                        fi
                        touch "${dkr_mark}.down"
                      fi
                    fi
                  fi
                else
                  _STATUS_SAY "${idsCL[Green]}Running${idsCL[Default]}"
                  rm -f "${dkr_mark}.down"
                  rm -f "${dkr_mark}.errtime"
                fi
              done
            fi

          else
            ## Node did not answer CHECK_HOST
            if [ ! -f "${node_mark}.down" ]; then
              touch "${node_mark}.down"
              if [ ! -f "${node_mark}.errtime" ]; then
                touch "${node_mark}.errtime"
              fi
              if [ "${ST_ACTION}" != "check" ]; then
                toterrtime=$(_STATUS_AGE "${node_mark}.errtime")
                echo -e "${idsCL[Red]} - Node is down!${idsCL[Default]}"
              fi
              echo "$(date) - ${nip} - ${NM_NODETYPES[$NTYPE]}-Node${nid} - Node is down" >> "${LOGFILE}"
            else
              if [ "${ST_ACTION}" != "check" ]; then
                toterrtime=$(_STATUS_AGE "${node_mark}.errtime")
                echo -e "${idsCL[Red]} - Node has been down for $(SHOW_TIME ${toterrtime}) ${idsCL[LightYello]}${LH}${idsCL[Default]}"
              fi
              errtime=$(_STATUS_AGE "${node_mark}.down")
              if [ "${errtime}" -gt 60 ] && [ "${errtime}" -lt 180 ]; then
                # The text used to be "${NM_SERVICES[${srvc}]} is down" with a
                # srvc left over from an earlier loop; name the node instead.
                SENDNOTICE "${NM_NODETYPES[$NTYPE]}-Node${nid}-${nip}" "${NM_NODETYPES[$NTYPE]}-Node${nid} is down" 1
              fi
            fi
          fi

          _STATUS_SAY
          nid=$(( nid + 1 ))
        done
      fi #GOFORCHECK
    done
  fi

  ########################
  ## REPLICATION CHECK
  ########################
  if [ "${do_repl}" = true ]; then
    if [ "${ST_ACTION}" != "check" ]; then
      echo -e "${idsST[Bold]}"; DIVIDER
      echo -e "${idsCL[Yellow]} Replication Status Between the Primary and Secondary Nodes${idsCL[Default]}"
      DIVIDER; echo -e "${idsST[Reset]}"
      echo -en " ${idsCL[LightCyan]}Starting processes to collect/monitor replication status : "
    fi

    # Pass 1: start one background REPLCHECK per (secondary node, check).
    for NTYPE in "${NODE_TYPES[@]}"; do
      PH=${NTYPE}_HOSTS[0]
      PH_CMD="ssh root@${!PH}"
      var=${NTYPE}_REPL_CHECK[@]
      if [ ! -z ${!var+x} ]; then
        var=${NTYPE}_HOSTS[@]
        for nip in "${!var}"; do
          if [[ ${ST_LOCAL_IPS} != *"${nip}"* ]] && [ "${nip}" != "${!PH}" ]; then
            if [ ! -f "${FOLDER}/${!PH}.down" ] && [ ! -f "${TMPFOLDER}/${nip}.down" ]; then
              var2=${NTYPE}_REPL_CHECK[@]
              for rcheck in "${!var2}"; do
                # Redirect before "&": written after it, the redirection
                # applied to an empty command and silenced nothing.
                REPLCHECK "${rcheck}" "${nip}" "${PH_CMD}" "${ST_ACTION}" >/dev/null 2>&1 &
              done
            fi
          fi
        done
      fi
    done
    _STATUS_SAY "${idsCL[Green]}Done${idsCL[Default]}\n"

    # Pass 2: wait for each worker's marker and report it.
    for NTYPE in "${NODE_TYPES[@]}"; do
      PH=${NTYPE}_HOSTS[0]
      PH_CMD="ssh root@${!PH}"
      var=${NTYPE}_REPL_CHECK[@]
      if [ ! -z ${!var+x} ]; then
        var=${NTYPE}_HOSTS[@]
        # Number nodes by their position in the host list, like the service
        # loop does; nid used to be whatever an earlier loop left behind.
        nid=0
        for nip in "${!var}"; do
          nid=$(( nid + 1 ))
          if [[ ${ST_LOCAL_IPS} != *"${nip}"* ]] && [ "${nip}" != "${!PH}" ]; then
            if [ "${ST_ACTION}" != "check" ]; then
              echo -e " ${idsCL[LightCyan]}${NM_NODETYPES[$NTYPE]}-Node1 (${!PH}) <--> ${idsST[Bold]}${NM_NODETYPES[$NTYPE]}-Node${nid} (${nip})${idsST[Reset]}${idsCL[Default]}"
              DIVIDER false green
            fi
            if [ ! -f "${FOLDER}/${!PH}.down" ] && [ ! -f "${TMPFOLDER}/${nip}.down" ]; then
              var2=${NTYPE}_REPL_CHECK[@]
              for rcheck in "${!var2}"; do
                _STATUS_LABEL 30 "${REPL_DESC[${rcheck}]}"
                repl_mark="${TMPFOLDER}/repl.${rcheck}.${nip}"

                # Wait for REPLCHECK's marker. The markers are removed only
                # after one was seen: deleting them on every poll could erase
                # a marker created between the test and the rm, after which
                # this loop never ended. The sleep avoids a busy spin.
                checked=""
                until [ "${checked}" != "" ]; do
                  if [ -f "${repl_mark}.good" ]; then
                    checked=good
                  elif [ -f "${repl_mark}.timeout" ]; then
                    checked=timeout
                  else
                    sleep 1
                  fi
                done
                rm -f "${repl_mark}".*

                if [ "${checked}" == "timeout" ]; then
                  _STATUS_SAY "${idsCL[Red]}Timeout${idsCL[Default]}"
                  if [ ! -f "${TMPFOLDER}/${nip}-${rcheck}.down" ]; then
                    touch "${TMPFOLDER}/${nip}-${rcheck}.down"
                    SENDNOTICE "Repl-Timeout-'${NM_NODETYPES[$NTYPE]}-Node${nid}'" "${REPL_DESC[${rcheck}]} (${REPL_CHECKS[${rcheck}]})" 1
                    echo "$(date) - ${nip} - ${NM_NODETYPES[$NTYPE]}-Node${nid} - (${REPL_DESC[${rcheck}]}) Replicated folder timeout, it is not syncing" >> "${LOGFILE}"
                  fi
                else
                  _STATUS_SAY "${idsCL[Green]}Good${idsCL[Default]}"
                  if [ -f "${TMPFOLDER}/${nip}-${rcheck}.down" ]; then
                    rm -f "${TMPFOLDER}/${nip}-${rcheck}.down"
                    SENDNOTICE "Repl-Timeout-'${NM_NODETYPES[$NTYPE]}-Node${nid}'" "Replicated folder is back up!\n${REPL_DESC[${rcheck}]} (${REPL_CHECKS[${rcheck}]})"
                    echo "$(date) - ${nip} - ${NM_NODETYPES[$NTYPE]}-Node${nid} - (${REPL_DESC[${rcheck}]}) Replicated folder is back up" >> "${LOGFILE}"
                  fi
                fi
              done
            else
              if [ "${ST_ACTION}" != "check" ]; then
                if [ -f "${FOLDER}/${!PH}.down" ]; then
                  echo -e "${idsCL[Red]}${NM_NODETYPES[$NTYPE]}-Node1 (${!PH}) is offline${idsCL[Default]}"
                fi
                if [ -f "${TMPFOLDER}/${nip}.down" ]; then
                  # NOTE(review): uses the last character of the address as
                  # the node number, as in the original.
                  echo -e "${idsCL[Red]}${NM_NODETYPES[$NTYPE]}-Node${nip: -1} (${nip}) is offline${idsCL[Default]}"
                fi
              fi
            fi
            _STATUS_SAY
          fi
        done
      fi
    done

    ##########################
    # REMOVE REPL CHECK FILES
    ##########################
    _STATUS_SAY -n "${idsCL[LightCyan]}Cleaning up status checks... ${idsCL[Default]}"
    for NTYPE in "${NODE_TYPES[@]}"; do
      PH=${NTYPE}_HOSTS[0]
      if [ ! -f "${FOLDER}/${!PH}.down" ]; then
        PH_CMD="ssh root@${!PH}"
        var=${NTYPE}_REPL_CHECK[@]
        if [ ! -z ${!var+x} ]; then
          for rcheck in "${!var}"; do
            ${PH_CMD} rm -f "${FOLDER}/test.repl"
            ${PH_CMD} rm -f "${REPL_CHECKS[${rcheck}]}/test.repl" &
          done
        fi
      fi
    done
    rm -Rf "${FOLDER}/test.repl"
    if [ "${ST_ACTION}" != "check" ]; then
      echo -e "${idsCL[Green]}Complete${idsCL[Default]}"
      echo
    fi
  fi

  ########################
  ## FREE SPACE CHECK (only on request)
  ########################
  if [ "${ST_ACTION}" = "freespace" ]; then
    for NTYPE in "${NODE_TYPES[@]}"; do
      echo -e "${idsST[Bold]}"; DIVIDER
      echo -e "${idsCL[Yellow]} ${NM_NODETYPES[$NTYPE]}-Node Free Space Scan${idsCL[Default]}"
      DIVIDER; echo -e "${idsST[Reset]}"

      nid=1
      var=${NTYPE}_HOSTS[@]
      for nip in "${!var}"; do
        if [[ ${ST_LOCAL_IPS} == *"${nip}"* ]]; then
          NCMD=''; LH='- localhost'
        else
          NCMD="ssh root@${nip}"; LH=''
        fi
        echo -e " ${idsST[Bold]}${idsCL[LightCyan]}${NM_NODETYPES[$NTYPE]}-Node${nid} (${nip})${idsST[Reset]}${idsCL[LightCyan]} ${idsCL[LightYello]}${LH}${idsCL[Default]}"
        DIVIDER false green

        echo -en " ${idsCL[LightCyan]}Getting drives from server ... ${idsCL[Default]}"
        # Flat list of "<device> <size>M <available>M" triples. Runs through
        # NCMD so the local node is read directly instead of via ssh.
        DRIVEINFO=$(${NCMD} df -BM | grep -vE '^Filesystem|tmpfs|cdrom|@|ram|loop|udev|veeamimage|nvme|localhost|shm|mmcblk|overlay|-volume|Music|Software' | awk '{ print $1 " " $2 " " $4 }')
        DRIVEINFO=(${DRIVEINFO})
        echo -en "\e[1A"
        echo -e "\e[0K\r"

        NUMDRIVES=$(( ${#DRIVEINFO[@]} / 3 ))
        for (( i = 0; i < NUMDRIVES; i++ )); do
          ii=$(( i * 3 ))
          # Last path component of the device, without a leading "...vg-".
          pname=${DRIVEINFO[${ii}]##*/}
          pname=${pname#*vg-}
          # Only filesystems larger than 1024 MB are reported.
          if [ "${DRIVEINFO[$(( ii + 1 ))]//M/}" -gt 1024 ]; then
            freespace=${DRIVEINFO[$(( ii + 2 ))]//M/}
            _STATUS_LABEL 20 "${pname}"
            if [[ ${freespace} -gt 1024 ]]; then
              fsgb=$(bc <<< "scale=2; ${freespace}/1024")
              fsdsp="${fsgb} GB"
            else
              fsdsp="${freespace} MB"
            fi
            # Notices now name the partition via pname; the variable used
            # before, ${partition}, is never assigned in this function.
            if [ "${freespace}" -le "1024" ]; then
              fs_status='error'
              fs_status_color='Red'
              SENDNOTICE "Free Space Critical: '${NM_NODETYPES[$NTYPE]}-Node${nid}'" "${pname} : ${fsdsp} free" 1
            elif [ "${freespace}" -le "5120" ]; then
              fs_status='warn'
              fs_status_color='Yellow'
              SENDNOTICE "Free Space Warning: '${NM_NODETYPES[$NTYPE]}-Node${nid}'" "${pname} : ${fsdsp} free"
            else
              fs_status=''
              fs_status_color='Green'
            fi
            echo -e "${idsCL[${fs_status_color}]}${fsdsp} ${idsCL[Default]}"
          fi
        done

        nid=$(( nid + 1 ))
        echo
      done
    done
  fi

  ########################
  ########################
  if [ "${ST_ACTION}" != "check" ]; then
    echo ""
    if [ -z "${action}" ] || [ "${action}" = "gui" ]; then
      DIVIDER true
      ENTER2CONTINUE
    fi
  fi
  end=$(date +%s)
  runtime=$(( end - start ))
  echo "runtime: ${runtime}"
}