#!/usr/bin/env bash

#######################################
# Print the left-hand " <text><padding>: " label of a status line.
# Nothing is printed in "check" mode (silent run). In "report" mode a single
# space is used instead of column padding.
# Globals:   ST_ACTION (read), PRI_CW (read, default column width)
# Arguments: $1 - label text
#            $2 - column width (optional, defaults to PRI_CW, then 40)
# Outputs:   the label on stdout, without trailing newline
#######################################
_STATUS_LABEL(){
  local text=${1}
  local -i width=${2:-${PRI_CW:-40}}
  local pad=' '
  [ "${ST_ACTION}" = "check" ] && return 0
  if [ "${ST_ACTION}" != "report" ]; then
    # Clamp at zero: a label longer than the column must not loop or fail.
    local -i fill=$(( width - ${#text} ))
    (( fill < 0 )) && fill=0
    printf -v pad '%*s' "${fill}" ''
  fi
  echo -en " ${text}${pad}: "
}

#######################################
# Print the number of seconds since a file was last modified.
# Arguments: $1 - path of the marker file
# Outputs:   age in seconds on stdout (0 when the file cannot be stat'ed)
#######################################
_STATUS_AGE(){
  local -i now mtime
  now=$(date +%s)
  mtime=$(stat -c %Y "${1}" 2>/dev/null) || mtime=${now}
  echo $(( now - mtime ))
}

#######################################
# Print the first "inet" address of the default-route interface, queried
# through NCMD (empty NCMD = run locally).
# NOTE(review): the interface name comes from the LOCAL `ip route`, as in the
# original code, even when NCMD targets a remote host.
# Globals:   NCMD (read)
#######################################
_STATUS_PRIMARY_IP(){
  ${NCMD} ip addr show $(ip route | awk '/default/ { print $5 }') \
    | grep "inet" | head -n 1 | awk '/inet/ {print $2}' | cut -d'/' -f1
}

#######################################
# Run the status scan: local services, per-node services and dockers,
# replication checks and (on request) free-space checks. State is tracked
# with marker files (*.down, *.errtime) in NM_TMPFOLDER; notifications go
# through SENDNOTICE and events are appended to NM_LOGFILE.
# Globals (read):  NM_NODE_TYPES, <TYPE>_HOSTS, <TYPE>_SERVICES_CHECK,
#                  <TYPE>_DOCKERS_CHECK, <TYPE>_REPL_CHECK, LOCAL_SERVICES,
#                  NM_FOLDER, NM_TMPFOLDER, NM_LOGFILE, NM_RENOTIFY_TIMEOUT,
#                  NM_HOSTNAMES, NM_SERVICE_DESC, NM_DOCKER_DESC, NM_REPL_DESC,
#                  NM_REPL_CHECK_LOC, NM_DOCKER_COMPOSE_LOC, NM_SINGLESRVR_IP,
#                  NM_NODETYPES, NODE_HOSTNAME, RUN_NODE_IP, idsCL, idsST,
#                  action, NM_SCANTIMES_FILE (optional override)
# Globals (written): ST_ACTION, PRI_CW, NCMD, LH, PH, PH_CMD, NTYPE, nip, ...
# Calls:     DIVIDER, SENDNOTICE, SHOW_TIME, CHECK_HOST, join_by, REPLCHECK,
#            ENTER2CONTINUE (all defined elsewhere)
# Arguments: $1 - action: "" | report | repl | sync | check | services |
#                 dockers | freespace   ("sync" is an alias of "repl";
#                 "check" suppresses all display output)
#            $2 - report variant ("email" adds uptime to report headers)
# Outputs:   human-readable status on stdout; run time appended to the
#            scan-times file
#######################################
STATUS(){
  local do_repl=false do_services=false mark hlabel
  local -a ntypesel=()

  start=$(date +%s)
  PRI_CW=40
  ST_ACTION=${1}
  if [ "${ST_ACTION}" = "sync" ]; then
    ST_ACTION=repl
  fi
  case "${ST_ACTION}" in
    report|check|"") do_repl=true; do_services=true ;;
    repl)            do_repl=true ;;
    services|dockers) do_services=true ;;
  esac

  ########################
  ## REPLICATION SETUP: drop a fresh test.repl on every primary node
  ########################
  if [ "${do_repl}" = true ]; then
    if [ "${ST_ACTION}" != "check" ]; then
      echo -en "${idsCL[LightCyan]}Setting up replication checks ... ${idsCL[Default]}"
    fi
    for NTYPE in "${NM_NODE_TYPES[@]}"; do
      PH=${NTYPE}_HOSTS[0]
      # NOTE(review): node-down markers are created under NM_TMPFOLDER below,
      # but the primary is looked up under NM_FOLDER here - confirm intended.
      if [ ! -f "${NM_FOLDER}/${!PH}.down" ]; then
        PH_CMD="ssh root@${!PH}"
        var=${NTYPE}_REPL_CHECK[@]
        if [ ! -z ${!var+x} ]; then
          for rcheck in "${!var}"; do
            ${PH_CMD} rm -f "${NM_FOLDER}/test.repl"
            daterun=$(date +%Y-%m-%d-%H-%M-%S)
            ${PH_CMD} "echo -e \"Status-Check (${NODE_HOSTNAME})\n${daterun}\" > ${NM_REPL_CHECK_LOC[${rcheck}]}/test.repl" &
          done
        fi
      fi
    done
    if [ "${ST_ACTION}" != "check" ]; then
      echo -e "${idsCL[Green]}Complete${idsCL[Default]}"
      echo
    fi
  fi

  ########################
  ## SERVICE / DOCKER CHECKS
  ########################
  if [ "${do_services}" = true ]; then

    ## ---- services on the machine running this script ----
    if [ ! -z ${LOCAL_SERVICES+x} ] && [ "${ST_ACTION}" != "dockers" ]; then
      if [ "${ST_ACTION}" != "check" ]; then
        lip=${RUN_NODE_IP}
        echo -e "\n$(DIVIDER)\n${idsCL[Yellow]}${idsST[Bold]} LOCALHOST Service Status${idsST[Reset]}${idsCL[Default]}"
        echo -e "$(DIVIDER)\n"
        echo -en " ${idsST[Bold]}${idsCL[LightCyan]}${NODE_HOSTNAME} (${lip})${idsST[Reset]}"
        # Uptime is shown everywhere except in a non-email report.
        if [ "${ST_ACTION}" != "report" ] || [ "${2}" == "email" ]; then
          uptime=$(uptime -p)
          echo -e "${idsCL[LightCyan]} - ${uptime} ${idsCL[LightYello]}- localhost${idsCL[Default]}"
        else
          echo -e "${idsCL[Default]}"
        fi
        DIVIDER . green
      fi

      for srvc in "${LOCAL_SERVICES[@]}"; do
        mark="${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}"
        _STATUS_LABEL "${NM_SERVICE_DESC[${srvc}]}"
        if [ "$(systemctl is-active ${srvc})" != "active" ]; then
          if [ ! -f "${mark}.down" ]; then
            # First time seen down: notify, mark and log.
            if [ "${ST_ACTION}" != "check" ]; then
              echo -e "${idsCL[Red]}Not Running${idsCL[Default]}"
            fi
            SENDNOTICE "${NODE_HOSTNAME}" "${NM_SERVICE_DESC[${srvc}]} is down" 1
            touch "${mark}.down"
            echo "$(date) - LOCAL - ${NODE_HOSTNAME} - ${NM_SERVICE_DESC[${srvc}]} is down" >> ${NM_LOGFILE}
          else
            errtime=$(_STATUS_AGE "${mark}.down")
            if [ "${errtime}" -gt "${NM_RENOTIFY_TIMEOUT}" ]; then
              # .errtime keeps the original failure time; .down is re-touched
              # so it measures time since the last renotify window.
              if [ ! -f "${mark}.errtime" ]; then
                mv "${mark}.down" "${mark}.errtime"
              fi
              toterrtime=$(_STATUS_AGE "${mark}.errtime")
              if [ "${ST_ACTION}" != "check" ]; then
                echo -e "${idsCL[Red]}Not running for $(SHOW_TIME ${toterrtime})${idsCL[Default]}"
              fi
              touch "${mark}.down"
            fi
          fi
        else
          if [ "${ST_ACTION}" != "check" ]; then
            echo -e "${idsCL[Green]}Running${idsCL[Default]}"
          fi
          if [ -f "${mark}.down" ]; then
            # Recovered: report total downtime and clear all markers.
            if [ -f "${mark}.errtime" ]; then
              toterrtime=$(_STATUS_AGE "${mark}.errtime")
            else
              toterrtime=$(_STATUS_AGE "${mark}.down")
            fi
            rm -f "${mark}".*
            SENDNOTICE "${NODE_HOSTNAME}" "${NM_SERVICE_DESC[${srvc}]} is back UP! It was down for $(SHOW_TIME ${toterrtime})"
            echo "$(date) - LOCAL - ${NODE_HOSTNAME} - ${NM_SERVICE_DESC[${srvc}]} is back up" >> ${NM_LOGFILE}
          fi
        fi
      done
      if [ "${ST_ACTION}" != "check" ]; then echo; fi
    fi

    ## ---- per-node checks ----
    # A node type passed as the action restricts the scan to that type;
    # otherwise every configured node type is scanned.
    for NTYPE in "${NM_NODE_TYPES[@]}"; do
      if [ "${ST_ACTION}" == "${NTYPE}" ]; then
        ntypesel=("${NTYPE}")
        break
      fi
    done
    if [ ${#ntypesel[@]} -eq 0 ]; then
      ntypesel=("${NM_NODE_TYPES[@]}")
    fi

    for NTYPE in "${ntypesel[@]}"; do
      nid=1
      dockers=${NTYPE}_DOCKERS_CHECK[@]
      var=${NTYPE}_HOSTS[@]
      # In "dockers" mode skip node types that have no docker list at all.
      if [[ ! -v ${NTYPE}_DOCKERS_CHECK ]] && [ "${ST_ACTION}" == "dockers" ]; then
        GOFORCHECK=false
      else
        GOFORCHECK=true
      fi

      if [ ${GOFORCHECK} = true ]; then
        if [ "${ST_ACTION}" != "check" ]; then
          echo -e "${idsST[Bold]}"; DIVIDER
          if [ "${ST_ACTION}" = "dockers" ]; then
            echo -e "${idsCL[Yellow]} ${NM_NODETYPES[$NTYPE]}-Node Docker Status${idsCL[Default]}"
          else
            echo -e "${idsCL[Yellow]} ${NM_NODETYPES[$NTYPE]}-Node Service Status${idsCL[Default]}"
          fi
          DIVIDER; echo -e "${idsST[Reset]}"
        fi

        for nip in "${!var}"; do
          hlabel="${NM_HOSTNAMES[${nip}]}[${nip}]"
          [ "${ST_ACTION}" != "check" ] && echo -en " ${idsST[Bold]}${idsCL[LightCyan]}${NM_HOSTNAMES[${nip}]} [${nip}]${idsST[Reset]}"
          if [[ "${RUN_NODE_IP}" == *"${nip}"* ]]; then
            # The local machine needs no ssh and is reachable by definition;
            # resetting checkhost avoids inheriting the previous host's result.
            NCMD=''; LH='- localhost'
            checkhost=true
          else
            NCMD="ssh root@${nip}"; LH=''
            checkhost=$(CHECK_HOST ${nip})
          fi

          if [ "${checkhost}" != "false" ]; then
            if [ "${ST_ACTION}" != "check" ]; then
              if [ "${ST_ACTION}" != "report" ] || [ "${2}" == "email" ]; then
                if [ "${NTYPE}" != "OFW" ]; then
                  uptime=$(${NCMD} uptime -p)
                else
                  # OFW nodes have no `uptime -p`; rebuild it from field 3 of
                  # plain `uptime` (colon separated days:hours:minutes).
                  uptime=$(${NCMD} uptime | awk '{print $3}' | cut -d, -f1)
                  if [ ${#uptime} -ge 6 ]; then
                    uptime="up $(echo $uptime | cut -d: -f1) days, $(echo $uptime | cut -d: -f2) hours, $(echo $uptime | cut -d: -f3) minutes"
                  elif [ ${#uptime} -ge 3 ]; then
                    uptime="up $(echo $uptime | cut -d: -f1) hours, $(echo $uptime | cut -d: -f2) minutes"
                  else
                    uptime="up ${uptime} minutes"
                  fi
                fi
                echo -e "${idsCL[LightCyan]} - ${uptime} ${idsCL[LightYello]}${LH}${idsCL[Default]}"
              else
                echo -e "${idsCL[Default]}"
              fi
              DIVIDER false green
            fi

            # Node was marked down earlier and is reachable again.
            if [ -f "${NM_TMPFOLDER}/${nip}.down" ]; then
              if [ -f "${NM_TMPFOLDER}/${nip}.errtime" ]; then
                toterrtime=$(_STATUS_AGE "${NM_TMPFOLDER}/${nip}.errtime")
              else
                toterrtime=$(_STATUS_AGE "${NM_TMPFOLDER}/${nip}.down")
              fi
              rm -f "${NM_TMPFOLDER}/${nip}".*
              SENDNOTICE "${hlabel}-UP" "${hlabel} is back UP! It was down for $(SHOW_TIME ${toterrtime})"
            fi

            ## ---- services on this node ----
            if [ "${ST_ACTION}" != "dockers" ]; then
              srvcs=${NTYPE}_SERVICES_CHECK[@]
              srvcstotest="$(join_by " " ${!srvcs})"
              # One call for all services; is-active prints one line each.
              # Unquoted on purpose so it also works when NCMD is empty.
              [ "${NTYPE}" != "OFW" ] && srvctst=($(${NCMD} systemctl is-active ${srvcstotest}))
              sr=0
              for srvc in "${!srvcs}"; do
                mark="${NM_TMPFOLDER}/${nip}~${srvc}"
                # Single-instance services are only checked on the node that
                # currently holds the shared address.
                NOGOCHK=true
                if [ "${srvc}" == "gitea" ] && [ "${NTYPE}" == "WEB" ] && [[ $(_STATUS_PRIMARY_IP) != *${NM_SINGLESRVR_IP['WEB']}* ]]; then
                  NOGOCHK=false
                fi
                if [ "${srvc}" == "headscale" ] && [ "${NTYPE}" == "HS" ] && [[ $(_STATUS_PRIMARY_IP) != *${NM_SINGLESRVR_IP['HS']}* ]]; then
                  NOGOCHK=false
                fi
                if [ "${srvc}" == "keepalived" ]; then
                  case "${nip}" in
                    10.2.1.2|10.2.1.51) NOGOCHK=false ;;
                  esac
                fi

                if [ ${NOGOCHK} == true ]; then
                  _STATUS_LABEL "${NM_SERVICE_DESC[${srvc}]}"
                  if [ "${NTYPE}" != "OFW" ]; then
                    srvctest=${srvctst[$sr]}
                    # mysql and mariadb are aliases: either one being active
                    # counts as the database running.
                    if [ "${srvctest}" != "active" ] && [ "${srvc}" == "mysql" ]; then
                      [ "$(${NCMD} systemctl is-active mariadb)" == "active" ] && mysqlgo=true || mysqlgo=false
                    elif [ "${srvctest}" != "active" ] && [ "${srvc}" = "mariadb" ]; then
                      [ "$(${NCMD} systemctl is-active mysql)" == "active" ] && mysqlgo=true || mysqlgo=false
                    elif [ "${srvctest}" == "active" ] && { [ "${srvc}" == "mysql" ] || [ "${srvc}" == "mariadb" ]; }; then
                      mysqlgo=true
                    fi
                  else
                    if [ "$(ssh root@${nip} ps -U root | grep "offsite-power-check.sh start" | grep -v "grep" | awk '{print $1}')" != "" ]; then
                      srvctest=active
                    else
                      srvctest=notactive
                    fi
                  fi

                  if [ "${srvctest}" != "active" ] && [ "${mysqlgo}" != "true" ]; then
                    if [ ! -f "${mark}.down" ]; then
                      [ "${ST_ACTION}" != "check" ] && echo -e "${idsCL[Red]}Not Running${idsCL[Default]}"
                      touch "${mark}.down"
                      # Immediate notification is disabled; the first notice is
                      # sent from the 60-180s window below.
                      echo "$(date) - ${nip} - ${hlabel} - ${NM_SERVICE_DESC[${srvc}]} is down" >> ${NM_LOGFILE}
                      [ "${srvc}" == "pdnsadmin" ] && ${NCMD} systemctl restart ${srvc} &
                    elif [ ! -f "${mark}.disable" ]; then
                      errtime=$(_STATUS_AGE "${mark}.down")
                      if [ "${errtime}" -gt "${NM_RENOTIFY_TIMEOUT}" ]; then
                        [ ! -f "${mark}.errtime" ] && mv "${mark}.down" "${mark}.errtime"
                        toterrtime=$(_STATUS_AGE "${mark}.errtime")
                        touch "${mark}.down"
                        SENDNOTICE "${hlabel}" "${NM_SERVICE_DESC[${srvc}]} is down" 1
                        # Automatic mysql/mariadb restart is disabled.
                      else
                        # Total downtime for display: since first failure if
                        # known, else since the .down marker.
                        if [ -f "${mark}.errtime" ]; then
                          toterrtime=$(_STATUS_AGE "${mark}.errtime")
                        else
                          toterrtime=${errtime}
                        fi
                        if [ "${errtime}" -gt 60 ] && [ "${errtime}" -lt 180 ] && [ ! -f "${mark}.errtime" ]; then
                          SENDNOTICE "${hlabel}" "${NM_SERVICE_DESC[${srvc}]} is down" 1
                        fi
                      fi
                      [ "${ST_ACTION}" != "check" ] && echo -e "${idsCL[Red]}Not running for $(SHOW_TIME ${toterrtime})${idsCL[Default]}"
                    fi
                  else
                    [ "${ST_ACTION}" != "check" ] && echo -e "${idsCL[Green]}Running${idsCL[Default]}"
                    if [ -f "${mark}.down" ]; then
                      if [ -f "${mark}.errtime" ]; then
                        toterrtime=$(_STATUS_AGE "${mark}.errtime")
                      else
                        toterrtime=$(_STATUS_AGE "${mark}.down")
                      fi
                      SENDNOTICE "${hlabel}" "${NM_SERVICE_DESC[${srvc}]} is back UP! It was down for $(SHOW_TIME ${toterrtime})"
                      echo "$(date) - ${nip} - ${hlabel} - ${NM_SERVICE_DESC[${srvc}]} is back up, it was down for $(SHOW_TIME ${toterrtime})" >> ${NM_LOGFILE}
                      rm -f "${mark}".*
                    fi
                    if [ "${srvc}" == "gitea" ] || [ "${srvc}" == "headscale" ]; then
                      rm -f "${NM_FOLDER}"/*~"${srvc}".*
                    fi
                  fi
                  [ "${mysqlgo}" = "true" ] && unset mysqlgo
                fi
                sr=$(( sr + 1 ))
              done
            fi

            ########################
            ## DOCKER CHECK
            ########################
            # Skipped only for the "services" action.
            if [[ -v ${NTYPE}_DOCKERS_CHECK ]] && [ "${ST_ACTION}" != "services" ]; then
              if [ "${ST_ACTION}" != "check" ]; then
                echo
                echo -e "${idsCL[Yellow]} Docker Service(s) Status${idsCL[Default]}"
                echo -e "${idsCL[Yellow]}----------------------------------------------------${idsCL[Default]}"
              fi
              for docker in "${!dockers}"; do
                mark="${NM_TMPFOLDER}/${nip}~${docker}"
                NOGOCHK=true
                if [ "${NTYPE}" == "WEB" ]; then
                  case "${docker}" in
                    vaultwarden|heimdall|authelia)
                      [[ $(_STATUS_PRIMARY_IP) != *${NM_SINGLESRVR_IP['WEB']}* ]] && NOGOCHK=false
                      ;;
                  esac
                fi

                if [ ${NOGOCHK} == true ]; then
                  _STATUS_LABEL "${NM_DOCKER_DESC[${docker}]}"
                  if [ -z "$(${NCMD} docker ps -q -f name=${docker})" ]; then
                    # Not running: start an existing container, else create it.
                    if [ -n "$(${NCMD} docker ps -a | grep ${docker})" ]; then
                      ${NCMD} docker start ${docker} >/dev/null 2>&1
                      echo "$(date) - ${nip} - ${hlabel} - ${NM_DOCKER_DESC[${docker}]} (docker) is not running, starting now" >> ${NM_LOGFILE}
                    else
                      ${NCMD} /usr/local/bin/docker compose -f ${NM_DOCKER_COMPOSE_LOC[${docker}]}/docker-compose.yml up -d >/dev/null 2>&1
                      echo "$(date) - ${nip} - ${hlabel} - ${NM_DOCKER_DESC[${docker}]} (docker) is not found, creating and starting now" >> ${NM_LOGFILE}
                    fi
                    sleep 10s
                    if [ -n "$(${NCMD} docker ps -q -f name=${docker})" ]; then
                      if [ "${ST_ACTION}" != "check" ]; then
                        echo -e "${idsCL[Green]}Running - Fixed${idsCL[Default]}"
                      fi
                      SENDNOTICE "${hlabel}" "${NM_DOCKER_DESC[${docker}]} is fixed"
                      rm -f "${mark}".*
                      echo "$(date) - ${nip} - ${hlabel} - ${NM_DOCKER_DESC[${docker}]} (docker) is fixed" >> ${NM_LOGFILE}
                    else
                      if [ "${ST_ACTION}" != "check" ]; then
                        echo -e "${idsCL[Red]}Not Running - Could Not Fix!${idsCL[Default]}"
                      fi
                      # Marker/notification bookkeeping runs in every mode,
                      # including the silent "check" mode.
                      if [ ! -f "${mark}.down" ]; then
                        SENDNOTICE "${hlabel}" "${NM_DOCKER_DESC[${docker}]} is down, could not fix" 1
                        touch "${mark}.down"
                        echo "$(date) - ${nip} - ${hlabel} - ${NM_DOCKER_DESC[${docker}]} (docker) is down, could not fix" >> ${NM_LOGFILE}
                      else
                        errtime=$(_STATUS_AGE "${mark}.down")
                        if [ "${errtime}" -gt "${NM_RENOTIFY_TIMEOUT}" ]; then
                          if [ ! -f "${mark}.errtime" ]; then
                            mv "${mark}.down" "${mark}.errtime"
                          fi
                          toterrtime=$(_STATUS_AGE "${mark}.errtime")
                          if [ "${ST_ACTION}" != "check" ]; then
                            echo -e "${idsCL[Red]}Not running for $(SHOW_TIME ${toterrtime})${idsCL[Default]}"
                          fi
                          touch "${mark}.down"
                        fi
                      fi
                    fi
                  else
                    if [ "${ST_ACTION}" != "check" ]; then
                      echo -e "${idsCL[Green]}Running${idsCL[Default]}"
                    fi
                    rm -f "${mark}".*
                  fi
                fi
              done
            fi
          else
            ## ---- node unreachable ----
            # Per-service markers are meaningless while the node is down.
            rm -f "${NM_TMPFOLDER}/${nip}"~*
            if [ ! -f "${NM_TMPFOLDER}/${nip}.down" ]; then
              touch "${NM_TMPFOLDER}/${nip}.down"
              if [ ! -f "${NM_TMPFOLDER}/${nip}.errtime" ]; then
                touch "${NM_TMPFOLDER}/${nip}.errtime"
              fi
              if [ "${ST_ACTION}" != "check" ]; then
                toterrtime=$(_STATUS_AGE "${NM_TMPFOLDER}/${nip}.errtime")
                echo -e "${idsCL[Red]} - Node is down!${idsCL[Default]}"
              fi
              # Immediate "node is down" notification is disabled.
              echo "$(date) - ${nip} - ${hlabel} - Node is down" >> ${NM_LOGFILE}
            else
              toterrtime=$(_STATUS_AGE "${NM_TMPFOLDER}/${nip}.errtime")
              errtime=$(_STATUS_AGE "${NM_TMPFOLDER}/${nip}.down")
              if [ "${ST_ACTION}" != "check" ]; then
                echo -e "${idsCL[Red]} - Node has been down for $(SHOW_TIME ${toterrtime}) ${idsCL[LightYello]}${LH}${idsCL[Default]}"
              fi
              # NOTE(review): .errtime is always created together with .down
              # above, so the 60-180s branch below looks unreachable, and .down
              # is re-touched on every pass - confirm the intended debounce.
              if [ "${errtime}" -gt "${NM_RENOTIFY_TIMEOUT}" ]; then
                SENDNOTICE "${hlabel}" "${hlabel} has been down for $(SHOW_TIME ${toterrtime})" 1
              elif [ "${errtime}" -gt 60 ] && [ "${errtime}" -lt 180 ] && [ ! -f "${NM_TMPFOLDER}/${nip}.errtime" ]; then
                SENDNOTICE "${hlabel}" "${hlabel} is down" 1
              fi
              touch "${NM_TMPFOLDER}/${nip}.down"
            fi
          fi
          if [ "${ST_ACTION}" != "check" ]; then echo; fi
          nid=$(( nid + 1 ))
        done
      fi #GOFORCHECK
    done
  fi

  ########################
  ## REPLICATION CHECK
  ########################
  if [ "${do_repl}" = true ]; then
    if [ "${ST_ACTION}" != "check" ]; then
      echo -e "${idsST[Bold]}"; DIVIDER
      echo -e "${idsCL[Yellow]} Replication Status Between the Primary and Secondary Nodes${idsCL[Default]}"
      DIVIDER; echo -e "${idsST[Reset]}"
      echo -en " ${idsCL[LightCyan]}Starting processes to collect/monitor replication status : "
    fi

    # Pass 1: start one background REPLCHECK per (check, secondary node).
    for NTYPE in "${NM_NODE_TYPES[@]}"; do
      PH=${NTYPE}_HOSTS[0]
      PH_CMD="ssh root@${!PH}"
      var=${NTYPE}_REPL_CHECK[@]
      if [ ! -z ${!var+x} ]; then
        var=${NTYPE}_HOSTS[@]
        for nip in "${!var}"; do
          if [[ "${RUN_NODE_IP}" != *"${nip}"* ]] && [ "${nip}" != "${!PH}" ]; then
            if [ ! -f "${NM_FOLDER}/${!PH}.down" ] && [ ! -f "${NM_TMPFOLDER}/${nip}.down" ]; then
              var2=${NTYPE}_REPL_CHECK[@]
              for rcheck in "${!var2}"; do
                # Redirection must precede '&' to apply to the job itself.
                REPLCHECK "${rcheck}" "${nip}" "${PH_CMD}" "${ST_ACTION}" >/dev/null 2>&1 &
              done
            fi
          fi
        done
      fi
    done
    [ "${ST_ACTION}" != "check" ] && echo -e "${idsCL[Green]}Done${idsCL[Default]}\n"

    # Pass 2: wait for each job's result file and act on it.
    for NTYPE in "${NM_NODE_TYPES[@]}"; do
      PH=${NTYPE}_HOSTS[0]
      PH_CMD="ssh root@${!PH}"
      var=${NTYPE}_REPL_CHECK[@]
      if [ ! -z ${!var+x} ]; then
        var=${NTYPE}_HOSTS[@]
        for nip in "${!var}"; do
          if [[ "${RUN_NODE_IP}" != *"${nip}"* ]] && [ "${nip}" != "${!PH}" ]; then
            hlabel="${NM_HOSTNAMES[${nip}]}[${nip}]"
            if [ "${ST_ACTION}" != "check" ]; then
              echo -e " ${idsCL[LightCyan]}${NM_HOSTNAMES[${!PH}]} (${!PH}) <--> ${idsST[Bold]}${hlabel}${idsST[Reset]}${idsCL[Default]}"
              DIVIDER false green
            fi
            if [ ! -f "${NM_FOLDER}/${!PH}.down" ] && [ ! -f "${NM_TMPFOLDER}/${nip}.down" ]; then
              var2=${NTYPE}_REPL_CHECK[@]
              for rcheck in "${!var2}"; do
                mark="${NM_TMPFOLDER}/${nip}~${rcheck}.repl"
                _STATUS_LABEL "${NM_REPL_DESC[${rcheck}]}"
                # REPLCHECK always ends by creating .good or .timeout; poll
                # once a second instead of spinning.
                checked=""
                until [ -n "${checked}" ]; do
                  if [ -f "${NM_TMPFOLDER}/repl.${rcheck}.${nip}.good" ]; then
                    checked=good
                  elif [ -f "${NM_TMPFOLDER}/repl.${rcheck}.${nip}.timeout" ]; then
                    checked=timeout
                  else
                    sleep 1
                  fi
                done

                if [ "${checked}" == "timeout" ]; then
                  [ "${ST_ACTION}" != "check" ] && echo -e "${idsCL[Red]}Timeout${idsCL[Default]}"
                  if [ ! -f "${mark}.down" ]; then
                    # First timeout is only recorded; the notice is sent when
                    # it repeats on the next run.
                    touch "${mark}.down"
                  elif [ ! -f "${mark}.errtime" ]; then
                    mv "${mark}.down" "${mark}.errtime"
                    touch "${mark}.down"
                    SENDNOTICE "Repl-Timeout-'${hlabel}'" "${NM_REPL_DESC[${rcheck}]} (${NM_REPL_CHECK_LOC[${rcheck}]})" 1
                    echo "$(date) - ${nip} - ${hlabel} - (${NM_REPL_DESC[${rcheck}]}) Replicated folder timeout, it is not syncing" >> ${NM_LOGFILE}
                  else
                    snderrtime=$(_STATUS_AGE "${mark}.down")
                    if [ "${snderrtime}" -gt "${NM_RENOTIFY_TIMEOUT}" ]; then
                      toterrtime=$(_STATUS_AGE "${mark}.errtime")
                      SENDNOTICE "Repl-Timeout-'${hlabel}'" "${NM_REPL_DESC[${rcheck}]} (${NM_REPL_CHECK_LOC[${rcheck}]}) It has been down for $(SHOW_TIME ${toterrtime})" 1
                      echo "$(date) - ${nip} - ${hlabel} - (${NM_REPL_DESC[${rcheck}]}) Replicated folder timeout, has now been down for: $(SHOW_TIME ${toterrtime})" >> ${NM_LOGFILE}
                      touch "${mark}.down"
                    fi
                  fi
                elif [ "${checked}" == "good" ]; then
                  [ "${ST_ACTION}" != "check" ] && echo -e "${idsCL[Green]}Good${idsCL[Default]}"
                  if [ -f "${mark}.down" ]; then
                    if [ -f "${mark}.errtime" ]; then
                      toterrtime=$(_STATUS_AGE "${mark}.errtime")
                      SENDNOTICE "Repl-'${hlabel}'" "Replicated folder is back up!\n${NM_REPL_DESC[${rcheck}]} (${NM_REPL_CHECK_LOC[${rcheck}]}) It was down for $(SHOW_TIME ${toterrtime})"
                      echo "$(date) - ${nip} - ${hlabel} - (${NM_REPL_DESC[${rcheck}]}) Replicated folder is back up" >> ${NM_LOGFILE}
                    fi
                    rm -f "${mark}"*
                  fi
                fi
              done
            else
              if [ "${ST_ACTION}" != "check" ]; then
                [ -f "${NM_FOLDER}/${!PH}.down" ] && echo -e "${idsCL[Red]}${NM_HOSTNAMES[${!PH}]} (${!PH}) is offline${idsCL[Default]}"
                [ -f "${NM_TMPFOLDER}/${nip}.down" ] && echo -e "${idsCL[Red]}${hlabel} is offline${idsCL[Default]}"
              fi
            fi
            if [ "${ST_ACTION}" != "check" ]; then echo; fi
          fi
        done
      fi
    done

    ##########################
    # REMOVE REPL CHECK FILES
    ##########################
    if [ "${ST_ACTION}" != "check" ]; then
      echo -en "${idsCL[LightCyan]} Cleaning up status checks... ${idsCL[Default]}"
    fi
    for NTYPE in "${NM_NODE_TYPES[@]}"; do
      PH=${NTYPE}_HOSTS[0]
      if [ ! -f "${NM_FOLDER}/${!PH}.down" ]; then
        PH_CMD="ssh root@${!PH}"
        var=${NTYPE}_REPL_CHECK[@]
        if [ ! -z ${!var+x} ]; then
          for rcheck in "${!var}"; do
            ${PH_CMD} rm -f "${NM_FOLDER}/test.repl"
            ${PH_CMD} rm -f "${NM_FOLDER}/test.repl" &
            ${PH_CMD} rm -f "${NM_REPL_CHECK_LOC[${rcheck}]}/test.repl" &
            rm -f "${NM_TMPFOLDER}/repl.${rcheck}".*
          done
        fi
      fi
    done
    rm -Rf "${NM_FOLDER}/test.repl"
    if [ "${ST_ACTION}" != "check" ]; then
      echo -e "${idsCL[Green]}Complete${idsCL[Default]}"
      echo
    fi
  fi

  ########################
  ## FREE SPACE CHECK (only on explicit request)
  ########################
  if [ "${ST_ACTION}" = "freespace" ]; then
    for NTYPE in "${NM_NODE_TYPES[@]}"; do
      if [ "${ST_ACTION}" != "check" ]; then
        echo -e "${idsST[Bold]}"; DIVIDER
        echo -e "${idsCL[Yellow]} ${NM_NODETYPES[$NTYPE]}-Node(s) Free Space Scan${idsCL[Default]}"
        DIVIDER; echo -e "${idsST[Reset]}"
      fi
      nid=1
      var=${NTYPE}_HOSTS[@]
      for nip in "${!var}"; do
        # RNIP is honoured when set; otherwise fall back to RUN_NODE_IP, the
        # variable the service checks use for the same locality test.
        if [[ "${RNIP:-${RUN_NODE_IP}}" == *"${nip}"* ]]; then
          NCMD=''; LH='- localhost'
        else
          NCMD="ssh root@${nip}"; LH=''
        fi
        if [ "${ST_ACTION}" != "check" ]; then
          echo -e " ${idsST[Bold]}${idsCL[LightCyan]}${NM_HOSTNAMES[${nip}]} [${nip}]${idsST[Reset]}${idsCL[LightCyan]} ${idsCL[LightYello]}${LH}${idsCL[Default]}"
          DIVIDER false green
        fi
        [ "${ST_ACTION}" != "check" ] && [ "${2}" != "report" ] && echo -en " ${idsCL[LightCyan]}Getting drives from server ... ${idsCL[Default]}"

        # Flat list of triples: device, size (MB), available (MB).
        DRIVEINFO=($(${NCMD} df -BM | grep -vE '^Filesystem|tmpfs|cdrom|@|ram|loop|udev|veeamimage|nvme|localhost|shm|mmcblk|overlay|-volume|Music|Software' | awk '{ print $1 " " $2 " " $4 }'))
        if [ "${2}" != "report" ]; then
          # Erase the "Getting drives" progress line.
          echo -en "\e[1A"
          echo -e "\e[0K\r"
        fi

        NUMDRIVES=$(( ${#DRIVEINFO[@]} / 3 ))
        for (( i = 0; i < NUMDRIVES; i++ )); do
          ii=$(( i * 3 ))
          partition=${DRIVEINFO[ii]}
          pname=${partition##*/}
          pname=${pname#*vg-}
          # Ignore filesystems of 1 GB or less.
          if [ "${DRIVEINFO[ii+1]//M/}" -gt 1024 ]; then
            freespace=${DRIVEINFO[ii+2]//M/}
            _STATUS_LABEL "${pname}" 20
            if [[ ${freespace} -gt 1024 ]]; then
              fsgb=$(bc <<< "scale=2; ${freespace}/1024")
              fsdsp="${fsgb} GB"
            else
              fsdsp="${freespace} MB"
            fi
            if [ "${freespace}" -le "1024" ]; then
              fs_status='error'
              fs_status_color='Red'
              SENDNOTICE "Free Space Critical: '${NM_HOSTNAMES[${nip}]}[${nip}]'" "${partition} : ${fsdsp} free" 1
            elif [ "${freespace}" -le "5120" ]; then
              fs_status='warn'
              fs_status_color='Yellow'
              SENDNOTICE "Free Space Warning: '${NM_HOSTNAMES[${nip}]}[${nip}]'" "${partition} : ${fsdsp} free"
            else
              fs_status=''
              fs_status_color='Green'
            fi
            [ "${ST_ACTION}" != "check" ] && echo -e "${idsCL[${fs_status_color}]}${fsdsp} ${idsCL[Default]}"
          fi
        done
        nid=$(( nid + 1 ))
        echo
      done
    done
  fi

  ########################
  ########################
  if [ "${ST_ACTION}" != "check" ]; then
    echo
    if [ -z "${action}" ] || [ "${action}" = "gui" ]; then
      DIVIDER true
      ENTER2CONTINUE
    fi
  fi
  end=$(date +%s)
  runtime=$(( end - start ))
  echo -e "Runtime: ${runtime}\n"
  echo ${runtime} >> "${NM_SCANTIMES_FILE:-/opt/idssys/nodemgmt/status-check.scantimes}"
}

#######################################
# Wait (up to one minute) until the test.repl file of one replication check on
# a secondary node is identical to the primary's copy, then record the result
# as a marker file for STATUS to pick up. Meant to be run as a background job.
# Globals:   NM_REPL_CHECK_LOC (read), NM_TMPFOLDER (read)
# Arguments: $1 - replication check key
#            $2 - IP of the secondary node
#            $3 - command prefix to reach the primary node (e.g. "ssh root@IP");
#                 empty means the primary copy is on the local machine
#            $4 - action (currently unused)
# Outputs:   creates ${NM_TMPFOLDER}/repl.<check>.<ip>.good or .timeout
#######################################
REPLCHECK(){
  local rcheck=${1}
  local nip=${2}
  local PH_CMD=${3}
  local probe="${NM_REPL_CHECK_LOC[${rcheck}]}/test.repl"
  local -i deadline=$(( $(date +%s) + 60 ))
  local checked=false timedout=false

  # "checked" becomes empty once both copies match.
  until [ -z "${checked}" ]; do
    if ssh -q "root@${nip}" "[[ -f ${probe} ]]"; then
      # diff's exit status is used rather than its output, so a diff failure
      # (status 2, nothing on stdout) is not mistaken for "identical".
      if [ -z "${PH_CMD}" ]; then
        if ssh "root@${nip}" "cat ${probe}" | diff -q - "${probe}" >/dev/null 2>&1; then
          checked=""
        fi
      else
        if ${PH_CMD} "ssh root@${nip} cat ${probe} | diff -q - ${probe}" >/dev/null 2>&1; then
          checked=""
        fi
      fi
    fi
    if [ "$(date +%s)" -gt "${deadline}" ]; then
      timedout=true
      break
    fi
  done

  if [ "${timedout}" == "true" ]; then
    touch "${NM_TMPFOLDER}/repl.${rcheck}.${nip}.timeout"
  else
    touch "${NM_TMPFOLDER}/repl.${rcheck}.${nip}.good"
  fi
}