From b6d604b56b6e61e6b883f0d31823072e1b1205a4 Mon Sep 17 00:00:00 2001
From: David Schroeder
Date: Tue, 5 Dec 2023 21:31:40 -0600
Subject: [PATCH] update

---
 defaults.inc        |   2 +-
 inc/status.new.inc  | 744 ++++++++++++++++++++++++++++++++++++++++++++
 nodemgmt-scripts.sh |   1 +
 3 files changed, 746 insertions(+), 1 deletion(-)
 create mode 100755 inc/status.new.inc

diff --git a/defaults.inc b/defaults.inc
index b23f84fe..f3b4fcb6 100755
--- a/defaults.inc
+++ b/defaults.inc
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-VERS='4.17.57-12052023'
+VERS='4.17.100-12052023'
 NM_BETA=false
 
 noheader=' service status-check nightlyrephp7.3-fpm,new backup report check checkcerts gitea update-nodes copynpmcerts singleservercheck update-dyndns backup-offsitepfsense gui nightlyreview update log betacheck '
diff --git a/inc/status.new.inc b/inc/status.new.inc
new file mode 100755
index 00000000..03a69739
--- /dev/null
+++ b/inc/status.new.inc
@@ -0,0 +1,744 @@
+#!/usr/bin/env bash
+
+#######################################
+# Emit the spaces needed to pad a label out to a fixed column width.
+# Prints nothing when the label already fills (or overflows) the column, so
+# an over-long description cannot stall the caller.
+# Arguments:
+#   $1 - column width
+#   $2 - length of the label being padded
+# Outputs:
+#   the padding on stdout, without a trailing newline
+#######################################
+_STATUSNEW_PAD(){
+  local pad=$(( ${1} - ${2} ))
+  if (( pad > 0 )); then
+    printf '%*s' "${pad}" ''
+  fi
+}
+
+#######################################
+# Scan the local host and the configured nodes and print/record their status.
+# Depending on the action this covers local services, per-node services and
+# docker containers, folder replication between the first host of each node
+# type and its other hosts, and free disk space. State is carried between
+# runs as marker files (*.down, *.errtime, *.sent, ...) in NM_TMPFOLDER.
+# Arguments:
+#   $1 - action: "" (default), report, repl (alias: sync), check, services,
+#        dockers or freespace; or a node type from NM_NODE_TYPES (any
+#        case), in which case $2 carries the action.
+#   $2 - action when $1 is a node type; it is also compared against "email"
+#        and "report" to adjust the output.
+# Outputs:
+#   Status text on stdout (largely suppressed for "check"), notices through
+#   SENDNOTICE, log lines appended to NM_LOGFILE and, for "report", the run
+#   time appended to ${NM_LOGFOLDER}/status-check.scantimes.
+#######################################
+STATUSNEW(){
+  start=$(date +%s)
+
+  PRI_CW=40
+
+  ST_ACTION=${1}
+  if [ "${ST_ACTION}" = "sync" ]; then
+    ST_ACTION=repl
+  fi
+
+  # A leading node type narrows the scan to that type and moves the action
+  # to the second argument.
+  ntypesel=""
+  for NTYPE in "${NM_NODE_TYPES[@]}"; do
+    if [ "${ST_ACTION^^}" == "${NTYPE}" ]; then
+      ntypesel=${NTYPE}
+      ST_ACTION=${2}
+      break
+    fi
+  done
+  if [ "${ntypesel}" != "" ]; then
+    ntypesel=(${ntypesel})
+  else
+    ntypesel=(${NM_NODE_TYPES[@]})
+  fi
+
+  ########################
+  ## REPLICATION SETUP
+  ########################
+  if [ "${ST_ACTION}" == "report" ] || [ "${ST_ACTION}" == "repl" ] || [ "${ST_ACTION}" == "check" ] || [ "${ST_ACTION}" == "" ]; then
+    if [ "${ST_ACTION}" != "check" ]; then
+      echo -en "${idsCL[LightCyan]}Setting up replication checks ... ${idsCL[Default]}"
+    fi
+    for NTYPE in "${ntypesel[@]}"; do
+      PH=${NTYPE}_HOSTS[0]
+      if [ ! -f ${NM_FOLDER}/${!PH}.down ]; then
+        PH_CMD="ssh root@${!PH}"
+        var=${NTYPE}_REPL_CHECK[@]
+        if [ -n "${!var+x}" ]; then
+          for rcheck in "${!var}"; do
+            ${PH_CMD} rm -f ${NM_FOLDER}/test.repl
+            daterun=$(date +%Y-%m-%d-%H-%M-%S)
+            # Write a time-stamped test.repl into the replicated folder on
+            # the first host of this node type.
+            ${PH_CMD} "echo -e \"Status-Check (${NODE_HOSTNAME})\n${daterun}\" > ${NM_REPL_CHECK_LOC[${rcheck}]}/test.repl" &
+            # The folder must be expanded locally: inside single quotes the
+            # remote shell received the unexpanded array reference.
+            ${PH_CMD} "find '${NM_REPL_CHECK_LOC[${rcheck}]}' -iname '*.sync-conflict-*' -exec rm {} \;"
+          done
+        fi
+      fi
+    done
+    if [ "${ST_ACTION}" != "check" ]; then
+      echo -e "${idsCL[LightGreen]}Complete${idsCL[Default]}"
+      echo
+    fi
+  fi
+
+  ########################
+  ## SERVICE / DOCKER CHECKS
+  ########################
+  if [ "${ST_ACTION}" = "" ] || [ "${ST_ACTION}" = "report" ] || [ "${ST_ACTION}" = "services" ] || [ "${ST_ACTION}" = "dockers" ] || [ "${ST_ACTION}" = "check" ]; then
+    # Local CPU load: two /proc/stat samples taken one second apart.
+    cpu_usage=$(awk '{u=$2+$4; t=$2+$4+$5; if (NR==1){u1=u; t1=t;} else print ($2+$4-u1) * 100 / (t-t1) "%"; }' <(grep 'cpu ' /proc/stat) <(sleep 1;grep 'cpu ' /proc/stat) | sed -e 's/%//g')
+
+    if [ -n "${LOCAL_SERVICES+x}" ] && [ "${ST_ACTION}" != "dockers" ]; then
+      if [ "${ST_ACTION}" != "check" ]; then
+        lip=${RUN_NODE_IP}
+        echo -e "\n$(DIVIDER)\n${idsCL[Yellow]}${idsST[Bold]} LOCALHOST Service Status${idsST[Reset]}${idsCL[Default]}"
+        echo -e "$(DIVIDER)\n"
+        echo -en " ${idsST[Bold]}${idsCL[LightCyan]}${NODE_HOSTNAME} (${lip})${idsST[Reset]}"
+        if ([ "${ST_ACTION}" == "report" ] && [ "${2}" == "email" ]) || [ "${ST_ACTION}" != "report" ]; then
+          uptime=$(uptime -p)
+          echo -e "${idsCL[LightCyan]} - ${uptime} - ${idsCL[Yellow]}CPU: $(IDS_NUMBER_FORMAT ${cpu_usage} 1)%${idsCL[Default]}"
+        else
+          echo -e "${idsCL[Default]}"
+        fi
+        DIVIDER . green
+      fi
+      for srvc in "${LOCAL_SERVICES[@]}"; do
+        if [ "${ST_ACTION}" != "check" ]; then
+          if [ "${ST_ACTION}" != "report" ]; then
+            spc=$(_STATUSNEW_PAD ${PRI_CW} ${#NM_SERVICE_DESC[${srvc}]})
+          else
+            spc=' '
+          fi
+          echo -en "${idsCL[White]} ${NM_SERVICE_DESC[${srvc}]}${spc}${idsCL[Default]}: "
+        fi
+
+        if [ "$(systemctl is-active ${srvc})" != "active" ]; then
+          if [ ! -f ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.down ]; then
+            if [ "${ST_ACTION}" != "check" ]; then
+              echo -e "${idsCL[Red]}Not Running${idsCL[Default]}"
+            fi
+            SENDNOTICE "${NODE_HOSTNAME}" "${NM_SERVICE_DESC[${srvc}]} is down" 1
+            touch ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.down
+            echo "$(date +%Y-%m-%d-%H-%M-%S) - LOCAL - ${NODE_HOSTNAME} - ${NM_SERVICE_DESC[${srvc}]} is down" >> ${NM_LOGFILE}
+          else
+            # .down is re-touched at every renotify interval; .errtime keeps
+            # the time of the first failure.
+            errtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.down))
+            if [ -f ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.errtime ]; then
+              toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.errtime))
+            else
+              toterrtime=${errtime}
+            fi
+            if [ ${errtime} -gt ${NM_RENOTIFY_TIMEOUT} ]; then
+              if [ ! -f ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.errtime ]; then
+                mv ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.down ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.errtime
+              fi
+              touch ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.down
+            fi
+            # Always finish the status line, also between renotify intervals.
+            if [ "${ST_ACTION}" != "check" ]; then
+              echo -e "${idsCL[Red]}Not running for $(SHOW_TIME ${toterrtime})${idsCL[Default]}"
+            fi
+          fi
+        else
+          if [ "${ST_ACTION}" != "check" ]; then
+            echo -e "${idsCL[LightGreen]}Running${idsCL[Default]}"
+          fi
+          if [ -f ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.down ]; then
+            if [ -f ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.errtime ]; then
+              toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.errtime))
+            else
+              toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.down))
+            fi
+            rm -f ${NM_TMPFOLDER}/${NODE_HOSTNAME}~${srvc}.*
+            SENDNOTICE "${NODE_HOSTNAME}" "${NM_SERVICE_DESC[${srvc}]} is back UP! It was down for $(SHOW_TIME ${toterrtime})"
+            echo "$(date +%Y-%m-%d-%H-%M-%S) - LOCAL - ${NODE_HOSTNAME} - ${NM_SERVICE_DESC[${srvc}]} is back up" >> ${NM_LOGFILE}
+          fi
+        fi
+      done
+
+      if [ "${ST_ACTION}" != "check" ]; then echo; fi
+    fi
+
+    for NTYPE in "${ntypesel[@]}"; do
+      nid=1
+
+      dockers=${NTYPE}_DOCKERS_CHECK[@]
+      hosts=${NTYPE}_HOSTS[@]
+      var=${NTYPE}_HOSTS[@]
+
+      # Skip node types that have nothing to check for the requested action.
+      if ([[ ! -v ${NTYPE}_DOCKERS_CHECK ]] && [ "${ST_ACTION}" == "dockers" ]) || ([ "${NM_SERVICES_CHECK[${NTYPE}]}" == "" ] && [ "${ST_ACTION}" == "services" ]); then
+        GOFORCHECK=false
+      else
+        GOFORCHECK=true
+      fi
+
+      if [ ${GOFORCHECK} = true ]; then
+        if [ "${ST_ACTION}" != "check" ]; then
+          echo -e "${idsST[Bold]}"; DIVIDER
+          if [ "${ST_ACTION}" == "dockers" ]; then
+            echo -e "${idsCL[LightYellow]} ${NM_NODETYPES[$NTYPE]}-Node Docker Status${idsCL[Default]}"
+          else
+            echo -e "${idsCL[LightYellow]} ${NM_NODETYPES[$NTYPE]}-Node Service Status${idsCL[Default]}"
+          fi
+          DIVIDER . lightYellow; echo -e "${idsST[Reset]}"
+        fi
+        for nip in "${!var}"; do
+          if [ "${ST_ACTION}" != "check" ]; then
+            echo -en " ${idsST[Bold]}${idsCL[LightCyan]}${NM_HOSTNAMES[${nip}]} [${nip}]${idsST[Reset]}${idsCL[Default]}"
+            [ "${ST_ACTION}" != "report" ] && echo -en " - Verifying Host..."
+          fi
+
+          # An empty NCMD means "run locally"; anything else is the ssh prefix.
+          if [[ "${RUN_NODE_IP}" == *"${nip}"* ]]; then
+            NCMD=''; LH='- localhost'
+          else
+            NCMD="ssh -o ConnectTimeout=3 root@${nip}"; LH=''
+          fi
+          if [ "${NCMD}" != "" ]; then
+            # Reset first so a result from the previous host is never reused.
+            checkhostssl=''
+            checkhost=$(CHECK_HOST ${nip})
+            [ "${checkhost}" != "false" ] && checkhostssl=$(ssh -o BatchMode=yes -o ConnectTimeout=3 root@${nip} echo ok 2>&1)
+          else
+            checkhost=true
+            checkhostssl=ok
+          fi
+          if [ "${checkhost}" != "false" ] && [ "${checkhostssl}" == "ok" ]; then
+            [ "${NTYPE}" != "OFW" ] && cpu_usage=$(ssh -q -o ConnectTimeout=1 -o ConnectionAttempts=1 root@${nip} "/opt/idssys/defaults/get-data.sh cpu-usage") || cpu_usage=""
+
+            if [ "${ST_ACTION}" != "check" ]; then
+              if ([ "${ST_ACTION}" == "report" ] && [ "${2}" == "email" ]) || [ "${ST_ACTION}" != "report" ]; then
+                if [ "${NTYPE}" == "OFW" ]; then
+                  # OFW hosts: build the text from the third field of plain
+                  # `uptime` output, split on ":".
+                  uptime=$(${NCMD} uptime | awk '{print $3}' | cut -d, -f1)
+                  if [ ${#uptime} -ge 6 ]; then
+                    uptime="up $(echo $uptime | cut -d: -f1) days, $(echo $uptime | cut -d: -f2) hours, $(echo $uptime | cut -d: -f3) minutes"
+                  elif [ ${#uptime} -ge 3 ]; then
+                    uptime="up $(echo $uptime | cut -d: -f1) hours, $(echo $uptime | cut -d: -f2) minutes"
+                  else
+                    uptime="up ${uptime} minutes"
+                  fi
+                else
+                  uptime=$(${NCMD} uptime -p)
+                fi
+                echo -e "\r\033[K ${idsST[Bold]}${idsCL[LightCyan]}${NM_HOSTNAMES[${nip}]} [${nip}]${idsST[Reset]}${idsCL[White]} - ${uptime} - ${idsCL[Yellow]}CPU: $(IDS_NUMBER_FORMAT ${cpu_usage} 1)%${idsCL[Default]}"
+              else
+                echo -e "${idsCL[Default]}"
+              fi
+
+              DIVIDER false lightCyan
+            fi
+
+            # The host answers again: announce the recovery (only when a
+            # down notice had been sent) and clear its markers.
+            if [ -f ${NM_TMPFOLDER}/${nip}.down ]; then
+              if [ -f ${NM_TMPFOLDER}/${nip}.sent ]; then
+                if [ -f ${NM_TMPFOLDER}/${nip}.errtime ]; then
+                  toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.errtime))
+                else
+                  toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.down))
+                fi
+                SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}]-UP" "${NM_HOSTNAMES[${nip}]}[${nip}] is back UP! It was down for $(SHOW_TIME ${toterrtime})"
+              fi
+              rm -f ${NM_TMPFOLDER}/${nip}.*
+            fi
+
+            ########################
+            ## SERVICES CHECK
+            ########################
+            if [ "${ST_ACTION}" != "dockers" ] && [ "${NM_SERVICES_CHECK[${NTYPE}]}" != "" ]; then
+              echo -e "${idsCL[Green]} System Service(s) Status${idsCL[Default]}"
+              DIVIDER . green 52
+
+              srvcs=${NTYPE}_SERVICES_CHECK[@]
+              srvcstotest="$(join_by " " ${!srvcs})"
+              # Query all services in one call. The command is left unquoted
+              # so it also runs when NCMD is empty (local node).
+              [ "${NTYPE}" != "OFW" ] && srvctst=($(${NCMD} systemctl is-active ${srvcstotest}))
+              sr=0
+              for srvc in "${!srvcs}"; do
+                if [ "${ST_ACTION}" != "report" ]; then
+                  spc=$(_STATUSNEW_PAD ${PRI_CW} ${#NM_SERVICE_DESC[${srvc}]})
+                else
+                  spc=' '
+                fi
+                # Services listed in NM_SINGLESRVR_SERVICES are only checked
+                # on the node whose addresses include NM_SINGLESRVR_IP.
+                # NOTE(review): the inner `ip route` runs on this host, not on
+                # the node - confirm the interface names match.
+                tmp=${NM_SINGLESRVR_SERVICES[${NTYPE}]}
+                if [[ ${tmp[@]} =~ ${srvc} ]]; then
+                  if [[ $(${NCMD} ip addr show $(ip route | awk '/default/ { print $5 }') | grep "inet" | awk '/inet/ {print $2}' | cut -d'/' -f1) != *${NM_SINGLESRVR_IP[${NTYPE}]}* ]]; then
+                    SSCHK=false
+                    rm -f ${NM_TMPFOLDER}/${nip}~${srvc}.*
+                  else
+                    SSCHK=primary
+                  fi
+                else
+                  SSCHK=true
+                fi
+
+                [ "${srvc}" == "keepalived" ] && ([ "${nip}" == "10.2.1.2" ] || [ "${nip}" == "10.2.1.51" ]) && SSCHK=false
+
+                if [ ${SSCHK} != false ]; then
+                  [ "${ST_ACTION}" != "check" ] && echo -en "${idsCL[White]} ${NM_SERVICE_DESC[${srvc}]}${spc}${idsCL[Default]}: "
+                  if [ "${NTYPE}" != "OFW" ]; then
+                    srvctest=${srvctst[$sr]}
+                    # mysql and mariadb stand in for each other.
+                    if [ "${srvctest}" != "active" ] && [ "${srvc}" == "mysql" ]; then
+                      [ "$(${NCMD} systemctl is-active mariadb)" == "active" ] && mysqlgo=true || mysqlgo=false
+                    elif [ "${srvctest}" != "active" ] && [ "${srvc}" = "mariadb" ]; then
+                      [ "$(${NCMD} systemctl is-active mysql)" == "active" ] && mysqlgo=true || mysqlgo=false
+                    elif [ "${srvctest}" == "active" ] && ([ "${srvc}" == "mysql" ] || [ "${srvc}" == "mariadb" ]); then
+                      mysqlgo=true
+                    fi
+                  else
+                    # OFW hosts: look for the offsite-power-check.sh process.
+                    if [ "$(ssh -o ConnectTimeout=3 root@${nip} ps -U root | grep "offsite-power-check.sh start" | grep -v "grep" | awk '{print $1}')" != "" ]; then
+                      srvctest=active
+                    else
+                      srvctest=notactive
+                    fi
+                  fi
+                  if [ "${srvctest}" != "active" ] && [ "${mysqlgo}" != "true" ]; then
+                    if [ ! -f ${NM_TMPFOLDER}/${nip}~${srvc}.down ]; then
+                      [ "${ST_ACTION}" != "check" ] && echo -en "${idsCL[Red]}Not Running"
+                      touch ${NM_TMPFOLDER}/${nip}~${srvc}.down
+                      # No notice on first detection; one follows on a later
+                      # run, when the marker is between 60 and 180 seconds old.
+                      echo "$(date +%Y-%m-%d-%H-%M-%S) - ${nip} - ${NM_HOSTNAMES[${nip}]}[${nip}] - ${NM_SERVICE_DESC[${srvc}]} is down" >> ${NM_LOGFILE}
+                      [ "${srvc}" == "pdnsadmin" ] && ${NCMD} systemctl restart ${srvc} &
+                    elif [ ! -f ${NM_TMPFOLDER}/${nip}~${srvc}.disable ]; then
+                      errtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${srvc}.down))
+                      # Total outage: since .errtime when it exists, else since
+                      # .down. Set up front so the line below is never stale.
+                      if [ -f ${NM_TMPFOLDER}/${nip}~${srvc}.errtime ]; then
+                        toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${srvc}.errtime))
+                      else
+                        toterrtime=${errtime}
+                      fi
+                      if [ ${errtime} -gt ${NM_RENOTIFY_TIMEOUT} ]; then
+                        [ ! -f ${NM_TMPFOLDER}/${nip}~${srvc}.errtime ] && mv ${NM_TMPFOLDER}/${nip}~${srvc}.down ${NM_TMPFOLDER}/${nip}~${srvc}.errtime
+                        touch ${NM_TMPFOLDER}/${nip}~${srvc}.down
+                        SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}]" "${NM_SERVICE_DESC[${srvc}]} is down" 1
+                      elif [ ${errtime} -gt 60 ] && [ ${errtime} -lt 180 ] && [ ! -f ${NM_TMPFOLDER}/${nip}~${srvc}.errtime ]; then
+                        SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}]" "${NM_SERVICE_DESC[${srvc}]} is down" 1
+                      fi
+                      [ "${ST_ACTION}" != "check" ] && echo -en "${idsCL[Red]}Not running for $(SHOW_TIME ${toterrtime})"
+                    fi
+                  else
+                    [ "${ST_ACTION}" != "check" ] && echo -en "${idsCL[LightGreen]}Running"
+                    if [ -f ${NM_TMPFOLDER}/${nip}~${srvc}.down ]; then
+                      if [ -f ${NM_TMPFOLDER}/${nip}~${srvc}.errtime ]; then
+                        toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${srvc}.errtime))
+                      else
+                        toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${srvc}.down))
+                      fi
+                      SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}]" "${NM_SERVICE_DESC[${srvc}]} is back UP! It was down for $(SHOW_TIME ${toterrtime})"
+                      echo "$(date +%Y-%m-%d-%H-%M-%S) - ${nip} - ${NM_HOSTNAMES[${nip}]}[${nip}] - ${NM_SERVICE_DESC[${srvc}]} is back up, it was down for $(SHOW_TIME ${toterrtime})" >> ${NM_LOGFILE}
+                      rm -f ${NM_TMPFOLDER}/${nip}~${srvc}.*
+                    fi
+                    # NOTE(review): this clears NM_FOLDER, while the markers
+                    # above live in NM_TMPFOLDER - confirm the folder.
+                    if [ "${srvc}" == "gitea" ] || [ "${srvc}" == "headscale" ]; then
+                      rm -f ${NM_FOLDER}/*~${srvc}.*
+                    fi
+                  fi
+                  [ "${mysqlgo}" = "true" ] && unset mysqlgo
+                  [ "${SSCHK}" == "primary" ] && echo -e "${idsCL[LightCyan]} - Primary Node${idsCL[Default]}" || echo -e "${idsCL[Default]}"
+                elif [ "${srvc}" == "keepalived" ] && ([ "${nip}" == "10.2.1.2" ] || [ "${nip}" == "10.2.1.51" ]); then
+                  echo -e "${idsCL[White]} ${NM_SERVICE_DESC[${srvc}]}${spc}${idsCL[Default]}: ${idsCL[LightCyan]}Disabled${idsCL[Default]}"
+                else
+                  echo -e "${idsCL[White]} ${NM_SERVICE_DESC[${srvc}]}${spc}${idsCL[Default]}: ${idsCL[LightCyan]}Disabled - Secondary Node${idsCL[Default]}"
+                fi
+                sr=$((sr + 1))
+              done
+            fi
+
+            ########################
+            ## DOCKER CHECK
+            ########################
+            if [[ -v ${NTYPE}_DOCKERS_CHECK ]] && [ "${ST_ACTION}" != "services" ]; then
+              if [ "${ST_ACTION}" != "check" ]; then
+                [ "${ST_ACTION}" == "" ] && [ "${NM_SERVICES_CHECK[${NTYPE}]}" != "" ] && echo
+                echo -e "${idsCL[Green]} Docker Service(s) Status${idsCL[Default]}"
+                DIVIDER . green 52
+              fi
+              for docker in "${!dockers}"; do
+                if [ "${ST_ACTION}" != "report" ]; then
+                  spc=$(_STATUSNEW_PAD ${PRI_CW} ${#NM_DOCKER_DESC[${docker}]})
+                else
+                  spc=' '
+                fi
+                tmp=${NM_SINGLESRVR_DOCKERS[${NTYPE}]}
+                if [[ ${tmp[@]} =~ ${docker} ]]; then
+                  if [[ $(${NCMD} ip addr show $(ip route | awk '/default/ { print $5 }') | grep "inet" | awk '/inet/ {print $2}' | cut -d'/' -f1) != *${NM_SINGLESRVR_IP[${NTYPE}]}* ]]; then
+                    SDCHK=false
+                    rm -f ${NM_TMPFOLDER}/${nip}~${docker}.*
+                  else
+                    SDCHK=primary
+                  fi
+                else
+                  SDCHK=true
+                fi
+                if [ ${SDCHK} != false ]; then
+                  [ "${ST_ACTION}" != "check" ] && echo -en "${idsCL[White]} ${NM_DOCKER_DESC[${docker}]}$spc${idsCL[Default]}: "
+
+                  if [ ! "$(${NCMD} docker ps -q -f name=${docker})" ]; then
+                    # Not running: start the existing container, or create it
+                    # from its compose file, then look again after 10 seconds.
+                    if [ "$(${NCMD} docker ps -a | grep ${docker})" ]; then
+                      ${NCMD} docker start ${docker} >/dev/null 2>&1
+                      echo "$(date +%Y-%m-%d-%H-%M-%S) - ${nip} - ${NM_HOSTNAMES[${nip}]}[${nip}] - ${NM_DOCKER_DESC[${docker}]} (docker) is not running, starting now" >> ${NM_LOGFILE}
+                    else
+                      ${NCMD} docker compose -f ${NM_DOCKER_COMPOSE_LOC[${docker}]}/docker-compose.yml up -d >/dev/null 2>&1
+                      echo "$(date +%Y-%m-%d-%H-%M-%S) - ${nip} - ${NM_HOSTNAMES[${nip}]}[${nip}] - ${NM_DOCKER_DESC[${docker}]} (docker) is not found, creating and starting now" >> ${NM_LOGFILE}
+                    fi
+                    sleep 10s
+                    if [ "$(${NCMD} docker ps -q -f name=${docker})" ]; then
+                      if [ "${ST_ACTION}" != "check" ]; then
+                        echo -en "${idsCL[Green]}Running - Fixed"
+                      fi
+                      SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}]" "${NM_DOCKER_DESC[${docker}]} is fixed"
+                      rm -f ${NM_TMPFOLDER}/${nip}~${docker}.*
+                      echo "$(date +%Y-%m-%d-%H-%M-%S) - ${nip} - ${NM_HOSTNAMES[${nip}]}[${nip}] - ${NM_DOCKER_DESC[${docker}]} (docker) is fixed" >> ${NM_LOGFILE}
+                    else
+                      if [ "${ST_ACTION}" != "check" ]; then
+                        echo -en "${idsCL[Red]}Not Running - Could Not Fix!"
+                      fi
+                      # Markers and notices are handled in every mode, "check"
+                      # included; only the echoes depend on the mode.
+                      if [ ! -f ${NM_TMPFOLDER}/${nip}~${docker}.down ]; then
+                        SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}]" "${NM_DOCKER_DESC[${docker}]} is down, could not fix" 1
+                        touch ${NM_TMPFOLDER}/${nip}~${docker}.down
+                        echo "$(date +%Y-%m-%d-%H-%M-%S) - ${nip} - ${NM_HOSTNAMES[${nip}]}[${nip}] - ${NM_DOCKER_DESC[${docker}]} (docker) is down, could not fix" >> ${NM_LOGFILE}
+                      else
+                        errtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${docker}.down))
+                        if [ ${errtime} -gt ${NM_RENOTIFY_TIMEOUT} ]; then
+                          if [ ! -f ${NM_TMPFOLDER}/${nip}~${docker}.errtime ]; then
+                            mv ${NM_TMPFOLDER}/${nip}~${docker}.down ${NM_TMPFOLDER}/${nip}~${docker}.errtime
+                          fi
+                          toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${docker}.errtime))
+                          if [ "${ST_ACTION}" != "check" ]; then
+                            echo -e "${idsCL[Red]}Not running for $(SHOW_TIME ${toterrtime})${idsCL[Default]}"
+                          fi
+                          touch ${NM_TMPFOLDER}/${nip}~${docker}.down
+                        fi
+                      fi
+                    fi
+                  else
+                    if [ "${ST_ACTION}" != "check" ]; then
+                      echo -en "${idsCL[LightGreen]}Running"
+                    fi
+                    rm -f ${NM_TMPFOLDER}/${nip}~${docker}.*
+                  fi
+                  [ "${SDCHK}" == "primary" ] && echo -e "${idsCL[LightCyan]} - Primary Node${idsCL[Default]}" || echo -e "${idsCL[Default]}"
+                else
+                  echo -e "${idsCL[White]} ${NM_DOCKER_DESC[${docker}]}${spc}${idsCL[Default]}: ${idsCL[LightCyan]}Disabled - Secondary Node${idsCL[Default]}"
+                fi
+              done
+            fi
+
+          else
+            [ "${ST_ACTION}" != "check" ] && echo -en "\033[K\r ${idsST[Bold]}${idsCL[LightRed]}${NM_HOSTNAMES[${nip}]} [${nip}]${idsST[Reset]}"
+            rm -f ${NM_TMPFOLDER}/${nip}~*
+            if [ ! -f ${NM_TMPFOLDER}/${nip}.down ]; then
+              # First failed probe: only record it. Notices start with the
+              # next failed run, once this marker already exists.
+              touch ${NM_TMPFOLDER}/${nip}.down
+              if [ "${ST_ACTION}" != "check" ]; then
+                if [ "${checkhost}" != "false" ]; then
+                  echo -e "${idsCL[Red]} - Node is online, but SSL is down!${idsCL[Default]}"
+                else
+                  echo -e "${idsCL[Red]} - Node is down!${idsCL[Default]}"
+                fi
+              fi
+              [ "${checkhostssl}" != "ok" ] && echo "$(date +%Y-%m-%d-%H-%M-%S) - ${nip} - ${NM_HOSTNAMES[${nip}]}[${nip}] - Node is down" >> ${NM_LOGFILE}
+            else
+              if [ ! -f ${NM_TMPFOLDER}/${nip}.errtime ]; then
+                # Second failed probe: keep the first-failure time in
+                # .errtime and send the first notice.
+                mv ${NM_TMPFOLDER}/${nip}.down ${NM_TMPFOLDER}/${nip}.errtime
+                toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.errtime))
+                errtime=${toterrtime}
+                if [ "${checkhost}" != "false" ]; then
+                  SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}]" "${NM_HOSTNAMES[${nip}]}[${nip}] is online, but SSL is down" 1
+                else
+                  SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}]" "${NM_HOSTNAMES[${nip}]}[${nip}] is down" 1
+                fi
+                touch ${NM_TMPFOLDER}/${nip}.sent
+              else
+                toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.errtime))
+                errtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.down))
+              fi
+
+              if [ "${ST_ACTION}" != "check" ]; then
+                if [ "${checkhost}" != "false" ]; then
+                  echo -e "${idsCL[Red]} - Node SSL has been down for $(SHOW_TIME ${toterrtime}) ${idsCL[LightYellow]}${LH}${idsCL[Default]}"
+                else
+                  echo -e "${idsCL[Red]} - Node has been down for $(SHOW_TIME ${toterrtime}) ${idsCL[LightYellow]}${LH}${idsCL[Default]}"
+                fi
+              fi
+              if [ $(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}.sent)) -gt ${NM_RENOTIFY_TIMEOUT} ]; then
+                if [ "${checkhost}" != "false" ]; then
+                  SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}]" "${NM_HOSTNAMES[${nip}]}[${nip}] SSL has been down for $(SHOW_TIME ${toterrtime})" 1
+                else
+                  SENDNOTICE "${NM_HOSTNAMES[${nip}]}[${nip}]" "${NM_HOSTNAMES[${nip}]}[${nip}] has been down for $(SHOW_TIME ${toterrtime})" 1
+                fi
+                touch ${NM_TMPFOLDER}/${nip}.sent
+              fi
+              touch ${NM_TMPFOLDER}/${nip}.down
+            fi
+          fi
+          [ "${ST_ACTION}" != "check" ] && echo
+          nid=$((nid + 1))
+        done
+      fi #GOFORCHECK
+    done
+  fi
+
+  ########################
+  ## REPLICATION CHECK
+  ########################
+  if [ "${ST_ACTION}" == "report" ] || [ "${ST_ACTION}" == "repl" ] || [ "${ST_ACTION}" == "check" ] || [ "${ST_ACTION}" == "" ]; then
+    if [ "${ST_ACTION}" != "check" ]; then
+      echo -e "${idsST[Bold]}"; DIVIDER
+      echo -e "${idsCL[Yellow]} Replication Status Between the Primary and Secondary Nodes${idsCL[Default]}"
+      DIVIDER; echo -e "${idsST[Reset]}"
+      echo -en " ${idsCL[LightCyan]}Starting processes to collect/monitor replication status : "
+    fi
+    # Pass 1: start one background REPLCHECK per folder and non-primary host.
+    for NTYPE in "${ntypesel[@]}"; do
+      PH=${NTYPE}_HOSTS[0]
+      PH_CMD="ssh root@${!PH}"
+      var=${NTYPE}_REPL_CHECK[@]
+      if [ -n "${!var+x}" ]; then
+        var=${NTYPE}_HOSTS[@]
+        for nip in "${!var}"; do
+          if [[ "${RUN_NODE_IP}" != *"${nip}"* ]] && [ "${nip}" != "${!PH}" ]; then
+            if [ ! -f ${NM_FOLDER}/${!PH}.down ] && [ ! -f ${NM_TMPFOLDER}/${nip}.down ]; then
+              var2=${NTYPE}_REPL_CHECK[@]
+              for rcheck in "${!var2}"; do
+                # The redirection has to precede "&"; placed after it, it
+                # applied to an empty command and silenced nothing.
+                REPLCHECK "${rcheck}" "${nip}" "${PH_CMD}" "${ST_ACTION}" >/dev/null 2>&1 &
+              done
+            fi
+          fi
+        done
+      fi
+    done
+    [ "${ST_ACTION}" != "check" ] && echo -e "${idsCL[Green]}Done${idsCL[Default]}\n"
+
+    # Pass 2: wait for each result marker and act on it.
+    for NTYPE in "${ntypesel[@]}"; do
+      PH=${NTYPE}_HOSTS[0]
+      PH_CMD="ssh root@${!PH}"
+      var=${NTYPE}_REPL_CHECK[@]
+      if [ -n "${!var+x}" ]; then
+        var=${NTYPE}_HOSTS[@]
+        for nip in "${!var}"; do
+          if [[ "${RUN_NODE_IP}" != *"${nip}"* ]] && [ "${nip}" != "${!PH}" ]; then
+            if [ "${ST_ACTION}" != "check" ]; then
+              echo -e " ${idsCL[LightCyan]}${NM_HOSTNAMES[${!PH}]} (${!PH}) <--> ${idsST[Bold]}${NM_HOSTNAMES[${nip}]}[${nip}]${idsST[Reset]}${idsCL[Default]}"
+              DIVIDER false green
+            fi
+            if [ ! -f ${NM_FOLDER}/${!PH}.down ] && [ ! -f ${NM_TMPFOLDER}/${nip}.down ]; then
+              var2=${NTYPE}_REPL_CHECK[@]
+              for rcheck in "${!var2}"; do
+                if [ "${ST_ACTION}" != "check" ]; then
+                  if [ "${ST_ACTION}" != "report" ]; then
+                    spc=$(_STATUSNEW_PAD ${PRI_CW} ${#NM_REPL_DESC[${rcheck}]})
+                  else
+                    spc=' '
+                  fi
+                  echo -en " ${NM_REPL_DESC[${rcheck}]}${spc}: "
+                fi
+
+                # Poll for the result file (presumably written by the
+                # REPLCHECK jobs started above), sleeping between polls.
+                # NOTE(review): there is no upper bound; this relies on a
+                # .good or .timeout file always appearing.
+                checked=""
+                until [ "${checked}" != "" ]; do
+                  if [ -f ${NM_TMPFOLDER}/repl.${rcheck}.${nip}.good ]; then
+                    checked=good
+                  elif [ -f ${NM_TMPFOLDER}/repl.${rcheck}.${nip}.timeout ]; then
+                    checked=timeout
+                  else
+                    sleep 1
+                  fi
+                done
+
+                if [ "${checked}" == "timeout" ]; then
+                  [ "${ST_ACTION}" != "check" ] && echo -e "${idsCL[Red]}Timeout${idsCL[Default]}"
+                  if [ ! -f ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.down ]; then
+                    touch ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.down
+                  elif [ ! -f ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.errtime ]; then
+                    mv ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.down ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.errtime
+                    touch ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.down
+                    SENDNOTICE "Repl-Timeout-'${NM_HOSTNAMES[${nip}]}[${nip}]'" "${NM_REPL_DESC[${rcheck}]} (${NM_REPL_CHECK_LOC[${rcheck}]})" 1
+                    echo "$(date +%Y-%m-%d-%H-%M-%S) - ${nip} - ${NM_HOSTNAMES[${nip}]}[${nip}] - (${NM_REPL_DESC[${rcheck}]}) Replicated folder timeout, it is not syncing" >> ${NM_LOGFILE}
+                  else
+                    snderrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.down))
+                    if [ ${snderrtime} -gt ${NM_RENOTIFY_TIMEOUT} ]; then
+                      toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.errtime))
+                      SENDNOTICE "Repl-Timeout-'${NM_HOSTNAMES[${nip}]}[${nip}]'" "${NM_REPL_DESC[${rcheck}]} (${NM_REPL_CHECK_LOC[${rcheck}]})
+It has been down for $(SHOW_TIME ${toterrtime})" 1
+                      echo "$(date +%Y-%m-%d-%H-%M-%S) - ${nip} - ${NM_HOSTNAMES[${nip}]}[${nip}] - (${NM_REPL_DESC[${rcheck}]}) Replicated folder timeout, has now been down for: $(SHOW_TIME ${toterrtime})" >> ${NM_LOGFILE}
+                      touch ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.down
+                    fi
+                  fi
+                elif [ "${checked}" == "good" ]; then
+                  [ "${ST_ACTION}" != "check" ] && echo -e "${idsCL[LightGreen]}Good${idsCL[Default]}"
+                  if [ -f ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.down ]; then
+                    if [ -f ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.errtime ]; then
+                      toterrtime=$(expr $(date +%s) - $(stat -c %Y ${NM_TMPFOLDER}/${nip}~${rcheck}.repl.errtime))
+                      SENDNOTICE "Repl-'${NM_HOSTNAMES[${nip}]}[${nip}]'" "Replicated folder is back up!\n${NM_REPL_DESC[${rcheck}]} (${NM_REPL_CHECK_LOC[${rcheck}]})
+It was down for $(SHOW_TIME ${toterrtime})"
+                      echo "$(date +%Y-%m-%d-%H-%M-%S) - ${nip} - ${NM_HOSTNAMES[${nip}]}[${nip}] - (${NM_REPL_DESC[${rcheck}]}) Replicated folder is back up" >> ${NM_LOGFILE}
+                    fi
+                    rm -f ${NM_TMPFOLDER}/${nip}~${rcheck}.repl*
+                  fi
+                  ssh -q root@${nip} "find '${NM_REPL_CHECK_LOC[${rcheck}]}' -iname '*.sync-conflict-*' -exec rm {} \;"
+                fi
+              done
+            else
+              if [ "${ST_ACTION}" != "check" ]; then
+                [ -f ${NM_FOLDER}/${!PH}.down ] && echo -e "${idsCL[Red]}${NM_HOSTNAMES[${!PH}]} (${!PH}) is offline${idsCL[Default]}"
+                [ -f ${NM_TMPFOLDER}/${nip}.down ] && echo -e "${idsCL[Red]}${NM_HOSTNAMES[${nip}]}[${nip}] is offline${idsCL[Default]}"
+              fi
+            fi
+            [ "${ST_ACTION}" != "check" ] && echo
+          fi
+        done
+      fi
+    done
+
+    ##########################
+    # REMOVE REPL CHECK FILES
+    ##########################
+    if [ "${ST_ACTION}" != "check" ]; then
+      echo -en "${idsCL[LightCyan]} Cleaning up status checks... ${idsCL[Default]}"
+    fi
+    for NTYPE in "${ntypesel[@]}"; do
+      PH=${NTYPE}_HOSTS[0]
+      if [ ! -f ${NM_FOLDER}/${!PH}.down ]; then
+        PH_CMD="ssh root@${!PH}"
+        var=${NTYPE}_REPL_CHECK[@]
+        if [ -n "${!var+x}" ]; then
+          for rcheck in "${!var}"; do
+            ${PH_CMD} rm -f ${NM_FOLDER}/test.repl
+            ${PH_CMD} rm -f ${NM_REPL_CHECK_LOC[${rcheck}]}/test.repl &
+            rm -f ${NM_TMPFOLDER}/repl.${rcheck}.*
+          done
+        fi
+      fi
+    done
+    rm -Rf ${NM_FOLDER}/test.repl
+    if [ "${ST_ACTION}" != "check" ]; then
+      echo -e "${idsCL[LightGreen]}Complete${idsCL[Default]}"
+      echo
+    fi
+  fi
+
+  ########################
+  ## FREE SPACE CHECK
+  ########################
+  if [ "${ST_ACTION}" = "freespace" ]; then
+    if [ "${ST_ACTION}" != "repl" ] && [ "${ST_ACTION}" != "services" ]; then
+      for NTYPE in "${ntypesel[@]}"; do
+        if [ "${ST_ACTION}" != "check" ]; then
+          echo -e "${idsST[Bold]}"; DIVIDER
+          echo -e "${idsCL[Yellow]} ${NM_NODETYPES[$NTYPE]}-Node(s) Free Space Scan${idsCL[Default]}"
+          DIVIDER; echo -e "${idsST[Reset]}"
+        fi
+        nid=1
+        var=${NTYPE}_HOSTS[@]
+        for nip in "${!var}"; do
+          if [[ "${RUN_NODE_IP}" == *"${nip}"* ]]; then
+            NCMD=''; LH='- localhost'
+          else
+            NCMD="ssh -o ConnectTimeout=3 root@${nip}"; LH=''
+          fi
+          if [ "${ST_ACTION}" != "check" ]; then
+            echo -e " ${idsST[Bold]}${idsCL[LightCyan]}${NM_HOSTNAMES[${nip}]} [${nip}]${idsST[Reset]}${idsCL[LightCyan]} ${idsCL[LightYellow]}${LH}${idsCL[Default]}"
+            DIVIDER false green
+          fi
+
+          [ "${ST_ACTION}" != "check" ] && [ "${2}" != "report" ] && echo -en " ${idsCL[LightCyan]}Getting drives from server ... ${idsCL[Default]}"
+          # Flat list of "device size free" triples; NCMD is empty locally.
+          DRIVEINFO=($(${NCMD} df -BM | grep -vE '^Filesystem|tmpfs|cdrom|@|ram|loop|udev|veeamimage|nvme|localhost|shm|mmcblk|overlay|-volume|Music|Software' | awk '{ print $1 " " $2 " " $4 }'))
+          if [ "${2}" != "report" ]; then
+            echo -en "\e[1A"
+            echo -e "\e[0K\r"
+          fi
+
+          NUMDRIVES=$(( ${#DRIVEINFO[@]} / 3 ))
+          for ((i = 0; i < NUMDRIVES; i++)); do
+            ii=$(( i * 3 ))
+            # Label: last path component of the device, minus any "vg-" prefix.
+            pname=${DRIVEINFO[${ii}]##*/}
+            pname=${pname#*vg-}
+            # Filesystems of 1024 MB or less in total size are skipped.
+            if [ ${DRIVEINFO[$((ii + 1))]//M/} -gt 1024 ]; then
+              freespace=${DRIVEINFO[$((ii + 2))]//M/}
+              if [ "${ST_ACTION}" != "check" ]; then
+                spc=$(_STATUSNEW_PAD 20 ${#pname})
+                echo -en " ${pname}$spc: "
+              fi
+              if [[ ${freespace} -gt 1024 ]]; then
+                fsgb=$(bc <<< "scale=2; ${freespace}/1024")
+                fsdsp="${fsgb} GB"
+              else
+                fsdsp="${freespace} MB"
+              fi
+              if [ "${freespace}" -le "1024" ]; then
+                fs_status='error'
+                fs_status_color='Red'
+                SENDNOTICE "Free Space Critical: '${NM_HOSTNAMES[${nip}]}[${nip}]'" "${DRIVEINFO[${ii}]} : ${fsdsp} free" 1
+              elif [ "${freespace}" -le "5120" ]; then
+                fs_status='warn'
+                fs_status_color='Yellow'
+                SENDNOTICE "Free Space Warning: '${NM_HOSTNAMES[${nip}]}[${nip}]'" "${DRIVEINFO[${ii}]} : ${fsdsp} free"
+              else
+                fs_status=''
+                fs_status_color='Green'
+              fi
+              [ "${ST_ACTION}" != "check" ] && echo -e "${idsCL[${fs_status_color}]}${fsdsp} ${idsCL[Default]}"
+            fi
+          done
+
+          nid=$((nid + 1))
+          echo
+        done
+      done
+    fi
+  fi
+
+  if [ "${ST_ACTION}" != "check" ]; then
+    echo
+    if [ -z "${ACTION}" ] || [ "${ACTION}" = "gui" ]; then
+      DIVIDER true
+      ENTER2CONTINUE
+    fi
+  fi
+  end=$(date +%s)
+  runtime=$((end-start))
+  echo -e "Runtime: ${runtime}\n"
+  [ "${ST_ACTION}" == "report" ] && echo ${runtime} >> ${NM_LOGFOLDER}/status-check.scantimes
+}
diff --git a/nodemgmt-scripts.sh b/nodemgmt-scripts.sh
index fe9fd9d0..62ba7257 100755
--- a/nodemgmt-scripts.sh
+++ b/nodemgmt-scripts.sh
@@ -10,6 +10,7 @@
 source /opt/idssys/nodemgmt/conf/settings.conf
 source /opt/idssys/nodemgmt/defaults.inc
 source /opt/idssys/nodemgmt/inc/status.inc
+source /opt/idssys/nodemgmt/inc/status.new.inc
 source /opt/idssys/nodemgmt/inc/certs.inc
 source /opt/idssys/nodemgmt/inc/sites.inc
 source /opt/idssys/nodemgmt/inc/services.inc