#!/usr/bin/env bash
# Bash script, executable (source listing metadata: 711 lines, 30 KiB).

#######################################
# Print the IPv4 address(es) bound to eth0 on this machine, one per line.
#######################################
_status_local_ips() {
  /sbin/ip -o -4 addr list eth0 | awk '{print $4}' | cut -d/ -f1
}

#######################################
# Succeed when the eth0 addresses reported through command prefix $1
# (empty = run locally, otherwise e.g. "ssh root@host") contain $2.
#######################################
_status_has_ip() {
  # $1 is deliberately unquoted so "ssh root@host" splits into words.
  [[ "$($1 /sbin/ip -o -4 addr list eth0 | awk '{print $4}' | cut -d/ -f1)" == *"$2"* ]]
}

#######################################
# Print the number of seconds since file $1 was last modified.
# Prints 0 when the file cannot be stat'ed.
#######################################
_status_age() {
  local now mtime
  now=$(date +%s)
  mtime=$(stat -c %Y "$1" 2>/dev/null) || mtime=${now}
  echo $(( now - mtime ))
}

#######################################
# Print the total downtime (seconds) recorded by the marker pair
# $1.errtime / $1.down. The .errtime file keeps the original failure time
# once a re-notification has refreshed .down, so it wins when present.
#######################################
_status_downtime() {
  if [[ -f "$1.errtime" ]]; then
    _status_age "$1.errtime"
  else
    _status_age "$1.down"
  fi
}

#######################################
# Print the spaces that follow label $1 so the next column starts at width $2.
# In "report" mode ($3) a single space is printed instead.
# Labels wider than the column get no padding (the previous counting loop
# never terminated in that case).
#######################################
_status_pad() {
  local label=$1 width=$2 mode=$3 gap
  if [[ "${mode}" == "report" ]]; then
    printf ' '
    return 0
  fi
  gap=$(( width - ${#label} ))
  (( gap > 0 )) && printf '%*s' "${gap}" ''
  return 0
}

#######################################
# Check every systemd unit listed in <type>_SERVICES_CHECK on one node.
# Globals:   ST_ACTION, TMPFOLDER, FOLDER, LOGFILE, RENOTIFY, WEB_HOSTS,
#            NM_SERVICES, NM_HOSTNAMES, idsCL (read)
# Arguments: $1 node type, $2 node IP, $3 command prefix ("" = local)
#######################################
_status_node_services() {
  local ntype=$1 nip=$2 ncmd=$3
  local verbose=true srvcs srvc state label marker alt mysqlgo errtime toterrtime
  local sr=0
  local -a names=() states=()
  [[ "${ST_ACTION}" == "check" ]] && verbose=false

  srvcs="${ntype}_SERVICES_CHECK[@]"
  names=("${!srvcs}")
  # One round trip for all units; is-active prints one state per line in
  # argument order, so states[i] belongs to names[i].
  mapfile -t states < <(${ncmd} systemctl is-active "${names[@]}")

  for srvc in "${names[@]}"; do
    state=${states[sr]}
    sr=$(( sr + 1 ))

    # gitea is only checked on the WEB node whose eth0 carries WEB_HOSTS[0].
    if [[ "${srvc}" == "gitea" && "${ntype}" == "WEB" ]] \
      && ! _status_has_ip "${ncmd}" "${WEB_HOSTS[0]}"; then
      continue
    fi
    # Hard-coded exemption carried over from the original script.
    [[ "${srvc}" == "keepalived" && "${nip}" == "10.2.1.2" ]] && continue

    label=${NM_SERVICES[${srvc}]}
    marker="${TMPFOLDER}/${nip}~${srvc}"
    ${verbose} && echo -en " ${label}$(_status_pad "${label}" 30 "${ST_ACTION}"): "

    # mysql and mariadb are treated as aliases: either one being active counts.
    mysqlgo=false
    case "${srvc}" in
      mysql|mariadb)
        if [[ "${state}" == "active" ]]; then
          mysqlgo=true
        else
          alt=mariadb
          [[ "${srvc}" == "mariadb" ]] && alt=mysql
          [[ "$(${ncmd} systemctl is-active "${alt}")" == "active" ]] && mysqlgo=true
        fi
        ;;
    esac

    if [[ "${state}" != "active" && "${mysqlgo}" != "true" ]]; then
      if [[ ! -f "${marker}.down" ]]; then
        # First detection: record it only; the first notice is sent on a later
        # run, once the outage is between 60 and 180 seconds old.
        ${verbose} && echo -e "${idsCL[Red]}Not Running${idsCL[Default]}"
        touch "${marker}.down"
        echo "$(date) - ${nip} - ${NM_HOSTNAMES[${nip}]} - ${label} is down" >> "${LOGFILE}"
      else
        errtime=$(_status_age "${marker}.down")
        if (( errtime > RENOTIFY )); then
          [[ -f "${marker}.errtime" ]] || mv "${marker}.down" "${marker}.errtime"
          touch "${marker}.down"
          SENDNOTICE "${NM_HOSTNAMES[${nip}]}-${nip}" "${label} is down" 1
        elif (( errtime > 60 && errtime < 180 )) && [[ ! -f "${marker}.errtime" ]]; then
          SENDNOTICE "${NM_HOSTNAMES[${nip}]}-${nip}" "${label} is down" 1
        fi
        # Downtime is always recomputed; it used to be a stale value outside
        # the re-notify branch.
        ${verbose} && echo -e "${idsCL[Red]}Not running for $(SHOW_TIME "$(_status_downtime "${marker}")")${idsCL[Default]}"
      fi
    else
      ${verbose} && echo -e "${idsCL[Green]}Running${idsCL[Default]}"
      if [[ -f "${marker}.down" ]]; then
        toterrtime=$(_status_downtime "${marker}")
        SENDNOTICE "${NM_HOSTNAMES[${nip}]}-${nip}" "${label} is back UP! It was down for $(SHOW_TIME "${toterrtime}")"
        echo "$(date) - ${nip} - ${NM_HOSTNAMES[${nip}]} - ${label} is back up, it was down for $(SHOW_TIME "${toterrtime}")" >> "${LOGFILE}"
        rm -f "${marker}".*
      fi
      if [[ "${srvc}" == "gitea" ]]; then
        # NOTE(review): this clears markers under FOLDER, while the markers
        # above live under TMPFOLDER - confirm which directory is intended.
        rm -f "${FOLDER}"/*~"${srvc}".*
      fi
    fi
  done
}

#######################################
# Check (and try to restart) every container listed in <type>_DOCKER on a node.
# Globals:   ST_ACTION, TMPFOLDER, LOGFILE, RENOTIFY, WEB_HOSTS, NM_DOCKERS,
#            NM_DOCKER_COMPOSE, NM_HOSTNAMES, idsCL (read)
# Arguments: $1 node type, $2 node IP, $3 command prefix ("" = local)
#######################################
_status_node_dockers() {
  local ntype=$1 nip=$2 ncmd=$3
  local verbose=true dockers docker label marker errtime
  [[ "${ST_ACTION}" == "check" ]] && verbose=false
  dockers="${ntype}_DOCKER[@]"

  if ${verbose}; then
    echo
    echo -e "${idsCL[Yellow]} Docker Service(s) Status${idsCL[Default]}"
    echo -e "${idsCL[Yellow]}---------------------------------------------${idsCL[Default]}"
  fi

  for docker in "${!dockers}"; do
    # These containers are only checked on the WEB node holding WEB_HOSTS[0].
    case "${docker}" in
      vaultwarden|heimdall|authelia)
        if [[ "${ntype}" == "WEB" ]] && ! _status_has_ip "${ncmd}" "${WEB_HOSTS[0]}"; then
          continue
        fi
        ;;
    esac

    label=${NM_DOCKERS[${docker}]}
    marker="${TMPFOLDER}/${nip}~${docker}"
    ${verbose} && echo -en " ${label}$(_status_pad "${label}" 30 "${ST_ACTION}"): "

    if [[ -n "$(${ncmd} docker ps -q -f name="${docker}")" ]]; then
      ${verbose} && echo -e "${idsCL[Green]}Running${idsCL[Default]}"
      rm -f "${marker}".*
      continue
    fi

    # Not running: start the existing container, or create it via compose.
    if ${ncmd} docker ps -a | grep -q -- "${docker}"; then
      ${ncmd} docker start "${docker}" >/dev/null 2>&1
      echo "$(date) - ${nip} - ${NM_HOSTNAMES[${nip}]} - ${label} (docker) is not running, starting now" >> "${LOGFILE}"
    else
      ${ncmd} /usr/local/bin/docker-compose -f "${NM_DOCKER_COMPOSE[${docker}]}/docker-compose.yml" up -d >/dev/null 2>&1
      echo "$(date) - ${nip} - ${NM_HOSTNAMES[${nip}]} - ${label} (docker) is not found, creating and starting now" >> "${LOGFILE}"
    fi
    sleep 10s

    if [[ -n "$(${ncmd} docker ps -q -f name="${docker}")" ]]; then
      ${verbose} && echo -e "${idsCL[Green]}Running - Fixed${idsCL[Default]}"
      SENDNOTICE "${NM_HOSTNAMES[${nip}]}-${nip}" "${label} is fixed"
      rm -f "${marker}".*
      echo "$(date) - ${nip} - ${NM_HOSTNAMES[${nip}]} - ${label} (docker) is fixed" >> "${LOGFILE}"
      continue
    fi

    ${verbose} && echo -e "${idsCL[Red]}Not Running - Could Not Fix!${idsCL[Default]}"
    # The bookkeeping below used to sit inside the display-only branch, so
    # "check" runs never raised this alert.
    if [[ ! -f "${marker}.down" ]]; then
      SENDNOTICE "${NM_HOSTNAMES[${nip}]}-${nip}" "${label} is down, could not fix" 1
      touch "${marker}.down"
      echo "$(date) - ${nip} - ${NM_HOSTNAMES[${nip}]} - ${label} (docker) is down, could not fix" >> "${LOGFILE}"
    else
      errtime=$(_status_age "${marker}.down")
      if (( errtime > RENOTIFY )); then
        [[ -f "${marker}.errtime" ]] || mv "${marker}.down" "${marker}.errtime"
        ${verbose} && echo -e "${idsCL[Red]}Not running for $(SHOW_TIME "$(_status_age "${marker}.errtime")")${idsCL[Default]}"
        touch "${marker}.down"
      fi
    fi
  done
}

#######################################
# Run the cluster status checks and print and/or notify about the results.
#
# Arguments:
#   $1 - action: "" (everything except free space), "report" (same, colours
#        stripped), "check" (same, silent: only notices and log lines),
#        "repl" or "sync" (replication only), "services", "dockers",
#        "freespace".
#   $2 - with "report": "email" also prints each host's uptime.
# Globals read:
#   NODE_TYPES, <TYPE>_HOSTS, <TYPE>_REPL_CHECK, <TYPE>_SERVICES_CHECK,
#   <TYPE>_DOCKER, LOCAL_SERVICES, REPL_CHECKS, REPL_DESC, NM_* name maps,
#   NODE_HOSTNAME, FOLDER, TMPFOLDER, LOGFILE, RENOTIFY, action.
# Globals written:
#   ST_ACTION; idsCL/idsBG/idsST are emptied for "report".
# Helpers defined elsewhere in the file:
#   DIVIDER, SENDNOTICE, SHOW_TIME, CHECK_HOST, REPLCHECK, ENTER2CONTINUE.
#   SENDNOTICE takes a subject, a body and an optional third argument "1"
#   (presumably a priority flag - confirm against its definition).
# State files:
#   <TMPFOLDER>/<key>.down marks an ongoing failure (its mtime is refreshed on
#   every re-notification); <key>.errtime keeps the original failure time.
# Outputs:
#   Status text on stdout (suppressed for "check") and a final
#   "runtime: <seconds>" line.
#######################################
STATUS(){
  local start end runtime
  local verbose=true show_uptime=true do_repl=false do_services=false
  local report_to=${2}
  local local_ips NTYPE PH PH_CMD var var2 rcheck daterun nip NCMD LH checkhost
  local srvc label marker host_marker result errtime toterrtime up_txt
  local checked deadline line partition total avail pname freespace fsdsp fs_status_color
  local -a drive_lines=()

  start=$(date +%s)
  ST_ACTION=${1}

  if [[ "${ST_ACTION}" == "report" ]]; then
    # Reports are plain text: blank out every colour/style escape.
    unset idsCL idsBG idsST
    idsCL=('')
    idsBG=('')
    idsST=('')
  elif [[ "${ST_ACTION}" == "sync" ]]; then
    ST_ACTION=repl
  fi

  [[ "${ST_ACTION}" == "check" ]] && verbose=false
  [[ "${ST_ACTION}" == "report" && "${report_to}" != "email" ]] && show_uptime=false
  case "${ST_ACTION}" in
    ''|report|check) do_repl=true; do_services=true ;;
    repl) do_repl=true ;;
    services|dockers) do_services=true ;;
  esac
  local_ips=$(_status_local_ips)

  ########################
  ## REPLICATION SETUP: drop a fresh test.repl on every primary node
  ########################
  if ${do_repl}; then
    ${verbose} && echo -en "${idsCL[LightCyan]}Setting up replication checks ... ${idsCL[Default]}"
    for NTYPE in "${NODE_TYPES[@]}"; do
      PH="${NTYPE}_HOSTS[0]"
      # NOTE(review): primary-down markers are looked up under FOLDER, while
      # node markers are written under TMPFOLDER - confirm they are the same.
      [[ -f "${FOLDER}/${!PH}.down" ]] && continue
      PH_CMD="ssh root@${!PH}"
      var="${NTYPE}_REPL_CHECK[@]"
      [[ -n "${!var+x}" ]] || continue
      for rcheck in "${!var}"; do
        ${PH_CMD} rm -f "${FOLDER}/test.repl"
        daterun=$(date +%Y-%m-%d-%H-%M-%S)
        ${PH_CMD} "echo -e \"Status-Check (${NODE_HOSTNAME})\n${daterun}\" > ${REPL_CHECKS[${rcheck}]}/test.repl" &
      done
    done
    if ${verbose}; then
      echo -e "${idsCL[Green]}Complete${idsCL[Default]}"
      echo
    fi
  fi

  ########################
  ## SERVICE / DOCKER STATUS
  ########################
  if ${do_services}; then
    if [[ -n "${LOCAL_SERVICES+x}" && "${ST_ACTION}" != "dockers" ]]; then
      if ${verbose}; then
        echo -e "\n$(DIVIDER)\n${idsCL[Yellow]}${idsST[Bold]} LOCALHOST Service Status${idsST[Reset]}${idsCL[Default]}"
        echo -e "$(DIVIDER)\n"
        echo -en " ${idsST[Bold]}${idsCL[LightCyan]}${NODE_HOSTNAME} (${local_ips})${idsST[Reset]}"
        if ${show_uptime}; then
          up_txt=$(uptime -p)
          # NOTE(review): "LightYello" looks like a typo for LightYellow; kept
          # because the palette keys are defined outside this block.
          echo -e "${idsCL[LightCyan]} - ${up_txt} ${idsCL[LightYello]}- localhost${idsCL[Default]}"
        else
          echo -e "${idsCL[Default]}"
        fi
        DIVIDER . green
      fi

      for srvc in "${LOCAL_SERVICES[@]}"; do
        label=${NM_SERVICES[${srvc}]}
        marker="${TMPFOLDER}/${NODE_HOSTNAME}~${srvc}"
        ${verbose} && echo -en " ${label}$(_status_pad "${label}" 30 "${ST_ACTION}"): "

        if [[ "$(systemctl is-active "${srvc}")" != "active" ]]; then
          if [[ ! -f "${marker}.down" ]]; then
            ${verbose} && echo -e "${idsCL[Red]}Not Running${idsCL[Default]}"
            SENDNOTICE "${NODE_HOSTNAME}" "${label} is down" 1
            touch "${marker}.down"
            echo "$(date) - LOCAL - ${NODE_HOSTNAME} - ${label} is down" >> "${LOGFILE}"
          else
            errtime=$(_status_age "${marker}.down")
            if (( errtime > RENOTIFY )); then
              [[ -f "${marker}.errtime" ]] || mv "${marker}.down" "${marker}.errtime"
              # NOTE(review): unlike the remote-node path, no notice is re-sent
              # here - confirm whether that is intended.
              touch "${marker}.down"
            fi
            # Always finish the line; it used to be left open inside the
            # re-notify window.
            ${verbose} && echo -e "${idsCL[Red]}Not running for $(SHOW_TIME "$(_status_downtime "${marker}")")${idsCL[Default]}"
          fi
        else
          ${verbose} && echo -e "${idsCL[Green]}Running${idsCL[Default]}"
          if [[ -f "${marker}.down" ]]; then
            toterrtime=$(_status_downtime "${marker}")
            rm -f "${marker}".*
            SENDNOTICE "${NODE_HOSTNAME}" "${label} is back UP! It was down for $(SHOW_TIME "${toterrtime}")"
            echo "$(date) - LOCAL - ${NODE_HOSTNAME} - ${label} is back up" >> "${LOGFILE}"
          fi
        fi
      done
      ${verbose} && echo
    fi

    for NTYPE in "${NODE_TYPES[@]}"; do
      # In "dockers" mode, node types without a docker list are skipped.
      if [[ ! -v "${NTYPE}_DOCKER" && "${ST_ACTION}" == "dockers" ]]; then
        continue
      fi

      if ${verbose}; then
        echo -e "${idsST[Bold]}"; DIVIDER
        if [[ "${ST_ACTION}" == "dockers" ]]; then
          echo -e "${idsCL[Yellow]} ${NM_NODETYPES[$NTYPE]}-Node Docker Status${idsCL[Default]}"
        else
          echo -e "${idsCL[Yellow]} ${NM_NODETYPES[$NTYPE]}-Node Service Status${idsCL[Default]}"
        fi
        DIVIDER; echo -e "${idsST[Reset]}"
      fi

      var="${NTYPE}_HOSTS[@]"
      for nip in "${!var}"; do
        ${verbose} && echo -en " ${idsST[Bold]}${idsCL[LightCyan]}${NM_HOSTNAMES[${nip}]} (${nip})${idsST[Reset]}"
        host_marker="${TMPFOLDER}/${nip}"

        if [[ "${local_ips}" == *"${nip}"* ]]; then
          NCMD=''; LH='- localhost'
          # The local host is reachable by definition; previously the result
          # of the preceding remote probe leaked into this iteration.
          checkhost=true
        else
          NCMD="ssh root@${nip}"; LH=''
          checkhost=$(CHECK_HOST "${nip}")
        fi

        if [[ "${checkhost}" != "false" ]]; then
          if ${verbose}; then
            if ${show_uptime}; then
              up_txt=$(${NCMD} uptime -p)
              echo -e "${idsCL[LightCyan]} - ${up_txt} ${idsCL[LightYello]}${LH}${idsCL[Default]}"
            else
              echo -e "${idsCL[Default]}"
            fi
            DIVIDER false green
          fi

          if [[ -f "${host_marker}.down" ]]; then
            toterrtime=$(_status_downtime "${host_marker}")
            rm -f "${host_marker}".*
            SENDNOTICE "${NM_HOSTNAMES[${nip}]}-${nip}-UP" "${NM_HOSTNAMES[${nip}]} is back UP! It was down for $(SHOW_TIME "${toterrtime}")"
          fi

          if [[ "${ST_ACTION}" != "dockers" ]]; then
            _status_node_services "${NTYPE}" "${nip}" "${NCMD}"
          fi
          if [[ -v "${NTYPE}_DOCKER" && "${ST_ACTION}" != "services" ]]; then
            _status_node_dockers "${NTYPE}" "${nip}" "${NCMD}"
          fi
        else
          if [[ ! -f "${host_marker}.down" ]]; then
            touch "${host_marker}.down"
            [[ -f "${host_marker}.errtime" ]] || touch "${host_marker}.errtime"
            ${verbose} && echo -e "${idsCL[Red]} - Node is down!${idsCL[Default]}"
            echo "$(date) - ${nip} - ${NM_HOSTNAMES[${nip}]} - Node is down" >> "${LOGFILE}"
          else
            toterrtime=$(_status_age "${host_marker}.errtime")
            errtime=$(_status_age "${host_marker}.down")
            ${verbose} && echo -e "${idsCL[Red]} - Node has been down for $(SHOW_TIME "${toterrtime}") ${idsCL[LightYello]}${LH}${idsCL[Default]}"
            if (( errtime > RENOTIFY )); then
              touch "${host_marker}.down"
              SENDNOTICE "${NM_HOSTNAMES[${nip}]}-${nip}" "${NM_HOSTNAMES[${nip}]} has been down for $(SHOW_TIME "${toterrtime}")" 1
            elif (( errtime > 60 && errtime < 180 && toterrtime < 180 )); then
              # First notice, delayed so a short blip does not alert. The old
              # guard tested for a missing .errtime, which always exists here,
              # so this notice could never be sent.
              SENDNOTICE "${NM_HOSTNAMES[${nip}]}-${nip}" "${NM_HOSTNAMES[${nip}]} is down" 1
            fi
          fi
        fi
        ${verbose} && echo
      done
    done
  fi

  ########################
  ## REPLICATION CHECK
  ########################
  if ${do_repl}; then
    if ${verbose}; then
      echo -e "${idsST[Bold]}"; DIVIDER
      echo -e "${idsCL[Yellow]} Replication Status Between the Primary and Secondary Nodes${idsCL[Default]}"
      DIVIDER; echo -e "${idsST[Reset]}"
      echo -en " ${idsCL[LightCyan]}Starting processes to collect/monitor replication status : "
    fi

    # Phase 1: start one background REPLCHECK per (secondary node, folder).
    for NTYPE in "${NODE_TYPES[@]}"; do
      PH="${NTYPE}_HOSTS[0]"
      PH_CMD="ssh root@${!PH}"
      var="${NTYPE}_REPL_CHECK[@]"
      [[ -n "${!var+x}" ]] || continue
      var2="${NTYPE}_HOSTS[@]"
      for nip in "${!var2}"; do
        [[ "${local_ips}" == *"${nip}"* || "${nip}" == "${!PH}" ]] && continue
        [[ -f "${FOLDER}/${!PH}.down" || -f "${TMPFOLDER}/${nip}.down" ]] && continue
        for rcheck in "${!var}"; do
          # The redirection must precede '&'; after it, it applied to nothing.
          REPLCHECK "${rcheck}" "${nip}" "${PH_CMD}" "${ST_ACTION}" >/dev/null 2>&1 &
        done
      done
    done
    ${verbose} && echo -e "${idsCL[Green]}Done${idsCL[Default]}\n"

    # Phase 2: collect the .good / .timeout result files left by REPLCHECK.
    for NTYPE in "${NODE_TYPES[@]}"; do
      PH="${NTYPE}_HOSTS[0]"
      PH_CMD="ssh root@${!PH}"
      var="${NTYPE}_REPL_CHECK[@]"
      [[ -n "${!var+x}" ]] || continue
      var2="${NTYPE}_HOSTS[@]"
      for nip in "${!var2}"; do
        [[ "${local_ips}" == *"${nip}"* || "${nip}" == "${!PH}" ]] && continue

        if ${verbose}; then
          echo -e " ${idsCL[LightCyan]}${NM_HOSTNAMES[${!PH}]} (${!PH}) <--> ${idsST[Bold]}${NM_HOSTNAMES[${nip}]} (${nip})${idsST[Reset]}${idsCL[Default]}"
          DIVIDER false green
        fi

        if [[ -f "${FOLDER}/${!PH}.down" || -f "${TMPFOLDER}/${nip}.down" ]]; then
          if ${verbose}; then
            [[ -f "${FOLDER}/${!PH}.down" ]] && echo -e "${idsCL[Red]}${NM_HOSTNAMES[${!PH}]} (${!PH}) is offline${idsCL[Default]}"
            [[ -f "${TMPFOLDER}/${nip}.down" ]] && echo -e "${idsCL[Red]}${NM_HOSTNAMES[${nip}]} (${nip}) is offline${idsCL[Default]}"
          fi
        else
          for rcheck in "${!var}"; do
            label=${REPL_DESC[${rcheck}]}
            ${verbose} && echo -en " ${label}$(_status_pad "${label}" 30 "${ST_ACTION}"): "

            result="${TMPFOLDER}/repl.${rcheck}.${nip}"
            marker="${TMPFOLDER}/${nip}~${rcheck}.down"
            checked=''
            # Safety net: REPLCHECK gives up after at most two minutes, so a
            # result missing after five is treated as a timeout.
            deadline=$(( $(date +%s) + 300 ))
            until [[ -n "${checked}" ]]; do
              if [[ -f "${result}.good" ]]; then
                checked=good
              elif [[ -f "${result}.timeout" ]]; then
                checked=timeout
              elif (( $(date +%s) > deadline )); then
                checked=timeout
              else
                sleep 1
              fi
            done
            # Removed only after it was seen: deleting inside the polling loop
            # could race with REPLCHECK creating the file and hang forever.
            rm -f "${result}".*

            if [[ "${checked}" == "timeout" ]]; then
              ${verbose} && echo -e "${idsCL[Red]}Timeout${idsCL[Default]}"
              if [[ ! -f "${marker}" ]]; then
                touch "${marker}"
                SENDNOTICE "Repl-Timeout-'${NM_HOSTNAMES[${nip}]}'" "${label} (${REPL_CHECKS[${rcheck}]})" 1
                echo "$(date) - ${nip} - ${NM_HOSTNAMES[${nip}]} - (${label}) Replicated folder timeout, it is not syncing" >> "${LOGFILE}"
              fi
            else
              ${verbose} && echo -e "${idsCL[Green]}Good${idsCL[Default]}"
              if [[ -f "${marker}" ]]; then
                rm -f "${marker}"
                SENDNOTICE "Repl-Timeout-'${NM_HOSTNAMES[${nip}]}'" "Replicated folder is back up!\n${label} (${REPL_CHECKS[${rcheck}]})"
                echo "$(date) - ${nip} - ${NM_HOSTNAMES[${nip}]} - (${label}) Replicated folder is back up" >> "${LOGFILE}"
              fi
            fi
          done
        fi
        ${verbose} && echo
      done
    done

    ##########################
    # REMOVE REPL CHECK FILES
    ##########################
    ${verbose} && echo -en "${idsCL[LightCyan]} Cleaning up status checks... ${idsCL[Default]}"
    for NTYPE in "${NODE_TYPES[@]}"; do
      PH="${NTYPE}_HOSTS[0]"
      [[ -f "${FOLDER}/${!PH}.down" ]] && continue
      PH_CMD="ssh root@${!PH}"
      var="${NTYPE}_REPL_CHECK[@]"
      [[ -n "${!var+x}" ]] || continue
      for rcheck in "${!var}"; do
        ${PH_CMD} rm -f "${FOLDER}/test.repl" "${REPL_CHECKS[${rcheck}]}/test.repl" &
      done
    done
    rm -Rf "${FOLDER}/test.repl"
    if ${verbose}; then
      echo -e "${idsCL[Green]}Complete${idsCL[Default]}"
      echo
    fi
  fi

  ########################
  ## FREE SPACE CHECK (only on explicit request)
  ########################
  if [[ "${ST_ACTION}" == "freespace" ]]; then
    for NTYPE in "${NODE_TYPES[@]}"; do
      echo -e "${idsST[Bold]}"; DIVIDER
      echo -e "${idsCL[Yellow]} ${NM_NODETYPES[$NTYPE]}-Node Free Space Scan${idsCL[Default]}"
      DIVIDER; echo -e "${idsST[Reset]}"

      var="${NTYPE}_HOSTS[@]"
      for nip in "${!var}"; do
        if [[ "${local_ips}" == *"${nip}"* ]]; then
          NCMD=''; LH='- localhost'
        else
          NCMD="ssh root@${nip}"; LH=''
        fi
        echo -e " ${idsST[Bold]}${idsCL[LightCyan]}${NM_HOSTNAMES[${nip}]} (${nip})${idsST[Reset]}${idsCL[LightCyan]} ${idsCL[LightYello]}${LH}${idsCL[Default]}"
        DIVIDER false green

        echo -en " ${idsCL[LightCyan]}Getting drives from server ... ${idsCL[Default]}"
        # One "device total available" line per filesystem, sizes in MiB with
        # an M suffix. NCMD is used so the local host is read without ssh.
        mapfile -t drive_lines < <(${NCMD} df -BM \
          | grep -vE '^Filesystem|tmpfs|cdrom|@|ram|loop|udev|veeamimage|nvme|localhost|shm|mmcblk|overlay|-volume|Music|Software' \
          | awk '{ print $1 " " $2 " " $4 }')
        # Move the cursor up a line and clear it to erase the progress message.
        echo -en "\e[1A"
        echo -e "\e[0K\r"

        for line in "${drive_lines[@]}"; do
          read -r partition total avail <<< "${line}"
          total=${total//M/}
          freespace=${avail//M/}
          [[ "${total}" =~ ^[0-9]+$ && "${freespace}" =~ ^[0-9]+$ ]] || continue
          # Filesystems of 1 GiB or less are not reported.
          (( total > 1024 )) || continue

          pname=${partition##*/}
          pname=${pname#*vg-}
          echo -en " ${pname}$(_status_pad "${pname}" 20 ''): "

          if (( freespace > 1024 )); then
            fsdsp="$(bc <<< "scale=2; ${freespace}/1024") GB"
          else
            fsdsp="${freespace} MB"
          fi

          # The notices used to reference an undefined ${partition}; it now
          # carries the device name reported by df.
          if (( freespace <= 1024 )); then
            fs_status_color='Red'
            SENDNOTICE "Free Space Critical: '${NM_HOSTNAMES[${nip}]}'" "${partition} : ${fsdsp} free" 1
          elif (( freespace <= 5120 )); then
            fs_status_color='Yellow'
            SENDNOTICE "Free Space Warning: '${NM_HOSTNAMES[${nip}]}'" "${partition} : ${fsdsp} free"
          else
            fs_status_color='Green'
          fi
          echo -e "${idsCL[${fs_status_color}]}${fsdsp} ${idsCL[Default]}"
        done
        echo
      done
    done
  fi

  ########################
  if ${verbose}; then
    echo
    if [[ -z "${action}" || "${action}" == "gui" ]]; then
      DIVIDER true
      ENTER2CONTINUE
    fi
  fi

  end=$(date +%s)
  runtime=$((end-start))
  echo "runtime: ${runtime}"
}
#######################################
# Wait until the replication probe file test.repl on a secondary node matches
# the copy on the primary, then record the outcome as a result file.
#
# Arguments:
#   $1 - key into REPL_CHECKS (the replicated folder to probe)
#   $2 - IP of the secondary node
#   $3 - command prefix for reaching the primary (e.g. "ssh root@<ip>");
#        empty when the primary is the local machine
#   $4 - action; "check" waits up to 2 minutes, anything else 1 minute
#   $5 - optional: override the wait time in seconds
# Globals:   REPL_CHECKS, TMPFOLDER (read)
# Outputs:   creates ${TMPFOLDER}/repl.<key>.<ip>.good when the copies match,
#            or ${TMPFOLDER}/repl.<key>.<ip>.timeout when the wait expires.
#######################################
REPLCHECK(){
  local rcheck=${1}
  local nip=${2}
  local PH_CMD=${3}
  local wait_secs=${5:-}
  local probe="${REPL_CHECKS[${rcheck}]}/test.repl"
  local result="${TMPFOLDER}/repl.${rcheck}.${nip}"
  local deadline synced=false

  if [[ -z "${wait_secs}" ]]; then
    if [[ "${4}" != "check" ]]; then
      wait_secs=60
    else
      wait_secs=120
    fi
  fi
  deadline=$(( $(date +%s) + wait_secs ))

  while :; do
    # Cheap single-hop test first: has the probe reached the secondary at all?
    if ssh -q "root@${nip}" "[[ -f ${probe} ]]"; then
      # Decide on diff's exit status (0 = identical). Judging by empty output
      # reported "in sync" whenever ssh or diff itself failed.
      if [[ -z "${PH_CMD}" ]]; then
        ssh "root@${nip}" "cat ${probe}" | diff -q - "${probe}" >/dev/null 2>&1 && synced=true
      else
        ${PH_CMD} "ssh root@${nip} cat ${probe} | diff -q - ${probe}" >/dev/null 2>&1 && synced=true
      fi
    fi
    ${synced} && break
    (( $(date +%s) >= deadline )) && break
    # Pause between polls instead of issuing ssh round trips back to back.
    sleep 2
  done

  if ${synced}; then
    touch "${result}.good"
  else
    touch "${result}.timeout"
  fi
}