diff --git a/nodemgmt-scripts.sh b/nodemgmt-scripts.sh index 737b66fd..cf432895 100755 --- a/nodemgmt-scripts.sh +++ b/nodemgmt-scripts.sh @@ -224,49 +224,49 @@ STATUS-CHECK(){ fi nid=`expr $nid + 1` done + ##REPLICATION CHECK + nid=1 + for nip in "${NODE_HOSTS[@]}"; do + if [ "${nip}" = '10.5.10.51' ] && [ ! -z ${LOCAL_SERVICES+x} ]; then isreplhost=true; else isreplhost=false; fi + if [[ $(/sbin/ip -o -4 addr list ens192 | awk '{print $4}' | cut -d/ -f1) != *"${nip}"* ]] && [ "${isreplhost}" = "false" ]; then + checkhost=$(CHECK_HOST ${nip}) + if [ "${checkhost}" != "false" ]; then + if [ -f ${FOLDER}/${nip}.down ]; then + rm -f ${FOLDER}/${nip}.down + echo "iDS-Node${nid} is back UP!" | mail -s "iDS-Node${nid}-${nip}-UP" ${STATUS_CHECK_EMAIL} + fi + for rcheck in "${REPL_CHECK[@]}"; do + timeout=`date --date='20 seconds' +%s` + checked=false + until [ "${checked}" = "" ]; do + if [ "${NHCMD}" = "" ]; then + checked=`ssh root@${nip} "cat ${REPL_CHECKS[${rcheck}]}/test.repl" | diff - ${REPL_CHECKS[${rcheck}]}/test.repl` + else + checked="`${NHCMD} \"ssh root@${nip} \"cat ${REPL_CHECKS[${rcheck}]}/test.repl\" | diff - ${REPL_CHECKS[${rcheck}]}/test.repl\"`" + fi + if [ "`date +%s`" -gt "$timeout" ]; then + timeout=true + break + fi + done + if [ "${timeout}" != "true" ]; then + if [ -f ${FOLDER}/${nip}-${rcheck}.down ]; then + rm -f ${FOLDER}/${nip}-${rcheck}.down + echo "Replicated folder is back up!\n${REPL_DESC[${rcheck}]} (${REPL_CHECKS[${rcheck}]})" | mail -s "Repl-Timeout-'iDS-Node${nid}'" ${STATUS_CHECK_EMAIL} + fi + elif [ ! -f ${FOLDER}/${nip}-${rcheck}.down ]; then + touch ${FOLDER}/${nip}-${rcheck}.down + echo "${REPL_DESC[${rcheck}]} (${REPL_CHECKS[${rcheck}]})" | mail -s "Repl-Timeout-'iDS-Node${nid}'" ${STATUS_CHECK_EMAIL} + fi + done + elif [ ! -f ${FOLDER}/${nip}.down ]; then + touch ${FOLDER}/${nip}.down + echo "iDS-Node${nid} is down" | mail -s "iDS-Node${nid}-${nip}-DOWN" ${STATUS_CHECK_EMAIL} + fi + fi + nid=`expr $nid + 1` + done fi - ##REPLICATION CHECK - # nid=1 - # for nip in "${NODE_HOSTS[@]}"; do - # if [ "${nip}" = '10.5.10.51' ] && [ ! -z ${LOCAL_SERVICES+x} ]; then isreplhost=true; else isreplhost=false; fi - # if [[ $(/sbin/ip -o -4 addr list ens192 | awk '{print $4}' | cut -d/ -f1) != *"${nip}"* ]] && [ "${isreplhost}" = "false" ]; then - # checkhost=$(CHECK_HOST ${nip}) - # if [ "${checkhost}" != "false" ]; then - # if [ -f ${FOLDER}/${nip}.down ]; then - # rm -f ${FOLDER}/${nip}.down - # echo "iDS-Node${nid} is back UP!" | mail -s "iDS-Node${nid}-${nip}-UP" ${STATUS_CHECK_EMAIL} - # fi - # for rcheck in "${REPL_CHECK[@]}"; do - # timeout=`date --date='20 seconds' +%s` - # checked=false - # until [ "${checked}" = "" ]; do - # if [ "${NHCMD}" = "" ]; then - # checked=`ssh root@${nip} "cat ${REPL_CHECKS[${rcheck}]}/test.repl" | diff - ${REPL_CHECKS[${rcheck}]}/test.repl` - # else - # checked="`${NHCMD} \"ssh root@${nip} \"cat ${REPL_CHECKS[${rcheck}]}/test.repl\" | diff - ${REPL_CHECKS[${rcheck}]}/test.repl\"`" - # fi - # if [ "`date +%s`" -gt "$timeout" ]; then - # timeout=true - # break - # fi - # done - # if [ "${timeout}" != "true" ]; then - # if [ -f ${FOLDER}/${nip}-${rcheck}.down ]; then - # rm -f ${FOLDER}/${nip}-${rcheck}.down - # echo "Replicated folder is back up!\n${REPL_DESC[${rcheck}]} (${REPL_CHECKS[${rcheck}]})" | mail -s "Repl-Timeout-'iDS-Node${nid}'" ${STATUS_CHECK_EMAIL} - # fi - # elif [ ! -f ${FOLDER}/${nip}-${rcheck}.down ]; then - # touch ${FOLDER}/${nip}-${rcheck}.down - # echo "${REPL_DESC[${rcheck}]} (${REPL_CHECKS[${rcheck}]})" | mail -s "Repl-Timeout-'iDS-Node${nid}'" ${STATUS_CHECK_EMAIL} - # fi - # done - # elif [ ! -f ${FOLDER}/${nip}.down ]; then - # touch ${FOLDER}/${nip}.down - # echo "iDS-Node${nid} is down" | mail -s "iDS-Node${nid}-${nip}-DOWN" ${STATUS_CHECK_EMAIL} - # fi - # fi - # nid=`expr $nid + 1` - # done fi end=`date +%s` runtime=$((end-start))