#!/bin/ksh93
#  ALTRAN_PROLOG_BEGIN_TAG
#  This is an automatically generated prolog.
#
#  Copyright (C) Altran ACT S.A.S. 2017,2019,2021.  All rights reserved.
#
#  ALTRAN_PROLOG_END_TAG
#
# IBM_PROLOG_BEGIN_TAG 
# This is an automatically generated prolog. 
#  
# 61haes_r721 src/43haes/usr/sbin/cluster/events/forced_down_too_long.sh 1.5.1.1 
#  
# Licensed Materials - Property of IBM 
#  
# COPYRIGHT International Business Machines Corp. 2006,2016 
# All Rights Reserved 
#  
# US Government Users Restricted Rights - Use, duplication or 
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp. 
#  
# IBM_PROLOG_END_TAG 

# @(#)  7d4c34b 43haes/usr/sbin/cluster/events/forced_down_too_long.sh, 726, 2147A_aha726, Feb 05 2021 09:50 PM

#########################################################################
#
#       Name:           forced_down_too_long
#
#       Description:    When a node is in forced down (unmanage) state for 
#                       too long, this event script is called to display
#                       a message periodically.
#
#       Called by:      cluster manager
#
#       Calls to:       None
#
#       Arguments:      NUM_SECS - hard coded to 60 minutes
#                       EVENT - not used
#
#       Returns:        Never returns: the clstrmgr kills this script
#                       when cluster services are resumed
#
#########################################################################

#########################################################################
#
#       Name:           sigquit_handler
#
#       Description:    Signal handler installed by the main script with
#                       "trap sigquit_handler INT".  The clstrmgr will kill
#                       this script when cluster services are resumed -
#                       display an informative message on our way out.
#
#       Globals:        PROGNAME - script name shown in the message; the
#                                  basename of $0 is used if it is not
#                                  set yet
#                       CLUSTER  - cluster name shown in the message; may
#                                  still be unset if the signal arrives
#                                  during start-up
#
#       Returns:        Never returns - exits the script with status 0
#
#########################################################################
sigquit_handler ()
{
    typeset PS4_FUNC="sigquit_handler"

    # The trap is armed before the main script assigns PROGNAME and CLUSTER,
    # and the main script later turns on "set -u".  Expand both with a
    # default so an early signal cannot abort the handler on an unset
    # parameter, and quote them so an empty value is still handed to dspmsg
    # as its own argument instead of silently disappearing.
    typeset prog=${PROGNAME:-${0##*/}}
    typeset cluster=${CLUSTER:-}

    dspmsg scripts.cat 329 "$prog:There are no nodes with unmanaged resources on Cluster $cluster.\n" "$prog" "$cluster"
    exit 0
}


#######################################################################
# Main
#
# Periodically reports (to the console and to stdout) that nodes of this
# cluster are still forced down, with the interval between reports doubling
# after every THRESHOLD messages until it reaches HOUR.  Runs until killed.
#######################################################################

# Resolve the script name and give CLUSTER a defined (empty) value before the
# trap is armed, so the handler never finds them unset once "set -u" is on.
typeset PROGNAME=${0##*/}
CLUSTER=""

# clstrmgr will kill this script: setup a signal handler so we
# can catch that and display a message
# NOTE(review): the handler is named for SIGQUIT but is registered for INT -
# confirm which signal clstrmgr actually sends.
trap sigquit_handler INT

export PATH="$(/usr/es/sbin/cluster/utilities/cl_get_path all)"

# Evaluate the output of cllsparam for this node with allexport on, so every
# variable it assigns is exported (presumably including VERBOSE_LOGGING,
# which is tested right below).
set -a
eval $(cllsparam -n $LOCALNODENAME)
set +a

if [[ $VERBOSE_LOGGING == "high" ]]; then
    eval export $(cllsparam -x)
    set -x
    version='%I%'
fi

# always 60 minutes
NUM_SECS=$1
EVENT=$2        # not used

# constants
integer HOUR=3600           # In seconds
integer THRESHOLD=5         # PERIOD doubles after every THRESHOLD messages
integer SLEEP_INTERVAL=1    # Length in seconds of each individual sleep call

#
# PERIOD is the wait before the message is displayed again.  It starts at 30
# and is consumed by the sleep loop below, i.e. it is counted in seconds.
# NOTE(review): TIME is advanced by PERIOD yet message 328 prints it as
# "minutes", and the file header describes NUM_SECS as 60 minutes.  The units
# look inconsistent - confirm what clstrmgr passes in $1 before changing it.
#
integer PERIOD=30
set -u

#
# Initialize counters
#
integer MESSAGECNT=0        # messages shown since PERIOD was last doubled
CLUSTER=$(cllsclstr -c | tail -1 | cut -d ':' -f2)
integer TIME=$NUM_SECS      # running total substituted into the message
integer sleep_cntr=0        # seconds slept so far in the current wait

#
# Forced down too long loops continuously, displaying messages with decreasing
# frequency so as not to overrun the logs, so we turn off tracing on
# purpose.  clstrmgr will kill this process when event processing resumes.
#
set +x

while :
do
    # Get the list of Forced down nodes.  Everything after the first ':' of
    # the matching lssrc line is kept, including any leading blank, which the
    # default message text relies on to separate it from "node(s)".
    FORCEDDOWNLIST=$(lssrc -ls clstrmgrES |
        grep "^Forced down node list:" |
        cut -f2 -d:)

    # Every substitution argument is quoted so that an empty value (for
    # example an empty cluster name) keeps its position in the argument list.
    MSG=$(dspmsg scripts.cat 328 "At the user's request, node(s)$FORCEDDOWNLIST of cluster $CLUSTER has suspended monitoring the resources for the last $TIME minutes. Restart cluster services to make the cluster-managed resources on this node highly available again." "$FORCEDDOWNLIST" "$CLUSTER" "$TIME")

    # print to console and stdout for maximum visibility; the message is
    # quoted so it is not subject to field splitting or pathname expansion
    printf '%s\n' "$MSG" >/dev/console
    printf '%s\n' "$MSG"

    # Adjust for next iteration: after every THRESHOLD messages double the
    # wait, but never let it exceed HOUR.
    if (( PERIOD < HOUR ))
    then
        (( MESSAGECNT++ ))
        if (( MESSAGECNT % THRESHOLD == 0 ))
        then
            MESSAGECNT=0
            (( PERIOD *= 2 ))
            if (( PERIOD > HOUR ))
            then
                PERIOD=$HOUR
            fi
        fi
    fi

    (( TIME += PERIOD ))

    # Wait PERIOD seconds in SLEEP_INTERVAL-sized slices instead of one long
    # sleep.  A shell runs a trap handler only after the current foreground
    # command returns, so short slices presumably keep the reaction to the
    # kill from clstrmgr prompt.  The counter advances by the slice length so
    # the total wait stays PERIOD seconds whatever SLEEP_INTERVAL is.
    sleep_cntr=0
    while (( sleep_cntr < PERIOD ))
    do
        sleep $SLEEP_INTERVAL
        (( sleep_cntr += SLEEP_INTERVAL ))
    done

done

# we expect to be killed by clstrmgr, so there is no formal exit value here