#!/bin/ksh93
# ALTRAN_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# Copyright (C) Altran ACT S.A.S. 2017,2019,2021. All rights reserved.
#
# ALTRAN_PROLOG_END_TAG
#
# IBM_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# 61haes_r721 src/43haes/usr/sbin/cluster/events/forced_down_too_long.sh 1.5.1.1
#
# Licensed Materials - Property of IBM
#
# COPYRIGHT International Business Machines Corp. 2006,2016
# All Rights Reserved
#
# US Government Users Restricted Rights - Use, duplication or
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
#
# IBM_PROLOG_END_TAG
# @(#) 7d4c34b 43haes/usr/sbin/cluster/events/forced_down_too_long.sh, 726, 2147A_aha726, Feb 05 2021 09:50 PM
#########################################################################
#
#   Name:           forced_down_too_long
#
#   Description:    When a node is in forced down (unmanage) state for
#                   too long, this event script is called to display
#                   a message periodically.
#
#   Called by:      cluster manager
#
#   Calls to:       cl_get_path, cllsparam, cllsclstr, lssrc, dspmsg
#
#   Arguments:      NUM_SECS - hard coded to 60 minutes
#                   EVENT - not used
#
#   Returns:        Never return: the clstrmgr will kill this script
#                   when cluster services are resumed
#
#########################################################################

#########################################################################
#
#   Name:           sigquit_handler
#
#   Description:    The clstrmgr will kill this script when cluster
#                   services are resumed - display an informative message
#                   on our way out.
#
#   Globals:        PROGNAME (read), CLUSTER (read)
#
#   Returns:        Never return - exits the script with status 0
#
#########################################################################
sigquit_handler ()
{
    typeset PS4_FUNC="sigquit_handler"

    # The substitution arguments are quoted so that an empty cluster name
    # is still passed as its own (empty) argument.
    dspmsg scripts.cat 329 "$PROGNAME:There are no nodes with unmanaged resources on Cluster $CLUSTER.\n" "$PROGNAME" "$CLUSTER"

    exit 0
}

#######################################################################
# Main
#######################################################################

# clstrmgr will kill this script: setup a signal handler so we
# can catch that and display a message
# NOTE(review): the handler is named for SIGQUIT but is installed for INT;
# confirm which signal clstrmgr actually delivers.
trap sigquit_handler INT

typeset PROGNAME=${0##*/}
export PATH="$(/usr/es/sbin/cluster/utilities/cl_get_path all)"

# Evaluate the cllsparam output for the local node with allexport on, so
# every variable it assigns is exported.
set -a
eval $(cllsparam -n $LOCALNODENAME)
set +a

if [[ $VERBOSE_LOGGING == "high" ]]; then
    eval export $(cllsparam -x)
    set -x
    version='%I%'
fi

# always 60 minutes
NUM_SECS=$1
EVENT=$2        # not used

# constants
integer HOUR=3600           # In seconds
integer THRESHOLD=5         # Error Time Doubles every THRESHOLD times
integer SLEEP_INTERVAL=1    # Seconds slept per step of the wait loop below

#
# PERIOD is the wait between two displays of the message. It starts at 30,
# doubles after every THRESHOLD messages and is capped at HOUR.
#
# NOTE(review): the units are inconsistent and need confirming against
# clstrmgr. The message labels TIME as minutes and TIME is advanced by
# PERIOD after each display, but each wait lasts PERIOD * SLEEP_INTERVAL
# seconds and PERIOD is capped at HOUR, which is expressed in seconds.
#
integer PERIOD=30

# From here on, referencing an unset variable is an error.
set -u

#
# Initialize counters
#
integer MESSAGECNT=0
CLUSTER=$(cllsclstr -c | tail -1 | cut -d ':' -f2)
integer TIME=$NUM_SECS
integer sleep_cntr=0

#
# Forced down too long loops continuously, displaying messages with decreasing
# frequency so as not to overrun the logs, so we turn off tracing on
# purpose. clstrmgr will kill this process when event processing resumes.
#
set +x

while :
do
    # Get the list of Forced down nodes.
    FORCEDDOWNLIST=$(lssrc -ls clstrmgrES | grep "^Forced down node list:" | cut -f2 -d:)

    # Every substitution argument is quoted: an empty cluster name must not
    # let the elapsed time slide into the cluster-name position.
    MSG=$(dspmsg scripts.cat 328 "At the user's request, node(s)$FORCEDDOWNLIST of cluster $CLUSTER has suspended monitoring the resources for the last $TIME minutes. Restart cluster services to make the cluster-managed resources on this node highly available again." \
        "$FORCEDDOWNLIST" "$CLUSTER" "$TIME")

    # print to console and stdout for maximum visibility
    # (quoted so the text is not subject to pathname expansion)
    echo "$MSG" >/dev/console
    echo "$MSG"

    # Adjust for next iteration: after every THRESHOLD messages the period
    # doubles, until it reaches the HOUR cap.
    if (( PERIOD < HOUR ))
    then
        (( MESSAGECNT++ ))
        if (( MESSAGECNT % THRESHOLD == 0 ))
        then
            MESSAGECNT=0
            (( PERIOD *= 2 ))
            if (( PERIOD > HOUR ))
            then
                PERIOD=$HOUR
            fi
        fi
    fi

    (( TIME += PERIOD ))

    # Wait in SLEEP_INTERVAL-second steps rather than one long sleep -
    # presumably so that the trapped signal is acted on promptly.
    sleep_cntr=0
    while (( sleep_cntr < PERIOD ))
    do
        sleep $SLEEP_INTERVAL
        (( sleep_cntr++ ))
    done
done

# we expect to be killed by clstrmgr, so there is no formal exit value here