#!/bin/ksh
#  ALTRAN_PROLOG_BEGIN_TAG
#  This is an automatically generated prolog.
#
#  Copyright (C) Altran ACT S.A.S. 2017,2021.  All rights reserved.
#
#  ALTRAN_PROLOG_END_TAG
#
# IBM_PROLOG_BEGIN_TAG 
# This is an automatically generated prolog. 
#  
# 61haes_r714 src/43haes/usr/sbin/cluster/events/utils/cl_update_statd.sh 1.10.2.1 
#  
# Licensed Materials - Property of IBM 
#  
# COPYRIGHT International Business Machines Corp. 1996,2014 
# All Rights Reserved 
#  
# US Government Users Restricted Rights - Use, duplication or 
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp. 
#  
# IBM_PROLOG_END_TAG 
# @(#)  7d4c34b 43haes/usr/sbin/cluster/events/utils/cl_update_statd.sh, 726, 2147A_aha726, Feb 05 2021 09:50 PM

###############################################################################
#
#   COMPONENT_NAME: EVENTUTILS
#
#   FUNCTIONS: get_node_ip
#
###############################################################################

###############################################################################
#
#  Name:  cl_update_statd
#
#	Update rpc.statd's notion of what the cluster looks like based
#	on the content of POST_EVENT_MEMBERSHIP.
#
#  Returns:
#       0 - rpc.statd update successful
#       1 - Failure
#
#  Arguments: none
#
#  Environment: VERBOSE_LOGGING, PATH, LOCALNODENAME
#		POST_EVENT_MEMBERSHIP
#
###############################################################################

###############################################################################
#
# Function: get_node_ip
#
#	Select the boot adapter on the twin node that should be registered
#	with rpc.statd.  A twin adapter qualifies when:
#	    - it is a public boot interface,
#	    - its state variable is UP and it answers a ping, and
#	    - the local node has a public boot interface on the same network
#	      and of the same address family whose state variable is also UP.
#
#	Interface state is looked up in shell variables named
#	    i<address>_<nodename>
#	where '.' and '/' (IPv4) are replaced by 'x' and ':' and '/' (IPv6)
#	are replaced by 'y'.  A missing variable is treated as "down".
#	These variables are expected to be present in the environment.
#
#  Arguments:	$1 - name of the twin node
#
#  Environment:	VERBOSE_LOGGING, PROGNAME, OP_SEP, LOCALNODENAME
#
#  Output:	the name of the selected adapter on stdout, or an empty
#		line when no suitable adapter was found
#
#  Returns:	0 always.  Exits 1 when not called with exactly one argument.
#
#  Notes:	NewTwin is assigned inside while loops that form the last
#		stage of a pipeline; this depends on ksh running that stage
#		in the current shell environment.
#
###############################################################################
function get_node_ip {

    # The caller enables 'set -u' before invoking this function, so guard
    # against VERBOSE_LOGGING not being set at all.
    if [[ ${VERBOSE_LOGGING:-} == 'high' ]]
    then
	PS4_TIMER=TRUE
	set -x
    fi

    # This should never happen but the check is cheap.
    # The message goes to stderr because the caller captures stdout as the
    # adapter name.
    if (( $# != 1 )) 
    then
	echo "${PROGNAME}[$LINENO]: internal error, please report this problem to ibm support\n" >&2
	exit 1
    fi

    Twin_Name=$1
    NewTwin=""

    #
    :	Get the Interface details for every interface on the twin node
    :	Reject interfaces on nodes that are not public  boot addresses
    :	because those are the only ones we have state information for
    #
    LOCAL_NETWORK_INFO=$(LC_ALL=C cllsif -J "$OP_SEP" -Sw -i $LOCALNODENAME)
    LC_ALL=C cllsif -J "$OP_SEP" -Sw -i $Twin_Name | \
    while IFS=${OP_SEP} read adapt type network net_type attrib node ip_addr skip interface skip netmask skip skip prefix ip_family
    do
	if [[ $attrib != 'public' || $type != 'boot' ]]
	then
	    continue
	fi
	
	#
	: Find the state of this candidate
	#
	if [[ $ip_family == 'AF_INET' ]] 
	then
	    addr=i$(print $ip_addr | tr "./" "xx")_$Twin_Name

	elif [[ $ip_family == 'AF_INET6' ]] 
	then
	    addr=i$(print $ip_addr | tr ":/" "yy")_$Twin_Name

	else
	    # Unknown address family: there is no state variable name to
	    # build, so skip this interface rather than reuse a stale or
	    # unset 'addr'.
	    continue
	fi
	eval candidate_state=\${$addr:-down}

	#
	: If state is UP, check to see if this node can talk to it
	#
	if [[ $candidate_state == 'UP' ]] 
	then
	    if ! ping -w 5 -c 1 -q $ip_addr >/dev/null
	    then
		#
		:   Even though the cluster manager thinks $adapt is up
		:   it does not respond to a ping.  So, keep looking.
		#
		#   This can happen if multiple networks/adapters fail
		#   at close to the same time.
		#
		continue
	    fi

	    # Look for a matching local interface.  The saved cllsif output
	    # is re-split into one record per line before being parsed.
	    echo $LOCAL_NETWORK_INFO | tr ' ' '\n' | \
	    while IFS=${OP_SEP} read lcl_adapt lcl_type lcl_network lcl_net_type lcl_attrib lcl_node lcl_ip_addr skip lcl_interface skip lcl_netmask skip skip lcl_prefix lcl_ip_family
	    do
		# Right-hand sides are quoted so they compare literally
		# instead of being interpreted as patterns.
		if [[ $lcl_network != "$network" || $lcl_type != 'boot' || $lcl_attrib != 'public' || $lcl_ip_family != "$ip_family" ]]
		then
		    continue
		fi

		if [[ $lcl_ip_family == 'AF_INET' ]] 
		then
		    addr=i$(print $lcl_ip_addr | tr "./" "xx")_$LOCALNODENAME

		elif [[ $lcl_ip_family == 'AF_INET6' ]] 
		then
		    addr=i$(print $lcl_ip_addr | tr ":/" "yy")_$LOCALNODENAME

		else
		    continue
		fi
		eval lcl_candidate_state=\${$addr:-down}
		if [[ $lcl_candidate_state == 'UP' ]]
		then
		    #
		    :	$adapt is on the same network as an interface that is up
		    :	on the local node, and the attributes match. 
		    #
		    NewTwin=$adapt
		    break  # all done
		fi
	    done
	fi

	# Stop at the first twin adapter that found a local match
	if [[ -n $NewTwin ]]
	then
	    break
	fi
    done

    if [[ -z $NewTwin ]]
    then
      cl_msg -e 0 -m 10743 "%1\$s[%2\$d]: No twin found for %3\$s \n" $PROGNAME $LINENO $Twin_Name
    fi

    echo $NewTwin
    return 0
}


###############################################################################
#
# Main Starts Here
#
###############################################################################

# Script name without its directory, used as a prefix in messages
PROGNAME=${0##*/}
export PATH="$(/usr/es/sbin/cluster/utilities/cl_get_path all)"
if [[ $VERBOSE_LOGGING == 'high' ]]
then
    # Import the settings printed by 'cllsparam -x' into the environment
    # and turn on command tracing.
    eval export $(cllsparam -x)
    PS4_TIMER=true
    set -x
    version='%I%'
fi

integer RC=0
LOCAL_FOUND=""		# assume we are NOT part of the post event membership
TWIN_NAME=""		# Assert that we don't know our twin
[[ -z $LOCALNODENAME ]] && LOCALNODENAME=$(get_local_nodename)
OP_SEP="$(cl_get_path -S)"

# From here on, referencing an unset variable is an error.  In particular
# POST_EVENT_MEMBERSHIP must be set (it may be empty).
set -u

#
# Walk the post event membership: note whether the local node is in it, and
# remember the other node as the twin.  If several other nodes are listed,
# the last one wins.  The case pattern is quoted so the local node name is
# compared literally rather than as a pattern.
#
for i in $POST_EVENT_MEMBERSHIP ; do
    case $i in
    "$LOCALNODENAME")	# OK, we survived this event, do nothing
			LOCAL_FOUND="true" ;;
    *)			# looks like we have a twin to register with
			TWIN_NAME=$i ;;
    esac
done

#
:   Make sure statd is running locally
#
# NOTE(review): the status check queries subsystem 'statd' while the restart
# below uses 'rpc.statd' - confirm which subsystem name lssrc expects.  The
# rpcinfo check for a registered 'status' service is the second line of
# defence either way.
if LC_ALL=C lssrc -s statd | grep -qw inoperative || ! LC_ALL=C rpcinfo -p | grep -qw status
then
    cl_msg -e 0 -m 10744 "%1\$s[%2\$d]: statd is not up on the local node \n" $PROGNAME $LINENO
    #
    :	Attempt to recover this situation by restarting statd
    #
    startsrc -s rpc.statd
    sleep 5		# give the daemon a moment before it is queried below
fi

#
:   Get the current twin, if there is one
#
# NOTE(review): stderr is captured as well, so on failure CURTWIN may hold
# an error message rather than a host - confirm what nfso prints here.
# RC recorded here is only visible in the trace; it is overwritten by any
# later (un)register call and is never returned to the caller.
CURTWIN=$(nfso -H sm_gethost 2>&1)
RC=$?

if [[ -z $LOCAL_FOUND || -z $TWIN_NAME ]] 
then
    #
    :	Local node is no longer a cluster member, unregister its twin
    #
    # This branch is also taken when the local node is a member but no
    # other node is in the membership.
    if [[ -n $CURTWIN ]] 
    then
	nfso -H sm_unregister $CURTWIN
	RC=$?
    fi

else
    #
    :	Get the interface to the twin node
    #
    NEWTWIN=$(get_node_ip "$TWIN_NAME")

    if [[ -z $NEWTWIN ]] 
    then
	#
	:   Theres no active interface to the twin, unregister
	#
	if [[ -n $CURTWIN ]] 
	then
	    nfso -H sm_unregister $CURTWIN
	    RC=$?
	fi

    # Quoted right-hand side: compare the two names literally, not as a
    # pattern.  When they are equal the registration is already correct
    # and nothing is done.
    elif [[ $NEWTWIN != "$CURTWIN" ]] 
    then
	#
	:   Need to register a new twin
	#
	if [[ -n $CURTWIN ]] 
	then
	    nfso -H sm_unregister $CURTWIN	# unregister current twin
	fi

	#
	:   Register our new twin, $NEWTWIN
	#
	nfso -H sm_register $NEWTWIN		# register new twin
	RC=$?
    fi
fi

#
:   RC is actually $RC
#
#   Return code is always passed back as '0', because
#   failure to (un)register is not a terminating error,
#   but a non-zero return code is treated as an event
#   error by the caller.
#
#   At the top level of a ksh script 'return' ends the script in the same
#   way 'exit' would.
#
return 0
