#!/bin/ksh93
# ALTRAN_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# Copyright (C) Altran ACT S.A.S. 2018,2021. All rights reserved.
#
# ALTRAN_PROLOG_END_TAG
#
# IBM_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# 61haes_r721 src/43haes/usr/sbin/cluster/events/utils/cl_mode3.sh 1.52
#
# Licensed Materials - Property of IBM
#
# COPYRIGHT International Business Machines Corp. 1990,2016
# All Rights Reserved
#
# US Government Users Restricted Rights - Use, duplication or
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
#
# IBM_PROLOG_END_TAG
# @(#) 7d4c34b 43haes/usr/sbin/cluster/events/utils/cl_mode3.sh, 726, 2147A_aha726, Feb 05 2021 09:50 PM
###############################################################################
#
# COMPONENT_NAME: EVENTUTILS
#
# FUNCTIONS: none
#
# Questions? Comments? Expressions of Astonishment? mailto:hafeedbk@us.ibm.com
#
###############################################################################

# Include file containing SCSIPR functions
. /usr/es/sbin/cluster/events/utils/cl_scsipr_event_functions

# Include Availability metrics library file
. /usr/es/lib/ksh93/availability/cl_amlib

###############################################################################
#
# Name:        varyonc
#
# Function:    Try to vary on the volume group in concurrent mode.
#              If that fails, use a forced vary on if appropriate.
#
# Arguments:   volume group name
#              sync flag
#
# Returns:     0 - varyon successful
#              anything else indicates failure
#              If the varyonvg fails, then if cl_mode3 was invoked from
#              process_resources, the global variable STATUS is set
#              to 11, otherwise it is set to 1
#
###############################################################################
function varyonc
{
    typeset PS4_FUNC="varyonc"
    [[ "$VERBOSE_LOGGING" == "high" ]] && set -x

    integer i=0

    #
    : clstart uses this same condition to decide whether or not to
    : start gsclvmd. If the daemon should be running, loop till it
    : is ready to handle requests.
    #
    if [[ -x /usr/sbin/gsclvmd ]] ; then
        #
        : Wait for gsclvmd to be ready.
        #
        for (( i = 0 ; i < 42 ; i++ ))
        do
            if lssrc -ls gsclvmd >/dev/null 2>&1
            then
                break
            fi
            sleep 1
        done
    fi

    NOQUORUM=20     # varyonvg return code for failure due to lack of quorum

    #
    : Pick up passed parameters: volume group and sync flag
    #
    typeset SYNCFLAG vg
    vg=$1
    SYNCFLAG=${2:-""}
    integer rc=0    # return code

    # Logging the concurrent volume group varyon begin entry along with timestamp
    amlog_trace $AM_VG_VARYON_BEGIN "Activating Volume Group|$vg"

    #
    : Try to vary on volume group $vg in concurrent mode
    #
    varyonvg $SYNCFLAG -c $vg
    rc=$?

    if (( $rc == $NOQUORUM ))
    then
        #
        : If the varyon failed due to lack of quorum, go see if there is
        : at least one copy of the data available, making it worthwhile
        : to try a forced varyon.
        #
        if cl_mirrorset $vg
        then
            varyonvg -f $SYNCFLAG -c $vg
            rc=$?
        fi
    fi

    #
    : If varyon was ultimately unsuccessful, note the error
    #
    if (( $rc != 0 ))
    then
        cl_log 203 "$PROGNAME: Failed varyonvg $SYNCFLAG -c of $vg." $PROGNAME $SYNCFLAG $vg
        # Logging the concurrent volume group varyon failure entry along with timestamp
        amlog_err $AM_VG_VARYON_FAILURE "Activating Volume Group|$vg"
        STATUS=1
    else
        NONERRORVGS=${NONERRORVGS:+"$NONERRORVGS "}$vg
    fi

    #
    : Update volume group time stamps cluster wide. According to LVM,
    : it is possible for the timestamp to be updated even if the varyon
    : fails. So the sync is placed here.
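    :
    : Keeping the time stamps consistent on all nodes is what lets later
    : lazy update processing accept the ODM view of the volume group
    : without a needless export and import - that rationale is an
    : inference from general PowerHA behavior, not something this script
    : verifies.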
    #
    cl_update_vg_odm_ts -o $vg

    # Check whether the vary on of the volume group in concurrent mode succeeded
    if (( $rc == 0 ))
    then
        # Logging the concurrent volume group varyon end entry along with timestamp
        amlog_trace $AM_VG_VARYON_END "Activating Volume Group|$vg"
    fi

    return $rc      # pass back success/failure indication
}

###############################################################################
#
# Name:        cl_mode3
#
# Returns:
#     0 - All of the volume groups are successfully varied on/changed mode
#     1 - varyonvg/mode change of at least one volume group failed
#     2 - Zero arguments were passed
#
# This function will place the volume groups passed in as arguments in
# the designated mode.
#
# Arguments:   -s   Varyon volume group in mode 3 with sync
#              -n   Varyon volume group in mode 3 without sync
#
# Environment: VERBOSE_LOGGING, PATH
#
###############################################################################

PROGNAME=${0##*/}
export PATH="$(/usr/es/sbin/cluster/utilities/cl_get_path all)"
if [[ $VERBOSE_LOGGING == "high" ]]
then
    set -x
    version='1.52 $Source$'
fi

if (( $# < 2 ))
then
    #
    : Caller used incorrect syntax
    #
    cl_echo 204 "usage: $PROGNAME [-n | -s] volume_groups_to_varyon" $PROGNAME
    exit 2
fi

#
: if JOB_TYPE is set, and it is not equal to "GROUP", then
: we are processing for process_resources
#
if [[ ${JOB_TYPE:-0} != 0 && $JOB_TYPE != "GROUP" ]]
then
    PROC_RES=true
else
    PROC_RES=false
fi

if [[ $1 == "-n" ]] ; then      # sync or no sync
    SYNCFLAG="-n"
    shift
elif [[ $1 == "-s" ]] ; then
    SYNCFLAG=""                 # LVM default is "sync"
    shift
else
    SYNCFLAG=""                 # LVM default is "sync"
fi

integer STATUS=0
integer SAVED_STATUS=0
MODE=""
export MODE
NONERRORVGS=""
lv_list=""
vg_list="$*"

set -u

#
: Update resource manager with the fact that we are trying to bring all the
: concurrent volume groups online
#
ALLCONCURVG="All_concurrent_vgs"
cl_RMupdate resource_acquiring $ALLCONCURVG $PROGNAME

#
: See what volume groups are currently varyd on, so that they can be
: skipped.
#
ON_LIST=$(print $(lsvg -L -o 2>/tmp/lsvg.err))

#
: Process each given volume group
#
typeset PS4_LOOP=""
for vg in $*
do
    PS4_LOOP=$vg

    #
    : We want to retain the fact that at least one VG had a problem, and
    : return that info to the caller. We check \$STATUS at this point
    : because there are "continues" in the code that make checking at the
    : bottom of the for loop impossible. Any non-zero STATUS will always
    : be 1 or always be 11 for a particular invocation of cl_mode3.
    #
    if (( $SAVED_STATUS == 0 ))
    then
        SAVED_STATUS=$STATUS
    fi

    #
    : STATUS can be 0, 1, or 11. A problem with a VG is indicated by a
    : STATUS of 1 or 11. STATUS is set to 11 instead of 1 if this script
    : was called from process_resources. If so, cl_RMupdate is run locally.
    : An exit code of 11 prevents cl_RMupdate from being run again when we
    : return to process_resources.
    #
    STATUS=0

    VGID=$(/usr/sbin/getlvodm -v $vg)
    if [[ -z $VGID ]]
    then
        #
        : Volume group $vg could not be found. Report error and
        : continue with next one.
        #
        cl_log 9650 "$PROGNAME: Volume group $vg not found." $PROGNAME $vg
        STATUS=1

        #
        : Update resource manager with results
        #
        cl_RMupdate resource_error $vg $PROGNAME
        if [[ $PROC_RES == true ]]
        then
            STATUS=11
        fi

        continue        # go on to next volume group
    fi

    #
    : Check to see if volume group $vg is already varyd on. The extended
    : pattern matches $vg only as a whole word in the blank separated list.
    #
    if [[ $ON_LIST == ?(* )$vg?( *) ]]
    then
        #
        : Note this and keep going. This could happen legitimately on a
        : node up after a forced down.
        #
        CONC=$(lqueryvg -g $VGID -C)
        RC=$?
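        #
        # lqueryvg -g VGID -C reports the concurrency state of the volume
        # group. The test below treats a value of 1 as "already varyd on in
        # concurrent mode"; take that mapping as an inference from this
        # script - lqueryvg is a low level LVM command with no documented
        # output contract.
        #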
        #
        : Find out if it is varyd on in concurrent mode
        #
        if (( $RC == 0 )) && [[ -n $CONC ]] && (( 1 == $CONC ))
        then
            #
            : Since volume group $vg is already varyd on in
            : concurrent mode, there is really nothing more to do
            : with it. Go on to the next one.
            #
            continue
        else
            #
            : No, it is not. Now, find out if it is defined as concurrent capable
            #
            MODE=$(lqueryvg -g $VGID -X)
            RC=$?
            if (( $RC == 0 )) && [[ -n $MODE ]] && (( 0 == $MODE ))
            then
                #
                : We get here in the case where volume group $vg is
                : varyd on, but not in concurrent mode, and is not
                : concurrent capable. This would be the case for a SCSI
                : RAID disk used in concurrent mode.
                #
                if ! cl_raid_vg $vg ; then
                    #
                    : Volume group $vg is not made up of known RAID devices
                    #
                    cl_log 485 "$PROGNAME: Failed concurrent varyon of $vg\n\
because it is not made up of known RAID devices." $PROGNAME $vg
                    STATUS=1
                fi
            fi

            if (( $RC != 0 || $STATUS == 0 ))
            then
                #
                : We get here either because we cannot query the state
                : of volume group $vg - which means it is in pretty sad
                : shape - or we could successfully query the state of
                : the volume group, and found it to be a concurrent
                : capable volume group varyd on in non-concurrent mode.
                #
                cl_echo 200 "$PROGNAME: Concurrent volume group "$vg" is already vary'd on in non-concurrent mode." $PROGNAME $vg

                #
                : Try to recover by varying it off, to be varyd on in
                : concurrent mode below.
                #
                if ! varyoffvg $vg
                then
                    #
                    : Unable to vary off volume group $vg - probably because
                    : it is in use. Note error and keep going
                    #
                    cl_log 28 "$PROGNAME: Failed varyoff of $vg." $PROGNAME $vg
                    STATUS=1
                fi

                #
                : At this point, volume group $vg was varyd off. The flow
                : takes over below, and varys on the volume group in
                : concurrent mode.
                #
            fi
        fi      # end on, but not concurrent

        #
        : Update resource manager if we could not get volume group $vg back to
        : a reasonable - varyd off - state.
        #
        if (( $STATUS == 1 ))
        then
            cl_RMupdate resource_error $vg $PROGNAME
            if [[ $PROC_RES == true ]]
            then
                STATUS=11
            fi
            continue        # go on to next volume group
        fi

        #
        : At this point, volume group $vg was varyd off. The flow takes over
        : below, and varys on the volume group in concurrent mode.
        #
    else
        #
        : Checking if SCSI PR is enabled and, if so, confirming that
        : the SCSI PR reservations are intact.
        #
        typeset SCSIPR_ENABLED=$(clodmget -n -q "policy=scsi" -f value HACMPsplitmerge)
        if [[ $SCSIPR_ENABLED == Yes ]]
        then
            cl_confirm_scsipr_res $vg
            if (( $? != 0 ))
            then
                #
                : We are not sure if the SCSIPR reservation exists,
                : so we are going to exit with status 1, which will put
                : the corresponding RG into ERROR state.
                : Reservation Policy, Registered Keys are extracted for debugging.
                #
                # Print the current policy of Volume Group
                clpr_Get_policy_vg $vg
                # Print the current PR Key of Volume Group
                clpr_Get_prkey_vg $vg
                # Print the reservation of Volume Group
                clpr_ReadRes_vg $vg
                # Print the registered PR Keys of the Volume Group (1 is a dummy key)
                clpr_verifyKey_vg $vg 1
                # Print Configured Reserve Policy, Effective Reserve Policy and
                # Reservation Status of VG.
                clrsrvmgr -rg $vg
                # Fail varyonvg
                exit 1
            fi
        fi

        #
        : The volume group is not currently varied on.
        : If there is a fence group defined for this volume group, set the
        : fence height to allow read/write access
        #
        cl_set_vg_fence_height -c $vg rw
        RC=$?
        if (( $RC != 0 ))
        then
            #
            : Log any error, but continue. If this is a real problem, the
            : varyonvg will fail
            #
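            # Pull the localized read/write text from the C-SPOC message
            # catalog for use in the error report. The catalog entry is
            # assumed to be a comma separated list with the read/write
            # variant in the second field - hence the cut below; if the
            # default text comes back without a comma, cut passes it
            # through unchanged.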
            rw=$(dspmsg -s 103 cspoc.cat 350 'read/write' | cut -f2 -d,)
            cl_log 10511 "$PROGNAME: Volume group $vg fence height could not be set to read/write" $PROGNAME $vg $rw
        fi
    fi      # end already vary'd on

    #
    : Find out whether LVM thinks this volume group is concurrent
    : capable. Note that since volume group $vg is not varyd on at this
    : point in time, we have to look directly at the VGDA on the
    : hdisks in the volume group.
    #
    MODE=""
    /usr/sbin/getlvodm -w $VGID | while read pvid HDISK
    do
        #
        : Check each of the hdisks for a valid mode value. Stop at the
        : first one we find.
        #
        MODE=$(lqueryvg -p $HDISK -X)
        RC=$?
        if (( $RC == 0 )) && [[ -n $MODE ]]
        then
            break
        fi
    done

    if [[ -z $MODE ]]
    then
        #
        : If we could not pull a valid mode indicator off of any disk in
        : volume group $vg, there is no chance whatsoever that LVM
        : will be able to vary it on. Give up on this one.
        #
        cl_log 203 "$PROGNAME: Failed varyonvg $SYNCFLAG -c of $vg." $PROGNAME $SYNCFLAG $vg
        STATUS=1

    elif (( $MODE == 0 ))
    then
        #
        : LVM thinks that this is not a concurrent capable
        : volume group. Try to turn this into enhanced concurrent mode
        #
        if cl_makecm -C $vg                     # try to make it ECM
        then
            varyonc $vg $SYNCFLAG               # if successful, vary on
            if (( $STATUS == 1 ))
            then
                #
                : Update resource manager with results
                #
                cl_RMupdate resource_error $vg $PROGNAME
                if [[ $PROC_RES == true ]]
                then
                    STATUS=11
                fi
            fi
            continue                            # and go to next VG
        fi

    elif (( $MODE == 32 ))
    then
        #
        : LVM thinks that this volume group is defined as concurrent
        : capable, for the group services based concurrent mode
        #
        : try to varyon in concurrent with appropriate sync option
        #
        varyonc $vg $SYNCFLAG

    else
        #
        : Anything else - "1" or "16", depending on the level of LVM -
        : indicates that LVM thinks this volume group is
        : defined as concurrent capable, for the covert channel based
        : concurrent mode - SSA or 9333.
        #
        if cl_raid_vg $vg
        then
            #
            : SCSI attached RAID devices are reported as concurrent capable.
            : If that is what we have here, try to make it an enhanced
            : concurrent mode volume group
            #
            if [[ -z $PRE_EVENT_MEMBERSHIP ]]
            then
                #
                : If we are the first node up - so that no other node should
                : have volume group $vg varyd on - and running on AIX 5.3,
                : try to turn this into enhanced concurrent mode
                #
                if cl_makecm -C $vg             # try to make it ECM
                then
                    varyonc $vg $SYNCFLAG       # if successful, vary on
                    if (( $STATUS == 1 )); then
                        #
                        : Update resource manager with results
                        #
                        cl_RMupdate resource_error $vg $PROGNAME
                        if [[ $PROC_RES == true ]]; then
                            STATUS=11
                        fi
                    fi
                    continue                    # and go to next VG
                fi
            fi

            #
            : Bring on line in RAID concurrent mode
            #
            if ! convaryonvg $vg
            then
                #
                : It was not possible to vary on this volume
                : group. Note error and keep going.
                #
                STATUS=1
            fi
        else
            #
            : It is not a concurrent capable RAID device. The only remaining
            : supported choice is covert channel based concurrent mode.
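            : Here covert channel refers to communication carried over the
            : SSA or 9333 disk subsystem itself rather than over the
            : network - background detail only, not something this script
            : tests.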
            #
            if [[ -z $PRE_EVENT_MEMBERSHIP ]]
            then
                #
                : If we are the first node up - so that no other node should
                : have volume group $vg varyd on - and running on AIX 5.3,
                : try to turn this into enhanced concurrent mode
                #
                cl_makecm -C $vg
            fi

            #
            : Bring on line in enhanced - if above change done - or old
            : concurrent mode
            #
            varyonc $vg $SYNCFLAG
        fi
    fi

    #
    : update resource manager with results
    #
    if (( $STATUS == 1 ))
    then
        cl_RMupdate resource_error $vg $PROGNAME

        #
        : Save any error indication, so that an error return happens if any
        : volume group fails to come on line
        #
        if [[ $PROC_RES == true ]]
        then
            STATUS=11
        fi
    else
        #
        : Remove any prior failure indication on successful varyon
        #
        rm -f /usr/es/sbin/cluster/etc/vg/${vg}.fail
    fi
done
unset PS4_LOOP

if (( $SAVED_STATUS == 0 ))
then
    SAVED_STATUS=$STATUS
fi

#
: Update the resource manager with the unsurprising result that all volume
: groups that did not have errors have been varyd on.
#
ALLNONERRVG="All_nonerror_concurrent_vgs"
cl_RMupdate resource_up $ALLNONERRVG $PROGNAME

#
: There are circumstances - such as rg_move after a loss of quorum - where
: some disks may not be on line or where stale partitions could exist. In
: that case, do a sync.
#
if [[ -n $NONERRORVGS ]]
then
    cl_sync_vgs $NONERRORVGS &      # check, and sync if necessary
fi

exit $SAVED_STATUS                  # accumulated status