#!/bin/ksh93
# ALTRAN_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# Copyright (C) Altran ACT S.A.S. 2018,2021. All rights reserved.
#
# ALTRAN_PROLOG_END_TAG
#
# IBM_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# 61haes_r721 src/43haes/usr/sbin/cluster/events/utils/cl_mode3.sh 1.52
#
# Licensed Materials - Property of IBM
#
# COPYRIGHT International Business Machines Corp. 1990,2016
# All Rights Reserved
#
# US Government Users Restricted Rights - Use, duplication or
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
#
# IBM_PROLOG_END_TAG
# @(#) 7d4c34b 43haes/usr/sbin/cluster/events/utils/cl_mode3.sh, 726, 2147A_aha726, Feb 05 2021 09:50 PM
###############################################################################
#
# COMPONENT_NAME: EVENTUTILS
#
# FUNCTIONS: none
#
# Questions? Comments? Expressions of Astonishment? mailto:hafeedbk@us.ibm.com
#
###############################################################################

# Include file containing SCSIPR functions
. /usr/es/sbin/cluster/events/utils/cl_scsipr_event_functions

# Include Availability metrics library file
. /usr/es/lib/ksh93/availability/cl_amlib

###############################################################################
#
# Name:        varyonc
#
# Function:    Try to vary on the volume group in concurrent mode.
#              If that fails, use a forced vary on if appropriate.
#
# Arguments:   volume group name
#              sync flag
#
# Returns:     0 - varyon successful
#              anything else indicates failure
#              If the varyonvg fails, then if cl_mode3 was invoked from
#              process_resources, the global variable STATUS is set
#              to 11, otherwise it is set to 1
#
###############################################################################
function varyonc
{
    typeset PS4_FUNC="varyonc"
    [[ "$VERBOSE_LOGGING" == "high" ]] && set -x

    integer i=0

    #
    : clstart uses this same condition to decide whether or not to
    : start gsclvmd. If the daemon should be running, loop till it
    : is ready to handle requests.
    #
    if [[ -x /usr/sbin/gsclvmd ]] ; then
        #
        : Wait for gsclvmd to be ready.
        #
        for (( i = 0 ; i < 42 ; i++ ))
        do
            if lssrc -ls gsclvmd >/dev/null 2>&1
            then
                break
            fi
            sleep 1
        done
    fi

    NOQUORUM=20     # varyonvg return code for failure due to lack of quorum

    #
    : Pick up passed parameters: volume group and sync flag
    #
    typeset SYNCFLAG vg
    vg=$1
    SYNCFLAG=${2:-""}
    integer rc=0    # return code

    # Logging the concurrent volume group varyon begin entry along with timestamp
    amlog_trace $AM_VG_VARYON_BEGIN "Activating Volume Group|$vg"

    #
    : Try to vary on volume group $vg in concurrent mode
    #
    varyonvg $SYNCFLAG -c $vg
    rc=$?

    if (( $rc == $NOQUORUM ))
    then
        #
        : If the varyon failed due to lack of quorum, go see if there is
        : at least one copy of the data available, making it worthwhile
        : to try a forced varyon.
        #
        if cl_mirrorset $vg
        then
            varyonvg -f $SYNCFLAG -c $vg
            rc=$?
        fi
    fi

    #
    : If varyon was ultimately unsuccessful, note the error
    #
    if (( $rc != 0 ))
    then
        cl_log 203 "$PROGNAME: Failed varyonvg $SYNCFLAG -c of $vg." $PROGNAME $SYNCFLAG $vg
        # Logging the concurrent volume group varyon failure entry along with timestamp
        amlog_err $AM_VG_VARYON_FAILURE "Activating Volume Group|$vg"
        STATUS=1
    else
        NONERRORVGS=${NONERRORVGS:+"$NONERRORVGS "}$vg
    fi

    #
    : Update volume group time stamps cluster wide. According to LVM,
    : it is possible for the timestamp to be updated even if the varyon
    : fails. So the sync is placed here.
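    :
    : Keeping the time stamps consistent on all nodes is what lets later
    : lazy update processing accept the ODM view of the volume group
    : without a needless export and import - that rationale is an
    : inference from general PowerHA behavior, not something this script
    : verifies.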
    #
    cl_update_vg_odm_ts -o $vg

    # Check whether the vary on of the volume group in concurrent mode succeeded
    if (( $rc == 0 ))
    then
        # Logging the concurrent volume group varyon end entry along with timestamp
        amlog_trace $AM_VG_VARYON_END "Activating Volume Group|$vg"
    fi

    return $rc      # pass back success/failure indication
}

###############################################################################
#
# Name:        cl_mode3
#
# Returns:
#     0 - All of the volume groups are successfully varied on/changed mode
#     1 - varyonvg/mode change of at least one volume group failed
#     2 - Zero arguments were passed
#
# This function will place the volume groups passed in as arguments in
# the designated mode.
#
# Arguments:   -s   Varyon volume group in mode 3 with sync
#              -n   Varyon volume group in mode 3 without sync
#
# Environment: VERBOSE_LOGGING, PATH
#
###############################################################################

PROGNAME=${0##*/}
export PATH="$(/usr/es/sbin/cluster/utilities/cl_get_path all)"
if [[ $VERBOSE_LOGGING == "high" ]]
then
    set -x
    version='1.52 $Source$'
fi

if (( $# < 2 ))
then
    #
    : Caller used incorrect syntax
    #
    cl_echo 204 "usage: $PROGNAME [-n | -s] volume_groups_to_varyon" $PROGNAME
    exit 2
fi

#
: if JOB_TYPE is set, and it is not equal to "GROUP", then
: we are processing for process_resources
#
if [[ ${JOB_TYPE:-0} != 0 && $JOB_TYPE != "GROUP" ]]
then
    PROC_RES=true
else
    PROC_RES=false
fi

if [[ $1 == "-n" ]] ; then      # sync or no sync
    SYNCFLAG="-n"
    shift
elif [[ $1 == "-s" ]] ; then
    SYNCFLAG=""                 # LVM default is "sync"
    shift
else
    SYNCFLAG=""                 # LVM default is "sync"
fi

integer STATUS=0
integer SAVED_STATUS=0
MODE=""
export MODE
NONERRORVGS=""
lv_list=""
vg_list="$*"

set -u

#
: Update resource manager with the fact that we are trying to bring all the
: concurrent volume groups online
#
ALLCONCURVG="All_concurrent_vgs"
cl_RMupdate resource_acquiring $ALLCONCURVG $PROGNAME

#
: See what volume groups are currently varyd on, so that they can be
: skipped.
#
ON_LIST=$(print $(lsvg -L -o 2>/tmp/lsvg.err))

#
: Process each given volume group
#
typeset PS4_LOOP=""
for vg in $*
do
    PS4_LOOP=$vg

    #
    : We want to retain the fact that at least one VG had a problem, and
    : return that info to the caller. We check \$STATUS at this point
    : because there are "continues" in the code that make checking at the
    : bottom of the for loop impossible. Any non-zero STATUS will always
    : be 1 or always be 11 for a particular invocation of cl_mode3.
    #
    if (( $SAVED_STATUS == 0 ))
    then
        SAVED_STATUS=$STATUS
    fi

    #
    : STATUS can be 0, 1, or 11. A problem with a VG is indicated by a
    : STATUS of 1 or 11. STATUS is set to 11 instead of 1 if this script
    : was called from process_resources. If so, cl_RMupdate is run locally.
    : An exit code of 11 prevents cl_RMupdate from being run again when we
    : return to process_resources.
    #
    STATUS=0

    VGID=$(/usr/sbin/getlvodm -v $vg)
    if [[ -z $VGID ]]
    then
        #
        : Volume group $vg could not be found. Report error and
        : continue with next one.
        #
        cl_log 9650 "$PROGNAME: Volume group $vg not found." $PROGNAME $vg
        STATUS=1

        #
        : Update resource manager with results
        #
        cl_RMupdate resource_error $vg $PROGNAME
        if [[ $PROC_RES == true ]]
        then
            STATUS=11
        fi

        continue        # go on to next volume group
    fi

    #
    : Check to see if volume group $vg is already varyd on. The extended
    : pattern matches $vg only as a whole word in the blank separated list.
    #
    if [[ $ON_LIST == ?(* )$vg?( *) ]]
    then
        #
        : Note this and keep going. This could happen legitimately on a
        : node up after a forced down.
        #
        CONC=$(lqueryvg -g $VGID -C)
        RC=$?
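        #
        # lqueryvg -g VGID -C reports the concurrency state of the volume
        # group. The test below treats a value of 1 as "already varyd on in
        # concurrent mode"; take that mapping as an inference from this
        # script - lqueryvg is a low level LVM command with no documented
        # output contract.
        #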
        #
        : Find out if it is varyd on in concurrent mode
        #
        if (( $RC == 0 )) && [[ -n $CONC ]] && (( 1 == $CONC ))
        then
            #
            : Since volume group $vg is already varyd on in
            : concurrent mode, there is really nothing more to do
            : with it. Go on to the next one.
            #
            continue
        else
            #
            : No, it is not. Now, find out if it is defined as concurrent capable
            #
            MODE=$(lqueryvg -g $VGID -X)
            RC=$?
            if (( $RC == 0 )) && [[ -n $MODE ]] && (( 0 == $MODE ))
            then
                #
                : We get here in the case where volume group $vg is
                : varyd on, but not in concurrent mode, and is not
                : concurrent capable. This would be the case for a SCSI
                : RAID disk used in concurrent mode.
                #
                if ! cl_raid_vg $vg ; then
                    #
                    : Volume group $vg is not made up of known RAID devices
                    #
                    cl_log 485 "$PROGNAME: Failed concurrent varyon of $vg\n\
because it is not made up of known RAID devices." $PROGNAME $vg
                    STATUS=1
                fi
            fi

            if (( $RC != 0 || $STATUS == 0 ))
            then
                #
                : We get here either because we cannot query the state
                : of volume group $vg - which means it is in pretty sad
                : shape - or we could successfully query the state of
                : the volume group, and found it to be a concurrent
                : capable volume group varyd on in non-concurrent mode.
                #
                cl_echo 200 "$PROGNAME: Concurrent volume group "$vg" is already vary'd on in non-concurrent mode." $PROGNAME $vg

                #
                : Try to recover by varying it off, to be varyd on in
                : concurrent mode below.
                #
                if ! varyoffvg $vg
                then
                    #
                    : Unable to vary off volume group $vg - probably because
                    : it is in use. Note error and keep going
                    #
                    cl_log 28 "$PROGNAME: Failed varyoff of $vg." $PROGNAME $vg
                    STATUS=1
                fi

                #
                : At this point, volume group $vg was varyd off. The flow
                : takes over below, and varys on the volume group in
                : concurrent mode.
                #
            fi
        fi      # end on, but not concurrent

        #
        : Update resource manager if we could not get volume group $vg back to
        : a reasonable - varyd off - state.
        #
        if (( $STATUS == 1 ))
        then
            cl_RMupdate resource_error $vg $PROGNAME
            if [[ $PROC_RES == true ]]
            then
                STATUS=11
            fi
            continue        # go on to next volume group
        fi

        #
        : At this point, volume group $vg was varyd off. The flow takes over
        : below, and varys on the volume group in concurrent mode.
        #
    else
        #
        : Checking if SCSI PR is enabled and, if so, confirming that
        : the SCSI PR reservations are intact.
        #
        typeset SCSIPR_ENABLED=$(clodmget -n -q "policy=scsi" -f value HACMPsplitmerge)
        if [[ $SCSIPR_ENABLED == Yes ]]
        then
            cl_confirm_scsipr_res $vg
            if (( $? != 0 ))
            then
                #
                : We are not sure if the SCSIPR reservation exists,
                : so we are going to exit with status 1, which will put
                : the corresponding RG into ERROR state.
                : Reservation Policy, Registered Keys are extracted for debugging.
                #
                # Print the current policy of Volume Group
                clpr_Get_policy_vg $vg
                # Print the current PR Key of Volume Group
                clpr_Get_prkey_vg $vg
                # Print the reservation of Volume Group
                clpr_ReadRes_vg $vg
                # Print the registered PR Keys of the Volume Group (1 is a dummy key)
                clpr_verifyKey_vg $vg 1
                # Print Configured Reserve Policy, Effective Reserve Policy and
                # Reservation Status of VG.
                clrsrvmgr -rg $vg
                # Fail varyonvg
                exit 1
            fi
        fi

        #
        : The volume group is not currently varied on.
        : If there is a fence group defined for this volume group, set the
        : fence height to allow read/write access
        #
        cl_set_vg_fence_height -c $vg rw
        RC=$?
        if (( $RC != 0 ))
        then
            #
            : Log any error, but continue. If this is a real problem, the
            : varyonvg will fail
            #
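            # Pull the localized read/write text from the C-SPOC message
            # catalog for use in the error report. The catalog entry is
            # assumed to be a comma separated list with the read/write
            # variant in the second field - hence the cut below; if the
            # default text comes back without a comma, cut passes it
            # through unchanged.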
            rw=$(dspmsg -s 103 cspoc.cat 350 'read/write' | cut -f2 -d,)
            cl_log 10511 "$PROGNAME: Volume group $vg fence height could not be set to read/write" $PROGNAME $vg $rw
        fi
    fi      # end already vary'd on

    #
    : Find out whether LVM thinks this volume group is concurrent
    : capable. Note that since volume group $vg is not varyd on at this
    : point in time, we have to look directly at the VGDA on the
    : hdisks in the volume group.
    #
    MODE=""
    /usr/sbin/getlvodm -w $VGID | while read pvid HDISK
    do
        #
        : Check each of the hdisks for a valid mode value. Stop at the
        : first one we find.
        #
        MODE=$(lqueryvg -p $HDISK -X)
        RC=$?
        if (( $RC == 0 )) && [[ -n $MODE ]]
        then
            break
        fi
    done

    if [[ -z $MODE ]]
    then
        #
        : If we could not pull a valid mode indicator off of any disk in
        : volume group $vg, there is no chance whatsoever that LVM
        : will be able to vary it on. Give up on this one.
        #
        cl_log 203 "$PROGNAME: Failed varyonvg $SYNCFLAG -c of $vg." $PROGNAME $SYNCFLAG $vg
        STATUS=1

    elif (( $MODE == 0 ))
    then
        #
        : LVM thinks that this is not a concurrent capable
        : volume group. Try to turn this into enhanced concurrent mode
        #
        if cl_makecm -C $vg                     # try to make it ECM
        then
            varyonc $vg $SYNCFLAG               # if successful, vary on
            if (( $STATUS == 1 ))
            then
                #
                : Update resource manager with results
                #
                cl_RMupdate resource_error $vg $PROGNAME
                if [[ $PROC_RES == true ]]
                then
                    STATUS=11
                fi
            fi
            continue                            # and go to next VG
        fi

    elif (( $MODE == 32 ))
    then
        #
        : LVM thinks that this volume group is defined as concurrent
        : capable, for the group services based concurrent mode
        #
        : try to varyon in concurrent with appropriate sync option
        #
        varyonc $vg $SYNCFLAG

    else
        #
        : Anything else - "1" or "16", depending on the level of LVM -
        : indicates that LVM thinks this volume group is
        : defined as concurrent capable, for the covert channel based
        : concurrent mode - SSA or 9333.
        #
        if cl_raid_vg $vg
        then
            #
            : SCSI attached RAID devices are reported as concurrent capable.
            : If that is what we have here, try to make it an enhanced
            : concurrent mode volume group
            #
            if [[ -z $PRE_EVENT_MEMBERSHIP ]]
            then
                #
                : If we are the first node up - so that no other node should
                : have volume group $vg varyd on - and running on AIX 5.3,
                : try to turn this into enhanced concurrent mode
                #
                if cl_makecm -C $vg             # try to make it ECM
                then
                    varyonc $vg $SYNCFLAG       # if successful, vary on
                    if (( $STATUS == 1 )); then
                        #
                        : Update resource manager with results
                        #
                        cl_RMupdate resource_error $vg $PROGNAME
                        if [[ $PROC_RES == true ]]; then
                            STATUS=11
                        fi
                    fi
                    continue                    # and go to next VG
                fi
            fi

            #
            : Bring on line in RAID concurrent mode
            #
            if ! convaryonvg $vg
            then
                #
                : It was not possible to vary on this volume
                : group. Note error and keep going.
                #
                STATUS=1
            fi
        else
            #
            : It is not a concurrent capable RAID device. The only remaining
            : supported choice is covert channel based concurrent mode.
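            : Here covert channel refers to communication carried over the
            : SSA or 9333 disk subsystem itself rather than over the
            : network - background detail only, not something this script
            : tests.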
            #
            if [[ -z $PRE_EVENT_MEMBERSHIP ]]
            then
                #
                : If we are the first node up - so that no other node should
                : have volume group $vg varyd on - and running on AIX 5.3,
                : try to turn this into enhanced concurrent mode
                #
                cl_makecm -C $vg
            fi

            #
            : Bring on line in enhanced - if above change done - or old
            : concurrent mode
            #
            varyonc $vg $SYNCFLAG
        fi
    fi

    #
    : update resource manager with results
    #
    if (( $STATUS == 1 ))
    then
        cl_RMupdate resource_error $vg $PROGNAME

        #
        : Save any error indication, so that an error return happens if any
        : volume group fails to come on line
        #
        if [[ $PROC_RES == true ]]
        then
            STATUS=11
        fi
    else
        #
        : Remove any prior failure indication on successful varyon
        #
        rm -f /usr/es/sbin/cluster/etc/vg/${vg}.fail
    fi
done
unset PS4_LOOP

if (( $SAVED_STATUS == 0 ))
then
    SAVED_STATUS=$STATUS
fi

#
: Update the resource manager with the unsurprising result that all volume
: groups that did not have errors have been varyd on.
#
ALLNONERRVG="All_nonerror_concurrent_vgs"
cl_RMupdate resource_up $ALLNONERRVG $PROGNAME

#
: There are circumstances - such as rg_move after a loss of quorum - where
: some disks may not be on line or where stale partitions could exist. In
: that case, do a sync.
#
if [[ -n $NONERRORVGS ]]
then
    cl_sync_vgs $NONERRORVGS &      # check, and sync if necessary
fi

exit $SAVED_STATUS                  # accumulated status