#!/bin/ksh93
# ALTRAN_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# Copyright (C) Altran ACT S.A.S. 2017,2018,2019,2020,2021. All rights reserved.
#
# ALTRAN_PROLOG_END_TAG
#
# IBM_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# 61haes_r721 src/43haes/usr/sbin/cluster/events/node_up.sh 1.10.11.45
#
# Licensed Materials - Property of IBM
#
# COPYRIGHT International Business Machines Corp. 1990,2016
# All Rights Reserved
#
# US Government Users Restricted Rights - Use, duplication or
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
#
# IBM_PROLOG_END_TAG
# @(#) 7d4c34b 43haes/usr/sbin/cluster/events/node_up.sh, 726, 2147A_aha726, Feb 05 2021 09:50 PM

# Include the file containing SCSIPR functions
. /usr/es/sbin/cluster/events/utils/cl_scsipr_event_functions

#########################################################################
#                                                                       #
# Name:         node_up                                                 #
#                                                                       #
# Description:  This script is called when a node joins                 #
#               the cluster.                                            #
#               The script checks the id of the node,                   #
#               then calls one of the two sub-event scripts             #
#               accordingly.                                            #
#                                                                       #
# Called by:    cluster manager                                         #
#                                                                       #
# Calls to:     node_up_local, node_up_remote                           #
#                                                                       #
# Arguments:    nodename                                                #
#                                                                       #
# Returns:      0       success                                         #
#               1       failure                                         #
#               2       bad argument                                    #
#                                                                       #
#########################################################################

###############################################################################
#
# node_up_vg_fence_init
#
# This will set up CAA volume group fencing for this node.
#
# For every volume group managed by PowerHA and known on this node, pass
# the volume group name and the list of disks in the volume group to
# cl_vg_fence_init.  This will attempt to set up a fence group for this
# volume group, and, if that is possible, set the fence height to "read
# only".  This prevents inadvertent/malicious access to the disks in
# the volume group from this node.  The fence height is adjusted to
# "read/write" prior to active varyon.
#
###############################################################################
function node_up_vg_fence_init
{
    typeset PS4_FUNC="node_up_vg_fence_init"
    [[ "$VERBOSE_LOGGING" == "high" ]] && set -x

    typeset VGs_on_line         # list of volume groups currently on line
    typeset VG_name             # volume group managed by PowerHA
    typeset VG_ID               # volume group identifier used by LVM
    typeset VG_PV_list          # disks in volume group

    #
    : Find out what volume groups are currently on-line
    #
    VGs_on_line=$(print $(lsvg -L -o 2> /var/hacmp/log/${PROGNAME}.lsvg.err))
    [[ -e /var/hacmp/log/${PROGNAME}.lsvg.err && ! -s /var/hacmp/log/${PROGNAME}.lsvg.err ]] && rm /var/hacmp/log/${PROGNAME}.lsvg.err

    #
    : Clean up any old fence group files and stale fence groups.
    : These are all of the form "/usr/es/sbin/cluster/etc/vg/<volume group name>.uuid"
    #
    valid_vg_lst=""
    for VG_name in $(lsvg -L 2>> /var/hacmp/log/${PROGNAME}.lsvg.err | egrep -vw 'rootvg|caavg_private')
    do
        PS4_LOOP=$VG_name
        if [[ -z $(clodmget -q "name like '*VOLUME_GROUP' and value = $VG_name" -f value -n HACMPresource) ]]
        then
            if [[ -s /usr/es/sbin/cluster/etc/vg/${VG_name}.uuid ]]
            then
                #
                : Volume group $VG_name is no longer an HACMP resource,
                : and there is a saved fence group UUID file.
                : Delete the stale fence group.
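                #
                # cl_vg_fence_term is only attempted once the fence height for
                # this volume group has been successfully reset to read/write.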
                #
                if cl_set_vg_fence_height -c $VG_name rw
                then
                    cl_vg_fence_term -c $VG_name
                fi
                rm -f /usr/es/sbin/cluster/etc/vg/${VG_name}.uuid
            fi
        else
            #
            : Volume group $VG_name is an HACMP resource
            #
            if [[ $VGs_on_line == ?(* )$VG_name?( *) ]]
            then
                fence_height='rw'
            else
                fence_height='ro'
            fi

            #
            : Recreate the fence group to match current volume group membership
            #
            cl_vg_fence_redo -c $VG_name $fence_height
            valid_vg_lst="$valid_vg_lst $VG_name"
        fi
    done
    [[ -e /var/hacmp/log/${PROGNAME}.lsvg.err && ! -s /var/hacmp/log/${PROGNAME}.lsvg.err ]] && rm /var/hacmp/log/${PROGNAME}.lsvg.err

    #
    : Any remaining old fence group files are from stale fence groups,
    : so remove them
    #
    if [[ -s /usr/es/sbin/cluster/etc/vg/${VG_name}.uuid ]]
    then
        for VG_path_name in $(ls /usr/es/sbin/cluster/etc/vg/*uuid)
        do
            PS4_LOOP=$VG_path_name
            VG_name=${VG_path_name##*/}         # strip off path
            VG_name=${VG_name%%.uuid}           # just volume group name
            if [[ $valid_vg_lst == ?(* )$VG_name?( *) ]]
            then
                #
                : Just redid the fence group for $VG_name
                #
                continue
            fi

            #
            : Get rid of the stale fence group
            #
            if cl_set_vg_fence_height -c $VG_name rw
            then
                cl_vg_fence_term -c $VG_name
            fi

            #
            : Get rid of the stale fence group UUID
            #
            rm -f $VG_path_name
        done
    fi
    unset PS4_LOOP

    return 0
}

###############################################################################
#
# Start Main
#
###############################################################################

PROGNAME=${0##/*/}
PATH="$(/usr/es/sbin/cluster/utilities/cl_get_path all)"
NODE_HALT_CONTROL_FILE="/usr/es/sbin/cluster/etc/ha_nodehalt.lock"

set -a
eval $(cllsparam -n $LOCALNODENAME)
set +a

if [[ $VERBOSE_LOGGING == "high" ]]
then
    set -x
    version='%I%'
fi

export NODENAME=$1

# This will be the exit status seen by the Cluster Manager.
# If STATUS is not 0, the Cluster Manager will enter reconfiguration.
# All lower-level scripts should pass status back to the caller.
# This will allow Resource Groups to be processed individually,
# independent of the status of another resource group.
#
integer STATUS=0
integer RC=0

ENABLE_NFS_CROSS_MOUNT=${ENABLE_NFS_CROSS_MOUNT:-"false"}
typeset START_MODE=""

set -u

if (( $# < 1 )) || (( $# > 2 ))
then
    echo "Usage: $PROGNAME nodename [forced|manual]"
    exit 2
fi

#
: serial number for this event is $EVENT_SERIAL_NUMBER
#

if [[ $LOCALNODENAME == $NODENAME ]]
then
    #
    : Remove the node halt lock file.
    : Hereafter, clstrmgr failure leads to node halt
    #
    rm -f $NODE_HALT_CONTROL_FILE
fi

if (( $# > 1 ))
then
    START_MODE=$2
    case $START_MODE in
        forced )
            set +x
            dspmsg scripts.cat 10713 "WARNING: Cluster services on node $NODENAME are being started\n\
after previously being stopped with the 'unmanage' option.\n\
Resource groups will be restored to their state prior to stopping cluster\n\
services without processing any individual resources.\n"
            [[ $VERBOSE_LOGGING == "high" ]] && set -x
            ;;
        manual )
            #
            : Suitable warning in the case of a manual RG processing option
            #
            set +x
            dspmsg scripts.cat 10713 "WARNING: Cluster services on node $NODENAME have been set to \n\
start with 'manually process resource groups' option. \n\
Because of this, the resource groups will not come online \n\
after the cluster services have started. \n\
\n\
Use the PowerHA SystemMirror 'Resource Group and Applications' \n\
SMIT screen under System Management (C-SPOC) to bring the \n\
OFFLINE resource groups to the ONLINE state. \
\n" $NODENAME
            [[ $VERBOSE_LOGGING == "high" ]] && set -x
            ;;
        * )
            echo "Usage: $PROGNAME nodename [forced|manual]"
            exit 2
            ;;
    esac
fi

#
: If RG_DEPENDENCIES=false, process RGs with clsetenvgrp
#
if [[ $RG_DEPENDENCIES == "FALSE" ]]
then
    #
    : Set RESOURCE_GROUPS with the RG names participating in this event.
    #
    set -a
    eval $(clsetenvgrp $NODENAME $PROGNAME)
    RC=$?
    set +a
    : exit status of clsetenvgrp $NODENAME $PROGNAME is: $RC
    if (( 0 != $RC ))
    then
        STATUS=1
    fi

    #
    : Track when NFS lock daemon stopped, so that we know when to start it again
    #
    rm -f /tmp/.RPCLOCKDSTOPPED
fi

#
: localnode processing prior to RG acquisition
#
if [[ $LOCALNODENAME == "$NODENAME" && $START_MODE != forced ]]
then
    #
    : Reserve Volume Groups using SCSIPR
    #
    typeset SCSIPR_ENABLED=$(clodmget -n -q "policy=scsi" -f value HACMPsplitmerge)
    if [[ $SCSIPR_ENABLED == "Yes" ]]
    then
        #
        : Setup SCSIPR fencing. This must be done prior to any potential disk access.
        #
        node_up_scsipr_init

        # Setup SCSIPR fencing for raw disks, if any.
        typeset diskList=$(cllsres | grep -w DISK | cut -f2 -d=)
        if [[ -n $diskList ]]
        then
            #
            : Register and reserve raw disks
            #
            diskList=${diskList#\"}
            diskList=${diskList%\"}
            typeset pvid=""
            for pvid in $diskList
            do
                typeset hdisk=$(lspv -L | grep -w $pvid | awk '{print $1}')
                if [[ -n $hdisk ]]
                then
                    cl_scsipr_Reg_Res $hdisk
                fi
            done
        fi

        diskList=$(cllsres | grep -w RAW_DISK | cut -f2 -d=)
        if [[ -n $diskList ]]
        then
            diskList=${diskList#\"}
            diskList=${diskList%\"}
            typeset uuid=""
            for uuid in $diskList
            do
                typeset hdisk=$(lspv -u | grep -w $uuid | awk '{print $1}')
                if [[ -n $hdisk ]]
                then
                    cl_scsipr_Reg_Res $hdisk
                fi
            done
        fi
    fi

    #
    : Setup VG fencing. This must be done prior to any potential disk access.
    #
    node_up_vg_fence_init
    #
    # The discerning reader will note that the return code is not checked.
    # This is because a failure to set up fencing should not prevent the node
    # from coming up.
    #

    #
    : If WLM manager classes have been configured for an application server, process them now
    #
    if [[ -n $(clodmget -q "name like 'WLM_*'" -f id HACMPresource) ]]
    then
        #
        : remove the WLM flag file used to indicate reconfig_resources
        #
        rm -f /usr/es/sbin/cluster/etc/.hacmp_wlm_config_changed
        cl_wlm_reconfig node_up
        WLM_STATUS=$?

        #
        : WLM support is required on this node
        #
        if (( 0 == $WLM_STATUS ))
        then
            cl_wlm_start
            #
            : cl_wlm_start status irrelevant during node_up
            #   even if there was an error in the reconfiguration script
            #   or the start script, the rest of node_up should continue
            #
        fi
    fi

    #
    : Call ss-load replicated resource methods if they are defined
    #
    METHODS=$(cl_rrmethods2call ss_load)
    for method in $METHODS
    do
        if [[ -x $method ]]
        then
            if ! $method $*
            then
                STATUS=1
            fi
        fi
    done

    #
    : When the local node is brought up, reset the resource locator info.
    #   Delete all resource_locators on the local node
    #
    if ! clchdaemons -r -d clstrmgr_scripts -t resource_locator
    then
        cl_log 655 "$PROGNAME: Problem with resource location database in HACMPdaemons ODM." $PROGNAME
        STATUS=1
    fi

#   if [[ $(clodmget -q "nodename = $LOCALNODENAME and type = start and object = manage" -f value -n HACMPdaemons) != 'manual' ]]
    if [[ $START_MODE != manual ]]
    then
        #
        : attempt passive varyon for any ECM VGs in serial RGs
        #
        # If manual management, do not do passive varyons. These will be done
        # automatically when the resource group is brought online.
        #
        cl_pvo
        : exit status of cl_pvo is: $?
    else
        #
        # Cluster is started in manual mode. Any VG which is part of an RG, but
        # varied on manually, will not be disturbed. But we should log such VGs
        # for debugging purposes.
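        # The loop below only reports each such volume group with message 10680;
        # it does not vary anything off or otherwise change volume group state.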
        #
        lsvg_out=$(lsvg -o)
        clodmget -q "name like *VOLUME_GROUP" -f value -n HACMPresource | while read vgname
        do
            if print -- "$lsvg_out" | grep -qw $vgname
            then
                cl_msg -e 0 -m 10680 "$PROGNAME: Volume group $vgname is online in non-concurrent mode." $PROGNAME $vgname
            fi
        done
    fi

    if ls /dev/vpath* > /dev/null 2>&1
    then
        #
        : When the local node is brought up, set up any needed persistent reserve key
        #
        cl_setprkey
        : exit status of cl_setprkey is: $?
    fi

    #
    : Configure any split and merge policies.
    #
    rm -f /usr/es/sbin/cluster/etc/smm_oflag    # remove any storage system recovery flag
    if [[ -z $PRE_EVENT_MEMBERSHIP ]]           # first node up
    then
        #
        : If this is the first node up, configure split merge handling.
        #
        cl_cfg_sm_rt
        : exit status of cl_cfg_sm_rt is $?
        #
        # Any errors from cl_cfg_sm_rt are logged, but do not otherwise interfere
        # with normal bring up
        #
    else                                        # not first node up
        #
        : If this is a node after the first, make sure any required
        : persistent reserve policy is set up if the tie breaker uses it
        #
        tiebreaker=$(clodmget -q "policy = tiebreaker" -f value -n HACMPsplitmerge)
        if [[ -n $tiebreaker && $tiebreaker != "None" ]]
        then
            #
            : Just in case the disk was not set up before,
            : set the reserve policy to PR_exclusive so
            : that the disk can function as a tie breaker.
            #
            TBN=$(clodmget -q "attribute = pvid and value like ${tiebreaker}*" -f name -n CuAt)
            if [[ -n $TBN ]]
            then
                if LC_ALL=C lsattr -R -l $TBN -a reserve_policy | grep -qw PR_exclusive
                then
                    LC_ALL=C lsattr -E -l $TBN -a reserve_policy,PR_key_value -F value | paste -s - | read reserve_policy PR_key_value
                    if [[ $reserve_policy != "PR_exclusive" ]]
                    then
                        #
                        : If the disk does not have a reserve policy of
                        : PR_exclusive, set that, and, if not already set,
                        : the Persistent Reserve key.
                        #
                        if [[ $PR_key_value == "none" ]]
                        then
                            PR_key_value="-a PR_key_value=2147483647"
                        else
                            PR_key_value=""
                        fi
                        if ! chdev -l $TBN -a reserve_policy=PR_exclusive $PR_key_value
                        then
                            dspmsg -s 32 scripts.cat 28 "The disk $TBN cannot be configured with the reserve policy (PR_exclusive) required for it to function as a Tie Breaker\n" $TBN
                            #
                            # This failure throws serious doubt on the ability
                            # of the cluster to correctly handle a split or
                            # merge. However, we do not let that stop the
                            # cluster from becoming active.
                            #
                        fi      # end chdev did not work
                    fi          # end policy not PR_exclusive
                fi              # end support SCSI-3 PR
            fi                  # end tie breaker disk found
        fi                      # end tie breaker defined
    fi                          # end first/not first node up
fi                              # end of processing always done
                                # on local node up

#
: Enable NFS crossmounts during manual start
#
if [[ -n $ENABLE_NFS_CROSS_MOUNT && $ENABLE_NFS_CROSS_MOUNT == true && $START_MODE != forced ]]
then
    #
    : Get list of RGs that include this node
    # Take only those RGs that are part of the local node. We use clRGinfo
    # instead of HACMPgroup because we are considering only those RGs that
    # include the local node. Also, clRGinfo returns output only if cluster
    # services are active on at least one node in the cluster, which is
    # precisely what we want: we do not want to mount crossmounts if the RG
    # is not available on any node. Hence, if clRGinfo does not return
    # anything, we can rest assured that we do not have to enable crossmounts.
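    # The awk filters below rely on the colon-delimited 'clRGinfo -s' output
    # having the resource group name in field 1, the RG state in field 2, and
    # the node name in field 3.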
    #
    LOCAL_RGS=$(clRGinfo -s 2>/dev/null | awk -F: '{ if ($3 =="'$LOCALNODENAME'") print $1 }')

    #
    : Select those RGs that are offline on the local node
    #
    LOCAL_OFFLINE_RG=""
    LOCAL_OFFLINE_RGS=""
    for eachrg in $LOCAL_RGS
    do
        LOCAL_OFFLINE_RG=$(clRGinfo -s $eachrg 2>/dev/null | awk -F: '{ if ( ($3 =="'$LOCALNODENAME'") && ($2 =="OFFLINE")) print $1 }')
        if [[ -n $LOCAL_OFFLINE_RG ]]
        then
            LOCAL_OFFLINE_RGS="$LOCAL_OFFLINE_RGS $LOCAL_OFFLINE_RG"
        fi
    done

    for MYRGS in $LOCAL_OFFLINE_RGS
    do
        #
        : Mount the filesystem only if the RG is online on a remote node
        # This makes sense because crossmounts are activated using service
        # labels; if the RG is not online anywhere, the service IP label
        # will not be available. It also avoids performing unnecessary
        # ping tests.
        #
        ONLINE_RG=$(clRGinfo -s $MYRGS 2>/dev/null | awk -F: '{ if ($2 =="ONLINE") print $1 }')
        if [[ -n $ONLINE_RG ]]
        then
            NFS_FILE_SYS=$(clodmget -q "group=$MYRGS AND name=MOUNT_FILESYSTEM" -f value -n HACMPresource)
            if [[ -z $NFS_FILE_SYS ]]
            then
                continue
            fi

            NFS_SERVICE_LABEL=$(clodmget -q "group=$MYRGS AND name=SERVICE_LABEL" -f value -n HACMPresource)
            if [[ -z $NFS_SERVICE_LABEL ]]
            then
                continue
            fi

            preferred_service_ip_list=""
            PREFERRED_NFS_NETWORK=$(clodmget -q "group=$MYRGS AND name=NFS_NETWORK" -f value -n HACMPresource)
            if [[ -n $PREFERRED_NFS_NETWORK ]]
            then
                for nfs_network in $PREFERRED_NFS_NETWORK
                do
                    for nfs_network_service_ip in $NFS_SERVICE_LABEL
                    do
                        preferred_service_ip=$(cllsif -cS 2> /dev/null | awk -F: '{ if ( ($3 =="'$nfs_network'") && ($1 =="'$nfs_network_service_ip'")) print $1 }' | uniq)
                        if [[ -n $preferred_service_ip ]]
                        then
                            preferred_service_ip_list="$preferred_service_ip_list $preferred_service_ip"
                        fi
                    done
                done
            fi

            #
            : Add all other service IPs last, so that if no preferred
            : NFS_NETWORK is available we will try mounting over these
            : non-preferred service IPs
            #
            preferred_service_ip_list="$preferred_service_ip_list $NFS_SERVICE_LABEL"

            #
            : Check if any service IP is UP
            #
            MOUNT_NFSHOST=""
            for host in $preferred_service_ip_list
            do
                if ping $host 1024 1 >/dev/null
                then
                    MOUNT_NFSHOST=$host
                    break
                fi
            done

            if [[ -n $MOUNT_NFSHOST ]]
            then
                #
                : we have the FS to mount, and the service IP to use
                #
                export GROUPNAME=$MYRGS
                cl_activate_nfs 1 $MOUNT_NFSHOST "$NFS_FILE_SYS"
                #
                : activate_nfs will not wait for the mounts to complete
                #
            fi
        fi
    done
fi

#
: When RG dependencies are not configured we call node_up_local/remote,
: followed by process_resources to process any remaining groups
#
if [[ $RG_DEPENDENCIES == "FALSE" && $START_MODE != forced ]]
then
    #
    : For each participating RG, serially process the resources
    #
    for group in $RESOURCE_GROUPS
    do
        # export the variables set by clsetenvres
        set -a
        eval $(clsetenvres $group $PROGNAME)
        set +a
        export GROUPNAME=$group

        #
        : Check nodename, and call node_up_local or node_up_remote accordingly.
        #
        if [[ $NODENAME == $LOCALNODENAME ]]
        then
            clcallev node_up_local
            RC=$?
            : exit status of node_up_local is: $RC
        else
            clcallev node_up_remote $NODENAME
            RC=$?
            : exit status of node_up_remote is: $RC
        fi

        #
        : If ANY failure has occurred, this script should exit accordingly
        #
        if (( $RC != 0 ))
        then
            cl_log 650 "$PROGNAME: Failure occurred while processing Resource Group $group. Manual intervention required." $PROGNAME $group
            STATUS=1
        fi
    done

    #
    : Use process_resources for parallel-processed resource groups
    #
    if ! process_resources
    then
        STATUS=1
    fi
fi

if [[ $LOCALNODENAME == $NODENAME ]]
then
    #
    : Perform any deferred TCP daemon startup, if necessary,
    : along with any necessary start up of iSCSI devices.
    #
    cl_telinit
    : exit status of cl_telinit is: $?
fi

return $STATUS