#!/bin/ksh93 # ALTRAN_PROLOG_BEGIN_TAG # This is an automatically generated prolog. # # Copyright (C) Altran ACT S.A.S. 2017,2019,2020,2021,2022. All rights reserved. # # ALTRAN_PROLOG_END_TAG # # IBM_PROLOG_BEGIN_TAG # This is an automatically generated prolog. # # 61haes_r721 src/43haes/usr/sbin/cluster/events/process_resources.sh 1.176 # # Licensed Materials - Property of IBM # # COPYRIGHT International Business Machines Corp. 2001,2016 # All Rights Reserved # # US Government Users Restricted Rights - Use, duplication or # disclosure restricted by GSA ADP Schedule Contract with IBM Corp. # # IBM_PROLOG_END_TAG # @(#) f037b55 43haes/usr/sbin/cluster/events/process_resources.sh, 61aha_r726, 2205E_aha726, Jun 06 2022 06:15 PM ######################################################################### # # COMPONENT_NAME: EVENTS # # FUNCTIONS: none # ######################################################################### # Including file containing SCSIPR functions . /usr/es/sbin/cluster/events/utils/cl_scsipr_event_functions #Including Availability metrics library file . /usr/es/lib/ksh93/availability/cl_amlib ######################################################################### # # # Name: process_resources # # # # Description: This event script performs actions dictated by # # subsequent call to clrgpa. Depending on what JOB_TYPE # # is returned, different action is taken. # # # # Called by: # # # # Calls to: Different event scripts # # clrgpa # # # # Arguments: nodename # # # # Returns: 0 success # # 1 failure # # 2 bad argument # # # ######################################################################### get_list_head() { typeset PS4_FUNC="get_list_head" [[ $VERBOSE_LOGGING == "high" ]] && set -x echo $* | IFS=: read listhead listtail echo $listhead | tr ',' ' ' } get_list_tail() { typeset PS4_FUNC="get_list_tail" [[ $VERBOSE_LOGGING == "high" ]] && set -x echo $* | IFS=: read listhead listtail echo $listtail } ############################################################################ # Function: set_resource_group_state # Purpose: Sets the resource group state. # Parameters: $1=new status of the resource group # Return: none ############################################################################ set_resource_group_state() { typeset PS4_FUNC="set_resource_group_state" [[ $VERBOSE_LOGGING == "high" ]] && set -x STAT=0 new_status=$1 for GROUPNAME in $RESOURCE_GROUPS do export GROUPNAME if [[ $new_status != "DOWN" ]] then if ! clchdaemons -d clstrmgr_scripts -t resource_locator -n "$LOCALNODENAME" -o "$GROUPNAME" -v "$1" then cl_log 655 "$PROGNAME: Problem with resource location database in HACMPdaemons ODM." $PROGNAME STAT=1 fi fi # : Resource Manager Updates # case $new_status in ACQUIRING ) #Logging the resource acquire begin entry along with timestamp amlog_trace $RG_ACQUIRE_BEGIN "acquire|$GROUPNAME|$LOCALNODENAME" cl_RMupdate acquiring $GROUPNAME $PROGNAME ;; RELEASING ) #Logging the resource release begin entry along with timestamp amlog_trace $RG_RELEASE_BEGIN "acquire|$GROUPNAME|$LOCALNODENAME" cl_RMupdate releasing $GROUPNAME $PROGNAME ;; UP ) cl_RMupdate rg_up $GROUPNAME $PROGNAME #Logging the resource acquire end entry along with timestamp amlog_trace $RG_ACQUIRE_END "acquire|$GROUPNAME|$LOCALNODENAME" ;; DOWN ) cl_RMupdate rg_down $GROUPNAME $PROGNAME #Logging the resource release end entry along with timestamp amlog_trace $RG_RELEASE_END "acquire|$GROUPNAME|$LOCALNODENAME" ;; ERROR ) cl_RMupdate rg_error $GROUPNAME $PROGNAME ;; ACQUIRING_SECONDARY ) cl_RMupdate acquiring_secondary $GROUPNAME $PROGNAME ;; RELEASING_SECONDARY ) cl_RMupdate releasing_secondary $GROUPNAME $PROGNAME ;; ONLINE_SECONDARY ) cl_RMupdate rg_up_secondary $GROUPNAME $PROGNAME ;; OFFLINE_SECONDARY ) cl_RMupdate rg_down $GROUPNAME $PROGNAME ;; ERROR_SECONDARY ) cl_RMupdate rg_error_secondary $GROUPNAME $PROGNAME ;; esac done return $STAT } queue_RG_MOVE() { typeset PS4_FUNC="queue_RG_MOVE" [[ $VERBOSE_LOGGING == "high" ]] && set -x for GROUPNAME in $RESOURCE_GROUPS do GROUP_ID=$(clodmget -q "group=$GROUPNAME" -f id -n HACMPgroup) export GROUPNAME get_list_head $NODE | read NODES_FOR_RG get_list_tail $NODE | read NODE # : if the node was left empty for some reason, assume local node # if [[ -z $NODES_FOR_RG ]] then NODES_FOR_RG=$(clodmget -f nodename -n HACMPcluster) fi for node in $NODES_FOR_RG do # : find node id for node name # TARGET_NODE_ID=$(clodmget -q "name=$node AND object=VERBOSE_LOGGING" -f node_id -n HACMPnode) case $1 in ACQUIRE ) let action=20 ;; RELEASE ) let action=21 ;; ACQUIRE_NFS ) let action=22 ;; RELEASE_NFS ) let action=23 ;; esac cl_echo 3021 "$PROGNAME: Enqueing rg_move action ($1) for resource group ($GROUPNAME)!\n" $PROGNAME $1 $GROUPNAME clRMupdate cluster_rg_move $GROUP_ID $action $TARGET_NODE_ID 0 0 done done return 0 } ############################################################################ # Function: notify_rpc_statd # Purpose: # If this is a two node cluster and exported filesystems exist, then when # the cluster topology is stable notify rpc.statd of the changes # Parameters: none # Return: none ############################################################################ notify_rpc_statd() { typeset PS4_FUNC="notify_rpc_statd" [[ $VERBOSE_LOGGING == "high" ]] && set -x # : if this is a two node cluster and exported filesystems exist, then when : the cluster topology is stable notify rpc.statd of the changes # STAT=0 UPDATESTATDFILE="/usr/es/sbin/cluster/etc/updatestatd" # : This gets called if there is any RG with EXPORT_FILESYSTEMS! # if ! cl_update_statd then cl_log 1074 "$PROGNAME: Failure occurred while processing cl_update_statd.\n" $PROGNAME STAT=1 fi touch $UPDATESTATDFILE return $STAT } ############################################################################ # Function: start_or_stop_applications_for_rg # # Purpose: Call the app start/stop scripts for each application in the # resource group. References globals GROUPNAME, TMP_FILE # and APPLICATIONS. # # Parameters: Acquire or release flag # # Return: none - exit status is stored in a file # ############################################################################ start_or_stop_applications_for_rg() { typeset PS4_FUNC="start_or_stop_applications_for_rg" [[ $VERBOSE_LOGGING == "high" ]] && set -x if [[ $1 == "ACQUIRE" ]] then cmd_to_execute="start_server" else cmd_to_execute="stop_server" fi # : File name to store our exit status # STATUS_FILE=$2 # : Use clcallev to run the event # clcallev $cmd_to_execute "$APPLICATIONS" RC=$? if (( $RC != 0 && $RC != 11 )) then # : 11 = acquiring/releasing unsuccessful, but clRMupdate was run. : If there was an error not already handled, notify the resource manager # ALLSERVERS="All_servers" cl_RMupdate resource_error $ALLSERVERS $PROGNAME fi if (( $RC != 0 )) then # : If this failed while stopping an application, manual intervention is required. # if [[ $cmd_to_execute == "stop_server" ]] then cl_log 650 "$PROGNAME: Failure occurred while processing Resource Group $GROUPNAME. Manual intervention required." $PROGNAME $GROUPNAME fi fi # : Store the result for later accumulation # print "$GROUPNAME $RC" >>$STATUS_FILE } ############################################################################ # Function: process_applications # # Purpose: Called from process_resouces, this routine will processes # application servers for all groups in RESOURCE_GROUPS. # References globals ALL_APPLICATIONS, RESOURCE_GROUPS. # # Parameters: Acquire or release flag # # Return: 0, else 1 on failure # ############################################################################ process_applications() { typeset PS4_FUNC="process_applications" [[ $VERBOSE_LOGGING == "high" ]] && set -x # : Each subprocess will log to a file with this name and PID # export TMP_FILE="/var/hacmp/log/.process_resources_applications.$$" rm -f /var/hacmp/log/.process_resources_applications* WAITPIDS="" LPAR_ACQUIRE_FAILED=0 LPAR_RELEASE_FAILED=0 START_STOP_FAILED=0 LIST_OF_APPS=$ALL_APPLICATIONS # : Acquire lpar resources in one-shot before starting applications # if [[ $1 == "ACQUIRE" ]] ; then export GROUPNAME=$RESOURCE_GROUPS clmanageroha -o acquire -s -l $LIST_OF_APPS 3>&2 RC=$? if (( $RC != 0 )) ; then LPAR_ACQUIRE_FAILED=1 print "ERROR: clmanageroha failed with exit status $RC while" print "acquiring resources for applications $LIST_OF_APPS." fi fi if (( LPAR_ACQUIRE_FAILED == 0 )) ; then # : Loop through all groups to start or stop applications # for GROUPNAME in $RESOURCE_GROUPS ; do export GROUPNAME # : Break out application data # get_list_head $ALL_APPLICATIONS | read LIST_OF_APPLICATIONS_FOR_RG get_list_tail $ALL_APPLICATIONS | read ALL_APPLICATIONS get_list_head $MISCDATA | read MISCDATA_FOR_RG get_list_tail $MISCDATA | read MISCDATA if [[ $1 == "RELEASE" ]] ; then # #: When releasing, we change the order of apps - first-in, last-out # TMPLIST="" set -A appnames $( print ${LIST_OF_APPLICATIONS_FOR_RG} ) for (( cnt=0; cnt < ${#appnames[*]} ; cnt++ )) ; do TMPLIST="${LIST_OF_APPLICATIONS_FOR_RG%% *} ${TMPLIST}" LIST_OF_APPLICATIONS_FOR_RG="${LIST_OF_APPLICATIONS_FOR_RG#* }" done LIST_OF_APPLICATIONS_FOR_RG="${TMPLIST}" fi export APPLICATIONS=$LIST_OF_APPLICATIONS_FOR_RG export MISC_DATA=$MISCDATA_FOR_RG # : Now call start_or_stop_applications_for_rg to do the app start/stop. # This will create a number of subprocesses to deal with each app. # start_or_stop_applications_for_rg $1 $TMP_FILE.$GROUPNAME & # : Add PID of the last bg start_or_stop_applications_for_rg process to WAITPIDS. # WAITPIDS="$WAITPIDS $!" done # : Wait for the start_or_stop_applications_for_rg PIDs to finish. # This will ensure that all the app server start/stop scripts are finished. # wait $WAITPIDS # : Look at all the status files to see if any were unsuccessful # for GROUPNAME in $RESOURCE_GROUPS ; do cat $TMP_FILE.$GROUPNAME | read skip SUCCESS rest if [[ $SUCCESS != 0 ]] ; then echo "Content of the status file $TMP_FILE.$GROUPNAME:" cat $TMP_FILE.$GROUPNAME START_STOP_FAILED=1 print "ERROR: There was a failure processing application server start or" print "stop scripts for resource group $GROUPNAME." print "Exit status was $SUCCESS" fi rm -f $TMP_FILE.$GROUPNAME done fi # : Release lpar resources in one-shot now that applications are stopped # if [[ $1 == "RELEASE" ]] ; then export GROUPNAME=$RESOURCE_GROUPS clmanageroha -o release -s -l $LIST_OF_APPS 3>&2 RC=$? if (( $RC != 0 )) ; then LPAR_RELEASE_FAILED=1 print "ERROR: clmanageroha failed with exit status $RC while" print "releasing resources for applications $LIST_OF_APPS" fi fi # # If anything failed, return failure # if [[ $LPAR_ACQUIRE_FAILED != 0 || $LPAR_RELEASE_FAILED != 0 || $START_STOP_FAILED != 0 ]] ; then print "$PS4_FUNC: Errors encountered, return failure" return 1 else return 0 fi } ############################################################################ # Function: start_or_stop_udresources_for_rg # # Purpose: Call the userdefined resource start/stop scripts for each # resources in the resource group. References globals GROUPNAME, # TMP_FILE and UDRESOURCES. # # Parameters: Acquire or release flag # # Return: none - exit status is stored in a file # ############################################################################ start_or_stop_udresources_for_rg() { typeset PS4_FUNC="start_or_stop_udresources_for_rg" [[ $VERBOSE_LOGGING == "high" ]] && set -x if [[ $1 == "ACQUIRE" ]] then cmd_to_execute="start_udresource" else cmd_to_execute="stop_udresource" fi # : File name to store our exit status # STATUS_FILE=$2 # : Use clcallev to run the event # clcallev $cmd_to_execute "$UDRESOURCES" RC=$? # if (( $RC != 0 && $RC != 11 )) then # : 11 = acquiring/releasing unsuccessful, but clRMupdate was run. : If there was an error not already handled, notify the resource manager # ALLUDRS="All_userdefined_resources" cl_RMupdate resource_error $ALLUDRS $PROGNAME fi # if (( $RC != 0 )) then # : If this failed while stopping an application, manual intervention is required. # if [[ $cmd_to_execute == "stop_udresource" ]] then cl_log 650 "$PROGNAME: Failure occurred while processing Resource Group $GROUPNAME. Manual intervention required." $PROGNAME $GROUPNAME fi fi # : Store the result for later accumulation # print "$GROUPNAME $RC" >>$STATUS_FILE } ############################################################################ # Function: process_userdefined_resources # # Purpose: Called from process_resouces, this routine will processes # userdefined resources for specifc resource type for all groups # in RESOURCE_GROUPS # # Parameters: none # # Return: none # ############################################################################ process_userdefined_resources() { typeset PS4_FUNC="process_userdefined_resources" [[ $VERBOSE_LOGGING == "high" ]] && set -x # : Each subprocess will log to a file with this name and PID # export TMP_FILE="/var/hacmp/log/.process_resources_userdefined.$$" rm -f /var/hacmp/log/.process_resources_userdefined* if [[ $1 == "ACQUIRE" ]] then cmd_to_execute="start_udresource" else cmd_to_execute="stop_udresource" fi NOERRORUDRS="" WAITPIDS="" START_STOP_FAILED=0 # : Loop through all groups # for GROUPNAME in $RESOURCE_GROUPS do export GROUPNAME # : Break out application data # get_list_head $RES_LIST | read LIST_OF_UDRES_FOR_RG get_list_tail $RES_LIST | read RES_LIST if [[ $1 == "RELEASE" ]] then # : When releasing, we change the order of apps - first-in, last-out # TMPLIST="" set -A udresnames $( print ${LIST_OF_UDRES_FOR_RG} ) for (( cnt=0 ; cnt < ${#udresnames[*]} ; cnt++ )) do TMPLIST="${LIST_OF_UDRES_FOR_RG%% *} ${TMPLIST}" LIST_OF_UDRES_FOR_RG="${LIST_OF_UDRES_FOR_RG#* }" done LIST_OF_UDRES_FOR_RG="${TMPLIST}" fi export UDRESOURCES=$LIST_OF_UDRES_FOR_RG # : Now call start_or_stop_applications_for_rg to do the app start/stop. # This will create a number of subprocesses to deal with each app. # ALLUDRESOURCES="${UDRESOURCES} ${ALLUDRESOURCES}" start_or_stop_udresources_for_rg $1 $TMP_FILE.$GROUPNAME & # : Add PID of the last bg start_or_stop_udresources_for_rg process to WAITPIDS. # WAITPIDS="$WAITPIDS $!" done # : Wait for the start_or_stop_applications_for_rg PIDs to finish. # This will ensure that all the app server start/stop scripts are finished. # wait $WAITPIDS # # : Look at all the status files to see if any were unsuccessful # for GROUPNAME in $RESOURCE_GROUPS do cat $TMP_FILE.$GROUPNAME | read skip SUCCESS rest if [[ $SUCCESS != 0 ]] then echo "Content of the status file $TMP_FILE.$GROUPNAME:" cat $TMP_FILE.$GROUPNAME START_STOP_FAILED=1 print "ERROR: There was a failure processing user defined resource" print "start or stop scripts for resource group $GROUPNAME." print "Exit status was $SUCCESS" fi rm -f $TMP_FILE.$GROUPNAME done # : If anything failed, return failure # if [[ $START_STOP_FAILED != 0 ]] then print "$PS4_FUNC: Errors encountered, return failure" return 1 else return 0 fi } ############################################################################ # # Function: Drive fencing operations # # Parameters: $1 ACQUIRE or RELEASE flag # # $RESOURCE_GROUPS, containing resource groups for which fencing # is required # # $HDISKS, containing disks for which fencing must be done # # $HOSTS, containing the node to be fenced in or out # # Return: Always returns 0 # ############################################################################ process_ssa_fence() { typeset PS4_FUNC="process_ssa_fence" [[ $VERBOSE_LOGGING == "high" ]] && set -x STAT=0 for GROUPNAME in $RESOURCE_GROUPS do export GROUPNAME get_list_head $HDISKS | read LIST_OF_HDISKS_FOR_RG get_list_tail $HDISKS | read HDISKS get_list_head $HOSTS | read HOST_FOR_RG get_list_tail $HOSTS | read HOSTS # : Break these two strings into records. They have a one-to-one relationship. # for ONEHOST in $HOST_FOR_RG ; do print $LIST_OF_HDISKS_FOR_RG | IFS=, read ONEDISK LIST_OF_HDISKS_FOR_RG echo "Host=$ONEHOST, disk=$ONEDISK" # if $1 = ACQUIRE: fence in, cl_disk_available is not called yet # if $1 = RELEASE: fence out, VG is already varied off cl_ssa_fence $1 $ONEHOST "$ONEDISK" done done return $STAT } ##################################################################################### # # Function: getReplicatedResources # # Purpose: Called from process_resouces, this routine will check whether # the resource group is replicated or not. # # Parameters: $1 Resource Group name # # # Return: return TRUE in case resource group is replicated resource else return FALSE # ####################################################################################### getReplicatedResources () { typeset PS4_FUNC="getReplicatedResources" [[ $VERBOSE_LOGGING == "high" ]] && set -x RV="false" if [[ -n $(clodmget -n -f type HACMPrresmethods) ]] then # : Replicated resource methods are defined, check for resources # if [[ -n $(clodmget -q "name like '*_REP_RESOURCE' AND group=$1" -f value -n HACMPresource) ]] then # : Replicated resources exist # RV="true" fi # : Verify if any backup profiles are configured and trigger cbm utilities based on that # if [[ -n $(clodmget -q "name=BACKUP_ENABLED" -f value HACMPresource) ]]; then typeset enable_backup="" backup_method="" enable_backup=$(LANG=C cl_cbm_list $GROUPNAME events=1 2>/dev/null | grep -w "Enable_backup" | cut -f 2 -d '=') enable_backup=${enable_backup// /} if [[ $enable_backup == "yes" ]] then backup_method=$(LANG=C cl_cbm_list $GROUPNAME events=1 2>/dev/null | grep -w "Backup_method" | cut -f 2 -d '=') backup_method=${backup_method// /} if [[ $backup_method == "remote_storage" ]] then # : Replicated resources of remote backup exist # RV="true" fi fi fi fi echo $RV } ############################################################################ # # Function: get_inactive_vgs_or_disks # # Purpose: Given a set of volume groups of interest, return the ones # that are not currently active on the local node, or return # the disks in those volume groups. # # Input: VOLUME_GROUPS - global variable holding space separated list # of volume groups to check # Search option - "VGS" return the inactive volume groups # "VGSP" return the inactive and passively # vary'd on volume groups # "DISKS" return the disks in the inactive # volume groups # # Output: List of volume groups or disks written to stdout # ############################################################################ get_inactive_vgs_or_disks () { typeset PS4_FUNC="get_inactive_vgs_or_disks" [[ $VERBOSE_LOGGING == "high" ]] && set -x RETURN=$1 INACTIVE_VGS="" HDISK_LIST="" if [[ -n $VOLUME_GROUPS ]] then lsvg_lst=$(lsvg -L -o 2> /tmp/lsvg.err) for vg in $VOLUME_GROUPS do # : Check to see if the volume group is both varied on, and readable : by LVM - e.g., not closed due to lack of quorum. # if print "$lsvg_lst" | grep -qx $vg && lqueryvg -g $(getlvodm -v $vg) >/dev/null 2>&1 then # : This VG "'$vg'" is already varied on. # continue elif LC_ALL=C lsvg -L $vg 2>/dev/null | grep -i -q 'passive-only' then # : This VG "'$vg'" is already varied on in passive mode. # Since already locally accessable, so do not need to have reserves # broken. However, the VG does have to be moved to the active # state, so add it to the list of inactive ones to activate. # if [[ $RETURN == "VGSP" ]] then INACTIVE_VGS="$INACTIVE_VGS $vg" fi else # : VG "'$vg'" inactive, and reserves may have to be broken on its disks. # if [[ $RETURN == "DISKS" ]] then HDISK_LIST="$HDISK_LIST $(cl_fs2disk -pg $vg)" fi INACTIVE_VGS="$INACTIVE_VGS $vg" fi done if [[ -s /tmp/lsvg.err ]] then # : print the stderr from lsvg # print -u2 "stderr: lsvg -o" cat /tmp/lsvg.err >&2 fi # : Remove any duplicates from the list of volume groups to vary on # INACTIVE_VGS=$(echo $INACTIVE_VGS | tr ' ' '\n' | sort -u) fi if [[ $RETURN == "DISKS" ]] then print -- $HDISK_LIST; else print -- $INACTIVE_VGS; fi } ################################################################################# # # Function : postvg_for_rdisk # # Purpose : Invoke appropriate replicated resource methods for raw disks # in case of offline event. # # Input : RESOURCE_GROUPS - Space separated list of resource groups # # RHDISKS - Comma separated list of raw disks, colon # separated by resource group # HDISKS - Comma separated list of hdisks, colon # separated by resource group # # Return : status of replicated methods. # ################################################################################# postvg_for_rdisk () { typeset PS4_FUNC="postvg_for_rdisk" [[ $VERBOSE_LOGGING == "high" ]] && set -x STAT=0 integer FAILURE_IN_METHOD=0 LIST_OF_FAILED_RGS="" RG_LIST=$RESOURCE_GROUPS RDISK_LIST=$RHDISKS DISK_LIST=$HDISKS # : Resource groups are processed individually. This is required because : the replication mechanism may differ between resource groups. # for GROUPNAME in $RG_LIST do REPLICATED_RESOURCES=$(getReplicatedResources ${GROUPNAME} ) #check for replicated resource. if [[ $REPLICATED_RESOURCES == "true" ]] then RESOURCE_GROUPS=$GROUPNAME VOLUME_GROUPS="" HDISKS="" RHDISKS="" RDISK_LIST="" #check if any raw_disk associated with the resource group exist. if [[ -n $(clodmget -q "group=$GROUPNAME AND name=RAW_DISK" HACMPresource) ]] then TDISK_LIST=$(get_raw_hdisks $GROUPNAME|tail -1) RDISK_LIST="$RDISK_LIST $TDISK_LIST" fi echo $RDISK_LIST | IFS=: read LIST_OF_DISKS_FOR_RG DISK_LIST LIST_OF_DISKS_FOR_RG=$(echo $LIST_OF_DISKS_FOR_RG | tr ',' ' ') LIST_OF_DISKS_FOR_RG=$(echo $LIST_OF_DISKS_FOR_RG | tr ' ' '\n'|sort -u) RHDISKS=$LIST_OF_DISKS_FOR_RG echo $VG_LIST | IFS=: read LIST_OF_VG_FOR_RG VG_LIST LIST_OF_VG_FOR_RG=$(echo $LIST_OF_VG_FOR_RG | tr ' ' '\n') LIST_OF_VG_FOR_RG=$(echo $LIST_OF_VG_FOR_RG | tr ',' '\n') VOLUME_GROUPS=$LIST_OF_VG_FOR_RG # : At this point, the global variables below should be set to : the values associated with resource group $GROUPNAME # export RESOURCE_GROUPS export VOLUME_GROUPS export HDISKS export RHDISKS FAILURE_IN_METHOD=0 if [[ -n $RHDISKS && -z $VOLUME_GROUPS && $FAILURE_IN_METHOD == 0 ]] then # : clsetrepenv utility sets up the environment for replicated methods. # set -a eval $(clsetrepenv $GROUPNAME) set +a METHODS=$(cl_rrmethods2call postvg_offline) FAILURE_IN_METHOD=0 for method in $METHODS do if [[ -x $method ]] then $method $VOLUME_GROUPS fi STAT=$? if [[ $STAT != 0 ]] then # : call clRMupdate for the first resource, so that the cluster manager can react to the failure # print $RHDISKS | read FIRST_HDISKS rest cl_RMupdate resource_error $FIRST_HDISKS $PROGNAME FAILURE_IN_METHOD=1 fi done fi if (( FAILURE_IN_METHOD == 1 )) then LIST_OF_FAILED_RGS="$LIST_OF_FAILED_RGS $GROUPNAME" fi export $LIST_OF_FAILED_RGS fi done return $STAT } ############################################################################ # # Function: get_disks_main # # Purpose: Invoke any appropriate replicated resource methods to make # disks accessible for later volume group varyon # # Because of the restriction that a given resource group can # contain only one type of replicated resource, but different # resource groups can contain different types, resource groups # must be processed serially. # # Input: RESOURCE_GROUPS - Space separated list of resource groups # RHDISKS - Comma separated list of raw disks, colon # separated by resource group # HDISKS - Comma separated list of hdisks, colon # separated by resource group # VOLUME_GROUPS - Comma separated list of volume groups, # one per hdisk, colon separated by # resource group # # E.g., # RESOURCE_GROUPS="curley larry moe shemp" # HDISKS="hdisk11,hdisk22:hdisk21,hdisk31:hdisk99:hdisk101,hdisk1" # VOLUME_GROUPS="vg01,vg01:vg02,vg02,::vg03,vg0" # # Output: None # ############################################################################ get_disks_main () { typeset PS4_FUNC="get_disks_main" [[ $VERBOSE_LOGGING == "high" ]] && set -x integer SKIPBRKRES=0 STAT=0 integer FAILURE_IN_METHOD=0 LIST_OF_FAILED_RGS="" # : Below are the list of resources as generated by clrgpa # RG_LIST=$RESOURCE_GROUPS RDISK_LIST=$RHDISKS DISK_LIST=$HDISKS VG_LIST=$VOLUME_GROUPS # : Resource groups are processed individually. This is required because : the replication mechanism may differ between resource groups. # for GROUPNAME in $RG_LIST do REPLICATED_RESOURCES=$(getReplicatedResources ${GROUPNAME} ) # : Break out the resources for resource group $GROUPNAME # RESOURCE_GROUPS=$GROUPNAME VOLUME_GROUPS="" HDISKS="" RHDISKS="" RDISK_LIST="" # : Get the volume groups in resource group $GROUPNAME # print $VG_LIST | IFS=: read VOLUME_GROUPS VG_LIST # : Removing duplicate entries in VG list. # VOLUME_GROUPS=$(echo $VOLUME_GROUPS | tr ',' '\n' | sort -u | xargs) # : Get the disks corresponding to these volume groups # print $DISK_LIST | IFS=: read HDISKS DISK_LIST HDISKS=${HDISKS//,/ } # : Pick up any raw disks not returned by clrgpa # if [[ -n $(clodmget -q "group=$GROUPNAME AND name=RAW_DISK" HACMPresource) ]] then TDISK_LIST=$(get_raw_hdisks $GROUPNAME|tail -1) if [[ -n $TDISK_LIST ]] then RDISK_LIST="$RDISK_LIST $TDISK_LIST" fi fi # : Get any raw disks in resource group $GROUPNAME # print $RDISK_LIST | IFS=: read RHDISKS RDISK_LIST RHDISKS=${RHDISKS//,/ } print $VOLUME_GROUPS | read VOLUME_GROUPS # : At this point, the global variables below should be set to : the values associated with resource group $GROUPNAME # export RESOURCE_GROUPS export VOLUME_GROUPS export HDISKS export RHDISKS if [[ $REPLICATED_RESOURCES == "true" ]] then # : clsetrepenv utility sets up the environment for replicated methods. # set -a eval $(clsetrepenv $GROUPNAME) set +a # : Determine the predisk_available method for this resource group. : By convention, a resource group can contain only one type of : replicated resource, so there is only one method for this resource : group. # METHODS=$(cl_rrmethods2call predisk_available) FAILURE_IN_METHOD=0 INACTIVE_VGS=$(get_inactive_vgs_or_disks "VGS") if [[ -n $INACTIVE_VGS || -n $HDISKS || -n $RHDISKS ]] then FAILURE_IN_METHOD=0 for method in $METHODS do if [[ -x $method ]] then $method $INACTIVE_VGS case $? in 0) continue ;; 3) SKIPBRKRES=1 ;; 4) export SKIP_FORCED_VARYON=true ;; *) cl_log 3090 "Replicated Method $method failed for $GROUPNAME.\n" $method $GROUPNAME # : call clRMupdate for the first resource, so that the cluster manager can react to the failure # INACTIVE_VGS contains volume groups if [[ -n $INACTIVE_VGS ]] then print $INACTIVE_VGS | read FIRST_VG rest cl_RMupdate resource_error $FIRST_VG $PROGNAME else print $HDISKS | read FIRST_HDISKS rest cl_RMupdate resource_error $FIRST_HDISKS $PROGNAME fi FAILURE_IN_METHOD=1 break # : do not call the other methods for this rg, since we ran into acquisition failure # ;; esac fi done # : Only break reserves if methods ran correctly # if (( FAILURE_IN_METHOD == 0 )) then if [[ -n $(clodmget -q "group=$GROUPNAME AND name like '*VOLUME_GROUP'" HACMPresource) ]] then # : Only need to break reserves for nonconcurrent volume groups, : and only if not told to skip it by the replicated resource : method # (( $SKIPBRKRES == 0 )) && get_disks STAT=$? fi elif (( FAILURE_IN_METHOD == 1 )) then LIST_OF_FAILED_RGS="$LIST_OF_FAILED_RGS $GROUPNAME" fi if [[ -n $RHDISKS && -z $VOLUME_GROUPS && $FAILURE_IN_METHOD == 0 ]] then # # This block of code appears to be part of "raw disk" support - # if there are raw disks, but no volume groups defined, call the # prevg_online method here, since it will not be called later on. # # The careful reader will note with some bemusement that # $INACTIVE_VGS, passed to the prevg_online method, is always # null if $VOLUME_GROUPS is null, which it must be by the if # statement above. Additionally, the list of raw hdisks is not # passed, except possibly through the environment. # : clsetrepenv utility sets up the environment for replicated methods. # set -a eval $(clsetrepenv $GROUPNAME) set +a METHODS=$(cl_rrmethods2call prevg_online) FAILURE_IN_METHOD=0 for method in $METHODS do if [[ -x $method ]] then $method $INACTIVE_VGS fi STAT=$? if [[ $STAT != 0 ]] then # : call clRMupdate for the first resource, so that the cluster manager can react to the failure # print $RHDISKS | read FIRST_HDISKS rest cl_RMupdate resource_error $FIRST_HDISKS $PROGNAME FAILURE_IN_METHOD=1 fi done fi fi export $LIST_OF_FAILED_RGS else get_disks STAT=$? fi done # end of GROUPNAME loop return $STAT } ############################################################################ # Function: get_disks # Purpose: calls cl_disk_available, which will make disks available to # this node # Parameters: none # Return: none ############################################################################ get_disks() { typeset PS4_FUNC="get_disks" [[ $VERBOSE_LOGGING == "high" ]] && set -x STAT=0 # : Most volume groups are Enhanced Concurrent Mode, and it should : not be necessary to break reserves. If all the volume groups : are ECM, we should be able to skip breaking reserves. If it : turns out that there is a reserve on a disk in an ECM volume : group, that will be handled by cl_pvo making an explicit call : to cl_disk_available. # # See prolog of get_disks_main for the format of $VOLUME_GROUPS # all_ecm=TRUE for vg_list in $(IFS=: set -- $VOLUME_GROUPS ; print $*) do for vg in $(print $vg_list | tr ',' '\n' | sort -u) do if [[ y != $(clodmget -q "name = $vg and attribute = conc_capable" -f value -n CuAt) ]] then all_ecm=FALSE break fi done [[ $all_ecm == FALSE ]] && break done [[ $all_ecm == TRUE ]] && return 0 # # If the 'sddsrv' daemon is running - vpath dead path detection and # recovery - turn it off, since interactions with the fibre channel # device driver will, in the case where there actually is a dead path, # slow down every vpath operation. # if echo $HDISKS | grep -q vpath then # # Each of the V, R, M and F fields are padded to fixed length, # to allow reliable comparisons. E.g., maximum VRMF is # 99.99.999.999 # integer V R M F typeset -Z2 R # two digit release typeset -Z3 M # three digit modification typeset -Z3 F # three digit fix integer VRMF=0 # : Check if running an early level of SDD, which requires stopping. # sdd_level=106003000 if lslpp -lcq "devices.sdd.*.rte" | cut -f3 -d':' | IFS=. read V R M F then VRMF=$V$R$M$F # get the SDD level fi if (( $R >= 07 )) then sdd_level=107002005 fi # # Check to see if SDD is active, and an early level # if (( $VRMF < $sdd_level )) && lssrc_out=$(LC_ALL=C lssrc -s sddsrv) then integer pid=0 print "$lssrc_out" | tail -1 | read subsys rest (set -- $rest ; eval print \${$(($#-1))} \${$#}) | read pid state if [[ $subsys == "sddsrv" && $state == "active" ]] && (( $pid != 0 )) then date # took how long to shut down SDD # : The stopsrc command does not include the -c flag for 2 reasons: : 1. The possible SIGKILL could result in "Invalid vpaths", and : 2. Time for the daemon to go inoperative could be several : minutes in cases where many vpaths are not accessible # stopsrc -s sddsrv dspmsg scripts.cat 3091 "$PROGNAME: Waiting for sddsrv to go inoperative. This could take several minutes if some vpaths are inaccessible.\n" $PROGNAME # : No need to clog the log file with this # set +x # : Now wait for sddsrv to shut down # while [[ $subsys == "sddsrv" && $state != "inoperative" ]] do sleep 1 if ! lssrc_out=$(LC_ALL=C lssrc -s sddsrv) then # : SRC stopped talking to us. No longer wait for it # break else # : Pick up current state # lssrc_out=$(LC_ALL=C lssrc -s sddsrv | tail -1) state=$(set -- $lssrc_out ; eval print \${$#}) fi done [[ "$VERBOSE_LOGGING" == "high" ]] && set -x date # took how long to shut down SDD sddsrv_off=TRUE # Note that it was turned off fi fi fi # : Break any reserverations, and make the disks available # cl_disk_available -v RC=$? if (( $RC != 0 && $RC != 11 )) then # : 11 = acquiring/releasing unsuccessful, but clRMupdate was run. : If there was an error not already handled, notify the resource manager # for GROUPNAME in $RESOURCE_GROUPS do export GROUPNAME ALLDISKS="All_disks" cl_RMupdate resource_error $ALLDISKS $PROGNAME done fi if (( $RC != 0 )) then STAT=$RC fi return $STAT } ############################################################################ # Function: get_raw_disks # Purpose: calls cfgmgr, which will make disks available to # this node # Parameters: none # Return: none ############################################################################ get_raw_hdisks() { typeset PS4_FUNC="get_raw_hdisks" [[ $VERBOSE_LOGGING == "high" ]] && set -x group=$1 UUIDS=$(clodmget -q "group=$group AND name=RAW_DISK" -f value -n HACMPresource) LOCALNODENAME=$(get_local_nodename) LOCALSITENAME=$(cllssite -c -n| grep -v "#nodename" | grep -w $LOCALNODENAME |cut -f2 -d":") TMP_HDISK="" TMP_RDISK_LIST="" for uuid in $UUIDS do REPDISK_TYPE=$(clodmget -q "value=$uuid AND type like 'repdisk@*'" -f type -n HACMPresource) REPDISK_SITE=$(echo $REPDISK_TYPE |cut -f2 -d "@") if [[ -n $REPDISK_TYPE ]] then if [[ $REPDISK_SITE == $LOCALSITENAME ]] then TMP_HDISK=$(/usr/es/sbin/cluster/xd_generic/xd_cli/clxd_list_rdisk -u $uuid |grep -w $uuid |awk '{print $2}') [[ -z $TMP_HDISK ]] && cl_log 3092 "WARNING: UUID %s not found on node %s of site %s. Run /usr/es/sbin/cluster/xd_generic/xd_cli/clxd_list_rdisk and then run cfgmgr -l hdiskX for those disks showing 'none'.\n" "$uuid" "$LOCALNODENAME" "$LOCALSITENAME" TMP_RDISK_LIST="$TMP_RDISK_LIST $TMP_HDISK," /usr/sbin/cfgmgr -l $TMP_HDISK -v 2> /dev/null else continue fi else TMP_HDISK=$(/usr/es/sbin/cluster/xd_generic/xd_cli/clxd_list_rdisk -u $uuid |grep -w $uuid |awk '{print $2}') [[ -z $TMP_HDISK ]] && cl_log 3092 "WARNING: UUID %s not found on node %s of site %s. Run /usr/es/sbin/cluster/xd_generic/xd_cli/clxd_list_rdisk and then run cfgmgr -l hdiskX for those disks showing 'none'.\n" "$uuid" "$LOCALNODENAME" "$LOCALSITENAME" TMP_RDISK_LIST="$TMP_RDISK_LIST $TMP_HDISK," PSWAPDISK_TYPE=$(clodmget -q "value=$uuid AND type like 'pswap'" -f type -n HACMPresource) [[ -z $PSWAPDISK_TYPE ]] && { /usr/sbin/cfgmgr -l $TMP_HDISK -v 2> /dev/null } fi done echo "\n\n" echo "$TMP_RDISK_LIST" } ############################################################################ # Function: unmount_nfs_filesystems # Purpose: Unmounts nfs filesystems # Parameters: none # Return: none ############################################################################ unmount_nfs_filesystems() { typeset PS4_FUNC="unmount_nfs_filesystems" [[ $VERBOSE_LOGGING == "high" ]] && set -x STAT=0 cl_deactivate_nfs RC=$? # if (( $RC != 0 && $RC != 11 )) then # : 11 = acquiring/releasing unsuccessful, but clRMupdate was run. : If there was an error not already handled, notify the resource manager # for GROUPNAME in $RESOURCE_GROUPS do export GROUPNAME ALLNFS="All_nfs_filesystems" cl_RMupdate resource_error $ALLNFS $PROGNAME done fi if (( $RC != 0 )) then STAT=$RC fi return $STAT } ############################################################################ # Function: mount_nfs_filesystems # Purpose: Mounts NFS filesystems # Parameters: none # Return: none ############################################################################ mount_nfs_filesystems() { typeset PS4_FUNC="mount_nfs_filesystems" [[ $VERBOSE_LOGGING == "high" ]] && set -x post_event_member=FALSE for node in $POST_EVENT_MEMBERSHIP do if [[ "$node" == "$LOCALNODENAME" ]] then post_event_member=TRUE break fi done # : This node will not be in the resource group so do not mount filesystems. # if [[ $post_event_member == "FALSE" ]] then return 0 fi STAT=0 for GROUPNAME in $RESOURCE_GROUPS do export GROUPNAME get_list_head $FILE_SYSTEMS | read LIST_OF_FILE_SYSTEMS_FOR_RG get_list_tail $FILE_SYSTEMS | read FILE_SYSTEMS get_list_head $NFS_HOSTS | read NFS_HOST get_list_tail $NFS_HOSTS | read NFS_HOSTS get_list_head $NFS_NETWORKS | read NFS_NETWORK get_list_tail $NFS_NETWORKS | read NFS_NETWORKS get_list_head $IP_LABELS | read LIST_OF_IP_LABELS_FOR_RG get_list_tail $IP_LABELS | read IP_LABELS MOUNT_FILESYSTEM=$LIST_OF_FILE_SYSTEMS_FOR_RG NFSMOUNT_LABEL=$LIST_OF_IP_LABELS_FOR_RG # : Do the required NFS_mounts. # NW_NFSMOUNT_LABEL="" if [[ -z $NFS_HOST ]] then NFS_HOST=$LOCALNODENAME fi NFSHOST="" if [[ -n $NFS_HOST ]] then if [[ -n $NFS_NETWORK ]] then # : This lets the user pick a preferred network for NFS mounting, and is calculated for the RG # ALL_NFSMOUNT_LABEL=$LIST_OF_IP_LABELS_FOR_RG for label in $ALL_NFSMOUNT_LABEL do IN_NETWORK=$(cllsif -cS 2> /dev/null | grep :$NFS_NETWORK: | cut -d: -f1 | grep -x ${label} ) if [[ -n $IN_NETWORK ]] then NW_NFSMOUNT_LABEL="$NW_NFSMOUNT_LABEL $label" fi done fi # # preference: # RG-bound service labels for given prefered network first, then # RG-bound service labels # if [[ $1 == "REMOUNT" ]]; then for host in $NW_NFSMOUNT_LABEL $NFSMOUNT_LABEL do arp -d $host done fi for host in $NW_NFSMOUNT_LABEL $NFSMOUNT_LABEL do if ping $host 1024 1 >/dev/null then NFSHOST=$host break fi done if [[ -n $NFSHOST ]] then # : activate_nfs will not wait for the mounts to complete # cl_activate_nfs 1 $NFSHOST "$MOUNT_FILESYSTEM" RC=$? if (( $RC != 0 && $RC != 11 )) then for fs in $MOUNT_FILESYSTEM do cl_RMupdate resource_error $fs $PROGNAME done fi if (( $RC != 0 )) then STAT=$RC fi else for fs in $MOUNT_FILESYSTEM do cl_RMupdate resource_error $fs $PROGNAME done cl_log 653 "$PROGNAME: NFS Mounting failed. No reachable service interfaces found on node $NFS_HOST. " $PROGNAME $NFS_HOST STAT=1 fi fi done return $STAT } ############################################################################ # Function: unexport_filesystems # Purpose: Unexports nfs filesystems # Parameters: none # Return: none ############################################################################ unexport_filesystems() { typeset PS4_FUNC="unexport_filesystems" [[ $VERBOSE_LOGGING == "high" ]] && set -x STAT=0 NFSSTOPPED=0 RPCSTOPPED=0 export NFSSTOPPED # : For NFSv4, cl_unexport_fs will use STABLE_STORAGE_PATH, which is set by : clRGPA and can have colon-separated values for multiple RGs. : We will save off clRGPA values in stable_storage_path and then extract : each RG into STABLE_STORAGE_PATH for cl_unexport_fs. # typeset stable_storage_path="$STABLE_STORAGE_PATH" for GROUPNAME in $RESOURCE_GROUPS do export GROUPNAME get_list_head $EXPORT_FILE_SYSTEMS | read LIST_OF_EXPORT_FILE_SYSTEMS_FOR_RG get_list_tail $EXPORT_FILE_SYSTEMS | read EXPORT_FILE_SYSTEMS get_list_head $EXPORT_FILE_SYSTEMS_V4 | read LIST_OF_EXPORT_FILE_SYSTEMS_V4_FOR_RG get_list_tail $EXPORT_FILE_SYSTEMS_V4 | read EXPORT_FILE_SYSTEMS_V4 get_list_head $stable_storage_path | read STABLE_STORAGE_PATH get_list_tail $stable_storage_path | read stable_storage_path cl_unexport_fs "$LIST_OF_EXPORT_FILE_SYSTEMS_FOR_RG" \ "$LIST_OF_EXPORT_FILE_SYSTEMS_V4_FOR_RG" done return $STAT } ############################################################################ # Function: export_filesystems # Purpose: Exports filesystems specified # Parameters: none # Return: none ############################################################################ export_filesystems() { typeset PS4_FUNC="export_filesystems" [[ $VERBOSE_LOGGING == "high" ]] && set -x STAT=0 NFSSTOPPED=0 # see which daemons we have to stop for daemon in $DAEMONS do if [[ $daemon == RPCLOCKD ]] then stopsrc -s rpc.lockd touch /tmp/.RPCLOCKDSTOPPED # This should not be needed... fi done # : For NFSv4, cl_export_fs will use STABLE_STORAGE_PATH, which is set by : clRGPA and can have colon-separated values for multiple RGs. : We will save off clRGPA values in stable_storage_path and then extract : each RG into STABLE_STORAGE_PATH for cl_unexport_fs. # typeset stable_storage_path="$STABLE_STORAGE_PATH" export NFSSTOPPED for GROUPNAME in $RESOURCE_GROUPS do export GROUPNAME get_list_head $EXPORT_FILE_SYSTEMS | read LIST_OF_EXPORT_FILE_SYSTEMS_FOR_RG get_list_tail $EXPORT_FILE_SYSTEMS | read EXPORT_FILE_SYSTEMS get_list_head $EXPORT_FILE_SYSTEMS_V4 | read LIST_OF_EXPORT_FILE_SYSTEMS_V4_FOR_RG get_list_tail $EXPORT_FILE_SYSTEMS_V4 | read EXPORT_FILE_SYSTEMS_V4 get_list_head $stable_storage_path | read STABLE_STORAGE_PATH get_list_tail $stable_storage_path | read stable_storage_path cl_export_fs "$IP_LABELS" "$LIST_OF_EXPORT_FILE_SYSTEMS_FOR_RG" \ "$LIST_OF_EXPORT_FILE_SYSTEMS_V4_FOR_RG" RC=$? if (( $RC != 0 && $RC != 11 )) then ALLEXPORTS="All_exports" cl_RMupdate resource_error $ALLEXPORTS $PROGNAME fi if (( $RC != 0 )) then STAT=$RC fi done return $STAT } ############################################################################ # Function: process_comm_links # Purpose: Processes communication links (SNA) # Parameters: none # Return: none ############################################################################ process_comm_links() { typeset PS4_FUNC="process_comm_links" [[ $VERBOSE_LOGGING == "high" ]] && set -x STAT=0 if [[ $1 == "ACQUIRE" ]] then cmd_to_execute="cl_start_commlinks" else cmd_to_execute="cl_stop_commlinks" fi for GROUPNAME in $RESOURCE_GROUPS do export GROUPNAME get_list_head $COMM_LINKS | read LIST_OF_COMM_LINKS_FOR_RG get_list_tail $COMM_LINKS | read COMM_LINKS for COMMLINK in $LIST_OF_COMM_LINKS_FOR_RG do $cmd_to_execute $COMMLINK RC=$? if (( $RC != 0 && $RC != 11 )) then cl_RMupdate resource_error $COMMLINK $PROGNAME fi if (( $RC != 0 )) then STAT=$RC break fi done done return $STAT } ############################################################################ # Function: process_aix_connections # Purpose: Gets/Releases AIX connection services # Parameters: none # Return: none ############################################################################ process_aix_connections() { typeset PS4_FUNC="process_aix_connections" [[ $VERBOSE_LOGGING == "high" ]] && set -x STAT=0 if [[ $1 == "ACQUIRE" ]] then cmd_to_execute="get_aconn_rs" else cmd_to_execute="release_aconn_rs" fi for GROUPNAME in $RESOURCE_GROUPS do export GROUPNAME get_list_head $AIX_CONNECTIONS_SERVICES | read LIST_OF_AIX_CONNECTIONS_SERVICES_FOR_RG get_list_tail $AIX_CONNECTIONS_SERVICES | read AIX_CONNECTIONS_SERVICES clcallev $cmd_to_execute "$LIST_OF_AIX_CONNECTIONS_SERVICES_FOR_RG" RC=$? if (( $RC != 0 && $RC != 11 )) then ALLACONNRS="All_aconn_rs" cl_RMupdate resource_error $ALLACONNRS $PROGNAME fi if (( $RC != 0 )) then STAT=$RC fi done return $STAT } ############################################################################ # Function: process_aix_fast_connections # Purpose: Gets/Releases AIX Fast Connection Services # Parameters: none # Return: none ############################################################################ process_aix_fast_connections() { typeset PS4_FUNC="process_aix_fast_connections" [[ $VERBOSE_LOGGING == "high" ]] && set -x STAT=0 if [[ $1 == "ACQUIRE" ]] then cmd_to_execute="get_fast_connect_rs" else cmd_to_execute="release_fast_connect_rs" fi for GROUPNAME in $RESOURCE_GROUPS do export GROUPNAME get_list_head $AIX_FAST_CONNECTION_SERVICES | read LIST_OF_AIX_FAST_CONNECT_SERVICES_FOR_RG get_list_tail $AIX_FAST_CONNECTION_SERVICES | read AIX_FAST_CONNECTION_SERVICES get_list_head $MISCDATA | read MISCDATA_FOR_RG get_list_tail $MISCDATA | read MISCDATA export AIX_FAST_CONNECT_SERVICES=$LIST_OF_AIX_FAST_CONNECT_SERVICES_FOR_RG export MISC_DATA=$MISCDATA_FOR_RG # : update resource manager with this action # ALLFCONNRS="All_fast_connrs" if [[ $1 == "ACQUIRE" ]] then cl_RMupdate resource_acquiring $ALLFCONNRS $PROGNAME else cl_RMupdate resource_releasing $ALLFCONNRS $PROGNAME fi $cmd_to_execute RC=$? if (( $RC != 0 && $RC != 11 )) then cl_RMupdate resource_error $ALLFCONNRS $PROGNAME fi if (( $RC != 0 )) then STAT=$RC fi # : update resource manager with results # ALLNOERRFCONN="All_nonerror_fast_connrs" if [[ $1 == "ACQUIRE" ]] then cl_RMupdate resource_up $ALLNOERRFCONN $PROGNAME else cl_RMupdate resource_down $ALLNOERRFCONN $PROGNAME fi done return $STAT } ############################################################################ # Function: process_tape_resources # Purpose: Gets/Releases tape resources # Parameters: none # Return: none ############################################################################ process_tape_resources() { typeset PS4_FUNC="process_tape_resources" [[ $VERBOSE_LOGGING == "high" ]] && set -x STAT=0 if [[ $1 == "ACQUIRE" ]] then cmd_to_execute="cl_tape_resource_get_multi" else cmd_to_execute="cl_tape_resource_release_multi" fi for GROUPNAME in $RESOURCE_GROUPS do export GROUPNAME get_list_head $SHARED_TAPE_RESOURCES | read LIST_OF_SHARED_TAPE_RESOURCES_FOR_RG get_list_tail $SHARED_TAPE_RESOURCES | read SHARED_TAPE_RESOURCES $cmd_to_execute "$LIST_OF_SHARED_TAPE_RESOURCES_FOR_RG" RC=$? if (( $RC != 0 && $RC != 11 )) then ALLTAPE="All_tape_resources" cl_RMupdate resource_error $ALLTAPE $PROGNAME fi if (( $RC != 0 )) then STAT=$RC fi done return $STAT } ############################################################################ # Function: process_concurrent_vgs # Purpose: varyon/varyoff concurrent volume groups # Parameters: none # Return: none ############################################################################ process_concurrent_vgs() { typeset PS4_FUNC="process_concurrent_vgs" [[ $VERBOSE_LOGGING == "high" ]] && set -x integer SKIPVARYON=0 integer SKIPVARYOFF=0 integer RC=0; typeset ACTION=$1 RG_LIST=$RESOURCE_GROUPS # space separated list of resource groups VG_LIST=$VOLUME_GROUPS # comma separated list of volume groups # colon separated by resource group for GROUPNAME in $RG_LIST do RESOURCE_GROUPS=$GROUPNAME REPLICATED_RESOURCES=$(getReplicatedResources ${GROUPNAME}) # do we have any replicated resources? # : Get a space-separated list of VGs in this RG # print $VG_LIST | IFS=: read VOLUME_GROUPS VG_LIST VOLUME_GROUPS=${VOLUME_GROUPS//,/ } # : Override the original list for the routines called below. : Some resource routines expect to deal with only one RG at a time. # export VOLUME_GROUPS export RESOURCE_GROUPS # : Check specific operation for RG "'$GROUPNAME'" # if [[ $ACTION == "RELEASE" || $ACTION == "RELEASE_SECONDARY" ]] then # : RG "'$GROUPNAME'" releasing, associated VGs must be deactivated # if [[ $REPLICATED_RESOURCES == "true" ]] then # : The concurrent VGs being released are replicated resources. First, set : the SIBLING_ environment variables for the replicated resource methods # set -a eval $(clsetrepenv $GROUPNAME) set +a # : Invoke the appropriate replicated resource method prior to releasing the VG. # The alert reader will note the implicit assumption that there is only one kind # of replicated resource in a RG. # call_conc_replicated_methods "prevg_offline" "$VOLUME_GROUPS" RC=$? if (( $RC == 3 )) then # : The replicated method asked to skip the default action...not an error # SKIPVARYOFF=1 RC=0 # not an error elif (( $RC != 0 )) then # : Any other nonzero return code from the replicated resource method is a failure, RC=$RC # return $RC fi fi # end have replicated resources # : All VGs groups must be varyd off unless an error occurred, or : the replicated resource method requested the operation be skipped # if (( $SKIPVARYOFF == 0 )) then cl_deactivate_vgs RC=$? fi # : Process based on the results of cl_deactivate_vgs for the : concurrent VGs "'$VOLUME_GROUPS'" in RG "'$GROUPNAME'" # if (( $RC != 0 && $RC != 11 )) then # : Any error not yet reported to the resource manager will result in all the : concurrent VGs for this RG to be considered in error state. # cl_RMupdate resource_error "All_concurrent_vgs" $PROGNAME elif (( $RC == 0 )) && [[ $REPLICATED_RESOURCES == "true" ]] then # : The VGs went offline cleanly. Invoke the appropriate replicated resource : method for post-offline processing for RG "'$GROUPNAME'". # set -a eval $(clsetrepenv $GROUPNAME) set +a call_conc_replicated_methods "postvg_offline" "$VOLUME_GROUPS" RC=$? if (( $RC != 0 )) then if [[ $ACTION == "RELEASE" ]] then cl_RMupdate rg_error $GROUPNAME $PROGNAME else cl_RMupdate rg_error_secondary $GROUPNAME $PROGNAME fi fi fi if (( $RC != 0 )) then return $RC # bail if anything went wrong fi elif [[ $ACTION == "ACQUIRE" || $ACTION == "ACQUIRE_SECONDARY" ]] then # : Acquiring RG "'$GROUPNAME'", activate the associated VGs "'$VOLUME_GROUPS'". # if ls /dev/vpath* > /dev/null 2>&1 then # # If the 'sddsrv' daemon is running - vpath dead path detection and # recovery - turn it off, since interactions with the fibre channel # device driver will, in the case where there actually is a dead path, # slow down every vpath operation. # integer V R M F typeset -Z2 R # two digit release typeset -Z3 M # three digit modification typeset -Z3 F # three digit fix integer VRMF=0 # : Check if running an early level of SDD, which requires stopping. # Each of the V, R, M and F fields are padded to fixed length, # to allow reliable comparisons. E.g., maximum VRMF is # 99.99.999.999 # sdd_level=106003000 if lslpp -lcq devices.sdd.*.rte | cut -f3 -d':' | IFS=. read V R M F then VRMF=$V$R$M$F # get the SDD level fi if (( $R >= 07 )) then sdd_level=107002005 fi # : Check to see if SDD is active, and an early level # if (( $VRMF < $sdd_level )) && lssrc_out=$(LC_ALL=C lssrc -s sddsrv) then integer pid=0 print "$lssrc_out" | tail -1 | read subsys rest (set -- $rest ; eval print \${$(($#-1))} \${$#}) | read pid state if [[ $subsys == "sddsrv" && $state == "active" ]] && (( $pid != 0 )) then date # took how long to shut down SDD # : The stopsrc command does not include the -c flag for 2 reasons: : 1. The possible SIGKILL could result in "Invalid vpaths", and : 2. Time for the daemon to go inoperative could be several : minutes in cases where many vpaths are not accessible # stopsrc -s sddsrv dspmsg scripts.cat 3091 "$PROGNAME: Waiting for sddsrv to go inoperative. This could take several minutes if some vpaths are inaccessible.\n" $PROGNAME # : No need to clog the log file with this # set +x # : Now wait for sddsrv to shut down # while [[ $subsys == "sddsrv" && $state != "inoperative" ]] do sleep 1 if ! lssrc_out=$(LC_ALL=C lssrc -s sddsrv) then # : SRC stopped talking to us. No longer wait for it # break else # : Pick up current state # lssrc_out=$(LC_ALL=C lssrc -s sddsrv | tail -1) state=$(set -- $lssrc_out ; eval print \${$#}) fi # end check lssrc state done [[ "$VERBOSE_LOGGING" == "high" ]] && set -x date # took how long to shut down SDD sddsrv_off=TRUE # Note that it was turned off fi # end sddsrv is active fi # end sddsrv is at a level that needs this fi # end vpaths present # : Determine whether sync processing has been requested. Must be the same for all VGs in the RG. # print $SYNCH | IFS=: read LIST_OF_SYNCS_FOR_RG SYNCH print $LIST_OF_SYNCS_FOR_RG | cut -f 1 -d ',' | read SYNC_FOR_RG if [[ $REPLICATED_RESOURCES == "true" ]] then # : The concurrent VGs being released are replicated resources. First, set : the SIBLING_ environment variables for the replicated resource methods # set -a eval $(clsetrepenv $GROUPNAME) set +a # : Invoke the appropriate replicated resource method prior to releasing the VG. # The alert reader will note the implicit assumption that there is only one kind # of replicated resource in a RG. # call_conc_replicated_methods "prevg_online" "$VOLUME_GROUPS" RC=$? if (( 3 == $RC )) then # : The replicated method asked to skip the default action...not an error # SKIPVARYON=1 RC=0 elif (( 0 != $RC )) then # : Any other nonzero return code from the replicated resource method is a failure, RC=$RC # return $RC fi fi # end have replicated resources # : All VGs groups must be varyd off unless an error occurred, or : the replicated resource method requested the operation be skipped # sync_flag="" if [[ $ACTION == "ACQUIRE" ]] && (( $SKIPVARYON == 0 )) then if (( $SYNC_FOR_RG == 1 )) then sync_flag="-s" #need to sync it else sync_flag="-n" #no need to sync it fi elif [[ $1 == "ACQUIRE_SECONDARY" ]] && (( $SKIPVARYON == 1 )) then # # The alert reader will note that we appear to be about # to do a varyon of the volume group - albeit without # synchronization - in direct contradiction to the state # of the SKIPVARYON flag. This apparent studied # perversity is due to the fact that the flag actually # means 'do the opposite of what you normally do'. # For ACQUIRE_SECONDARY, the normal action is to not # do a varyon, so, when the flag is set, a varyon is # done. Clear? The practical utility, not so much so. # : SKIPVARYON inverts ACQUIRE_SECONDARY action, so varyon without sync sync_flag="-n" #no need to sync it fi if [[ -n $sync_flag ]] then # : Varyon VGs "'$VOLUME_GROUPS'", sync_flag = "'$sync_flag'" # cl_mode3 $sync_flag "$VOLUME_GROUPS" RC=$? # : Check results of cl_mode3 for concurrent VGs "'$VOLUME_GROUPS'" in "'$GROUPNAME'" # if (( $RC != 0 && $RC != 11 )) then # : Any error not yet reported to the resource manager will result in all the : concurrent VGs for this RG to be considered in error state. # cl_RMupdate resource_error "All_concurrent_vgs" $PROGNAME elif (( $RC == 0 )) && [[ $REPLICATED_RESOURCES == "true" ]] then # : The VGs went offline cleanly. Invoke the appropriate replicated resource : method for post-offline processing for RG "'$GROUPNAME'". # set -a eval $(clsetrepenv $GROUPNAME) set +a call_conc_replicated_methods "postvg_online" "$VOLUME_GROUPS" RC=$? if (( $RC != 0 )) then if [[ $1 == "ACQUIRE" ]] then cl_RMupdate rg_error $GROUPNAME $PROGNAME else cl_RMupdate rg_error_secondary $GROUPNAME $PROGNAME fi fi # end of post vg online failed fi # end of replicated resource processing fi # end of synchronous choice if (( $RC != 0 )) then return $RC # bail if anything went wrong fi fi # end of based on operation done # end loop over resource groups return 0 # no errors on any resource group } ############################################################################ # Function: process_lvm_phype_disks # Purpose: Identifies, whether any hyperswap raw disks also defined for # VG. If defined, it's MG would be added to GENXD_REP_RES # to bring MG online # Parameters: none # Global Variables: HDISKS, GENXD_REP_RESOURCE # Return: list if MGs ############################################################################ process_lvm_phype_disks() { MG_LIST="" VG_HDISK_LIST="" ALL_RGs=$(clodmget -n -f group HACMPgroup ) print $ALL_RGs | read ALL_RGs for vg in $VOLUME_GROUPS do VG_HDISK_LIST="$VG_HDISK_LIST $(cl_fs2disk -pg $vg)" done for group_name in $ALL_RGs do Need_prevg=0 TRDISK=$(clodmget -q "group=$group_name AND type=pswap AND name=RAW_DISK" -f value -n HACMPresource) if [[ -n $TRDISK ]] then TRHDISK_LIST=$(get_raw_hdisks $group_name |tail -1 | sort) #Verify all disks of retreived RG are part of current HDISKS if [[ -z $TRHDISK_LIST ]] then continue fi TRHDISK_LIST=$(echo $TRHDISK_LIST | tr ',' ' ') for TRHDISK in $TRHDISK_LIST do Need_prevg=0 print $VG_HDISK_LIST | grep -w $TRHDISK | read ret if [[ -n $ret ]] then Need_prevg=1 else break fi done if (( $Need_prevg == 1 )) then MG=$(clodmget -q "group=$group_name AND name=GENXD_REP_RESOURCE" -f value -n HACMPresource) print $GENXD_REP_RESOURCE | grep -w $MG | read ret if [[ -z $ret ]] then MG_LIST="$MG_LIST $MG" fi fi fi done echo ${MG_LIST} } process_volume_groups_main () { typeset PS4_FUNC="process_volume_groups_main" [[ $VERBOSE_LOGGING == "high" ]] && set -x integer DEF_VARYON_ACTION=0 integer FAILURE_IN_METHOD=0 typeset ACTION=$1 STAT=0 VG_LIST=$VOLUME_GROUPS RG_LIST=$RESOURCE_GROUPS for GROUPNAME in $RESOURCE_GROUPS do REPLICATED_RESOURCES=$(getReplicatedResources ${GROUPNAME}) RESOURCE_GROUPS=$GROUPNAME print -- $VG_LIST | IFS=: read VOLUME_GROUPS VG_LIST VOLUME_GROUPS=${VOLUME_GROUPS//,/ } # : At this point, these variables contain information only for $GROUPNAME # export VOLUME_GROUPS export RESOURCE_GROUPS #Override the original list if [[ $REPLICATED_RESOURCES == "true" ]] then if [[ $ACTION == "ACQUIRE" || $ACTION == "ACQUIRE_SECONDARY" ]] then # : Call the pre-varyon replicated resource method for RG "'$GROUPNAME'". # Note that a return code of 3 from the prevg-online method indicates # the default action should not happen. The default action for the # online_primary case is to varyon the VG. The default action for the # online_secondary case is to NOT varyon the VG # set -a eval $(clsetrepenv $GROUPNAME) set +a MGs=$(process_lvm_phype_disks) if [[ -n $MGs ]] then GENXD_REP_RESOURCE="$GENXD_REP_RESOURCE $MGs" export GENXD_REP_RESOURCE fi INACTIVE_VGS=$(get_inactive_vgs_or_disks "VGS") METHODS=$(cl_rrmethods2call prevg_online) FAILURE_IN_METHOD=0 for method in $METHODS do if [[ -x $method ]] then $method $INACTIVE_VGS case $? in 0) continue ;; 3) DEF_VARYON_ACTION=1 ;; *) cl_log 3090 "Replicated Method $method failed for $GROUPNAME.\n" $method $GROUPNAME # : call clRMupdate for the first resource, so that the cluster manager can react to the failure # for inactive_vg in $LIST_OF_VG_FOR_RG; do cl_RMupdate resource_error $inactive_vg $PROGNAME FAILURE_IN_METHOD=1 break #enough to call clRMupdate for one done break : do not call the other methods for this rg, since we ran into acquisition failure ;; esac fi done if (( $FAILURE_IN_METHOD == 0 )) then if [[ -n $INACTIVE_VGS ]] && [[ $ACTION == "ACQUIRE" && $DEF_VARYON_ACTION == 0 ]] then # : There are volume groups associated with replicated : resources that are going to be varied on later on. : Set up a passive varyon for all these volume groups : on all the nodes at this site. # if [[ -z $LOCALNODENAME ]] then export LOCALNODENAME=$(get_local_nodename) fi this_site_nodes=$(cllssite -c | tail +2 | cut -f2 -d: | grep -w $LOCALNODENAME) rg_node_list=$(clodmget -q "group = $GROUPNAME" -f nodes -n HACMPgroup | tr ' ' '\n' | paste -s -d'|' -) this_site_rg_nodes=$(print $this_site_nodes | tr ' ' '\n' | egrep -x "$rg_node_list" | paste -s -d',' -) VG_PASSIVE_LIST="" for VG in $INACTIVE_VGS do if [[ -z $(odmget -q "name = CONCURRENT_VOLUME_GROUP and value = $VG" HACMPresource 2>/dev/null) ]] then # : Add $VG to the list of volume groups to passive vary on : if not true concurrent mode # VG_PASSIVE_LIST="$VG_PASSIVE_LIST $VG" fi done if [[ -n $VG_PASSIVE_LIST && -n $this_site_rg_nodes ]] then # : If necessary, convert these volume groups to ECM : and passively vary on the volume groups in $VG_PASSIVE_LIST : on all nodes on this site # if [[ $VERBOSE_LOGGING == "high" ]] then logger='VERBOSE_LOGGING="high"' fi set_lang='LC_ALL="C"' cl_on_node -cspoc "-n $this_site_rg_nodes -f" $set_lang 'lssrc -s gsclvmd | tail +2 | grep -q gsclvmd.*active &&' $logger cl_pvo -v "'$VG_PASSIVE_LIST'" fi fi # acquiring inactive vgs if [[ ( $ACTION == "ACQUIRE" && $DEF_VARYON_ACTION == 0 ) || ( $ACTION == "ACQUIRE_SECONDARY" && $DEF_VARYON_ACTION == 1 ) ]] then # : Now, bring the volume groups on line in active mode # cl_activate_vgs -n RC=$? if (( $RC != 0 && $RC != 11 )) then export GROUPNAME # update the resource manager ALLVGS="All_volume_groups" cl_RMupdate resource_error $ALLVGS $PROGNAME fi fi METHODS=$(cl_rrmethods2call postvg_online) for method in $METHODS do if [[ -x $method ]] then if ! $method $INACTIVE_VGS then cl_log 3090 "Replicated Method $method failed for $GROUPNAME.\n" $method $GROUPNAME # call clRMupdate for the first resource, so that the # cluster manager can react to the failure for inactive_vg in $LIST_OF_VG_FOR_RG do cl_RMupdate resource_error $inactive_vg $PROGNAME break #enough to call clRMupdate for one done break # do not call the rest of the methods fi fi done fi # FAILURE_IN_METHOD == 0 elif [[ $ACTION == "RELEASE" || $ACTION == "RELEASE_SECONDARY" ]] then # ## RELEASE # # : Call the pre-varyoff replicated resource method for RG "'$GROUPNAME'". # that we are currently processing. Note that a return code of 3 from # the prevg-offline method indicates the we should override the # default action. The default action for offline_primary is to # varyoff the VG and the default action for offline_secondary # is to NOT varyoff the VG # set -a eval $(clsetrepenv $GROUPNAME) set +a MGs=$(process_lvm_phype_disks) if [[ -n $MGs ]] then GENXD_REP_RESOURCE="$GENXD_REP_RESOURCE $MGs" export GENXD_REP_RESOURCE fi DEF_VARYOFF_ACTION=0 METHODS=$(cl_rrmethods2call prevg_offline) for method in $METHODS do if [[ -x $method ]] then $method $VOLUME_GROUPS case $? in 0) continue ;; 3) DEF_VARYOFF_ACTION=1 ;; *) exit 1 ;; esac fi done if [[ ( $ACTION == "RELEASE" && $DEF_VARYOFF_ACTION == 0 ) || ( $ACTION == "RELEASE_SECONDARY" && $DEF_VARYOFF_ACTION == 1 ) ]] then cl_deactivate_vgs -n RC=$? if (( $RC != 0 && $RC != 11 )) then export GROUPNAME # update the resource manager ALLVGS="All_volume_groups" cl_RMupdate resource_error $ALLVGS $PROGNAME else # : On successful deactivation of the volume groups, : release the passive varyons # LOCALNODE=$(get_local_nodename) LOCALSITE=$(cllssite -c -n| grep -v "#nodename" | grep -w $LOCALNODE |cut -f2 -d":") this_site_nodes=$(cllssite -c | grep -w "^$LOCALSITE" | cut -f2 -d:) this_site_nodes=$(IFS=, set -- $this_site_nodes ; print "$*" ) # comma separated for VG in $VOLUME_GROUPS do if ! LC_ALL=C lsvg -L $VG 2>/dev/null | grep -i -q 'passive-only' then continue # not in passive mode fi # : Bring fully off line on all nodes at this site # cl_on_node -cspoc "-n $this_site_nodes -f" varyoffvg $VG done fi fi # : Call the post-varyoff replicated resource method for RG "'$GROUPNAME'". # METHODS=$(cl_rrmethods2call postvg_offline) for method in $METHODS do if [[ -x $method ]] then if ! $method $VOLUME_GROUPS then exit 1 fi fi done fi # elseif RELEASE or RELEASE_SECONDARY else # No replicated resources process_volume_groups $ACTION STAT=$? fi done return $STAT } ############################################################################ # Function: process_volume_groups # Purpose: varyon/varyoff non-concurrent volume groups # Parameters: none # Return: none ############################################################################ process_volume_groups() { typeset PS4_FUNC="process_volume_groups" [[ $VERBOSE_LOGGING == "high" ]] && set -x STAT=0 export GROUPNAME=$RESOURCE_GROUPS if [[ $1 == "ACQUIRE" ]] then # : Varyon the VGs in the environment # cl_activate_vgs -n RC=$? if (( $RC != 0 && $RC != 11 )) then for GROUPNAME in $RESOURCE_GROUPS do export GROUPNAME # : update the resource manager # ALLVGS="All_volume_groups" cl_RMupdate resource_error $ALLVGS $PROGNAME done fi if (( $RC != 0 )) then STAT=$RC fi elif [[ $1 == "RELEASE" ]] then cl_deactivate_vgs -n RC=$? if (( $RC != 0 && $RC != 11 )) then for GROUPNAME in $RESOURCE_GROUPS do export GROUPNAME # : update the resource manager # ALLVGS="All_volume_groups" cl_RMupdate resource_error $ALLVGS $PROGNAME done fi if (( $RC != 0 )) then STAT=$RC fi fi return $STAT } ############################################################################ # Function: process_file_systems # Purpose: Mount/Unmount local filesystems # Parameters: none # Return: none ############################################################################ process_file_systems() { typeset PS4_FUNC="process_file_systems" [[ $VERBOSE_LOGGING == high ]] && set -x STAT=0 if [[ $1 == "ACQUIRE" ]] then cl_activate_fs RC=$? if (( $RC != 0 && $RC != 11 )) then for GROUPNAME in $RESOURCE_GROUPS do export GROUPNAME # : update the resource manager # ALLFS="All_filesystems" cl_RMupdate resource_error $ALLFS $PROGNAME done fi if (( $RC != 0 )) then STAT=$RC fi else cl_deactivate_fs RC=$? if (( $RC != 0 && $RC != 11 )) then for GROUPNAME in $RESOURCE_GROUPS do export GROUPNAME # : update the resource manager # ALLFS="All_filesystems" cl_RMupdate resource_error $ALLFS $PROGNAME done fi if (( $RC != 0 )) then STAT=$RC fi fi return $STAT } ############################################################################ # Function: sync_volume_groups # Purpose: Synchronizes volume groups that need synchronization. # Parameters: none # Return: none ############################################################################ sync_volume_groups() { typeset PS4_FUNC="sync_volume_groups" [[ $VERBOSE_LOGGING == "high" ]] && set -x STAT=0 for GROUPNAME in $RESOURCE_GROUPS do export GROUPNAME get_list_head $VOLUME_GROUPS | read LIST_OF_VOLUME_GROUPS_FOR_RG get_list_tail $VOLUME_GROUPS | read VOLUME_GROUPS # : Sync the active volume groups # lsvg -L -o 2> /tmp/lsvg.err | sort > /tmp/lsvg.out.$$ for one_vg in $(echo $LIST_OF_VOLUME_GROUPS_FOR_RG | \ tr ' ' '\n' | sort | comm -12 /tmp/lsvg.out.$$ - ) do cl_sync_vgs $one_vg & done if [[ -s /tmp/lsvg.err ]] then print -u2 "stderr: lsvg -o" cat /tmp/lsvg.err >&2 fi rm -f /tmp/lsvg.out.$$ /tmp/lsvg.err done # This variable is set either in rg_move or rg_move_complete # to determine that cl_sync_vgs is called as part of those events # and is used for logging VG sync in clavailability.log. # unsetting AM_SYNC_CALLED_BY from caller's environment as we don't # require it after this point in execution unset AM_SYNC_CALLED_BY return $STAT } ############################################################################ # Function: logredo_volume_groups # Purpose: Does a logredo on volume groups specified in the environment # Parameters: none # Return: none ############################################################################ logredo_volume_groups() { typeset PS4_FUNC="logredo_volume_groups" typeset PS4_TIMER="true" [[ $VERBOSE_LOGGING == "high" ]] && set -x export TMP_FILE="/var/hacmp/log/.process_resources_logredo.$$" rm -f /var/hacmp/log/.process_resources_logredo* STAT=0 for GROUPNAME in $RESOURCE_GROUPS do export GROUPNAME get_list_head $VOLUME_GROUPS | read LIST_OF_VOLUME_GROUPS_FOR_RG get_list_tail $VOLUME_GROUPS | read VOLUME_GROUPS # : Run logredo on all JFS/JFS2 log devices to assure FS consistency # ALL_LVs="" lv_all="" mount_fs="" fsck_check="" MOUNTGUARD="" FMMOUNT_OUT="" FMMOUNT="" for VG in $LIST_OF_VOLUME_GROUPS_FOR_RG do LC_ALL=C lsvg -lL $VG | tail +3 >>$TMP_FILE done ALL_LVs=$(cat $TMP_FILE | awk '{print $1}') # : Verify if any of the file system associated with volume group $VG : is already mounted anywhere else in the cluster. : If it is already mounted somewhere else, we dont want to continue : here to avoid data corruption. # lv_all=$(cat $TMP_FILE | grep -v "N/A" | awk '{print $1}') for lv in $lv_all do # : When a filesystem is protected against concurrent mounting, : MountGuard flag is set and lsfs command displays characteristics of file systems. # MOUNTGUARD=$(LC_ALL=C lsfs -qc $lv | tr ":" "\n" | grep -w MountGuard | cut -d" " -f2) # : fsdb and its subcommands allow us to view the information in a file system. : The FM_MOUNT flag is set if the file system is mounted cleanly on any node. # FMMOUNT_OUT=$(fsdb $lv << EOF su q EOF) FMMOUNT=$(echo "$FMMOUNT_OUT" | grep -w FM_MOUNT | awk '{ print $1 }') if [[ $MOUNTGUARD == "yes" ]] && [[ $FMMOUNT == "FM_MOUNT" ]]; then fsck_check="File system has mountguard option enabled and could be mounted on another node" fi if [[ -n $fsck_check ]]; then # : volume group $VG associated file system is mounted on one of the node : in the cluster, hence not going to mount here. # mount_fs="1" break fi done comm_failure="" rc_mount="" if [[ -n $fsck_check ]]; then cluster_nodes=$(clodmget -n -f name -q object=COMMUNICATION_PATH HACMPnode) #Get the list of cluster nodes # : Check if we have any communication failures between the nodes and : if filesystem is really mounted on any of the node. # for node in $cluster_nodes do clrsh $node date rc=$? if (( $rc != 0 )) then comm_failure=1 continue fi # verify if file system is really mounted on any of the node. clrsh $node mount | grep -w /dev/$lv rc=$? if (( $rc == 0 )) then # : file system is mounted on the node $node, hence setting mount flag to 1 # rc_mount=1 break fi done #set mount_fs flag to zero if there is any communication failure and nowhere filesystem is mounted if [[ -n $comm_failure && -z $rc_mount ]] then mount_fs="0" fi fi logdevs="" HAVE_GEO="" if lslpp -l 'hageo.*' >/dev/null 2>&1 || lslpp -l 'geoRM.*' >/dev/null 2>&1 ; then # : HAGEO tends to create file systems where the log files are of : type jfs or jfs2. Catch all these by looking them up in /etc/filesystems # HAVE_GEO="true" for LV in $ALL_LVs do if [[ -n $(odmget -q "name = ${LV} and \ attribute = type and \ value like jfs*" CuAt) ]] then if grep -w /dev/${LV} /etc/filesystems | grep -qw log then logdevs="$logdevs /dev/${LV}" fi fi done fi pattern='jfs*log' # : Any device with the type as log should be added # for LV in $ALL_LVs do if [[ -n $(odmget -q "name = ${LV} and \ attribute = type and \ value like ${pattern}" CuAt) ]] then logdevs="${logdevs} /dev/${LV}" fi done # : JFS2 file systems can have inline logs where the log LV is the same as the FS LV. # for LV in $ALL_LVs do if [[ -n $(odmget -q"name = ${LV} and \ attribute = type and \ value = jfs2" CuAt) ]] then if [[ -n $(odmget -q "name = ${LV} and attribute = label" CuAt |\ sed -n '/value =/s/^.*"\(.*\)".*/\1/p') ]] then LOG=$(grep -wp /dev/${LV} /etc/filesystems | awk '$1 ~ /log/ {printf $3}') if [[ $LOG == "INLINE" || $LOG == "/dev/${LV}" ]] then logdevs="$logdevs /dev/${LV}" fi fi fi done # : Remove any duplicates acquired so far # logdevs=$(echo $logdevs | tr ' ' '\n' | sort -u) # : Run logredos in parallel to save time. # for dev in $logdevs do if [[ -n $HAVE_GEO ]] then # : HAGEO or GeoRM is installed. If Geo is mirroring the file : system log, we have to run logredo on the GMD, so that updates : get reflected to the remote site. # gmd=$(odmget -q "attribute = local_device and value = /dev/r${dev##*/}" CuAt | grep 'name =' | cut -f3 -d' ' | tr -d '"') if [[ -n $gmd ]] then # : There is a possibility that logredo will throw an error here but this is : harmless. This will happen when GMD is already part of a mount. : However if a raw I/O is being done the logredo will continue with no error : and the below logredo is for the second case. # logredo /dev/$gmd & continue fi fi # : Run logredo only if the LV is closed. # if [[ -n $(awk '$1 ~ /^'${dev##*/}'$/ && $6 ~ /closed\// {print "CLOSED"}' $TMP_FILE) ]] then # : Run logredo only if filesystem is not mounted on any of the node in the cluster. # if [[ -z $mount_fs || $mount_fs == "0" ]] then logredo $dev & fi fi done rm -f $TMP_FILE done # : Wait for the background logredos from the RGs # wait return $STAT } ############################################################################ # Function: RunJoinCleanup # Purpose: Runs the join-cleanup method for replicated resources. # Parameters: none # Return: none ############################################################################ RunJoinCleanup () { typeset PS4_FUNC="RunJoinCleanup" [[ $VERBOSE_LOGGING == "high" ]] && set -x # : if PROGNAME_XD is not set, or is empty, then we set it to rg_move_complete # if [[ -z ${PROGNAME_XD:-} || $PROGNAME_XD == "process_resources" ]] then export PROGNAME_XD="rg_move_complete" fi if [[ -z ${NODENAME:-} ]] then export NODENAME=$(get_local_nodename) fi for siblingGroup in $SIBLING_ACQUIRING_GROUPS do set -a eval $(clsetrepenv $siblingGroup) set +a export GROUPNAME=$siblingGroup METHODS=$(cl_rrmethods2call join_cleanup) for method in $METHODS do if [[ -x $method ]] then if ! $method $PROGNAME_XD $NODENAME then STATUS=1 fi fi done done } ############################################################################ # Function: RunLeaveCleanup # Purpose: Runs the leave-cleanup method for replicated resources. # Parameters: none # Return: none ############################################################################ RunLeaveCleanup () { typeset PS4_FUNC="RunLeaveCleanup" [[ $VERBOSE_LOGGING == "high" ]] && set -x : if PROGNAME_XD is not set, or is empty, then we set it to rg_move_fence if [[ -z ${PROGNAME_XD:-} || $PROGNAME_XD == "process_resources" ]] then export PROGNAME_XD="rg_move_fence" fi if [[ -z ${NODENAME:-} ]] then export NODENAME=$(get_local_nodename) fi for siblingGroup in $SIBLING_RELEASING_GROUPS do set -a eval $(clsetrepenv $siblingGroup) set +a export GROUPNAME=$siblingGroup METHODS=$(cl_rrmethods2call leave_cleanup) for method in $METHODS do if [[ -x $method ]] then if ! $method $PROGNAME_XD $NODENAME then STATUS=1 fi fi done done } setSiblingEnv () { typeset PS4_FUNC="setSiblingEnv" [[ $VERBOSE_LOGGING == "high" ]] && set -x export SIBLING_GROUPS; export SIBLING_NODES_BY_GROUP; export SIBLING_RELEASING_GROUPS; export SIBLING_RELEASING_NODES_BY_GROUP; export SIBLING_ACQUIRING_GROUPS; export SIBLING_ACQUIRING_NODES_BY_GROUP; return 0; } ############################################################################ # Function: acquire_service_labels # Purpose: Acquires service adapters # Parameters: none # Return: none ############################################################################ acquire_service_labels() { typeset PS4_FUNC="acquire_service_labels" [[ $VERBOSE_LOGGING == "high" ]] && set -x STAT=0 clcallev acquire_service_addr RC=$? if (( $RC != 0 && $RC != 11 )) then # : 11 shows that the acquire was unsuccessful, but clRMupdate was run # for GROUPNAME in $RESOURCE_GROUPS do export GROUPNAME # : update the resource manager # ALLSRVADDRS="All_service_addrs" cl_RMupdate resource_error $ALLSRVADDRS $PROGNAME done fi if (( $RC != 0 )) then STAT=1 fi #refresh clcomd refresh -s clcomd return $STAT } ############################################################################ # Function: release_service_labels # Purpose: Releases service labels # Parameters: none # Return: none ############################################################################ release_service_labels() { typeset PS4_FUNC="release_service_labels" [[ $VERBOSE_LOGGING == "high" ]] && set -x STAT=0 clcallev release_service_addr RC=$? if (( $RC != 0 && $RC != 11 )) then # : 11 shows that the release was unsuccessful, but clRMupdate was run # for GROUPNAME in $RESOURCE_GROUPS do export GROUPNAME # : update the resource manager # ALLSRVADDRS="All_service_addrs" cl_RMupdate resource_error $ALLSRVADDRS $PROGNAME done fi if (( $RC != 0 )) then STAT=1 fi #refresh clcomd refresh -s clcomd return $STAT } ############################################################################ # Function: acquire_takeover_labels # Purpose: Acquires takeover labels # Parameters: none # Return: none ############################################################################ acquire_takeover_labels() { typeset PS4_FUNC="acquire_takeover_labels" [[ $VERBOSE_LOGGING == "high" ]] && set -x clcallev acquire_takeover_addr RC=$? if (( $RC != 0 && $RC != 11 )) then # : 11 shows that the acquire was unsuccessful, but clRMupdate was run # for GROUPNAME in $RESOURCE_GROUPS do export GROUPNAME # : update the resource manager # ALLSRVADDRS="All_service_addrs" cl_RMupdate resource_error $ALLSRVADDRS $PROGNAME done fi if (( $RC != 0 )) then STAT=1 fi #refresh clcomd refresh -s clcomd return $STAT } ############################################################################ # Function: release_takeover_labels # Purpose: Releases takeover labels # Parameters: none # Return: none ############################################################################ release_takeover_labels() { typeset PS4_FUNC="release_takeover_labels" [[ $VERBOSE_LOGGING == "high" ]] && set -x clcallev release_takeover_addr RC=$? if (( $RC != 0 && $RC != 11 )) then # : 11 shows that the release was unsuccessful, but clRMupdate was run # for GROUPNAME in $RESOURCE_GROUPS do export GROUPNAME # : update the resource manager # ALLSRVADDRS="All_service_addrs" cl_RMupdate resource_error $ALLSRVADDRS $PROGNAME done fi if (( $RC != 0 )) then STAT=1 fi #refresh clcomd refresh -s clcomd return $STAT } ############################################################################ # Function: cross_site_rg_move_prevented # Purpose: Runs intersite_fallover_prevented event, which will in # turn run notification methods for customized resource group # recovery, if action is "notify", and a notification method # is specified. # Parameters: none # Return: none ############################################################################ cross_site_rg_move_prevented() { typeset PS4_FUNC="cross_site_rg_move_prevented" [[ $VERBOSE_LOGGING == "high" ]] && set -x clcallev intersite_fallover_prevented "$RESOURCE_GROUPS" } ############################################################################ # Function: process_wpars # Purpose: Process the wpars associated with the resource groups. # Parameters: action # Return: none ############################################################################ process_wpars() { typeset PS4_FUNC="process_wpars" [[ $VERBOSE_LOGGING == "high" ]] && set -x STAT=0 typeset action=$1 for GROUPNAME in $RESOURCE_GROUPS do export GROUPNAME case $action in ACQUIRE ) clstart_wpar RC=$? ;; RELEASE ) clstop_wpar RC=$? ;; esac if (( $RC != 0 && $RC != 11 )) then # : 11 shows that the action was unsuccessful, but clRMupdate was run # STAT=1 fi done return $STAT } ###################################################################### # MAIN STARTS HERE ###################################################################### # Including Availability metrics library file . /usr/es/lib/ksh93/availability/cl_amlib if [[ -n ${PROGNAME:-} ]]; then PROGNAME_XD=$PROGNAME else PROGNAME_XD=${0##*/} # basename PROGNAME=${0##*/} fi PATH="$(/usr/es/sbin/cluster/utilities/cl_get_path all)" export GROUPNAME="" if [[ $VERBOSE_LOGGING == "high" ]] then set -x version='1.169' fi STATUS=0 sddsrv_off=FALSE while true; do # : call rgpa, and it will tell us what to do next # set -a eval $(clRGPA $1) RC=$? set +a if (( $RC != 0 )) then # : Return code from clRGPA was $RC # cl_log 3093 "*************clRGPA CALL WAS UNSUCCESSFUL*********\n" exit $RC fi RESOURCE_GROUPS=${RESOURCE_GROUPS% } # trim the trailing space from clRGPA export GROUPNAME=$RESOURCE_GROUPS #Logging for RG Failover Events in clavailability starts #IS_SERVICE_START and IS_SERVICE_STOP are used to identify #if both source and destination are present for an RG, #value 0 for both IS_SERVICE_START and IS_SERVICE_STOP #means that both source and destination are present. # We log RG failover only if both source and destination # are present as that will be the case of RG Failover IS_SERVICE_START=1 IS_SERVICE_STOP=1 #Logging is done for RG Failover begin when JOB_TYPE # returned by clRGPA.sh is RELEASE and for end #when JOB_TYPE returned by clRGPA.sh is ONLINE if [[ $JOB_TYPE == "RELEASE" || $JOB_TYPE == "ONLINE" ]];then for CL_RG_NAME in $RESOURCE_GROUPS do INFO_STRING="" for NODENAME in $(clnodename); do #ENV_VAR can have two possible values #for the combination of node and RG #if it's value is WILLBEUPPOSTEVENT # the node is destination node for RG #if it's value is ISUPPREEVENT the node #is source node for the RG ENV_VAR=GROUP_${CL_RG_NAME}_${NODENAME} eval "echo \$$ENV_VAR"|read ENV_VAR if [[ $ENV_VAR == "WILLBEUPPOSTEVENT" ]];then INFO_STRING="$INFO_STRING|DESTINATION=$NODENAME" IS_SERVICE_STOP=0 fi if [[ $ENV_VAR == "ISUPPREEVENT" ]];then INFO_STRING="$INFO_STRING|SOURCE=$NODENAME" IS_SERVICE_START=0 fi done if (( $IS_SERVICE_START == 0 && $IS_SERVICE_STOP ==0 ));then eval "echo \$$ENV_VAR"|read ENV_VAR INFO_STRING="RG_FAILOVER|$CL_RG_NAME$INFO_STRING|$EVENT_SERIAL_NUMBER" if [[ $JOB_TYPE == "RELEASE" ]];then amlog_trace $RG_FAILOVER_BEGIN $INFO_STRING else amlog_trace $RG_FAILOVER_END $INFO_STRING fi fi done fi #Logging for RG Failover Events in clavailability ends case $JOB_TYPE in SETPRKEY ) setPRKey ;; SIBLINGS ) setSiblingEnv ;; SERVICE_LABELS ) if [[ $ACTION == "ACQUIRE" ]] then acquire_service_labels else release_service_labels RC=$? if (( $RC != 0 )) then STATUS=$RC fi fi ;; TAKEOVER_LABELS ) if [[ $ACTION == "ACQUIRE" ]] then acquire_takeover_labels else release_takeover_labels RC=$? if (( $RC != 0 )) then STATUS=$RC fi fi ;; TELINIT ) cl_telinit ;; DISKS ) if [[ $ACTION == "ACQUIRE" ]] then FAILED_RR_RGS="" get_disks_main FAILED_RR_RGS=$(echo $LIST_OF_FAILED_RGS | tr ' ' '\n') if [[ -n $FAILED_RR_RGS ]]; then for FAILEDRG in $FAILED_RR_RGS do cl_RMupdate rg_error $FAILEDRG $PROGNAME done fi typeset SCSIPR_ENABLED=$(clodmget -n -q "policy=scsi" -f value HACMPsplitmerge) if [[ $SCSIPR_ENABLED == Yes ]];then : Confirming SCSIPR reservations on Raw Disk typeset PR_Key=$(clpr_obtain_prkey $LOCALNODENAME) typeset diskList=$(clodmget -q "name=DISK and group=$GROUPNAME" -f value -n HACMPresource) if [[ -n $diskList ]];then typeset pvid="" for pvid in $diskList do typeset hdisk=$(lspv -L | grep -w $pvid | awk '{print $1}') clpr_verifyKey $hdisk $PR_Key if (( $? != 0 ));then : No reservations found. Put RG to error state. set_resource_group_state "ERROR" fi done fi fi fi ;; SYNC_VGS ) if [[ $ACTION == "ACQUIRE" ]] then sync_volume_groups # STATUS is not changed for Acquire fi ;; FILESYSTEMS ) process_file_systems "$ACTION" RC=$? if [[ $ACTION == "RELEASE" ]] then (( $RC != 0 )) && STATUS=$RC fi ;; WPAR ) process_wpars "$ACTION" RC=$? if [[ $ACTION == "RELEASE" ]] then (( $RC != 0 )) && STATUS=$RC fi ;; TAPE_RESOURCES ) process_tape_resources "$ACTION" RC=$? if [[ $ACTION == "RELEASE" ]] then if (( $RC != 0 )) then STATUS=$RC fi fi ;; AIX_CONNECTIONS ) process_aix_connections "$ACTION" RC=$? if [[ $ACTION == "RELEASE" ]] then if (( $RC != 0 )) then STATUS=$RC fi fi ;; AIX_FAST_CONNECT ) process_aix_fast_connections "$ACTION" RC=$? if [[ $ACTION == "RELEASE" ]] then if (( $RC != 0 )) then STATUS=$RC fi fi ;; COMMUNICATION_LINKS ) process_comm_links "$ACTION" RC=$? if [[ $ACTION == "RELEASE" ]] then if (( $RC != 0 )); then STATUS=$RC fi fi ;; APPLICATIONS ) process_applications "$ACTION" RC=$? if [[ $ACTION == "RELEASE" ]] then if (( $RC != 0 )) then STATUS=$RC fi fi ;; USERDEF_RESOURCES ) process_userdefined_resources "$ACTION" RC=$? if [[ $ACTION == "RELEASE" ]]; then if [[ $RC != 0 ]]; then STATUS=$RC fi fi ;; VGS ) process_volume_groups_main "$ACTION" RC=$? if [[ $ACTION == "RELEASE" ]] then if [[ $RC != 0 ]] then STATUS=$RC fi fi ;; CONCURRENT_VOLUME_GROUPS ) process_concurrent_vgs "$ACTION" RC=$? if [[ $ACTION == "RELEASE" ]] then if (( $RC != 0 )) then STATUS=$RC fi fi ;; EXPORT_FILESYSTEMS ) if [[ $ACTION == "ACQUIRE" ]] then export_filesystems else unexport_filesystems RC=$? if [[ $ACTION == "RELEASE" ]] then if (( $RC != 0 )) then STATUS=$RC fi fi fi ;; MOUNT_FILESYSTEMS ) if [[ $ACTION == "ACQUIRE" ]] then mount_nfs_filesystems "MOUNT" else unmount_nfs_filesystems RC=$? if [[ $ACTION == "RELEASE" ]] then if (( $RC != 0 )) then STATUS=$RC fi fi fi ;; REMOUNT_FILESYSTEMS ) mount_nfs_filesystems "REMOUNT" : failure of remount will not cause failure of the event ;; LOGREDO ) if [[ $ACTION == "ACQUIRE" ]]; then logredo_volume_groups fi ;; FENCE ) process_ssa_fence "$ACTION" RC=$? if [[ $ACTION == "RELEASE" ]] then if (( $RC != 0 )) then STATUS=$RC fi fi ;; ACQUIRE ) set_resource_group_state "ACQUIRING" ;; ACQUIRE_SECONDARY ) set_resource_group_state "ACQUIRING_SECONDARY" ;; RELEASE ) set_resource_group_state "RELEASING" RC=$? if (( $RC != 0 )) then STATUS=$RC fi ;; RELEASE_SECONDARY ) set_resource_group_state "RELEASING_SECONDARY" ;; ONLINE ) set_resource_group_state "UP" ;; ONLINE_SECONDARY ) set_resource_group_state "ONLINE_SECONDARY" ;; OFFLINE ) set_resource_group_state "DOWN" RC=$? postvg_for_rdisk if (( $RC != 0 )) then STATUS=$RC fi ;; OFFLINE_SECONDARY ) set_resource_group_state "OFFLINE_SECONDARY" ;; ERROR ) set_resource_group_state "ERROR" ;; ERROR_SECONDARY ) set_resource_group_state "ERROR_SECONDARY" ;; UPDATESTATD ) notify_rpc_statd : STATUS is not updated ;; CALL_RG_MOVE ) queue_RG_MOVE "$ACTION" RC=$? if (( $RC != 0 )) then STATUS=$RC fi ;; CROSS_SITE_RG_MOVE_PREVENTED ) cross_site_rg_move_prevented ;; ACQUIRE_JOIN_CLEANUP ) RunJoinCleanup ;; RELEASE_LEAVE_CLEANUP ) RunLeaveCleanup ;; NONE ) break ;; * ) cl_log 3094 "************* clRGPA RETURNED UNKNOWN JOB *********\n" exit $RC ;; esac done # : If sddsrv was turned off above, turn it back on again # if [[ $sddsrv_off == TRUE ]] && ! LC_ALL=C lssrc -s sddsrv | grep -iqw active ; then startsrc -s sddsrv fi exit $STATUS