#!/bin/ksh
## IBM_PROLOG_BEGIN_TAG
## This is an automatically generated prolog.
##
##
##
## Licensed Materials - Property of IBM
##
## (C) COPYRIGHT International Business Machines Corp. 2009,2022
## All Rights Reserved
##
## US Government Users Restricted Rights - Use, duplication or
## disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
##
## IBM_PROLOG_END_TAG
#
# sccsid = "@(#)53 1.85.1.1 src/rsct/cfg_access/ctcaactrl.sh, cfg.access, rsct_rady, radys004a 9/18/21 17:18:37"
#
# usage:
#   ctcaactrl PRE_START       validate this node can be added to a CAA cluster
#   ctcaactrl [ -a | START | POST_START ]   prepare/start RSCT CAA env
#                             (subsys etc)
#   ctcaactrl [STOP|PRE_STOP] stop/remove RSCT CAA env (subsys etc)
#   ctcaactrl PRE_STOP        validate this node can be removed from a CAA
#                             cluster
#   ctcaactrl { -O ADD_NODE|RM_NODE|STOP_NODE|START_NODE|JOIN_NODE|COMMIT_LVL|
#                  ADD_SITE|MOD_SITE|RM_SITE|MOD_TUNE }
#             { -P CHECK | PRE | POST | UNDO_PRE }
#             -T -c -o
#             -t -s [-v variables]
#             [-m] [-g] [-i]
#
# Soon-to-be-obsolete
#   ctcaactrl -d   StopAndDelete RSCT CAA env
#   ctcaactrl -s   start RSCT CAA subsystems
#   ctcaactrl -k   stop HA subsystems
#   ctcaactrl -r   refresh HA subsystems
#

# Set path to known value
PATH=/opt/rsct/install/bin:/opt/rsct/bin:/usr/bin:/bin:/usr/sbin:/sbin
export PATH

RSCTBIN=/opt/rsct/bin
CTROOT=/var/ct

# make sure ODMDIR to point /etc/objrepos
ODMDIR=/etc/objrepos

HA_CCAL_DIR=/var/ct/
HA_CCAL_LOG=${HA_CCAL_DIR}/caa.ctcaactrl.log
HA_CCAL_LOG_MAXSIZE_KB=500    # 500 KB
export HA_CCAL_LOG

# This is the file which talks to ConfigRM
# Don't change the filename
CAACMDCTRLFILE=/var/ct/cfg/caa-control.cmd
CAACOMMITCTRLFILE=/var/ct/cfg/caa-commit.cmd

#
# presence of this file means simulation of having
# powerha pre 721 version.
#
POWERHA_PRE_721="/var/ct/cfg/powerha_pre_721"

basecmd=$(basename "$0")

#########################################################################
# Get the current Locale and determine whether English output is also needed
#
# Reads the LC_MESSAGES setting reported by the locale command
# (/usr/bin/locale when that file exists, /bin/locale otherwise).
#
# Output: LOCALE         LC_MESSAGES value without quotes and without the
#                        ".encoding" suffix (e.g. en_US, C, fr_FR)
#         PRINT_ENG_MSG  "NO" when LOCALE is C, POSIX, en_US, En_US,
#                        en_GB or En_GB; "YES" for anything else,
#                        including an empty value
#########################################################################
get_current_locale()
{
    if [[ -f /usr/bin/locale ]]; then
        LOCALE=$(/usr/bin/locale)
    else
        LOCALE=$(/bin/locale)
    fi

    # locale may produce LC_MESSAGES=en_US or LC_MESSAGES="en_US".
    # The first sed "s" command gets rid of LC_MESSAGES=, the second one
    # removes the surrounding '"' if they are there.
    LOCALE=$(echo "$LOCALE" | grep LC_MESSAGES | sed "s/LC_MESSAGES=//;s/\"//g")

    # A locale may look like en_US or en_US.ISO8859-1. Drop the encoding
    # BEFORE classifying, so that C.UTF-8 / POSIX.<enc> are recognised as
    # the special C / POSIX locales instead of as a foreign language.
    LOCALE=${LOCALE%%.*}

    # Do not print English messages if the local language is English.
    # C and POSIX are special locales. Treat them as en_US.
    case "$LOCALE" in
        C | POSIX | en_US | En_US | en_GB | En_GB)
            PRINT_ENG_MSG="NO"
            ;;
        *)
            # Local language is not English (including "$LOCALE"="",
            # which should not happen in normal cases)
            PRINT_ENG_MSG="YES"
            ;;
    esac
}

#########################################################################
#
# Function: print_message
# Description: wrapper for message printing. All messages go to stderr.
#     For now, all messages are printed in both English and the current
#     language unless current language is en_US, C, or POSIX.
#
#     This subroutine makes it easy to print messages in either English
#     or current language or both. Also, it ensure all messages will be
#     printed to the same stream.
#########################################################################

# Usage: create_logfile_with_permission "logfile" "mode"
#
# Creates (touches) the given log file and, when a mode is supplied,
# applies it with chmod. Does nothing when no file name is given.
function create_logfile_with_permission
{
    typeset logfile=$1
    typeset permission=$2

    # Create a log file and set the permission if defined
    if [[ -n "$logfile" ]]; then
        touch "$logfile"
        [[ -z "$permission" ]] || chmod "$permission" "$logfile"
    fi
}

#
# print_dbgmsg <text...>
#
# Appends one line "<date> <basecmd> (<pid>) : <text>" to $HA_CCAL_LOG.
# Silently does nothing when HA_CCAL_LOG is empty.
#
function print_dbgmsg
{
    if [[ -n $HA_CCAL_LOG ]]; then
        print $(date) ${basecmd} "($$)" : "$*" >> "$HA_CCAL_LOG"
    fi
}

#
# print_message <message-id> [args...]
#
# Formats a catalog message with hadspmsg, prints it on stderr and
# records the same text in the debug log. hadspmsg is run once and its
# output reused for both destinations.
#
print_message()
{
    MSGCMD2="hadspmsg cfgaccess ha_com.cat"
    typeset msg_text

    msg_text=$(${MSGCMD2} "$@")
    print -u2 "$msg_text"
    print_dbgmsg "$msg_text"
}

#########################################################################
#
# print_process_tree
#
# Logs the process tree of this script: proctree on AIX, pstree on any
# other system. Nothing is logged when the tool produces no output.
#
function print_process_tree
{
    typeset OSNAME
    OSNAME=$(uname -s)

    if [[ "$OSNAME" = "AIX" ]]
    then
        PSTREE_MSG=$(proctree -T -p $$ 2> /dev/null)
        [ -z "${PSTREE_MSG}" ] || print_dbgmsg "proctree:\n${PSTREE_MSG}"
    else
        PSTREE_MSG=$(pstree -sp $$ 2> /dev/null)
        [ -z "${PSTREE_MSG}" ] || print_dbgmsg "\n${PSTREE_MSG}"
    fi
}

#########################################################################
#
# backup the ccal log file if its size is big
#
# Uses HA_CCAL_LOG, HA_CCAL_DIR and HA_CCAL_LOG_MAXSIZE_KB (defaulted to
# 500 when empty). When the size reported by "ls -s" exceeds the limit
# the log is renamed to <log>.bak.
#
# Return value: 1 when the log file did not exist on entry (nothing to
#               back up); otherwise the status of the size test / mv.
#
function backup_ccal_log_file
{
    typeset log_existed=0
    typeset size_kb

    if [[ -z $HA_CCAL_LOG ]]; then
        return
    fi

    # Remember whether there was a log before we start writing debug
    # messages, because print_dbgmsg itself creates the file.
    [[ -f $HA_CCAL_LOG ]] && log_existed=1

    if [[ -n $HA_CCAL_DIR && ! -d $HA_CCAL_DIR ]]; then
        # Create the directory first: the debug message below is written
        # into that directory and would be lost otherwise.
        mkdir -p "${HA_CCAL_DIR}"
        print_dbgmsg "Create ccal dir $HA_CCAL_DIR"
    fi

    if (( log_existed == 0 )); then
        # LOG file does not exist. Nothing to backup
        print_dbgmsg "No log file exist. No backup is needed"
        return 1
    fi

    # Check size of "HA_CCAL_LOG"
    if [[ -z $HA_CCAL_LOG_MAXSIZE_KB ]]; then
        HA_CCAL_LOG_MAXSIZE_KB=500
    fi

    # First field of "ls -s" is the allocated size; treat a failed ls as 0.
    size_kb=$(ls -s "${HA_CCAL_LOG}" 2> /dev/null | awk '{ print $1; exit }')
    if (( ${size_kb:-0} > HA_CCAL_LOG_MAXSIZE_KB ))
    then
        # Exceed
        print_dbgmsg "Log file $HA_CCAL_LOG exceeds the size $HA_CCAL_LOG_MAXSIZE_KB Kb. Backup it"
        mv -f "${HA_CCAL_LOG}" "${HA_CCAL_LOG}.bak"
    fi
}

#########################################################################
# return RPD cluster names
#
# Reads "<id> <name> <type>" records and prints, space separated on one
# line, the names of all clusters that are neither "IW" nor of type CAA.
#
# Arguments: $1 (optional) - file to read, default /var/ct/cfg/clusters
#
function GetRPDClusters
{
    typeset clusters_file=${1:-/var/ct/cfg/clusters}

    clusters=""
    while read cid cname ctype
    do
        if [[ "$cname" != "IW" && "$ctype" != "CAA" ]]; then
            clusters="$clusters $cname"
        fi
    done < "$clusters_file"

    # print outputs
    echo $clusters
}

#
# Get CAA RPD clusters.
#
# Same record format as GetRPDClusters, but prints only the names of
# clusters whose type is CAA.
#
# Arguments: $1 (optional) - file to read, default /var/ct/cfg/clusters
#
function Get_CAA_RPD_Clusters
{
    typeset clusters_file=${1:-/var/ct/cfg/clusters}

    clusters=""
    while read cid cname ctype
    do
        if [[ "$cname" != "IW" && "$ctype" == "CAA" ]]; then
            clusters="$clusters $cname"
        fi
    done < "$clusters_file"

    echo $clusters
}

#
# return (1 + 'OR'ed reason-codes) if an error happens
# reason code for cthagsctrl = 0x01 (1)
# input: (gs_rc)
function exit_script
{
    rc=$gs_rc
    exit $rc
}

################################################################################
#
# exec_cmd_timeout - executes a command with a given timeout
#
# The function returns when the command returns or when the timeout expires,
# whichever comes first. The command is NOT killed on timeout; it keeps
# running in the background.
#
# Arguments:
#
#       Timeout value in secs
#       Target program and its arguments
#
# Return value: exit status of the command when it finished in time,
#               124 when the timeout expired first.
#
################################################################################

# eval_cmd <command...>: run the command via eval in the current (sub)shell,
# log its exit status and return it. exec_cmd_timeout starts this in the
# background.
function eval_cmd
{
    print_dbgmsg "Background running: $*"
    eval $*
    rc=$?
    print_dbgmsg "Cmd $* exited with rc=$rc"
    return $rc
}

function exec_cmd_timeout
{
    typeset timeOut=$1
    shift    # remove the first arg

    print_dbgmsg "Invoking $*"
    eval_cmd $* &    # run command in the background
    pid=$!
    print_dbgmsg "Background pid=$pid"

    typeset isTimeout=1
    typeset sleepTime=2
    typeset waitTime=0
    typeset cmd_rc=0

    # Poll until the background job is gone or the time budget is used up.
    while [[ $waitTime -lt $timeOut ]]
    do
        kill -s 0 $pid >/dev/null 2>&1
        if [ $? -ne 0 ]    # process no longer running
        then
            isTimeout=0
            break
        fi
        sleep $sleepTime
        (( waitTime = waitTime + sleepTime ))
    done

    if [[ $isTimeout == 1 ]]
    then
        print_dbgmsg "Command $1 ($pid) timed out after $waitTime secs"
        return 124
    fi

    # The job has ended: collect its real exit status so that neither the
    # log nor the caller mistakes a failed command for a success.
    wait $pid
    cmd_rc=$?
    if (( cmd_rc == 0 ))
    then
        print_dbgmsg "Command $1 ($pid) successful"
    else
        print_dbgmsg "Command $1 ($pid) failed with rc=$cmd_rc"
    fi
    return $cmd_rc
}

#
# Tell ConfigRM and wait for the completion of operation (with timeout)
# Input: Op = AddAndStart | StopAndDelete
#
function tell_ConfigRM_resync
{
    # Operation
    typeset Op=$1

    # Make sure ConfigRM is auto-startable
    touch /var/ct/cfg/ConfigRM.autostart

    # Just in case RMC is not running
    rmcctrl -A 2> /dev/null >/dev/null

    # Check whether ConfigRM is running: drop the lssrc header line and
    # keep the last word of the status line.
    state=$(LC_ALL=C /usr/bin/lssrc -s IBM.ConfigRM 2> /dev/null | LC_ALL=C /bin/sed -e"1d;s/.* \([a-zA-Z0-9]*\)$/\1/")
    if [[ -z $state ]]
    then
        # not defined in SRC ... add it here (instead going thru RMC)
        print_dbgmsg "Refresh RMC to start ConfigRM"
        /usr/bin/refresh -s ctrmc
    elif [[ $state != "active" ]]
    then
        # Make sure ConfigRM running just in case ConfigRM was stopped
        print_dbgmsg "start ConfigRM subsystem"
        startsrc -s IBM.ConfigRM 2> /dev/null >/dev/null
    fi

    # refresh ConfigRM
    print_dbgmsg "Tell ConfigRM to resync CAA...."
    exec_cmd_timeout 15 /usr/bin/refresh -s IBM.ConfigRM
    frc=$?
    print_dbgmsg "refresh ConfigRM is done, rc=$frc"
}

################################################################################
# Test whether HAGS is still running for up to timeOut secs
#
# If it is too long, forcefully stopsrc -c cthags
#
################################################################################
function waitfor_hags_termination_or_stopsrc
{
    #typeset timeOut=70   # 70secs (a little more than 60s [internal timeout])
    typeset timeOut=15    # 70secs cause whole rmcluster operation delay. So, reduce to 15 seconds for now

    # get 'cthags pid'
    PARSE_SRC_PIDS="s/^ *[^ ]* *[^ ]* *\([0-9]*\) *active/\1/p"
    TGT_SUBSYS="cthags"
    pid=$(LC_ALL=C /usr/bin/lssrc -s "$TGT_SUBSYS" 2> /dev/null | grep "active" | sed -n -e "$PARSE_SRC_PIDS")

    if [[ -n $pid ]]
    then
        print_dbgmsg "Wait for HAGS (pid=$pid) to stop"

        typeset isTimeout=1
        typeset sleepTime=2
        typeset waitTime=0
        while [[ $waitTime -lt $timeOut ]]
        do
            kill -s 0 $pid >/dev/null 2>&1
            if [ $? -ne 0 ]    # process no longer running
            then
                isTimeout=0
                break
            fi
            sleep $sleepTime
            (( waitTime = waitTime + sleepTime ))
        done

        if [[ $isTimeout == 1 ]]
        then
            print_dbgmsg "HAGS (pid=$pid) is still running. Stop it now"
            stopsrc -cs cthags 2> /dev/null
        fi

        # check it again
        kill -s 0 $pid >/dev/null 2>&1
        if [ $? -ne 0 ]    # process no longer running
        then
            print_dbgmsg "HAGS is now stopped"
        fi
    fi
}

################################################################################
# Check for cluster Offline status. If it is still Online wait for it until
# it becomes offline.
# Wait for a maximum of 30 sec
#
# Polls "ct_clusterinfo -c" once per second until it reports the cluster
# name "IW" or 30 seconds have passed. The last name seen is left in the
# global CLSTNAME.
################################################################################
function waitfor_ConfigRM_Offline_state
{
    typeset sleepTime=1
    typeset IWClusterName="IW"
    typeset waitTime=0
    typeset timeOut=30

    print_dbgmsg "waitfor_ConfigRM_Offline_state "

    CLSTNAME=$(/opt/rsct/bin/ct_clusterinfo -c)
    while [[ $waitTime -lt $timeOut ]]
    do
        # Quoted [[ ]] comparison: an empty or multi-word answer from
        # ct_clusterinfo simply counts as "not offline yet" instead of
        # raising a test syntax error on every iteration.
        if [[ "$CLSTNAME" == "$IWClusterName" ]]
        then
            break
        fi
        sleep $sleepTime
        (( waitTime = waitTime + sleepTime ))
        CLSTNAME=$(/opt/rsct/bin/ct_clusterinfo -c)
    done
}

################################################################################
#
# parse_arguments: parse input arguments
#
# Expected input format is
#
#
#
#   : one of {CHECK| PRE | POST | UNDO_PRE}
#   : cluster UUID
#   :
#       comma separated list of other nodes to be targeted
#       (including target)
#       NO_OTHER_TARGETS if none
#   :
#       node add: Node UUID, canonical hostname, shortid
#       others: Node UUID
#   :
#       TARGET_NODE if the local node is the target
#       NON_TARGET_NODE if the local node is NOT the target
#
#
# The following variables are filled in by parse_arguments
#
#   PARSED_PHASE: script phase: one of the strings above
#   PARSED_CLUSTER_UUID: cluster UUID
#   PARSED_TARGET_UUID: node UUID of the target
#   PARSED_TARGET_HOSTNAME: hostname of the target, if available
#                           (empty otherwise)
#   PARSED_TARGET_SHORT_ID: short node ID of the target, if available
#                           ("INVAL_SHORT_ID" otherwise)
#   PARSED_AM_I_THE_TARGET: "yes" if local node is the target,
#                           "no" otherwise
#
#
# parse_arguments will cause the script to exit if the syntax is incorrect.
#
#
################################################################################
# Positional parameters as used by the code below:
#   $1 phase, $2 cluster UUID, $3 (not read here), $4 target id info
#   ("uuid" or "uuid,hostname,shortid"), $5 target indicator.
# An invalid phase prints the usage and EMSG100 messages and exits 1.
parse_arguments()
{
    typeset target_id_info
    typeset target_indicator
    typeset rest

    # check if 'phase' string is valid
    PARSED_PHASE=$1
    case $PARSED_PHASE in
        CHECK | PRE | POST | UNDO_PRE)
            ;;
        *)
            # invalid phase specification
            print_message IMSG_ctcaactrl_Usage $basecmd
            print_message EMSG100 $basecmd $PARSED_PHASE
            exit 1
            ;;
    esac

    # Cluster ID
    PARSED_CLUSTER_UUID=$2

    # Target ID
    PARSED_TARGET_SHORT_ID="INVAL_SHORT_ID"
    PARSED_TARGET_HOSTNAME=""
    target_id_info=$4
    case $target_id_info in
        *,*)
            # Format assumed to be: Node UUID, canonical hostname, shortid
            PARSED_TARGET_UUID=${target_id_info%%,*}
            rest=${target_id_info#*,}
            PARSED_TARGET_HOSTNAME=${rest%%,*}
            # Only take a short id when a third field is really there;
            # with just "uuid,hostname" the default INVAL_SHORT_ID must
            # stay instead of being overwritten with the hostname.
            case $rest in
                *,*)
                    rest=${rest#*,}
                    PARSED_TARGET_SHORT_ID=${rest%%,*}
                    ;;
            esac
            ;;
        *)
            # Format assumed to be: Node UUID
            PARSED_TARGET_UUID=$target_id_info
            ;;
    esac

    # Target indicator
    target_indicator=$5
    case $target_indicator in
        TARGET_NODE)
            PARSED_AM_I_THE_TARGET="yes"
            ;;
        *)
            PARSED_AM_I_THE_TARGET="no"
            ;;
    esac

    # echo "PARSED_PHASE = $PARSED_PHASE"
    # echo "PARSED_CLUSTER_UUID = $PARSED_CLUSTER_UUID"
    # echo "PARSED_TARGET_UUID = $PARSED_TARGET_UUID"
    # echo "PARSED_AM_I_THE_TARGET = $PARSED_AM_I_THE_TARGET"
}

################################################################################
#
# check_for_legacy_cluster - checks whether there is a legacy (pre-CAA)
# present on the local node. This function should be run on the CHECK
# phase when the node is about to be added to the cluster or join to it.
#
#
# The function will output an error message and exit 1 if a legacy cluster
# is present. The function will NOT exit if there is no legacy cluster;
# It's up to the caller to exit 0.
#
# Arguments: None
#
# Return value: 0: indicates that no legacy cluster has been found
#
################################################################################
check_for_legacy_cluster()
{
    caa_is_disabled_for_migration=$(ct_caa_is_disabled_for_migration)
    print_dbgmsg "caa_is_disabled_for_migration=$caa_is_disabled_for_migration"

    # When "disabled for migration", the PowerHA/RPD domain is being
    # migrated in coordination with RSCT, so a previously defined domain
    # is not an error.
    if [[ "$caa_is_disabled_for_migration" == "true" ]]; then
        print_dbgmsg "SUCESS: RSCT-to-CAA is in progress"
        return 0
    fi

    # An existing CAA RPD cluster is likewise a previously defined domain
    # and not an error.
    clusters=$(Get_CAA_RPD_Clusters)
    if [[ -n "$clusters" ]]; then
        print_dbgmsg "SUCCESS: CAA RPD cluster already exists: $clusters"
        return 0
    fi

    # See if a PowerHA cluster is configured which includes this node.
    # If so, the odm entry for the HACMPcluster object class will have
    # a non-zero id attribute value, and a non-empty name attribute value.
    # Each step below only runs when the previous one succeeded.
    power_ha=$(/usr/bin/odmget HACMPcluster 2>/dev/null)
    rc=$?
    if (( rc == 0 )); then
        # grep for the id.
        power_ha_id=$(echo "$power_ha" | /usr/bin/grep -w "id")
        rc=$?
    fi
    if (( rc == 0 )); then
        # grep for the name.
        power_ha_name=$(echo "$power_ha" | /usr/bin/grep -w "name")
        rc=$?
    fi

    if (( rc == 0 )); then
        # The id is an integer: strip everything up to the last space so
        # that only the integer itself is left.
        power_ha_id=${power_ha_id##* }

        # The name is a string: strip up to the first double quote, then
        # from the last double quote on, leaving only the string itself.
        power_ha_name=${power_ha_name#*\"}
        power_ha_name=${power_ha_name%\"*}

        if [[ "$power_ha_id" == "0" ]]; then
            print_dbgmsg "SUCCESS: power_ha_id is 0"
            return 0
        fi
        if [[ -z "$power_ha_name" ]]; then
            print_dbgmsg "SUCCESS: power_ha is not defined"
            return 0
        fi

        # The id and the name are legitimate. Consider PowerHA configured.
        #
        # If PowerHA cluster version is at least 7.1 then this cluster
        # creation is being made on behalf of a CAA-aware instance of
        # PowerHA. Allow cluster.
        PowerHA7_version=12
        cluster_version=$(/usr/es/sbin/cluster/utilities/clmixver)
        rc=$?
        case $rc in
            0 | 1)
                if (( $cluster_version >= $PowerHA7_version )); then
                    print_dbgmsg "PowerHA 7.1 or later present"
                    return 0
                fi
                print_dbgmsg "PowerHA pre-7.1 present: cluster version $cluster_version"
                ;;
            *)
                # A problem with clmixver: should not happen, but if it
                # does, assume that a proper PowerHA cluster is not present
                print_dbgmsg "clmixver error: $rc. Assume cluster not present"
                return 0
                ;;
        esac

        print_message EMSG111 $basecmd $power_ha_id $power_ha_name
        exit 1
    fi

    # See if an RPD is configured which includes this node. If so,
    # there will be more than just an IW entry in /var/ct/cfg/clusters.
    clusters=$(GetRPDClusters)
    [[ -n "$clusters" ]] || return 0

    # An RPD is configured on this node.
    migration_realm=$(ct_caa_get_migration_realm)
    print_dbgmsg "migration_realm=$migration_realm"
    if [[ "$caa_is_disabled_for_migration" != "false" && "$migration_realm" == "RPD" ]]; then
        return 0
    fi

    # RPD is not in migration.
    print_dbgmsg "Fail: RPDs: $clusters are already defined"
    print_message EMSG112 $basecmd
    exit 1
}

################################################################################
#
# check_for_inconsistent_cluster_ID - if CAA is up, check if the cluster ID
# matches the one sent as parameter to the script.
#
# The function will output an error message and exit 1 if the cluster IDs
# don't match. The function will NOT exit if the cluster IDs match.
# It's up to the caller to exit 0.
#
# Depends on the following variables already being set:
#   * REALM: if 'CAA', indicates CAA is configured on the node
#     (obtained via ct_clusterinfo -r)
#   * CAA_UUID: if CAA is configured, its cluster ID
#   * PARSED_CLUSTER_UUID: cluster ID retrieved from the parameters to the
#     the script
#
# Arguments: None
#
# Return value: 0: indicates that either CAA is not up or the cluster IDs
#               did match
#
################################################################################
check_for_inconsistent_cluster_ID()
{
    # Only a node on which CAA is configured has an ID to compare.
    [[ "$REALM" == "CAA" ]] || return 0

    # just in case: if our cluster ID is invalid then assume CAA is not
    # really configured
    case "$CAA_UUID" in
        "" | "00000000-0000-0000-0000-000000000000")
            print_dbgmsg "Local Cluster ID ($CAA_UUID) is invalid"
            return 0
            ;;
    esac

    # Without an ID on the command line there is nothing to compare with.
    if [[ -z "$PARSED_CLUSTER_UUID" ]]; then
        print_dbgmsg "Cluster ID received as paramter not set"
        return 0
    fi

    if [[ "$CAA_UUID" == "$PARSED_CLUSTER_UUID" ]]; then
        print_dbgmsg "Cluster ID check succeeded"
        return 0
    fi

    print_dbgmsg "Cluster ID ($CAA_UUID) does not match ID passed as parameter ($PARSED_CLUSTER_UUID)"
    ## ==> need new message after EMSG112 (?)
    exit 1
}

################################################################################
#
# add_and_start_local - creates the RPD cluster tree, if not present,
# create the GS subsystem (if not already created), and instruct ConfigRM
# to go online. The actions are intended to be idempotent: it should cause
# no harm to invoke this function when some of the actions to bring the
# node online have already previously been taken.
#
# Assumes that the following variables are already set up:
#   * REALM
#   * CLUSTER_ID
#   * CLUSTER_NAME
#   * NODE_NUMBER
#
# This function will exit 1 (error) if some fatal error is encountered.
#
# Arguments: None
#
# Return value: None
#
################################################################################
add_and_start_local()
{
    print_dbgmsg "Processing AddAndStart ... "

    # Mark CAA as defined and clear a left-over "stopped" marker.
    if [[ ! -e "$CT_CAA_STATE_DEFINED_FILE" ]]; then
        /usr/bin/touch $CT_CAA_STATE_DEFINED_FILE
        print_dbgmsg "CAA defined state file created"
    fi
    if [[ -e "$CT_CAA_STATE_STOPPED_FILE" ]]; then
        rm -f $CT_CAA_STATE_STOPPED_FILE
        print_dbgmsg "CAA stopped state file removed"
    fi

    # Important: Write CAA cmd file so that ConfigRM can understand the operation
    echo "START $CLUSTER_NAME $CLUSTER_ID" > $CAACMDCTRLFILE

    if [[ -n $REALM && $REALM != "CAA" ]]; then
        # non CAA environment
        print_message EMSG110 $basecmd
        exit 1
    fi

    # Start with the assumption that every step is needed; the migration
    # checks below switch individual steps off.
    create_group_services_subsystem=true
    start_group_services_subsystem=true
    refresh_ConfigRM=true

    # Take migration into account. Start by discovering whether operating
    # in a CAA environment should be disabled because migration is taking
    # place.
    caa_is_disabled_for_migration=$(ct_caa_is_disabled_for_migration)
    print_dbgmsg "caa_is_disabled_for_migration=$caa_is_disabled_for_migration"

    if [[ "$caa_is_disabled_for_migration" != "true" ]]; then
        # not disabled for migration (but migration may still be under way)
        # ctcaactrl may have been invoked via ct_caa_gs_migrate_ctrl.
        # If the GS daemon undergoing migration is still running then it
        # should not be started. Especially critical in PowerHA migration,
        # where the subsystem being running is grpsvcs, not cthags.
        # In addition, if the migrated GS daemon is still running, the
        # cthags subsystem will have been already created, at the time
        # ctcaactrl POST_START was invoked upon CAA cluster creation. So no
        # need to create GS subsystems again
        migrated_GS_daemon_still_running=$(ct_caa_migrated_GS_still_running)
        if [[ $migrated_GS_daemon_still_running == "true" ]]; then
            print_dbgmsg "GS daemon which underwent migration is still running: do not start cthags"
            create_group_services_subsystem=false
            start_group_services_subsystem=false
        fi
    else
        start_group_services_subsystem=false
        refresh_ConfigRM=false

        migration_realm=$(ct_caa_get_migration_realm)
        print_dbgmsg "migration_realm=$migration_realm"
        if [[ "$migration_realm" == "RPD" ]]; then
            # Don't need to create the Group Services subsystem in an RPD
            # environment, because it should already have been created in
            # that environment.
            create_group_services_subsystem=false
        fi
    fi

    print_dbgmsg "refresh_ConfigRM=$refresh_ConfigRM"
    print_dbgmsg "start_group_services_subsystem=$start_group_services_subsystem"
    print_dbgmsg "create_group_services_subsystem=$create_group_services_subsystem"

    # Setup the cluster info & dir. cfghact runs in both cases (it will
    # recreate if nodedef.cfg is empty); only the log line differs.
    if [[ -d /var/ct/${CLUSTER_ID} && -d /var/ct/${CLUSTER_NAME} ]]; then
        print_dbgmsg "/var/ct/${CLUSTER_ID} is already found...but make sure to have the correct ones"
    else
        # cluster does not exist...then create it
        print_dbgmsg "Configure CAA ${CLUSTER_NAME} ${CLUSTER_ID} ${NODE_NUMBER}"
    fi
    cfghact -c ${CLUSTER_NAME} -n ${NODE_NUMBER} -y ${CLUSTER_ID} -t CAA -f

    if [[ "$create_group_services_subsystem" == "true" ]]; then
        print_dbgmsg "Calling $RSCTBIN/cthagsctrl -a ..."
        ctrl_result=$($RSCTBIN/cthagsctrl -a 2>&1)
        gs_rc=$?
        if (( $gs_rc != 0 )); then
            print_dbgmsg "cthagsctrl -a exited with rc $gs_rc:\n$ctrl_result"
        fi
    fi

    if [[ "$start_group_services_subsystem" == "true" ]]; then
        print_dbgmsg "Calling $RSCTBIN/cthagsctrl -s ..."
        ctrl_result=$($RSCTBIN/cthagsctrl -s 2>&1)
        gs_rc=$?
        if (( $gs_rc != 0 )); then
            print_dbgmsg "cthagsctrl -s exited with rc $gs_rc:\n$ctrl_result"
        fi
    fi

    if [[ "$refresh_ConfigRM" != "true" ]]; then
        # Make ConfigRM auto-startable
        touch /var/ct/cfg/ConfigRM.autostart
    else
        tell_ConfigRM_resync $Op
    fi

    print_dbgmsg "Initialization done rc=$gs_rc"
}

################################################################################
#
# stop_and_delete_local_pre - stops the RPD on the local node and remove it.
# If ConfigRM is running (it should) then ConfigRM is responsible for
# stopping GS.
#
#
# Assumes that the following variables are already set up:
#   * REALM
#   * CLUSTER_ID
#   * CLUSTER_NAME
#
# This function will exit 1 (error) if some fatal error is encountered.
#
# Arguments: None
#
# Return value: 0: success
#               1: failure which should be reflected in the ctcaactrl exit val
#
################################################################################
stop_and_delete_local_pre()
{
    # Should avoid returning non-zero, since that will cause the script to
    # return non-zero, resulting in the entire cluster operation (likely the
    # node or cluster being removed) to fail.
    print_dbgmsg "Processing stop_and_delete_local_pre ... "

    # Write CMD file (STOP)
    echo "STOP $CLUSTER_NAME $CLUSTER_ID" > $CAACMDCTRLFILE
    echo "STOP-COMMIT $CLUSTER_NAME $CLUSTER_ID" > $CAACOMMITCTRLFILE

    if [[ $REALM != "CAA" && $CAAForceOpt != "FORCE" ]]; then
        # non CAA environment and Not force
        print_message EMSG110 $basecmd
        print_dbgmsg "Assume stop_and_delete_local as successful even if REALM is not CAA"
        return 0
    fi

    # Is ConfigRM running? (last word of the lssrc status line)
    state=$(LC_ALL=C /usr/bin/lssrc -s IBM.ConfigRM 2> /dev/null | LC_ALL=C /bin/sed -e"1d;s/.* \([a-zA-Z0-9]*\)$/\1/")
    if [[ $state != "active" ]]; then
        # ConfigRM is not running, remove CAA cluster structure
        print_dbgmsg "Unconfig CAA cluster..."

        # put "IW" to default_cluster so that ConfigRM won't react to online
        IWClusterID=$(grep -w "IW" /var/ct/cfg/clusters | awk '{print $1}')
        if [[ -n ${IWClusterID} ]]; then
            print "${IWClusterID}" > /var/ct/cfg/default_cluster
        fi

        # Stop HAGS if ConfigRM is not running
        cthagsctrl -k
    fi

    # Tell ConfigRM to resync
    tell_ConfigRM_resync $Op

    # Give ConfigRM time to process the active RMC requests
    # (e.g. rmrpdomain); the helper bounds the wait itself.
    waitfor_hags_termination_or_stopsrc

    # Wait a few seconds so that ConfigRM finishes up Offline processing.
    # This will wait up to 30 secs
    waitfor_ConfigRM_Offline_state

    print_dbgmsg "Processing stop_and_delete_local_pre Done. "
}

################################################################################
#
# stop_local - stops the RPD on the local node, which includes stopping
# the GS daemon. ConfigRM is responsible for stopping GS
#
#
# Assumes that the following variables are already set up:
#   * REALM
#   * CLUSTER_ID
#   * CLUSTER_NAME
#
# This function will exit 1 (error) if some fatal error is encountered.
#
# Arguments: None
#
# Return value: 0: success
#               1: failure which should be reflected in the ctcaactrl exit val
#
################################################################################
stop_local()
{
    # Should avoid returning non-zero, since that will cause the script to
    # return non-zero, resulting in the entire cluster operation (likely the
    # node or cluster being removed) to fail.
    print_dbgmsg "Processing stop_local ... "

    # Write CMD file (OFFLINE) & NONE for CommitLvl
    echo "OFFLINE $CLUSTER_NAME $CLUSTER_ID" > $CAACMDCTRLFILE
    echo "NONE $CLUSTER_NAME $CLUSTER_ID" > $CAACOMMITCTRLFILE

    if [[ $REALM != "CAA" && $CAAForceOpt != "FORCE" ]]; then
        # non CAA environment and Not force
        print_message EMSG110 $basecmd
        print_dbgmsg "Assume stop_local as successful even if REALM is not CAA"
        return 0
    fi

    # Is ConfigRM running? (last word of the lssrc status line)
    state=$(LC_ALL=C /usr/bin/lssrc -s IBM.ConfigRM 2> /dev/null | LC_ALL=C /bin/sed -e"1d;s/.* \([a-zA-Z0-9]*\)$/\1/")
    if [[ $state != "active" ]]; then
        # put "IW" to default_cluster so that ConfigRM won't react to online
        IWClusterID=$(grep -w "IW" /var/ct/cfg/clusters | awk '{print $1}')
        if [[ -n ${IWClusterID} ]]; then
            print "${IWClusterID}" > /var/ct/cfg/default_cluster
        fi

        # Stop HAGS if ConfigRM is not running
        cthagsctrl -k
    fi

    # Tell ConfigRM to resync
    tell_ConfigRM_resync $Op

    # Give ConfigRM time to process the active RMC requests
    # (e.g. rmrpdomain); the helper bounds the wait itself.
    waitfor_hags_termination_or_stopsrc

    # Wait a few seconds so that ConfigRM finishes up Offline processing.
    # This will wait up to 30 secs
    waitfor_ConfigRM_Offline_state

    print_dbgmsg "Processing stop_local Done."
}

################################################################################
#
# stop_and_delete_local_post - The RPD was supposed to have been deleted
# via stop_and_delete_local() (run in the "PRE" phase), but this function
# is invoked via "POST" just as a "catch all", to ensure that, for example,
# GS has been stopped.
#
#
# Arguments: None
#
# Return value: None
#
################################################################################
stop_and_delete_local_post()
{
    # show the current states
    caa_is_disabled_for_migration=$(ct_caa_is_disabled_for_migration)
    print_dbgmsg "caa_is_disabled_for_migration=$caa_is_disabled_for_migration"
    migration_realm=$(ct_caa_get_migration_realm)
    print_dbgmsg "migration_realm=$migration_realm"

    #
    # See if an RPD is configured which includes this node. If so,
    # there will be more than just an IW entry in /var/ct/cfg/clusters.
    #
    clusters=$(GetRPDClusters)
    if [[ -n "$clusters" ]]
    then
        # An RPD is configured on this node.
        print_dbgmsg "RPDs: $clusters are already defined. HAGS subsystem will not be stopped."
    else
        print_dbgmsg "Stopping cthags if it is still up"
        # Final cleanup steps in case StopAndDelete did not get to complete them
        cthagsctrl -k
    fi

    # Tell ConfigRM to resync
    tell_ConfigRM_resync $Op

    # Make sure HAGS to be stopped
    waitfor_hags_termination_or_stopsrc
    waitfor_ConfigRM_Offline_state

    print_dbgmsg "Processing stop_and_delete_local_post Done."
}

#
# commit_lvl_check - reject the commit level operation (exit 1) when an
# older PowerHA SystemMirror is present.
#
# Reads: POWERHA_PRE_721, CAP_SPLT_MRG_FOUND (an unset/empty value counts
#        as 0). Returns normally when the operation may proceed.
#
commit_lvl_check()
{
    #
    # Starting from 0DY support onwards CAA will generate new style of site
    # events different from linked cluster. We need to reject the commit
    # level operation if PowerHA SystemMirror is having older version (< 17)
    # so that CAA can generate LC style of site events.
    #

    #
    # if file POWERHA_PRE_721 is available which means to simulate of having
    # PowerHA with pre 7.2.1 version
    #
    if [[ -e "$POWERHA_PRE_721" ]]
    then
        print_dbgmsg "Simulated of having PowerHA pre-7.2.1, so rejecting commit level operation"
        exit 1
    fi

    PowerHA721_version=17
    PowerHA_cluster_version=$(/usr/sbin/rsct/install/bin/ct_is_haes_present)
    rc=$?
    if [[ $rc == 0 ]]
    then
        if [[ -z "$PowerHA_cluster_version" ]]
        then
            # Nothing to compare: an empty operand inside (( )) would be
            # an arithmetic syntax error, so skip the comparison.
            print_dbgmsg "No PowerHA version reported, skipping PowerHA level check"
        elif (( $PowerHA_cluster_version < $PowerHA721_version && ${CAP_SPLT_MRG_FOUND:-0} == 1 ))
        then
            print_dbgmsg "PowerHA pre-7.2.1 (PowerHA version: $PowerHA_cluster_version) is present, so rejecting commit level operation."
            exit 1
        fi
    fi

    print_dbgmsg "Processing commit_lvl_check Done."
}

################################################################################
#
# commit_lvl_local - The CAA cluster has just been updated to a newer level
# and now RSCT must in turn update the mapped active version level to match.
#
# Arguments: None
#
# Return value: None
#
################################################################################
commit_lvl_local()
{
    # Write to CAA Cmd file
    echo "START-COMMIT $CLUSTER_NAME $CLUSTER_ID" > $CAACOMMITCTRLFILE

    if [[ -n $REALM && $REALM != "CAA" ]]; then
        # non CAA environment
        print_message EMSG110 $basecmd
        exit 1
    fi

    #
    # Take migration into account. Do not continue with commit if the
    # migration is taking place.
    #
    caa_is_disabled_for_migration=$(ct_caa_is_disabled_for_migration)
    print_dbgmsg "caa_is_disabled_for_migration=$caa_is_disabled_for_migration"
    if [[ "$caa_is_disabled_for_migration" != "true" ]]
    then
        # Tell ConfigRM to resync
        tell_ConfigRM_resync $Op
    fi

    print_dbgmsg "Processing commit_lvl_local Done."
}

################################################################################
#
# process_AddNode: process ADD_NODE request (all phases)
#
#
# Arguments: : one of {CHECK| PRE | POST | UNDO_PRE}
#
#            :
#              NO_OTHER_TARGETS if none
#            : Node UUID, canonical hostname, shortid
#            :
#              TARGET_NODE if the local node is the target
#              NON_TARGET_NODE if the local node is NOT the target
#
# If the script parameters were processed in the main flow (which should be
# the case) then the environment variables mentioned below are assumed to
# have been set, and in that case this function is invoked with no arguments.
#
#
# Return value: None
#
# The function always ends the script with exit 0 unless one of the checks
# it calls exits first.
################################################################################
process_AddNode()
{
    # echo "process_AddNode: called"

    # if the parameters are passed via options then parsing should have
    # been done in the main flow
    if [[ -z "$PARSED_PHASE" ]]
    then
        # "$@" keeps every argument in its position, even an empty one;
        # an unquoted $* would drop empty fields and shift the rest.
        parse_arguments "$@"
    fi

    # Assume the following variables have been set:
    # PARSED_PHASE, PARSED_CLUSTER_UUID, PARSED_TARGET_SHORT_ID,
    # PARSED_AM_I_THE_TARGET

    case $PARSED_PHASE in
        CHECK )
            # ADD_NODE CHECK Processing
            print_dbgmsg "ADD_NODE CHECK Processing"
            if [[ "$PARSED_AM_I_THE_TARGET" == "yes" ]]
            then
                # check for legacy (non-CAA) clusters. Function below
                # should cause the script to exit with 1 if any is found
                check_for_legacy_cluster

                # check for inconsistent cluster IDs between what local CAA
                # reports and what has been passed as parameter
                check_for_inconsistent_cluster_ID
            else
                # Not the local node
                # That's where checks can be added for the remote node being
                # added. For example:
                # * Is the new node already in the cluster? (if so, reject)
                # * Is the new node's shorthand node ID too large?
                # * Do we have too many nodes in the cluster already?
                print_dbgmsg "Not the target"
            fi
            ;;

        PRE )
            # ADD_NODE PRE Processing
            print_dbgmsg "ADD_NODE PRE Processing (nothing to do)"
            # No real work to do in ADD_NODE PRE (just make sure we exit 0,
            # so that CAA will not cancel the operation)
            ;;

        POST )
            # ADD_NODE POST Processing
            print_dbgmsg "ADD_NODE POST Processing"
            if [[ "$PARSED_AM_I_THE_TARGET" == "yes" ]]
            then
                print_dbgmsg "Target node (nothing to do)"
                # Making the RPD online is now being done in JOIN_NODE,
                # and not in ADD_NODE
                # # Create RPD and go online, which includes creating the
                # # GS subsystem and starting it
                # add_and_start_local
            else
                print_dbgmsg "Not the target"
            fi
            ;;

        UNDO_PRE )
            # ADD_NODE UNDO_PRE Processing
            print_dbgmsg "ADD_NODE UNDO_PRE Processing (nothing to do)"
            if [[ "$PARSED_AM_I_THE_TARGET" == "yes" ]]
            then
                print_dbgmsg "Target node"
                # Nothing to do
            fi
            ;;
    esac

    exit 0
}

################################################################################
#
# process_JoinNode: process JOIN_NODE request (all phases)
#
#
# Arguments: : one of {CHECK| PRE | POST | UNDO_PRE}
#
#            :
#              NO_OTHER_TARGETS if none
#            : Node UUID
#            :
#              TARGET_NODE if the local node is the target
#              NON_TARGET_NODE if the local node is NOT the target
#
# If the script parameters were processed in the main flow (which should be
# the case) then the environment variables mentioned below are assumed to
# have been set, and in that case this function is invoked with no arguments.
################################################################################
# process_JoinNode - process a JOIN_NODE request (all phases).
#
# The phase is taken from $PARSED_PHASE. When the script options were handled
# in the main flow that variable is already set and the function is invoked
# with no arguments. Otherwise the positional arguments are handed, unchanged,
# to parse_arguments (defined elsewhere in this file).
#
# Positional arguments (only used when $PARSED_PHASE is empty):
#   1. phase: one of CHECK | PRE | POST | UNDO_PRE
#   2. other targets, or NO_OTHER_TARGETS if none
#   3. target: Node UUID
#   4. TARGET_NODE if the local node is the target, NON_TARGET_NODE otherwise
#
# Globals read: PARSED_PHASE, PARSED_AM_I_THE_TARGET
#
# Return value: None. Always ends the script with exit 0 unless a helper
# exits first.
################################################################################
process_JoinNode()
{
    # "$@" keeps every argument intact; an unquoted $* would re-split
    # arguments containing whitespace and drop empty ones.
    if [[ -z "$PARSED_PHASE" ]]; then
        parse_arguments "$@"
    fi

    case $PARSED_PHASE in
        CHECK )
            # CHECK processing should be essentially identical for JOIN_NODE
            # and ADD_NODE. Not much can be taken for granted in a JOIN_NODE,
            # since (1) lots could have happened to the node while it was not
            # active in the cluster and (2) the node might have been added to
            # the cluster while it was down.
            print_dbgmsg "JOIN_NODE CHECK Processing"
            if [[ "$PARSED_AM_I_THE_TARGET" == "yes" ]]; then
                # Check for legacy (non-CAA) clusters. The function below
                # should cause the script to exit with 1 if any is found.
                check_for_legacy_cluster

                # Check for inconsistent cluster IDs between what local CAA
                # reports and what has been passed as parameter.
                check_for_inconsistent_cluster_ID
            else
                # Not the local node. Checks on the remote node being added
                # could go here, for example:
                #  * Is the new node already in the cluster? (if so, reject)
                #  * Is the new node's shorthand node ID too large?
                #  * Do we have too many nodes in the cluster already?
                print_dbgmsg "Not the target"
            fi
            ;;

        PRE )
            # No real work to do in JOIN_NODE PRE (just make sure we exit 0,
            # so that CAA will not cancel the operation).
            print_dbgmsg "JOIN_NODE PRE Processing (nothing to do)"
            ;;

        POST )
            print_dbgmsg "JOIN_NODE POST Processing"
            if [[ "$PARSED_AM_I_THE_TARGET" == "yes" ]]; then
                print_dbgmsg "Target node"
                # Create RPD and go online, which includes creating the
                # GS subsystem and starting it.
                add_and_start_local
            else
                print_dbgmsg "Not the target"
            fi
            ;;

        UNDO_PRE )
            print_dbgmsg "JOIN_NODE UNDO_PRE Processing (nothing to do)"
            if [[ "$PARSED_AM_I_THE_TARGET" == "yes" ]]; then
                # Nothing to undo beyond logging.
                print_dbgmsg "Target node"
            else
                print_dbgmsg "Not the target"
            fi
            ;;
    esac

    exit 0
}

################################################################################
# process_RmNode - process an RM_NODE request (all phases).
#
# The phase is taken from $PARSED_PHASE; if it is empty the positional
# arguments are handed, unchanged, to parse_arguments first.
#
# Positional arguments (only used when $PARSED_PHASE is empty):
#   1. phase: one of CHECK | PRE | POST | UNDO_PRE
#   2. other targets, or NO_OTHER_TARGETS if none
#   3. target: Node UUID
#   4. TARGET_NODE if the local node is the target, NON_TARGET_NODE otherwise
#
# Globals read: PARSED_PHASE, PARSED_AM_I_THE_TARGET
#
# Return value: None. Always ends the script with exit 0 unless a helper
# exits first.
################################################################################
process_RmNode()
{
    if [[ -z "$PARSED_PHASE" ]]; then
        parse_arguments "$@"
    fi

    case $PARSED_PHASE in
        CHECK )
            # There is nothing to do for RM_NODE CHECK.
            # Exploiters like PowerHA and VIOS may use CHECK to verify that
            # the node removal is being orchestrated through the exploiter,
            # and reject removals when raw CAA APIs/CLIs are used.
            # But RSCT so far has no reason to veto a node/cluster removal.
            # Possible reasons to consider rejecting RM_NODE operations:
            #  * A subsystem is in the middle of a crucial operation
            #  * Some non-target node in a degraded state (say, GS not
            #    running)
            #  * (TBD)
            print_dbgmsg "RM_NODE CHECK Processing (nothing to do)"
            ;;

        PRE )
            print_dbgmsg "RM_NODE PRE Processing"
            if [[ "$PARSED_AM_I_THE_TARGET" == "yes" ]]; then
                print_dbgmsg "Target node"
                # This is where the RPD is removed, and the GS daemon is
                # stopped. By the time RM_NODE PRE is invoked, the RPD is
                # supposed to be offline, since STOP_NODE should have been
                # invoked first. However, stop_and_delete_local_pre() does
                # not assume that, and will cause GS to stop if it happens
                # to be running.
                # RPD removal is done at the PRE phase to ensure that the
                # removal is processed at the target node *first*, before
                # any other node is notified (and starts to take
                # failover/takeover actions).
                stop_and_delete_local_pre
            else
                print_dbgmsg "Not the target node"
            fi
            ;;

        POST )
            print_dbgmsg "RM_NODE POST Processing"
            if [[ "$PARSED_AM_I_THE_TARGET" == "yes" ]]; then
                print_dbgmsg "Target node"
                # In principle, there is nothing to do since all actions
                # were supposed to have been taken during PRE, but this is a
                # fallback in case actions in the PRE failed to stop GS and
                # remove the domain.
                stop_and_delete_local_post
            else
                print_dbgmsg "Not the target node"
            fi
            # For the remote nodes, a check could be added to ensure that
            # these nodes no longer see the target node in the cluster
            # configuration.
            ;;

        UNDO_PRE )
            print_dbgmsg "RM_NODE UNDO_PRE Processing"
            if [[ "$PARSED_AM_I_THE_TARGET" == "yes" ]]; then
                # In principle, the goal would be to revert the node back to
                # the state prior to 'PRE', which was "RPD present but
                # offline", while the expected state when UNDO_PRE is invoked
                # is "RPD removed and GS is down". However, "doing nothing"
                # in UNDO_PRE should be acceptable, since even if JOIN_NODE
                # or START_NODE are subsequently run, their processing should
                # result in the RPD being (re-)created if not present.
                print_dbgmsg "Target node (nothing to do)"
            else
                print_dbgmsg "Not the target node"
            fi
            ;;
    esac

    exit 0
}

################################################################################
# process_StartNode - process a START_NODE request (all phases).
#
# START_NODE is handled exactly like JOIN_NODE, so after the optional argument
# parsing this function delegates to process_JoinNode. As a consequence the
# debug messages written for a START_NODE request are labelled "JOIN_NODE".
#
# Positional arguments (only used when $PARSED_PHASE is empty):
#   1. phase: one of CHECK | PRE | POST | UNDO_PRE
#   2. other targets, or NO_OTHER_TARGETS if none
#   3. target: Node UUID
#   4. TARGET_NODE if the local node is the target, NON_TARGET_NODE otherwise
#
# Globals read: PARSED_PHASE
#
# Return value: None. The script is ended by process_JoinNode.
################################################################################
process_StartNode()
{
    if [[ -z "$PARSED_PHASE" ]]; then
        parse_arguments "$@"
    fi

    # The processing of START seems identical to that of JOIN. In both cases
    #  * The CHECK phase should check against non-CAA clusters, given that
    #    it's not known what happened to the node while it was offline in
    #    the cluster
    #  * The RPD domain may have to be created or recreated in case it was
    #    removed by mistake while the node was offline
    # Given the above, just invoke process_JoinNode with the same arguments.
    process_JoinNode "$@"

    # process_JoinNode ends with its own exit; this is only a safety net.
    exit 0
}

################################################################################
# process_StopNode - process a STOP_NODE request (all phases).
#
# The phase is taken from $PARSED_PHASE; if it is empty the positional
# arguments are handed, unchanged, to parse_arguments first.
#
# Positional arguments (only used when $PARSED_PHASE is empty):
#   1. phase: one of CHECK | PRE | POST | UNDO_PRE
#   2. other targets, or NO_OTHER_TARGETS if none
#   3. target: Node UUID
#   4. TARGET_NODE if the local node is the target, NON_TARGET_NODE otherwise
#
# Globals read: PARSED_PHASE, PARSED_AM_I_THE_TARGET
#
# Return value: None. Always ends the script with exit 0 unless a helper
# exits first.
################################################################################
process_StopNode()
{
    if [[ -z "$PARSED_PHASE" ]]; then
        parse_arguments "$@"
    fi

    case $PARSED_PHASE in
        CHECK )
            # There is nothing to do for STOP_NODE CHECK.
            # Exploiters like PowerHA and VIOS may use CHECK to verify that
            # the node stop is being orchestrated through the exploiter,
            # and reject stops when raw CAA APIs/CLIs are used.
            # But RSCT so far has no reason to veto a node stop.
            # Possible reasons to consider rejecting STOP_NODE operations:
            #  * A subsystem is in the middle of a crucial operation
            #  * (TBD)
            print_dbgmsg "STOP_NODE CHECK Processing (nothing to do)"
            ;;

        PRE )
            print_dbgmsg "STOP_NODE PRE Processing"
            if [[ "$PARSED_AM_I_THE_TARGET" == "yes" ]]; then
                print_dbgmsg "Target node"
                # This is where the GS daemon is stopped, which is done
                # with coordination with ConfigRM.
                stop_local
            else
                print_dbgmsg "Not the target node"
            fi
            ;;

        POST )
            print_dbgmsg "STOP_NODE POST Processing"
            if [[ "$PARSED_AM_I_THE_TARGET" == "yes" ]]; then
                # In principle, there is nothing to do since all actions
                # were supposed to have been taken during PRE.
                print_dbgmsg "Target node"
            fi
            # For the remote nodes, a check could be added to ensure that
            # these nodes no longer see the target node as alive in the
            # cluster.
            ;;

        UNDO_PRE )
            print_dbgmsg "STOP_NODE UNDO_PRE Processing"
            if [[ "$PARSED_AM_I_THE_TARGET" == "yes" ]]; then
                print_dbgmsg "Target node"
                # The goal is to revert the node back to the state prior to
                # 'PRE', which was "RPD present and online". The domain
                # should still be present, since it was not supposed to have
                # been removed in the PRE phase.
                add_and_start_local
            fi
            ;;
    esac

    exit 0
}

################################################################################
# process_CommitLvl - process a COMMIT_LVL request (all phases).
#
# The phase is taken from $PARSED_PHASE; if it is empty the positional
# arguments are handed, unchanged, to parse_arguments first.
#
#   CHECK     runs commit_lvl_check
#   PRE       nothing to do
#   UNDO_PRE  nothing to do
#   POST      runs commit_lvl_local
#
# Positional arguments (only used when $PARSED_PHASE is empty):
#   1. phase
#   2. flags for new capabilities
#   3. TARGET_NODE if the local node is the target, NON_TARGET_NODE otherwise
#
# Globals read: PARSED_PHASE
#
# Return value: None. Always ends the script with exit 0 unless a helper
# exits first.
################################################################################
process_CommitLvl()
{
    if [[ -z "$PARSED_PHASE" ]]; then
        parse_arguments "$@"
    fi

    case $PARSED_PHASE in
        CHECK )
            print_dbgmsg "COMMIT_LVL CHECK Processing"
            commit_lvl_check
            ;;

        PRE )
            print_dbgmsg "COMMIT_LVL PRE Processing (nothing to do)"
            ;;

        UNDO_PRE )
            print_dbgmsg "COMMIT_LVL UNDO_PRE Processing (nothing to do)"
            ;;

        POST )
            print_dbgmsg "COMMIT_LVL POST Processing"
            commit_lvl_local
            ;;
    esac

    exit 0
}
################################################################################
# check_for_inconsistent_site_info - when the realm is CAA, compare the local
# site's UUID, name, short id and priority against the values passed to the
# script.
#
# On the first mismatch a debug message is logged and the script exits 1.
# The function does NOT exit when everything matches; it is up to the caller
# to exit 0.
#
# The comparison is skipped (return 0) when:
#   * $REALM is not "CAA";
#   * the local site UUID is empty or all zeroes (site treated as not really
#     configured);
#   * any of the four values received as parameter is empty.
#
# Depends on the following variables already being set:
#   * SITE_UUID:     Local SITE ID (obtained via ct_clusterinfo -U)
#   * SITE_NAME:     Local SITE name (obtained via ct_clusterinfo -S)
#   * SITE_NUMBER:   Local SITE number (obtained via ct_clusterinfo -N)
#   * SITE_PRIORITY: Local SITE priority (obtained via ct_clusterinfo -p)
#   * PARSED_TARGET_SITE_UUID, PARSED_TARGET_SITENAME,
#     PARSED_TARGET_SITE_NUMBER, PARSED_TARGET_SITE_PRIORITY: the site ID,
#     name, number and priority retrieved from the parameters to the script
#   * REALM
#
# Arguments: None
#
# Return value: 0: the check was skipped or the site information matched
################################################################################
check_for_inconsistent_site_info()
{
    if [[ "$REALM" == "CAA" ]]; then
        # This indicates that CAA is configured.

        # Just in case: if our site ID is invalid then assume the site is
        # not really configured.
        if [[ -z "$SITE_UUID" ||
              "$SITE_UUID" == "00000000-0000-0000-0000-000000000000" ]]; then
            print_dbgmsg "Local Site ID ($SITE_UUID) is invalid"
            return 0
        fi

        # Nothing to compare against if a parameter was not supplied.
        # (Each test must expand the variable: testing the bare variable
        # name as a literal string is never empty, so the guard would never
        # fire and a missing parameter would be reported as a mismatch.)
        if [[ -z "$PARSED_TARGET_SITE_UUID" ]]; then
            print_dbgmsg "Site ID received as parameter not set"
            return 0
        fi

        if [[ -z "$PARSED_TARGET_SITENAME" ]]; then
            print_dbgmsg "Site name received as parameter not set"
            return 0
        fi

        if [[ -z "$PARSED_TARGET_SITE_NUMBER" ]]; then
            print_dbgmsg "Site short id received as parameter not set"
            return 0
        fi

        if [[ -z "$PARSED_TARGET_SITE_PRIORITY" ]]; then
            print_dbgmsg "Site priority received as parameter not set"
            return 0
        fi

        # Compare field by field; the first difference ends the script.
        if [[ "$SITE_UUID" != "$PARSED_TARGET_SITE_UUID" ]]; then
            print_dbgmsg "Site ID ($SITE_UUID) does not match ID passed as parameter ($PARSED_TARGET_SITE_UUID)"
            exit 1
        elif [[ "$SITE_NAME" != "$PARSED_TARGET_SITENAME" ]]; then
            print_dbgmsg "Site Name ($SITE_NAME) does not match Name passed as parameter ($PARSED_TARGET_SITENAME)"
            exit 1
        elif [[ "$SITE_NUMBER" != "$PARSED_TARGET_SITE_NUMBER" ]]; then
            print_dbgmsg "Site short id ($SITE_NUMBER) does not match short id passed as parameter ($PARSED_TARGET_SITE_NUMBER)"
            exit 1
        elif [[ "$SITE_PRIORITY" != "$PARSED_TARGET_SITE_PRIORITY" ]]; then
            print_dbgmsg "Site priority ($SITE_PRIORITY) does not match priority passed as parameter ($PARSED_TARGET_SITE_PRIORITY)"
            exit 1
        else
            print_dbgmsg "Site information check succeeded"
        fi
    fi

    return 0
}
################################################################################
# process_AddSite - process an ADD_SITE request.
#
# Dispatches on $PARSED_PHASE, which is expected to have been set while the
# script options were processed in the main flow; the function is invoked
# without arguments and does not parse any itself.
#
#   CHECK  on the target: check_for_inconsistent_cluster_ID
#   PRE    nothing to do
#   POST   on the target: tell_ConfigRM_resync
#
# Any other phase value falls through without action.
#
# Globals read: PARSED_PHASE, PARSED_AM_I_THE_TARGET
#
# Return value: None. Always ends the script with exit 0 unless a helper
# exits first.
################################################################################
process_AddSite()
{
    case "$PARSED_PHASE" in
        CHECK)
            print_dbgmsg "ADD_SITE CHECK Processing"
            if [[ "$PARSED_AM_I_THE_TARGET" != "yes" ]]; then
                # Not the local site
                print_dbgmsg "Not the target site"
            else
                # Compare the cluster ID reported by local CAA with the one
                # passed as parameter.
                check_for_inconsistent_cluster_ID
            fi
            ;;

        PRE)
            # Nothing to do here; reaching the final exit 0 is what keeps
            # CAA from cancelling the operation.
            print_dbgmsg "ADD_SITE PRE Processing (nothing to do)"
            ;;

        POST)
            print_dbgmsg "ADD_SITE POST Processing"
            if [[ "$PARSED_AM_I_THE_TARGET" != "yes" ]]; then
                print_dbgmsg "Not the target site"
            else
                tell_ConfigRM_resync
                print_dbgmsg "Target site"
            fi
            ;;
    esac

    exit 0
}
################################################################################
# process_ModSite - process a MOD_SITE request.
#
# Dispatches on $PARSED_PHASE, which is expected to have been set while the
# script options were processed in the main flow; the function is invoked
# without arguments and does not parse any itself.
#
#   CHECK  on the target: check_for_inconsistent_cluster_ID, then
#          check_for_inconsistent_site_info
#   PRE    nothing to do
#   POST   on the target: tell_ConfigRM_resync (the final changes are
#          reflected in this phase)
#
# Any other phase value falls through without action.
#
# Globals read: PARSED_PHASE, PARSED_AM_I_THE_TARGET
#
# Return value: None. Always ends the script with exit 0 unless a helper
# exits first.
################################################################################
process_ModSite()
{
    case "$PARSED_PHASE" in
        CHECK)
            print_dbgmsg "MOD_SITE CHECK Processing"
            if [[ "$PARSED_AM_I_THE_TARGET" != "yes" ]]; then
                # Not the local site
                print_dbgmsg "Not the target site"
            else
                # Compare the cluster ID reported by local CAA with the one
                # passed as parameter.
                check_for_inconsistent_cluster_ID

                # Compare the local site information with the values passed
                # as parameter.
                check_for_inconsistent_site_info
            fi
            ;;

        PRE)
            # Nothing to do here; reaching the final exit 0 is what keeps
            # CAA from cancelling the operation.
            print_dbgmsg "MOD_SITE PRE Processing (nothing to do)"
            ;;

        POST)
            print_dbgmsg "MOD_SITE POST Processing"
            if [[ "$PARSED_AM_I_THE_TARGET" != "yes" ]]; then
                print_dbgmsg "Not the target site"
            else
                tell_ConfigRM_resync
                print_dbgmsg "Target site"
            fi
            ;;
    esac

    exit 0
}
################################################################################
# process_RmSite - process an RM_SITE request.
#
# Dispatches on $PARSED_PHASE, which is expected to have been set while the
# script options were processed in the main flow; the function is invoked
# without arguments and does not parse any itself.
#
#   CHECK  only logs whether the local site is the target
#   PRE    nothing to do
#   POST   on the target: tell_ConfigRM_resync
#
# Any other phase value falls through without action.
#
# Globals read: PARSED_PHASE, PARSED_AM_I_THE_TARGET
#
# Return value: None. Always ends the script with exit 0 unless a helper
# exits first.
################################################################################
process_RmSite()
{
    case "$PARSED_PHASE" in
        CHECK)
            print_dbgmsg "RM_SITE CHECK Processing"
            if [[ "$PARSED_AM_I_THE_TARGET" != "yes" ]]; then
                # Not the local site
                print_dbgmsg "Not the target site"
            else
                print_dbgmsg "Target site"
            fi
            ;;

        PRE)
            # Nothing to do in RM_SITE PRE; reaching the final exit 0 is
            # what keeps CAA from cancelling the operation.
            print_dbgmsg "RM_SITE PRE Processing (nothing to do)"
            ;;

        POST)
            print_dbgmsg "RM_SITE POST Processing"
            if [[ "$PARSED_AM_I_THE_TARGET" != "yes" ]]; then
                print_dbgmsg "Not the target site"
            else
                tell_ConfigRM_resync
                print_dbgmsg "Target site"
            fi
            ;;
    esac

    exit 0
}

################################################################################
#
#                              MAIN FUNCTION
#
################################################################################
#
get_current_locale

# parse the arguments
gs_rc=0                             # cthagsctrl return code
Argcount=0

#
# Expected syntax (option meanings as handled by the getopts loop below):
#    ctcaactrl -O    operation (ADD_NODE, etc)
#              -P    phase (CHECK | PRE | POST | UNDO_PRE)
#              -T    transaction id
#              -c    cluster UUID
#              -o    other targets (not for ADD_NODE)
#              -t    target
#              [-m]  local node is the target
#              [-g]  target is gateway server
#              [-i]  local node is the initiator
#
BAD_ARG=0
UNKNOWN_OPERATION=0
UNKNOWN_PHASE=0
SITE_OPERATION=0
OPERATION=""
PARSED_PHASE=""
PARSED_TRANS_ID=""
PARSED_CLUSTER_UUID=""
PARSED_TARGET_UUID=""
PARSED_TARGET_HOSTNAME=""
PARSED_TARGET_SHORT_ID=""
PARSED_SITE_UUID=""
PARSED_SITENAME=""
PARSED_SITE_NUMBER=""
PARSED_SITE_PRIORITY=""
PARSED_TARGET_SITE_UUID=""
PARSED_TARGET_SITENAME=""
PARSED_TARGET_SITE_NUMBER=""
PARSED_TARGET_SITE_PRIORITY=""
PARSED_AM_I_THE_TARGET="no"         # assume not the target unless -m specified
PARSED_AM_I_THE_INITIATOR="no"      # assume not the initiator unless -i specified
PARSED_TARGET_IS_GATEWAY_SERVER="no"
LKU_PD_MARKER_FILE="/var/ct/lvupd/lku_PD_mark_file"
LKU_DISABLE_START=0 OPERATION_CALLER="" CT_CAA_STATE_DEFINED_FILE="/var/ct/cfg/caa-state-defined" CT_CAA_STATE_STOPPED_FILE="/var/ct/cfg/caa-state-stopped" CAP_SPLT_MRG_FOUND=0 # Create logfile if defined create_logfile_with_permission "$HA_CCAL_LOG" "0640" # print_dbgmsg "--------------------------------------" print_dbgmsg "Invoked with parameters: $*" print_process_tree # show process tree # # LKU (live kernel update) changes # # If marker file (/var/ct/lvupd/lku_mark_file - This file is controlled by "ctlvupdate" script) is # exists don't allow any operation as presence of the marker file means currently LKU is going on. # if [[ -e "$LKU_PD_MARKER_FILE" ]] then print_dbgmsg "The LKU is currently going on, start operations cannot be executed." LKU_DISABLE_START=1 fi # Note the ':' as first argument in getopts: this changes how getopts # deals with unknown options. The goal is to be able to ignore functions # we don't understand. while getopts ":O:P:T:c:o:t:s:l:mgiv:" opt do case $opt in O ) # operation: ADD_NODE, etc OPERATION=$OPTARG ;; P ) PARSED_PHASE=$OPTARG case $PARSED_PHASE in CHECK | PRE | POST | UNDO_PRE) ;; *) # unknown operation/phase UNKNOWN_PHASE=1 ;; esac Argcount=$((Argcount + 1)) ;; T ) PARSED_TRANS_ID=$OPTARG ;; c ) PARSED_CLUSTER_UUID=$OPTARG ;; o ) # other targets # (not used for now) ;; s ) # Site UUID typeset site_info=$OPTARG if [[ "$OPERATION" == "ADD_SITE" || "$OPERATION" == "MOD_SITE" || "$OPERATION" == "RM_SITE" ]] then SITE_OPERATION=1 if echo $site_info | grep "," > /dev/null then # Format assumed to be: Site UUID, Site name, site short id, site priority # Site UUID PARSED_SITE_UUID=${site_info%%,*} rest=${site_info#*,} # Site Name PARSED_SITENAME=${rest%%,*} rest=${rest#*,} # Site short id PARSED_SITE_NUMBER=${rest%%,*} rest=${rest#*,} # Site priority PARSED_SITE_PRIORITY=${rest%%,*} fi else # format assumed to Site UUID PARSED_SITE_UUID=$site_info fi ;; t ) # target typeset target_id_info=$OPTARG if [[ "$OPERATION" == "ADD_SITE" || 
"$OPERATION" == "MOD_SITE" || "$OPERATION" == "RM_SITE" ]] then SITE_OPERATION=1 if echo $target_id_info | grep "," > /dev/null then # Format assumed to be: Site UUID, Site name, site short id, site priority # Site UUID PARSED_TARGET_SITE_UUID=${target_id_info%%,*} rest=${target_id_info#*,} # Site Name PARSED_TARGET_SITENAME=${rest%%,*} rest=${rest#*,} # Site short id PARSED_TARGET_SITE_NUMBER=${rest%%,*} rest=${rest#*,} # Site priority PARSED_TARGET_SITE_PRIORITY=${rest%%,*} else # Format assumed to be: Site UUID PARSED_TARGET_SITE_UUID=$target_id_info fi else if echo $target_id_info | grep "," > /dev/null then # Format assumed to be: Node UUID, canonical hostname, shortid PARSED_TARGET_UUID=${target_id_info%%,*} rest=${target_id_info#*,} # echo "PARSED_TARGET_UUID=$PARSED_TARGET_UUID" PARSED_TARGET_HOSTNAME=${rest%%,*} rest=${rest#*,} # echo "PARSED_TARGET_HOSTNAME=$PARSED_TARGET_HOSTNAME" PARSED_TARGET_SHORT_ID=${rest%%,*} # echo "PARSED_TARGET_SHORT_ID=$PARSED_TARGET_SHORT_ID" else # Format assumed to be: Node UUID PARSED_TARGET_UUID=$target_id_info fi fi ;; m ) # local node is the target PARSED_AM_I_THE_TARGET="yes" ;; g ) # target is gateway server PARSED_TARGET_IS_GATEWAY_SERVER="yes" ;; i ) # local node is the initiator PARSED_AM_I_THE_INITIATOR="yes" ;; v ) if [[ "$OPERATION" == "MOD_TUNE" ]] then # Tunable details typeset tunable_info=$OPTARG # Format assumed to be: Parameter,Value print_dbgmsg "Received tunable setting: $tunable_info" PARSED_TUNABLE_PARAMETER=${tunable_info%%,*} PARSED_TUNABLE_VALUE=${tunable_info#*,} print_dbgmsg "Parameter: $PARSED_TUNABLE_PARAMETER" print_dbgmsg "Value: $PARSED_TUNABLE_VALUE" elif [[ "$OPERATION" == "COMMIT_LVL" ]] then typeset capabilities=$OPTARG # check if SPLT_MRG capability found from the input list if [[ "$capabilities" == *"SPLT_MRG"* ]] then CAP_SPLT_MRG_FOUND=1 fi fi ;; a ) Op=AddAndStart; Argcount=$((Argcount + 1));; s ) Op=AddAndStart; Argcount=$((Argcount + 1));; d ) Op=StopAndDelete; Argcount=$((Argcount 
+ 1));; k ) Op=stop; Argcount=$((Argcount + 1));; r ) Op=refresh; Argcount=$((Argcount + 1));; h ) Op=help print_message IMSG_ctcaactrl_Usage $basecmd exit 0;; l ) # flag to include the caller OPERATION_CALLER=$OPTARG ;; ?|* ) print_message EMSG100 $basecmd $OPTARG # since ':' was used as first argument in getopts, the unknown option # is stored in $OPTARG # No longer quit if an unknown option is received: ignore the option. # This allows CAA to add new arguments that can be ignored without # causing a failure # print_message IMSG_ctcaactrl_Usage $basecmd # exit 1;; ;; esac done # # Log the caller that initiated the operation # if [[ -n "$OPERATION_CALLER" ]] then print_dbgmsg "The operation has been initiated by $OPERATION_CALLER" fi # # Some options are mandatory # # if OPERATION is set then must have PARSED_PHASE if [[ -n "$OPERATION" ]] then if [[ -z "$PARSED_PHASE" ]] then BAD_ARG=1 fi fi # if OPERATION/PARSED_PHASE are set then must have cluster ID and target # (removed requirement for target when op is COMMIT_LVL - does not require one) if [[ -n "$PARSED_PHASE" ]] then if [[ -z "$PARSED_CLUSTER_UUID" ]] then BAD_ARG=1 fi # # both PARSED_TARGET_UUID & PARSED_TARGET_SITE_UUID # can't be NULL # or # can have values # at same time when OP is not COMMIT_LVL # if [[ "$OPERATION" != "COMMIT_LVL" ]] then if [[ -z "$PARSED_TARGET_UUID" && -z "$PARSED_TARGET_SITE_UUID" || \ -n "$PARSED_TARGET_UUID" && -n "$PARSED_TARGET_SITE_UUID" ]] then BAD_ARG=1 fi fi fi if [[ "$BAD_ARG" == 1 ]] then print_message IMSG_ctcaactrl_Usage $basecmd exit 1 fi #remove options shift $(($OPTIND - 1)) if [[ -n "$OPERATION" ]] then # if -o option specified then use that as 'operation' CAACmdOpt=$OPERATION # [ADD_NODE | RM_NODE | STOP_NODE | START_NODE | # JOIN_NODE | ADD_DISK | RM_DISK | COMMIT_LVL | # ADD_SITE | MOD_SITE | RM_SITE] else CAACmdOpt=$1 # [START | STOP | PRE_START | POST_START | PRE_STOP] fi if [[ "$LKU_DISABLE_START" == 1 ]] then echo "ADD_NODE START_NODE JOIN_NODE START 
PRE_START POST_START" | grep -w $CAACmdOpt > /dev/null 2>&1 rc=$? if [[ 0 == $rc ]] then print_dbgmsg "The ${CAACmdOpt} operation cannot be executed." exit 0 fi fi CAAForceOpt=$2 # [FORCE] for stop print_dbgmsg "Command: $CAACmdOpt Phase: $PARSED_PHASE TransID: $PARSED_TRANS_ID" print_dbgmsg "Cluster ID: $PARSED_CLUSTER_UUID" if [[ "$SITE_OPERATION" == 1 ]] then print_dbgmsg "Target Site UUID: $PARSED_TARGET_SITE_UUID" else print_dbgmsg "Target Node UUID: $PARSED_TARGET_UUID" fi if [[ "$SITE_OPERATION" == 0 && -n "$PARSED_SITE_UUID" ]] then print_dbgmsg "Target Site UUID: $PARSED_SITE_UUID" fi if [[ "$SITE_OPERATION" == 1 ]] then print_dbgmsg "Is local site the target: $PARSED_AM_I_THE_TARGET" print_dbgmsg "Is local site the initiator: $PARSED_AM_I_THE_INITIATOR" else print_dbgmsg "Is local node the target: $PARSED_AM_I_THE_TARGET" print_dbgmsg "Is local node the initiator: $PARSED_AM_I_THE_INITIATOR" fi if [[ "$UNKNOWN_PHASE" == 1 ]] then print_dbgmsg "Unknown phase ($PARSED_PHASE) ignored" exit 0 fi if [[ ! 
-z $CAACmdOpt ]] then case $CAACmdOpt in ADD_NODE ) Op=AddNode Argcount=$((Argcount + 1)) ;; RM_NODE ) Op=RmNode Argcount=$((Argcount + 1)) ;; STOP_NODE ) Op=StopNode Argcount=$((Argcount + 1)) ;; START_NODE ) Op=StartNode Argcount=$((Argcount + 1)) ;; JOIN_NODE ) Op=JoinNode Argcount=$((Argcount + 1)) ;; ADD_DISK ) Op=AddDisk Argcount=$((Argcount + 1)) ;; RM_DISK ) Op=RmDisk Argcount=$((Argcount + 1)) ;; COMMIT_LVL ) Op=CommitLvl Argcount=$((Argcount + 1)) ;; START ) Op=AddAndStart Argcount=$((Argcount + 1)) ;; STOP ) Op=StopAndDelete Argcount=$((Argcount + 1)) ;; PRE_START ) Op=PreStart Argcount=$((Argcount + 1)) ;; POST_START ) Op=AddAndStart # same as START Argcount=$((Argcount + 1)) ;; PRE_STOP ) Op=StopAndDelete # same as STOP Argcount=$((Argcount + 1)) ;; POST_STOP ) Op=PostStop # leftover cleanup steps Argcount=$((Argcount + 1)) ;; PRE_SYNC ) Op=PreSync #NoOp Argcount=$((Argcount + 1)) ;; POST_SYNC ) Op=PostSync #NoOp Argcount=$((Argcount + 1)) ;; ADD_SITE ) Op=AddSite Argcount=$((Argcount + 1)) ;; MOD_SITE ) Op=ModSite Argcount=$((Argcount + 1)) ;; RM_SITE ) Op=RmSite Argcount=$((Argcount + 1)) ;; MOD_TUNE ) Op=ModTune Argcount=$((Argcount + 1)) ;; * ) # Unknown operation: ignore UNKNOWN_OPERATION=1 ;; esac fi if [[ "$UNKNOWN_OPERATION" == 1 ]] then print_dbgmsg "Unknown operation ignored" exit 0 fi if [[ -z $CAAForceOpt ]]; then CAAForceOpt=NONE fi # To shortcut.... if [[ "PreSync" == "$Op" || "PostSync" == "$Op" || "AddDisk" == "$Op" || "RmDisk" == "$Op" ]] then # Note: PreSync/PostSync seem happenning too often. # RSCT does not use those events. # Ignore those events even from the logging. 
exit 0; fi # Check CCAL log file size and backup if necessary backup_ccal_log_file # if [[ -n "$*" ]] then print_dbgmsg "" print_dbgmsg "$* called" fi # if ((Argcount == 0)) then # no argument specified print_message EMSG101 $basecmd print_message IMSG_ctcaactrl_Usage $basecmd exit 1 fi ## if ((Argcount > 1)) ## then ## # More than one argument specified ## print_message EMSG102 $basecmd ## print_message IMSG_ctcaactrl_Usage $basecmd ## exit 1 ## fi print_dbgmsg "Op=$Op" # get the cluster/caa info set -A clinfo $(${RSCTBIN}/ct_clusterinfo -r -c -n -i -u -M) REALM=${clinfo[0]} # realm (CAA or CLUSTER) CLUSTER_NAME=${clinfo[1]} # cluster-name NODE_NUMBER=${clinfo[2]} # node-number CLUSTER_ID=${clinfo[3]} # cluster-id CAA_UUID=${clinfo[4]} # caa-cluster-uuid if [[ "$SITE_OPERATION" == 1 ]] then set -A clinfo $(${RSCTBIN}/ct_clusterinfo -U -S -N -p) SITE_UUID=${clinfo[1]} # site uuid SITE_NAME=${clinfo[2]} # site name SITE_NUMBER=${clinfo[3]} # site number SITE_PRIORITY=${clinfo[4]} # site priority fi print_dbgmsg "REALM=$REALM CLUSTER_NAME=$CLUSTER_NAME NODE_NUMBER=$NODE_NUMBER CLUSTER_ID=$CLUSTER_ID CAA_UUID=$CAA_UUID" # No need to shift if the parameters were passed via options if [[ -z "$OPERATION" ]] then shift # shift arguments since operand (1st argument) has been processed fi # Start 'switch' for the arguments case $Op in AddNode ) process_AddNode $* ;; RmNode ) process_RmNode $* ;; StopNode ) process_StopNode $* ;; StartNode ) process_StartNode $* ;; JoinNode ) process_JoinNode $* ;; CommitLvl ) process_CommitLvl $* ;; AddSite ) process_AddSite $* ;; ModSite ) process_ModSite $* ;; RmSite ) process_RmSite $* ;; esac #end of 'switch' for the arguments if [[ "PreStart" == "$Op" ]] then caa_is_disabled_for_migration=`ct_caa_is_disabled_for_migration` print_dbgmsg "caa_is_disabled_for_migration=$caa_is_disabled_for_migration" if [[ "$caa_is_disabled_for_migration" == "true" ]] then # # If "disabled for migration", the PowerHA/RPD domain is being # migrated in 
coordination with RSCT, so having a previously # defined domain is not an error. # print_dbgmsg "SUCESS: RSCT-to-CAA is in progress" exit 0 fi clusters=$(Get_CAA_RPD_Clusters) if [[ -n "$clusters" ]] then # # The existence of a CAA RPD cluster means we have a previously # defined domain, which is not an error. # print_dbgmsg "SUCCESS: CAA RPD cluster already exists: $clusters" exit 0 fi # # See if a PowerHA cluster is configured which includes this node. # If so, the odm entry for the HACMPcluster object class will have # a non-zero id attribute value, and a non-empty name attribute value. # power_ha=`/usr/bin/odmget HACMPcluster 2>/dev/null` rc=$? if [[ 0 == $rc ]] then # # grep for the id. # power_ha_id=`echo "$power_ha" | /usr/bin/grep -w "id"` rc=$? if [[ 0 == $rc ]] then # # grep for the id. # power_ha_name=`echo "$power_ha" | /usr/bin/grep -w "name"` rc=$? if [[ 0 == $rc ]] then # # The id is an integer. Remove the "large left" pattern up # through a space to leave only the integer itself. # power_ha_id=${power_ha_id##* } # # The name is a string. Remove the "small left" pattern up # through the first double quote, then the "small right" # pattern from the second double quote on, leaving only the # string itself. # power_ha_name=${power_ha_name#*\"} power_ha_name=${power_ha_name%\"*} if [[ "0" == "$power_ha_id" ]] then print_dbgmsg "SUCCESS: power_ha_id is 0" exit 0 fi if [ -z "$power_ha_name" ] then print_dbgmsg "SUCCESS: power_ha is not defined" exit 0 fi # # The id and the name are legitimate. Consider PowerHA # configured. # # If PowerHA cluster version is at least 7.1 then # this cluster creation is being made on behalf of a # CAA-aware instance of PowerHA. Allow cluster. PowerHA7_version=12 cluster_version=`/usr/es/sbin/cluster/utilities/clmixver` rc=$? 
if [[ ($rc == 0) || ($rc == 1) ]] then if (( $cluster_version >= $PowerHA7_version )) then print_dbgmsg "PowerHA 7.1 or later present" exit 0 else print_dbgmsg "PowerHA pre-7.1 present: cluster version $cluster_version" fi else # A problem with clmixver: should not happen, but it does, # we assume that a propoer PowerHA cluster is not present print_dbgmsg "clmixver error: $rc. Assume cluster not present" exit 0 fi print_message EMSG111 $basecmd $power_ha_id $power_ha_name exit 1 fi fi fi # # See if an RPD is configured which includes this node. If so, # there will be more than just an IW entry in /var/ct/cfg/clusters. # clusters=$(GetRPDClusters) if [[ "" != "$clusters" ]] then # # An RPD is configured on this node. # migration_realm=`ct_caa_get_migration_realm` print_dbgmsg "migration_realm=$migration_realm" if [[ "$caa_is_disabled_for_migration" == "false" || "$migration_realm" != "RPD" ]] then # RPD is not in migration. print_dbgmsg "Fail: RPDs: $clusters are already defined" print_message EMSG112 $basecmd exit 1 fi fi exit 0 fi if [[ "PreSync" == "$Op" || "PostSync" == "$Op" ]] then # do nothing exit 0; fi if [[ -z $REALM ]]; then REALM=CLUSTER fi if [[ -z ${CLUSTER_ID} ]]; then # No CAA cluster info found print_message EMSG110 $basecmd exit 1 fi ## Now CAA is active # # case $Op in AddAndStart ) # # Start with assumptions about what this block should accomplish. # print_dbgmsg "Processing AddAndStart ... " # Important: Write CAA cmd file so that ConfigRM can understand the operation echo "START $CLUSTER_NAME $CLUSTER_ID" > $CAACMDCTRLFILE if [[ ! -z $REALM && $REALM != "CAA" ]]; then # non CAA environment print_message EMSG110 $basecmd exit 1 fi # create_group_services_subsystem=true start_group_services_subsystem=true refresh_ConfigRM=true # # Take migration into account. Start by discovering whether operating in a # CAA environment should be disabled because migration is taking place. 
# caa_is_disabled_for_migration=`ct_caa_is_disabled_for_migration` print_dbgmsg "caa_is_disabled_for_migration=$caa_is_disabled_for_migration" if [[ "$caa_is_disabled_for_migration" == "true" ]] then start_group_services_subsystem=false refresh_ConfigRM=false migration_realm=`ct_caa_get_migration_realm` print_dbgmsg "migration_realm=$migration_realm" if [[ "$migration_realm" == "RPD" ]] then # # Don't need to create the Group Services subsystem in an RPD # environment, because it should already have been created in # that environment. # create_group_services_subsystem=false fi else # not disabled for migration (but migration may still be under way) # ctcaactrl may have been invoked via ct_caa_gs_migrate_ctrl. # If the GS daemon undegoing migration is still running then # it should not be started. Especially critical in PowerHA migration, # where the subsystem being running is grpsvcs, not cthags. # In addition, if the migrated GS daemon is still running, the # cthags subsystem will have been already created, at the time # ctcaactrl POST_START was invoked upon CAA cluster creation. So no # need to create GS subsystems again migrated_GS_daemon_still_running=`ct_caa_migrated_GS_still_running` if [[ $migrated_GS_daemon_still_running == "true" ]] then print_dbgmsg "GS daemon which underwent migration is still running: do not start cthags" create_group_services_subsystem=false start_group_services_subsystem=false fi fi print_dbgmsg "refresh_ConfigRM=$refresh_ConfigRM" print_dbgmsg "start_group_services_subsystem=$start_group_services_subsystem" print_dbgmsg "create_group_services_subsystem=$create_group_services_subsystem" # # Setup the cluster info & dir. # if [[ ! -d /var/ct/${CLUSTER_ID} || ! 
-d /var/ct/${CLUSTER_NAME} ]] then # cluster does not exist...then create it print_dbgmsg "Configure CAA ${CLUSTER_NAME} ${CLUSTER_ID} ${NODE_NUMBER}" cfghact -c ${CLUSTER_NAME} -n ${NODE_NUMBER} -y ${CLUSTER_ID} -t CAA -f else print_dbgmsg "/var/ct/${CLUSTER_ID} is already found" fi if [[ "$create_group_services_subsystem" == "true" ]] then print_dbgmsg "Calling $RSCTBIN/cthagsctrl -a ..." $RSCTBIN/cthagsctrl -a gs_rc=$? fi if [[ "$start_group_services_subsystem" == "true" ]] then print_dbgmsg "Calling $RSCTBIN/cthagsctrl -s ..." $RSCTBIN/cthagsctrl -s gs_rc=$? fi if [[ "$refresh_ConfigRM" == "true" ]] then tell_ConfigRM_resync $Op else # Make ConfigRM auto-startable touch /var/ct/cfg/ConfigRM.autostart fi print_dbgmsg "Initialization done rc=$gs_rc" exit_script # exit the script ;; StopAndDelete ) #StopAndDelete subsystems # Note: since this script is being invoked with PRE_STOP, a non-zero # exit code will indicate to CAA that this code cannot be deleted from # the cluster. print_dbgmsg "Processing StopAndDelete... " # Write CMD file (STOP) echo "STOP $CLUSTER_NAME $CLUSTER_ID" > $CAACMDCTRLFILE echo "STOP-COMMIT $CLUSTER_NAME $CLUSTER_ID" > $CAACOMMITCTRLFILE if [[ $REALM != "CAA" && $CAAForceOpt != "FORCE" ]]; then # non CAA environment and Not force print_message EMSG110 $basecmd print_dbgmsg "Assume StopAndDelete as successful even if REALM is not CAA" # exiting 0, since we don't the PRE_STOP invocation to return # non-zero, except in cases where cluster cannot be allowed to be # removed exit 0 fi # If ConfigRM is running state=`LC_ALL=C /usr/bin/lssrc -s IBM.ConfigRM 2> /dev/null | LC_ALL=C /bin/sed -e"1d;s/.* \([a-zA-Z0-9]*\)$/\1/"`; if [[ -z $state || $state != "active" ]] then # ConfigRM is not running, remove CAA cluster structure print_dbgmsg "Unconfig CAA cluster..." 
# put "IW" to default_cluster so that ConfigRM won't react to online IWClusterID=$(grep -w "IW" /var/ct/cfg/clusters | awk '{print $1}') if [[ -n ${IWClusterID} ]]; then print "${IWClusterID}" > /var/ct/cfg/default_cluster fi # Stop HAGS if ConfigRM is not running cthagsctrl -k fi # Tell ConfigRM to resync tell_ConfigRM_resync $Op # Wait a few seconds so that ConfigRM can process the active RMC requests (e.g. rmrpdomain) # This will wait upto 60secs waitfor_hags_termination_or_stopsrc print_dbgmsg "Processing StopAndDelete Done. " exit 0 # see comment above on the exit value ;; PostStop ) # show the current states caa_is_disabled_for_migration=`ct_caa_is_disabled_for_migration` print_dbgmsg "caa_is_disabled_for_migration=$caa_is_disabled_for_migration" migration_realm=`ct_caa_get_migration_realm` print_dbgmsg "migration_realm=$migration_realm" # # See if an RPD is configured which includes this node. If so, # there will be more than just an IW entry in /var/ct/cfg/clusters. # clusters=$(GetRPDClusters) if [[ "" != "$clusters" ]] then # An RPD is configured on this node. print_dbgmsg "RPDs: $clusters are already defined. HAGS subsystem will not be stopped." else print_dbgmsg "Stopping cthags if it is still up" # Final cleanup steps in case StopAndDelete did not get to complete them cthagsctrl -k fi # Tell ConfigRM to resync (so that it can delete tree if it is removed) tell_ConfigRM_resync $Op # Make sure HAGS to be stopped waitfor_hags_termination_or_stopsrc # # Defect 166865 # Migration cleanup. # ct_caa_set_migration_cleanup_complete print_dbgmsg "Processing PostStop Done." ;; refresh ) # refresh HAGS $RSCTBIN/cthagsctrl -r # refresh ConfigRM refresh -s IBM.ConfigRM gs_rc=$? ;; stop ) $RSCTBIN/cthagsctrl -k gs_rc=$? 
;; ModTune ) if [[ "POST" != $PARSED_PHASE ]] then print_dbgmsg "Tunables only handled in POST phase" else print_dbgmsg "No action for $PARSED_TUNABLE_PARAMETER" fi ;; esac exit_script # exit this script exit 1 # just in case #-----------------------------------------------------