#!/bin/ksh93
# ALTRAN_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# Copyright (C) Altran ACT S.A.S. 2017,2018,2019,2020,2021. All rights reserved.
#
# ALTRAN_PROLOG_END_TAG
#
# IBM_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# 61haes_r721 src/43haes/usr/sbin/cluster/utilities/rc.cluster.sh 1.3.24.13
#
# Licensed Materials - Property of IBM
#
# COPYRIGHT International Business Machines Corp. 1990,2016
# All Rights Reserved
#
# US Government Users Restricted Rights - Use, duplication or
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
#
# IBM_PROLOG_END_TAG
# @(#) c391568 43haes/usr/sbin/cluster/utilities/rc.cluster.sh, 726, 2147A_aha726, Jul 23 2021 06:38 PM
###############################################################################
#
# Arguments:    -boot : configures service adapter to use boot address
#               [ -i  : start client information daemon |
#                 -I  : start clinfo with consistency group support (snmp traps) ]
#               -b    : broadcast these start events
#               -N    : start now
#               -R    : start on system restart
#               -B    : both
#               -r    : re-acquire resources after forced down
#               -P arg : Parent calling process name
#               -v    : Ignore verification errors during startup (auto ver sync)
#               -x    : Activate NFS crossmounts
#               -C arg : Corrective actions level (interactive, yes), or option not present
#
# Usage: rc.cluster [-boot] [-i | -I] [-l] [-x] [-c] [-b] [-N | -R | -B] [-r] [-P parent calling process] [-C interactive|yes] [-v]
#
###############################################################################

###############################################################################
#
# create_active_nodelist
#
# Description:  Create a list of active nodes on the cluster.
#               Used by the main to check if there are any changes to
#               topology/resources.
#
# parameters:   none
#
# returns:      none
#
###############################################################################
create_active_nodelist()
{
    typeset PS4_FUNC="create_active_nodelist"
    [[ "$VERBOSE_LOGGING" == "high" ]] && set -x

    DCD_NODELIST=""
    DCD_NODELIST=$(ODMDIR=$DCD clnodename)

    for node in $DCD_NODELIST
    do
        CM_ACTIVE_NODES=$(clgetactivenodes -n $node -o $DCD 2>/dev/null)
        # Note: clgetactivenodes returns the number of active nodes,
        # or -1 on failure (-1 shows up as an exit status of 255).
        if [ $? -eq 255 -o $? -eq 0 ]
        then
            continue
        else
            break
        fi
    done

    ACTIVE_NODELIST=""
    for node in $CM_ACTIVE_NODES
    do
        NODE_IP_ADDR=$(clgetaddr -o$DCD $node)
        if [ ! -z "$NODE_IP_ADDR" ]
        then
            # Ensure one can successfully rsh a command to the node
            RC=$(cl_rsh $NODE_IP_ADDR "/usr/es/sbin/cluster/utilities/cldare -a" 2>/dev/null)
            if [ "$RC" != "ACTIVE" ]
            then
                continue
            fi
        fi

        # Add nodename to active node list
        ACTIVE_NODELIST="$ACTIVE_NODELIST $node"
    done

    return 0
}
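# Note: the startup checks below rely on the clcheck_server return convention
# noted later in this script ("If clstrmgr is running, clcheck_server returns 1"):
# an exit status of 1 means the named subsystem is already active, and 0 means
# it is not.  Illustrative use, assuming that convention:
#
#     clcheck_server clcomd
#     if (( $? == 1 )); then
#         : clcomd is already running, nothing to do
#     fi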
###############################################################################
#
# function:     check_clcomd
#
# Description:  This function checks whether clcomd is active.
#               If clcomd is not active then this function waits
#               for 60 seconds for clcomd to come up.  If clcomd is not
#               active after 60 seconds then this function starts clcomd.
#
# parameters:   None.
#
# Returns:      none.
#
###############################################################################
check_clcomd()
{
    # Check if clcomd is up
    /usr/es/sbin/cluster/utilities/clcheck_server clcomd
    typeset -i RC=$?
    if (( $RC == 1 ))
    then
        return
    fi

    # If clcomd is not up wait for 60 seconds as rc.init may start it.
    typeset -i count=20
    while (( count-- ))
    do
        /usr/es/sbin/cluster/utilities/clcheck_server clcomd
        RC=$?
        if (( $RC == 1 ))
        then
            return
        fi
        sleep 3
    done

    # If we are here then clcomd is still not up, so we shall start it now
    /usr/bin/startsrc -s clcomd
    /usr/es/sbin/cluster/utilities/clcheck_server clcomd
    RC=$?
    if [ $RC -eq 1 ]
    then
        return
    fi
    exit 1
}

###############################################################################
#
# function:     check_caaservices
#
# Description:  This function checks whether cthags and CAA services are active.
#               If they are not active, then this function waits up to
#               180 seconds (WAIT_TIMETOACTIVE) for them to come up.  If CAA
#               and cthags are not active after that time then this function
#               exits the script.
#
# parameters:   None.
#
# Returns:      none.
#
###############################################################################
check_caaservices()
{
    WAIT_TIMETOACTIVE=180   # in Secs. Wait time to check caa services
    typeset -i wt_count=0

    if [[ $(clodmget -n -f cluster_version HACMPcluster) > 11 ]]
    then
        while (( $wt_count <= $WAIT_TIMETOACTIVE ))
        do
            ###################################################################
            # check caa
            ###################################################################
            lscluster -i >/dev/null 2>&1
            caa_state=$?

            ###################################################################
            # check cthags or grpsvcs
            ###################################################################
            LC_ALL=C lssrc -s cthags | grep -w cthags | grep -qw active
            cth_state=$?
            LC_ALL=C lssrc -s grpsvcs | grep -w grpsvcs | grep -qw active
            grp_state=$?

            if [ $caa_state != 0 ] || ([ $cth_state != 0 ] && [ $grp_state != 0 ])
            then
                sleep 3; (( wt_count=$wt_count + 3 ))
            else
                break;  # caa and cthags services are running
            fi
        done

        if [[ $caa_state != 0 ]]
        then
            cl_echo 10515 "$PROGNAME: Error: CAA cluster services are not active on this node.\n" $PROGNAME >&2
            cl_echo 10518 "$PROGNAME: Contact IBM support if the problem persists.\n" $PROGNAME >&2
            exit 1
        fi

        if [[ $grp_state != 0 ]]
        then
            if [[ $cth_state != 0 ]]
            then
                cl_echo 10516 "$PROGNAME: Error: RSCT cluster services (cthags) are not active on this node.\n" $PROGNAME >&2
                cl_echo 10518 "$PROGNAME: Contact IBM support if the problem persists.\n" $PROGNAME >&2
                exit 2
            fi
        fi
    fi
}
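# Note: check_repos below parses "lscluster -d" output.  The awk script expects,
# per node, a stanza roughly shaped as follows (illustrative only, not verbatim
# lscluster output; UUID/UDID and disk-count lines are filtered out before awk
# sees the text):
#
#     Node nodeA
#        hdisk3:
#           State : UP
#           Type  : REPDISK
#
# i.e. a "Node <name>" line, a "<disk>:" line, then "State :" and "Type :"
# lines, from which one record per disk is printed.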
###############################################################################
#
# function:     check_repos
#
# Description:  This function checks whether the repository disk is UP on all
#               of the cluster nodes.  If any cluster node's repository disk
#               is DOWN and the cluster is started without the force option
#               'true', then this function exits the script with an error.
#               If the cluster is started with the force flag 'true' and the
#               repository is DOWN for any of the cluster nodes, then it
#               allows the script to start the cluster after issuing a
#               WARNING to the user.
#               If all the cluster nodes have their repository disk UP then
#               the function returns success.
#
# parameters:   None.
#
# Returns:      1 - FAILURE
#               0 - SUCCESS
#
###############################################################################
check_repos()
{
    typeset REPO_NODE=""
    typeset REPO_DISK=""
    typeset REPO_STATE=""
    typeset REPOS_DOWN_NODES=""

    LC_ALL=C lscluster -d | grep -p '^Node ' | \
        egrep -iv 'uuid|udid|^Number of disks discovered' | \
        awk '/^Node /  { node_name=$2 }
             /:$/      { disk=$1 }
             /State :/ { state=$3 }
             /Type :/  { type=$3
                         print node_name, disk, state, type }' | \
    while read REPO_NODE REPO_DISK REPO_STATE type
    do
        if [[ $type == REPDISK && $REPO_STATE != UP ]]
        then
            #
            : The $type disk ${REPO_DISK%:} on node $REPO_NODE is $REPO_STATE
            #
            REPOS_DOWN_NODES="${REPOS_DOWN_NODES}${REPOS_DOWN_NODES:+,}$REPO_NODE"
        fi
    done

    if [[ -n "$REPOS_DOWN_NODES" ]]; then
        cl_dspmsg -s 31 scripts.cat 11 "Repository disk is down on node(s): %1\$s.\n\
Use lscluster -d to check the repository state on each node\n\
to ensure it has access to a working repository.\n" "$REPOS_DOWN_NODES" 1>&2
        cl_dspmsg -s 31 scripts.cat 12 "Hint: You can use \"smitty cl_replace_repository_nm\" or\n\
\"clmgr replace repository\" command to replace the bad disk.\n"
        if [[ -n $FORCE_START_FLAG ]]; then
            cl_dspmsg -s 31 scripts.cat 9 "WARNING: Repository disk is down for %1\$s.\n\
As Force option is specified as \"true\", returning 0 so that\n\
cluster services can start. Cluster may not function properly\n\
in all the scenarios and cannot perform configuration changes.\n" "$REPOS_DOWN_NODES" 1>&2
            return 0
        else
            cl_dspmsg -s 31 scripts.cat 10 "ERROR: Repository disk is down for %1\$s. Cluster is not started.\n\
It is not suggested to start the cluster with repository disk down as\n\
cluster may not function correctly in all the scenarios and cannot\n\
perform configuration changes. If you still want to start the cluster\n\
then use FORCE=\"true\" in clmgr command or make Ignore verification\n\
errors flag \"true\" from smitty.\n" "$REPOS_DOWN_NODES" 1>&2
            exit 1
        fi
    else
        return 0
    fi
}

###############################################################################
#
# Main entry point
#
###############################################################################

[[ "$VERBOSE_LOGGING" == "high" ]] && set -x
[[ "$VERBOSE_LOGGING" == "high" ]] && version='%I%'

export PATH=$(/usr/es/sbin/cluster/utilities/cl_get_path all)
PROGNAME=$(basename ${0})

# set the environment to access the correct odm and libraries
export ODMDIR=/etc/es/objrepos
unset LIBPATH

CLLSIF_FILE="/usr/es/sbin/cluster/utilities/cllsif"
TELINIT_FILE="/usr/es/sbin/cluster/.telinit"
SERVER_STATUS_FILE="/usr/es/sbin/cluster/server.status"
CLUSTER_START_FILE="/usr/es/sbin/cluster/etc/.clusterStarted"

CLLSIF=0
EXISTS=1
BOOT=false
WHEN=now
CLINFOD=""
BCAST=""
CLSTART_NFS_MOUNT=""
CLSTART_NFS_MOUNTD=""
BROADCAST="false"
CLINFO="false"
REACQUIRE="false"
AUTOMATIC_OPTION="false"
MANUAL_OPTION="false"
MANUAL_AUTOMATIC_FLAG=""
FORCE_START_FLAG=""

#
# These define the supported version and minimal version for mixed mode.
# Note these values match those defined in cluster.h
#
CLUSTERVERSION=22
CLUSTERCOMPATVERSION=19     # Minimum version for rolling migration

#
# Get the version of the cluster definition (may not be the same as the
# installed software if this is a migration)
#
VERSION=$(clodmget -n -f cluster_version HACMPcluster)

####################################################################
# Save Parameters for later
####################################################################
ALL_PARAMS="$*"

####################################################################
# Log Start-Up
####################################################################
cl_echo 220 "Starting execution of $0\nwith parameters: ${ALL_PARAMS}\n\n" $0 "${ALL_PARAMS}"

###############################################################################
# Look for boot option flag, must be first.
###############################################################################
CLSTART_BOOT_FLAG=
if [ $# -gt 0 ]
then
    if [ "$1" = "-boot" ]
    then
        BOOT=true
        CLSTART_BOOT_FLAG=-B
        shift
    fi
fi

START_OPTION="auto"     # set it to the default

###############################################################################
# Parse the command line.
###############################################################################
set -- $(getopt AMixIbNRBvC:P:r $*)
if [ $? -ne 0 ]
then
    cl_echo 10601 "Usage: $PROGNAME [-boot] [-i | -I] [-b] [-N | -R | -B] [-M | -A | -x] [-r]" $PROGNAME >&2
    exit 1
fi

while [[ $1 != "--" ]]
do
    case $1 in
        -b)
            BCAST=-b
            BROADCAST="true"
            shift
            ;;

        # Parent process who called us, used in cl_rc.cluster
        # to inform cl_auto_versync not to run in clstart
        # only current valid argument is cl_rc_cluster
        -P)
            shift
            PARENT_PROCESS="-P $1"
            shift
            ;;

        -i)
            [[ "$CLINFO" == "true_CG_support" ]] && {
                cl_echo 10601 "Usage: $PROGNAME [-boot] [-i | -I] [-b] [-N | -R | -B] [-M | -A | -x] [-r]" $PROGNAME >&2
                exit 1
            }
            CLINFO="true"
            CLINFOD=-i
            shift
            ;;

        -x)
            # NFS cross mount during manual start
            CLSTART_NFS_MOUNT="true"
            CLSTART_NFS_MOUNTD="-x"
            MANUAL_OPTION="true"
            [[ $AUTOMATIC_OPTION == "true" ]] && {
                cl_echo 10401 "Usage: $PROGNAME [-boot] [-i | -I] [-b] [-N | -R | -B] [-M | -A | -x] [-r]" $PROGNAME >&2
                exit 1
            }
            shift
            ;;

        -I)
            [[ "$CLINFO" == "true" ]] && {
                cl_echo 10601 "Usage: $PROGNAME [-boot] [-i | -I] [-b] [-N | -R | -B] [-M | -A | -x] [-r]" $PROGNAME >&2
                exit 1
            }
            CLINFO="true_CG_support"
            CLINFOD=-I
            shift
            ;;

        -N)
            WHEN=now
            shift
            ;;

        -R)
            WHEN=restart
            shift
            ;;

        -B)
            WHEN=both
            shift
            ;;

        -r)
            REACQUIRE=true
            shift
            ;;

        # Corrective action setting, if any
        # valid arguments are 'interactive' and 'yes'
        -C)
            shift
            VERIFY_MODE="-C $1"
            shift
            ;;

        # Single argument to tell cl_auto_versync to ignore
        # verification errors
        -v)
            FORCE_START_FLAG="true"
            VERIFY_MODE="-v"
            shift
            ;;

        -A)
            AUTOMATIC_OPTION="true"
            [[ $MANUAL_OPTION == "true" ]] && {
                cl_echo 10601 "Usage: $PROGNAME [-boot] [-i | -I] [-b] [-N | -R | -B] [-M | -A | -x] [-r]" $PROGNAME >&2
                exit 1
            }
            shift
            ;;

        -M)
            [[ $AUTOMATIC_OPTION == "true" ]] && {
                cl_echo 10601 "Usage: $PROGNAME [-boot] [-i | -I] [-b] [-N | -R | -B] [-M | -A | -x] [-r]" $PROGNAME >&2
                exit 1
            }
            MANUAL_OPTION="true"
            shift
            ;;

        *)
            shift
            ;;
    esac
done

if [[ $AUTOMATIC_OPTION == "true" ]]
then
    MANUAL_AUTOMATIC_FLAG="-A"
elif [[ $MANUAL_OPTION == "true" ]]
then
    MANUAL_AUTOMATIC_FLAG="-M"      #Manually
    START_OPTION="manual"
else
    # if neither of the flags set, assume Automatically Manage resource groups
    MANUAL_AUTOMATIC_FLAG="-A"
fi
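# Illustration of the parsing above (made-up flag values): an invocation such as
#
#     rc.cluster -boot -i -b -A -P cl_rc_cluster
#
# would leave the following state for the rest of this script:
#     BOOT=true  CLSTART_BOOT_FLAG=-B  CLINFO=true  CLINFOD=-i
#     BCAST=-b   BROADCAST=true        PARENT_PROCESS="-P cl_rc_cluster"
#     MANUAL_AUTOMATIC_FLAG=-A         WHEN=now     START_OPTION=auto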
####################################################################
# Check to see if srcmstr is running; if so, we try to use it;
# otherwise, we exit with error.
# Repositioned the code to make sure srcmstr is responsive
# before issuing lssrc to get clstrmgr's state.
####################################################################
typeset i=3         # make sure init has time to start it
while (( i-- ))
do
    src_running=$(ps -e | awk '$NF == "srcmstr" { print $1; exit }')
    [[ -n "$src_running" ]] && break
done

if (( $i < 0 ))
then
    cl_echo 234 "$PROGNAME: srcmstr is not running.\n" $PROGNAME
    exit 1
fi

####################################################################
# If srcmstr is running, ensure that it is active before issuing the
# startsrc commands
####################################################################
if [[ -n "$src_running" ]]
then
    cl_echo 235 "Checking for srcmstr active...\n"
    typeset -i i=10     # try ten times to contact it
    while (( i-- ))
    do
        lssrc -s inetd >/dev/null 2>&1 && break     # break out on success
        sleep 1                                     # otherwise wait a second and try again
        echo ".\c"
    done
    if (( i < 0 ))
    then
        cl_echo 236 "ERROR: srcmstr is not accepting connections\n"
        exit 1
    fi
    cl_echo 237 "complete.\n"
fi

####################################################################
# Check for clcomd status
####################################################################
check_clcomd

####################################################################
# Check for caa and cthags status
####################################################################
check_caaservices

####################################################################
# Check if repository is up on all the cluster nodes
####################################################################
check_repos

####################################################################
# check if this is a client or server environment
####################################################################
if [ -f "${CLLSIF_FILE}" ]
then
    CLLSIF=1
    cllsif >/dev/null 2>&1
    EXISTS=$?
fi

if [ $CLLSIF = 1 -a $EXISTS = 0 ]
then
    # Get the list of nodes as reported by the local node
    CLUSTER_NODES="$(clnodename)"

    # Check each node until we get a list of nodes
    for node in $CLUSTER_NODES; do
        ACTIVE_NODES=$(clgetactivenodes -n $node)
        [ -n "$ACTIVE_NODES" ] && break;
    done
fi

#
# Get the force down nodes list. Note that a for loop should not be
# required, though we will continue to try if we don't get results
#
for node in $ACTIVE_NODES; do
    FORCEDOWN_NODES=$(cl_rsh $node lssrc -ls clstrmgrES | grep "Forced down node list:" | cut -f2 -d:)
    [[ -n $FORCEDOWN_NODES ]] && break;
done

####################################################################
# Define constants for ACD, SCD, DCD, and ODMs to be linked and copied
####################################################################
if [[ -f /usr/es/sbin/cluster/etc/ha_odm_constants ]]
then
    . /usr/es/sbin/cluster/etc/ha_odm_constants
fi

#
# 541 introduces a new odm. When going through NDU, these
# odms will be in the DCD but will not get copied to the ACD when
# the updated node restarts. This breaks auto verify which looks to
# copy all odms from the ACD, and the new ones aren't there.
# As a workaround we copy the new odms (only) from DCD to ACD
#
if [ -d $ACD ]
then
    #
    # ACD exists - copy the new odms if not already there
    #
    [ ! -f $ACD/HACMPpprcconsistgrp ] && cp $DCD/HACMPpprcconsistgrp $ACD
fi

LOCALNODENAME=""
if [ $CLLSIF = 1 -a $EXISTS = 0 ]
then
    LOCALNODENAME=$(get_local_nodename)
fi

# If this is a forced down node, we need to refresh the cluster
# services.
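# Note: FORCEDOWN_NODES was parsed above from "lssrc -ls clstrmgrES" output on
# an active node.  The matched line looks roughly like the following
# (illustrative node names, not verbatim output):
#
#     Forced down node list: nodeA nodeB
#
# "cut -f2 -d:" keeps everything after the colon, so FORCEDOWN_NODES becomes
# " nodeA nodeB".  The loop below re-joins this node if it appears in that list.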
for forced_node in $FORCEDOWN_NODES; do
    if [[ $forced_node == $LOCALNODENAME ]]
    then
        #
        # If we are coming back up after a NDU from a prior release, we
        # need to copy the DCD to ACD, so when we start the new daemons
        # they will read the correct (updated) odm's from the ACD
        #
        (( $VERSION < $CLUSTERVERSION )) && {
            #
            # stop clinfo and snmpd
            #
            if [[ -n $CLINFOD ]]
            then
                stopsrc -s clinfoES -c >/dev/null 2>&1
                stopsrc -s snmpd -c >/dev/null 2>&1
            fi

            #
            # copy DCD to ACD
            #
            /usr/es/sbin/cluster/utilities/cl_createacd
        }

        #
        # if rsct is down break out here so we go through clstart to
        # bring up the stack
        #
        # For CAA based clusters we already checked if cthags is UP.
        #
        if (( "$VERSION" < 12 ))
        then
            clcheck_server topsvcs
            if [ "$?" -eq 0 ]
            then
                break;
            fi
        fi

        #
        # Rsct is up - tell the clstrmgr to start and pass the specified mode
        #
        if [[ $MANUAL_AUTOMATIC_FLAG == "-A" ]]
        then
            clRMupdate clrm_start_request
        elif [[ $MANUAL_AUTOMATIC_FLAG == "-M" ]]
        then
            if [[ $CLSTART_NFS_MOUNT == "true" ]]
            then
                clRMupdate clrm_manual_nfs_start_request
            else
                clRMupdate clrm_manual_start_request
            fi
        fi

        #
        # If the system supports gsclvmd and it is not active,
        # then start it
        #
        if [[ -x /usr/sbin/gsclvmd ]] && clcheck_server gsclvmd
        then
            rm -f /usr/es/sbin/cluster/.gsclvmd     # clean up any prior start indication
            GSCLVMD_ENVIRONMENT="HA_GS_SUBSYS=grpsvcs \
                HA_DOMAIN_NAME=$(/usr/es/sbin/cluster/utilities/cldomain) \
                HA_SYSPAR_NAME=$(/usr/es/sbin/cluster/utilities/cldomain)"
            if startsrc -s gsclvmd -e "$GSCLVMD_ENVIRONMENT"
            then
                if [[ $BROADCAST == "true" ]]
                then
                    wall <<- END >/dev/null 2>&1
Starting Concurrent Logical Volume Manager (gsclvmd) subsystem on $HOSTNAME
END
                fi
                date > /usr/es/sbin/cluster/.gsclvmd
            else
                cl_echo 10441 "$PROGNAME: Unable to start Concurrent Logical Volume Manager (gsclvmd) via SRC" $PROGNAME
            fi
        fi

        #
        # start clinfo and snmpd - don't care if they are already running
        #
        if [[ -n $CLINFOD ]]
        then
            startsrc -s snmpd >/dev/null 2>&1
            startsrc -s clinfoES >/dev/null 2>&1
        fi

        #
        # start clevmgrdES - don't care if already running
        #
        startsrc -s clevmgrdES >/dev/null 2>&1
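        # Note: the block below adjusts the RSCT critical-resource handling
        # before varying on volume groups.  Based on general RSCT documentation
        # (not this file), CT_MANAGEMENT_SCOPE=2 selects peer-domain scope, so
        # the chrsrc call operates on the IBM.PeerNode resource class across
        # the peer domain rather than only the local node; the scope variable
        # is unset again immediately afterwards.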
        #
        : verify the grace period at node level and cluster level. If a grace period is set,
        : CriticalMode needs to be set so the node is not rebooted when the cthags daemon is killed.
        #
        typeset grace_period
        typeset node_grace_period
        typeset local_node
        grace_period=$(clodmget -f crit_daemon_restart_grace_period HACMPcluster)
        local_node=$(get_local_nodename)
        node_grace_period=$(clodmget -n -q "object=CRIT_DAEMON_RESTART_GRACE_PERIOD and name=$local_node" -f value HACMPnode)

        # Get the node level value; if it is set, verify the value is more than zero.
        # If the node level is not set, check whether the cluster level value is more than zero
        if [[ -n $node_grace_period ]] && (( $node_grace_period > 0 )) || (( $grace_period != 0 ))
        then
            export CT_MANAGEMENT_SCOPE=2
            chrsrc -c IBM.PeerNode CriticalMode=2
            unset CT_MANAGEMENT_SCOPE
            vglist=$(clodmget -n -q "name=VOLUME_GROUP" -f value HACMPresource)
            if [[ -n $vglist ]]
            then
                for vg in $vglist
                do
                    lsvg_out=$(LC_ALL=C lsvg $vg 2>/dev/null)
                    if print -- "$lsvg_out" | grep -i -q 'VG STATE: * active '
                    then
                        varyonvg -a $vg >/dev/null 2>&1
                        if [[ $? == 0 ]];then
                            cl_dspmsg -s 46 scripts.cat 21 "%1\$s: Successfully ran varyonvg -a command for volume group %2\$s.\n" $PROGNAME $vg
                        fi
                    fi
                done
            fi
        fi
        exit 0
    fi
done

####################################################################
# If this node is a SERVER (i.e., cluster topology exists)
# bring up the rsct stack
####################################################################
if [ $CLLSIF = 1 -a $EXISTS = 0 ]
then
    ####################################################################
    # local node name and cluster version must be set, if not, then
    # there are unsynchronized changes to topology or security
    ####################################################################

    # We can skip the node handle check if autoverification is enabled, since
    # clstart will call auto_verify_sync, but the rest of this script
    # needs LOCALNODENAME to be set properly.
    if [ -z "$LOCALNODENAME" ]
    then
        cl_echo 803 "$PROGNAME: Error: Changes have been made to the Cluster Topology or Resource \n\
configuration. The Cluster Configuration must be synchronized before \n\
starting Cluster Services.\n" $PROGNAME >&2
        #
        # If we're being asked to start at node boot time, then report this error message
        # to the console
        #
        if [[ $BOOT == "true" ]]
        then
            cl_echo 803 "$PROGNAME: Error: Changes have been made to the Cluster Topology or Resource \n\
configuration. The Cluster Configuration must be synchronized before \n\
starting Cluster Services.\n" $PROGNAME >/dev/console 2>&1
        fi
        exit 1
    fi

    #
    # During migration the version number will be set to the version of
    # the prior release - check this against our minimum supported release
    # for rolling migration
    #
    if [ "$VERSION" -lt $CLUSTERCOMPATVERSION -o "$VERSION" -gt $CLUSTERVERSION ]
    then
        cl_echo 8060 "ERROR: The cluster version number $VERSION read from PowerHA SystemMirror cluster is not\n compatible with this version of the PowerHA SystemMirror for AIX software. Supported versions\n are $CLUSTERCOMPATVERSION or higher and $CLUSTERVERSION. Check the configuration and conversion log for errors.\n" $VERSION $CLUSTERCOMPATVERSION $CLUSTERVERSION
        exit 1
    fi

    IGNORE_VERIFICATION=$(clodmget -f clverstartupoptions -n HACMPcluster)

    ####################################################################
    # Check the remote active nodes if there are unsynchronized changes.
    # Skip this check if autoverification is enabled.
    ####################################################################
    if (( $IGNORE_VERIFICATION != 0 ))
    then
        create_active_nodelist
        for rnode in $ACTIVE_NODELIST
        do
            NODE_IP_ADDR=$(clgetaddr -o$DCD $rnode)
            if [ -z "$NODE_IP_ADDR" ]
            then
                cl_echo 2963 "$PROGNAME: Unable to retrieve pingable address for node $rnode." $PROGNAME $rnode
                exit 1
            fi

            REMOTENODENAME=$(cl_rsh $NODE_IP_ADDR ODMDIR=/etc/es/objrepos /usr/bin/odmget HACMPcluster | grep -w nodename | cut -d'"' -f2)
            REMOTEHANDLE=$(cl_rsh $NODE_IP_ADDR ODMDIR=/etc/es/objrepos /usr/bin/odmget HACMPcluster | grep handle | cut -d' ' -f3)
            if [ -z "$REMOTENODENAME" -o "$REMOTEHANDLE" = 0 ]
            then
                echo "Changes detected on Node $REMOTENODENAME."
                cl_echo 803 "$PROGNAME: Error: Changes have been made to the Cluster Topology or Resource \n\
configuration. The Cluster Configuration must be synchronized before \n\
starting Cluster Services.\n" $PROGNAME >&2
                exit 1
            fi
        done
    fi
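    # Note: the eval below assumes that cllsparam emits node run-time
    # parameters as shell assignments, for example something of the form
    # "VERBOSE_LOGGING=high" (illustrative only, not verbatim output),
    # which eval then imports into this shell's environment.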
    # setup the env for this node
    eval $(cllsparam -n $LOCALNODENAME)

    ####################################################################
    # Check for valid node and network ids
    ####################################################################
    clchkids
    if [[ $? != 0 ]]
    then
        exit 1
    fi

    ####################################################################
    # modify the HACMPdaemon odm to remember the broadcast and clinfo
    # startup options
    ####################################################################
    TYPE="start"
    clchdaemons -n "$LOCALNODENAME" -d "clstrmgr" -t "$TYPE" -o "broadcast" -v "$BROADCAST"
    clchdaemons -n "$LOCALNODENAME" -d "clinfo" -t "$TYPE" -o "time" -v "$CLINFO"
    clchdaemons -n "$LOCALNODENAME" -d "clstrmgr" -t "$TYPE" -o "manage" -v "$START_OPTION"

    ####################################################################
    # Make inittab entry if necessary
    ####################################################################
    cl_migcheck "HAES"
    typeset -i RC=$?
    if (( $RC == 0 ))
    then
        #Check if cl services are started with 'NOW' OR action is provided as
        #'Disable_cluster_services_autostart'.
        #If yes, remove hacmp6000 entry from /etc/inittab file if exists.
        #Else, add/modify entry in /etc/inittab for other split/merge policies.
        value=$(clodmget -n -q "policy = action" -f value HACMPsplitmerge)
        if [ "$WHEN" = "now" -o "$value" = "Disable_cluster_services_autostart" ]
        then
            cl_lsitab hacmp6000 > /dev/null 2>&1
            if [ $? -eq 0 ]
            then
                cl_rmitab hacmp6000 > /dev/null 2>&1
            fi
        else
            AUTOMATIC_OR_MANUAL=$MANUAL_AUTOMATIC_FLAG      #By default RG's will be handled as per configuration
            if [ "$value" = "Disable_rgs_autostart" ]
            then
                AUTOMATIC_OR_MANUAL="-M"    #Set to Manually
            fi

            #Pass WHEN argument as '-B' to make sure cluster services
            #during restart does not remove the entry from the inittab.
            #If we don't pass '-B' flag, it will be treated as NOW and
            #it will remove cluster restart entry from inittab.
            WHEN_FLAG="-B"      #Set to BOTH

            # Check if error correction mode is interactive.
            # If interactive mode is present then change it to no,
            # as interactive correction cannot be honored at system boot time
            if [ "$VERIFY_MODE" = "-C interactive" ]
            then
                # change error correction mode to "no"
                VERIFY_MODE=" "
                dspmsg scripts.cat 10506 "$PROGNAME: Warning: You have selected to start cluster services during system \nboot time. Interactive correction of errors at system boot time cannot be \nhonored. No error correction would be performed.\n" $PROGNAME
            fi

            # pass all the arguments.
            cl_chitab "hacmp6000:2:wait:/usr/es/sbin/cluster/etc/rc.cluster -boot $CLINFOD $BCAST $AUTOMATIC_OR_MANUAL $VERIFY_MODE $WHEN_FLAG # Bring up Cluster" > /dev/null 2>&1
            if [ $? -ne 0 ]
            then
                cl_mkitab "hacmp6000:2:wait:/usr/es/sbin/cluster/etc/rc.cluster -boot $CLINFOD $BCAST $AUTOMATIC_OR_MANUAL $VERIFY_MODE $WHEN_FLAG # Bring up Cluster" > /dev/null 2>&1
                if [ $? -ne 0 ]
                then
                    cl_echo 233 "$PROGNAME: Update of /etc/inittab failed." $PROGNAME >&2
                    exit 1
                fi
            fi
        fi
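        # Note: with illustrative flag values (CLINFOD=-i, BCAST=-b,
        # AUTOMATIC_OR_MANUAL=-A, VERIFY_MODE="-C yes"), the entry written by
        # cl_chitab/cl_mkitab above would expand to roughly:
        #
        #   hacmp6000:2:wait:/usr/es/sbin/cluster/etc/rc.cluster -boot -i -b -A -C yes -B # Bring up Cluster
        #
        # i.e. cluster services are restarted from /etc/inittab at run level 2
        # on the next system boot.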
        #Exit if cluster services are started with WHEN=restart option
        #as we don't need to start the cl services now
        if [ "$WHEN" = "restart" ]
        then
            cl_echo 10761 "$PROGNAME: Info: You have selected to start cluster services during system \nRESTART time. Cluster services would not be started now.\n" $PROGNAME
            exit 0
        fi
    fi

    ####################################################################
    # Remove the files necessary to cause resources to be reacquired
    ####################################################################
    if [ "$REACQUIRE" = "true" ]
    then
        rm -f $SERVER_STATUS_FILE 2>/dev/null
        rm -f $FORCE_DOWN_STATUS_FILE 2>/dev/null
    fi

    ####################################################################
    # The following block of code is executed only if starting
    # cluster services and they are not already running.
    ####################################################################
    JOINING=0
    if (( $VERSION < $CLUSTERVERSION ))
    then
        clcheck_server grpsvcs
        RC=$?
    else
        RC=0
    fi

    # There is a timing window in which clcheck_server grpsvcs will indicate that
    # the clstrmgr is not registered with group services even though the clstrmgr is
    # in the process of trying to join. In that case, the state of the clstrmgrES
    # subsystem will be something other than ST_INIT or NOT_CONFIGURED. So, we do not
    # want to try initializing the clstrmgr, as it is already in the process of trying
    # to come up.
    if [ $RC -eq 0 ]
    then
        # First check to see if the clstrmgr is active, otherwise lssrc -ls returns
        # no "Current State". If clstrmgr is running, clcheck_server returns 1
        clcheck_server clstrmgrES
        if [ "$?" -eq 1 ]
        then
            LC_ALL=C lssrc -ls clstrmgrES 2>&1 | grep "Current state" | egrep "ST_INIT|NOT_CONFIGURED" > /dev/null
            JOINING=$?
        else
            JOINING=0
        fi
    fi

    if [ $RC -eq 0 -a $JOINING -eq 0 ]
    then
        #
        # This script will create the ACD directory from the DCD
        # If this script fails we still want to perform the init operations
        # AV&S will stop us from starting the cluster if the HANDLE == 0, or
        # will perform the appropriate sync and then re-run cl_createacd
        #
        /usr/es/sbin/cluster/utilities/cl_createacd

        ####################################################################
        # Remove stale DARE lock, if it exists
        ####################################################################
        if [[ -d $SCD ]]
        then
            rm -fr $SCD
        fi

        ####################################################################
        # If this is a boot sequence, AND IP Address Takeover is configured,
        # AND we are coming up on a boot address, AND the Cluster Manager
        # is NOT already running, then and only then do we run
        # harc.net.
        ####################################################################
        if [ "$BOOT" = "true" ]
        then
            # Are we setup for IP address takeover?
            cl_lsitab clinit >/dev/null 2>&1
            if (( $? == 0 ))
            then
                # Make sure HA applicable nets are up
                harc.net
            else
                touch ${TELINIT_FILE}
                rm -f ${SERVER_STATUS_FILE}
            fi
        fi

        ####################################################################
        # See if syslogd already running. If not, start it
        ####################################################################
        ps -e | grep -s syslog
        if (( $? != 0 ))
        then
            typeset syslog_type=$(clodmget -n -q "subsysname = 'syslogd'" -f path SRCsubsys)
            if [[ ${syslog_type} == "/usr/sbin/rsyslogd" ]]
            then
                cl_echo 10737 "Starting rsyslogd"
                startsrc -s syslogd
            else
                cl_echo 221 "Starting syslogd"
                startsrc -s syslogd
            fi
        fi

        # quietly ignore errors for downleveled versions of AIX
        no -o routerevalidate=1 2>/dev/null

        ####################################################################
        # Turn off Power Management, if installed
        ####################################################################
        if [[ -x /usr/bin/pmctrl ]]
        then
            {
                /usr/bin/pmctrl -ea enable >/dev/null 2>&1
                /usr/bin/pmctrl -ea full_on >/dev/null 2>&1
            }
        fi
    fi      # end of clstrmgrES not running

    ####################################################################
    # Call clstart to start rsct and clinfo
    ####################################################################
    if [ $JOINING -eq 0 ]
    then
        if [ "$VERBOSE_LOGGING" == "high" ]
        then
            clstart -m -G $CLINFOD $BCAST $CLSTART_NFS_MOUNTD $PARENT_PROCESS $VERIFY_MODE $CLSTART_BOOT_FLAG $MANUAL_AUTOMATIC_FLAG
        else
            clstart -m -G $CLINFOD $BCAST $CLSTART_NFS_MOUNTD $PARENT_PROCESS $VERIFY_MODE $CLSTART_BOOT_FLAG $MANUAL_AUTOMATIC_FLAG 2>/dev/console
        fi
    fi
    if [ $? -ne 0 -o $JOINING -ne 0 ]
    then
        exit 1
    fi

    ###################################################################
    # Record the fact that the cluster is running on this node
    ###################################################################
    /bin/touch $CLUSTER_START_FILE

    ####################################################################
    # Bring up (alias) any defined persistent labels on appropriate
    # interfaces.
    ####################################################################
    cl_configure_persistent_address config_all -d

    ####################################################################
    # Log end of start-up
    ####################################################################
    cl_echo 32 "Completed execution of $0\nwith parameters: ${ALL_PARAMS}.\nExit status = $?\n\n" $0 "${ALL_PARAMS}" $?
else
    ####################################################################
    # If this is a CLIENT, start CLINFO.
    ####################################################################

    ####################################################################
    # Check to see if srcmstr is running; if so, we try to use it;
    # otherwise, we exit with error
    ####################################################################
    typeset -i i=3      # make sure init has time to start it
    while (( i-- ))
    do
        src_running=$(ps -e | awk '$NF == "srcmstr" { print $1; exit }')
        [ -n "$src_running" ] && break
    done

    if (( $i < 0 ))
    then
        cl_echo 238 "ERROR: srcmstr is not running\n"
        exit 1
    fi

    ####################################################################
    # If srcmstr is running, ensure that it is active before issuing the
    # startsrc commands
    ####################################################################
    if [ -n "$src_running" ]
    then
        cl_echo 235 "Checking for srcmstr active...\n"
        typeset i=10    # try ten times to contact it
        while (( i-- ))
        do
            lssrc -s inetd >/dev/null 2>&1 && break     # break out on success
            sleep 1                                     # otherwise wait a second and try again
            echo ".\c"
        done
        if (( i < 0 ))
        then
            cl_echo 236 "ERROR: srcmstr is not accepting connections\n"
            exit 1
        fi
        cl_echo 237 "complete"
    fi

    ####################################################################
    # See if syslogd already running. If not, start it
    ####################################################################
    ps -e | grep -s syslog
    if (( $? != 0 ))
    then
        typeset syslog_type=$(clodmget -n -q "subsysname = 'syslogd'" -f path SRCsubsys)
        if [[ ${syslog_type} == "/usr/sbin/rsyslogd" ]]
        then
            cl_echo 10737 "Starting rsyslogd"
            startsrc -s syslogd
        else
            cl_echo 221 "Starting syslogd"
            startsrc -s syslogd
        fi
    fi

    ####################################################################
    # Check to see if clinfo already running. If not, start it
    ####################################################################
    clcheck_server clinfoES
    if (( $? == 0 ))
    then
        startsrc -s clinfoES
    fi

    ####################################################################
    # Log end of start-up
    ####################################################################
    cl_echo 32 "Completed execution of $0\nwith parameters: ${ALL_PARAMS}.\nExit status = $?\n\n" $0 "${ALL_PARAMS}" $?
fi

exit 0