#!/bin/ksh
# ALTRAN_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# Copyright (C) Altran ACT S.A.S. 2017,2018,2021. All rights reserved.
#
# ALTRAN_PROLOG_END_TAG
#
# IBM_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# 61haes_r714 src/43haes/usr/sbin/cluster/utilities/clchpcihpif.sh 1.36.2.2
#
# Licensed Materials - Property of IBM
#
# COPYRIGHT International Business Machines Corp. 2000,2013
# All Rights Reserved
#
# US Government Users Restricted Rights - Use, duplication or
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
#
# IBM_PROLOG_END_TAG
# @(#) 7d4c34b 43haes/usr/sbin/cluster/utilities/clchpcihpif.sh, 726, 2147A_aha726, Feb 05 2021 09:50 PM
###############################################################################
#
# Name: clchpcihpif
#
# An interactive tool for hot replacing an HACMP managed PCI network adapter.
#
# Arguments:
#
# Returns: 0 on SUCCESS
#          1 on FAILURE
#
###############################################################################

###############################################################################
# usage
#
# Arguments: None
#
# Prints the usage message (message 6900, built from the global $CMD) and
# terminates the script with exit status 1.  Never returns to the caller.
###############################################################################
usage()
{
    typeset PS4_FUNC="usage"
    if [[ "${VERBOSE_LOGGING:-}" = "high" ]]; then
        set -x
    fi

    dspmsg scripts.cat 6900 "Usage: $CMD \n" $CMD
    exit 1
}

###############################################################################
# isClusterStable
#
# Arguments: $1: "exit": if the cluster is unstable, then print a message
#                        that the cluster is unstable and remove the
#                        lockfile, preparing to exit the process.
#                "none": just return with status
#
# Returns: 0 if cluster is unstable
#          1 if cluster is stable
#
# Determines if the cluster is stable by querying the SRC.
# If it is not stable, print out a message and remove the lock file,
# preparing to exit in the calling function.
###############################################################################
isClusterStable()
{
    typeset PS4_FUNC="isClusterStable"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    # "exit" requests the unstable-cluster message plus lock file removal;
    # callers that only want the status pass "none".
    EXIT=$1

    # Hot plug replacement is refused when cluster services are not up:
    # if both clcheck_server checks succeed, the "cluster is not running"
    # message is printed and the whole script exits with status 1.
    # NOTE(review): this presumes clcheck_server returns 0 for a daemon
    # that is NOT active - confirm against clcheck_server.
    if clcheck_server "$GRPSVCS_DAEMON" && clcheck_server "$MIG_GRPSVCS_DAEMON"
    then
        dspmsg scripts.cat 6972 \
"Hot plug replacement is not supported when the cluster is not running.\n"
        exit 1
    fi

    # Extract the cluster manager state from the SRC long status, with
    # all blanks removed from the value.
    STATE=$(lssrc -ls clstrmgrES \
            | awk -F: '$1 == "Current state" { gsub(/ /, "", $2); print $2 }')

    if [ "$STATE" = "ST_STABLE" ]; then
        return 1
    fi

    if [ "$EXIT" = "exit" ]; then
        dspmsg scripts.cat 6903 \
"\nThe cluster is in an unstable state. Cancelling PCI hot plug replacement.\n\n"
        rm -f "$LOCKFILE"
    fi
    return 0
}

###############################################################################
# waitUntilClusterStable
#
# Arguments: None
#
# Returns: 1 when cluster is stable
#
# Calls isClusterStable repeatedly, sleeping $SLEEP_TIME between calls,
# until the cluster is stable.
###############################################################################
waitUntilClusterStable()
{
    # We sleep here because most functions call this function right
    # after requesting a cluster event to take place.  Thus, we need
    # to wait a bit to see if the MIB will be updated to an unstable
    # cluster status.

    typeset PS4_FUNC="waitUntilClusterStable"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    dspmsg scripts.cat 6943 \
"\nThe PCI Hot Plug procedure is waiting for cluster events to be\n\
processed before continuing. This may take a few minutes.\n"

    sleep "$SLEEP_TIME"

    # FIRST makes sure the "still waiting" notice is printed only once.
    FIRST=0

    # isClusterStable returns 0 while the cluster is still unstable.
    while isClusterStable none
    do
        if [ "$FIRST" = 0 ]; then
            dspmsg scripts.cat 6911 \
"\nStill waiting until the cluster is stable...\n"
            FIRST=1
        fi
        sleep "$SLEEP_TIME"
    done

    return 1
}

###############################################################################
# getInterfaces
#
# Arguments: $1 = adapter
#
# Output:
#   For every address on the adapter that is known to HACMP, one slot is
#   appended (at index $NUM_INTERFACES, which is then incremented) to the
#   global arrays $INTERFACES[], $IP_LABELS[], $IP_ADDRESSES[], $NETMASKS[],
#   $MTUS[] and $HW_ADDRESSES[].
#   Addresses unknown to HACMP only add the adapter name to
#   $NON_HACMP_INTERFACES[] and increment $NUM_NON_HACMP.
#
# Determines all the HACMP configured interfaces that are on the specified
# adapter.
# Also finds the current IP label that is on each interface found.
###############################################################################
getInterfaces()
{
    typeset PS4_FUNC="getInterfaces"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset ADAPTER=$1

    FOUND_HACMP=""

    # Check if addresses on the adapter are configured as part of the
    # HACMP topology.  The adapter name is handed to awk with -v rather
    # than being spliced into the program text; link-level rows of the
    # netstat output are skipped.
    for IP_ADDRESS in $(netstat -in \
        | awk -v adapter="$ADAPTER" \
              '$1 == adapter && $3 !~ "[Ll]ink" { print $4 }')
    do
        IP_LABEL=$(host "$IP_ADDRESS" | awk '{print $1}')
        FOUND_HACMP=$(cllsif -Sn "$IP_LABEL" 2> /dev/null)

        if [ "$FOUND_HACMP" != "" ]; then
            INTERFACES[$NUM_INTERFACES]=$ADAPTER
            IP_LABELS[$NUM_INTERFACES]=$IP_LABEL
            IP_ADDRESSES[$NUM_INTERFACES]=$IP_ADDRESS
            NETMASKS[$NUM_INTERFACES]=$(clgetif -n "$IP_LABEL")
            MTUS[$NUM_INTERFACES]=$(getMTU "$ADAPTER")
            # Field 8 of the separator-delimited cllsif record is stored
            # as the hardware address.
            HW_ADDRESSES[$NUM_INTERFACES]=$(cllsif -J "$OP_SEP" -Sn "$IP_LABEL" \
                                            | cut -d"$OP_SEP" -f8)
            let NUM_INTERFACES=NUM_INTERFACES+1
        else
            # This interface is not part of the HACMP topology,
            # We do not preserve non-HACMP interfaces, so we don't
            # need to store any other values besides the interface name.
            NON_HACMP_INTERFACES[$NUM_NON_HACMP]=$ADAPTER
            let NUM_NON_HACMP=NUM_NON_HACMP+1
        fi
    done
}

###############################################################################
# getATMInterfaces
#
# Arguments: $1 = ATM adapter
#
# Output:
# Global variables $INTERFACES[] and $NUM_INTERFACES for interfaces found.
# Global variables $IP_LABELS[] (indexed like $INTERFACES[]) for IP labels
# found, plus $IP_ADDRESSES[], $NETMASKS[], $MTUS[] and $HW_ADDRESSES[].
# $LANE_ADAPTERS[] / $NUM_LANE_ADAPTERS for every LANE adapter that sits on
# the ATM adapter, and $NON_HACMP_INTERFACES[] / $NUM_NON_HACMP for attached
# interfaces that HACMP does not know about.
#
# Determines all the HACMP configured interfaces, including Classic IP
# and LANE interfaces, that are on the specified ATM adapter.
# Also finds the current IP label that is on each interface found.
###############################################################################
getATMInterfaces()
{
    typeset PS4_FUNC="getATMInterfaces"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset ATM_ADAPTER=$1

    FOUND_HACMP=""

    # Since this is an ATM adapter, it can have multiple interfaces.
    # First, we look to see if any Classic IP (at0, etc) interfaces
    # are configured as part of the HACMP topology.
    for ATM_INTERFACE in $(arp -t atm -a \
                           | grep " on device $ATM_ADAPTER" \
                           | awk '{ print $3 }')
    do
        # The address is taken from column 4 of the third netstat line.
        IP_ADDRESS=$(netstat -nI "$ATM_INTERFACE" \
                     | awk 'BEGIN { getline; getline; getline; print $4 }')
        if [ "$IP_ADDRESS" = "" ]; then
            # We don't need to handle detached interfaces.
            continue
        fi
        recordATMInterface "$ATM_INTERFACE" "$IP_ADDRESS"
    done

    # Now check to see if any LANE (LAN Emulated) interfaces on this ATM
    # adapter are configured for HACMP.
    FOUND_HACMP=""
    for LANE_ADAPTER in \
        $(lsdev -Ccadapter -F 'name description' \
          | awk '{ if ($1 ~ /^(ent|tok)[0-9]+/ && $2 == "ATM") print $1 }')
    do
        # Derive the interface name and the owning ATM device from the
        # adapter type (name with its numeric suffix removed).
        LANE_ADAPTER_TYPE=${LANE_ADAPTER%%[0-9]*}
        case $LANE_ADAPTER_TYPE in
            ent)
                LANE_INTERFACE=en${LANE_ADAPTER#ent}
                LANE_ATM_ADAPTER=$(entstat -d "$LANE_ADAPTER" \
                    | awk '/Local ATM Device Name:/ { print $5 }')
                ;;
            tok)
                LANE_INTERFACE=tr${LANE_ADAPTER#tok}
                LANE_ATM_ADAPTER=$(tokstat -d "$LANE_ADAPTER" \
                    | awk '/Local ATM Device Name:/ { print $5 }')
                ;;
            *)
                # Unknown type: never reuse values left over from the
                # previous iteration.
                continue
                ;;
        esac

        # Does this LANE adapter belong to this ATM adapter?  Compare
        # against the adapter we were asked about ($1), not the global
        # $ADAPTER.
        if [ "$LANE_ATM_ADAPTER" != "$ATM_ADAPTER" ]; then
            continue
        fi

        # We have to store all the ATM LANE adapters,
        # because even if the interface of the LANE adapter
        # is detached, we still need to rmdev the LANE adapter later,
        # otherwise rmdev'ing the physical ATM adapter will error.
        LANE_ADAPTERS[$NUM_LANE_ADAPTERS]=$LANE_ADAPTER
        let NUM_LANE_ADAPTERS=NUM_LANE_ADAPTERS+1

        IP_ADDRESS=$(netstat -nI "$LANE_INTERFACE" \
                     | awk 'BEGIN { getline; getline; getline; print $4 }')
        if [ "$IP_ADDRESS" = "" ]; then
            # We don't need to handle detached interfaces.
            continue
        fi
        recordATMInterface "$LANE_INTERFACE" "$IP_ADDRESS"
    done
}

###############################################################################
# recordATMInterface  (helper for getATMInterfaces)
#
# Arguments: $1 = logical interface (Classic IP or LANE)
#            $2 = IP address currently configured on that interface
#
# Appends the interface to the HACMP arrays ($INTERFACES[], $IP_LABELS[],
# $IP_ADDRESSES[], $NETMASKS[], $MTUS[], $HW_ADDRESSES[]) when cllsif knows
# its IP label, otherwise to $NON_HACMP_INTERFACES[].
###############################################################################
recordATMInterface()
{
    typeset PS4_FUNC="recordATMInterface"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset IFACE=$1
    typeset ADDR=$2

    IP_LABEL=$(host "$ADDR" | awk '{print $1}')
    FOUND_HACMP=$(cllsif -Sn "$IP_LABEL" 2> /dev/null)

    if [ "$FOUND_HACMP" != "" ]; then
        INTERFACES[$NUM_INTERFACES]=$IFACE
        IP_LABELS[$NUM_INTERFACES]=$IP_LABEL
        IP_ADDRESSES[$NUM_INTERFACES]=$ADDR
        NETMASKS[$NUM_INTERFACES]=$(clgetif -n "$IP_LABEL")
        MTUS[$NUM_INTERFACES]=$(getMTU "$IFACE")
        # Always use the $OP_SEP-delimited listing so that a ':' inside
        # an earlier field cannot shift the hardware address column.
        HW_ADDRESSES[$NUM_INTERFACES]=$(cllsif -J "$OP_SEP" -Sn "$IP_LABEL" \
                                        | cut -d"$OP_SEP" -f8)
        let NUM_INTERFACES=NUM_INTERFACES+1
    else
        # This interface is not part of the HACMP topology,
        # but it needs to be detached later so we can rmdev
        # the physical ATM adapter.  We do not preserve
        # non-HACMP interfaces, so we don't need to store
        # any other values besides the interface name.
        NON_HACMP_INTERFACES[$NUM_NON_HACMP]=$IFACE
        let NUM_NON_HACMP=NUM_NON_HACMP+1
    fi
}

###############################################################################
# getIPLabel
#
# Arguments: $1 = the interface (en# or tr#) of which we want to know its
#                 current IP label
#
# Output: the IP label found on the interface specified.  If cllsif does
#         not know that label, message 6907 is printed, the lock file is
#         removed and the script exits with status 1.
###############################################################################
getIPLabel()
{
    typeset PS4_FUNC="getIPLabel"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset INTERFACE=$1

    IP_ADDRESS=$(netstat -nI "$INTERFACE" \
                 | awk 'BEGIN { getline; getline; getline; print $4 }')
    IP_LABEL=$(host "$IP_ADDRESS" | awk '{print $1}')

    if [ "$(cllsif -Sn "$IP_LABEL")" != "" ]; then
        print "$IP_LABEL"
    else
        dspmsg scripts.cat 6907 \
"\nThe IP label $IP_LABEL found on interface $INTERFACE was not found\n\
as a configured HACMP interface.\n\n" "$IP_LABEL" "$INTERFACE"
        rm -f "$LOCKFILE"
        exit 1
    fi
}

###############################################################################
# getMTU
#
# Arguments: $1 = the interface of which we want to know the MTU size.
#
# Output: The MTU size found (column 2 of the second netstat line).
###############################################################################
getMTU()
{
    typeset PS4_FUNC="getMTU"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset INTERFACE=$1

    print $(netstat -I "$INTERFACE" \
            | awk 'BEGIN { getline; getline; print $2 }')
}

###############################################################################
# handleInterfacePreReplacement
#
# Arguments: $1 = index into $IP_LABELS[]
#
# Determines what type (service, boot, or standby) of each IP label that we're
# handling, and runs functions to determine what needs to be done before
# the replacement will start.
###############################################################################
handleInterfacePreReplacement()
{
    typeset PS4_FUNC="handleInterfacePreReplacement"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset INDEX=$1

    # Field 2 of the colon-delimited cllsif record is used as the label
    # type.  Types other than the three below leave INTERFACES_TYPE[]
    # untouched.
    TYPE=$(cllsif -Scn "${IP_LABELS[$INDEX]}" | cut -d: -f2)

    case $TYPE in
        boot)
            handleBootPreReplacement "$INDEX"
            INTERFACES_TYPE[$INDEX]="boot"
            ;;
        service)
            handleServicePreReplacement "$INDEX"
            INTERFACES_TYPE[$INDEX]="service"
            ;;
        standby)
            # no special handling needed for a standby
            INTERFACES_TYPE[$INDEX]="standby"
            ;;
    esac
}

###############################################################################
# handleBootPreReplacement
#
# Arguments: $1 = global array index
#
# Determines if there are any available standbys to swap this boot label
# with.  Let the user know if there aren't, in which case we continue
# on without any address swaps.  This of course does not apply to alias nets.
#
# On a non-alias network, sets ALT_ADDRESSES[$1] to the standby found (may
# be empty) and STANDBY_SWAPS[$1]=1 when one was found.
###############################################################################
handleBootPreReplacement()
{
    typeset PS4_FUNC="handleBootPreReplacement"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset INDEX=$1
    typeset NETWORK=$(cllsif -Scn "${IP_LABELS[$INDEX]}" | cut -d: -f3)

    # Look the network up by exact name (field 1 of "cllsnw -c") and take
    # field 3 as the alias flag.  A substring match would also pick up
    # networks whose names merely contain $NETWORK.
    typeset ALIAS_NETWORK=$(cllsnw -c \
        | awk -F: -v net="$NETWORK" '$1 == net { print $3 }')

    if [ "$ALIAS_NETWORK" = "true" ]; then
        return
    fi

    ALT_ADDRESSES[$INDEX]=$(findAvailableStandby "$INDEX")
    if [ "${ALT_ADDRESSES[$INDEX]}" = "" ]; then
        dspmsg scripts.cat 6955 \
"\nThe IP label ${IP_LABELS[$INDEX]} on interface ${INTERFACES[$INDEX]} \
is a boot network interface \n\
IP label, but there are no available boot network interfaces with\n\
which it can be swapped.\n\
${IP_LABELS[$INDEX]} will be unavailable during the replacement.\n" \
            "${IP_LABELS[$INDEX]}" "${INTERFACES[$INDEX]}"
    else
        STANDBY_SWAPS[$INDEX]=1
    fi
}

###############################################################################
# handleServicePreReplacement
#
# Arguments: $1 = global array index
#
# Determines if there are any available standbys to
# swap this service label
# with.  If not, determine if there is a resource group on this service
# label, and ask the user if they want to move the resource group to another
# node during the replacement.  Otherwise, ask the user if they would like to
# continue the replacement, knowing that this service label will be off-line
# during the replacement.
###############################################################################
handleServicePreReplacement()
{
    typeset PS4_FUNC="handleServicePreReplacement"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset INDEX=$1

    # We have to figure out if this service is part of a resource group now,
    # because if we end up handling multiple interfaces on this adapter,
    # then there is the possibility that there could be multiple services
    # on this adapter that are part of this same resource group.
    # So we need to keep track of every service that's in a resource group
    # for future processing.
    RESOURCE_GROUPS[$INDEX]=$(getRGUpOnIPLabel "${IP_LABELS[$INDEX]}")

    if [ "$INDEX" -gt 0 ]; then
        typeset INDEX_2=0
        while [ "$INDEX_2" -lt "$NUM_INTERFACES" ]
        do
            # ":-" keeps slots that were never assigned from tripping
            # "set -u".
            if [ "${RESOURCE_GROUPS[$INDEX_2]:-}" = "${RESOURCE_GROUPS[$INDEX]}" ] \
                && [ "${RG_MIG_NODES[$INDEX_2]:-}" != "" ]
            then
                # The resource group that $INTERFACES[$INDEX] belongs to also
                # has another service label that we've already handled,
                # $INTERFACES[$INDEX_2], and when handling that the user
                # chose to migrate the resource group to another node.
                # Thus, that will also move this service label off this node,
                # so don't bother to do any more handling here.
                dspmsg scripts.cat 6971 \
"\nThe IP label ${IP_LABELS[$INDEX]} on interface ${INTERFACES[$INDEX]}\n\
is a service IP label that is part of resource group ${RESOURCE_GROUPS[$INDEX]},\n\
which you have chosen to migrate to node ${RG_MIG_NODES[$INDEX_2]}.\n\
Thus, this service IP label will be migrated to that node as well.\n\n" \
                    "${IP_LABELS[$INDEX]}" "${INTERFACES[$INDEX]}" \
                    "${RESOURCE_GROUPS[$INDEX]}" "${RG_MIG_NODES[$INDEX_2]}"
                return
            fi
            let INDEX_2=INDEX_2+1
        done
    fi

    # if this network uses ip aliasing for ipat, search for available boots,
    # else look for standbys.  The network is matched by exact name in
    # field 1 of "cllsnw -c"; field 3 is taken as the alias flag.
    typeset NETWORK=$(cllsif -Scn "${IP_LABELS[$INDEX]}" | cut -d: -f3)
    typeset ALIAS_NETWORK=$(cllsnw -c \
        | awk -F: -v net="$NETWORK" '$1 == net { print $3 }')

    if [ "$ALIAS_NETWORK" = "true" ]; then
        ALT_ADDRESSES[$INDEX]=$(findAvailableBoot "$INDEX")
    else
        ALT_ADDRESSES[$INDEX]=$(findAvailableStandby "$INDEX")
    fi

    if [ "${ALT_ADDRESSES[$INDEX]}" != "" ]; then
        STANDBY_SWAPS[$INDEX]=1
        return
    fi

    # If no available standby was found for the service label, then we
    # check to see if the service label is part of a resource group that
    # is up.  If it is, then we need to ask the user if they want to
    # move the resource group to another node while the replacement is in
    # progress.
    if [ "${RESOURCE_GROUPS[$INDEX]}" != "" ]; then
        handleRGPreReplacement "$INDEX" "${RESOURCE_GROUPS[$INDEX]}"
    else
        dspmsg scripts.cat 6956 \
"\nThe IP label ${IP_LABELS[$INDEX]} on interface ${INTERFACES[$INDEX]} \
is a service IP label\n\
that is not part of a resource group, but there are no available\n\
boot network interfaces with which it can be swapped.\n\
Continuing this process will cause ${IP_LABELS[$INDEX]} to be unavailable\n\
during the replacement.\n\
\n\
Would you like to continue with the replacement process?\n\
1) Continue\n\
2) Cancel\n\
Enter Selection: [default: 1] " \
            "${IP_LABELS[$INDEX]}" "${INTERFACES[$INDEX]}"

        getUserInput 2
        if [ $? = 2 ]; then
            dspmsg scripts.cat 6918 \
"\nCancelling the hot plug replacement via user request.\n\
No network settings have been changed.\n\n"
            rm -f "$LOCKFILE"
            exit 0
        fi
    fi
}

###############################################################################
# findAvailableStandby
#
# Arguments: $1 = index into $IP_LABELS[]
#
# Output: The first available standby address if one is found,
#         otherwise nothing (an empty string.)
###############################################################################
findAvailableStandby()
{
    typeset PS4_FUNC="findAvailableStandby"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset INDEX=$1
    typeset NETWORK=$(cllsif -Scn "${IP_LABELS[$INDEX]}" | cut -d: -f3)
    # Field separator requested from cllsaliveif/cllsif through -J.  It is
    # kept in a local and applied to "read" only, so the shell-wide IFS is
    # never modified.
    typeset SEP=$(cl_get_path -S)
    typeset STANDBY_ADDR=""
    typeset ALIVE_IP_LABEL=""
    typeset ALIVE_IP_ADDR=""
    typeset ALIVE_ADAPTER=""
    typeset ALIVE_INTERFACE=""
    typeset ALIVE_LANE_ADAPTER=""

    # See if there are any available standby interfaces that can takeover
    # this service/boot address while the replacement is in progress.
    cllsaliveif -J "$SEP" | while IFS="$SEP" read -r ALIVE_IP_LABEL \
        ALIVE_IP_ADDR ALIVE_ADAPTER ALIVE_INTERFACE ALIVE_LANE_ADAPTER
    do
        # The separator must be quoted here: left unquoted while it is
        # also the IFS, field splitting would strip it and awk would be
        # handed an empty FS.
        STANDBY_ADDR=$(cllsif -J "$SEP" -Sn "$ALIVE_IP_LABEL" \
            | awk -v NETWORK="$NETWORK" -v FS="$SEP" \
                  '{ if ($2 == "standby" && $3 == NETWORK) print $7 }')

        # If we're replacing an ATM adapter, then we must make
        # sure that the standby label that we found is not on the
        # same ATM adapter.  Otherwise, the service/boot address
        # will swap to an interface on the same ATM adapter,
        # and it won't get detached before we try to rmdev the
        # ATM adapter, and the process will fail.
        # Note that the 3rd column of output from cllsaliveif
        # is the adapter to which the interface belongs.  This
        # could be a LANE interface, so we check the 5th column
        # as well, which is the physical ATM adapter to which
        # a LANE interface belongs.
        if [ "$STANDBY_ADDR" != "" ] \
            && [ "$ALIVE_ADAPTER" != "$ADAPTER" ] \
            && [ "$ALIVE_LANE_ADAPTER" != "$ADAPTER" ]
        then
            print "$STANDBY_ADDR"
            break       # break out of cllsaliveif output read loop
        fi
    done
}

###############################################################################
# findAvailableBoot
#
# Arguments: $1 = index into $IP_LABELS[]
#
# Output: The boot address for this network on this node, if it is alive on
#         an interface other than ${INTERFACES[$1]}.
###############################################################################
findAvailableBoot()
{
    typeset PS4_FUNC="findAvailableBoot"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset INDEX=$1
    typeset NETWORK=$(cllsif -Scn "${IP_LABELS[$INDEX]}" | cut -d: -f3)
    typeset BOOT_LINE=""

    for ALIVE_IP_LABEL in $(cllsaliveif | awk '{print $1}')
    do
        # First matching "address interface" pair (fields 7 and 9).  It is
        # captured into a variable and split with parameter expansion, so
        # the result does not depend on "cmd | read" running in the
        # current shell.
        BOOT_LINE=$(cllsif -J "$OP_SEP" -Sn "$ALIVE_IP_LABEL" \
            | awk -v NETWORK="$NETWORK" -v FS="$OP_SEP" \
                  '$2 == "boot" && $3 == NETWORK { print $7 " " $9; exit }')
        BOOT_ADDR=${BOOT_LINE%% *}
        BOOT_IF=${BOOT_LINE#* }

        # lets not use a boot on the same interface
        if [ "$BOOT_ADDR" != "" ] && [ "$BOOT_IF" != "${INTERFACES[$INDEX]}" ]
        then
            print "$BOOT_ADDR"
            return
        fi
    done
}

###############################################################################
# findUnavailableStandby
#
# Arguments: $1 = index into $IP_LABEL[]
#
# Output: The first unavailable standby address if one is found,
#         otherwise nothing (an empty string.)
#         By "unavailable" we mean a standby address that is both not alive
#         in terms of the cluster manager, and is not configured on any
#         interface, and is not already being handled by this program.
###############################################################################
findUnavailableStandby()
{
    typeset PS4_FUNC="findUnavailableStandby"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset INDEX=$1
    typeset NETWORK=$(cllsif -Scn "${IP_LABELS[$INDEX]}" | cut -d: -f3)
    typeset INDEX_2=0

    for ADDR in $(cllsif -J "$OP_SEP" -Si "$LOCALNODENAME" \
        | awk -v NETWORK="$NETWORK" -v FS="$OP_SEP" \
              '{ if ($2 == "standby" && $3 == NETWORK) print $7 }')
    do
        # NOTE(review): a candidate is only considered when clgetif
        # succeeds for it - confirm that this exit status really means
        # "not configured on any interface" as the header describes.
        clgetif "$ADDR" > /dev/null 2>&1 || continue

        # Skip the address if any interface this program is handling
        # already owns it, either as its swap target or as its own
        # address.  Every handled slot is checked, not just $INDEX.
        TAKEN=0
        INDEX_2=0
        while [ "$INDEX_2" -lt "$NUM_INTERFACES" ]
        do
            if [ "${ALT_ADDRESSES[$INDEX_2]:-}" = "$ADDR" ] \
                || [ "${IP_ADDRESSES[$INDEX_2]:-}" = "$ADDR" ]
            then
                TAKEN=1
                break       # break out of this loop
            fi
            let INDEX_2=INDEX_2+1
        done
        if [ "$TAKEN" = 1 ]; then
            continue        # continue to the next $ADDR
        fi

        print "$ADDR"
        break
    done
}

###############################################################################
# findAvailableService
#
# Arguments: $1 = index into $IP_LABELS[]
#
# Output: A service address that was found to be up on the specified network
#         on the local node, or an empty string if one wasn't found.
###############################################################################
findAvailableService()
{
    typeset PS4_FUNC="findAvailableService"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset INDEX=$1
    typeset NETWORK=$(cllsif -Scn "${IP_LABELS[$INDEX]}" | cut -d: -f3)

    for ALIVE_IP_LABEL in $(cllsaliveif | awk '{print $1}')
    do
        SVC_ADDR=$(cllsif -J "$OP_SEP" -Sn "$ALIVE_IP_LABEL" \
            | awk -v NETWORK="$NETWORK" -v FS="$OP_SEP" \
                  '{ if ($2 == "service" && $3 == NETWORK) print $7 }')
        if [ "$SVC_ADDR" != "" ]; then
            print "$SVC_ADDR"
            break
        fi
    done
}

###############################################################################
# handleRGPreReplacement
#
# Arguments: $1 = index into the global interface arrays
#            $2 = resource group name
#
# Returns: 1 when the replacement is to continue (with or without a
#            migration target recorded in RG_MIG_NODES[$1]).
#          Does not return when the user cancels: the lock file is removed
#          and the script exits with status 0.
#
# Ask the user if they want to move the resource group to another node.
# If they do, then also ask them to which node they want to move the resource
# group.  Also ask them if they want the resource group moved back to this
# node (the local node) after the replacement is complete.
###############################################################################
handleRGPreReplacement()
{
    typeset PS4_FUNC="handleRGPreReplacement"
    if [[ "${VERBOSE_LOGGING:-}" = "high" ]]; then
        set -x
    fi

    # $1 indexes the global interface arrays, $2 names the resource group.
    typeset INDEX=$1
    typeset RG=$2

    # Offer the three choices: continue, migrate, or cancel.
    dspmsg scripts.cat 6915 \
"\nThe IP label ${IP_LABELS[$INDEX]} on interface ${INTERFACES[$INDEX]} is a\n\
service IP label that is part of resource group $RG, and there are\n\
no available network interfaces on the local node to which the service label\n\
can be swapped. Temporary loss of connectivity to this interface will\n\
pause connectivity for this resource group.\n\
\n\
Would you like to migrate resource group $RG over to another\n\
node, leave the resource group as is, or cancel the replacement process?\n\
\n\
Note: Cancelling the replacement process now will leave all network\n\
settings untouched.\n\
\n\
1) Continue Without Moving\n\
2) Move The Resource Group\n\
3) Cancel The Replacement Process\n\
Enter Selection [default: 1]: " \
        ${IP_LABELS[$INDEX]} ${INTERFACES[$INDEX]} $RG

    getUserInput 3
    SELECTION=$?

    if [ $SELECTION = 1 ]; then
        # Continue without moving the resource group.
        dspmsg scripts.cat 6917 \
"\nThe resource group $RG will not be migrated to another node.\n\
Warning: Replacement of this adapter will pause all connectivity to\n\
this resource group while the replacement is progress.\n\n" $RG
        RG_MIG_NODES[$INDEX]=""
        return 1
    elif [ $SELECTION = 3 ]; then
        # Abandon the whole replacement.
        dspmsg scripts.cat 6918 \
"\nCancelling the hot plug replacement via user request.\n\
No network settings have been changed.\n\n"
        rm -f $LOCKFILE
        exit 0
    elif [ $SELECTION = 2 ]; then
        # Let the user pick a target node.  A return of 0 means no node
        # was chosen.
        if ! selectRGMigrationNode $INDEX; then
            # set -u makes ksh error if we try to access
            # an uninitialized value
            RG_MIG_NODES[$INDEX]=""
        fi
        return 1
    fi
}

###############################################################################
# selectRGMigrationNode
#
# Arguments: $1 = index into $RESOURCE_GROUPS[]
#
# Output: The node name that the user selected, or "" if the user
#         selected to not move the RG.
#         The selected node is stored in RG_MIG_NODES[$1] (it is not
#         written to stdout), and the "move back afterwards" answer
#         (1 = yes, 2 = no) in RG_MIG_BACK[$1].
#
# Returns: 1 if a migration target was chosen
#          0 if the resource group is not going to be migrated
#          Does not return when the user cancels the replacement: the lock
#          file is removed and the script exits with status 0.
#
# Print a list of valid nodes in the given resource group that the user
# can migrate the resource group to.  Then prompt the user to choose
# one, offering "Cancel Migration" as option.
###############################################################################
selectRGMigrationNode()
{
    typeset PS4_FUNC="selectRGMigrationNode"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset INDEX=$1
    typeset NUM_ITEMS=0
    typeset NODE_NAMES[0]=""
    typeset CANDIDATES=""

    # HAES has the ability to determine which nodes have enough
    # available resources to takeover a resource group.
    CANDIDATES=$(clsetenvgrp "$LOCALNODENAME" candidateNodeQuery \
                 "${RESOURCE_GROUPS[$INDEX]}:<>" 2>/dev/null)

    # Each reply line is "group:current_location:node list".  The loop is
    # fed by a here-document instead of a pipeline, so NUM_ITEMS and
    # NODE_NAMES[] are always updated in the current shell.
    while IFS=: read -r group_name current_location node_list
    do
        for NODE_NAME in $node_list
        do
            if [ "$NODE_NAME" != "" ]; then
                let NUM_ITEMS=NUM_ITEMS+1
                NODE_NAMES[$NUM_ITEMS]=$NODE_NAME
                print "$NUM_ITEMS) $NODE_NAME"
            fi
        done
    done <<EOF
$CANDIDATES
EOF

    if [ "$NUM_ITEMS" = 0 ]; then
        # There are no other alive nodes to move this resource group to.
        dspmsg scripts.cat 6922 \
"\nThere are no alive cluster nodes to which the resource group\n\
${RESOURCE_GROUPS[$INDEX]} can be migrated.\n\
\n\
Note: Continuing will cause the resource group to be unavailable during\n\
the replacement.\n\
\n\
Do you want to continue with the replacement?\n\n\
1) Yes\n\
2) No\n\
Enter Selection [default: 1]: " "${RESOURCE_GROUPS[$INDEX]}"

        getUserInput 2
        if [ $? = 2 ]; then
            dspmsg scripts.cat 6918 \
"\nCancelling the hot plug replacement via user request.\n\
No network settings have been changed.\n\n"
            rm -f "$LOCKFILE"
            exit 0
        fi
        return 0
    fi

    # Two extra menu entries follow the node list: "cancel migration" is
    # second to last, "cancel replacement" is last.
    let NUM_ITEMS=NUM_ITEMS+1
    dspmsg scripts.cat 6920 "$NUM_ITEMS) Cancel Migration\n" "$NUM_ITEMS"

    let NUM_ITEMS=NUM_ITEMS+1
    dspmsg scripts.cat 6921 "$NUM_ITEMS) Cancel The Replacement Process\n" \
        "$NUM_ITEMS"
    dspmsg scripts.cat 6925 "\nEnter Selection [default: 1]: "

    getUserInput "$NUM_ITEMS"
    SELECTION=$?

    # Explicit arithmetic expansion: do not rely on "[ ... -eq N-1 ]"
    # being evaluated as an expression by the test builtin.
    if [ "$SELECTION" -eq $((NUM_ITEMS - 1)) ]; then
        dspmsg scripts.cat 6923 \
"\nThe resource group ${RESOURCE_GROUPS[$INDEX]} will not be migrated\n\
to any node.\n\n" "${RESOURCE_GROUPS[$INDEX]}"
        return 0
    elif [ "$SELECTION" -eq "$NUM_ITEMS" ]; then
        dspmsg scripts.cat 6918 \
"\nCancelling the hot plug replacement via user request.\n\
No network settings have been changed.\n\n"
        rm -f "$LOCKFILE"
        exit 0
    fi

    dspmsg scripts.cat 6924 \
"\nThe resource group ${RESOURCE_GROUPS[$INDEX]} will be migrated\n\
to the node ${NODE_NAMES[$SELECTION]}.\n\n" \
        "${RESOURCE_GROUPS[$INDEX]}" "${NODE_NAMES[$SELECTION]}"
    RG_MIG_NODES[$INDEX]=${NODE_NAMES[$SELECTION]}

    dspmsg scripts.cat 6926 \
"\nDo you want to migrate this resource group back to this node,\n\
$LOCALNODENAME, after the hot replacement is complete?\n\
1) Yes\n\
2) No\n\
Enter Selection: [default: 1]: " "$LOCALNODENAME"

    getUserInput 2
    RG_MIG_BACK[$INDEX]=$?
    return 1
}

###############################################################################
# getRGUpOnIPLabel
#
# Arguments: $1 = IP Label on which to determine if a resource group is up.
#
# Returns: No integer value is returned
#
# Output: The resource group name that was found to be up (online) on the
#         IP label given on the *local* node.  This does not check to see if
#         the IP label is on another node.
###############################################################################
getRGUpOnIPLabel()
{
    typeset PS4_FUNC="getRGUpOnIPLabel"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset IP_LABEL=$1

    FOUND_RG_UP=""
    RG=$(findIPLabelAssociatedRG "$IP_LABEL")
    if [ "$RG" = "" ]; then
        return
    fi

    # A "clRGinfo -s" row of the form <group>:ONLINE:<local node> means
    # the group is up here.
    FOUND_RG_UP=$(clRGinfo -s "$RG" \
        | awk -F: -v NODE="$LOCALNODENAME" -v RG="$RG" \
              '{ if ($1 == RG && $2 == "ONLINE" && $3 == NODE) print $2 }')
    if [ "$FOUND_RG_UP" != "" ]; then
        print "$RG"
    fi
}

###############################################################################
# findIPLabelAssociatedRG
#
# Arguments: $1 = IP Label for which we want to determine if it is a service
#                 that is part of a resource group.
#
# Output: The resource group found, or an empty string.
###############################################################################
findIPLabelAssociatedRG()
{
    typeset PS4_FUNC="findIPLabelAssociatedRG"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset IP_LABEL=$1

    # Pull the quoted value out of every 'group = "<name>"' line of the
    # matching HACMPresource stanzas.
    RG=$(odmget -q "name=SERVICE_LABEL AND value=$IP_LABEL" HACMPresource \
         | grep "group =" | cut -d\" -f 2)
    print $RG
}

###############################################################################
# migrateRG
#
# Arguments: $1 = IP label
#            $2 = resource group name
#            $3 = node to which to migrate the resource group
#            $4 : 1 = run verification
#                 0 = do not run verification
#            $5 : "away" = moving the RG away from the local node.
#                 "back" = moving the RG back to the local node.
#            $6 = index into $INTERFACES
#
# Returns: 0 : success, RG migrated
#          1 : the service label was no longer found up on the specified
#              interface, so no migration took place
#          If clRGmove fails, the lock file is removed and the script exits
#          with status 1.
#
# Does a User Requested rg_move of the specified resource group to the
# specified node.
###############################################################################
migrateRG()
{
    typeset PS4_FUNC="migrateRG"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset IP_LABEL=$1
    typeset RG=$2
    typeset NODE=$3
    typeset VERIFY=$4
    typeset AWAY=$5
    typeset INDEX=$6

    if [ "$AWAY" = "away" ] && [ "$(getRGUpOnIPLabel "$IP_LABEL")" != "$RG" ]
    then
        dspmsg scripts.cat 6944 \
"\nResource group $RG was no longer found up on the local node.\n\
Skipping migration for this resource group.\n" "$RG"
        # We don't want to try to do any other resource group processing.
        RG_MIG_BACK[$INDEX]=""
        return 1
    fi

    # NOTE(review): VERIFY and STICKY are derived below but are not passed
    # to clRGmove in this function.  They are kept because CASCADING and
    # STICKY are globals that code outside this view may read - confirm
    # before removing.
    if [ "$VERIFY" = 1 ]; then
        VERIFY=""
    else
        VERIFY="-v"
    fi

    CASCADING=$(odmget -q "group=$RG AND type=cascading" HACMPgroup)
    if [ "$CASCADING" != "" ]; then
        STICKY="sticky"
    else
        STICKY=""
    fi

    if ! clRGmove -g "$RG" -m -n "$NODE" -i
    then
        dspmsg scripts.cat 6932 \
"\nError: DARE migration of resource group $RG to node\n\
$NODE failed.\n\n" "$RG" "$NODE"
        dspmsg scripts.cat 6910 \
"\nCancelling PCI hot plug replacement process because of fatal error above.\n\n"
        dspmsg scripts.cat 6929 \
"\nManual intervention may be required to reconfigure affected interfaces\n\
and adapters.\n\n"
        rm -f "$LOCKFILE"
        exit 1
    fi

    print       # vertical spacing
    return 0
}

###############################################################################
# getUserInput
#
# Arguments: $1 = maximum number that the user can input
#
# Returns: the value that user input, or 1 if they entered nothing
#
# Loop, reading input from the keyboard, until the user either enters
# a number that is between 1 and MAX_NUMBER (inclusive), or until they enter
# nothing (just hit enter), meaning they want the default value, which is
# always 1.
###############################################################################
getUserInput()
{
    typeset PS4_FUNC="getUserInput"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset MAX_NUMBER=$1
    typeset INPUT=""

    while :
    do
        read INPUT

        # The reply is classified with shell patterns and only ever
        # expanded inside quotes, so glob characters or blanks typed by
        # the user cannot be interpreted by the shell.
        case $INPUT in
            "")
                # User entered nothing, meaning they want the default
                # value, which is always 1.
                dspmsg scripts.cat 6919 "\nYou entered: 1\n\n" 1
                return 1
                ;;
            *[!0-9]*)
                # Not a plain number: fall through to the retry prompt.
                ;;
            *)
                if [ "$INPUT" -gt 0 ] && [ "$INPUT" -le "$MAX_NUMBER" ]; then
                    dspmsg scripts.cat 6919 "\nYou entered: $INPUT\n\n" "$INPUT"
                    return "$INPUT"
                fi
                ;;
        esac

        dspmsg scripts.cat 6916 \
"\nYou entered a value that was out of range. Try again.\n\
Enter Selection [default: 1]: "
    done
}

###############################################################################
# interfaceMaintenanceMode
#
# Arguments: $1 : "start" = start the maintenance mode
#               : "stop" = stop the maintenance mode
#            $2 = IP address of the interface
#            $3 = HACMP network that the IP address is on
#                 (accepted but not used by this function)
#            $4 = logical interface that the IP address is configured on
#
# Starts/stops an interface maintenance mode on the specified IP address.
# Any failure is fatal: the lock file is removed and the script exits 1.
#
# In HAS, this calls the interface_maintenance_mode event via clifmaint.
# This event marks the interface down in clstrmgr without causing any
# failure events, unless the interface is the last alive interface on
# its HACMP network, in which case a network_down event is generated.
# If the maintenance mode is being stopped, then a network_up will happen
# if this interface will be the only alive interface on its HACMP network.
#
# In HAES, this calls cl_hats_adapter to start an infinite grace period
# on the specified IP address.  If the maintenance mode is being stopped,
# then cl_hats_adapter is called to end the grace period.
###############################################################################
interfaceMaintenanceMode()
{
    typeset PS4_FUNC="interfaceMaintenanceMode"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset START_STOP=$1
    typeset IP_ADDR=$2
    typeset NETWORK=$3
    typeset INTERFACE=$4

    # Wait until the cluster is stable.  For ATM adapters with multiple
    # configured interfaces, it's likely that if we just applied a
    # maintenance mode to one interface, then the cluster still might be
    # processing the event.
    # It's also possible that the cluster is still processing a swap_adapter
    # event that we called previously.
    waitUntilClusterStable

    if [ "$START_STOP" = "start" ]; then
        # HAES: Tell topology services to disable the IP label.
        # Any output line containing "ERROR" counts as a failure.
        ERROR=$(cl_hats_adapter "$INTERFACE" -g "$IP_ADDR" 2>&1 \
                | grep ERROR)
        if [ "$ERROR" != "" ]; then
            dspmsg scripts.cat 6908 \
"\nError: Trying to disable network connectivity monitoring on the interface\n\
$INTERFACE with IP address $IP_ADDR failed.\n\n" "$INTERFACE" "$IP_ADDR"
            dspmsg scripts.cat 6910 \
"\nCancelling PCI hot plug replacement process because of fatal error above.\n\n"
            rm -f "$LOCKFILE"
            exit 1
        fi
        dspmsg scripts.cat 6935 \
"\nSuccessfully disabled network connectivity monitoring on the interface\n\
$INTERFACE with IP address $IP_ADDR.\n\
No failure events on $INTERFACE should be generated as a loss of network\n\
connectivity.\n" "$INTERFACE" "$IP_ADDR"
    else
        # stop maintenance mode
        # stop HAES topology services "grace period"
        ERROR=$(cl_hats_adapter "$INTERFACE" -e "$IP_ADDR" 2>&1 \
                | grep ERROR)
        if [ "$ERROR" != "" ]; then
            dspmsg scripts.cat 6909 \
"\nError: Trying to enable network connectivity monitoring on the interface\n\
$INTERFACE with IP address $IP_ADDR failed.\n\n" "$INTERFACE" "$IP_ADDR"
            dspmsg scripts.cat 6910 \
"\nCancelling PCI hot plug replacement process because of fatal error above.\n\n"
            rm -f "$LOCKFILE"
            exit 1
        fi
        dspmsg scripts.cat 6936 \
"\nSuccessfully enabled network connectivity monitoring on the interface\n\
$INTERFACE with IP address $IP_ADDR.\n" "$INTERFACE" "$IP_ADDR"
    fi
}

###############################################################################
# stopAllInterfaceMaintenanceMode
#
# Arguments: None
# Returns: Nothing if no errors happened.
#
# Stop the interface maintenance modes that may be applied to all the
# interfaces in $INTERFACES[].
###############################################################################
stopAllInterfaceMaintenanceMode()
{
    typeset PS4_FUNC="stopAllInterfaceMaintenanceMode"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset INDEX=0

    while [ $INDEX -lt $NUM_INTERFACES ]
    do
        # Skip slots that carry no interface name.
        if [[ ${INTERFACES[$INDEX]} = "" ]]; then
            let INDEX=INDEX+1
            continue
        fi

        # Only interfaces flagged alive (value 1) are handled here.
        # Quoted so that an empty element cannot break the test.
        if [ "${INTERFACES_ALIVE[$INDEX]}" = 1 ]; then
            NETWORK=$(cllsif -Scn ${IP_LABELS[$INDEX]} | cut -d: -f3)
            ALIAS_NETWORK=$(cllsnw -c | cut -f1,3 -d: | grep $NETWORK | cut -f2 -d:)
            if [ "${ALT_ADDRESSES[$INDEX]}" != "" -a "$ALIAS_NETWORK" != "true" ]
            then
                # If we have swapped our address with a standby, or if we
                # migrated a resource group off of this ip label to another
                # node, then we need to stop the maintenance mode on the
                # standby or boot address that is now on this interface.
                interfaceMaintenanceMode stop ${ALT_ADDRESSES[$INDEX]} \
                    $NETWORK ${INTERFACES[$INDEX]}
            else
                interfaceMaintenanceMode stop ${IP_ADDRESSES[$INDEX]} \
                    $NETWORK ${INTERFACES[$INDEX]}
            fi
        fi

        let INDEX=INDEX+1
    done
}

###############################################################################
# configureHWAddress
#
# Arguments: $1 = interface of the adapter (physical or LANE) to configure
#                 with the HW address
#            $2 = HW address
#
# Runs chdev on the adapter to set the alternate hardware address.
#
# Globals read:    IS_ATM, ADAPTER, LOCKFILE
# Globals written: INTERFACE_TYPE, INTERFACE_NUM, ATM_HW_ADDRESS
#
# If ANY of the chdev calls fails, the error is reported, maintenance mode
# is stopped on all interfaces, routes are restored, the lock file is removed
# and the script exits with status 1.
###############################################################################
configureHWAddress()
{
    typeset PS4_FUNC="configureHWAddress"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset INTERFACE=$1
    typeset HW_ADDRESS=$2
    typeset HW_ADAPTER=""

    # Accumulated status of every chdev below.  Testing $? once after the
    # if/else would only see the LAST chdev (the "use_alt_addr=no -P" one),
    # hiding a failure of the chdev that actually sets the address.
    typeset RC=0

    # First, if we're not dealing with an ATM adapter, just access the
    # global $ADAPTER variable.
    if [ $IS_ATM = 0 ]; then
        HW_ADAPTER=$ADAPTER     # For use in the message below.
        chdev -l $ADAPTER -a alt_addr=$HW_ADDRESS -a use_alt_addr=yes || RC=1

        # After setting the HW address, turn off use_alt_addr in the ODM
        # so that if an adapter swap happens with this interface,
        # the HW address will be brought back up on its burned-in address.
        chdev -l $ADAPTER -a use_alt_addr=no -P || RC=1
    else
        INTERFACE_TYPE=$(print $INTERFACE | sed 's/[0-9]*$//')
        INTERFACE_NUM=$(print $INTERFACE | sed 's/^[^0-9]*//')

        case "$INTERFACE_TYPE" in
        en)
            # LANE ethernet
            HW_ADAPTER=ent$INTERFACE_NUM
            chdev -l $HW_ADAPTER \
                -a local_lan_addrs=$(hex12_to_dotted $HW_ADDRESS) || RC=1
            ;;
        tr)
            # LANE token ring
            HW_ADAPTER=tok$INTERFACE_NUM
            chdev -l $HW_ADAPTER \
                -a local_lan_addrs=$(hex12_to_dotted $HW_ADDRESS) || RC=1
            ;;
        *)
            # ATM classical IP: only the first 14 characters of the
            # address are passed to chdev.
            HW_ADAPTER=$ADAPTER
            ATM_HW_ADDRESS=$(print $HW_ADDRESS | cut -c1-14)
            chdev -l $ADAPTER -a alt_addr=$ATM_HW_ADDRESS -a use_alt_addr=yes || RC=1
            chdev -l $ADAPTER -a use_alt_addr=no -P || RC=1
            ;;
        esac
    fi

    if [ $RC != 0 ]; then
        dspmsg scripts.cat 6957 \
"\nError: Could not change hardware address on device $HW_ADAPTER to\n\
$HW_ADDRESS.\n" $HW_ADAPTER $HW_ADDRESS
        dspmsg scripts.cat 6910 \
"\nCancelling PCI hot plug replacement process because of fatal error above.\n\n"
        dspmsg scripts.cat 6929 \
"\nManual intervention may be required to reconfigure affected interfaces\n\
and adapters.\n\n"
        stopAllInterfaceMaintenanceMode
        restoreAllRoutes
        rm -f $LOCKFILE
        exit 1
    fi
}

###############################################################################
# unconfigureAdapter
#
# Arguments: $1 = adapter to unconfigure
#            $2 : verbose = print message on successful unconfiguration
#                 silent = do not print message on successful unconfiguration
#
# Runs rmdev on the given adapter, without deleting the definition from
# the ODM.
###############################################################################
unconfigureAdapter()
{
    typeset PS4_FUNC="unconfigureAdapter"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset DEV_ADAPTER=$1
    typeset VERBOSE=$2

    rmdev -l $DEV_ADAPTER
    if [ $? != 0 ]; then
        # rmdev failed: report, undo maintenance mode and routes, and quit.
        dspmsg scripts.cat 6931 \
"\nError: Could not unconfigure device $DEV_ADAPTER.\n\n" $DEV_ADAPTER
        dspmsg scripts.cat 6910 \
"\nCancelling PCI hot plug replacement process because of fatal error above.\n\n"
        dspmsg scripts.cat 6929 \
"\nManual intervention may be required to reconfigure affected interfaces\n\
and adapters.\n\n"
        stopAllInterfaceMaintenanceMode
        restoreAllRoutes
        rm -f $LOCKFILE
        exit 1
    fi

    if [ "$VERBOSE" = "verbose" ]; then
        dspmsg scripts.cat 6930 \
"Successfully unconfigured device $DEV_ADAPTER.\n" $DEV_ADAPTER
    fi
}

###############################################################################
# configureAdapter
#
# Arguments: $1 = adapter to configure
#            $2 : verbose = print message on successful configuration
#                 silent = do not print message on successful configuration
#
# Runs mkdev on the given adapter.  If mkdev fails, the error is reported,
# maintenance mode is stopped on all interfaces, routes are restored, the
# lock file is removed and the script exits with status 1.
###############################################################################
configureAdapter()
{
    typeset PS4_FUNC="configureAdapter"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset DEV_ADAPTER=$1
    typeset VERBOSE=$2

    dspmsg scripts.cat 6963 "\nConfiguring adapter $DEV_ADAPTER...\n" \
        $DEV_ADAPTER

    mkdev -l $DEV_ADAPTER
    if [ $? != 0 ]; then
        dspmsg scripts.cat 6941 \
"\nError: Could not configure device $DEV_ADAPTER.\n\n" $DEV_ADAPTER
        dspmsg scripts.cat 6942 \
"\nThe device $DEV_ADAPTER is in the \"defined\" state.\n\
All interfaces associated with this adapter are unconfigured.\n" $DEV_ADAPTER
        dspmsg scripts.cat 6910 \
"\nCancelling PCI hot plug replacement process because of fatal error above.\n\n"
        dspmsg scripts.cat 6929 \
"\nManual intervention may be required to reconfigure affected interfaces\n\
and adapters.\n\n"
        stopAllInterfaceMaintenanceMode
        restoreAllRoutes
        rm -f $LOCKFILE
        exit 1
    fi

    if [ "$VERBOSE" = "verbose" ]; then
        dspmsg scripts.cat 6934 \
"Successfully configured device $DEV_ADAPTER.\n" $DEV_ADAPTER
    fi
}

###############################################################################
# configureInterface
#
# Arguments: $1 = index into $INTERFACES[]
#
# Runs ifconfig on the interface, taking care to configure to the correct
# address depending on whether $ALT_ADDRESSES[$INDEX] is filled.
# Also restores any routes that were saved during the unconfiguration.
#
# Globals written: ARP_CONFIG, INTERFACE_TYPE, ARP_SERVER
#
# If the interface does not end up with the expected address (as judged by
# check_ifconfig_status), the error is reported, maintenance mode is stopped
# on all interfaces, routes are restored, the lock file is removed and the
# script exits with status 1.
###############################################################################
configureInterface()
{
    typeset PS4_FUNC="configureInterface"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset INDEX=$1
    typeset CFG_ADDR=""     # address (or label) handed to ifconfig
    typeset CFG_MTU=""      # MTU handed to ifconfig
    typeset CHECK_ADDR=""   # address expected on the interface afterwards
    typeset ATM_ARGS=""     # extra ifconfig arguments for ATM Classical IP

    dspmsg scripts.cat 6964 \
        "\nConfiguring interface ${INTERFACES[$INDEX]}...\n" \
        ${INTERFACES[$INDEX]}

    # mkdev updates the AIX ODM (CuDv) and marks it available.
    mkdev -l ${INTERFACES[$INDEX]}

    # If this is an ATM Classical IP interface, then we need to get the ARP
    # server address and configure that when we call ifconfig, otherwise
    # the interface will not get configured correctly and no network
    # traffic will go through it.
    ARP_CONFIG=""
    INTERFACE_TYPE=$(print ${INTERFACES[$INDEX]} | sed 's/[0-9]*$//')
    if [ "$INTERFACE_TYPE" = "at" ]; then
        # ATM Classical IP interface
        ARP_SERVER=$(lsattr -EOl ${INTERFACES[$INDEX]} -a servaddr \
                     | grep -v servaddr)
        if [ "$ARP_SERVER" != "" ]; then
            ARP_CONFIG="svc_c $ARP_SERVER"
        fi
        ATM_ARGS="$ARP_CONFIG device $ADAPTER"
    fi

    # Pick the address/MTU pair: the alternate one when it is filled in,
    # otherwise the interface's own IP label.
    if [ "${ALT_ADDRESSES[$INDEX]}" != "" ]; then
        CFG_ADDR=${ALT_ADDRESSES[$INDEX]}
        CFG_MTU=${ALT_MTUS[$INDEX]}
        CHECK_ADDR=${ALT_ADDRESSES[$INDEX]}
    else
        CFG_ADDR=${IP_LABELS[$INDEX]}
        CFG_MTU=${MTUS[$INDEX]}
        CHECK_ADDR=${IP_ADDRESSES[$INDEX]}
    fi

    # $ATM_ARGS is deliberately unquoted so it expands to separate words
    # (or to nothing for non-ATM interfaces).
    ifconfig ${INTERFACES[$INDEX]} $CFG_ADDR \
        netmask ${NETMASKS[$INDEX]} mtu $CFG_MTU $ATM_ARGS up

    # ifconfig's own exit status is not used; success is decided by
    # check_ifconfig_status.
    check_ifconfig_status ${INTERFACES[$INDEX]} $CHECK_ADDR \
        ${NETMASKS[$INDEX]}
    if [ $? != 0 ]; then
        dspmsg scripts.cat 6938 \
            "\nError: Could not configure the interface ${INTERFACES[$INDEX]}.\n\n" \
            ${INTERFACES[$INDEX]}
        dspmsg scripts.cat 6910 \
"\nCancelling PCI hot plug replacement process because of fatal error above.\n\n"
        dspmsg scripts.cat 6929 \
"\nManual intervention may be required to reconfigure affected interfaces\n\
and adapters.\n\n"
        stopAllInterfaceMaintenanceMode
        restoreAllRoutes
        rm -f $LOCKFILE
        exit 1
    fi

    dspmsg scripts.cat 6937 \
        "Successfully configured the interface ${INTERFACES[$INDEX]}.\n" \
        ${INTERFACES[$INDEX]}

    # Run (and then discard) the route-restore script written by
    # replace_routes for this interface.  The ":" line only serves to show
    # the script's return code in the trace output.
    $RESTORE_ROUTES.${INTERFACES[$INDEX]} > /dev/null 2>&1
    : Completed $RESTORE_ROUTES.${INTERFACES[$INDEX]} with return code $?.
    rm -f $RESTORE_ROUTES.${INTERFACES[$INDEX]}
}

###############################################################################
# unconfigureInterface
#
# Arguments: $1 = index into $INTERFACES[] or $NON_HACMP_INTERFACES[]
#            $2 : "hacmp" = this is an index into $INTERFACES[]
#               : "nonhacmp" = this is an index into $NON_HACMP_INTERFACES[]
#
# Runs ifconfig detach on the interface, then rmdev.  For HACMP interfaces
# it first calls replace_routes to create a script that can be called when
# configuring the interface to restore routes that are configured now.
#
# Only the rmdev status decides success.  On failure the error is reported,
# maintenance mode is stopped on all interfaces, routes are restored, the
# lock file is removed and the script exits with status 1.
###############################################################################
unconfigureInterface()
{
    typeset PS4_FUNC="unconfigureInterface"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset INDEX=$1
    typeset INTERFACE=""

    if [ "$2" = "hacmp" ]; then
        INTERFACE=${INTERFACES[$INDEX]}
        replace_routes $RESTORE_ROUTES.$INTERFACE $INTERFACE > /dev/null 2>&1
        if [ "${INTERFACES_TYPE[$INDEX]}" = "standby" ]; then
            addback_standby_route ${IP_ADDRESSES[$INDEX]} > /dev/null 2>&1
        fi
    else
        INTERFACE=${NON_HACMP_INTERFACES[$INDEX]}
    fi

    ifconfig $INTERFACE detach

    rmdev -l $INTERFACE
    if [ $? != 0 ]; then
        dspmsg scripts.cat 6928 \
"\nError: Could not unconfigure the interface $INTERFACE.\n\n" $INTERFACE
        dspmsg scripts.cat 6910 \
"\nCancelling PCI hot plug replacement process because of fatal error above.\n\n"
        dspmsg scripts.cat 6929 \
"\nManual intervention may be required to reconfigure affected interfaces\n\
and adapters.\n\n"
        stopAllInterfaceMaintenanceMode
        restoreAllRoutes
        rm -f $LOCKFILE
        exit 1
    fi

    dspmsg scripts.cat 6927 \
"Successfully unconfigured the interface $INTERFACE.\n\n" $INTERFACE
}

###############################################################################
# handleAllInterfacesPostReplacement
#
# Arguments: None
#
# After the physical hot replacement has completed, we need to see if any
# network topology changes occurred on the local node that change what we're
# expecting to find.
#
# For example: If we swapped a service or boot address to a standby,
# that service or boot address could now be unavailable for various reasons,
# thus we need to take care to setup our global variables so we don't
# try to call a swap_address that will fail, and so we can come back up on
# an available IP label.
#
# Globals read:    NUM_INTERFACES, INTERFACES[], IP_LABELS[], IP_ADDRESSES[],
#                  MTUS[], RESOURCE_GROUPS[], RG_MIG_NODES[], RG_MOVED[],
#                  CASCADING_IPAT_SVCS[], INTERFACES_TYPE[]
# Globals written: ADDR, ALT_ADDRESSES[], ALT_MTUS[], HW_ADDRESSES[],
#                  STANDBY_SWAPS[], INTERFACES_ALIVE[]
###############################################################################
handleAllInterfacesPostReplacement()
{
    typeset PS4_FUNC="handleAllInterfacesPostReplacement"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset INDEX=0

    while [ $INDEX -lt $NUM_INTERFACES ]
    do
        if [ "${RESOURCE_GROUPS[$INDEX]}" != "" ] \
            && [ "${RG_MIG_NODES[$INDEX]}" = "" ] \
            && [ ${RG_MOVED[$INDEX]} = 0 ] \
            && [ "${STANDBY_SWAPS[$INDEX]}" != "1" ]
        then
            if [ "$(getRGUpOnIPLabel ${IP_LABELS[$INDEX]})" = "" ]; then
                # We won't be reconfiguring as a service, so don't attempt
                # to restore the original hardware address.
                HW_ADDRESSES[$INDEX]=""

                dspmsg scripts.cat 6945 \
"\nResource group ${RESOURCE_GROUPS[$INDEX]} was no longer found up on \
the local node.\n" ${RESOURCE_GROUPS[$INDEX]}

                # If there was a resource group up on this label when we
                # started, and there was no standby to swap our service label
                # to, and if the user chose not to migrate the resource group,
                # and the resource group is no longer found up on this node,
                # then we need to reconfigure to either a boot or standby
                # address.
                if [ "${CASCADING_IPAT_SVCS[$INDEX]}" != "" ]; then
                    # Cascading RG not on its home node.
                    ALT_ADDRESSES[$INDEX]=$(findUnavailableStandby $INDEX)
                    if [ "${ALT_ADDRESSES[$INDEX]}" = "" ]; then
                        dspmsg scripts.cat 6946 \
"\nNo available boot network interface was found with which we \n\
can configure interface ${INTERFACES[$INDEX]}.\n\
This interface will be left unconfigured.\n" ${INTERFACES[$INDEX]}
                        INTERFACES_ALIVE[$INDEX]=0
                        let INDEX=INDEX+1
                        continue
                    fi
                    # NOTE(review): the default text names both the interface
                    # and the address but only the address is passed as an
                    # insert -- confirm against the message catalog.
                    dspmsg scripts.cat 6947 \
"\nConfiguring interface ${INTERFACES[$INDEX]} with address \
${ALT_ADDRESSES[$INDEX]}.\n" ${ALT_ADDRESSES[$INDEX]}
                else
                    # Cascading RG on its home node, or rotating RG.
                    ADDR=$(findAvailableBoot $INDEX)
                    if [ "$ADDR" = "" ]; then
                        # If an available boot wasn't found, try a standby.
                        ADDR=$(findAvailableStandby $INDEX)
                    fi
                    if [ "$ADDR" = "" ]; then
                        dspmsg scripts.cat 6946 \
"\nNo available boot network interface was found with which we \n\
can configure interface ${INTERFACES[$INDEX]}.\n\
This interface will be left unconfigured.\n" ${INTERFACES[$INDEX]}
                        INTERFACES_ALIVE[$INDEX]=0
                        let INDEX=INDEX+1
                        continue
                    fi
                    ALT_ADDRESSES[$INDEX]=$ADDR

                    # Set the interface as not alive so we don't try
                    # to call a stop interface maintenance mode that will fail.
                    INTERFACES_ALIVE[$INDEX]=0
                fi
                ALT_MTUS[$INDEX]=${MTUS[$INDEX]}
            fi
        elif [ ${INTERFACES_ALIVE[$INDEX]} = 1 ] \
            && [ "${STANDBY_SWAPS[$INDEX]}" = "1" ] \
            && [ "${ALT_ADDRESSES[$INDEX]}" != "" ]
        then
            if [ "${INTERFACES_TYPE[$INDEX]}" = "boot" ] \
                && [ "$(findAvailableBoot $INDEX)" = "" ]
            then
                dspmsg scripts.cat 6948 \
"\nThe boot network interface address ${IP_ADDRESSES[$INDEX]}, that was previously swapped with\n\
boot network interface address ${ALT_ADDRESSES[$INDEX]}, was not found to be alive now.\n" \
                    ${IP_ADDRESSES[$INDEX]} ${ALT_ADDRESSES[$INDEX]}

                # The boot we originally had on this interface is no longer up,
                # even though we swapped it to a standby earlier. Most likely
                # it was taken over by IPAT.
                if [ "$(findAvailableService $INDEX)" != "" ]; then
                    # The boot address that was swapped to a standby
                    # has been taken over by a service address. Don't
                    # swap it back, just reconfigure this interface as
                    # the standby we swapped with.
                    STANDBY_SWAPS[$INDEX]=0
                    dspmsg scripts.cat 6949 \
"\nConfiguring interface ${INTERFACES[$INDEX]} with boot network interface address \
${ALT_ADDRESSES[$INDEX]}.\n" ${INTERFACES[$INDEX]} ${ALT_ADDRESSES[$INDEX]}
                else
                    # Somehow the boot address is no longer on the standby
                    # interface we swapped to, and also no service address
                    # was found up on this network on this node.
                    # So simply reconfigure as the original boot, and
                    # don't do any address swap.
                    ALT_ADDRESSES[$INDEX]=""
                    ALT_MTUS[$INDEX]=""
                    STANDBY_SWAPS[$INDEX]=0

                    # Set the interface as not alive so we don't try
                    # to call a stop interface maintenance mode that will fail.
                    INTERFACES_ALIVE[$INDEX]=0

                    dspmsg scripts.cat 6950 \
"\nConfiguring interface ${INTERFACES[$INDEX]} with boot network interface address \
${IP_ADDRESSES[$INDEX]}.\n" ${INTERFACES[$INDEX]} ${IP_ADDRESSES[$INDEX]}
                fi
            elif [ "${INTERFACES_TYPE[$INDEX]}" = "service" ] \
                && [ "$(findAvailableService $INDEX)" = "" ]
            then
                HW_ADDRESSES[$INDEX]=""
                dspmsg scripts.cat 6951 \
"\nThe service IP address ${IP_ADDRESSES[$INDEX]}, that was previously swapped\n\
with network interface address ${ALT_ADDRESSES[$INDEX]}, was not found to be alive \
now.\n" ${IP_ADDRESSES[$INDEX]} ${ALT_ADDRESSES[$INDEX]}

                # The service we originally had on this interface is no longer
                # up, even though we swapped it to a standby earlier. Most
                # likely it was released during a resource group migration.
                ADDR=$(findAvailableBoot $INDEX)
                if [ "$ADDR" = "" ]; then
                    if [ "$(findIPLabelAssociatedRG ${IP_LABELS[$INDEX]})" \
                        != "" ]
                    then
                        ADDR=$(findAvailableStandby $INDEX)
                        if [ "$ADDR" = "" ]; then
                            dspmsg scripts.cat 6946 \
"\nNo available boot network interface was found with which we \n\
can configure interface ${INTERFACES[$INDEX]}.\n\
This interface will be left unconfigured.\n" ${INTERFACES[$INDEX]}
                            ALT_ADDRESSES[$INDEX]=""
                            ALT_MTUS[$INDEX]=""
                            STANDBY_SWAPS[$INDEX]=0
                            INTERFACES_ALIVE[$INDEX]=0
                            let INDEX=INDEX+1
                            continue
                        fi
                        ALT_ADDRESSES[$INDEX]=$ADDR
                        # Copy the MTU value itself; a bare "MTUS[$INDEX]"
                        # on the right-hand side would store that literal
                        # text instead of the number.
                        ALT_MTUS[$INDEX]=${MTUS[$INDEX]}
                        # The catalog name must precede the message number.
                        dspmsg scripts.cat 6954 \
"\nConfiguring interface ${INTERFACES[$INDEX]} with standby address $ADDR.\n" \
                            $ADDR
                    else
                        # This service isn't in a resource group,
                        # so just configure the interface with the service
                        # address and don't do any address swaps.
                        ALT_ADDRESSES[$INDEX]=""
                        ALT_MTUS[$INDEX]=""
                        STANDBY_SWAPS[$INDEX]=0
                        INTERFACES_ALIVE[$INDEX]=0
                        # The array is INTERFACES[]; a misspelled name here
                        # would be an unset reference and abort under set -u.
                        dspmsg scripts.cat 6952 \
"\nConfiguring interface ${INTERFACES[$INDEX]} with service IP address \
${IP_ADDRESSES[$INDEX]}.\n" ${INTERFACES[$INDEX]} ${IP_LABELS[$INDEX]}
                    fi
                else
                    # NOTE(review): $ADDR is reported below but is not stored
                    # in ALT_ADDRESSES[] on this path -- confirm that is
                    # intended.
                    INTERFACES_ALIVE[$INDEX]=0
                    dspmsg scripts.cat 6953 \
"\nConfiguring interface ${INTERFACES[$INDEX]} with boot network interface address $ADDR.\n" \
                        $ADDR
                fi
            fi
        fi

        let INDEX=INDEX+1
    done
}

###############################################################################
# messageTimer
#
# Arguments: None
#
# While we are waiting for the user to confirm the physical replacement,
# we log a message to stdout and to hacmp.out every minute for 10 minutes,
# and then every 10 minutes after that.
###############################################################################
messageTimer()
{
    typeset PS4_FUNC="messageTimer"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    # Directory holding hacmp.out, as recorded in the HACMPlogs ODM class.
    HACMP_OUT_DIR=$(odmget -q "name=hacmp.out" HACMPlogs \
                    | awk '{ if ($1 == "value") print $3 }' \
                    | sed 's/"//g')

    MINUTES=0
    while true
    do
        # One-minute steps for the first ten minutes, ten-minute steps after.
        if [ $MINUTES -ge 10 ]; then
            sleep 600
            let MINUTES=MINUTES+10
        else
            sleep 60
            let MINUTES=MINUTES+1
        fi

        dspmsg scripts.cat 6965 \
"\nStill waiting for the user to confirm replacement of adapter $ADAPTER\n\
after $MINUTES minutes.\n" $ADAPTER $MINUTES

        print \
"\nclchpcihpif[$PID]: Still waiting for the user to confirm replacement of \
adapter $ADAPTER after $MINUTES minutes.\n" >> $HACMP_OUT_DIR/hacmp.out 2>&1

        dspmsg scripts.cat 6966 "\nHit Enter to confirm the replacement.\n"
    done
}

###############################################################################
# handleHotReplacement
#
# Arguments: None
#
# Run drslot to let the user physically replace the adapter.
# If the replacement failed, ask the user if they want to try again or cancel
# the replacement process.  messageTimer runs in the background for the
# duration and is killed before this function returns or exits.
###############################################################################
handleHotReplacement()
{
    typeset PS4_FUNC="handleHotReplacement"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    SLOT=$(lsslot -c pci -F: -l $ADAPTER | cut -d: -f1)
    FIRST_TRY=1

    messageTimer &
    MSGPID=$!

    while true
    do
        dspmsg scripts.cat 6958 \
"\nIt is now OK to hot replace the PCI adapter $ADAPTER.\n\
The PCI slot number is $SLOT.\n\
\n\
Note: If you now decide that you do not want to replace the adapter,\n\
but rather to keep the current adapter, you may leave the current\n\
adapter in the PCI slot and just hit Enter. The adapter will be\n\
reconfigured normally, and minimal disruption to the cluster will be\n\
caused.\n\n" $ADAPTER $SLOT

        if [ $FIRST_TRY != 1 ]; then
            # If this was a follow-up attempt to a failure of the first
            # (or later) attempt, then we cannot run a full replacement,
            # since drslot will have left the PCI slot in the removed
            # state from the failed attempt. So we first place the
            # PCI slot into the remove state. This will work even
            # when nothing is in the slot.
            print | drslot -c pci -I -r -s $SLOT > /dev/null 2>&1

            # Now we try to add the adapter. The user will still
            # be able to remove any adapter currently in the slot while
            # the slot is in the action state.
            drslot -c pci -I -a -s $SLOT
        else
            # On the first attempt to physically replace the adapter,
            # we run a full replacement. That is, drslot will let the
            # user remove the current adapter and then replace it with
            # another one.
            drslot -c pci -I -R -l $ADAPTER
        fi

        # lsslot finding the adapter again means the replacement worked.
        if lsslot -c pci -l $ADAPTER > /dev/null 2>&1
        then
            break
        fi

        # If we got to here then the hot replacement was either canceled
        # or failed.
        dspmsg scripts.cat 6959 \
"\nThe hot replacement of $ADAPTER failed. If you did not manually cancel\n\
the replacement, then an error occurred. See the above error message for\n\
details.\n\
\n\
Would you like to try the replacement again?\n\
1) Yes\n\
2) No\n\
\n\
Warning: Selecting No will cancel the hot replacement process, and the\n\
adapter will be left unconfigured, the PCI slot will be left in the\n\
removed state, and network connectivity monitoring will resume on the\n\
adapter if it was alive before the replacement began. This will not cause\n\
an adapter fail event to occur.\n
\n
Enter Selection: [default: 1]: " $ADAPTER

        getUserInput 2
        if [ $? != 1 ]; then
            # The user selected to cancel the replacement.
            dspmsg scripts.cat 6960 \
"\nCancelling the hot replacement process. Adapter $ADAPTER and all related\n\
interfaces will be left unconfigured. The PCI slot $SLOT will be left in\n\
the removed state. Network connectivity monitoring will resume on all\n\
related interfaces that were alive before the replacement began.\n" \
                $ADAPTER $SLOT

            kill $MSGPID   # kill the messageTimer process
            stopAllInterfaceMaintenanceMode
            rm -f $LOCKFILE
            exit 1
        fi

        # The user asked for another attempt.
        FIRST_TRY=0
    done

    kill $MSGPID   # kill the messageTimer process
}

###############################################################################
# From cl_swap_IP_address
#
# Name: check_ifconfig_status
#
# This modification has been made because of change of behavior
# of 'ifconfig' on AIX 4.1. If we want to add a route which is
# already in the routing table, 'ifconfig' DOES change the
# address on the adapter BUT the return code IS 1 (error). The
# error is accompanied with a message :
#
# "ifconfig: ioctl (SIOCAIFADDR): Do not specify an existing file."
#
# Since the adapter does get the new address, we read the current address
# back from the second line of the 'ifconfig <interface>' output. If it
# is the same as the address that we want to put on the adapter,
# then everything is fine. Otherwise, we give an error.
#
# Arguments:
#       Interface to check
#       IP address in dotted-decimal format
#       Netmask in dotted-decimal format (stored, not otherwise used here)
#
# Returns:
#       0       Interface has a proper entry
#       1       Interface does not have a proper entry
#
###############################################################################
check_ifconfig_status ()
{
    typeset PS4_FUNC="check_ifconfig_status"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    CH_INTERFACE=$1
    CH_ADDRESS=$2
    CH_NETMASK=$3

    # Skip the first output line; the address is the second word of the
    # second line.
    ADDR=$(ifconfig $CH_INTERFACE | (read ; read a b c ; print $b))

    if [[ "$ADDR" = "$CH_ADDRESS" ]]; then
        return 0
    fi

    return 1
}

###############################################################################
# From cl_swap_IP_address
#
# Name: addback_standby_route
#
# When two or more standbys are on the same subnet, only one of the
# standbys is in the routing table as the route. If this standby is
# used to takeover another address, the route also gets destroyed
# in routing table. This routine is used to restore the route for
# the remaining standbys on the subnet.
#
# Arguments: standby_IP_address
#
# Returns: None
#
###############################################################################
addback_standby_route ()
{
    typeset PS4_FUNC="addback_standby_route"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    down_if=$1

    cllsif -Sn $1 | read a b NETWORK c

    # get a list of standby adapters which are eligible to take over any
    # routes going out through the standby we are taking off line
    IFS="$(cl_get_path -S)"
    cllsif -SJ "$OP_SEP" |
    while read adapt type network net_type attrib node ip_addr hw_addr
    do
        # can't be us
        if [[ "$ip_addr" = "$down_if" ]]; then
            continue
        fi

        # it's gotta be a standby
        if [[ "$type" != "standby" ]]; then
            continue
        fi

        # it's gotta be on the same network
        if [[ "$network" != "$NETWORK" ]]; then
            continue
        fi

        # it's gotta be attached to this host
        if [[ "$node" != "$LOCALNODENAME" ]]; then
            continue
        fi

        # if we came this far, make certain the interface is
        # in (in HA parlance, that is).
        addr=\$i$(print $ip_addr | tr "./" "xx")_$LOCALNODENAME
        if [[ $(eval print $addr) != "UP" ]]; then
            continue
        fi

        # we have a good address in "ip_addr" if we made it this far,
        # "down_if" is the interface we are replacing, take all its
        # routes and change them to point out the other standby we
        # just located
        unset IFS
        netstat -nrf inet |
        while read dest gw flags other
        do
            if [[ "$gw" = "$down_if" ]]; then
                case $flags in
                U*H*)
                    route change -host $dest -interface $ip_addr
                    ;;
                U*)
                    route change -net $dest -interface $ip_addr
                    ;;
                esac
            fi
        done

        # once we have the first qualified standby there is no need
        # to go on.
        return 0
    done

    unset IFS

    # if we got this far there are no qualified alternate standbys
    return 0
}

###############################################################################
# From cl_swap_IP_address
#
# Name: replace_routes
#
# For the list of interfaces given on command line, moves each route
# to a loopback address (127.0.0.N) and writes a file (name in $1)
# containing a shell script that will restore the original routing table
#
# Arguments:
#       $1      - filename
#       $2 - $n interface names
#
# Returns: N/A
###############################################################################
replace_routes()
{
    typeset PS4_FUNC="replace_routes"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    RR=$1
    shift
    interfaces="$*"

    cp /dev/null $RR

    # Preamble of the generated restore script.  Unescaped $... is expanded
    # now; \$... is left for the generated script to expand when it runs.
    cat >$RR<<-EOF
#!/bin/ksh
#
# Script created by $PROGNAME on $(date)
#
PATH=$PATH
PS4='$PS4'
export VERBOSE_LOGGING=\${VERBOSE_LOGGING:-"high"}
[[ "\$VERBOSE_LOGGING" = "high" ]] && set -x
: Starting \$0 at \$(date)
#
EOF

    # Addresses currently configured on local interfaces.
    LOCADDRS=$(netstat -in | awk '$3 !~ "[Ll]ink" {print $4}')

    netstat -rn

    # according to the netstat command man page:
    #   U = Up
    #   c = Access to this route creates a cloned route
    #   H = route to a host (use -host instead of -net)
    #   W = the route is a cloned route
    for ifce in $interfaces
    do
        integer I=1
        netstat -nrf inet | fgrep -w $ifce |
        while read DEST GW FLAGS OTHER
        do
            LOOPBACK="127.0.0.$I"

            case $FLAGS in
            U|Uc)
                # all routes created by cloning are evil
                route delete -net $DEST $GW
                ;;
            U*W)
                # all routes created by cloning are evil
                route delete -host $DEST $GW
                ;;
            U*H*b*)
                # Do not change any broadcast routes
                ;;
            U*H*)
                # If $DEST is a locally configured address, do
                # nothing with this route since it may interfere
                # with netmon logic.
                found=0
                for addr in $LOCADDRS
                do
                    if [ "$addr" = "$DEST" ]; then
                        found=1
                        break
                    fi
                done

                if [ $found -eq 0 ]; then
                    # into RR we put the route command:
                    print "cl_route_change $DEST $LOOPBACK $GW" >>$RR
                    # which will undo:
                    cl_route_change $DEST $GW $LOOPBACK
                    I=I+1
                fi
                ;;
            U*)
                # into RR we put the route command:
                print "cl_route_change $DEST $LOOPBACK $GW" >>$RR
                # which will undo:
                cl_route_change $DEST $GW $LOOPBACK
                I=I+1
                ;;
            esac
        done
    done

    chmod +x $RR
    return 0
}

###############################################################################
# From cl_swap_ATMLE_HW_address
#
# Name: hex12_to_dotted
#
# Convert a 12-character HEX value to a dotted HEX value.
#
# Arguments: A HEX string of the form, 0xabcdefghijkl
#
# Stdout: A dotted HEX string of the form, ab.cd.ef.gh.ij.kl
#         Nothing is printed unless the argument is exactly 14 characters
#         long and starts with "0x" or "0X".
#
# Returns: None
###############################################################################
hex12_to_dotted ()
{
    typeset PS4_FUNC="hex12_to_dotted"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    echo $1 | awk 'length($0) == 14 && substr($0, 1, 2) ~ /^0[xX]$/ {
        printf "%s.%s.%s.%s.%s.%s\n", \
            substr($0, 3, 2), substr($0, 5, 2), substr($0, 7, 2), \
            substr($0, 9, 2), substr($0, 11, 2), substr($0, 13, 2)
    }'
}

###############################################################################
# restoreAllRoutes
#
# Arguments: None
# Returns: Nothing
#
# Call the scripts created by replace_routes for each interface, then
# remove them.
###############################################################################
restoreAllRoutes()
{
    typeset PS4_FUNC="restoreAllRoutes"
    [[ "${VERBOSE_LOGGING:-}" = "high" ]] && set -x

    typeset INDEX=0

    until [ $INDEX -ge $NUM_INTERFACES ]
    do
        # The ":" line only serves to show the return code in trace output.
        $RESTORE_ROUTES.${INTERFACES[$INDEX]} > /dev/null 2>&1
        : Completed $RESTORE_ROUTES.${INTERFACES[$INDEX]} with return code $?.
        rm -f $RESTORE_ROUTES.${INTERFACES[$INDEX]}
        let INDEX=INDEX+1
    done
}

###############################################################################
# Main program procedure.
############################################################################### PROGNAME=$(basename ${0}) export PATH="$(/usr/es/sbin/cluster/utilities/cl_get_path all)" set +u [[ "$VERBOSE_LOGGING" = "high" ]] && set -x [[ "$VERBOSE_LOGGING" = "high" ]] && version='1.36.2.2' # Determine what node we are on export LOCALNODENAME=$(get_local_nodename) OP_SEP="$(cl_get_path -S)" CMD="clchpcihpif" if [ $# -ne 1 ] && [ $# -ne 2 ]; then usage fi set -u # Get arguments. ADAPTER=$1 if [ $# = 2 ]; then SHUTDOWN_HACMP=$2 else SHUTDOWN_HACMP="" fi # We care about having //'s here because we may print out # $LOCKFILE to the screen (user visible) later, so let's # make sure it looks clean. LOCKFILE=/usr/es/sbin/cluster/.pcihp.lock GRPSVCS_DAEMON="cthags" MIG_GRPSVCS_DAEMON="grpsvcs" SLEEP_TIME=5 # Number of seconds to sleep while waiting for the # cluster to become stable. # If the cluster is unstable right now, then wait until it is stable. isClusterStable none if [ $? = 0 ]; then waitUntilClusterStable fi # If the lockfile already exists, then either another clchpcihpif process # is already running, or a previous clchpcihpif process terminated abnormally. if [ -e $LOCKFILE ]; then OLDPID=$(< $LOCKFILE) OLDPIDALIVE=$(ps -eo 'pid,ppid,args' | awk -vPID=$OLDPID '{ if ($1 == PID) print }' | grep "$CMD") if [ "$OLDPIDALIVE" != "" ]; then dspmsg scripts.cat 6901 \ "\nAnother PCI hot plug replacement is happening on this node at this time.\n\ Only one replacement process is supported at one time per node.\n\ This process will now exit, leaving the adapter and interface untouched.\n\n" exit 1 else dspmsg scripts.cat 6902 \ "\nA previous PCI hot plug replacement process terminated abnormaly.\n\ The network interface that was to be replaced in the previous process could be\n\ in an unconfigured or removed state. 
If you want to continue with this\n\ replacement process, remove the lock file $LOCKFILE and\n\ restart the PCI hot plug replacement.\n\ This process will now exit, leaving the network adapter and interface untouched.\n\n" \ $LOCKFILE exit 1 fi else print $$ > $LOCKFILE fi PID=$$ # Make sure the PCI adapter given as an argument is a valid, real, # configured PCI hot pluggable adapter. lsslot -c pci -l $ADAPTER > /dev/null 2>&1 if [ $? != 0 ]; then dspmsg scripts.cat 6905 \ "\nThe adapter $ADAPTER was not found in a PCI hot pluggable slot.\n\n" $ADAPTER rm -f $LOCKFILE exit 1 fi # Initialize global variables # Since ATM adapters can have more than one interface per adapter, we use # arrays to store the interfaces and IP labels for the adapter specified. # Although this makes the code a little more complex, it makes it much easier # to write and debug for ATM adapters. # Except for LANE_ADAPTERS[], all the arrays declared below are associated # with the array INTERFACES[]. INTERFACES[0]="" # Interfaces on $ADAPTER. IP_LABELS[0]="" # IP labels associated with $INTERFACES[]. IP_ADDRESSES[0]="" # IP addresses associated with $INTERFACES[]. HW_ADDRESSES[0]="" # Hardware address if HWAT is defined for this # IP label. This only applies to service labels. # Multiple ATM LANE adapters could have HWAT # defined. # Only one ATM Classical IP interface is allowed # to have HWAT defined per ATM card. NETMASKS[0]="" # Netmask associated with the IP address on the # interface. MTUS[0]="" # MTU size of the interface. LANE_ADAPTERS[0]="" # LANE adapters on $ADAPTER, if $ADAPTER is an # ATM adapter. # NOT associated with $INTERFACES[]. NUM_LANE_ADAPTERS=0 # Number of LANE adapters on $ADAPTER. INTERFACES_ALIVE[0]="" # Element value = 1 if associated interface is alive. ALT_ADDRESSES[0]="" # Addresses that we either swapped svc/boot # addresses with, or the boot address that the # interface was configured with after moving a # resource group. 
# --- Per-interface state, indexed in parallel with INTERFACES[] -------------

# Interface type: boot, service, or standby.
INTERFACES_TYPE[0]=""

# MTU size that belongs to the matching entry of $ALT_ADDRESSES[].
ALT_MTUS[0]=""

# "1" when this is a service/boot label and an available standby exists
# that it can be swapped with.
STANDBY_SWAPS[0]=""

# Non-empty when the interface carries a service label belonging to a
# cascading resource group that reached this node via IPAT.
CASCADING_IPAT_SVCS[0]=""

# Non-empty when the interface carries a service label that is part of a
# resource group.
RESOURCE_GROUPS[0]=""

# Node the user picked as the migration target for the corresponding
# resource group; empty when no migration was requested.
RG_MIG_NODES[0]=""

# 1 or 0: whether the user wants the corresponding resource group moved
# back to this (the local) node once the replacement is complete.
RG_MIG_BACK[0]=""

# 1 = the resource group was moved off this node, 0 = it was not.
RG_MOVED[0]=0

# --- Counters and flags ------------------------------------------------------

# Number of interfaces on $ADAPTER.
NUM_INTERFACES=0

# DARE verification checks are wanted for the first resource group
# migration only; the flag is dropped to 0 afterwards so that later
# migrations skip them.
FIRST_MIG=1

# --- ATM interfaces outside the HACMP topology (ATM adapters only) ----------

# Interfaces in the "up" state that are not part of the HACMP topology.
# They must be detached before the physical ATM adapter is rmdev'ed,
# otherwise the rmdev fails.
NON_HACMP_INTERFACES[0]=""
NUM_NON_HACMP=0

# --- Route restoration -------------------------------------------------------

# File written by the replace_routes routine so the routes on the
# interfaces can be restored after they are reconfigured.
RESTORE_ROUTES="/usr/es/sbin/cluster/.pcihp.restore_routes"

# Determine the adapter type.  We only support ethernet, token ring, and ATM
# adapters for PCI hot plug replacement in HACMP.
# Split the adapter name into its alphabetic type prefix and trailing number
# (for example "ent2" -> "ent" and "2").
ADAPTER_TYPE=$(print $ADAPTER | sed 's/[0-9]*$//')
ADAPTER_NUM=$(print $ADAPTER | sed 's/^[^0-9]*//')

# Collect the interfaces that live on the adapter.  IS_ATM records whether
# the ATM-specific handling applies.
case $ADAPTER_TYPE in
    ent)
        IS_ATM=0
        getInterfaces en$ADAPTER_NUM
        ;;
    tok)
        IS_ATM=0
        getInterfaces tr$ADAPTER_NUM
        ;;
    atm)
        IS_ATM=1
        getATMInterfaces $ADAPTER
        ;;
    *)
        # The adapter name is passed as a message argument, as it is for
        # the other adapter-related messages, so that a translated catalog
        # entry can insert it.
        dspmsg scripts.cat 6904 \
"\nThe network adapter $ADAPTER is an unknown or unsupported network adapter type.\n\n" $ADAPTER
        rm -f $LOCKFILE
        exit 1
        ;;
esac

# See if any interfaces on the adapter are alive.  (ATM can have more than
# one interface on an adapter.)
# If this happens to be the last alive HACMP adapter on this node, then
# we inform the user that a partitioned cluster would occur if they
# were to replace the adapter w/o shutting down HACMP on this node first.
FOUNDALIVE=$(cllsaliveif -J "$OP_SEP" | \
    awk -F"$OP_SEP" -v ADAPTER="$ADAPTER" \
        '{ if ($3 == ADAPTER || $5 == ADAPTER) print }')
if [ "$FOUNDALIVE" != "" ]; then
    # Something on our adapter is alive; look for alive entries that do
    # not belong to this adapter.
    FOUNDOTHERS=$(cllsaliveif -J "$OP_SEP" | \
        awk -F"$OP_SEP" -v ADAPTER="$ADAPTER" \
            '{ if ($3 != ADAPTER && $5 != ADAPTER) print }')
    if [ "$FOUNDOTHERS" = "" ]; then
        dspmsg scripts.cat 6906 \
"\nAdapter $ADAPTER is the only alive HACMP network adapter on this\n\
node and there are no other keepalive network paths available.\n\
Removing this adapter would cause a partitioned cluster. You cannot\n\
continue this procedure. You must do the hot replacement manually with\n\
drslot or the smit fast-path devdrpci after shutting down HACMP on this\n\
node.\n\n" $ADAPTER

        # Shutdown HACMP on the local node if the user requested so from SMIT.
        if [ "$SHUTDOWN_HACMP" = "shutdown" ]; then
            dspmsg scripts.cat 6961 \
"\nContinuing to shutdown HACMP on node $LOCALNODENAME gracefully with\n\
takeover as you requested.\n\n" $LOCALNODENAME
            clstop -yNgr
        fi

        rm -f $LOCKFILE
        exit 1
    fi
fi

# Initialize all variables that aren't guaranteed to be filled for
# each interface.  This ensures no invalid references happen because
# of "set -u".
# Give every per-interface slot a defined value so that later references
# are safe under "set -u".
INDEX=0
while [ $INDEX -lt $NUM_INTERFACES ]
do
    ALT_ADDRESSES[$INDEX]=""
    ALT_MTUS[$INDEX]=""
    STANDBY_SWAPS[$INDEX]=""
    INTERFACES_TYPE[$INDEX]=""
    CASCADING_IPAT_SVCS[$INDEX]=""
    RESOURCE_GROUPS[$INDEX]=""
    RG_MIG_NODES[$INDEX]=""
    RG_MIG_BACK[$INDEX]=""
    RG_MOVED[$INDEX]=0
    let INDEX=INDEX+1
done

# Determine what actions to take on each IP label that belongs to the
# adapter we are going to hot replace.
INDEX=0
while [ $INDEX -lt $NUM_INTERFACES ]
do
    ALIVE=$(cllsaliveif \
        | awk -vLABEL="${IP_LABELS[$INDEX]}" '{ if ($1 == LABEL) print }')
    if [ "$ALIVE" != "" ]; then
        INTERFACES_ALIVE[$INDEX]=1

        # This interface is alive, so we should handle any possible
        # address swaps, resource group movements, and maintenance modes.
        handleInterfacePreReplacement $INDEX
    else
        INTERFACES_ALIVE[$INDEX]=0

        # The cllsif output has to be piped into awk: without the pipe,
        # awk and its program are handed to cllsif as extra arguments and
        # the service/node filter below is never applied.
        CASCADING_IPAT_SVCS[$INDEX]=$(cllsif -Scn ${IP_LABELS[$INDEX]} \
            | awk -vNODE=$LOCALNODENAME -v FS=: \
                '{ if ($2 == "service" && $5 != NODE) print "1" }')
        if [ "${CASCADING_IPAT_SVCS[$INDEX]}" != "" ]; then
            # This interface was found to have an IP label on it that came
            # to this node via IPAT from a cascading resource group migration.
            # This is a special case, since HACMP does not monitor the network
            # traffic on a service label that was moved over to the local node
            # via IPAT to what was originally a standby interface.
            # Thus, since the resource group associated with this service
            # label might be up, we need to handle that by giving the user
            # the ability to move the resource group, but we don't apply
            # a maintenance mode or attempt to swap the address with
            # a standby address.
            RG=$(getRGUpOnIPLabel ${IP_LABELS[$INDEX]})
            if [ "$RG" != "" ]; then
                RESOURCE_GROUPS[$INDEX]=$RG
                handleRGPreReplacement $INDEX $RG
            fi
        fi
    fi

    let INDEX=INDEX+1
done

# Now that we've setup all our arrays for any resource group movement
# and standby address swapping, we now actually carry out the appropriate
# actions.

# First, we handle all resource group migrations.
# Migrate away every resource group the user asked to move (RG_MIG_NODES[]
# is non-empty for those interfaces).
INDEX=0
while [ $INDEX -lt $NUM_INTERFACES ]
do
    if [ "${RG_MIG_NODES[$INDEX]}" != "" ]; then
        # Only the first migration asks for verification.
        if [ $FIRST_MIG = 1 ]; then
            FIRST_MIG=0
            VERIFY=1
        else
            VERIFY=0
        fi

        typeset RG=${RESOURCE_GROUPS[$INDEX]}

        dspmsg scripts.cat 6933 \
"Migrating resource group $RG to node \
${RG_MIG_NODES[$INDEX]}.\n\n" $RG ${RG_MIG_NODES[$INDEX]}

        migrateRG ${IP_LABELS[$INDEX]} $RG \
            ${RG_MIG_NODES[$INDEX]} $VERIFY away $INDEX
        if [ $? = 1 ]; then
            # If the migration didn't take place because the service
            # label was no longer found on this interface, then
            # don't do any more processing here.  Continue on to
            # the next IP label to process.
            let INDEX=INDEX+1
            continue
        fi

        RG_MOVED[$INDEX]=1

        # Now that we have migrated the resource group to another node,
        # we now have a standby or boot label where the service label was,
        # depending on whether the resource group was a rotating group,
        # a cascading group on its home node, or a cascading group
        # not on its home node.
        # Thus, we now need to determine what that standby/boot address is,
        # as well as its MTU size, and make sure we don't try to restore
        # the service label's HW address to the standby/boot after the
        # physical hot replacement.
        NETWORK=$(cllsif -Scn ${IP_LABELS[$INDEX]} | cut -d: -f3)

        # Match the network name exactly on the first field.  A plain
        # grep would also pick up any network whose name merely contains
        # $NETWORK and yield several lines instead of one flag.
        ALIAS_NETWORK=$(cllsnw -c | awk -F: -v NET="$NETWORK" '$1 == NET { print $3 }')
        if [[ $ALIAS_NETWORK = "true" ]] \
            && [[ ${INTERFACES_TYPE[$INDEX]} = "service" ]]
        then
            # If this is an aliasing network service label, then it was
            # moved away during the above rg_move, so we need to "forget"
            # about it so that we don't try to deconfigure it, etc.
            INTERFACES[$INDEX]=""
            IP_LABELS[$INDEX]=""
            IP_ADDRESSES[$INDEX]=""
        else
            IP_LABEL=$(getIPLabel ${INTERFACES[$INDEX]})

            # Same lookup as in the inner loop below: use the $OP_SEP
            # delimited output so an address that itself contains ":"
            # is not cut short.
            ALT_ADDRESSES[$INDEX]=$(cllsif -J "$OP_SEP" -Sn $IP_LABEL | cut -d"$OP_SEP" -f7)
            ALT_MTUS[$INDEX]=$(getMTU ${INTERFACES[$INDEX]})
            HW_ADDRESSES[$INDEX]=""

            # We have to make sure the interface is considered alive at this
            # point, otherwise if the original address was from IPAT via a
            # cascading resource migrating to this (not its home) node,
            # then the standby address we now have wouldn't get put into
            # maintenance mode.
            INTERFACES_ALIVE[$INDEX]=1
        fi

        # Since a resource group can have multiple service labels,
        # and an ATM adapter can have multiple interfaces configured,
        # we need to check if any other IP labels we're handling on
        # this adapter are in this resource group that was migrated.
        # If we find any that are, then they will have moved off this
        # node during the migration above.  So, perform the same
        # post-migration operations we just did above.
        INDEX_2=0
        while [ $INDEX_2 -lt $NUM_INTERFACES ]
        do
            if [ "${RESOURCE_GROUPS[$INDEX_2]}" = "$RG" ] \
                && [ $INDEX_2 != $INDEX ]
            then
                RG_MOVED[$INDEX_2]=1
                IP_LABEL=$(getIPLabel ${INTERFACES[$INDEX_2]})
                ALT_ADDRESSES[$INDEX_2]=$(cllsif -J "$OP_SEP" -Sn $IP_LABEL | cut -d"$OP_SEP" -f7)
                ALT_MTUS[$INDEX_2]=$(getMTU ${INTERFACES[$INDEX_2]})
                HW_ADDRESSES[$INDEX_2]=""
                INTERFACES_ALIVE[$INDEX_2]=1

                # This address could have been set to swap earlier,
                # so make sure that doesn't happen now.
                STANDBY_SWAPS[$INDEX_2]=""
            fi
            let INDEX_2=INDEX_2+1
        done
    fi

    let INDEX=INDEX+1
done

# Next, we handle all address swaps with standby addresses.
INDEX=0
while [ $INDEX -lt $NUM_INTERFACES ]
do
    # Entries blanked out above (aliased service labels that moved away
    # with their resource group) are skipped.
    if [[ ${INTERFACES[$INDEX]} = "" ]]; then
        let INDEX=INDEX+1
        continue
    fi

    if [ ${INTERFACES_ALIVE[$INDEX]} = 1 ] \
        && [ "${STANDBY_SWAPS[$INDEX]}" = "1" ] \
        && [ "${ALT_ADDRESSES[$INDEX]}" != "" ]
    then
        NETWORK=$(cllsif -Scn ${IP_LABELS[$INDEX]} | cut -d: -f3)

        # An available standby was found, so we request a swap adapter
        # event.
        dspmsg scripts.cat 6913 \
"\nThe IP address ${IP_ADDRESSES[$INDEX]} is a ${INTERFACES_TYPE[$INDEX]} \
address.\n\
It is being swapped with the boot network interface address ${ALT_ADDRESSES[$INDEX]}.\n\n" \
            ${IP_ADDRESSES[$INDEX]} ${INTERFACES_TYPE[$INDEX]} \
            ${ALT_ADDRESSES[$INDEX]}

        waitUntilClusterStable

        /usr/es/sbin/cluster/utilities/clswapaddress \
            $NETWORK ${IP_ADDRESSES[$INDEX]} \
            ${ALT_ADDRESSES[$INDEX]} > /dev/null 2>&1
        if [ $? != 0 ]; then
            dspmsg scripts.cat 6912 \
"\nA request to swap address ${IP_ADDRESSES[$INDEX]} with \
${ALT_ADDRESSES[$INDEX]} failed.\n\n" \
                ${IP_ADDRESSES[$INDEX]} ${ALT_ADDRESSES[$INDEX]}
            dspmsg scripts.cat 6910 \
"\nCancelling PCI hot plug replacement process because of fatal error above.\n\n"
            rm -f $LOCKFILE
            exit 1
        fi

        # We must update our MTU value for this interface and set the
        # hardware address value to null now that we have
        # swapped the service/boot label with a standby label.
        # Otherwise we'd end up reconfiguring this interface with the
        # wrong MTU size and a hardware address that it's not supposed to have.
        ALT_MTUS[$INDEX]=$(getMTU ${INTERFACES[$INDEX]})
        HW_ADDRESSES[$INDEX]=""

        waitUntilClusterStable
    fi

    let INDEX=INDEX+1
done

# Now we apply maintenance modes to any alive addresses.
INDEX=0
while [ $INDEX -lt $NUM_INTERFACES ]
do
    if [[ ${INTERFACES[$INDEX]} = "" ]]; then
        let INDEX=INDEX+1
        continue
    fi

    if [ ${INTERFACES_ALIVE[$INDEX]} = 1 ]; then
        NETWORK=$(cllsif -Scn ${IP_LABELS[$INDEX]} | cut -d: -f3)

        # Exact match on the network name (see the migration loop above).
        ALIAS_NETWORK=$(cllsnw -c | awk -F: -v NET="$NETWORK" '$1 == NET { print $3 }')

        if [[ "${ALT_ADDRESSES[$INDEX]}" != "" && "$ALIAS_NETWORK" != "true" ]]
        then
            # If we have swapped our address with a standby, or if we migrated
            # a resource group off of this ip label to another node,
            # then we need to apply a maintenance mode on the standby address,
            # or boot address, that is now on this interface.
            interfaceMaintenanceMode start ${ALT_ADDRESSES[$INDEX]} \
                $NETWORK ${INTERFACES[$INDEX]}
        else
            interfaceMaintenanceMode start ${IP_ADDRESSES[$INDEX]} \
                $NETWORK ${INTERFACES[$INDEX]}
        fi
    fi

    let INDEX=INDEX+1
done

# We need to wait for the cluster state to be stable here
# because we don't want to detach any interfaces before the last
# interface maintenance mode event has finished processing.
waitUntilClusterStable

print

# Now unconfigure each interface.
INDEX=0
while [ $INDEX -lt $NUM_INTERFACES ]
do
    if [[ ${INTERFACES[$INDEX]} = "" ]]; then
        let INDEX=INDEX+1
        continue
    fi

    unconfigureInterface $INDEX hacmp
    let INDEX=INDEX+1
done

# Now unconfigure each ATM Classical IP and LANE interface, if any,
# that are not part of the HACMP topology that are configured on the
# adapter to replace.
INDEX=0
while [ $INDEX -lt $NUM_NON_HACMP ]
do
    unconfigureInterface $INDEX nonhacmp
    let INDEX=INDEX+1
done

sleep 3 # Sleep for a bit to allow the unconfiguration to complete,
        # otherwise rmdev on the adapter could fail.

print

# Unconfigure all ATM LANE adapters.
INDEX=0
while [ $INDEX -lt $NUM_LANE_ADAPTERS ]
do
    unconfigureAdapter ${LANE_ADAPTERS[$INDEX]} verbose
    let INDEX=INDEX+1
done

sleep 3 # Sleep for a bit to allow the unconfiguration to complete,
        # otherwise rmdev on the adapter could fail.

# Unconfigure the physical adapter.
unconfigureAdapter $ADAPTER verbose

# Re-alias any persistent labels that were deleted during the
# rmdev of the interface that is being hot replaced.
# As this is a user initiated operation we set the INVOCATION_FLAG
# to skip exercising the resource distribution policy.
export INVOCATION_FLAG=USER_REQUESTED
cl_configure_persistent_address config_all

# Perform the physical hot replacement.
handleHotReplacement

# Now we do things in reverse.

# Check local network topology and handle any changes affecting us.
handleAllInterfacesPostReplacement

# Configure the physical adapter.
configureAdapter $ADAPTER verbose

# Perform any hardware address restorations.
# Restore a hardware address on every interface that still has one recorded
# (HW_ADDRESSES[] was cleared earlier for interfaces whose address was
# swapped or whose resource group was moved away).
INDEX=0
while [ $INDEX -lt $NUM_INTERFACES ]
do
    # Skip entries that were blanked out during resource group migration.
    if [[ ${INTERFACES[$INDEX]} = "" ]]; then
        let INDEX=INDEX+1
        continue
    fi

    if [ "${HW_ADDRESSES[$INDEX]}" != "" ]; then
        configureHWAddress ${INTERFACES[$INDEX]} ${HW_ADDRESSES[$INDEX]}
    fi
    let INDEX=INDEX+1
done

# Configure all ATM LANE adapters.
INDEX=0
while [ $INDEX -lt $NUM_LANE_ADAPTERS ]
do
    configureAdapter ${LANE_ADAPTERS[$INDEX]} verbose
    let INDEX=INDEX+1
done

sleep 2 # Sleep to make sure the devices are fully functioning
        # before reconfiguring interfaces.

# Now configure each interface.
INDEX=0
while [ $INDEX -lt $NUM_INTERFACES ]
do
    if [[ ${INTERFACES[$INDEX]} = "" ]]; then
        let INDEX=INDEX+1
        continue
    fi

    typeset NETWORK=$(cllsif -Scn ${IP_LABELS[$INDEX]} | cut -d: -f3)

    # Match the network name exactly on the first field.  A plain grep
    # would also pick up any network whose name merely contains $NETWORK
    # and yield several lines instead of one flag.
    typeset ALIAS_NETWORK=$(cllsnw -c | awk -F: -v NET="$NETWORK" '$1 == NET { print $3 }')

    # Everything is reconfigured here except a service label on an
    # aliasing network.
    if [[ "$ALIAS_NETWORK" != "true" || "${INTERFACES_TYPE[$INDEX]}" != "service" ]]
    then
        configureInterface $INDEX
        sleep 2
    fi
    let INDEX=INDEX+1
done

# Re-alias any persistent labels that were deleted during the
# rmdev of the interface that is being hot replaced.
cl_configure_persistent_address config_all

# Stop all interface maintenance modes that were applied above.
stopAllInterfaceMaintenanceMode

# We need to wait for the cluster state to be stable here
# because we don't want to swap any addresses before the last
# interface maintenance mode event has finished processing.
waitUntilClusterStable

# Next, we handle all address swaps with standby addresses.
# This time we swap the standby address on our interface with the service
# or boot address that we originally had.
# Walk the interfaces once more and undo every standby swap that was made
# before the replacement.  An entry qualifies only when it still names an
# interface, the interface is marked alive, a swap was flagged for it, and
# an alternate address is recorded.
INDEX=0
while (( INDEX < NUM_INTERFACES ))
do
    if [[ ${INTERFACES[$INDEX]} != "" ]] \
        && [ ${INTERFACES_ALIVE[$INDEX]} = 1 ] \
        && [ "${STANDBY_SWAPS[$INDEX]}" = "1" ] \
        && [ "${ALT_ADDRESSES[$INDEX]}" != "" ]
    then
        NETWORK=$(cllsif -Scn ${IP_LABELS[$INDEX]} | cut -d: -f3)

        dspmsg scripts.cat 6939 \
"\nSwapping ${INTERFACES_TYPE[$INDEX]} address ${IP_ADDRESSES[$INDEX]} back \
with network interface address\n\
${ALT_ADDRESSES[$INDEX]}.\n\n" \
            ${INTERFACES_TYPE[$INDEX]} ${IP_ADDRESSES[$INDEX]} \
            ${ALT_ADDRESSES[$INDEX]}

        # Let any pending cluster event finish before requesting the swap.
        waitUntilClusterStable

        # A failed swap is fatal: report it, explain what state things
        # were left in, drop the lock and stop.
        /usr/es/sbin/cluster/utilities/clswapaddress \
            $NETWORK ${IP_ADDRESSES[$INDEX]} \
            ${ALT_ADDRESSES[$INDEX]} > /dev/null 2>&1 || {
            dspmsg scripts.cat 6912 \
"\nA request to swap address ${IP_ADDRESSES[$INDEX]} with \
${ALT_ADDRESSES[$INDEX]} failed.\n\n" \
                ${IP_ADDRESSES[$INDEX]} ${ALT_ADDRESSES[$INDEX]}
            dspmsg scripts.cat 6910 \
"\nCancelling PCI hot plug replacement process because of fatal error above.\n\n"
            dspmsg scripts.cat 6940 \
"\nAlthough the hot replacement was a success, and all interfaces have been\n\
reconfigured, not all service IP addresses and boot network interface \n\
addresses are back to their original location, and no resource groups \n\
have been moved back to this node.\n\
Manual intervention will be required to resolve these issues.\n\n"
            rm -f $LOCKFILE
            exit 1
        }

        # Give the swap event time to complete before moving on.
        waitUntilClusterStable
    fi

    (( INDEX = INDEX + 1 ))
done

# Finally, bring back every resource group the user asked to have returned
# to this node (RG_MIG_BACK[] is "1" for those).
INDEX=0
while (( INDEX < NUM_INTERFACES ))
do
    if [[ "${RG_MIG_BACK[$INDEX]}" = "1" ]]; then
        dspmsg scripts.cat 6933 \
"Migrating resource group ${RESOURCE_GROUPS[$INDEX]} to node \
$LOCALNODENAME.\n\n" ${RESOURCE_GROUPS[$INDEX]} $LOCALNODENAME

        migrateRG "" ${RESOURCE_GROUPS[$INDEX]} \
            $LOCALNODENAME 0 back $INDEX
    fi

    (( INDEX = INDEX + 1 ))
done

dspmsg scripts.cat 6962 "\nThe hot replacement was a success.\n"

# Release the lock taken at start-up and report success.
rm -f $LOCKFILE

exit 0