#!/bin/ksh93
# IBM_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# 61haes_r714 src/43haes/usr/sbin/cluster/events/node_up_local.sh 1.2.18.1
#
# Licensed Materials - Property of IBM
#
# COPYRIGHT International Business Machines Corp. 1990,2014
# All Rights Reserved
#
# US Government Users Restricted Rights - Use, duplication or
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
#
# IBM_PROLOG_END_TAG
# @(#)86 1.2.18.1 src/43haes/usr/sbin/cluster/events/node_up_local.sh, hacmp.events, 61haes_r714, 1415A_hacmp714 2/10/14 21:26:54
#########################################################################
#
#   COMPONENT_NAME: EVENTS
#
#   FUNCTIONS: filesystem
#
#########################################################################

#########################################################################
#                                                                       #
#       Name:           node_up_local                                   #
#                                                                       #
#       Description:    This script is called when the local node       #
#                       joins the cluster.                              #
#                       The script acquires the service address         #
#                       (or shared address), gets all its owned         #
#                       (or shared) resources, and takes the resources. #
#                       These include making disks available,           #
#                       varyon volume groups, mounting filesystems,     #
#                       exporting filesystems, NFS_mounting             #
#                       filesystems, and varyon concurrent vgs, and     #
#                       starting AIX Connections realm/service pairs.   #
#                                                                       #
#       Called by:      node_up, reconfig_resource_acquire, rg_move     #
#                                                                       #
#       Calls to:       acquire_service_addr, get_disk_vg_fs,           #
#                       get_aconn_rs, cl_start_snalink                  #
#                                                                       #
#       Arguments:      None                                            #
#                                                                       #
#       Returns:        0       success                                 #
#                       1       failure                                 #
#                       2       bad argument                            #
#                                                                       #
#########################################################################

#########################################################################
#
#   Function:  get_filesystems
#              (the function is actually defined, and called, under the
#              spelling "get_fileysystems"; the name must stay as is)
#
#   Runs the get_disk_vg_fs event for the resource group and, when
#   EXPORT_FILESYSTEM or EXPORT_FILESYSTEM_V4 is non-empty, exports the
#   filesystems through cl_export_fs.
#
#   Arguments:        none
#
#   Globals read:     VERBOSE_LOGGING, PLATFORM, FILESYSTEM, VOLUME_GROUP,
#                     DISK, EXPORT_FILESYSTEM, EXPORT_FILESYSTEM_V4,
#                     TAKEOVER_LABEL, SERVICE_LABEL, STATUS, ROOT
#   Globals written:  OEM_FILESYSTEM, OEM_VOLUME_GROUP (defaulted to ""),
#                     RC, SERVICE, ROOT (labels joined with ":"),
#                     STATUS (set to 2 on failure),
#                     REP_RES_FATAL (set to 1 when get_disk_vg_fs
#                     returns 7)
#
#   Returns:          No explicit status; the outcome is reported through
#                     STATUS and REP_RES_FATAL.  All variables named above
#                     must be set by the caller because the function
#                     leaves "set -u" switched on.
#
#########################################################################
get_fileysystems()
{
    typeset PS4_FUNC="get_fileysystems"
    [[ "$VERBOSE_LOGGING" == "high" ]] && set -x

    # The OEM variables are optional: give them an empty default while
    # unset-variable checking is off, then turn the checking back on.
    set +u
    OEM_FILESYSTEM=${OEM_FILESYSTEM:-""}
    OEM_VOLUME_GROUP=${OEM_VOLUME_GROUP:-""}
    set -u

    # Nothing is done on Linux.
    [[ "$PLATFORM" == "__LINUX__" ]] && return;

    clcallev get_disk_vg_fs "$FILESYSTEM" "$VOLUME_GROUP" "$DISK" "$OEM_FILESYSTEM" "$OEM_VOLUME_GROUP"
    RC=$?
    : exit status of get_disk_vg_fs $FILESYSTEM $VOLUME_GROUP $DISK $OEM_FILESYSTEM $OEM_VOLUME_GROUP is: $RC
    if (( $RC != 0 ))
    then
        STATUS=2
        # Return code 7 is flagged separately; the caller exits as soon
        # as it sees REP_RES_FATAL set.
        if (( $RC == 7 )) ; then
            REP_RES_FATAL=1
        fi
        return
    fi

    if [[ -n "$EXPORT_FILESYSTEM$EXPORT_FILESYSTEM_V4" ]]
    then
        #
        : Export filesystems to all other cluster nodes .
        : Before doing that, build a root label list, which
        : contains all nodes\' service labels.
        #
        SERVICE=$(cllsif -cSp | awk -F : '$2 == "boot" || $2 == "service" || $2 == "persistent"' | grep -v :serial: | cut -d: -f1 | sort -u)
        # $SERVICE is deliberately unquoted: one label per word.
        for service in $SERVICE
        do
            if [[ -n "$ROOT" ]]
            then
                ROOT="$ROOT:$service"
            else
                ROOT="$service"
            fi
        done

        #
        : stop NFS daemons
        #
        if [[ -n "$TAKEOVER_LABEL$SERVICE_LABEL" ]]
        then
            stopsrc -s rpc.lockd
            : exit status of stopsrc -s rpc.lockd is: $?
            # Marker file recording that rpc.lockd was stopped here.
            touch /tmp/.RPCLOCKDSTOPPED
        fi

        cl_export_fs "$ROOT" "$EXPORT_FILESYSTEM" "$EXPORT_FILESYSTEM_V4"
        RC=$?
        : exit status of cl_export_fs $ROOT $EXPORT_FILESYSTEM $EXPORT_FILESYSTEM_V4 is: $RC
        # Only record the failure if no earlier error is already stored.
        if (( $RC != 0 && $STATUS == 0 )) ; then
            STATUS=2
        fi
    fi
}

#########################################################################
#
#  Set status of resource in resource location DB.
#  Note that we only set this
#  if NFS_resource is "FALSE" or nonexistent,
#  because this means that we are actually taking the resource.
#
#  Function:         set_resource_status
#
#  Arguments:        $1 - state passed to "clchdaemons ... -v".
#                         "ACQUIRING" selects the cl_RMupdate "acquiring"
#                         actions; any other value selects "rg_error".
#
#  Globals read:     VERBOSE_LOGGING, GROUPNAME, LOCALNODENAME, PROGNAME,
#                    FOLLOWER_ACTION, NFS_<GROUPNAME>
#  Globals written:  TEMPNFS, STATUS (set to 1 if clchdaemons or
#                    cl_RMupdate fails)
#
#########################################################################
set_resource_status()
{
    typeset PS4_FUNC="set_resource_status"
    [[ $VERBOSE_LOGGING == "high" ]] && set -x

    # Indirect lookup of the variable named NFS_<GROUPNAME>; it may be
    # unset, so unset-variable checking is suspended around the eval.
    set +u
    eval TEMPNFS=\$"NFS_$GROUPNAME"
    set -u

    if [[ -z $TEMPNFS || $TEMPNFS == "FALSE" ]]
    then
        if ! clchdaemons -d clstrmgr_scripts -t resource_locator -n "$LOCALNODENAME" -o "$GROUPNAME" -v "$1"
        then
            cl_log 655 "$PROGNAME: Problem with resource location database in HACMPdaemons ODM.\n" "$PROGNAME"
            STATUS=1
        fi

        #
        : Resource Manager Updates
        #
        # Both secondary follower actions map to the same "_secondary"
        # update.  The if-construct must stay the last command before
        # the $? test below: its status is that of the cl_RMupdate run.
        if [[ $1 == ACQUIRING ]]
        then
            if [[ $FOLLOWER_ACTION == "ACQUIRE_SECONDARY" || $FOLLOWER_ACTION == "PRIMARY_BECOMES_SECONDARY" ]]
            then
                cl_RMupdate acquiring_secondary "$GROUPNAME" "$PROGNAME"
            else
                cl_RMupdate acquiring "$GROUPNAME" "$PROGNAME"
            fi
        else
            if [[ $FOLLOWER_ACTION == "ACQUIRE_SECONDARY" || $FOLLOWER_ACTION == "PRIMARY_BECOMES_SECONDARY" ]]
            then
                cl_RMupdate rg_error_secondary "$GROUPNAME" "$PROGNAME"
            else
                cl_RMupdate rg_error "$GROUPNAME" "$PROGNAME"
            fi
        fi
        (( $? != 0 )) && STATUS=1
    fi
}

#########################################################################
#
: Main Starts Here
#
#   STATUS convention used below:
#     0  no error so far
#     1  error that is reported to the caller (exit 1)
#     2  error that is recorded via set_resource_status "ERROR" but for
#        which the script still exits 0 (see the end of the script)
#
#########################################################################

PROGNAME=${0##*/}
export PATH="$(/usr/es/sbin/cluster/utilities/cl_get_path all)"

[[ $VERBOSE_LOGGING == high ]] && set -x
[[ $VERBOSE_LOGGING == high ]] && version='1.2.18.1 $Source: 61haes_r711 43haes/usr/sbin/cluster/events/node_up_local.sh 6$'

# Sourced file; presumably provides acquire_udresources -- confirm.
. /usr/es/sbin/cluster/events/reconfig_udresources

integer STATUS=0
integer CROSSMOUNT=0
integer REP_RES_FATAL=0
integer SKIPBRKRES=0
integer SKIPVARYON=0
ROOT=""
sddsrv_off=FALSE

# The script takes no arguments.
if (( $# != 0 ))
then
    cl_echo 1035 "Usage: $PROGNAME\n" "$PROGNAME"
    exit 2
fi

set -u

#
: First, indicate that the resource is in the process of coming up by placing
: it into state "ACQUIRING". This will persist until the resource comes
: completely up or there is an error.
#
set_resource_status "ACQUIRING"

# A ";/" inside MOUNT_FILESYSTEM marks a cross-mount.
export CROSSMOUNT
if [[ -n "$MOUNT_FILESYSTEM" ]]
then
    if [[ $MOUNT_FILESYSTEM == *";/"* ]]
    then
        CROSSMOUNT=1
    fi
fi

if [[ $PRINCIPAL_ACTION == "ACQUIRE" ]]
then
    #
    : Call replicated resource set_primary method associated
    : with any replicated resource defined in the resource group
    : we arecurrently processing.
    #
    # NOTE(review): this assignment replaces any STATUS=1 recorded by
    # set_resource_status above -- confirm that is intended.
    call_replicated_methods "set_primary" ""
    STATUS=$?
    : exit status of call_replicated_methods is: $STATUS
fi

acquire_udresources FIRST
RC=$?
: exit status of acquire_udresources is: $RC
if (( $RC != 0 ))
then
    echo "Failed to Start userdefined resources for '${GROUPNAME}' "
    (( $STATUS == 0 )) && STATUS=2
fi

#
: Start the WPAR. Due to the fact that WPAR enablement/disablement is
: done in a lazy fashion, the actual state of WPAR activity will not
: necessarily match our ODM state. Consequently, we can\'t simply look
: at the WPAR_NAME environment variable.
#
: The command clstart_wpar will check if the resource group actually has
: a WPAR so we don\'t need to check for that here.
#
clstart_wpar
RC=$?
: exit status of clstart_wpar is: $RC
if (( $RC != 0 ))
then
    echo "Failed to start the WPAR associated with resource group '${GROUPNAME}'"
    (( $STATUS == 0 )) && STATUS=2
fi

acquire_udresources AFTER_WPAR
RC=$?
: exit status of acquire_udresources is: $RC
if (( $RC != 0 ))
then
    echo "Failed to Start userdefined resources for '${GROUPNAME}' "
    (( $STATUS == 0 )) && STATUS=2
fi

#
: Mount filesystems, varyon volume groups, make disks available,
: and export filesystems if FS_BEFORE_IPADDR is true i.e acquire
: FS before acquiring IPaddr. This removes the error of "Missing Filesystem"
#
if [[ "$FS_BEFORE_IPADDR" == "true" ]]
then
    get_fileysystems;
    : exit status of get_filesystems is: $?
    (( $REP_RES_FATAL == 1 )) && exit $REP_RES_FATAL

    acquire_udresources AFTER_FILE_SYSTEM
    RC=$?
    : exit status of acquire_udresources is: $RC
    if (( $RC != 0 ))
    then
        echo "Failed to Start userdefined resources for '${GROUPNAME}' "
        (( $STATUS == 0 )) && STATUS=2
    fi
fi

#
: Acquire service address on boot adapter if we are the highest
: priority node for that resource. Determined by environment
: variables.
#
if [[ -n "$SERVICE_LABEL" ]]
then
    clcallev acquire_service_addr "$SERVICE_LABEL"
    RC=$?
    : exit status of acquire_service_addr is: $RC
    if (( $RC != 0 )) ; then
        (( STATUS == 0 )) && STATUS=2
    else
        [[ $PLATFORM == "__AIX__" ]] && {
            #
            : Register the new service labels with our resource group\'s NFSv4
            : node.
            #
            : Note that cl_nfs4smctl will ignore this request if the resource group
            : is not using NFSv4 so we can call it blindly without checking whether
            : NFSv4 is in use. We could try to optimize things by checking here
            : whether EXPORT_FILESYSTEM_V4 is set, however if this script is called
            : in the context of a dare operation, then EXPORT_FILESYSTEM_V4 will
            : only be set if new NFSv4 exports are being added which will cause us
            : to skip this code in the case that NFSv4 was already configured and
            : a dare operation adds a new service label but doesn\'t add any new
            : NFSv4 exports.
            #
            : Also note that the only real purpose of this code here is to cover
            : that particular case, since cl_export_fs will be called in all other
            : cases and it will add the service labels as well.
            #
            # $SERVICE_LABEL is deliberately unquoted: one label per word.
            for service_label in $SERVICE_LABEL
            do
                cl_nfs4smctl -A -N "$GROUPNAME" -n "$service_label"
                RC=$?
                : exit status of cl_nfs4smctl is: $RC
                (( $RC != 0 && STATUS == 0 )) && STATUS=2
            done
        }
    fi
fi

#
: Acquire service address on standby adapter if we are not the
: highest priority node for that resource. Determined by environment
: variables.
#
if [[ -n $TAKEOVER_LABEL ]]
then
    clcallev acquire_takeover_addr "$TAKEOVER_LABEL"
    RC=$?
    : exit status of acquire_takeover_addr $TAKEOVER_LABEL is: $RC
    if (( $RC != 0 && $STATUS == 0 )) ; then
        STATUS=2
    fi
fi

if (( $STATUS == 1 ))
then
    #
    : Problems: change resource status
    #
    set_resource_status "ERROR"
    : exit status of set_resource_status is: $?
    exit 1
fi

acquire_udresources AFTER_SERVICEIP
RC=$?
: exit status of acquire_udresources is: $RC
if (( $RC != 0 ))
then
    echo "Failed to Start userdefined resources for '${GROUPNAME}' "
    (( $STATUS == 0 )) && STATUS=2
fi

#
: Mount filesystems, varyon volume groups, make disks available,
: and export filesystems if it is not done yet.
#
if [[ "$FS_BEFORE_IPADDR" != "true" ]]
then
    get_fileysystems;
    : exit status of get_filesystems is: $?
    (( $REP_RES_FATAL == 1 )) && exit $REP_RES_FATAL

    acquire_udresources AFTER_FILE_SYSTEM
    RC=$?
    : exit status of acquire_udresources is: $RC
    if (( $RC != 0 ))
    then
        echo "Failed to Start userdefined resources for '${GROUPNAME}' "
        (( $STATUS == 0 )) && STATUS=2
    fi
fi

#
: Do the required NFS_mounts.
#
if [[ -n "$MOUNT_FILESYSTEM" ]]
then
    NW_NFSMOUNT_LABEL=""

    # For a cross-mount, default the NFS host to this node and the mount
    # label to the resource group's SERVICE_LABEL value(s) from the ODM.
    if (( $CROSSMOUNT == 1 ))
    then
        if [[ -z $NFS_HOST ]]
        then
            NFS_HOST=$LOCALNODENAME
        fi
        if [[ -z $NFSMOUNT_LABEL ]]
        then
            NFSMOUNT_LABEL=$(odmget -q "group=$GROUPNAME AND name=SERVICE_LABEL" HACMPresource \
                | grep 'value =' | cut -f2 -d'"')
        fi
    fi

    NFSHOST=""
    if [[ -n $NFS_HOST ]]
    then
        #
        : The following lets the user
        : pick a preferred network to NFS mount over.
        #
        if [[ -n $NFS_NETWORK ]]
        then
            ALL_NFSMOUNT_LABEL=$(odmget -q "group=$GROUPNAME AND name=SERVICE_LABEL" HACMPresource | sed -n '/value =/s/^.*"\(.*\)".*/\1/p')

            # Keep only the labels that cllsif lists on $NFS_NETWORK.
            for label in $ALL_NFSMOUNT_LABEL; do
                IN_NETWORK=$(cllsif -cS 2> /dev/null | grep ":$NFS_NETWORK:" | cut -f1 -d':' | grep -x "${label}" )
                if [[ -n $IN_NETWORK ]]
                then
                    NW_NFSMOUNT_LABEL="$NW_NFSMOUNT_LABEL $label"
                fi
            done
        fi

        # Use the first label that answers a ping; labels on the
        # preferred network are tried first.
        for host in $NW_NFSMOUNT_LABEL $NFSMOUNT_LABEL ; do
            if ping "$host" 1024 1 >/dev/null
            then
                NFSHOST=$host
                break
            fi
        done

        if [[ -n "$NFSHOST" ]]
        then
            cl_activate_nfs 1 "$NFSHOST" "$MOUNT_FILESYSTEM"
            RC=$?
            : exit status of cl_activate_nfs 1 $NFSHOST $MOUNT_FILESYSTEM is: $RC
            if (( $RC != 0 && $STATUS == 0 )) ; then
                STATUS=2
            fi
        else
            cl_log 653 "$PROGNAME: NFS Mounting failed. No reachable service interfaces found on node $NFS_HOST" "$PROGNAME" "$NFS_HOST"
        fi
    fi
fi

#
: Finally, take care of concurrent volume groups.
#
if [[ -n "$CONCURRENT_VOLUME_GROUP" ]]
then
    #
    : If the 'sddsrv' daemon is running \(vpath dead path detection and
    : recovery\) turn it off, since interactions with the fibre channel
    : device driver will, in the case where there actually is a dead path,
    : slow down every vpath operation.
    #
    if ls /dev/vpath* > /dev/null 2>&1
    then
        #
        : Each of the V, R, M and F fields are padded to fixed length,
        : to allow reliable comparisons. E.g., maximum VRMF is
        : 99.99.999.999
        #
        integer V R M F
        typeset -Z2 R # two digit release
        typeset -Z3 M # three digit modification
        typeset -Z3 F # three digit fix
        integer VRMF=0

        #
        : Check to see if we are running an early level of SDD, which
        : requires this.
        #
        sdd_level=106003000
        if lslpp -lcq "devices.sdd.*.rte" | cut -f3 -d':' | IFS=. read V R M F
        then
            VRMF=$V$R$M$F # get the SDD level
        fi
        # NOTE(review): $R expands zero-padded (typeset -Z2); confirm that
        # values such as 08 or 09 are not rejected as octal constants here.
        if (( $R >= 07 )); then
            sdd_level=107002005
        fi

        #
        : Check to see if SDD is active, and an early level
        #
        if (( $VRMF < $sdd_level )) && lssrc_out=$(LC_ALL=C lssrc -s sddsrv)
        then
            integer pid=0
            # Last line of the lssrc output: subsystem name first, then
            # the last two fields are taken as PID and state.
            print "$lssrc_out" | tail -1 | read subsys rest
            (set -- $rest ; eval print \${$(($#-1))} \${$#}) | read pid state
            if [[ $subsys == "sddsrv" && $state == "active" ]] && (( $pid != 0 ))
            then
                date
                #
                : The -c flag was removed from the stopsrc command below for two
                : reasons: 1\) The possible SIGKILL could result in "Invalid vpaths"
                : and 2\) Time for the daemon to go inoperative could be several
                : minutes in cases where many vpaths are not accessible
                #
                stopsrc -s sddsrv
                : exit status of stopsrc -s sddsrv is: $?
                echo "$PROGNAME: Waiting for sddsrv to go inoperative. This could take several minutes when some vpaths are inaccessible.\n"

                # no need to clog the log file with this
                set +x

                # Now wait for sddsrv to shut down
                while [[ $subsys == "sddsrv" && $state != "inoperative" ]] ; do
                    sleep 1
                    if ! lssrc_out=$(LC_ALL=C lssrc -s sddsrv)
                    then
                        #
                        : SRC stopped talking to us. No longer wait for it
                        #
                        break
                    else
                        #
                        : Pick up current state
                        #
                        lssrc_out=$(LC_ALL=C lssrc -s sddsrv | tail -1)
                        state=$(set -- $lssrc_out ; eval print \${$#})
                    fi
                done
                [[ $VERBOSE_LOGGING == "high" ]] && set -x
                date
                sddsrv_off=TRUE # Note that it was turned off
            fi
        fi
    fi

    #
    : Call replicated resource predisk-available method associated
    : with any replicated resource defined in the resource group
    : we arecurrently processing.
    #
    # $CONCURRENT_VOLUME_GROUP is left unquoted in the replicated-method
    # calls, so each volume group is passed as a separate argument.
    call_conc_replicated_methods "predisk_available" $CONCURRENT_VOLUME_GROUP
    RC=$?
    : exit status of call_conc_replicated_methods is: $RC
    if (( 1 == $RC ))
    then
        SKIPBRKRES=1
    fi

    #
    : Get the disks associated with the concurrent volume group
    : so that we can make sure disks within volume group are available.
    #
    if [[ -z $PRE_EVENT_MEMBERSHIP ]]
    then
        #
        : Find all the disks in all the concurrent volume groups
        #
        DK=""
        for vg in $CONCURRENT_VOLUME_GROUP
        do
            DK="$DK $(cl_fs2disk -p -g "$vg")"
        done

        #
        : Now, make sure we can access them. Note that this is done
        : only if this is the first node up in the cluster. Otherwise,
        : the node that came up first ensured that the disks were
        : readable.
        #
        if [[ -n "$DK" ]] && (( $SKIPBRKRES == 0 ))
        then
            cl_disk_available -v "$DK"
            RC=$?
            : exit status of cl_disk_available is: $RC
            if (( $RC != 0 && $STATUS == 0 )) ; then
                STATUS=2
            fi
        fi

        #
        : Call replicated resource prevg-online method associated with any
        : replicated resource that is a member of the resource group
        : we are currently processing. Note that a return code of 3 from
        : the prevg-online method indicates the varyonvg should not be done.
        #
        # NOTE(review): the text above says return code 3, but the test
        # below skips the varyon on return code 1 -- confirm which is right.
        call_conc_replicated_methods "prevg_online" $CONCURRENT_VOLUME_GROUP
        RC=$?
        : exit status of call_conc_replicated_methods is: $RC
        if (( 1 == $RC ))
        then
            SKIPVARYON=1
        fi

        # Varyon the vg and sync it.
        if (( $SKIPVARYON == 0 ))
        then
            cl_mode3 -s "$CONCURRENT_VOLUME_GROUP"
            RC=$?
            : exit status of cl_mode3 -s $CONCURRENT_VOLUME_GROUP is: $RC
            if (( $RC != 0 && $STATUS == 0 )) ; then
                STATUS=2
            fi
        fi
    else
        #
        : Call replicated resource prevg-online method associated with any
        : replicated resource that is a member of the resource group
        : we are currently processing. Note that a return code of 3 from
        : the prevg-online method indicates the varyonvg should not be done.
        #
        # NOTE(review): same mismatch as above -- text says 3, code tests 1.
        call_conc_replicated_methods "prevg_online" $CONCURRENT_VOLUME_GROUP
        RC=$?
        : exit status of call_conc_replicated_methods is: $RC
        if (( 1 == $RC ))
        then
            SKIPVARYON=1
        fi

        #
        : Varyon the vg but do not sync it.
        #
        if (( $SKIPVARYON == 0 ))
        then
            cl_mode3 -n "$CONCURRENT_VOLUME_GROUP"
            RC=$?
            : exit status of cl_mode3 is: $RC
            if (( $RC != 0 && $STATUS == 0 )) ; then
                STATUS=2
            fi
        fi
    fi

    #
    : Call replicated resource postvg_online method associated
    : with any replicated resource defined in the resource group
    : we are currently processing.
    #
    call_conc_replicated_methods "postvg_online" $CONCURRENT_VOLUME_GROUP
    RC=$?
    : exit status of call_conc_replicated_methods is: $RC
    if [[ 0 != $RC ]]
    then
        # Unlike the other steps, this failure is a hard error (STATUS 1).
        STATUS=1
        set_resource_status "ERROR"
        : exit status of set_resource_status is: $?
    fi

    #
    : If sddsrv was turned off, turn it back on again
    #
    if [[ $sddsrv_off == TRUE ]] && ! LC_ALL=C lssrc -s sddsrv | grep -iqw active ; then
        startsrc -s sddsrv
        : exit status of startsrc -s sddsrv is: $?
    fi
fi

#
: Start tape resources
#
if [[ -n "$SHARED_TAPE_RESOURCES" ]]
then
    cl_tape_resource_get_multi "$SHARED_TAPE_RESOURCES"
    RC=$?
    : exit status of cl_tape_resource_get_multi $SHARED_TAPE_RESOURCES is: $RC
    if (( $RC != 0 && $STATUS == 0 )) ; then
        STATUS=2
    fi
fi

acquire_udresources AFTER_TAPE
RC=$?
: exit status of acquire_udresources is: $RC
if (( $RC != 0 ))
then
    echo "Failed to Start userdefined resources for '${GROUPNAME}' "
    (( $STATUS == 0 )) && STATUS=2
fi

#
: Start AIX Connections services
#
if [[ -n "$AIX_CONNECTIONS_SERVICES" ]]
then
    clcallev get_aconn_rs "$AIX_CONNECTIONS_SERVICES"
    RC=$?
    : exit status of get_aconn_rs $AIX_CONNECTIONS_SERVICES is: $RC
    if (( $RC != 0 && $STATUS == 0 )) ; then
        STATUS=2
    fi
fi

#
: start commlink processing
#
if [[ -n $COMMUNICATION_LINKS ]]
then
    cl_start_commlinks "$COMMUNICATION_LINKS"
    RC=$?
    : exit status of cl_start_commlinks $COMMUNICATION_LINKS is: $RC
    if (( $RC != 0 && $STATUS == 0 )) ; then
        STATUS=2
    fi
fi

#
: Start UP SMB/FASTConnect resources
#
if [[ -n "$AIX_FAST_CONNECT_SERVICES" ]]
then
    get_fast_connect_rs
    RC=$?
    : exit status of get_fast_connect_rs is: $RC
    if (( $RC != 0 && $STATUS == 0 )) ; then
        STATUS=2
    fi
fi

# Any recorded error puts the resource into the ERROR state.
if (( $STATUS != 0 ))
then
    set_resource_status "ERROR"
    : exit status of set_resource_status is: $?
fi

# STATUS 2 is not reported to the caller: the script exits 0 for it and
# passes every other STATUS value through unchanged.
if (( $STATUS == 2 ))
then
    exit 0
else
    exit $STATUS
fi