#!/bin/ksh93
# IBM_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# 61haes_r721 src/43haes/lib/ksh93/hacmp/KLIB_HACMP_get_node_state.sh 1.15
#
# Licensed Materials - Property of IBM
#
# COPYRIGHT International Business Machines Corp. 1990,2010
# All Rights Reserved
#
# US Government Users Restricted Rights - Use, duplication or
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
#
# IBM_PROLOG_END_TAG
# @(#)17 1.15 src/43haes/lib/ksh93/hacmp/KLIB_HACMP_get_node_state.sh, hacmp.assist, 61haes_r721, 1642B_hacmp721 10/14/16 14:04:31

#=============================================================================
#
# Name:        KLIB_HACMP_get_node_state
#
# Description: This is the main, FPATH function that is invoked by clmgr
#              to retrieve the cluster manager state from the specified
#              cluster node.
#
#              Flow:
#                1. If /tmp/clmgr_node_states exists, it is sourced and a
#                   variable named after the node supplies a fake state
#                   (testing hook); the function returns immediately.
#                2. "clcheck_server clstrmgrES" is run (locally, or through
#                   $CLRSH for a remote node): 0 => INOPERATIVE, 1 => active,
#                   anything else => UNKNOWN.
#                3. For an active subsystem, "lssrc -ls clstrmgrES" output is
#                   scanned for "Current state" and "Forced down node list".
#                4. When CLMGR_GUI is "SMUI", a zero node handle or a recent
#                   non-zero autoverify.log exit code turns the state into
#                   "WARNING".
#
# Inputs:      See the "devDoc()" function, below.
#              Globals read: HALIBROOT, CL, CLMGR_LOGGING, CLMGR_TMPLOG,
#              CLMGR_GUI, LOCAL_NODE, CLRSH, HAUTILS, CLVT_SET, CLVT_MSGS,
#              RC_SUCCESS, RC_ERROR, RC_INCORRECT_INPUT.
#
# Outputs:     The node state is returned on STDOUT, unless the optional
#              "state" input is provided, in which case the state is stored
#              in that variable. The only other outputs are any error
#              messages that might be needed.
#
# Returns:     Zero if no errors are detected. Otherwise, an appropriate
#              non-zero value is returned. Refer to the "RETURN" section
#              of the "devDoc()" function, below, for the standard return
#              code values/meanings for clmgr.
#
#=============================================================================
function KLIB_HACMP_get_node_state {
    LINENO=2 . $HALIBROOT/log_entry "$0()" "$CL"
    : version=1.15, src/43haes/lib/ksh93/hacmp/KLIB_HACMP_get_node_state.sh, hacmp.assist, 61haes_r721, 1642B_hacmp721
    : INPUTS: $*

    typeset node=${1//\"/}          # Strip any embedded double quotes
    typeset -i USE_STDOUT=1         # Default return style
    if [[ -n $2 ]]; then
        typeset -n state=$2
        state="UNKNOWN"
        USE_STDOUT=0                # Return via reference to "$2" instead
    else
        typeset -u state="UNKNOWN"
    fi

    [[ $CLMGR_LOGGING == 'med' ]] && set +x # Only trace param values

    #===================================
    : Declare and initialize variables
    #===================================
    typeset -i rc=$RC_SUCCESS gotState=0
    typeset -u name=
    typeset value= data=
    # Scratch variables used further down; declared here so that they do
    # not leak into the caller's (global) scope.
    typeset rem= DATA= NODE=

    #===============================
    : Allow for simplified testing
    #   File usage: one shell assignment per node, i.e. NODE_NAME=STATE
    #===============================
    # NOTE(review): this sources a fixed, predictable path under /tmp and
    # then eval's a variable named after the node. That is acceptable only
    # as a test hook on a trusted system; confirm it is intended to ship.
    if [[ -f /tmp/clmgr_node_states ]]; then
        print "Found a fake node state file at /tmp/clmgr_node_states. Loading..." >>$CLMGR_TMPLOG
        print "$0()[$LINENO]($SECONDS): . /tmp/clmgr_node_states" >>$CLMGR_TMPLOG # Always log commands
        . /tmp/clmgr_node_states
        typeset -u fake_node_state=
        eval "fake_node_state=\$$node"
        if [[ -n $fake_node_state ]]; then
            if [[ $fake_node_state == *@(INIT|NOT_CONFIGURED|FAILED|ERROR|JOINING|UNSTABLE|VOTING|RUNNING|BARRIER|DONE|STABLE|UNKNOWN|OFFLINE|ONLINE|WARNING)* ]]
            then
                # Friendly aliases are mapped onto real cluster manager states
                [[ $fake_node_state == *OFFLINE* ]] && fake_node_state=ST_INIT
                [[ $fake_node_state == *ONLINE* ]] && fake_node_state=ST_STABLE
                print "Using a false node state found for \"$node\": \"$fake_node_state\" False node states are defined in /tmp/clmgr_node_states." >>$CLMGR_TMPLOG
                # Honor the caller's chosen return style here, too
                if (( USE_STDOUT )); then
                    print -- $fake_node_state
                else
                    state=$fake_node_state
                fi
                log_return_msg "$RC_SUCCESS" "$0()" "$LINENO"
                return $?
            else
                print "ERROR: invalid false node state found for \"$node\": \"$fake_node_state\" False node states are defined in /tmp/clmgr_node_states." >>$CLMGR_TMPLOG
                # In reference mode "state" already holds "UNKNOWN"
                (( USE_STDOUT )) && print "UNKNOWN"
                log_return_msg "$RC_INCORRECT_INPUT" "$0()" "$LINENO"
                return $?
            fi
        fi
    fi

    # Guard the data collection with a timeout so that a hung node cannot
    # hang the caller; the alarm is cancelled by clear_alarm, below.
    typeset -i timeout=90
    typeset ERRMSG=$(dspmsg -s $CLVT_SET $CLVT_MSGS 463 'Warning: could not collect status data for "%1$s" in the allotted time (%2$s seconds).\n' "$node" "$timeout")
    trap get_node_state_alarm USR1
    set_alarm "$timeout" $$ USR1 "$ERRMSG" 1>&2

    if [[ -z $node || $LOCAL_NODE == $node ]]; then
        node=$LOCAL_NODE    # Make *sure* it's set
        # Check if clstrmgrES is inoperative first
        # (clcheck_server has already retry functionality itself)
        print "$0()[$LINENO]($SECONDS): LC_ALL=C clcheck_server clstrmgrES" >>$CLMGR_TMPLOG
        LC_ALL=C clcheck_server clstrmgrES
    else
        # Check if clstrmgrES is inoperative first using 'clcheck_server clstrmgrES'
        # (clcheck_server has already retry functionality itself)
        print "$0()[$LINENO]($SECONDS): $CLRSH $node \"LC_ALL=C $HAUTILS/clcheck_server clstrmgrES\"" >>$CLMGR_TMPLOG # Always log commands
        $CLRSH "$node" "LC_ALL=C $HAUTILS/clcheck_server clstrmgrES"
    fi
    rc=$?
    print "$0()[$LINENO]($SECONDS): clcheck_server RC: $rc" >>$CLMGR_TMPLOG # Always log command result

    case $rc in
        0)  # clcheck_server returns 0 if the subsystem is inoperative
            gotState=0
            state="INOPERATIVE"
            rc=$RC_SUCCESS
            ;;

        1)  # clcheck_server returns 1 if the subsystem is active
            # Now run 'lssrc -ls clstrmgrES'. Later "Current state" value is
            # extracted from data variable (clcheck_server does not provide
            # this info).
            state="UNKNOWN"
            if [[ -z $node || $LOCAL_NODE == $node ]]; then
                print "$0()[$LINENO]($SECONDS): LC_ALL=C lssrc -ls clstrmgrES" >>$CLMGR_TMPLOG # Always log commands
                data=$(LC_ALL=C lssrc -ls clstrmgrES)
            else
                print "$0()[$LINENO]($SECONDS): $CLRSH $node \"LC_ALL=C /usr/bin/lssrc -ls clstrmgrES\"" >>$CLMGR_TMPLOG # Always log commands
                data=$($CLRSH "$node" "LC_ALL=C /usr/bin/lssrc -ls clstrmgrES" 2>/dev/null)
            fi
            rc=$?
            print "$0()[$LINENO]($SECONDS): lssrc RC: $rc" >>$CLMGR_TMPLOG # Always log command result
            ;;

        *)  # any other rc is indicating a problem to retrieve the state information
            gotState=0
            state="UNKNOWN"
            rc=$RC_ERROR    # was "$ERROR", a name that is used nowhere else
            ;;
    esac

    clear_alarm

    if (( $rc == RC_SUCCESS )) && [[ -n $data ]]; then
        # The loop below assigns "state" and "gotState", so it relies on
        # ksh93 running the last stage of a pipeline in the current shell.
        print -- "$data" |
        while IFS=':' read -r name value rem; do
            name=${name// /}
            value=$(echo $value)  # Trim all leading/trailing whitespace

            if [[ $name == "CURRENTSTATE" ]]; then
                #=============================================================
                : The cluster manager can sometimes report "NOT_CONFIGURED"
                : when it should actually report "ST_INIT", and vice-versa.
                : To make it more consistent/accurate/reliable, check for a
                : a node_id of zero in HACMPnode. If found, then the node
                : is not fully configured as part of a cluster, so the state
                : *should* be "NOT_CONFIGURED". It may be *created*, but not
                : *fully* configured. Otherwise, it should be "ST_INIT".
                #=============================================================
                if [[ $value == @(NOT_CONFIGURED|ST_INIT) ]]; then
                    DATA=$(odmget -q node_id=0 HACMPnode 2>/dev/null)
                    if [[ $DATA == *([[:space:]]) ]]; then
                        value="ST_INIT"
                    else
                        value="NOT_CONFIGURED"
                    fi
                fi
                state=$value
                gotState=1
            elif [[ $name == "FORCEDDOWNNODELIST" ]]; then
                # A forced-down node overrides whatever state was reported
                if [[ " $value " == *\ $node\ * ]]; then
                    state="UNMANAGED"
                    gotState=1
                    break
                fi
            fi
        done

        if (( gotState )); then
            rc=$RC_SUCCESS

            if [[ $state != @(WARNING|ERROR|NOT_CONFIGURED|UNMANAGED) ]]; then
                integer NODE_HANDLE=-1
                if [[ -z $node || $LOCAL_NODE == $node ]]; then
                    print "$0()[$LINENO]($SECONDS): clodmget -f handle -n HACMPcluster" >>$CLMGR_TMPLOG
                    NODE_HANDLE=$(clodmget -f handle -n HACMPcluster)
                    print "$0()[$LINENO]($SECONDS): clodmget RC: $? (NODE_HANDLE == $NODE_HANDLE)" >>$CLMGR_TMPLOG
                else
                    print "$0()[$LINENO]($SECONDS): $CLRSH $node $HAUTILS/clodmget -f handle -n HACMPcluster" >>$CLMGR_TMPLOG
                    NODE_HANDLE=$($CLRSH "$node" $HAUTILS/clodmget -f handle -n HACMPcluster)
                    print "$0()[$LINENO]($SECONDS): clodmget RC: $? (NODE_HANDLE == $NODE_HANDLE)" >>$CLMGR_TMPLOG
                fi

                if [[ $CLMGR_GUI == "SMUI" ]]; then
                    (( ! NODE_HANDLE )) && state="WARNING"

                    if [[ $state != "WARNING" ]]; then
                        : Check the autover log for a non-zero exit code
                        : First, find the node where it ran.
                        typeset AUTOVER="$(clodmget -q name=autoverify.log -f value -n HACMPlogs)"
                        AUTOVER="$AUTOVER/autoverify.log"
                        typeset NEWEST_NODE=""
                        integer LASTMOD=0 NEWEST_MOD=0
                        for NODE in $(clnodename)
                        do
                            LASTMOD=0   # Do not carry a timestamp over from the previous node
                            if [[ $NODE == $LOCAL_NODE ]]
                            then
                                if [[ -s $AUTOVER ]]
                                then
                                    LASTMOD=$(perl -e "my \$LASTMOD = (stat('$AUTOVER'))[9]; printf '%d', \$LASTMOD;")
                                fi
                            else
                                if cl_rsh "$NODE" "[ -s $AUTOVER ]"
                                then
                                    LASTMOD=$(cl_rsh "$NODE" "perl -e \"printf '%d', (stat('$AUTOVER'))[9];\"")
                                fi
                            fi
                            if (( LASTMOD != 0 && LASTMOD > NEWEST_MOD ))
                            then
                                NEWEST_MOD=$LASTMOD
                                NEWEST_NODE=$NODE
                            fi
                        done

                        : See if the last file update was within 24 hours. If not, its not valid.
                        if [[ -n $NEWEST_NODE ]]; then
                            integer CURRENT=0
                            # Use the clock of the node that owns the file
                            if [[ $NEWEST_NODE == $LOCAL_NODE ]]
                            then
                                CURRENT=$(perl -e 'printf "%d", time();')
                            else
                                CURRENT=$(cl_rsh "$NEWEST_NODE" "perl -e 'printf \"%d\", time();'")
                            fi
                            if (( ( CURRENT - NEWEST_MOD ) < 86400 ))
                            then
                                : Looks like a valid autover log. Less than 24 hours old.
                                typeset EXIT_RC=""
                                if [[ $NEWEST_NODE == $LOCAL_NODE ]]
                                then
                                    EXIT_RC=$(egrep 'exit [0-9]' $AUTOVER | tail -1)
                                else
                                    EXIT_RC=$(cl_rsh "$NEWEST_NODE" "egrep 'exit [0-9]' $AUTOVER | tail -1")
                                fi
                                EXIT_RC=${EXIT_RC##* exit }
                                [[ $EXIT_RC == +([0-9]) && $EXIT_RC != 0 ]] && state="WARNING"
                            fi
                        fi
                    fi
                fi # End of SMUI-only code
            fi
        else
            dspmsg -s $CLVT_SET $CLVT_MSGS 119 '\nERROR: unable to retrieve the cluster manager status for node "%1$s".\n' "$node" 1>&2
            state="UNKNOWN"
            gotState=1
            rc=$RC_ERROR
        fi
    else
        # if clstrmgr is inoperative, we have no 'lssrc -ls clstrmgr' output,
        # so we are here even if we were able to contact the node.
        if [[ $state != "INOPERATIVE" ]]
        then
            dspmsg -s $CLVT_SET $CLVT_MSGS 119 '\nERROR: unable to retrieve the cluster manager status for node "%1$s".\n' "$node" 1>&2
            [[ -n $data ]] && print -u2 -- "$data"
            state="UNKNOWN"
            gotState=1
            rc=$RC_ERROR
        fi
    fi

    (( ! gotState && $rc != RC_SUCCESS )) && state="UNKNOWN"

    if (( $USE_STDOUT )); then
        print -- $state  # Do not quote! No leading/trailing spaces desired!
    elif [[ $state == *([[:space:]]) ]]; then
        state="UNKNOWN"  # Never hand an empty/blank state back by reference
    fi

    log_return_msg "$rc" "$0()" "$LINENO"
    return $?
} # End of "KLIB_HACMP_get_node_state()"


#=============================================================================
#
# Name:        get_node_state_alarm
#
# Description: This is a trap function for a timeout set around collecting
#              node status. The intention of this is to prevent a hang
#              condition from occurring. If the trap is triggered prior
#              to the data collection completing, the state is forced to
#              "UNKNOWN".
#
#              NOTE(review): this handler assigns "state" and tests
#              "USE_STDOUT" without declaring them. Both names are
#              typeset-local inside KLIB_HACMP_get_node_state; whether the
#              handler sees those locals or global variables of the same
#              name depends on ksh93 function scoping - confirm.
#
# Inputs:      None.
#
# Outputs:     "UNKNOWN" is printed on STDOUT when USE_STDOUT is non-zero.
#              Otherwise nothing is printed, and only the "state" variable
#              is set.
#
# Returns:     Returns with a non-zero value.
#
#=============================================================================
function get_node_state_alarm {
    state="UNKNOWN"
    (( USE_STDOUT )) && print "$state"
    return 1
}


#============================================================================
#
# Name:        devDoc
#
# Description: This is a never-to-be-called, wrapper function that all the
#              clmgr FPATH functions implement in order to hide embedded
#              syntax from trace logging. This information is implemented
#              in POD format, and can be viewed in a number of ways using
#              POD tools. Some viewing suggestions for this function's POD-
#              formatted information are:
#
#                perldoc
#                pod2text -c
#                pod2text -c --code
#                pod2html
#
#              However, the more important use for this information is that
#              it is parsed by clmgr to display the syntax for this file's
#              operation. The information in the "SYNOPSIS" section is used
#              for this purpose.
#              This feature was originally implemented
#              using the man page information. However, in a code review it
#              was pointed out that this approach had to be changed because
#              customers do not have to install the man pages! Therefore, a
#              built-in dependency on man page information would break the
#              automatic help feature of clmgr. So the SYNOPSIS section must
#              be used instead.
#
#              IMPORTANT: As a result of this, it is imperative that the
#                         information in this SYNOPSIS be kept in sync
#                         with the man page information, which is owned
#                         by the IDD team.
#
#              The function body is a single no-op (":") that is fed the POD
#              text as a quoted here-document, with all output discarded.
#
#              NOTE(review): the SYNOPSIS usage line below reads
#              "[ {|LOCAL}[,,,...] ]", which looks as if its placeholder
#              tokens were lost at some point - confirm against the man page
#              before relying on it.
#
# Inputs:      None.
#
# Outputs:     None.
#
# Returns:     n/a (not intended to be invoked)
#
#============================================================================
function devDoc {
: <<'=cut' >/dev/null 2>&1

=head1 NAME

KLIB_HACMP_get_node_state

=head1 VERSION

Version Number: 1.10
Last Extracted: 7/21/16 17:45:18
Last Changed: 7/21/16 15:05:33
Path, Component, Release(, Level): src/43haes/lib/ksh93/hacmp/KLIB_HACMP_get_node_state.sh, hacmp.assist, 61haes_r721, 1629B_hacmp721

=head1 SYNOPSIS

clmgr -cSa state query node [ {|LOCAL}[,,,...] ]

=head1 DESCRIPTION

Queries the specified node (defaulting to the local node) for its
current state.
First "clcheck_server clstrmgrES" is used to determine if clstrmgrES
subsystem is active or inoperative.
If clstrmgrES subsystem is inoperative state is set to:

    INOPERATIVE

If clstrmgrES subsystem is active the state is retrieved by calling
"lssrc -ls clstrmgrES". The currently known states are:

    ST_INIT
    ST_JOINING
    ST_STABLE
    ST_UNSTABLE
    ST_VOTING
    ST_RP_RUNNING
    ST_BARRIER
    ST_RP_FAILED
    ST_CBARRIER
    ST_DONE
    NOT_CONFIGURED
    UNMANAGED

The discovered state is returned to the caller via STDOUT.

The only state processing that is done at all here is in the case
where a state of "NOT_CONFIGURED" is detected. Under some conditions,
this state can be reported even though the node actually *is*
configured (and should more accurately be reported as "ST_INIT").
So some extra effort is called for to make *sure* the node really
*is* "NOT_CONFIGURED".

=head1 ARGUMENTS

1. node [REQUIRED] [string]
   The label of the node to be queried.

2. state [OPTIONAL] [string reference]
   An optional input that is a string reference. If this is provided,
   the node state is returned in the reference variable, rather than
   on STDOUT.

=head1 RETURN

    0: no errors were detected; the operation appears to have been successful
    1: a general error has occurred
    2: a specified resource does not exist, or could not be found
    3: some required input was missing
    4: some detected input was incorrect in some way
    5: a required dependency does not exist
    6: a specified search failed to match any data

Output is provided to stdout

=head1 COPYRIGHT

COPYRIGHT International Business Machines Corp. 2005,2015
All Rights Reserved

=cut
} # End of "devDoc()"


#==============================================================================
# The following, comment block attempts to enforce coding standards when this
# file is edited via emacs or vim. This block _must_ appear at the very end
# of the file, or the editor will not find it, and it will be ignored.
#==============================================================================
# Local Variables:
# indent-tabs-mode: nil
# tab-width: 4
# End:
#==============================================================================
# vim: tabstop=4 shiftwidth=4 expandtab
#==============================================================================