# @(#) 71c643f 43haes/usr/sbin/cluster/sa/maxdb/sbin/LCutilities.sh, 726, 2147A_aha726, May 26 2021 10:50 PM
# ALTRAN_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# Copyright (C) Altran ACT S.A.S. 2017,2018,2021. All rights reserved.
#
# ALTRAN_PROLOG_END_TAG
#
#----------------------------------------------------------------------------
# Global Definitions
#----------------------------------------------------------------------------
. /usr/es/sbin/cluster/sa/maxdb/sbin/DBUtilities
. /usr/es/sbin/cluster/sa/maxdb/sbin/DBGlobals

##################################################################
# FUNCTION:   lc_check_process_status
# PURPOSE:    checks if the kernel process of the LC is running
#             by counting the "ps -ef" lines that contain both
#             "kernel" and $SID
# GLOBALS:    SID, OK, ERROR, VERBOSE_LOGGING (all read)
# PARAMETER:  -
# RETURNCODE: $OK if more than one matching line is found,
#             $ERROR otherwise (including an empty SID)
##################################################################
function lc_check_process_status {

    [[ "$VERBOSE_LOGGING" == "high" ]] && set -x

    typeset count

    # An empty SID cannot identify an instance; report "not running"
    # instead of matching every kernel process.
    [[ -n $SID ]] || return $ERROR

    # grep -c prints a bare number, and (( )) compares numerically.
    # A string comparison ([[ a > b ]]) would mis-order padded counts.
    count=$(ps -ef | grep -v grep | grep kernel | grep -c -e "$SID")

    # NOTE(review): the threshold "more than one" is kept from the
    # original code - confirm that a running instance always shows
    # at least two kernel processes.
    (( count > 1 )) && return $OK

    return $ERROR
}

##################################################################
# FUNCTION:   lc_start_vserver
# PURPOSE:    call x_server start if not already started
# GLOBALS:    LCBIN, OK, VERBOSE_LOGGING (all read)
# PARAMETER:  -
# RETURNCODE: $OK if "x_server start" succeeds, otherwise the
#             exit status of x_server
##################################################################
function lc_start_vserver {

    [[ "$VERBOSE_LOGGING" == "high" ]] && set -x

    typeset rc

    # no check of service availability, just start it and SAP code validates it
    #lib_log "[INFO]: Start x_server now."
    KLIB_SA_logmsg INFO 70 27 maxdbsa.cat "Start x_server now.\n"

    "${LCBIN}/x_server" start
    rc=$?
    (( rc == 0 )) && return $OK

    # Hand the failure status of x_server back to the caller unchanged
    return $rc
}

##################################################################
# FUNCTION:   lc_start_vserver_global
# PURPOSE:    call x_server start on every node reported by
#             clnodename (executed through cli_on_node)
# GLOBALS:    LCBIN, OK, VERBOSE_LOGGING (all read)
# PARAMETER:  -
# RETURNCODE: always $OK; the result of the remote start is not
#             evaluated
##################################################################
function lc_start_vserver_global {

    [[ "$VERBOSE_LOGGING" == "high" ]] && set -x

    typeset node

    for node in $(/usr/es/sbin/cluster/utilities/clnodename)
    do
        # no check of service availability, just start it and SAP code validates it
        KLIB_SA_logmsg INFO 70 27 maxdbsa.cat "Start x_server now.\n"
        /usr/es/sbin/cluster/cspoc/cli_on_node -N "$node" "${LCBIN}/x_server start"
    done

    return $OK
}

##################################################################
# FUNCTION:   lc_check_db_active
# PURPOSE:    health check for database. Only used for long-term
#             monitoring. startup monitor will fail using this
#             4 state possibilities are acceptable. Unknown and
#             starting are transitional status.
# PARAMETER:  -
# RETURNCODE: 0=OK, or accumulated ERROR codes
##################################################################
# function invalid for current design
#function lc_check_db_active {
#    [[ "$VERBOSE_LOGGING" == "high" ]] && set -x
#
#    set +x
#    LCSTATE=$($LCBIN/dbmcli -d ${SID} ${CONTROL_LOGIN} db_state | grep -v OK | grep -v State)
#    set -x
#    echo $LCSTATE
#    KLIB_SA_logmsg INFO 70 25 maxdbsa.cat "Instance \"%1\$s\" has a state of \"%2\$s\".
# \n" ${INSTANCE} ${LCSTATE}
#
#    [[ $INSTANCE == PRIMARY ]] && [[ $LCSTATE == "ONLINE" ]] && return $OK
#    [[ $INSTANCE == PRIMARY ]] && [[ $LCSTATE == "STANDBY" ]] && return $OK
#    [[ $INSTANCE == PRIMARY ]] && [[ $LCSTATE == "STARTING" ]] && return $OK
#    [[ $INSTANCE == PRIMARY ]] && [[ $LCSTATE == "UNKNOWN" ]] && return $OK
#    [[ $INSTANCE == AUXILIARY ]] && [[ $LCSTATE == "STANDBY" ]] && return $OK
#
#    return $ERROR
#}

##################################################################
# FUNCTION:   lc_set_remote_host
# PURPOSE:    sets the variables REMOTE_HOST and LOCAL_HOST
#             depending on whether the local hostname equals
#             $LCSERVER or $TAKEOVER. Also implements a
#             configuration check
# GLOBALS:    LCSERVER, TAKEOVER, STANDBY_DB_IP, LCSERVER_DB_IP
#             (read); HOST, REMOTE_HOST, LOCAL_HOST (written)
# PARAMETER:  -
# RETURNCODE: 0=OK, or 1=ERROR (host is neither of the two)
##################################################################
function lc_set_remote_host {

    [[ "$VERBOSE_LOGGING" == "high" ]] && set -x

    # HOST is deliberately left global, exactly as before
    HOST=$(hostname)
    typeset HOSTNAME=$HOST

    # The right-hand sides are quoted so that the names are compared
    # literally and never interpreted as glob patterns.
    if [[ $HOSTNAME == "$LCSERVER" ]]
    then
        REMOTE_HOST="${STANDBY_DB_IP}"
        LOCAL_HOST="${LCSERVER_DB_IP}"
        return $OK
    fi

    if [[ $HOSTNAME == "$TAKEOVER" ]]
    then
        REMOTE_HOST="${LCSERVER_DB_IP}"
        LOCAL_HOST="${STANDBY_DB_IP}"
        return $OK
    fi

    dspmsg -s 70 maxdbsa.cat 30 "ERROR: Host \"%1\$s\" is not configured. \n" ${HOSTNAME}
    return $ERROR
}

##################################################################
# FUNCTION:   lc_stop_instance
# PURPOSE:    stop an instance with "dbmcli db_offline". If that
#             fails, wait TIMEOUT[lc_wait_med_in_stop_of_master]
#             seconds and accept the stop when db_state reports
#             OFFLINE.
# GLOBALS:    LCBIN, SID, CONTROL_LOGIN, TIMEOUT (read);
#             RC, LCSTATE (written)
# PARAMETER:  -
# RETURNCODE: $OK, or the exit status of "dbmcli db_offline"
##################################################################
function lc_stop_instance {

    # Tracing is switched off around every dbmcli call that carries
    # ${CONTROL_LOGIN} and switched on again afterwards (file convention).
    set +x
    $LCBIN/dbmcli -d ${SID} ${CONTROL_LOGIN} db_offline
    RC=$?
    set -x

    if (( RC != 0 ))
    then
        # we want to sleep and check the state
        KLIB_SA_logmsg INFO 70 33 maxdbsa.cat "Sleeping \"%1\$s\" seconds. \n" ${TIMEOUT[lc_wait_med_in_stop_of_master]}
        /usr/bin/sleep ${TIMEOUT[lc_wait_med_in_stop_of_master]}
        KLIB_SA_logmsg INFO 70 34 maxdbsa.cat "Resuming from sleep \n"

        set +x
        LCSTATE=$(${LCBIN}/dbmcli -d ${SID} ${CONTROL_LOGIN} db_state | grep -E -v "LCENV|OK|State" )
        set -x

        if [[ $LCSTATE == "OFFLINE" ]]
        then
            return $OK
        fi
    fi

    return $RC
}

##################################################################
# FUNCTION:   lc_start_master
# PURPOSE:    start the lc_master instance accordingly to the
#             current status of the database and handle the
#             standby to restart on the REMOTE_NODE
#
#             db_state     action
#             OFFLINE      db_online, wait (long timeout)
#             STANDBY      db_online, wait (short timeout)
#             ADMIN        sleep 20, db_online, wait (long timeout)
#             ONLINE       try to (re)start the standby remotely
#             other        error
# GLOBALS:    LCBIN, SID, CONTROL_LOGIN, LOCAL_HOST, REMOTE_HOST,
#             TIMEOUT (read); LCSTATE, RG_SLAVE, rc (written)
# PARAMETER:  -
# RETURNCODE: 0=OK, or 1=ERROR
##################################################################
function lc_start_master {

    [[ "$VERBOSE_LOGGING" == "high" ]] && set -x

    set +x
    LCSTATE=$($LCBIN/dbmcli -n ${LOCAL_HOST} -d ${SID} ${CONTROL_LOGIN} db_state | grep -v OK | grep -v State)
    set -x

    KLIB_SA_logmsg INFO 70 74 maxdbsa.cat "The state of the LiveCache \"%1\$s\" Primary is \"%2\$s\". \n" ${SID} ${LCSTATE}

    case $LCSTATE in
        OFFLINE)
            KLIB_SA_logmsg INFO 70 75 maxdbsa.cat "Bringing primary instance from OFFLINE into ONLINE state. \n"

            set +x
            $LCBIN/dbmcli -d ${SID} ${CONTROL_LOGIN} db_online
            rc=$?
            set -x
            [[ $rc == 0 ]] || return $ERROR

            # give the application some time to awake. This is relevant for the monitors to not draw wrong conclusions
            #echo ${TIMEOUT[lc_wait_long_in_startup_of_master]}
            KLIB_SA_logmsg INFO 70 33 maxdbsa.cat "Sleeping \"%1\$s\" seconds. \n" ${TIMEOUT[lc_wait_long_in_startup_of_master]}
            /usr/bin/sleep ${TIMEOUT[lc_wait_long_in_startup_of_master]}
            KLIB_SA_logmsg INFO 70 34 maxdbsa.cat "Resuming from sleep \n"

            set +x
            LCSTATE=$($LCBIN/dbmcli -n ${LOCAL_HOST} -d ${SID} ${CONTROL_LOGIN} db_state | grep -v OK | grep -v State)
            set -x

            KLIB_SA_logmsg INFO 70 76 maxdbsa.cat "Succesfully started Primary instance. DB state is \"%1\$s\". \n" ${LCSTATE}
            return $OK
            ;;

        STANDBY)
            KLIB_SA_logmsg INFO 70 77 maxdbsa.cat "Making Standby instance a Primary. \n"

            set +x
            $LCBIN/dbmcli -d ${SID} ${CONTROL_LOGIN} db_online
            rc=$?
            set -x
            [[ $rc == 0 ]] || return $ERROR

            # Log the duration that is really slept (the short timeout);
            # previously the long timeout was reported here.
            KLIB_SA_logmsg INFO 70 33 maxdbsa.cat "Sleeping \"%1\$s\" seconds. \n" ${TIMEOUT[lc_wait_short_in_startup_of_master]}
            /usr/bin/sleep ${TIMEOUT[lc_wait_short_in_startup_of_master]}
            KLIB_SA_logmsg INFO 70 34 maxdbsa.cat "Resuming from sleep \n"

            set +x
            LCSTATE=$($LCBIN/dbmcli -n ${LOCAL_HOST} -d ${SID} ${CONTROL_LOGIN} db_state | grep -v OK | grep -v State)
            set -x

            KLIB_SA_logmsg INFO 70 76 maxdbsa.cat "Succesfully started Primary instance. DB state is \"%1\$s\". \n" ${LCSTATE}
            return $OK
            ;;

        ONLINE)
            KLIB_SA_logmsg INFO 70 78 maxdbsa.cat "Primary instane is already online. Starting standby instance now. \n"

            #set SLAVE_NODE variable if SLAVE is online
            RG_SLAVE="RG_Standby_"${SID}
            typeset SLAVE_NODE=$(clRGinfo -s | awk -v RG=${RG_SLAVE} -F : '$1 == RG && $2 == "ONLINE" {print $3}')

            if [[ -z $SLAVE_NODE ]]
            then
                KLIB_SA_logmsg WARN 70 38 maxdbsa.cat "Standby Resource group is not online on any node. Standby instance will not be started. \n"
                return $OK
            fi

            # Validate if LC SLAVE is already on the correct node and test if it is already active (just exit) or needs to be reconnected.
            if [[ $SLAVE_NODE == "$REMOTE_HOST" ]]
            then
                KLIB_SA_logmsg INFO 70 39 maxdbsa.cat "Standby Resource group is online on \"%1\$s\" \n" ${REMOTE_HOST}
                lc_start_slave_remote && return $OK
            fi

            # NOTE(review): this warning is also issued when the standby
            # resource group is online on a node other than REMOTE_HOST and
            # no restart was attempted - confirm that this is intended.
            KLIB_SA_logmsg WARN 70 80 maxdbsa.cat "Restarting Auxiliary failed. \n"
            return $OK
            ;;

        ADMIN)
            KLIB_SA_logmsg INFO 70 75 maxdbsa.cat "Bringing Primary instance from ADMIN into ONLINE state. this will fail in case of invalid data but no data corruption is expected. \n"

            sleep 20 #give some time to allow the STB to resume

            set +x
            $LCBIN/dbmcli -d ${SID} ${CONTROL_LOGIN} db_online
            rc=$?
            set -x
            [[ $rc == 0 ]] || return $ERROR

            # give the application some time to awake. This is relevant for the monitors to not draw wrong conclusions
            #echo ${TIMEOUT[lc_wait_long_in_startup_of_master]}
            KLIB_SA_logmsg INFO 70 33 maxdbsa.cat "Sleeping \"%1\$s\" seconds. \n" ${TIMEOUT[lc_wait_long_in_startup_of_master]}
            /usr/bin/sleep ${TIMEOUT[lc_wait_long_in_startup_of_master]}
            KLIB_SA_logmsg INFO 70 34 maxdbsa.cat "Resuming from sleep \n"

            set +x
            LCSTATE=$($LCBIN/dbmcli -n ${LOCAL_HOST} -d ${SID} ${CONTROL_LOGIN} db_state | grep -v OK | grep -v State)
            set -x

            KLIB_SA_logmsg INFO 70 35 maxdbsa.cat "The output of db_state is \"%1\$s\". \n" ${LCSTATE}
            return $OK
            ;;

        *)
            KLIB_SA_logmsg ERROR 70 43 maxdbsa.cat "Invalid state detected to start a Master. Quit startup and return with ERROR. \n"
            return $ERROR
            ;;
    esac

    return $OK
}

##################################################################
# FUNCTION:   lc_start_slave
# PURPOSE:    start the lc_slave instance accordingly to the
#             current status of the database.
#             Nothing is done when the service IP $LCSERVER_IP is
#             listed by cllssvcs for the local node, or when the
#             local instance is in state ADMIN or STARTING.
#             The primary on $REMOTE_HOST must be ONLINE.
# GLOBALS:    LCBIN, SID, CONTROL_LOGIN, LCSERVER_IP, REMOTE_HOST,
#             LOCAL_HOST (read); LCSTATE (written)
# PARAMETER:  -
# RETURNCODE: 0=OK, or 1=ERROR
##################################################################
function lc_start_slave {

    [[ "$VERBOSE_LOGGING" == "high" ]] && set -x

    typeset LOCALNODENAME rc

    LOCALNODENAME=$(/usr/es/sbin/cluster/utilities/get_local_nodename 2>/dev/null)

    [[ -n $( /usr/es/sbin/cluster/utilities/cllssvcs -n $LOCALNODENAME | grep -w " ${LCSERVER_IP} ") ]] && {
        # The space after the IP is essential!
        # Otherwise Auxiliary might not start when hostname and Service IP only differ by additional characters
        return $OK
    }

    # Every dbmcli call below carries ${CONTROL_LOGIN}. Like the other
    # functions of this file, keep it out of the trace; tracing is
    # re-enabled only if it was requested through VERBOSE_LOGGING.

    #Check if a Primary is running in the cluster to connect to
    set +x
    LCSTATE=$(${LCBIN}/dbmcli -n ${REMOTE_HOST} -d ${SID} ${CONTROL_LOGIN} db_state | grep -v OK | grep -v State)
    [[ "$VERBOSE_LOGGING" == "high" ]] && set -x

    [[ $LCSTATE != "ONLINE" ]] && {
        KLIB_SA_logmsg ERROR 70 82 maxdbsa.cat "No Primary instance detected. Standby instance will not be started. \n"
        return $ERROR
    }

    #check for a startup in progress
    set +x
    LCSTATE=$(${LCBIN}/dbmcli -d ${SID} ${CONTROL_LOGIN} db_state | grep -E -v "LCENV|OK|State")
    [[ "$VERBOSE_LOGGING" == "high" ]] && set -x

    [[ "${LCSTATE}" == "ADMIN" || "${LCSTATE}" == "STARTING" ]] && return $OK

    # start Standby
    KLIB_SA_logmsg INFO 70 46 maxdbsa.cat "cleanup DB-environment of Standby.\n"
    set +x
    $LCBIN/dbmcli -d ${SID} ${CONTROL_LOGIN} db_offline
    [[ "$VERBOSE_LOGGING" == "high" ]] && set -x

    KLIB_SA_logmsg INFO 70 83 maxdbsa.cat "Starting Auxiliray from Primary node. \n"
    set +x
    $LCBIN/dbmcli -n $REMOTE_HOST -d ${SID} ${CONTROL_LOGIN} db_standby $LOCAL_HOST
    rc=$?
    [[ "$VERBOSE_LOGGING" == "high" ]] && set -x

    (( rc == 0 )) && return $OK
    return $ERROR
}

##################################################################
# FUNCTION:   lc_start_slave_remote
# PURPOSE:    start the lc slave instance on $REMOTE_HOST:
#             db_offline is sent to $REMOTE_HOST, then db_standby
#             for $REMOTE_HOST is issued against $LOCAL_HOST
# GLOBALS:    LCBIN, SID, CONTROL_LOGIN, REMOTE_HOST, LOCAL_HOST
#             (read)
# PARAMETER:  -
# RETURNCODE: 0=OK, or 1=ERROR
##################################################################
function lc_start_slave_remote {

    [[ "$VERBOSE_LOGGING" == "high" ]] && set -x

    typeset standby_rc

    dspmsg -s 70 maxdbsa.cat 46 "cleanup DB-environment of Standby. \n"
    set +x
    $LCBIN/dbmcli -n $REMOTE_HOST -d ${SID} ${CONTROL_LOGIN} db_offline
    set -x

    dspmsg -s 70 maxdbsa.cat 83 "Starting Auxiliray from Primary node. \n"
    set +x
    $LCBIN/dbmcli -n $LOCAL_HOST -d ${SID} ${CONTROL_LOGIN} db_standby $REMOTE_HOST
    standby_rc=$?
    set -x

    (( standby_rc == 0 )) && return $OK
    return $ERROR
}

####################################################
# FUNCTION:   timeout
# PURPOSE:    implements a while loop with timing
#             add-on
# PARAMETER:  CMD = function to be tested
#             RETRY = retry the test
#             SLEEP = sleep between the tests
# RETURNCODE: OK, ERROR
####################################################
# unused function --> functionality not tested
#function timeout {
#
#    [[ "$VERBOSE_LOGGING" == "high" ]] && set -x
#
#    typeset CMD=$1 RETRY=${2-10} SLEEP=${3-10}
#    typeset COUNT=1
#
#    while (( $COUNT <= $RETRY ))
#    do
#        $CMD && {
#            (( TIME_LOG == 1 )) && KLIB_SA_logmsg INFO 70 52 maxdbsa.cat "\"%1\$s\" was successfull after \"%2\$s\" out of \"%3\$s\" retries" ${CMD} ${COUNT} ${RETRY}
#            return $OK
#        }
#        sleep $SLEEP
#        (( COUNT += 1 ))
#    done
#    (( TIME_LOG == 1 )) && KLIB_SA_logmsg ERROR 70 53 maxdbsa.cat "\"%1\$s\" failed after \"%2\$s\" retries" ${CMD} ${RETRY}
#    return $ERROR
#}

#############################################################################
# FUNCTION:   check_local_process_status
# PURPOSE:    return status of process
# PARAMETER:  USER = SIDADM or DB2 user
#             PROCESS = Name to look for in the CMD column in the ps output
#             NODE = node to check for the process
# RETURNCODE: OK, ERROR
#############################################################################
# unused function --> has never been tested
#function check_local_process_status {
#
#    [[ "$VERBOSE_LOGGING" == "high" ]] && set -x
#    typeset USER=$1 PROCESS=$2 #NODE=$3
#
#    #clrsh $NODE "ps -u ${USER} -o args" | grep "${PROCESS}" && return $OK
#    /usr/bin/ps -u ${USER} -o args | grep "${PROCESS}" && return $OK
#
#    return $ERROR
#}

#########################################################
# FUNCTION:   check_fs_status
# PURPOSE:    return status of fs
# PARAMETER:  FS = name to grep for in the mount output
# RETURNCODE: OK, ERROR
#########################################################
# unused function --> has never
# been tested
#function check_fs_status {
#
#    [[ "$VERBOSE_LOGGING" == "high" ]] && set -x
#    typeset FS=$1
#    mount | grep $FS && return $OK
#
#    KLIB_SA_logmsg INFO 70 51 maxdbsa.cat "check_fs_status: Filesystem \"%1\$s\" is not mounted \n" ${FS}
#    return $ERROR
#}

####################################################
# FUNCTION:   check_executable_status
# PURPOSE:    check if executable can be used, i.e. the
#             path is a regular file with execute
#             permission. Logs whether the path exists
#             but cannot be executed, or is missing.
# PARAMETER:  EXECUTABLE = full path to executable
# RETURNCODE: OK, ERROR
####################################################
function check_executable_status {

    [[ "$VERBOSE_LOGGING" == "high" ]] && set -x

    typeset EXECUTABLE=$1

    # -x alone is also true for a searchable directory, which cannot be
    # executed; therefore a regular file is required as well.
    [[ -f $EXECUTABLE && -x $EXECUTABLE ]] && return $OK

    # The path is quoted so that a name containing blanks reaches the
    # logger as a single argument.
    if [[ -e $EXECUTABLE ]]
    then
        KLIB_SA_logmsg INFO 70 49 maxdbsa.cat "check_executable_status: can not execute \"%1\$s\" \n" "${EXECUTABLE}"
    else
        KLIB_SA_logmsg INFO 70 50 maxdbsa.cat "check_executable_status: \"%1\$s\" not found. \n" "${EXECUTABLE}"
    fi

    return $ERROR
}

####################################################
# FUNCTION:   get_cluster_state_stable
# PURPOSE:    look for "ST_STABLE" in the output of
#             "lssrc -ls clstrmgrES"; matching lines
#             are written to stdout
# PARAMETER:  -
# RETURNCODE: exit status of grep (0 = string found)
####################################################
function get_cluster_state_stable {

    [[ "$VERBOSE_LOGGING" == "high" ]] && set -x

    lssrc -ls clstrmgrES | grep "ST_STABLE"
    return $?
}

#############################################################################
# FUNCTION:   check_global_process_status
# PURPOSE:    return status of process
# PARAMETER:  USER = SIDADM or DB2 user
#             PROCESS = Name to look for in the CMD column in the ps output
# RETURNCODE: OK, ERROR
#############################################################################
# unused function --> has never been tested
#function check_global_process_status {
#
#    [[ "$VERBOSE_LOGGING" == "high" ]] && set -x
#
#    typeset USER=$1 PROCESS=$2
#
#    /usr/bin/ps -u ${USER} -o args | grep "${PROCESS}" && return $OK # clrsh fails if cluster is not stable. This causes an Application monitor to hang.
#
#    for i in `/usr/es/sbin/cluster/utilities/clnodename`
#    do
#        clrsh $i "ps -u ${USER} -o args" | grep -q "${PROCESS}" && return $OK
#    done
#
#    return $ERROR
#}