#!/bin/ksh93
# ALTRAN_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# Copyright (C) Altran ACT S.A.S. 2017,2019,2021. All rights reserved.
#
# ALTRAN_PROLOG_END_TAG
#
# @(#) 7d4c34b 43haes/usr/sbin/cluster/sa/sap/sbin/cl_sapStart.sh, 726, 2147A_aha726, Feb 05 2021 09:50 PM
##
## NAME:  cl_sapStart
##
## PURPOSE:
##      Startup script for SAP NetWeaver instance(s).
##
## ARGUMENTS:
##
##      Type:
##      -a      Application ID
##
##
## OUTPUT:
##
##
## RETURNS:
##      0 on success
##      1 if not called with right input or as defined by EC_* variables on failure
##

. /usr/es/lib/ksh93/func_include

typeset version="1.13"

#----------------------------------------------------------------------------
# Global Definitions
#----------------------------------------------------------------------------
# Smart Assist for SAP globals and helper functions
. /usr/es/sbin/cluster/sa/sap/etc/SAPGlobals
. /usr/es/sbin/cluster/sa/sap/sbin/SAPUtilities

# Availability metrics library
. /usr/es/lib/ksh93/availability/cl_amlib

typeset PROGRAM=${0##*/}

# Append the cluster tool directories and the base system directories
export PATH=$PATH:/usr/es/sbin/cluster/sa/sbin:/usr/es/sbin/cluster/utilities:/bin:/usr/bin

#-------------------------------------------------------------------------------
# Main
#-------------------------------------------------------------------------------
if [[ $VERBOSE_LOGGING == high ]]
then
    set -x
fi

typeset script_name="cl_sapStart"

# Counters and flags used by the instance loop further down
typeset -i mycnt=0 count=0 ctl=0 do_SCS_cleanup=0

# Use the active ODM repository unless the caller already chose one
export ODMDIR=${ODMDIR:-/usr/es/sbin/cluster/etc/objrepos/active/}

# Only -a <application id> is recognised
while getopts a: option
do
    [[ $option == a ]] && APPLICATION_ID=$OPTARG
done

# The application ID is mandatory; without it nothing can be looked up
if [[ -z $APPLICATION_ID ]]
then
    KLIB_SAP_SA_logmsg ERROR 151 73 sapsa.cat "\"%1\$s\": Application ID not passed as input Parameter. Quit immediately with Exit code 1. Please add application ID as input parameter to the PowerHA Smart Assist Application Servers.\n" $script_name
    exit 1
fi

## GLUECODE
# Indication from the glue code that we need not monitor the application.
#-------------------------------------------------------------------------------
# _cl_sapStart_notify THRESHOLD MESSAGE
#
#   Calls the configured notification script with MESSAGE when
#   notification_level is numerically greater than THRESHOLD and
#   notification_script is non-empty.
#
#   Arguments: $1 - threshold the notification level must exceed
#              $2 - message handed to the notification script
#   Globals:   notification_level, notification_script (read)
#   Returns:   always 0; the outcome of the notification never alters the
#              start sequence
#-------------------------------------------------------------------------------
_cl_sapStart_notify() {
    # Arithmetic comparison: a string comparison would order "10" before "3"
    if (( ${notification_level:-0} > $1 )) && [[ -n $notification_script ]]
    then
        # Run the script directly. Wrapping the call in $( ) would execute
        # whatever the script prints as a further command.
        # The variable stays unquoted, as at the original call sites.
        ${notification_script} "$2"
    fi
    return 0
}

# If the SAP_HA_GLUE_CODE connector entry of this application currently holds
# 101, rewrite it to 103.
if [[ $(clodmget -n -q "application_id=${APPLICATION_ID} and name=SAP_HA_GLUE_CODE" -f value HACMPsap_connector 2>/dev/null) == "101" ]]
then
    echo "HACMPsap_connector: \
        value=103" |/usr/bin/odmchange -o HACMPsap_connector -q "application_id=${APPLICATION_ID} and name=SAP_HA_GLUE_CODE"
fi
## GLUECODE

# Define an array of instances from the application_ID and get its appropriate SID from ODM
typeset myinst_names=$(clodmget -n -q "application_id=$APPLICATION_ID and name=INSTANCE_NAMES" -f value HACMPsa_metadata 2>/dev/null)
if [[ -z $myinst_names ]]
then
    # The '$' of the positional placeholder must be escaped inside double
    # quotes, otherwise the shell expands $s and the placeholder is lost.
    KLIB_SAP_SA_logmsg ERROR 151 74 sapsa.cat "clodmget failed to get value of %1\$s.\n" "INSTANCE_NAMES"
    exit 1
fi

# For PHA 712 Style ODM entries like --> "ASCS00_SCS01"
# Underscores become blanks; the unquoted expansion below then splits the
# list on blanks and newlines alike.
myinst_names=${myinst_names//_/ }
for i in $myinst_names
do
    inst_names[$mycnt]=$i
    ((mycnt=mycnt+1))
done

. /usr/es/sbin/cluster/sa/sap/sbin/SAPUtilities_xplatform

# Handle the instances one by one, in the order they were listed
while (( $count < ${#inst_names[@]} ))
do
    unset SAPSYSTEMNAME
    setSAPGlobalEnv -i ${inst_names[$count]}
    ret=$?
    if (( $ret != 0 ))
    then
        KLIB_SAP_SA_logmsg ERROR 151 72 sapsa.cat "\"%1\$s\": Environment setup failed. Quit immediately with Exit code 1.\n" $script_name
        exit 1
    fi

    . /usr/es/sbin/cluster/sa/sap/etc/SAPInstanceGlobals

    # An instance without its own virtual IP inherits the one of the
    # preceding instance (if there is a preceding instance).
    if [[ -z ${virt_ips[$count]} ]]
    then
        ((ctl=count-1))
        if (( $ctl >= 0 ))
        then
            virt_ips[$count]=${virt_ips[$ctl]}
        fi
    fi

    # prevent a restart of ERS in case the corresponding CS Instance is acquiring
    if [[ ${inst_names[$count]} == ERS* ]]
    then
        ERS_RG=$(clodmget -n -q "application_id=$APPLICATION_ID and name=RESOURCE_GROUP" -f value HACMPsa_metadata)
        CS=$(clodmget -n -q "group=$ERS_RG and name=MISC_DATA" -f value HACMPresource)
        # The second comma-separated field of MISC_DATA carries up to two
        # addresses joined by '_'.
        CS_IPs=$(echo $CS | cut -f2 -d,)
        CS_IP1=$(echo $CS_IPs | cut -f1 -d_)
        CS_IP2=$(echo $CS_IPs | cut -f2 -d_)
        if [[ -n $CS_IP2 ]]
        then
            CS_IP="${CS_IP1}|${CS_IP2}"
        else
            CS_IP="${CS_IP1}"
        fi

        # we exit here with zero as we can not assure the timing here.
        # We must not relocate before the CS instance is completely recovered.
        # The search is skipped when no CS address could be resolved: an empty
        # pattern (or an empty alternative) cannot identify a CS address.
        # NOTE(review): confirm that starting ERS is the desired behaviour
        # when MISC_DATA carries no CS address.
        if [[ -n $CS_IP1 ]] && [[ -n $(ifconfig -a | egrep -w "${CS_IP}") ]]
        then
            exit 0
        fi
    fi

    # Format for consumption by cl_availability utility
    # Looked up once per instance and used by every amlog_* call below.
    APP_ID=$(clodmget -n -q "name=INSTANCE_NAMES and value=${inst_names[$count]}" -f application_id HACMPsa_metadata)
    RG=$(clodmget -n -q "application_id=$APP_ID and name=RESOURCE_GROUP" -f value HACMPsa_metadata)

    # if we run on a NFS we have to check its availability before starting
    if [[ ${is_nfs} == 1 ]]
    then
        amlog_trace $AM_SA_SAP_START_BEGIN "SAP NFS Availability check|$RG"
        KLIB_SAP_SA_logmsg DEBUG 151 1 sapsa.cat "\"%1\$s\" Instance \"%2\$s\" of \"%3\$s\" - has its SAP Global filesystem on a NFS based share.\n Availability is evaluated now.\n" $script_name ${inst_names[$count]} $SID
        Check_NFS_Service -V ${nfs_ip} -M ${sapmnt_nfs}
        if [[ $? == $ERROR ]]
        then
            KLIB_SAP_SA_logmsg ERROR 151 2 sapsa.cat "\"%1\$s\" Instance \"%2\$s\" of \"%3\$s\" - The SAP Global filesystem is unavailable.\n Start procedure stopped. Please resolve any issues in the NFS Server/Client before continue.\n Exit code of start script is \"%4\$s\". \n" $script_name ${inst_names[$count]} $SID ${exit_code_start_nfs_failed}
            _cl_sapStart_notify 3 "Instance ${inst_names[$count]} of ${SID} - The SAP Global filesystem is unavailable.\n Start procedure stopped. Please resolve any issues in the NFS Server/Client before continue.\n Exit code of start script is ${exit_code_start_nfs_failed}. \n"
            # Failures are reported with amlog_err, as in the other failure paths
            amlog_err $AM_SA_SAP_START_FAILURE "SAP NFS Availability check|$RG"
            # we expect customers to insert a notification method
            # that the monitor will give notification of an unavailable NFS.
            # The application Monitor will now handle restart/failovers if set to 0
            # otherwise the configured monitoring method will send notification.
            exit $exit_code_start_nfs_failed
        fi
        KLIB_SAP_SA_logmsg INFO 151 3 sapsa.cat "\"%1\$s\" Instance \"%2\$s\" of \"%3\$s\" - The SAP Global filesystem is available. Continue to start instance.\n" $script_name ${inst_names[$count]} $SID
        amlog_trace $AM_SA_SAP_START_END "SAP NFS Availability check|$RG"
    fi

    amlog_trace $AM_SA_SAP_START_BEGIN "Prerequisites and Start Service|$RG"

    # ensure sapstartsrv is started or can be started otherwise exit
    Control_sapstartsrv -A "Prereq_and_start_cmd" -I ${inst_names[$count]} -S ${SAPSYSTEMNAME} -E ${sap_exe_dir[$count]} -V ${virt_ips[$count]} -L "${SAPenv}"
    if [[ $? == $sapstartsrv_failed_to_start ]]
    then
        KLIB_SAP_SA_logmsg ERROR 151 4 sapsa.cat "\"%1\$s\" Instance \"%2\$s\" of \"%3\$s\" - The sapstartsrv process failed to start.\n Start procedure stopped. Please evaluate the SAP logfile /usr/sap/\"%4\$s\"/\"%5\$s\"/work/sapstartsrv.log.\n Exit code of start script is \"%6\$s\".\n" $script_name ${inst_names[$count]} $SID ${SAPSYSTEMNAME} ${inst_names[$count]} ${exit_code_start_sapcontrol_startservice_failed}
        _cl_sapStart_notify 3 "Instance ${inst_names[$count]} of ${SID} - The sapstartsrv process failed to start.\n Start procedure stopped. Please evaluate the SAP logfile /usr/sap/${SAPSYSTEMNAME}/${inst_names[$count]}/work/sapstartsrv.log.\n Exit code of start script is ${exit_code_start_sapcontrol_startservice_failed}"
        amlog_err $AM_SA_SAP_START_FAILURE "Prerequisites and Start Service|$RG"
        # we expect customers to insert a notification method
        # that the monitor will give notification of inability to start sapstartsrv.
        # The application Monitor will now handle restart/failovers to try it on the other node
        exit $exit_code_start_sapcontrol_startservice_failed
    fi
    KLIB_SAP_SA_logmsg INFO 151 5 sapsa.cat "\"%1\$s\" Instance \"%2\$s\" of \"%3\$s\" - The sapstartsrv is successfully started. Continue to start instance.\n" $script_name ${inst_names[$count]} $SID
    amlog_trace $AM_SA_SAP_START_END "Prerequisites and Start Service|$RG"

    # verify if the Instance is up and running already
    Control_instance -A "Check_cmd" -I ${inst_names[$count]} -S ${SAPSYSTEMNAME} -E ${sap_exe_dir[$count]} -V ${virt_ips[$count]} -L "${SAPenv}" -T "0"
    typeset -i rc_Control_instance_Check=$?

    ## Check if the Instance is working
    if (( $rc_Control_instance_Check <= $EnqGetStatistic_err )) && [[ ${inst_names[$count]} != ERS* ]]
    then
        KLIB_SAP_SA_logmsg INFO 151 6 sapsa.cat "\"%1\$s\" Instance \"%2\$s\" of \"%3\$s\" - Instance is already running. No restart is performed. \n" $script_name ${inst_names[$count]} $SID
        # For appservers we are fine, Nothing to do for startup
        # For (A)SCS instances we give notification if rc > EnqGetStatistic_GREEN
        if (( $rc_Control_instance_Check >= $EnqGetStatistic_YELLOW ))
        then
            _cl_sapStart_notify 3 "Instance ${inst_names[$count]} of ${SID} - Instance is already running. But ENSA/ERS replication status is not GREEN. \n"
            KLIB_SAP_SA_logmsg WARNING 151 7 sapsa.cat "\"%1\$s\" Instance \"%2\$s\" of \"%3\$s\" - Instance is already running. But ENSA/ERS replication status is not GREEN. \n The PowerHA application monitor notification method is called if specified.\n" $script_name ${inst_names[$count]} $SID
        fi
    else
        # Reached when the check reported more than EnqGetStatistic_err, and
        # always for ERS instances.
        # Appservers and ERS tend to hang. We ensure startup capability

        # SCS cleanup only runs when there is no ERS instance running on any
        # of the cluster nodes, to avoid removing lock tables
        if [[ ${inst_names[$count]} == *SCS* ]]
        then
            SCS_Appid=$(clodmget -n -q "name=INSTANCE_NAMES and value=${inst_names[$count]}" -f application_id HACMPsa_metadata)
            SCS_RG=$(clodmget -n -q "application_id=${SCS_Appid} and name=RESOURCE_GROUP" -f value HACMPsa_metadata)
            ERS_Instance_name=$(clodmget -n -q "group=${SCS_RG} and name=MISC_DATA" -f value HACMPresource)
            ERS_Application_id=$(clodmget -n -q "value=${ERS_Instance_name} and name=INSTANCE_NAMES" -f application_id HACMPsa_metadata)
            ERS_Instance_no=$(clodmget -n -q "application_id=${ERS_Application_id} and name=INSTANCE_NUMBERS" -f value HACMPsa_metadata)
            ERS_RG=$(clodmget -n -q "application_id=${ERS_Application_id} and name=RESOURCE_GROUP" -f value HACMPsa_metadata)
            ERS_Virt_ip=$(clodmget -n -q "application_id=${ERS_Application_id} and name=VIRTUAL_IPS" -f value HACMPsa_metadata)
            HOST_cmd="-host ${ERS_Virt_ip}"
            ERS_Nodes=$(clodmget -n -q "group=${ERS_RG}" -f nodes HACMPgroup)
            ERS_EXE_DIR=$(clodmget -n -q "application_id=${ERS_Application_id} and name=EXECUTABLE" -f value HACMPsa_metadata)

            #
            : check ERS is online on any of the participating nodes or not
            #
            # Start from a defined value: with an empty node list the loop
            # never runs, and rc would otherwise be unset or left over from
            # the previous instance. 0 lets the cleanup proceed.
            # NOTE(review): confirm that cleanup is wanted when no ERS node
            # is configured.
            rc=0
            for node in ${ERS_Nodes}
            do
                #
                : GetProcessList may fail with error "Command not found" on the nodes where ERS is not available
                #
                cl_rsh $node "${USER_cmd} \"${SAPenv} ${ERS_EXE_DIR}/sapcontrol ${HOST_cmd} -nr ${ERS_Instance_no} -function GetProcessList\"" 2>/dev/null
                rc=$?
                # ERS is online on one of the nodes in the cluster, no need to continue...
                (( $rc == 3 )) && break
            done

            # rc 3 means, ERS instance is running somewhere in the cluster,
            # so no forced cleanup is needed; for 0, 1, 2 and 4 do the cleanup.
            # Any other code leaves the flag untouched.
            case $rc in
                0|1|2|4 )
                    # Cleanup SCS instance
                    do_SCS_cleanup=1
                    ;;
            esac
        fi

        if [[ ${inst_names[$count]} != *SCS* ]] || (( ${do_SCS_cleanup} == 1 ))
        then
            amlog_trace $AM_SA_SAP_START_BEGIN "Cleanup [A]SCS|$RG"
            KLIB_SAP_SA_logmsg INFO 151 8 sapsa.cat "\"%1\$s\" Instance \"%2\$s\" of \"%3\$s\" - Start cleanup of remainders of a previous startup. \n" $script_name ${inst_names[$count]} $SID
            Control_instance -A "Stop" -I ${inst_names[$count]} -S ${SAPSYSTEMNAME} -E ${sap_exe_dir[$count]} -V ${virt_ips[$count]} -L "${SAPenv}" -T "0"
            if [[ $? != $sapcontrol_0 ]]
            then
                KLIB_SAP_SA_logmsg WARNING 151 9 sapsa.cat "\"%1\$s\" Instance \"%2\$s\" of \"%3\$s\" - The attempt to stop the instance using sapcontrol -function Stop failed. Cleanup will be done by killing processes and cleanup of shared memory segments.\n" $script_name ${inst_names[$count]} $SID
            fi
            Kill_instance -I ${inst_names[$count]} -S ${SAPSYSTEMNAME} -E ${sap_exe_dir[$count]} -V ${virt_ips[$count]} -L "${SAPenv}"
            Control_sapstartsrv -A "Kill" -I ${inst_names[$count]} -S ${SAPSYSTEMNAME} -E ${sap_exe_dir[$count]} -V ${virt_ips[$count]} -L "${SAPenv}"
            sleep 5
            Control_sapstartsrv -A "Prereq_and_start_cmd" -I ${inst_names[$count]} -S ${SAPSYSTEMNAME} -E ${sap_exe_dir[$count]} -V ${virt_ips[$count]} -L "${SAPenv}"
            KLIB_SAP_SA_logmsg INFO 151 10 sapsa.cat "\"%1\$s\" Instance \"%2\$s\" of \"%3\$s\" - Cleanup finalized. Instance is cleaned up and sapstartsrv is running.\n" $script_name ${inst_names[$count]} $SID
            # Reset so the decision is taken afresh for the next instance
            do_SCS_cleanup=0
            amlog_trace $AM_SA_SAP_START_END "Cleanup [A]SCS|$RG"
        fi

        amlog_trace $AM_SA_SAP_START_BEGIN "Start SAP instance|$RG"
        # we start the instance after we had a clean stopped instance state or have cleaned up
        Control_instance -A "Start_cmd" -I ${inst_names[$count]} -S ${SAPSYSTEMNAME} -E ${sap_exe_dir[$count]} -V ${virt_ips[$count]} -L "${SAPenv}" -T "${wait_for_started_timeout} ${wait_for_started_delay}"
        rc=$?
        KLIB_SAP_SA_logmsg INFO 151 11 sapsa.cat "\"%1\$s\" Instance \"%2\$s\" of \"%3\$s\" - Start instance returned with a returncode of \"%4\$s\".\n" $script_name ${inst_names[$count]} $SID ${rc}

        case $rc in
            ${instance_started} )
                KLIB_SAP_SA_logmsg INFO 151 12 sapsa.cat "\"%1\$s\" Instance \"%2\$s\" of \"%3\$s\" - Start completed successfully.\n" $script_name ${inst_names[$count]} $SID
                amlog_trace $AM_SA_SAP_START_END "Start SAP instance|$RG"
                ;;
            ${wait_for_started_timeout} )
                _cl_sapStart_notify 4 "Instance ${inst_names[$count]} of ${SID} - Start completed successfully.\n But timeout specified for instance startup was not sufficient. \n"
                KLIB_SAP_SA_logmsg WARNING 151 13 sapsa.cat "\"%1\$s\" Instance \"%2\$s\" of \"%3\$s\" - Start completed successfully.\n But timeout specified for instance startup was not sufficient. \n" $script_name ${inst_names[$count]} $SID
                amlog_trace $AM_SA_SAP_START_END "Start SAP instance|$RG"
                ;;
            ${wait_for_started_failed} )
                _cl_sapStart_notify 4 "Instance ${inst_names[$count]} of ${SID} - The Instance startup finished without error. \n The call sapcontrol -function WaitforStarted exit with error.\n"
                KLIB_SAP_SA_logmsg ERROR 151 14 sapsa.cat "\"%1\$s\" Instance \"%2\$s\" of \"%3\$s\" - The Instance startup finished without error. \n The call sapcontrol -function WaitforStarted exit with error.\n" $script_name ${inst_names[$count]} $SID
                amlog_trace $AM_SA_SAP_START_END "Start SAP instance|$RG"
                ;;
            ${instance_failed_to_start} )
                KLIB_SAP_SA_logmsg ERROR 151 15 sapsa.cat "\"%1\$s\" Instance \"%2\$s\" of \"%3\$s\" - The Instance startup failed. \n Exit start script with exit code \"%4\$s\".\n" $script_name ${inst_names[$count]} $SID ${exit_code_start_sapcontrol_start_failed}
                _cl_sapStart_notify 3 "Instance ${inst_names[$count]} of ${SID} - The Instance startup failed. \n Exit start script with exit code ${exit_code_start_sapcontrol_start_failed}.\n"
                # Instance could not be started
                amlog_err $AM_SA_SAP_START_FAILURE "Start SAP instance|$RG"
                exit $exit_code_start_sapcontrol_start_failed
                ;;
        esac
    fi

    # The following check we do only for ASCS, SCS and ERS server instances (ensure the enqueue table is build)
    if [[ ${is_ers_enabled} == 1 ]] && [[ ${inst_names[$count]} == @(*SCS*|ERS*) ]]
    then
        amlog_trace $AM_SA_SAP_START_BEGIN "Ensure enqueue table is build|$RG"
        KLIB_SAP_SA_logmsg DEBUG 151 16 sapsa.cat "\"%1\$s\" Instance \"%2\$s\" of \"%3\$s\" - For Central Services and ERS instances the replication health status is verified now.\n" $script_name ${inst_names[$count]} $SID

        # verify enqueue replication has caught up to prevent from too early fail overs
        # Now wait until ensmon returns rc 0, then ES has recreated the enqueue table or ERS has caught up with ES
        Check_replication_status -I ${inst_names[$count]} -S ${SAPSYSTEMNAME} -E ${sap_exe_dir[$count]} -V ${virt_ips[$count]} -L "${SAPenv}" -T $max_ensmon_retries -P "${instance_prof}"
        rc=$?

        # The outcomes are tested one after another (not as elif) so that the
        # evaluation matches the original sequence of independent checks.
        if [[ $rc == $Check_Replication_OK ]]
        then
            KLIB_SAP_SA_logmsg INFO 151 17 sapsa.cat "\"%1\$s\" Instance \"%2\$s\" of \"%3\$s\" - Lock table fully created. \n" $script_name ${inst_names[$count]} $SID
            # Same label as the BEGIN record above, so the two can be paired
            amlog_trace $AM_SA_SAP_START_END "Ensure enqueue table is build|$RG"
        fi

        if [[ $rc == $Check_Replication_CS_WARNING ]]
        then
            _cl_sapStart_notify 4 "Instance ${inst_names[$count]} of ${SID} - The ERS has not finalized to build its content or is not started.\n"
            KLIB_SAP_SA_logmsg INFO 151 18 sapsa.cat "\"%1\$s\" Instance \"%2\$s\" of \"%3\$s\" - The ERS has not finalized to build its content or is not started.\n" $script_name ${inst_names[$count]} $SID
            amlog_trace $AM_SA_SAP_START_END "Ensure enqueue table is build|$RG"
        fi

        if [[ $rc == $Check_Replication_ERS_WARNING ]]
        then
            # NOTE(review): this notification text is identical to the
            # CS_WARNING one and does not mirror the log message below -
            # possibly a copy/paste leftover; confirm the intended wording.
            _cl_sapStart_notify 4 "Instance ${inst_names[$count]} of ${SID} - The ERS has not finalized to build its content or is not started.\n"
            KLIB_SAP_SA_logmsg WARNING 151 19 sapsa.cat "\"%1\$s\" Instance \"%2\$s\" of \"%3\$s\" - Specified timeout was not sufficient to fully build up replication table.\n Not correcting this will result in a loss of information when failing over.\n" $script_name ${inst_names[$count]} $SID
            amlog_trace $AM_SA_SAP_START_END "Ensure enqueue table is build|$RG"
        fi

        if [[ $rc == $Check_Replication_ERROR ]]
        then
            _cl_sapStart_notify 3 "Instance ${inst_names[$count]} of ${SID} - Replication is not working.\n Please correct this instantly if this installation is enabled for ENSA/ERS. \n"
            KLIB_SAP_SA_logmsg ERROR 151 20 sapsa.cat "\"%1\$s\" Instance \"%2\$s\" of \"%3\$s\" - Replication is not working.\n Please correct this instantly if this installation is enabled for ENSA/ERS. \n" $script_name ${inst_names[$count]} $SID
            amlog_err $AM_SA_SAP_START_FAILURE "Ensure enqueue table is build|$RG"
        fi
    fi

    ((count=count+1))
done

exit 0