#!/bin/ksh93
# ALTRAN_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# Copyright (C) Altran ACT S.A.S. 2017,2019,2021. All rights reserved.
#
# ALTRAN_PROLOG_END_TAG
#
# IBM_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# 61haes_r720 src/43haes/usr/sbin/cluster/events/utils/cl_mirrorset.sh 1.9
#
# Licensed Materials - Property of IBM
#
# Restricted Materials of IBM
#
# COPYRIGHT International Business Machines Corp. 2002,2015
# All Rights Reserved
#
# US Government Users Restricted Rights - Use, duplication or
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
#
# IBM_PROLOG_END_TAG
# @(#) 7d4c34b 43haes/usr/sbin/cluster/events/utils/cl_mirrorset.sh, 726, 2147A_aha726, Feb 05 2021 09:50 PM
#================================================
# The following, commented line enforces coding
# standards when this file is edited via vim.
#================================================
# vim:tabstop=4:shiftwidth=4:expandtab:smarttab
#================================================
###########################################################################
#
#   Name:       cl_mirrorset
#
#   Function:   If a varyonvg of a volume group fails due to lack of
#               quorum, it may still be meaningful to do a forced varyon.
#               This would be true only if a complete copy of each logical
#               volume is present in the accessible disks.
#
#               This routine determines if that is the case for a given
#               volume group by examining the partition maps of each
#               logical volume, and checking that at least one copy of
#               each logical partition is on an accessible disk.
#
#               This routine is also used when an I/O error renders
#               access to a volume group questionable:  it will
#               indicate if there is enough of the volume group
#               present to allow safe access.
#
#
#   Input:      volume group name
#
#   Environment:  HACMP_MIRROR_VARYON can be set to 'TRUE' (or 'true'),
#               enabling the check for all volume groups.  Otherwise,
#               the resource group definition containing this volume
#               group must indicate that forced varyon is supported.
#
#               If one of those is not true, then this routine will
#               indicate that a forced varyon is not to be done.
#
#               SKIP_FORCED_VARYON if set, will cause a return code of 1.
#               It is consulted only on the resource group policy path,
#               that is, when HACMP_MIRROR_VARYON is not TRUE.
#
#               HACMP_MIRRORSET_MAXTRIES is the maximum number of
#               tries to read a disk.
#
#               LV_HACMP_MIRRORSET_MAXTRIES is the maximum number of
#               tries to read a LV partition map.
#
#               FORCED_VARYON, GROUPNAME, LOCAL_NODE, PS4 and
#               VERBOSE_LOGGING are also read from the environment
#               when present.
#
#   Output:     0 - a complete copy of each logical volume is present
#               1 - some logical volume is incomplete, or forced
#                   varyon is not allowed
#
#               Logging to clutils.log for when this routine is not
#               called in an event handling context.
#
#   Notes:      The lines that start with ':' are no-op commands, not
#               comments; they are kept so that their text shows up in
#               the 'set -x' trace.
#
#               This script relies on ksh93 semantics: 'return' at the
#               top level ends the script, and the last stage of a
#               pipeline ('... | read', '... | while read') runs in the
#               current shell, so the variables it sets stay visible.
#
###########################################################################

PROGNAME=${0##*/}

#
#  Give the variable itself the uppercase attribute, so that 'true' and
#  'TRUE' compare equal below, whether the value comes from the
#  environment or is picked up later from /etc/environment.  The name must
#  be passed without a '$': 'typeset -u $HACMP_MIRROR_VARYON' would apply
#  the attribute to a variable named after the current value instead.
#
typeset -u HACMP_MIRROR_VARYON          # uppercase for comparison

if [[ ( -z $PS4 || $PS4 == '+ ' ) && $HACMP_MIRROR_VARYON == TRUE ]]
then
    #
    #  Pick up PS4 and VERBOSE_LOGGING if not present in environment
    #
    [[ -z $LOCAL_NODE ]] && LOCAL_NODE=$(get_local_nodename)
    [[ -n $LOCAL_NODE ]] && eval export $(cllsparam -n $LOCAL_NODE)
fi

if [[ $VERBOSE_LOGGING == "high" ]]
then
    PS4_TIMER=true                      # Track long running LVM operations
    set -x
    version='1.9'
fi

#
: Pick up the passed volume group name
#
vgname=$1
integer mirrorset=1                     # Default is 'no' - forced varyon disallowed
integer MAX_TRIES=3                     # Default number of tries to read disk
integer LV_MAX_TRIES=3                  # Default number of tries to read LV partition map
integer lslv_rc=0                       # initialize lslv_rc with 0 (OK)

#
#  Locate clutils.log; fall back to /var/hacmp/log if the query returns
#  nothing.
#
CLUTILS_DIR=$(clodmget -q "name = clutils.log" -f value -n HACMPlogs)
CLUTILS_DIR=${CLUTILS_DIR:-/var/hacmp/log}
CLUTILS_LOG="${CLUTILS_DIR}/clutils.log"

if [[ -z $HACMP_MIRROR_VARYON ]]
then
    #
    : Note the use of grep to check the environment variable - the settings
    : in /etc/environment do not get passed to HACMP shell scripts
    #
    eval $(grep -w '^HACMP_MIRROR_VARYON' /etc/environment | grep -iw TRUE)
fi

if [[ -z $HACMP_MIRRORSET_MAXTRIES ]]
then
    #
    : Note the use of grep to check the environment variable - the settings
    : in /etc/environment do not get passed to HACMP shell scripts
    #
    eval $(grep -w '^HACMP_MIRRORSET_MAXTRIES' /etc/environment)
    #  A value below 1 picked up here is raised to 1
    [[ -n $HACMP_MIRRORSET_MAXTRIES ]] && (( $HACMP_MIRRORSET_MAXTRIES < 1 )) && HACMP_MIRRORSET_MAXTRIES=1
fi
MAX_TRIES=${HACMP_MIRRORSET_MAXTRIES:-${MAX_TRIES}}         # default is 3 tries

if [[ -z ${LV_HACMP_MIRRORSET_MAXTRIES} ]]
then
    #
    : Note the use of grep to check the environment variable - the settings
    : in /etc/environment do not get passed to HACMP shell scripts
    #
    eval $(grep -w '^LV_HACMP_MIRRORSET_MAXTRIES' /etc/environment)
    #  A value below 1 picked up here is raised to 1
    [[ -n ${LV_HACMP_MIRRORSET_MAXTRIES} ]] && (( ${LV_HACMP_MIRRORSET_MAXTRIES} < 1 )) && LV_HACMP_MIRRORSET_MAXTRIES=1
fi
LV_MAX_TRIES=${LV_HACMP_MIRRORSET_MAXTRIES:-${LV_MAX_TRIES}}  # default is 3 tries

if [[ $HACMP_MIRROR_VARYON != "TRUE" ]]
then
    #
    : We will try to vary on forced only if directed to.  Since
    : it was not specified on a node-wide basis with the HACMP_MIRROR_VARYON
    : environment variable - backwards compatibility with the prior release -
    : check to see if its been specified in the resource group defintion.
    #
    if [[ $FORCED_VARYON == 'false' ]]
    then
        cl_msg -e e -m 10629 "%s[%d]: Volume group \"%s\" cannot be forced varyon because forced varyon is disallowed\n" $PROGNAME $LINENO $vgname 2>&1 | tee -a $CLUTILS_LOG
        return $mirrorset
    fi

    if [[ -z $GROUPNAME ]] ; then
        #
        : Figure out what resource group owns this volume group
        #
        GROUPNAME=$(clodmget -q "name like '*VOLUME_GROUP' and value = $vgname" -f group -n HACMPresource)
        if [[ -z $GROUPNAME ]]
        then
            #
            : Lacking a resource group definition that contains this volume
            : group, there can be no specification of forced varyon.
            #
            #  Should not happen; should not be called with a volume group
            #  not in a resource group.
            #
            return $mirrorset
        fi
    fi

    #
    : Check the resource group definition
    #
    FORCED_VARYON=$(odmget -q "group = $GROUPNAME and name = FORCED_VARYON and value = true" HACMPresource)
    if [[ -z $FORCED_VARYON || -n ${SKIP_FORCED_VARYON} ]]
    then
        cl_msg -e e -m 10628 "%s[%d]: Volume group \"%s\" cannot be forced varyon because the policies in resource group \"%s\" do not allow it\n" $PROGNAME $LINENO $vgname $GROUPNAME 2>&1 | tee -a $CLUTILS_LOG
        return $mirrorset               # Forced varyon is dis-allowed
    fi
fi

#
: Get the list of disks in the volume group
#
#  NOTE(review): if this list comes back empty (for example, an empty or
#  unknown volume group name), the loop below does not run, badpvlist
#  stays empty, and the script reports "All disks ... are readable" and
#  returns 0.  Confirm that callers never pass such a name.
#
pvlist=$(lspv -L | grep -w $vgname | cut -f1 -d' ')

#
: Now, find out which disks are actually readable
#
goodpvlist=""
badpvlist=""
for pv in $pvlist
do
    #
    : Try up to $MAX_TRIES times to read $pv
    #
    for (( retry_count=1 ; retry_count<=$MAX_TRIES ; retry_count++ ))
    do
        if saved_msg=$(cl_querypv -q /dev/$pv 2>&1)
        then
            #
            : Successful read of $pv on the ${retry_count} try
            #
            goodpvlist=${goodpvlist:+"${goodpvlist} "}${pv}
            if (( $retry_count > 1 ))
            then
                cl_msg -e 0 -m 10621 "%s[%d]: Disk %s became readable after %d tries\n" $PROGNAME $LINENO $pv $retry_count 2>&1 | tee -a $CLUTILS_LOG
                #
                #  Logging to clutils.log for case when this routine
                #  not called in an event handling context.
                #
            fi
            break
        else
            if (( $retry_count == $MAX_TRIES ))
            then
                #
                : What I tell you $MAX_TRIES times is true...
                : $pv is unresponsive and assumed dead
                : Log the details
                #
                #  Each line of the saved cl_querypv output is prefixed
                #  with a timestamp and this script's name.
                #
                TIME="$(LC_ALL=C date +"%h %e %Y %H:%M:%S")"
                print -- "$saved_msg" | sed "s/^/${TIME} cl_mirrorset[$LINENO]: /" | tee -a $CLUTILS_LOG
                badpvlist=${badpvlist:+"${badpvlist} "}${pv}
            fi
        fi
    done
done

#
: Look at what we know so far - does it make sense to go on
#
if [[ -n $badpvlist ]]
then
    cl_msg -e e -m 10622 "%s[%d]: The following disks in volume group \"%s\" could not be read after %d tries: %s\n" \
        $PROGNAME $LINENO $vgname $MAX_TRIES "$badpvlist" 2>&1 | tee -a $CLUTILS_LOG
else
    #
    : All disks are readable, so we should be good to go
    #
    cl_msg -e 0 -m 10623 "%s[%d]: All disks in volume group \"%s\" are readable\n" $PROGNAME $LINENO $vgname
    return 0
fi

#
: If we have at least one readable disk, but some that are not, test
: to see if there is one accessable copy of every logical partition
: in every logical volume
#
if [[ -n $goodpvlist ]]
then
    cl_msg -e 0 -m 10624 "%s[%d]: The following disks in volume group \"%s\" are readable: %s\n" $PROGNAME $LINENO $vgname "$goodpvlist"

    #
    : Used to egrep the partition map - an 'or' of all the names of the
    : available disks.
    #
    pattern=$(echo $goodpvlist | tr ' ' '|')

    #  Assume a complete mirrorset until some logical volume proves otherwise
    mirrorset=0

    #
    : Look at the first readable disk - should be as good as any
    #  Since the volume group $vgname may not be vary'd on at this
    #  point, the lquery operations will have to go directly to disk.
    #
    echo $goodpvlist | read goodpv rest

    #
    : For each logical volume, check its partition map
    #
    lqueryvg -p $goodpv -L | \
    while read lvid lvname rest
    do
        #
        : Do not check aio_cache logical volumes because they are not
        : mirrored across sites.
        #
        lvtype=$(lquerylv -T -L $lvid -p $goodpv)
        if [[ $lvtype == "aio_cache" ]]
        then
            continue
        fi

        #
        : Try up to $LV_MAX_TRIES times to read partition information for $lvname
        #
        integer lvpars=0
        integer goodpars=0
        for (( retry_count=1 ; retry_count<=$LV_MAX_TRIES; retry_count++ ))
        do
            #
            : Read LV partition map
            #
            partition_map=$(lslv -L -m -n $goodpv $lvname)
            lslv_rc=$?
            if (( ${lslv_rc} != 0 ))
            then
                #
                : Verify if disk has gone bad
                #
                if saved_msg=$(cl_querypv -q /dev/$goodpv 2>&1)
                then
                    #
                    : lslv command failed. So we do not have good data. Try again if ${LV_MAX_TRIES} is not reached.
                    #
                    echo "$PROGNAME[$LINENO]: Failed to read partition map of LV $lvname from disk $goodpv using lslv in $retry_count attempt(s)." | tee -a $CLUTILS_LOG
                    if (( $retry_count == ${LV_MAX_TRIES} ))
                    then
                        echo "$PROGNAME[$LINENO]: This was the last attempt to read read partition map of LV $lvname from disk $goodpv." | tee -a $CLUTILS_LOG
                        break
                    fi
                    continue
                else
                    #
                    : Disk $goodpv is no longer accessable. Switch to next disk.
                    #
                    #  Move $goodpv from the readable list to the
                    #  unreadable list.
                    #
                    new_badpv=$goodpv
                    old_goodpvlist=$goodpvlist
                    goodpvlist=""
                    for pv in ${old_goodpvlist}
                    do
                        if [ "$pv" != "${new_badpv}" ]
                        then
                            goodpvlist=${goodpvlist:+"${goodpvlist} "}${pv}
                        else
                            badpvlist=${badpvlist:+"${badpvlist} "}${pv}
                        fi
                    done
                    if [[ -n $goodpvlist ]]
                    then
                        echo $goodpvlist | read goodpv rest
                        #
                        : Continue to read LV partition map using $goodpv
                        #
                        pattern=$(echo $goodpvlist | tr ' ' '|')
                        #
                        #  'continue' still runs the loop's retry_count++
                        #  step, so reset to 0: the new disk then starts
                        #  at attempt 1 and gets the full $LV_MAX_TRIES
                        #  tries.  Resetting to 1 would skip an attempt,
                        #  and with LV_MAX_TRIES=1 the new disk would
                        #  never be read.
                        #
                        retry_count=0
                        #
                        : Reset retry_count to $retry_count
                        #
                        continue
                    else
                        #
                        : The last good disk has gone bad.
                        #
                        echo "$PROGNAME[$LINENO]: Volume group $vgname has no readable disks anymore" | tee -a $CLUTILS_LOG
                        mirrorset=1
                        break
                    fi
                fi
            fi

            #
            : Retrieve the number of logical partitions in $lvname using the parition map
            #
            #  The first two lines of the lslv output are skipped
            #
            lvpars=$(echo "${partition_map}" | tail -n +3 | wc -l | sed "s/ //g")

            #
            : Grep for known good disks in the partition map for this logical
            : volume.  Grep returns a count of the lines, one per logical
            : partition, that it finds a good disk on.
            #
            goodpars=$(echo "${partition_map}" | tail -n +3 | egrep -c -w $pattern)

            if (( $lvpars == $goodpars ))
            then
                #
                : Every logical partition in $lvname has a copy on a readable disk
                #
                if (( $retry_count > 1 ))
                then
                    cl_msg -e 0 -m 10700 "%s[%d]: After %d retries, the defined and available partition count for logical volume %s matched at %d\n" \
                        $PROGNAME $LINENO $retry_count $lvname $goodpars 2>&1 | tee -a $CLUTILS_LOG
                fi
                break
            else
                #
                : On attempt $retry_count, the defined partition count $lvpars
                : does not match the available partition count $goodpars
                #
                #  This loop is bounded by LV_MAX_TRIES, so the
                #  last-attempt test and the reported count use that
                #  limit rather than the disk-read limit MAX_TRIES.
                #
                if (( $retry_count == $LV_MAX_TRIES ))
                then
                    cl_msg -e e -m 10701 "%s[%d]: After %d retries, the defined partition count %d for logical volume %s does not match the available partition count %d\n" \
                        $PROGNAME $LINENO $LV_MAX_TRIES $lvpars $lvname $goodpars 2>&1 | tee -a $CLUTILS_LOG
                fi
            fi
        done

        if (( ${lslv_rc} != 0 )) || [[ -z $partition_map ]]
        then
            #
            : All attempts to read partition map of LV $lvname failed.
            #
            mirrorset=1     # Return failure, because we can not determine if local node has a full copy.
            break           # Quit because, we have one LV which might not have a full copy.
        fi

        if (( $lvpars != $goodpars )) ; then
            #
            : If the counts do not match, not every logical partition
            : has a good copy, and the mirrorset for this logical volume
            : is incomplete
            #
            cl_log 10625 "%s[%d]: Unable to vary on volume group \"%s\" because logical volume \"%s\" is incomplete\n" $PROGNAME $LINENO $vgname $lvname 2>&1 | \
                tee -a $CLUTILS_LOG
            mirrorset=1

            #
            : Show the logical partitions that are not mapped to the
            : surviving disks.
            #
            lslv -L -m -n $goodpv $lvname | tail -n +3 | egrep -v -w $pattern | \
            while read LP pp1 pv1 pp2 pv2 pp3 pv3 rest
            do
                cl_msg -e 0 -m 10626 "%s[%d]: Logical partition %s in logical volume \"%s\" in volume group \"%s\" \
exists only on unreadable disks \"%s\"\n" $PROGNAME $LINENO $LP $lvname $vgname "$pv1 $pv2 $pv3" 2>&1 | tee -a $CLUTILS_LOG
            done
            break                       # If any logical partition is not mapped, quit
        else
            #
            : Every logical partition in $lvname has a copy on a readable disk
            #
            cl_msg -e 0 -m 10702 "%s[%d]: Every one of the %d logical partitions in %s has a copy on a readable disk\n" \
                $PROGNAME $LINENO $lvpars $lvname 2>&1 | tee -a $CLUTILS_LOG
        fi
    done
else
    #
    : No good PVs found
    #
    cl_log 10627 "%s[%d] volume group \"%s\" has no readable disks\n" $PROGNAME $LINENO $vgname 2>&1 | tee -a $CLUTILS_LOG
    mirrorset=1
fi

#
: Indicate whether there is a complete mirrorset, making a forced varyon
: meaningful
#
return $mirrorset