# $Header: emdb/sysman/admin/scripts/alertlogAdr.pl /st_emdbsa_11.2/1 2010/04/05 01:10:20 mappusam Exp $
#
# alertlogAdr.pl
#
# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 
#
#    NAME
#      alertlogAdr.pl - scans alert log file(s) in ADR for patterns
#
#    DESCRIPTION
#      This script greps through alert log(s) in ADR for incident, operational
#      error, or data failure.  Matching lines are returned in a collection
#      along with the most recent timestamp logged.
#
#    NOTES
#      The output format (column list) of this script is dynamic: the number of
#       columns == n+7, n==EM_METRIC_ERROR_PATTERNS.
#       1: Timestamp/Line Number
#       2: Error category
#       3. (n+2): Errors matching patterns
#          EM_METRIC_ERROR_PATTERN1..EM_METRIC_ERROR_PATTERNn
#       n+3: Trace file
#       n+4: Error code list
#       n+5: Alert log filename
#       n+6: Impact
#       n+7: Incident ID
#       n+8: Number of failures
#
#    MODIFIED   (MM/DD/YY)
#       mappusam 03/30/10 - Backport mappusam_bug-8783120 from
#                           st_emdbgc_10.2.0.1.0
#       keiwong  03/03/07 - Change state directory
#       loliu    02/20/07 - Removed AdrEnabled property
#       keiwong  02/19/07 - add adrAlertLogDataFailure metric
#       keiwong  07/24/06 - Creation
#

# use/require appropriate libraries
require "emd_common.pl";
require "alertlogAdr_util.pl";

use strict;
use Time::HiRes;

use vars qw($STACK_BEGIN_PATTERN $STACK_TIME_PATTERN);
use vars qw($STACK_TXT_BEGIN_PATTERN $STACK_TXT_END_PATTERN);
use vars qw($STACK_ATTR_BEGIN_PATTERN $STACK_ATTR_NAME_PATTERN $STACK_ATTR_VALUE_PATTERN);

# command line arguments
my $adrHome = shift(@ARGV); # ADR home
my $limitSwitch = shift(@ARGV);    # The switch that limits the amount of
                                   # output this scan can produce.
                                   # Convention: > 0 => time (sec.) to spend
                                   #             eq "EOF" => skip to the end
                                   #             otherwise ignore

my $targetGuid = $ENV{EM_TARGET_GUID};

# establish alert directory
my $alertDir = get_adr_alert_dir($adrHome);

if (!$alertDir || !-e $alertDir || !-d $alertDir)
{
    # quit if alert directory is not valid
    EMAGENT_PERL_ERROR("Invalid alert directory: $alertDir");
    exit 1;
}

my $state_root = $ENV{EM_AGENT_STATE_DIR};

# get alert log(s), sorted with current file first
my @alertLogs = get_adr_alert_logs($adrHome);

# current log file (absolute)
my $log_file_absolute = $alertLogs[$[];

# check the accessibility and existence of the current alert log
if (@alertLogs < 1 || $log_file_absolute =~ /.*\.\d+$/ || !open(LOG, "< $log_file_absolute"))
{
    # This can happen in two cases:
    # 1) Alert log exists (i.e., was identifiable) but is not readable.  This
    #    certainly warrants a warning message.
    # 2) Alert log does not exist, and has been removed since the log file was
    #    identified.  This is not technically a warning situation -- ideally,
    #    the alert log identification should be cleared, but at this point
    #    that's not feasible.
    if (EMAGENT_isPerlWarningEnabled())
    {
        EMAGENT_PERL_WARN("target: $targetGuid; unable to open alert log at $log_file_absolute");
    }

    exit 0;
}

close LOG;

# establish the error type patterns
my @errorTypePatterns = get_error_type_patterns();

# initialize all element to 0
my @errorTypeMatched = (0) x @errorTypePatterns;

# build the fully qualify scanned file path
my $separator = $^O =~ m/MSWin32/ ? "\\" : "\/";

# location of the offsets state file is
#  $EMDROOT/sysman/emd/state/adr/<TARGET_GUID>.alert.log.xml.state
my $scannedFile = $state_root.$separator."sysman".$separator."emd".$separator."state".$separator."adr".$separator."$targetGuid.alert.log.xml.state";

# capture start time, in seconds and track the number of stacks processed so
# the time check is performed relatively infrequently.
my $start_time = Time::HiRes::time;
my $stacks_processed = 0;

# Limit switch must be:
# - a positive integer (representing seconds)
# - the string "EOF" (indicating that the tail should be skipped)
# - the string "tail" (indicating the normal behavior, process the entire tail)
# When it's something other than these, the "tail" behavior is followed.
if ($limitSwitch =~ m/^\d+$/)
{
    if (EMAGENT_isPerlDebugEnabled())
    {
        EMAGENT_PERL_DEBUG("Stopping soon after $limitSwitch seconds.");
    }
}
elsif ($limitSwitch eq "EOF")
{
    if (EMAGENT_isPerlDebugEnabled())
    {
        EMAGENT_PERL_DEBUG("Skipping remaining entries in $log_file_absolute");
    }
}
else
{
    if ($limitSwitch ne "tail")
    {
        if (EMAGENT_isPerlWarningEnabled())
        {
            EMAGENT_PERL_WARN("Invalid limit switch specification ($limitSwitch) -- using 'tail' option");
        }
    }

    $limitSwitch = 0;
}

# open scanned file for reading (if scanned file does not exist, create it.)
if (!open(SCANNED, "+>> $scannedFile"))
{
    EMAGENT_PERL_ERROR("target: $targetGuid; Cannot open $scannedFile for read/write.");
    exit 1;
}

# seek to top of file
seek(SCANNED, 0, 0);

my $offset = 0;
my $line = 0;
my $firstStackTimestamp = "";

# file contains a single line with three numbers: offset, line number, and firstStackTimestamp
# get these and close the scanned file
while (<SCANNED>)
{
    my @pos = split(' ', $_);
    my $pos = \@pos;

    $offset = $pos->[0];
    $line = $pos->[1];
    $firstStackTimestamp = $pos->[2];
}

if (EMAGENT_isPerlDebugEnabled())
{
    EMAGENT_PERL_DEBUG("Read $scannedFile: offset=$offset line=$line firstStackTimestamp=$firstStackTimestamp");
}

close(SCANNED);

$firstStackTimestamp =~ s/ //g;

my @errStackKeys;          # "keys" for already seen error stacks
my $initialTimestamp = ""; # timestamp for the current error stack

if ($offset == 0 || !$firstStackTimestamp || $limitSwitch eq "EOF")
{
    if (EMAGENT_isPerlDebugEnabled())
    {
        EMAGENT_PERL_DEBUG("target $targetGuid; counting alert log lines only");
    }

    if (!open(LOG, "< $log_file_absolute"))
    {
	$offset = $line = 0;
	$firstStackTimestamp = "";

	if (EMAGENT_isPerlWarningEnabled())
	{
	    EMAGENT_PERL_WARN("target: $targetGuid; unable to open alert log at $log_file_absolute");
	}	
    }
    else
    {
	my $currentTimestamp = "";
	my $stackFound = 0;

	seek(LOG, 0, 0);

	# get first timestamp
	while (<LOG>)
	{
	    if (/$STACK_BEGIN_PATTERN/)
	    {
		if ($stackFound)
		{
		    # second stack found, end search
		    last;
		}

		$stackFound = 1;
	    }

	    if ($stackFound && /$STACK_TIME_PATTERN/)
	    {
		$currentTimestamp = $1;
		last;
	    }
	}

	# saved first timestamp does not match current first timestamp, reset
	if (!$firstStackTimestamp || $firstStackTimestamp ne $currentTimestamp)
	{
	    $offset = $line = 0;
	    $firstStackTimestamp = $currentTimestamp;
	}

	# without first timestamp, offset and line are invalid
	if ($firstStackTimestamp)
	{
	    seek(LOG, $offset, 0);

	    while (<LOG>)
	    {
		# always keep offset and line in sync
		(my $pos = tell(LOG)) || last;
		$offset = $pos;
		$line++;
	    }
	}
    }
}
else
{
    # get logs with first timestamp >= saved first timestamp
    @alertLogs = get_current_alert_logs($adrHome, $firstStackTimestamp);

    if (EMAGENT_isPerlDebugEnabled())
    {
        EMAGENT_PERL_DEBUG(scalar @alertLogs . " current alert logs found for timestamp $firstStackTimestamp");
    }

    # no current alert logs found, reset
    if (@alertLogs < 1)
    {
	$offset = $line = 0;
	$firstStackTimestamp = "";
    }
    else
    {
	for my $i ($[..@alertLogs + $[ - 1)
	{
	    $log_file_absolute = $alertLogs[$i];

	    if (!open(LOG, "< $log_file_absolute"))
	    {
		if (EMAGENT_isPerlWarningEnabled())
		{
		    EMAGENT_PERL_WARN("target: $targetGuid; unable to open alert log at $log_file_absolute");
		}

		last;
	    }

	    my $currentTimestamp = "";
	    my $stackFound = 0;

	    seek(LOG, 0, 0);

	    # get first timestamp
	    while (<LOG>)
	    {
		if (/$STACK_BEGIN_PATTERN/)
		{
		    if ($stackFound)
		    {
			# second stack found, end search
			last;
		    }

		    $stackFound = 1;
		}

		if ($stackFound && /$STACK_TIME_PATTERN/)
		{
		    $currentTimestamp = $1;
		    last;
		}
	    }

	    if (EMAGENT_isPerlDebugEnabled())
	    {
		EMAGENT_PERL_DEBUG("First stack timestamp for $log_file_absolute: $currentTimestamp");
	    }

	    if (!$currentTimestamp)
	    {
		# no first timestamp for some reason, end processing
		last;
	    }

	    # reset unless saved offset is valid (must be the first file)
	    unless ($i == $[ && $firstStackTimestamp eq $currentTimestamp)
	    {
		$offset = $line = 0;
		$firstStackTimestamp = $currentTimestamp;
	    }

	    if (EMAGENT_isPerlDebugEnabled())
	    {
		EMAGENT_PERL_DEBUG("target $targetGuid; processing alert log tail starting at byte offset $offset, line $line");
	    }

	    #
	    # Scan, and process, the alert logs as long as the byte offset is valid
	    # for the existing alert log.
	    #
	    my $lineNumber = 0;        # line number of begining of stack
	    my $errStack = "";         # content of the current error stack
	    my $errCodes = "";         # key for the current error stack
	    my $traceFileName = "";    # name of trace file
	    my $impact = "";           # impact of error
	    my $incidentId = "";       # incident ID

	    #
	    # Go directly to EOF (Do not pass GO. Do not collect $200.)
	    # Do not read past the end of the file (at the time we first looked).
	    #
	    seek(LOG, 0, 2);

	    my $eof_offset = tell(LOG);

	    if (EMAGENT_isPerlDebugEnabled())
	    {
		EMAGENT_PERL_DEBUG("EOF position of $log_file_absolute is $eof_offset.");
	    }

	    if ($offset <= $eof_offset)
	    {
		#
		# A valid offset was provided, seek to it and process the lines.
		#
		seek(LOG, $offset, 0);

		my $prev_line_offset = $offset;
		my $readingText = 0;
		my $readingAttr = 0;
		my $readingImpact = 0;

		while (<LOG>)
		{
		    my $pos;

                    if ($pos = tell(LOG))
                    {
			$offset = $pos;
		    }

		    $line++;

		    #
		    # Look for a new stack
		    #
		    if ($_ =~ /$STACK_BEGIN_PATTERN/)
		    {
			#
			# Check for a time or size limiting condition.
			# (Note: $prev_line_offset is a function of the current size of the
			#        file, $eof_offset is the original size of the file;
			#        we don't want to "chase" the end of the file when
			#        it's growing at a rate faster than our scan rate!)
			#
			if (&outOfTime || $prev_line_offset >= $eof_offset)
			{
			    #
			    # "Unscan" the last line by backing up to the
			    # previous offset and a single line.
			    #
			    $line--;
			    $offset = $prev_line_offset;

			    if (EMAGENT_isPerlDebugEnabled())
			    {
				EMAGENT_PERL_DEBUG("Stopped at " . Time::HiRes::time);
			    }

			    #
			    # Break from the input loop BEFORE writing the error stack
			    # (the last error stack is written after all lines to be
			    # read are).
			    #
			    last;
			}

			#
			# Dump the existing errStack, if there is one.
			#
			if ($initialTimestamp && $errStack)
			{
			    &writeErrStack($initialTimestamp, $lineNumber,
					   $errStack, $errCodes, $traceFileName,
					   $impact, $incidentId, get_number_of_failures($errStack));
			}

			#
			# Begin a new error stack
			#
			$lineNumber = $line;

			$errCodes = "";   # reset information regarding the stack
			$errStack = "";
			$initialTimestamp = "";
			$traceFileName = "";
			$impact = "";
			$incidentId = "";

			$stacks_processed++;

			$readingText = 0;
			$readingAttr = 0;
			$readingImpact = 0;
			@errorTypeMatched = (0) x @errorTypeMatched;
		    }

		    #
		    # Process error stack content.
		    #

		    if (!$initialTimestamp && $_ =~ /$STACK_TIME_PATTERN/)
		    {
            $initialTimestamp = $1;
		    }

		    if (!$traceFileName)
		    {
            $traceFileName = get_trace_file($_, $adrHome);
		    }

		    if (!$impact)
		    {
            while (1)
            {
                if ($readingImpact)
                {
                    if (/$STACK_ATTR_VALUE_PATTERN/)
                    {
                        $impact = $1;
                        $readingImpact = 0;
                    }
                }
                elsif ($readingAttr)
                {
                    if (/$STACK_ATTR_NAME_PATTERN/)
                    {
                        $readingAttr = 0;
            
                        if ($1 =~ /^Impact$/i)
                        {
                            $readingImpact = 1;
                            redo;
                        }
                    }
                }
                elsif (/$STACK_ATTR_BEGIN_PATTERN/)
                {
                    $readingAttr = 1;
                    redo;
                }
      
                last;
            }
		    }

		    if (!$incidentId)
		    {
            $incidentId = get_errid($_);
		    }

		    #
		    # Check error types
		    #
		    check_error_types($_);

		    $prev_line_offset = tell(LOG);

		    #
		    # Discontinue adding error stack content if we've hit the limit
		    # on string size supported by EM.
		    #
		    if (length($errStack) >= 4000)
		    {
            $readingText = 0;
            next;
		    }

		    my $problemKey = get_prob_key($_);
		    my $errCode = &errorCodeOf($problemKey);

		    if ($errCode)
		    {
            if ($errCodes)
            {
                $errCodes .= ", ";
            }
            
            $errCodes .= $errCode;
		    }

		    my $msgTxt;

		    # get error stack text
		    if (/$STACK_TXT_BEGIN_PATTERN(.*)/)
		    {
            $msgTxt = $1;
            
            if ($msgTxt =~ /$STACK_TXT_END_PATTERN/)
            {
                $msgTxt =~ s/$STACK_TXT_END_PATTERN.*//;
                $readingText = 0;
            }
            else
            {
                $readingText = 1;
            }
		    }
		    elsif ($readingText)
		    {
            $msgTxt = $_;
      
            if ($msgTxt =~ /$STACK_TXT_END_PATTERN/)
            {
                $msgTxt =~ s/$STACK_TXT_END_PATTERN.*//;
                $readingText = 0;
            }
		    }

		    if ($msgTxt)
		    {
            $msgTxt = uncdata($msgTxt);
        
            $msgTxt =~ s/^ *//;
            $msgTxt =~ s/ *$//;
            chomp $msgTxt;
        
            if ($msgTxt)
            {
                if (!$errStack)
                {
              $errStack .= $msgTxt;
                }
                else
                {
              $errStack .= "~" . $msgTxt;
                }
            }
		    }
		}

		#
		# Dump the final errStack.
		#
		if ($initialTimestamp && $errStack)
		{
		    &writeErrStack($initialTimestamp, $lineNumber,
				   $errStack, $errCodes, $traceFileName,
				   $impact, $incidentId, get_number_of_failures($errStack));
		}
	    }
	    else
	    {
		#
		# We know the offset is bogus, so alert log has been recycled;
		# offset will be computed later, but line count should be computed now.
		#
		seek(LOG, 0, 0);

		$line = 0;
		while (<LOG>)
		{
		    my $pos;

		    if ($pos = tell(LOG))
		    {
			$offset = $pos;
		    }

		    $line++;
		}
	    }

	    close LOG;
	}
    }
}

#
# Reopen scanned file and write out new offset (log size), line number,
# and time
#
open (SCANNED, "+> $scannedFile") or die "em_error=Cannot open $scannedFile";
print SCANNED "$offset $line $firstStackTimestamp\n";
close(SCANNED);

#
# subroutine get_error_type_patterns
#  Return an array of patterns that define error types.
#
sub get_error_type_patterns {
    my $numberOfTypes = $ENV{EM_ALERTLOG_ERROR_TYPES};
    my @typePatterns;

    if (EMAGENT_isPerlDebugEnabled())
    {
        EMAGENT_PERL_DEBUG("$numberOfTypes error types defined.");
    }

    for ($[+1..$numberOfTypes)
    {
        $typePatterns[$_-1] = [ split / *, */, $ENV{"EM_ALERTLOG_ERROR_TYPE$_"} ];

        if (!@{$typePatterns[$_-1]})
        {
            EMAGENT_PERL_WARN("Error type $_ contains no definition.");
        }
        else
        {
            if (EMAGENT_isPerlDebugEnabled())
            {
                EMAGENT_PERL_DEBUG("Error type pattern: " . join ',', @{$typePatterns[$_-1]});
            }
        }
    }

    return @typePatterns;
}

#
# subroutine check_error_types
#  Check if current error stack match the error types
#
# $_[0] - log record
#
sub check_error_types {
    my ($logRecord) = @_;

    for my $i ($[..@errorTypePatterns + $[ -1)
    {
	# skip the first column
	for my $n ($[+1..@{$errorTypePatterns[$i]} + $[ - 1)
	{
	    if ($logRecord =~ /$errorTypePatterns[$i][$n]/)
	    {
	        # bump up the matched count
	        $errorTypeMatched[$i]++;

                if (EMAGENT_isPerlDebugEnabled())
                {
                    EMAGENT_PERL_DEBUG("Matched pattern: $errorTypePatterns[$i][$n]");
                }
	    }
	}
    }
}

#
# subroutine get_matched_entry
#  Get the index of current matched entry
#
sub get_matched_entry {
    my $index = -1;
    my $i;

    for $i ($[..@errorTypePatterns + $[ -1)
    {
        # assuming each pattern can be matched only once
        if ($errorTypeMatched[$i] >= @{$errorTypePatterns[$i]} - 1)
	{
	    $index = $i;
	    last;
	}
    }

    if (EMAGENT_isPerlDebugEnabled())
    {
        EMAGENT_PERL_DEBUG("Matched entry: " . join ',', @{$errorTypePatterns[$i]});
    }

    return $index;
}

#
# subroutine outOfTime
#  Determine if we've bumped up against a time limit.
#
sub outOfTime {
  #
  # Check every 512 log entries to see if we've been time-limited
  #
  !($stacks_processed % 128) &&
  $limitSwitch > 0 &&
  Time::HiRes::time - $start_time >= $limitSwitch;
}

#
# subroutine errorCodeOf
#  Extract the error codes of the line.
#
# $_[0]      - line to search for error codes in
#
sub errorCodeOf {
    my $error = $_[0];

    # starts with alphanumeric characters and a space followed by digits
    if ($error =~ m/^\w+ \d+/)
    {
        # change first space to '-' and get rid of other space characters
        $error =~ s/ /-/;
        $error =~ s/ //g;
    }

    $error;
}

#
# subroutine writeErrStack
#  Write out the current error stack.  Do nothing if we've seen this before.
#
# $_[0] - initialTimestamp
# $_[1] - lineNumber
# $_[2] - errStack
# $_[3] - errCodes
# $_[4] - traceFileName
# $_[5] - impact
# $_[6] - incidentId
# $_[7] - numberOfFailures
#

sub writeErrStack {
    my $matchedIndex = get_matched_entry();

    if ($matchedIndex < 0)
    {
        return;
    }

    #
    # combine all similar error stacks
    #
    # to revert to only rolling up identical 600 and 7445 errors,
    #  and (&&) the following condition to the first argument of grep:
    #
    #    m/(ORA-600\[[^\]]*\])|(ORA-7445\[[^\]]*\])/
    #
    my @matchedStackKeys = grep($_ eq $_[3], @errStackKeys);

    if ($#matchedStackKeys == ($[ - 1))
    {
        push(@errStackKeys, $_[3]);

	my $errorCategory = $errorTypePatterns[$matchedIndex][0];
	my $formattedTime = to_ctime($_[0]);

        #
        # Compose the output line
        #

	# timestamp/lineNumber|errorCategory|
        my $outputLine = "em_result=$formattedTime/$_[1]|$errorCategory|";

        my $errorColumns = "";
        my $column = 0;

	while ($column < @errorTypePatterns)
        {
            if ($column == $matchedIndex)
            {
		$errorColumns .= $_[2];
            }

            $errorColumns .= "|";
            $column++;
        }

	# other columns
        $outputLine .= $errorColumns .
	    "$_[4]|$_[3]|$log_file_absolute|$_[5]|$_[6]|$_[7]\n";

	print $outputLine;
	EMD_PERL_DEBUG($outputLine);
    }
}

exit 0;
