<!-- $Header: emdb/sysman/admin/default_collection/osm_instance.xml /st_emdbsa_11.2/9 2009/03/13 10:02:56 mpawelko Exp $
   MODIFIED    (MM/DD/YY)
      mpawelko  03/09/09 - 8316983: change ofs_performance_metrics to 1 hr
      mpawelko  02/20/09 - 6996096: split performance metrics into 2
                           collections
      fagonzal  02/18/09 - Adding checker failures alert
      ajdsouza  02/17/09 - added resource_name and ishasmanaged metric collections
      mnihalan  09/08/08 - Change 11gR1TB to 11gR1
      mnihalan  08/29/08 - Fix bug 7342407
      yozhang   08/11/08 - Name change OFS to ACFS
      asubba    07/29/08 - diskgroup size alert enhancemnt
      mnihalan  04/17/08 - Fix alert message for OFS
      dchakumk  03/10/08 - XbranchMerge dchakumk_6752_xbmsrc from
                           st_emdbsa_11.1
      mnihalan  02/11/08 - Add collection for Volume Performance metrics
      mnihalan  01/25/08 - Change OFS alerts
      mnihalan  01/23/08 - Fix OFS bug 6754205
      fagonzal  12/06/07 - Adding/updating alert log metrics
      asubba    09/25/07 - XbranchMerge asubba_ui_improvements from main
      mpawelko  08/30/07 - XbranchMerge mpawelko_bug-6266985 from main
      jsoule    08/07/07 - collect ADR alert metrics
      qsong     07/02/07 - bug 6165631 - combine all the diskgroup metrics (which has 15 minutes as default time interval) into one collection item
      mpawelko  04/18/07 - bug 6003984: fix failure group unavailable; add
                           review changes
      mpawelko  03/14/07 - add new imbalance and failure group metrics
      mpawelko  03/14/07 - add new imbalance and failure group metrics
      mpawelko  03/14/07 - add new imbalance and failure group metrics
      qsong     03/06/07 - remove dg rebalance metric
      qsong     10/23/06 - bug 5619037, combine all the disk/diskgroup related metrics to one collection item
      qsong     11/03/06 - 
      chanchan  10/13/06 - 
      qsong     03/23/06 - 
      qsong     10/23/06 - bug 5619037, combine all the disk/diskgroup related metrics to one collection item
      mnihalan  08/02/06 - Fix bug 5406081
      qsong     07/11/06 - 
      ajdsouza  07/27/05 - collect disk_path once a day
      rajeshar  05/11/05 - bug 4350509
      rajeshar  04/07/05 - bug 4094328
      rajeshar  03/14/05 - bug 4191804
      jochen    12/21/04 - Fix NLS resources 
      rajeshar  12/08/04 - Changing the collection time for 10gR1 and 10gR2 bug 3590905
      jochen    11/04/04 - Alert on percent used for 10.2 
      jochen    07/29/04 - Add failure reason to response alert 
      ychan     07/12/04 - Remove 10gBeta 
      jochen    06/25/04 - Use asm_disk_stat and asm_diskgroup_stat
      jochen    06/24/04 - Add mode_status=offline alert
      jochen    06/21/04 - Add media failures
      rreilly   06/07/04 - add asm ( osm ) policies
      sbadrina  05/10/04 -
      jochen    03/31/04 - Fix read_write_err alert
      lhan               - Multiple edits
      spanchum  10/14/02 - Initial file creation for 10gR1
-->

<TargetCollection TYPE="osm_instance" LEVEL="Oracle Recommended">

  <!-- Collection levels declared for this target type.  The enclosing
       TargetCollection element sets LEVEL="Oracle Recommended". -->
  <CollectionLevel NAME="Minimum"/>
  <CollectionLevel NAME="Oracle Recommended"/>

  <!-- Response: collected every 5 minutes.  A critical alert is raised
       when the Status column equals 0. -->
  <CollectionItem NAME="Response">
    <Schedule>
      <IntervalSchedule INTERVAL="5" TIME_UNIT="Min"/>
    </Schedule>
    <Condition
        COLUMN_NAME="Status"
        OPERATOR="EQ"
        CRITICAL="0"
        MESSAGE="Failed to connect to ASM instance. %oraerr%"
        MESSAGE_NLSID="Response_Status_alertmessage"/>
  </CollectionItem>


  <!--
  ==================================================================
  == DiskGroup_Usage - 10.2+
  ==================================================================
    -->
   <!-- We are alerting on both percent_used and safe_percent_used
        because in the case that this 10.2 agent is talking to a
        10.1 OMS, the Disk Groups Space page (link from host home page)
        displays status based on the percent_used threshold values.
        If these threshold values don't exist, the status comes up
        empty.  If we decide not to support the 10.1 OMS, remove the
        percent_used, since it has the side effect of causing 2 alerts for
        basically the same thing, and safe_percent_used should
        supersede percent_used - Bug #3972557 
        
        Due to bug 4350509, both collections are merged into one, which
        works for 10gR1 and 10gR2. -->

  <!-- diskgroup_space_usage: DiskGroup_Usage, Database_DiskGroup_Usage,
       Volumes_Summary and Single_Instance_OFS_Summary share a single
       15-minute schedule. -->
  <CollectionItem NAME="diskgroup_space_usage">
    <Schedule>
      <IntervalSchedule INTERVAL="15" TIME_UNIT="Min"/>
    </Schedule>

    <!-- percent_used and safe_percent_used warn above 75 and go critical
         above 90.  free_mb and usable_file_mb are declared with
         NotDefined thresholds. -->
    <MetricColl NAME="DiskGroup_Usage">
      <Condition
          COLUMN_NAME="percent_used"
          OPERATOR="GT"
          WARNING="75"
          CRITICAL="90"
          MESSAGE="Disk Group %dg_name% is %value%%% used."
          MESSAGE_NLSID="DiskGroup_Usage_alertmessage"/>
      <Condition
          COLUMN_NAME="safe_percent_used"
          OPERATOR="GT"
          WARNING="75"
          CRITICAL="90"
          MESSAGE="Disk group %dg_name% has used %value%%% of safely usable free space (space that can be allocated while still having enough space to recover from failure group failures)."
          MESSAGE_NLSID="DiskGroup_102_usage_alertmessage"/>
      <Condition
          COLUMN_NAME="free_mb"
          OPERATOR="LE"
          WARNING="NotDefined"
          CRITICAL="NotDefined"
          MESSAGE="Total free size for Disk Group %dg_name% has fallen to %value% (MB)."
          MESSAGE_NLSID="Diskgroup_Free_Size_alertmessage"/>
      <Condition
          COLUMN_NAME="usable_file_mb"
          OPERATOR="LE"
          WARNING="NotDefined"
          CRITICAL="NotDefined"
          MESSAGE="Usable free size for Disk Group %dg_name% has fallen to %value% (MB)."
          MESSAGE_NLSID="Diskgroup_Usable_Free_Size_alertmessage"/>
    </MetricColl>

    <!-- Collected without any alert conditions. -->
    <MetricColl NAME="Database_DiskGroup_Usage"/>
    <MetricColl NAME="Volumes_Summary"/>

    <!-- File system alerts: corruption flag equal to TRUE is critical;
         used percentage warns above 85 and is critical above 97. -->
    <MetricColl NAME="Single_Instance_OFS_Summary">
      <Condition
          COLUMN_NAME="ofs_corrupted_state"
          OPERATOR="EQ"
          CRITICAL="TRUE"
          MESSAGE="The ASM Cluster File System using volume device %ofs_volume_device% has sections that are corrupt. Run check and repair operation on the file system to fix the issue."
          MESSAGE_NLSID="OFS_Response_Corrupted_alertmessage"
          CLEAR_MESSAGE="CLEARED - The ASM Cluster File System using volume device %ofs_volume_device% has sections that are corrupt."
          CLEAR_MESSAGE_NLSID="OFS_Response_Corrupted_alertmessage_clear"/>
      <Condition
          COLUMN_NAME="ofs_used_pct"
          OPERATOR="GT"
          WARNING="85"
          CRITICAL="97"
          MESSAGE="The ASM Cluster File System using volume device %ofs_volume_device% is %ofs_used_pct%%% full. Resize the file system to add more space."
          MESSAGE_NLSID="OFS_Space_Usage_alertmessage"
          CLEAR_MESSAGE="CLEARED - The ASM Cluster File System using volume device %ofs_volume_device% is %ofs_used_pct%%% full."
          CLEAR_MESSAGE_NLSID="OFS_Space_Usage_alertmessage_clear"/>
    </MetricColl>
  </CollectionItem>

  <!-- Disk_Path: collected once every 24 hours; valid only for the
       10gR2, 11gR1 and 11gR2 version categories. -->
  <CollectionItem NAME="Disk_Path">
    <ValidIf>
      <CategoryProp NAME="VersionCategory" CHOICES="10gR2;11gR1;11gR2"/>
    </ValidIf>
    <Schedule>
      <IntervalSchedule INTERVAL="24" TIME_UNIT="Hr"/>
    </Schedule>
  </CollectionItem>

  <!--
  ======================================================================
  == Note: Collection time for single-instance Instance_Disk_Performance,
  == Instance_DiskGroup_Performance, and Single_Instance_DiskGroup_Performance
  == metrics.
  ======================================================================
  -->

  <!-- performance_metrics: three instance-level performance metrics on a
       15-minute schedule.  Only Instance_Disk_Performance carries an
       alert: any read_write_errs value above 0 is critical. -->
  <CollectionItem NAME="performance_metrics" COLLECT_WHEN_ALTSKIP="TRUE">
    <Schedule>
      <IntervalSchedule INTERVAL="15" TIME_UNIT="Min"/>
    </Schedule>

    <MetricColl NAME="Instance_Disk_Performance">
      <Condition
          COLUMN_NAME="read_write_errs"
          OPERATOR="GT"
          CRITICAL="0"
          MESSAGE="Disk %dg_name%.%disk_name% has %value% Read/Write errors."
          MESSAGE_NLSID="Disk_ReadWrite_alertmessage"/>
    </MetricColl>

    <MetricColl NAME="Instance_DiskGroup_Performance"/>

    <MetricColl NAME="Single_Instance_DiskGroup_Performance"/>
  </CollectionItem>

<!--
  ======================================================================
  == Note: Collection time for cluster-wide Disk_Performance and
  == DiskGroup_Performance metrics. Collection time is 60 minutes to
  == reduce load on cluster since each agent for each instance collects
  == this data.  (bug 6996096)
  ======================================================================
  -->

  <!-- cluster_performance_metrics: Disk_Performance and
       DiskGroup_Performance, collected hourly with no alert conditions. -->
  <CollectionItem NAME="cluster_performance_metrics" COLLECT_WHEN_ALTSKIP="TRUE">
    <Schedule>
      <IntervalSchedule INTERVAL="1" TIME_UNIT="Hr"/>
    </Schedule>
    <MetricColl NAME="Disk_Performance"/>
    <MetricColl NAME="DiskGroup_Performance"/>
  </CollectionItem>

  <!-- ofs_performance_metrics: Instance_Volume_Performance and
       Volume_Performance, collected hourly with no alert conditions. -->
  <CollectionItem NAME="ofs_performance_metrics" COLLECT_WHEN_ALTSKIP="TRUE">
    <Schedule>
      <IntervalSchedule INTERVAL="1" TIME_UNIT="Hr"/>
    </Schedule>
    <MetricColl NAME="Instance_Volume_Performance"/>
    <MetricColl NAME="Volume_Performance"/>
  </CollectionItem>
  
  <!--
  ======================================================================
  == Group: alert_log_rollup
  ======================================================================
    -->
  <!-- alert_log_rollup: alertLog and alertLogStatus metrics, collected
       every 5 minutes; valid only for the 10gR1 and 10gR2 version
       categories. -->
  <CollectionItem NAME="alert_log_rollup" COLLECT_WHEN_ALTSKIP="TRUE">
    <ValidIf>
      <CategoryProp NAME="VersionCategory" CHOICES="10gR1;10gR2"/>
    </ValidIf>
    <Schedule>
      <IntervalSchedule INTERVAL="5" TIME_UNIT="Min"/>
    </Schedule>

    <!--
    ====================================================================
    == Category: Alert Log
    == Fetchlet: Perl
    == (NB: An alert log error stack never contributes to more than one
    ==      event.)
    == User Properties: ignorePattern - Perl regular expression used to
    ==                                  suppress specific ORA-errors
    ====================================================================
    -->
    <MetricColl NAME="alertLog">

      <!-- User property: pattern to ignore. -->
      <ItemProperty NAME="ignorePattern">.*ORA-0*(54|1142|1146)\D.*</ItemProperty>

      <!-- Alert: Alert Log Error Stack (warning on a regular-expression
           match against the error stack). -->
      <Condition
          COLUMN_NAME="genericErrStack"
          OPERATOR="MATCH"
          WARNING="ORA-0*(600?|7445|4[0-9][0-9][0-9])[^0-9]"
          MESSAGE="ORA-error stack (%errCodes%) logged in %alertLogName%."
          MESSAGE_NLSID="alertLog_genericErrStack_alertmessage"
          NO_CLEAR_ON_NULL="TRUE"/>

      <!-- Alert: Archiver Hung Error Stack (critical). -->
      <Condition
          COLUMN_NAME="archiveHungErrStack"
          OPERATOR="CONTAINS"
          CRITICAL="ORA-"
          MESSAGE="The archiver hung at time/line number: %timeLine%."
          MESSAGE_NLSID="alertLog_archiveHungErrStack_alertmessage"
          NO_CLEAR_ON_NULL="TRUE"/>

      <!-- Alert: Data Block Corruption Error Stack (critical). -->
      <Condition
          COLUMN_NAME="blockCorruptErrStack"
          OPERATOR="CONTAINS"
          CRITICAL="ORA-"
          MESSAGE="A data block was corrupted at time/line number: %timeLine%."
          MESSAGE_NLSID="alertLog_blockCorruptErrStack_alertmessage"
          NO_CLEAR_ON_NULL="TRUE"/>

      <!-- Alert: Session Terminated Error Stack (warning). -->
      <Condition
          COLUMN_NAME="sessTerminateErrStack"
          OPERATOR="CONTAINS"
          WARNING="ORA-"
          MESSAGE="A session was terminated at time/line number: %timeLine%."
          MESSAGE_NLSID="alertLog_sessTerminateErrStack_alertmessage"
          NO_CLEAR_ON_NULL="TRUE"/>

      <!-- Alert: Media Failure Error Stack (critical). -->
      <Condition
          COLUMN_NAME="mediaFailureErrStack"
          OPERATOR="CONTAINS"
          CRITICAL="ORA-"
          MESSAGE="Media failure was detected at time/line number: %timeLine%."
          MESSAGE_NLSID="alertLog_mediaFailureErrStack_alertmessage"
          NO_CLEAR_ON_NULL="TRUE"/>
    </MetricColl>

    <!--
    ====================================================================
    == Category: Alert Log Error Status
    == Fetchlet: Aggregation
    == Every condition below raises a warning when its column is
    == greater than 0.
    ====================================================================
    -->
    <MetricColl NAME="alertLogStatus">

      <!-- User property: pattern to ignore. -->
      <ItemProperty NAME="ignorePattern">.*ORA-0*(54|1142|1146)\D.*</ItemProperty>

      <!-- Alert: Generic Alert Log Errors. -->
      <Condition
          COLUMN_NAME="genericErrors"
          OPERATOR="GT"
          WARNING="0"
          MESSAGE="%value% distinct types of ORA- errors have been found in the alert log."
          MESSAGE_NLSID="alertLogStatus_genericErrors_alertmessage"/>

      <!-- Alert: Archiver Hung Errors. -->
      <Condition
          COLUMN_NAME="archiveHungErrors"
          OPERATOR="GT"
          WARNING="0"
          MESSAGE="Archiver hung errors have been found in the alert log."
          MESSAGE_NLSID="alertLogStatus_archiveHungErrors_alertmessage"/>

      <!-- Alert: Data Block Corruption Errors. -->
      <Condition
          COLUMN_NAME="blockCorruptErrors"
          OPERATOR="GT"
          WARNING="0"
          MESSAGE="Data block corruption errors have been found in the alert log."
          MESSAGE_NLSID="alertLogStatus_blockCorruptErrors_alertmessage"/>

      <!-- Alert: Session Terminated Errors. -->
      <Condition
          COLUMN_NAME="sessTerminateErrors"
          OPERATOR="GT"
          WARNING="0"
          MESSAGE="Session terminations have been found in the alert log."
          MESSAGE_NLSID="alertLogStatus_sessTerminateErrors_alertmessage"/>

      <!-- Alert: Media Failure Errors. -->
      <Condition
          COLUMN_NAME="mediaFailureErrors"
          OPERATOR="GT"
          WARNING="0"
          MESSAGE="Media failure errors have been found in the alert log."
          MESSAGE_NLSID="alertLogStatus_mediaFailureErrors_alertmessage"/>
    </MetricColl>
  </CollectionItem>


  <!--
   ====================================================================
   == Collection Item: ASM Policies
   == Collection Interval: Once every 7 Days
   ====================================================================
   -->

  <!-- oracle_osm: osm_diskGroupPolicies, collected once every 7 days. -->
  <CollectionItem NAME="oracle_osm" UPLOAD_ON_FETCH="TRUE" CONFIG="TRUE">
    <Schedule>
      <IntervalSchedule INTERVAL="7" TIME_UNIT="Day"/>
    </Schedule>
    <MetricColl NAME="osm_diskGroupPolicies"/>
  </CollectionItem>

  <!--
  ======================================================================
  == Fetchlet: SQL
  == Due to bug 4350509 we have to keep a conservative interval of 15 minutes.
  == For Grid Control it is 15 minutes, as it can run against a 10gR1 target
  == where query performance is poor due to bug 4365041.
  == For 10.2SA it is 3 minutes, as it runs against 10.2 ASM, where there is
  == no performance issue.
  ======================================================================
      -->
  <!-- disk_status: per-disk offline alert plus a rolled-up offline
       count, both collected every 15 minutes. -->
  <CollectionItem NAME="disk_status">
    <Schedule>
      <IntervalSchedule INTERVAL="15" TIME_UNIT="Min"/>
    </Schedule>

    <!--
    ====================================================================
    == Metric: disk_status
    == Alert:  critical for each disk whose mode_status equals OFFLINE
    ====================================================================
    -->
    <MetricColl NAME="disk_status" TRANSIENT="TRUE">
      <Condition
          COLUMN_NAME="mode_status"
          OPERATOR="EQ"
          CRITICAL="OFFLINE"
          MESSAGE="Disk %dg_name%.%disk_name% is offline."
          MESSAGE_NLSID="modeStatusErr_alertmessage"/>
    </MetricColl>

    <!--
    ====================================================================
    == Metric: disk_status_rollup
    == Alert:  critical when the count of offline disks is above 0
    ====================================================================
    -->
    <MetricColl NAME="disk_status_rollup">
      <!-- To avoid flooding the repository with data, the offline disk
           count is only sent when it differs from the previous sample. -->
      <Filter COLUMN_NAME="delta_offline"
              OPERATOR="NE"
              AFTER_SEVERITY_CHECKING="TRUE">
      </Filter>
      <Condition
          COLUMN_NAME="offline_count"
          OPERATOR="GT"
          CRITICAL="0"
          UPLOAD_IF_SEVERITY="CHANGE_ONLY"
          MESSAGE="%offline_count% disks are offline."
          MESSAGE_NLSID="modeStatusCount_alertmessage"/>
    </MetricColl>
  </CollectionItem>
  <!--
  ==================================================================
  == OFS related Metrics
  ==================================================================
    -->
  <!-- ofs_collections: OFS_State, collected every 5 minutes.
       ofs_availability equal to NOT AVAILABLE is critical;
       ofs_mount_state equal to DISMOUNTED is a warning. -->
  <CollectionItem NAME="ofs_collections">
    <Schedule>
      <IntervalSchedule INTERVAL="5" TIME_UNIT="Min"/>
    </Schedule>

    <MetricColl NAME="OFS_State">
      <Condition
          COLUMN_NAME="ofs_availability"
          OPERATOR="EQ"
          CRITICAL="NOT AVAILABLE"
          MESSAGE="The ASM Cluster File System %ofs_mount_point% is not available on host %ofs_host%."
          MESSAGE_NLSID="OFS_Response_Status_alertmessage"
          CLEAR_MESSAGE="CLEARED - The ASM Cluster File System %ofs_mount_point% is not available on host %ofs_host%."
          CLEAR_MESSAGE_NLSID="OFS_Response_Status_alertmessage_clear"/>
      <Condition
          COLUMN_NAME="ofs_mount_state"
          OPERATOR="EQ"
          WARNING="DISMOUNTED"
          MESSAGE="The volume device %volume_device% is dismounted on host %ofs_host%"
          MESSAGE_NLSID="OFS_Mount_State_alertmessage"
          CLEAR_MESSAGE="CLEARED - The volume device %volume_device% is dismounted on host %ofs_host%"
          CLEAR_MESSAGE_NLSID="OFS_Mount_State_alertmessage_clear"/>
    </MetricColl>
  </CollectionItem>

   <!-- 
  ==================================================================
  == diskgroup_failgroup_checks (3 metrics)
  ==
  == 1. diskgroup_fg_imbalance (ASM failure group imbalance)
  == Purpose: For NORMAL or HIGH redundancy, alert on partner count
  == (different number of disks) and/or partner space (disks of
  == different sizes) imbalance
  ==
  == 2. diskgroup_imbalance (ASM disk group imbalance)
  == Purpose: Alert when disk group requires rebalance, disk group
  == requires configuration change (e.g. increase storage) and 
  == when a rebalance in progress requires a power boost.
  ==
  == 3. unavailable_failure_group (ASM unavailable failure group)
  == Purpose: Alert on failure groups with all member disks offline
  ==================================================================
  -->
  <!-- diskgroup_failgroup_checks: failure group imbalance, disk group
       imbalance and unavailable failure group metrics, all collected on
       one 15-minute schedule. -->
  <CollectionItem NAME="diskgroup_failgroup_checks">
    <Schedule>
      <IntervalSchedule INTERVAL="15" TIME_UNIT="Min"/>
    </Schedule>

    <!-- Failure group imbalance: warning when pImbalance is 2 or more,
         or when sImbalance is above 10. -->
    <MetricColl NAME="diskgroup_fg_imbalance">
      <!-- NOTE(review): the CLEAR_MESSAGE_NLSID below contains
           "Rebalance" while its MESSAGE_NLSID does not; the sibling
           conditions pair X_alertmessage with X_alertmessage_clear.
           Confirm against the NLS resource bundle. -->
      <Condition
          COLUMN_NAME="pImbalance"
          OPERATOR="GE"
          WARNING="2"
          MESSAGE="Disk Group %diskGroup% has failure groups with different numbers of disks which may lead to suboptimal space usage. Changing the configuration may alleviate this problem."
          MESSAGE_NLSID="diskGroupFGPImbalance_alertmessage"
          CLEAR_MESSAGE="CLEARED - Disk Group %diskGroup% has failure groups with different numbers of disks which may lead to suboptimal space usage. Changing the configuration may alleviate this problem."
          CLEAR_MESSAGE_NLSID="diskGroupFGPImbalanceRebalance_alertmessage_clear"/>
      <Condition
          COLUMN_NAME="sImbalance"
          OPERATOR="GT"
          WARNING="10"
          MESSAGE="Disk Group %diskGroup% has failure groups with disks of different sizes which may lead to suboptimal space usage. Changing the configuration may alleviate this problem."
          MESSAGE_NLSID="diskGroupFGSImbalance_alertmessage"
          CLEAR_MESSAGE="CLEARED - Disk Group %diskGroup% has failure groups with disks of different sizes which may lead to suboptimal space usage. Changing the configuration may alleviate this problem."
          CLEAR_MESSAGE_NLSID="diskGroupFGSImbalance_alertmessage_clear"/>
    </MetricColl>

    <!-- Disk group imbalance: computedImbalance above 10 is a warning;
         computedMinFree below 20 is a warning and below 10 is critical;
         computedMaxUsed above 95 is critical. -->
    <MetricColl NAME="diskgroup_imbalance">
      <Condition
          COLUMN_NAME="computedImbalance"
          OPERATOR="GT"
          WARNING="10"
          MESSAGE="Disk Group %diskGroup% requires rebalance because the space usage imbalance between disks is high."
          MESSAGE_NLSID="diskGroupRebalance_alertmessage"
          CLEAR_MESSAGE="CLEARED - Disk Group %diskGroup% requires rebalance because the space usage imbalance between disks is high."
          CLEAR_MESSAGE_NLSID="diskGroupRebalance_alertmessage_clear"/>
      <Condition
          COLUMN_NAME="computedMinFree"
          OPERATOR="LT"
          WARNING="20"
          CRITICAL="10"
          MESSAGE="Disk Group %diskGroup% requires rebalance because at least one disk is low on space."
          MESSAGE_NLSID="diskGroupRebalance2_alertmessage"
          CLEAR_MESSAGE="CLEARED - Disk Group %diskGroup% requires rebalance because at least one disk is low on space."
          CLEAR_MESSAGE_NLSID="diskGroupRebalance2_alertmessage_clear"/>
      <Condition
          COLUMN_NAME="computedMaxUsed"
          OPERATOR="GT"
          CRITICAL="95"
          MESSAGE="Increase the rebalance power for Disk Group %diskGroup% because at least one disk is critically low on space."
          MESSAGE_NLSID="diskGroupRebalanceInProg_alertmessage"
          CLEAR_MESSAGE="CLEARED - Increase the rebalance power for Disk Group %diskGroup% because at least one disk is critically low on space."
          CLEAR_MESSAGE_NLSID="diskGroupRebalanceInProg_alertmessage_clear"/>
    </MetricColl>

    <!-- Unavailable failure group: critical when fgAlertDiskCount
         equals 0. -->
    <MetricColl NAME="unavailable_failure_group">
      <Condition
          COLUMN_NAME="fgAlertDiskCount"
          OPERATOR="EQ"
          CRITICAL="0"
          MESSAGE="Failure Group %diskGroup%.%failureGroup% is offline."
          MESSAGE_NLSID="failureGroupUnavailable_alertmessage"
          CLEAR_MESSAGE="CLEARED - Failure Group %diskGroup%.%failureGroup% is offline."
          CLEAR_MESSAGE_NLSID="failureGroupUnavailable_alertmessage_clear"/>
    </MetricColl>
  </CollectionItem>

  <!--
  ======================================================================
  == Group: Incident Meter
  ======================================================================
  -->
  <!-- incident_meter: collected every 5 minutes, with no alert
       conditions defined here. -->
  <CollectionItem NAME="incident_meter">
    <Schedule>
      <IntervalSchedule INTERVAL="5" TIME_UNIT="Min"/>
    </Schedule>

    <!--
    ======================================================================
    == Category: Collect Incident Meter
    == Fetchlet: SQL
    ======================================================================
    -->
    <MetricColl NAME="incident_meter"/>
  </CollectionItem>

  <!--
  ======================================================================
  == Group: adr_alert_log_rollup
  ======================================================================
  -->
  <CollectionItem NAME="adr_alert_log_rollup" COLLECT_WHEN_ALTSKIP="TRUE"
      COLLECT_WHEN_DOWN="TRUE">
    <ValidIf>
      <CategoryProp NAME="VersionCategory" CHOICES="11gR1;11gR2"/>
    </ValidIf>
    <Schedule>
      <IntervalSchedule INTERVAL="5" TIME_UNIT="Min"/>
    </Schedule>

    <MetricColl NAME="adrAlertLogError">
    </MetricColl>

    <!--
    ====================================================================
    == Category: ADR Alert Log Incident Error
    == Fetchlet: Perl
    == (NB: An alert log error stack never contributes to more than one
    ==      event.)
    ====================================================================
    -->
    <MetricColl NAME="adrAlertLogIncidentError">
      <!--
      ==================================================================
      == Alert: Generic Incident Error Stack
      ==================================================================
      -->
      <Condition COLUMN_NAME="genericIncidentErrStack"
                 CRITICAL=".*" OPERATOR="MATCH"
                 MESSAGE="Incident (%errCodes%) detected in %alertLogName% at time/line number: %timeLine%."
                 MESSAGE_NLSID="adrAlertLog_genericIncidentErrStack_alertmessage"
                 NO_CLEAR_ON_NULL="TRUE"/>

      <!--
      ==================================================================
      == Alert: Generic Internal Error Stack
      ==================================================================
      -->
      <Condition COLUMN_NAME="genericInternalErrStack"
                 CRITICAL=".*" OPERATOR="MATCH"
                 MESSAGE="Internal error (%errCodes%) detected in %alertLogName% at time/line number: %timeLine%."
                 MESSAGE_NLSID="adrAlertLog_genericInternalErrStack_alertmessage"
                 NO_CLEAR_ON_NULL="TRUE"/>

      <!--
      ==================================================================
      == Alert: Session Terminated Error Stack
      ==================================================================
      -->
      <Condition COLUMN_NAME="sessTerminateErrStack"
                 CRITICAL=".*" OPERATOR="MATCH"
                 MESSAGE="A session termination detected in %alertLogName% at time/line number: %timeLine%."
                 MESSAGE_NLSID="adrAlertLog_sessTerminateErrStack_alertmessage"
                 NO_CLEAR_ON_NULL="TRUE"/>
                 
      <!--
      ==================================================================
      == Alert: Internal SQL Error Stack
      ==================================================================
      -->
      <Condition COLUMN_NAME="internalSqlErrStack"
                 CRITICAL=".*" OPERATOR="MATCH"
                 MESSAGE="An internal SQL error detected in %alertLogName% at time/line number: %timeLine%."
                 MESSAGE_NLSID="adrAlertLog_internalSqlErrStack_alertmessage"
                 NO_CLEAR_ON_NULL="TRUE"/>

      <!--
      ==================================================================
      == Alert: Access Violation Error Stack
      ==================================================================
      -->
      <Condition COLUMN_NAME="accessViolationErrStack"
                 CRITICAL=".*" OPERATOR="MATCH"
                 MESSAGE="An access violation detected in %alertLogName% at time/line number: %timeLine%."
                 MESSAGE_NLSID="adrAlertLog_accessViolationErrStack_alertmessage"
                 NO_CLEAR_ON_NULL="TRUE"/>

      <!--
      ==================================================================
      == Alert: File Access Error Stack
      ==================================================================
      -->
      <Condition COLUMN_NAME="fileAccessErrStack"
                 CRITICAL=".*" OPERATOR="MATCH"
                 MESSAGE="A file access error detected in %alertLogName% at time/line number: %timeLine%."
                 MESSAGE_NLSID="adrAlertLog_fileAccessErrStack_alertmessage"
                 NO_CLEAR_ON_NULL="TRUE"/>

      <!--
      ==================================================================
      == Alert: Out of Memory Error Stack
      ==================================================================
      -->
      <Condition COLUMN_NAME="outOfMemoryErrStack"
                 CRITICAL=".*" OPERATOR="MATCH"
                 MESSAGE="Out of memory detected in %alertLogName% at time/line number: %timeLine%."
                 MESSAGE_NLSID="adrAlertLog_outOfMemoryErrStack_alertmessage"
                 NO_CLEAR_ON_NULL="TRUE"/>
                 
      <!--
      ==================================================================
      == Alert: Cluster Error Stack
      ==================================================================
      -->
      <Condition COLUMN_NAME="clusterErrStack"
                 CRITICAL=".*" OPERATOR="MATCH"
                 MESSAGE="A cluster error detected in %alertLogName% at time/line number: %timeLine%."
                 MESSAGE_NLSID="adrAlertLog_clusterErrStack_alertmessage"
                 NO_CLEAR_ON_NULL="TRUE"/>
                 
      <!--
      ==================================================================
      == Alert: Deadlock Error Stack
      ==================================================================
      -->
      <Condition COLUMN_NAME="deadlockErrStack"
                 CRITICAL=".*" OPERATOR="MATCH"
                 MESSAGE="A deadlock error detected in %alertLogName% at time/line number: %timeLine%."
                 MESSAGE_NLSID="adrAlertLog_deadlockErrStack_alertmessage"
                 NO_CLEAR_ON_NULL="TRUE"/>
                 
      <!--
      ==================================================================
      == Alert: ASM Block Corruption Error Stack
      ==================================================================
      -->
      <Condition COLUMN_NAME="asmBlockCorruptionErrStack"
                 CRITICAL=".*" OPERATOR="MATCH"
                 MESSAGE="An ASM data block was corrupted at time/line number: %timeLine%."
                 MESSAGE_NLSID="adrAlertLog_asmBlockCorruptionErrStack_alertmessage"
                 NO_CLEAR_ON_NULL="TRUE"/>
                 
      <!--
      ==================================================================
      == Alert: Redo Log Corruption Error Stack
      ==================================================================
      -->
      <Condition COLUMN_NAME="redoLogCorruptionErrStack"
                 CRITICAL=".*" OPERATOR="MATCH"
                 MESSAGE="A data block was corrupted at time/line number: %timeLine%."
                 MESSAGE_NLSID="adrAlertLog_redoLogCorruptionErrStack_alertmessage"
                 NO_CLEAR_ON_NULL="TRUE"/>                                  

    </MetricColl>

    <!--
    ====================================================================
    == Category: ADR Alert Log Operational Error
    == Fetchlet: Perl
    == (NB: An alert log error stack never contributes to more than one
    ==      event.)
    ====================================================================
    -->
    <MetricColl NAME="adrAlertLogOperationalError">
      <!--
      Every condition in this collection uses the CRITICAL threshold ".*"
      with OPERATOR="MATCH" and sets NO_CLEAR_ON_NULL="TRUE".
      NOTE(review): NO_CLEAR_ON_NULL presumably keeps an alert open when a
      later collection returns no value; confirm.
      -->

      <!--
      ==================================================================
      == Alert: Generic Operational Error Stack
      == Message substitutes %errCodes%, %alertLogName% and %timeLine%.
      ==================================================================
      -->
      <Condition COLUMN_NAME="genericOperationalErrStack"
                 CRITICAL=".*"
                 OPERATOR="MATCH"
                 MESSAGE="Operational error (%errCodes%) detected in %alertLogName% at time/line number: %timeLine%."
                 MESSAGE_NLSID="adrAlertLog_genericOperationalErrStack_alertmessage"
                 NO_CLEAR_ON_NULL="TRUE"/>

      <!--
      ==================================================================
      == Alert: Data Block Corruption Error Stack
      == Message substitutes %timeLine% only.
      ==================================================================
      -->
      <Condition COLUMN_NAME="dataBlockCorruptionErrStack"
                 CRITICAL=".*"
                 OPERATOR="MATCH"
                 MESSAGE="A data block was corrupted at time/line number: %timeLine%."
                 MESSAGE_NLSID="adrAlertLog_dataBlockCorruptionErrStack_alertmessage"
                 NO_CLEAR_ON_NULL="TRUE"/>

      <!--
      ==================================================================
      == Alert: Media Failure Error Stack
      == Message substitutes %alertLogName% and %timeLine%.
      ==================================================================
      -->
      <Condition COLUMN_NAME="mediaFailureErrStack"
                 CRITICAL=".*"
                 OPERATOR="MATCH"
                 MESSAGE="Media failure detected in %alertLogName% at time/line number: %timeLine%."
                 MESSAGE_NLSID="adrAlertLog_mediaFailureErrStack_alertmessage"
                 NO_CLEAR_ON_NULL="TRUE"/>

    </MetricColl>

    <!--
    ====================================================================
    == Category: ADR Alert Log Checker Failure
    == Fetchlet: Perl
    == (NB: An alert log error stack never contributes to more than one
    ==      event.)
    ====================================================================
    -->
    <MetricColl NAME="adrAlertLogCheckerFailure">
      <!--
      ==================================================================
      == Alert: Generic Checker Failure Error Stack
      == Column genericCheckerFailureErrStack is compared against the
      == CRITICAL threshold ".*" using OPERATOR="MATCH".
      == Message substitutes %numberOfFailures%, %alertLogName% and
      == %timeLine%.
      == NOTE(review): NO_CLEAR_ON_NULL="TRUE" presumably keeps the alert
      == open when a later collection returns no value; confirm.
      ==================================================================
      -->
      <Condition COLUMN_NAME="genericCheckerFailureErrStack"
                 CRITICAL=".*"
                 OPERATOR="MATCH"
                 MESSAGE="Health checker runs found %numberOfFailures% new failures in %alertLogName% at time/line number: %timeLine%."
                 MESSAGE_NLSID="adrAlertLog_genericCheckerFailureErrStack_alertmessage"
                 NO_CLEAR_ON_NULL="TRUE"/>
    </MetricColl>

    <!--
    ====================================================================
    == Category: ADR Alert Log Incident Error Status
    == Fetchlet: Aggregation
    ====================================================================
    -->
    <MetricColl NAME="adrAlertLogIncidentErrorStatus">
      <!--
      Every condition in this collection uses the CRITICAL threshold "0"
      with OPERATOR="GT". None of them sets NO_CLEAR_ON_NULL.
      -->

      <!--
      ==================================================================
      == Alert: Generic Incident Errors
      == Message substitutes %value%.
      ==================================================================
      -->
      <Condition COLUMN_NAME="genericIncidentErrors"
                 CRITICAL="0"
                 OPERATOR="GT"
                 MESSAGE="%value% distinct types of incidents have been found in the alert log."
                 MESSAGE_NLSID="adrAlertLogStatus_genericIncidentErrors_alertmessage"/>

      <!--
      ==================================================================
      == Alert: Generic Internal Errors
      ==================================================================
      -->
      <Condition COLUMN_NAME="genericInternalErrors"
                 CRITICAL="0"
                 OPERATOR="GT"
                 MESSAGE="Generic internal errors have been found in the alert log."
                 MESSAGE_NLSID="adrAlertLogStatus_genericInternalErrors_alertmessage"/>

      <!--
      ==================================================================
      == Alert: Session Terminated Errors
      ==================================================================
      -->
      <Condition COLUMN_NAME="sessTerminateErrors"
                 CRITICAL="0"
                 OPERATOR="GT"
                 MESSAGE="Session terminations have been found in the alert log."
                 MESSAGE_NLSID="adrAlertLogStatus_sessTerminateErrors_alertmessage"/>

      <!--
      ==================================================================
      == Alert: Internal SQL Errors
      == NOTE(review): MESSAGE_NLSID below says "internalErrors" while
      == the column is "internalSqlErrors"; every sibling id embeds its
      == column name. Left as is; confirm against the key in the message
      == resource bundle before renaming.
      ==================================================================
      -->
      <Condition COLUMN_NAME="internalSqlErrors"
                 CRITICAL="0"
                 OPERATOR="GT"
                 MESSAGE="Internal SQL errors have been found in the alert log."
                 MESSAGE_NLSID="adrAlertLogStatus_internalErrors_alertmessage"/>

      <!--
      ==================================================================
      == Alert: Access Violation Errors
      ==================================================================
      -->
      <Condition COLUMN_NAME="accessViolationErrors"
                 CRITICAL="0"
                 OPERATOR="GT"
                 MESSAGE="Access violation errors have been found in the alert log."
                 MESSAGE_NLSID="adrAlertLogStatus_accessViolationErrors_alertmessage"/>

      <!--
      ==================================================================
      == Alert: File Access Errors
      ==================================================================
      -->
      <Condition COLUMN_NAME="fileAccessErrors"
                 CRITICAL="0"
                 OPERATOR="GT"
                 MESSAGE="File access errors have been found in the alert log."
                 MESSAGE_NLSID="adrAlertLogStatus_fileAccessErrors_alertmessage"/>

      <!--
      ==================================================================
      == Alert: Out of Memory Errors
      ==================================================================
      -->
      <Condition COLUMN_NAME="outOfMemoryErrors"
                 CRITICAL="0"
                 OPERATOR="GT"
                 MESSAGE="Out of memory errors have been found in the alert log."
                 MESSAGE_NLSID="adrAlertLogStatus_outOfMemoryErrors_alertmessage"/>

      <!--
      ==================================================================
      == Alert: Cluster Errors
      ==================================================================
      -->
      <Condition COLUMN_NAME="clusterErrors"
                 CRITICAL="0"
                 OPERATOR="GT"
                 MESSAGE="Cluster errors have been found in the alert log."
                 MESSAGE_NLSID="adrAlertLogStatus_clusterErrors_alertmessage"/>

      <!--
      ==================================================================
      == Alert: Deadlock Errors
      ==================================================================
      -->
      <Condition COLUMN_NAME="deadlockErrors"
                 CRITICAL="0"
                 OPERATOR="GT"
                 MESSAGE="Deadlock errors have been found in the alert log."
                 MESSAGE_NLSID="adrAlertLogStatus_deadlockErrors_alertmessage"/>

      <!--
      ==================================================================
      == Alert: ASM Data Block Corruption Errors
      ==================================================================
      -->
      <Condition COLUMN_NAME="asmBlockCorruptionErrors"
                 CRITICAL="0"
                 OPERATOR="GT"
                 MESSAGE="ASM data block corruption errors have been found in the alert log."
                 MESSAGE_NLSID="adrAlertLogStatus_asmBlockCorruptionErrors_alertmessage"/>

      <!--
      ==================================================================
      == Alert: Redo Log Corruption Errors
      ==================================================================
      -->
      <Condition COLUMN_NAME="redoLogCorruptionErrors"
                 CRITICAL="0"
                 OPERATOR="GT"
                 MESSAGE="Redo log corruption errors have been found in the alert log."
                 MESSAGE_NLSID="adrAlertLogStatus_redoLogCorruptionErrors_alertmessage"/>

    </MetricColl>

    <!--
    ====================================================================
    == Category: ADR Alert Log Operational Error Status
    == Fetchlet: Aggregation
    ====================================================================
    -->
    <MetricColl NAME="adrAlertLogOperationalErrorStatus">
      <!--
      Every condition in this collection uses the CRITICAL threshold "0"
      with OPERATOR="GT". None of them sets NO_CLEAR_ON_NULL.
      -->

      <!--
      ==================================================================
      == Alert: Generic Operational Errors
      == Message substitutes %value%.
      ==================================================================
      -->
      <Condition COLUMN_NAME="genericOperationalErrors"
                 CRITICAL="0"
                 OPERATOR="GT"
                 MESSAGE="%value% distinct types of operational errors have been found in the alert log."
                 MESSAGE_NLSID="adrAlertLogStatus_genericOperationalErrors_alertmessage"/>

      <!--
      ==================================================================
      == Alert: Data Block Corruption Errors
      ==================================================================
      -->
      <Condition COLUMN_NAME="dataBlockCorruptionErrors"
                 CRITICAL="0"
                 OPERATOR="GT"
                 MESSAGE="Data block corruption errors have been found in the alert log."
                 MESSAGE_NLSID="adrAlertLogStatus_dataBlockCorruptionErrors_alertmessage"/>

      <!--
      ==================================================================
      == Alert: Media Failure Errors
      ==================================================================
      -->
      <Condition COLUMN_NAME="mediaFailureErrors"
                 CRITICAL="0"
                 OPERATOR="GT"
                 MESSAGE="Media failure errors have been found in the alert log."
                 MESSAGE_NLSID="adrAlertLogStatus_mediaFailureErrors_alertmessage"/>

    </MetricColl>
  </CollectionItem>

  <!--
  ======================================================================
  == Collection: cluster_resource_name
  ======================================================================
  -->
  <CollectionItem NAME="cluster_resource_name" UPLOAD_ON_FETCH="TRUE">

    <!--
    Interval schedule: every 6 hours (INTERVAL="6", TIME_UNIT="Hr").
    NOTE(review): the exact effect of OFFSET_TYPE="INCREMENTAL" and
    UPLOAD_ON_FETCH="TRUE" is not visible in this file; confirm against
    the agent collection documentation.
    -->
    <Schedule OFFSET_TYPE="INCREMENTAL">
      <IntervalSchedule INTERVAL="6"
                        TIME_UNIT="Hr"/>
    </Schedule>

    <!-- The only metric in this item; no Condition is attached to it. -->
    <MetricColl NAME="cluster_resource_name"/>

  </CollectionItem>

</TargetCollection>
