{ @(#)39      1.8  src/bos/usr/sbin/perf/pmapi/libpmapi/RS64-III.evs, pmapi, bos720 12/1/05 12:22:25
{ IBM_PROLOG_BEGIN_TAG 
{ This is an automatically generated prolog. 
{  
{ bos720 src/bos/usr/sbin/perf/pmapi/libpmapi/RS64-III.evs 1.8 
{  
{ Licensed Materials - Property of IBM 
{  
{ COPYRIGHT International Business Machines Corp. 1999,2005 
{ All Rights Reserved 
{  
{ US Government Users Restricted Rights - Use, duplication or 
{ disclosure restricted by GSA ADP Schedule Contract with IBM Corp. 
{  
{ IBM_PROLOG_END_TAG 
114, 32, 32, 32, 22, 18, 18, 16

{ counter 1 }
#0,u,n,n,n,n,SUSPENDED,Suspended
Suspended
#1,v,n,n,n,n,PM_CYC,Processor cycles
Processor clock cycles.  
If MMCRA[30]=0, this is thread active cycles.
If MMCRA[30]=1, this is thread active run cycles.
#2,u,n,n,n,n,PM_STORAGE_CYC,Storage latency
Cycles lost due to storage delays - global count of all 
storage delays including I cache, D cache, TLB, and SLB miss time, and 
also E=DS load time, counting only time which actually slows down 
instruction execution. Includes sync, vsync, tlbsync, and eieio time after
E=DS store, Dcbf, Dcbst, Dcbi, Icbi, Tlbie, or Ecowx.
This count increments by a maximum of 1/cycle, i.e. overlapped time 
only counts once. 
((run cycles - storage latency) / inst) = infinite cache CPI = about 1 
IERAT miss/TLB hit miss cycles are not included in this count. To obtain
a more accurate storage latency, add (ERAT miss count - I TLB miss 
count) x4 to storage latency. IERAT miss/TLB miss cycles are included.
Isync after E=DS store, Dcbf, Dcbst, Dcbi, Icbi, Tlbie, or Ecowx are not 
included in this count. Isync duration could also be added to obtain a
more accurate storage latency.
#3,v,n,n,n,n,PM_INST_CMPL,Instructions completed
Instructions completed (more than 1/cycle)
#4,v,n,n,n,n,PM_INST_CMPL_GATED_BY_IMR,Instructions completed gated by IMR
Instructions completed gated by IMR (more than 1/cycle)
#5,u,n,n,n,n,PM_CYC_NGATED_BY_RUN,Cycles not gated by 'run'
cycles not gated by 'run'
this is thread active cycles
#6,v,n,n,n,n,PM_CYC_NGATED_BY_RUN_OR_THREAD_ACTIVE,Cycles not gated by 'run' or thread active
cycles not gated by 'run' or thread active
this is machine cycles while counters are enabled
#7,u,n,n,n,n,NUSED,Not used
Not used
#8,u,n,n,n,n,PM_MPIPE_CMPL,M pipe instr completed
M pipe instruction completed - Logical instructions 
#9,u,n,n,n,n,NUSED,Not used
Not used
#10,u,n,n,n,n,PM_L1_ICACHE_MISS_CYC,L1 I cache miss latency
L1 I cache miss duration latency, time pipe is stalled
If MMCRA[0]=1, this event does not continue to count in the background 
thread as other durations normally would.
#11,u,n,n,n,n,PM_THRD0_RUN_CYC,Thread 0 run cycles
Thread 0 run cycles (use in global mode)
#12,v,n,n,n,n,PM_L1_DCACHE_MISS,L1 D cache miss count
L1 D cache miss count (Ld, St, dcbz, Ld/St for tablewalk, spawn not counted)
#13,u,n,n,n,n,PM_IERAT_MISS_CYC,IERAT miss latency
IERAT miss duration latency, time pipe is stalled. 
This count is for comparison only, use count x 4 for duration
#14,u,n,n,n,n,PM_TLB_MISS_CYC,TLB miss latency
TLB miss duration latency, time pipe is stalled (for both I and D).
#15,u,n,n,n,n,PM_COND_BR_TAKEN,Branches conditional taken
Branches conditional taken.
#16,u,n,n,n,n,NUSED,Not used
Not used
#17,u,n,n,n,n,PM_EXT_TRC_XFACE_STROBES,Strobes to external interface
Strobes to external trace interface count.
#18,u,n,n,n,n,NUSED,Not used
Not used
#19,u,n,n,n,n,NUSED,Not used
Not used
#20,u,n,n,n,n,NUSED,Not used
Not used
#21,u,n,n,n,n,PM_THRD_SWTCH,Thread switch count
Thread switch count.
#22,u,n,n,n,n,PM_THRD_SWTCH_BY_L1_DFETCH_MISS,Thread switch caused by L1 data fetch miss
Thread switch caused by L1 data fetch miss
#23,u,n,n,n,n,PM_THRD_SWTCH_BY_TLB_SLB_MISS,Thread switch caused by TLB/SLB miss
Thread switch caused by TLB/SLB miss.
#24,u,n,n,n,n,PM_THRD_SWTCH_BY_L2_INST_MISS,Thread switch caused by L2 instr miss
Thread switch caused by L2 instruction miss.
#25,u,n,n,n,n,PM_THRD_SWTCH_BY_MISS_ON_2ND_OF_STD,Thread switch caused by miss on second of store double
Thread switch caused by store double: miss on second of two.
#26,u,n,n,n,n,PM_THRD_SWTCH_BY_MISS_ON_2ND_UNALGN_LD,Thread switch caused by miss on second of two unaligned load
Thread switch caused by unaligned load: miss on second of two. 
#27,u,n,n,n,n,PM_THRD_SWTCH_BY_SW,Thread switch caused by SW threadswitch
Thread switch caused by SW threadswitch - OR 1,1,1. 
#28,u,n,n,n,n,PM_THRD_SWTCH_BY_TLBMULT_MISS,Thread switch caused by TLB Multiple miss
Thread switch caused by TLBMult miss - TLB miss on multi-cycle op.
#29,u,n,n,n,n,NUSED,Not used
Not used
#30,u,n,n,n,n,PM_THRASH_CNT_2_INST,Thrashing counts < 2 instr between switches
Thrashing counts: uses 3 bit counter reset after switch, 
less than 2 instructions between switches.
#31,u,n,n,n,n,PM_1THRD_MODE_CYC,Single-thread mode cycles
Single-thread mode cycles(CTRL[8] xor CTRL[9]).
#32,u,n,n,n,n,NUSED,Not used
Not used
#33,u,n,n,n,n,NUSED,Not used
Not used
#34,u,n,n,n,n,NUSED,Not used
Not used
#35,u,n,n,n,n,NUSED,Not used
Not used
#36,u,n,n,n,n,NUSED,Not used
Not used
#37,u,n,n,n,n,NUSED,Not used
Not used
#38,u,n,n,n,n,NUSED,Not used
Not used
#39,u,n,n,n,n,NUSED,Not used
Not used
#40,u,n,n,n,n,NUSED,Not used
Not used
#41,u,n,n,n,n,NUSED,Not used
Not used
#42,u,n,n,n,n,NUSED,Not used
Not used
#43,u,n,n,n,n,NUSED,Not used
Not used
#44,u,n,n,n,n,NUSED,Not used
Not used
#45,u,n,n,n,n,NUSED,Not used
Not used
#46,u,n,n,n,n,NUSED,Not used
Not used
#47,u,n,n,n,n,NUSED,Not used
Not used
#48,u,n,n,n,n,NUSED,Not used
Not used
#49,u,n,n,n,n,NUSED,Not used
Not used
#50,u,n,n,n,n,NUSED,Not used
Not used
#51,u,n,n,n,n,NUSED,Not used
Not used
#52,u,n,n,n,n,NUSED,Not used
Not used
#53,u,n,n,n,n,NUSED,Not used
Not used
#54,u,n,n,n,n,NUSED,Not used
Not used
#55,u,n,n,n,n,NUSED,Not used
Not used
#56,u,n,n,n,n,NUSED,Not used
Not used
#57,u,n,n,n,n,NUSED,Not used
Not used
#58,u,n,n,n,n,NUSED,Not used
Not used
#59,u,n,n,n,n,NUSED,Not used
Not used
#60,u,n,n,n,n,NUSED,Not used
Not used
#61,u,n,n,n,n,NUSED,Not used
Not used
#62,u,n,n,n,n,NUSED,Not used
Not used
#63,u,n,n,n,n,NUSED,Not used
Not used

#64,u,n,n,n,n,PM_GRP_x40,FXU group x40
PMC1: Unaligned access count if causes stall - doubleword cross
PMC2: Multi-cycle instr count - Ld/St and mult/div, no dcbx
PMC3: Multi-cycle instr cycles - note this is cycles-1 in relationship to the 
total number of cycles to execute the instruction.  Does not include storage
latencies of multi-cycle inst.
PMC4: Multi-cycle mult/div instr count
PMC5: Multi-cycle mult/div instr cycles - note this is cycles-1 in relationship 
to the total number of cycles to execute the instruction.
PMC6: not used
PMC7: Detour count (artificial holdoffs)
PMC8: Detour duration

#65,u,n,n,n,n,PM_GRP_x41,FXU group x41
PMC1: Data dependency stalls not waiting on load
PMC2: Waiting on load stalls not including storage latency from load (count 
includes normal S2 cycles, so S pipe instr completed should be subtracted from 
this count to get actual waiting on load stalls)
PMC3: Failed stcx (larx reservation lost)
PMC4: sync, vsync, eieio, tlbsync duration.
PMC5-6: not used
PMC7: stcx executed
PMC8: larx executed

#66,u,n,n,n,n,PM_SPIPE_PLS_CMPL,PLS_mode S pipe instr completed
PMC1: PLS_mode S pipe instr completed - Ld/st
PMC2: PLS_mode unaligned access count if causes stall - doubleword cross
PMC3: PLS_mode S pipe multi-cycle instr count - Ld/St
PMC4: PLS_mode S pipe multi-cycle instr cycles - note this is cycles-1
in relationship to the total number of cycles
to execute the instruction. Does not include
storage latencies of multi-cycle inst.
PMC5: PLS_mode S pipe instr cycles - Ld/st - note this is cycles-1
in relationship to the total number of cycles 
to execute the instruction. Does not include
storage latencies of multi-cycle inst.
PMC6: PLS_mode cycles
PMC7-8: Not used

#67,u,n,n,n,n,NUSED,Not used
Not used
#68,u,n,n,n,n,NUSED,Not used
Not used
#69,u,n,n,n,n,NUSED,Not used
Not used
#70,u,n,n,n,n,NUSED,Not used
Not used
#71,u,n,n,n,n,NUSED,Not used
Not used
#72,u,n,n,n,n,NUSED,Not used
Not used
#73,u,n,n,n,n,NUSED,Not used
Not used
#74,u,n,n,n,n,NUSED,Not used
Not used
#75,u,n,n,n,n,NUSED,Not used
Not used
#76,u,n,n,n,n,NUSED,Not used
Not used
#77,u,n,n,n,n,NUSED,Not used
Not used
#78,u,n,n,n,n,NUSED,Not used
Not used
#79,u,n,n,n,n,NUSED,Not used
Not used

#80,u,n,n,n,n,PM_GRP_x50,L1 D cache group x50
PMC1: L1 D cache fetch miss count (does not include PTE fetch).
PMC2: L1 D cache fetch miss duration latency, time pipe is stalled
PMC3: L1 D cache store (RWITM/DCLAIM) miss count
PMC4: L1 D cache store (RWITM) miss duration latency, time pipe is stalled.
Does not support multithread mode.
PMC5: L1 D cache PLS_Mode reference cnt (Ld, St, dcbz, Ld/St for tablewalk) 
PMC6: L1 D cache store (DCLAIM) miss duration latency, time pipe is stalled
PMC7: L1 D cache castouts (aging, including from xlate ref)
PMC8: L1 D cache reference count (Ld, St, dcbz, Ld/St for tablewalk)

#81,u,n,n,n,n,PM_GRP_x51,L1 D group x51
PMC1: L1 D cache snoop hit.
PMC2: Not used 
PMC3: L1 D cache store (RWITM/DCLAIM) miss duration latency including TLB/SLB 
time.  Does not support multithread mode.
PMC4: Not used 
PMC5: L1 D cache snoop hit.  Does not support multithread mode.
PMC6: L1 D cache snoop hit causing state change.
Does not support multithread mode.
PMC7: L1 D cache snoop hit causing state change, no castout -dir chg only.
Does not support multithread mode.
PMC8: L1 D cache miss duration latency including TLB/SLB time.
Does not support multithread mode.

#82,u,n,n,n,n,PM_GRP_x52,L1 D cache group x52
PMC1: L1 D cache busy miss count (2 cycle loop).
PMC2: L1 D cache busy miss duration (includes background thread waiting to start).
PMC3: L1 D cache late select miss count (duration = 2x count, plus includes trailing
edge effects, double miss and two double).
PMC4: Castout to L2 steals L1 stalls.
PMC5-8: Not used

#83,u,n,n,n,n,NUSED,Not used
Not used

#84,u,n,n,n,n,PM_GRP_x54,E=DS group x54
PMC1: sync after E=DS store duration.
PMC2: sync after E=DS store count.
PMC3: E=DS load duration.
PMC4: E=DS load count.
PMC5: TLBie received all.
PMC6: TLBie D Hit all.
PMC7: L1 D cache PTE miss count.
PMC8: L1 D cache PTE miss duration.

#85,u,n,n,n,n,PM_GRP_x55,TLB/SLB group x55
PMC1: TLB I miss count
PMC2: TLB I miss duration latency, time pipe is stalled.
PMC3: TLB D miss count.
PMC4: TLB D miss duration latency, time pipe is stalled.
PMC5: TLB I reference count (IERAT miss count).
PMC6: TLB D reference count - includes PTE's searched, so subtract next count 
for actual TLB D references. Also this count includes 1 reference for each cycle
of a multicycle storage op.
PMC7: TLB miss PTE's searched.
PMC8: TLB miss >8 PTE's searched.

#86,v,n,n,n,n,PM_GRP_x56,L2 cache group x56
PMC1: L2 cache miss count
PMC2: L2 cache data fetch miss count (does not include PTE fetch).
PMC3: L2 cache store (RWITM) miss count
PMC4: L2 cache instr miss count
PMC5: L2 cache xlate miss on PTEG count.
PMC6-8: Not used

#87,u,n,n,n,n,PM_GRP_x57,L2 cache group x57
PMC1: L2 cache miss duration latency, time pipe is stalled.
PMC2: L2 cache data fetch miss duration latency, time pipe is stalled.
PMC3: L2 cache store (RWITM) miss duration latency, time pipe is stalled.
PMC4: L2 cache instr miss duration latency, time pipe is stalled.
PMC5: L2 cache xlate miss on PTEG duration latency.
PMC6-8: Not used

#88,u,n,n,n,n,PM_GRP_x58,L2 cache group x58
PMC1: L1/L2 cache hits under miss
PMC2: Cycles with 2 and only 2 outstanding L2 miss.
PMC3: Cycles with 3 and only 3 outstanding L2 miss.
PMC4: 3rd miss return line fill time (pipe could start, but must wait; this 
count is included in busy miss)
PMC5: Data from L2/MS - fast access.
PMC6: Data from line buffer - slow access.
PMC7-8: Not used

#89,u,n,n,n,n,NUSED,Not used
Not used
#90,u,n,n,n,n,NUSED,Not used
Not used
#91,u,n,n,n,n,NUSED,Not used
Not used

#92,u,n,n,n,n,PM_GRP_x5c,SLB group x5c
PMC1: SLBie received all
PMC2: SLBie D Hit all
PMC3: L1 D cache STE miss count
PMC4: L1 D cache STE miss duration
PMC5-8: Not used

#93,u,n,n,n,n,PM_ISLB_MISS,Instruction SLB misses
PMC1: SLB I miss count
PMC2: SLB I miss duration latency, time pipe is stalled
PMC3: SLB D miss count
PMC4: SLB D miss duration latency, time pipe is stalled
PMC5: SLB I reference count (IERAT miss count)
PMC6: SLB D reference count
PMC7: SLB miss STE's searched
PMC8: SLB miss >8 STE's searched

#94,u,n,n,n,n,NUSED,Not used
Not used
#95,u,n,n,n,n,NUSED,Not used
Not used

#96,u,n,n,n,n,PM_GRP_x60,L2 cache group x60
PMC1: L2 cache reference count (Tag pipe)
PMC2: L2 cache data fetch reference count (Tag pipe, includes xlate)
PMC3: L2 cache store (RWITM) reference count (Tag pipe)
PMC4: L2 cache instruction reference count (Tag pipe)
PMC5: L2 cache reference (Data pipe, bandwidth or process time)
PMC6: L2 cache data fetch reference BW (Data pipe, includes xlate)
PMC7: L2 cache store RWITM reference BW (Data pipe)
PMC8: L2 cache instr reference BW (Data pipe)

#97,u,n,n,n,n,PM_GRP_x61,L2 cache group x61
PMC1: L2 cache miss count (Tag pipe)
PMC2: L2 cache data fetch miss count (Tag pipe, includes xlate)
PMC3: L2 cache store (RWITM) miss count (Tag pipe)
PMC4: L2 cache instruction miss count (Tag pipe)
PMC5: L2 cache snoop
PMC6: L2 cache aging castouts
PMC7: L2 cache snoop castouts (will include modified intervention if shared
intervention is disabled)
PMC8: L2 cache snoop intervention castouts (only counts if shared intervention
is disabled)

#98,u,n,n,n,n,PM_GRP_x62,L2 cache group x62
PMC1: L2 cache stcx miss
PMC2: L2 cache larx miss
PMC3: L2 cache stcx class miss
PMC4: L2 cache larx class miss
PMC5: L2 cache class miss count (Tag pipe)
PMC6: L2 cache data fetch class miss count (Tag pipe, includes xlate)
PMC7: L2 cache store (RWITM) class miss count (Tag pipe)
PMC8: L2 cache instr class miss count (Tag pipe)

#99,u,n,n,n,n,PM_GRP_x63,L2 cache group x63
PMC1: snoop requests presented to L2
PMC2: snoops accepted by L2 (balance were aborted)
PMC3: snoops aborted due to address collision in pipe (PMC2-PMC3=#
reaching bottom of pipe without forced retry)
PMC4: snoops reaching bottom of pipe that were retried by L2
PMC5: snoops reaching bottom of pipe that were retried by core
PMC6: snoops reaching bottom of pipe that were modified in L2
PMC7: snoops reaching bottom of pipe that were shared by L2 
PMC8: snoops reaching bottom of pipe that were owned by L2

#100,u,n,n,n,n,PM_GRP_x64,L2 cache group x64
PMC1: L2 Tag Scan Comm
PMC2: L2 Tag Store pipe
PMC3: L2 Tag busy - store queue overflow
PMC4: L2 Tag Snoop op
PMC5: L2 Tag Query
PMC6: L2 Tag L1 Op DLY Path
PMC7: L2 Tag L1 Op CMD Path
PMC8: L2 Tag L1 Op Spec Path

#101,u,n,n,n,n,PM_GRP_x65,L2 cache group x65
PMC1: L2 Data L2 miss (includes spec op)
PMC2: L2 Data L2 castout
PMC3: L2 Data L1 castout or L2 fill
PMC4: L1 castouts hit in L2
PMC5: L2 fills
PMC6: L2 cache aging castouts
PMC7: L2 cache snoop castouts (will include modified intervention if shared
intervention is disabled)
PMC8: L2 cache snoop intervention castouts (only counts if shared intervention 
is enabled)

#102,u,n,n,n,n,PM_GRP_x66,L2 cache group x66
PMC1: # of L2 transitions from F to I (data lost during fill)
PMC2: # of L2 transitions from I,F to E 
(fetch - block in no other caches)
PMC3: # of L2 transitions from I,F to S 
(fetch - block in other caches)
PMC4: # of L2 transitions from I,F to O 
(fetch - block in other caches - owned for intervention)
PMC5: # of L2 transitions from I,F to M 
(store miss - block invalidated in other caches)
PMC6: # of L2 transitions from E to M 
(store hit - block in no other caches)
PMC7: # of L2 transitions from M to E (L3 support)
PMC8: # of L2 transitions from S,O to M (transition on L1 castback)

#103,u,n,n,n,n,PM_GRP_x67,L2 cache group x67
PMC1: L2 cache snoop hit
PMC2: # of snoop-based L2 transitions from E,O to S
PMC3: # of snoop-based L2 transitions from E to I
PMC4: # of snoop-based L2 transitions from E to I (DCLAIM)
PMC5: # of snoop-based L2 transitions from O,S to I (RWITM)
PMC6: # of snoop-based L2 transitions from O,S to I (RWITM)
PMC7: # of snoop-based L2 transitions from M to I
PMC8: # of snoop-based L2 transitions from M to S

#104,u,n,n,n,n,NUSED,Not used
Not used

#105,u,n,n,n,n,PM_GRP_x69,Line buffer group x69
PMC1: dclaim shared store fail
PMC2: dclaim shared store pass
PMC3: dclaim shared store atomic fail
PMC4: dclaim shared store atomic pass
PMC5: conditional castback changed to castback
PMC6: conditional castback got killed
PMC7: L2 and L1 castback collision, L2 castback got killed
PMC8: line buffer snoop response is conditional retry

#106,u,n,n,n,n,PM_GRP_x6a,line buffer group x6a
PMC1: Line buffer full
PMC2: Conflict for line buffer (dependency between incoming and outstanding
commands).
PMC3: L1 command waits for the aging castback (possibility of combining).
PMC4: snoop hit occurred on a line fill in progress
PMC5: rwitm-atomic to read-burst conversion
PMC6: mesi transition: DClaim_Pending to Invalid
PMC7: mesi transition: Pending to DClaim_Pending
PMC8: snoop hit while in DClaim_Pending state

#107,u,n,n,n,n,PM_GRP_x6b,line buffer group x6b
PMC1: mesi transition: Pending to Exclusive
PMC2: mesi transition: Pending to Shared Owner
PMC3: mesi transition: Pending to Modified
PMC4: mesi transition: Pending to Invalid
PMC5: mesi transition: Exclusive to Invalid
PMC6: mesi transition: Shared to Invalid
PMC7: mesi transition: Exclusive to Shared
PMC8: Not used

#108,u,n,n,n,n,PM_GRP_x6c,line buffer group x6c
PMC1: line buffer 0 in use
PMC2: line buffer 1 in use
PMC3: line buffer 2 in use
PMC4: line buffer 3 in use
PMC5: line buffer 4 in use
PMC6: line buffer 5 in use
PMC7: line buffer 6 in use
PMC8: Not used
 
#109,u,n,n,n,n,PM_GRP_x6d,6XX bus group x6d
PMC1: 6xx address bus utilization - total
PMC2: 6xx address bus utilization - this processor
PMC3: 6xx data bus utilization - total
PMC4: 6xx data bus utilization - this processor
PMC5: 6xx DClaim - total AStat Retries
PMC6: 6xx DClaim - total AResp Retries
PMC7: 6xx DClaim - total not Retried/Rerun
PMC8: 6xx total bus AResp Rerun in of master operations (not sync/tlbsync)

#110,u,n,n,n,n,PM_GRP_x6e,6XX bus group x6e
PMC1: L2 cache fetch miss and another processor has modified copy
PMC2: L2 cache store (RWITM) miss and another processor has modified copy
PMC3: L2 cache fetch miss and another processor has shared owner copy
PMC4: L2 cache store (RWITM) miss and another processor has shared owner copy
PMC5: L2 cache fetch miss and another processor has shared copy
PMC6: L2 cache store (RWITM) miss and another processor has shared copy
PMC7: L2 cache fetch miss and no other processor has a copy
PMC8: L2 cache store (RWITM) miss and no other processor has a copy

#111,u,n,n,n,n,PM_GRP_x6f,6XX bus group x6f
PMC1: 6xx master trans retried on bus (load - read, RWITM)
PMC2: 6xx master trans not retried (load - read, RWITM)
PMC3: 6xx master trans retried on bus (store-write)
PMC4: 6xx master trans not retried on bus (store-write)
PMC5: 6xx total bus AResp Retry in of master operations
PMC6: 6xx total bus AStat Retry in of master operations
PMC7: 6xx total bus AResp Retry out of slave operations
PMC8: 6xx total bus AStat Retry out of slave operations

#112,u,n,n,n,n,PM_GRP_x70,FPU group x70
PMC1: FPU arithmetic instructions completed
PMC2: The Floating Point Unit was busy - FPU active
PMC3: FPU FPSCR (cat. 9 in Cray Megaflops)
PMC4: FPU pipe issue stalled
PMC5: FPU load issue stalled
PMC6: FPU store issue stalled
PMC7: FPU arithmetic -arithmetic dependency
PMC8: FPU arithmetic -store dependency

#113,u,n,n,n,n,PM_GRP_x71,FPU group x71
PMC1: FPU Add, Comp, Mult Exec. (Cat 4 Cray megaflops)
PMC2: FPU Multiply-Add exec. (Cat 2 Cray megaflops)
PMC3: FPU divides executed (Cat 1 Cray megaflops)
PMC4: FPU SQR executed (Cat 3 Cray megaflops)
PMC5: FPU FRSP + FCONV executed (Cat 8 Cray megaflops)
PMC6: FPU MOVE + EST executed (Cat 5 Cray megaflops)
PMC7: FPU Est. instr. FRSQRTE, FRES (cat. 7 in Cray megaflops)
PMC8: FPU Est. instr. FRSQRTE, FRES (cat. 7 in Cray megaflops)
$$$$

{ counter 2 }
#0,u,n,n,n,n,SUSPENDED,Suspended
Suspended
#1,v,n,n,n,n,PM_CYC,Processor cycles
Processor clock cycles.  
If MMCRA[30]=0, this is thread active cycles.
If MMCRA[30]=1, this is thread active run cycles.
#2,u,n,n,n,n,PM_STORAGE_CYC,Storage latency
Cycles lost due to storage delays - global count of all 
storage delays including I cache, D cache, TLB, and SLB miss time, and 
also E=DS load time, counting only time which actually slows down 
instruction execution. Includes sync, vsync, tlbsync, and eieio time after E=DS 
store, Dcbf, Dcbst, Dcbi, Icbi, Tlbie, or Ecowx.
This count increments by a maximum of 1/cycle, i.e. overlapped time only counts 
once. 
((run cycles - storage latency) / inst) = infinite cache CPI = about 1 
IERAT miss/TLB hit miss cycles are not included in this count. To obtain a more 
accurate storage latency, add (ERAT miss count - I TLB miss count) x4 to storage
latency. IERAT miss/TLB miss cycles are included.
Isync after E=DS store, Dcbf, Dcbst, Dcbi, Icbi, Tlbie, or Ecowx are not 
included in this count. Isync duration could also be added to obtain a more 
accurate storage latency.
#3,v,n,n,n,n,PM_INST_CMPL,Instructions completed
Instructions completed (more than 1/cycle)
#4,v,n,n,n,n,PM_INST_CMPL_GATED_BY_IMR,Instructions completed gated by IMR
Instructions completed gated by IMR (more than 1/cycle)
#5,u,n,n,n,n,PM_CYC_NGATED_BY_RUN,Cycles not gated by 'run'
This is thread active cycles.
#6,v,n,n,n,n,PM_CYC_NGATED_BY_RUN_OR_THREAD_ACTIVE,Cycles not gated by 'run' or thread active
This is machine cycles while counters are enabled.
#7,u,n,n,n,n,NUSED,Not used
Not used
#8,u,n,n,n,n,PM_RPIPE_CMPL,R pipe instr completed
R pipe instruction completed - arithmetic instructions 
#9,u,n,n,n,n,NUSED,Not used
Not used
#10,v,n,n,n,n,PM_L1_ICACHE_MISS,L1 I cache miss count
L1 I cache miss count
#11,u,n,n,n,n,PM_THRD1_RUN_CYC,Thread 1 run cycles
Thread 1 run cycles (use in global mode)
#12,u,n,n,n,n,PM_L1_DCACHE_MISS_CYC,L1 D cache miss latency
L1 D cache miss duration latency, time pipe is stalled (does not 
include busy miss) 
#13,u,n,n,n,n,PM_IERAT_MISS,IERAT miss count
IERAT miss count
#14,u,n,n,n,n,PM_TLB_MISS_CYC,TLB miss latency
TLB miss duration latency, time pipe is stalled (for both I and D).
#15,u,n,n,n,n,PM_BR2LNK_TAKEN,Branch to link register taken
Branch to link register taken.
#16,u,n,n,n,n,PM_EXT_INT,External interrupts
External interrupt arrived (I/O or processor, only Parkway knows which).
#17,u,n,n,n,n,PM_EXT_TRC_XFACE_STROBES,Strobes to external interface
Strobes to external trace interface count.
#18,u,n,n,n,n,NUSED,Not used
Not used
#19,u,n,n,n,n,NUSED,Not used
Not used
#20,u,n,n,n,n,NUSED,Not used
Not used
#21,u,n,n,n,n,PM_THRD_SWTCH,Thread switch count
Thread switch count.
#22,u,n,n,n,n,PM_THRD_SWTCH_BY_L1_DSTORE_MISS,Thread switch caused by L1 data store miss
Thread switch caused by L1 data store miss.
#23,u,n,n,n,n,PM_THRD_SWTCH_BY_L2_MISS,Thread switch caused by L2 miss
Thread switch caused by L2 miss and dormant thread not L2 miss.
#24,u,n,n,n,n,PM_THRD_SWTCH_BY_THRD_TIMEOUT,Thread switch caused by thread timeout
Thread switch caused by thread timeout value reached. 
#25,u,n,n,n,n,PM_THRD_SWTCH_BY_ST_MULT_STR_MISS,Thread switch caused by miss on store multiple/string
Thread switch caused by store multiple/string: miss on any access 
#26,u,n,n,n,n,PM_THRD_SWTCH_BY_DORMANT_THRD_DSI,Thread switch caused by dormant thread DSI
Thread switch caused by dormant thread DSI. 
#27,u,n,n,n,n,PM_THRD_SWTCH_BY_PRIO,Thread switch caused by priority
Thread switch caused by priority. 
#28,u,n,n,n,n,PM_LOW_PRIO_CYC,cycles spent in low priority
cycles spent in low priority
#29,u,n,n,n,n,NUSED,Not used
Not used
#30,u,n,n,n,n,PM_THRASH_CNT_4_INST,Thrashing counts < 4 instr between switches
Thrashing counts: uses 3 bit counter reset after switch, 
less than  4 instructions between switches.
#31,u,n,n,n,n,PM_2RUN_LATCHES_SET,Thread 1 and 2 both run latches set
Thread 1 and 2 both run latches set (CTRL[16] & CTRL[17]).
$$$$

{ counter 3 }
#0,u,n,n,n,n,SUSPENDED,Suspended
Suspended
#1,v,n,n,n,n,PM_CYC,Processor cycles
Processor clock cycles.  
If MMCRA[30]=0, this is thread active cycles.
If MMCRA[30]=1, this is thread active run cycles.
#2,u,n,n,n,n,PM_STORAGE_CYC,Storage latency
Cycles lost due to storage delays - global count of all 
storage delays including I cache, D cache, TLB, and SLB miss time, and 
also E=DS load time, counting only time which actually slows down 
instruction execution. Includes sync, vsync, tlbsync, and eieio time after E=DS 
store, Dcbf, Dcbst, Dcbi, Icbi, Tlbie, or Ecowx.
This count increments by a maximum of 1/cycle, i.e. overlapped time only counts 
once. 
((run cycles - storage latency) / inst) = infinite cache CPI = about 1 
IERAT miss/TLB hit miss cycles are not included in this count. To obtain a more 
accurate storage latency, add (ERAT miss count - I TLB miss count) x4 to storage
latency. IERAT miss/TLB miss cycles are included.
Isync after E=DS store, Dcbf, Dcbst, Dcbi, Icbi, Tlbie, or Ecowx are not 
included in this count. Isync duration could also be added to obtain a more 
accurate storage latency.
#3,v,n,n,n,n,PM_INST_CMPL,Instructions completed
Instructions completed (more than 1/cycle)
#4,v,n,n,n,n,PM_INST_CMPL_GATED_BY_IMR,Instructions completed gated by IMR
Instructions completed gated by IMR (more than 1/cycle)
#5,u,n,n,n,n,PM_CYC_NGATED_BY_RUN,Cycles not gated by 'run'
This is thread active cycles.
#6,v,n,n,n,n,PM_CYC_NGATED_BY_RUN_OR_THREAD_ACTIVE,Cycles not gated by 'run' or thread active
This is machine cycles while counters are enabled.
#7,u,n,n,n,n,PM_INST_DISP,Instructions dispatched
Instructions dispatched (more than 1/cycle)
#8,u,n,n,n,n,PM_BPIPE_CMPL,B pipe instr completed
B pipe instruction completed - branches 
#9,u,n,n,n,n,PM_ISYNC_CYC,Isync duration
Isync duration.
#10,u,n,n,n,n,PM_L1_ICACHE_MISS_CYC,L1 I cache miss latency
L1 I cache miss duration latency, time pipe is stalled
If MMCRA[0]=1, this event does not continue to count in the background 
thread as other durations normally would.
#11,u,n,n,n,n,PM_THRD0_RUN_CYC,Thread 0 run cycles
Thread 0 run cycles (use in global mode).
#12,v,n,n,n,n,PM_L1_DCACHE_MISS,L1 D cache miss count
L1 D cache miss count (Ld, St, dcbz, Ld/St for tablewalk, spawn 
not counted)
#13,u,n,n,n,n,PM_IERAT_MISS_CYC,IERAT miss latency
IERAT miss duration latency, time pipe is stalled. 
This count is for comparison only, use count x 4 for duration
#14,u,n,n,n,n,PM_TLB_MISS_CYC,TLB miss latency
TLB miss duration latency, time pipe is stalled (for both I and D).
#15,u,n,n,n,n,PM_BR2CNTER_TAKEN,Branch to counter taken
Branch to counter taken.
#16,u,n,n,n,n,PM_EE_OFF_EXT_INT,Cycles MSR(EE) bit off and external interrupt pending
MSR(EE)=0 duration.
#17,u,n,n,n,n,PM_EXT_TRC_XFACE_STROBES,Strobes to external interface
Strobes to external trace interface count.
#18,u,n,n,n,n,NUSED,Not used
Not used
#19,u,n,n,n,n,NUSED,Not used
Not used
#20,u,n,n,n,n,NUSED,Not used
Not used
#21,u,n,n,n,n,PM_THRD_SWTCH,Thread switch count
Thread switch count.
#22,u,n,n,n,n,PM_THRD_SWTCH_BY_L1_INST_MISS,Thread switch caused by L1 instr miss
Thread switch caused by L1 instruction miss.
#23,u,n,n,n,n,PM_THRD_SWTCH_BY_L2_FETCH_MISS,Thread switch caused by L2 fetch miss
Thread switch caused by L2 fetch miss.
#24,u,n,n,n,n,PM_THRD_SWTCH_BY_SCOM,Thread switch caused by SCOM cmd
Thread switch caused by SCOM command. 
#25,u,n,n,n,n,PM_THRD_SWTCH_BY_LD_MULT_STR_MISS,Thread switch caused by miss on load multiple/string
Thread switch caused by load multiple/string: miss on any access 
#26,u,n,n,n,n,PM_THRD_SWTCH_BY_L2DATA_RET_IN_NON_ACTIVE_THRD,Thread switch caused by L2 data returned in non-active thread
Thread switch caused by L2 data returned in non-active thread when 
active thread stalled. 
#27,u,n,n,n,n,PM_THRD_SWTCH_BY_L2DFMULT_MISS,Thread switch caused by L2 DF Multiple miss
Thread switch caused by L2DFMult miss - L2 miss on data fetch multi-cycle op.
#28,u,n,n,n,n,PM_MED_PRIO_CYC,Cycles spent in medium priority
cycles spent in medium priority.
#29,u,n,n,n,n,PM_L2_MISS_THRD_CNT,L2 miss on both threads count
L2 miss on both threads count.
#30,u,n,n,n,n,PM_THRASH_CNT_8_INST,Thrashing counts < 8 instr between switches
Thrashing counts: uses 3 bit counter reset after switch, 
less than  8 instructions between switches.
#31,u,n,n,n,n,PM_OTHER_THRD_RDY,Cycles in which another thread is ready
Cycles in which another thread is ready (could be switched to).
$$$$

{ counter 4 }
#0,u,n,n,n,n,SUSPENDED,Suspended
Suspended
#1,v,n,n,n,n,PM_CYC,Processor cycles
Processor clock cycles.  
If MMCRA[30]=0, this is thread active cycles.
If MMCRA[30]=1, this is thread active run cycles.
#2,u,n,n,n,n,PM_STORAGE_CYC,Storage latency
Cycles lost due to storage delays - global count of all 
storage delays including I cache, D cache, TLB, and SLB miss time, and 
also E=DS load time, counting only time which actually slows down 
instruction execution. Includes sync, vsync, tlbsync, and eieio time after E=DS 
store, Dcbf, Dcbst, Dcbi, Icbi, Tlbie, or Ecowx.
This count increments by a maximum of 1/cycle, i.e. overlapped time only counts 
once. 
((run cycles - storage latency) / inst) = infinite cache CPI = about 1 
IERAT miss/TLB hit miss cycles are not included in this count. To obtain a more 
accurate storage latency, add (ERAT miss count - I TLB miss count) x4 to storage
latency. IERAT miss/TLB miss cycles are included.
Isync after E=DS store, Dcbf, Dcbst, Dcbi, Icbi, Tlbie, or Ecowx are not 
included in this count. Isync duration could also be added to obtain a more 
accurate storage latency.
#3,v,n,n,n,n,PM_INST_CMPL,Instructions completed
Instructions completed (more than 1/cycle)
#4,v,n,n,n,n,PM_INST_CMPL_GATED_BY_IMR,Instructions completed gated by IMR
Instructions completed gated by IMR (more than 1/cycle)
#5,u,n,n,n,n,PM_CYC_NGATED_BY_RUN,Cycles not gated by 'run'
This is thread active cycles.
#6,v,n,n,n,n,PM_CYC_NGATED_BY_RUN_OR_THREAD_ACTIVE,Cycles not gated by 'run' or thread active
This is machine cycles while counters are enabled.
#7,u,n,n,n,n,PM_0INST_DISP_CYC,Cycles 0 instructions dispatched
Cycles 0 instructions dispatched
#8,u,n,n,n,n,PM_SPIPE_CMPL,S pipe instr completed
S pipe instruction completed - Ld/st
#9,u,n,n,n,n,NUSED,Not used
Not used
#10,v,n,n,n,n,PM_L1_ICACHE_MISS,L1 I cache miss count
L1 I cache miss count
#11,u,n,n,n,n,PM_THRD1_RUN_CYC,Thread 1 run cycles
Thread 1 run cycles (use in global mode).
#12,u,n,n,n,n,PM_L1_DCACHE_MISS_CYC,L1 D cache miss latency
L1 D cache miss duration latency, time pipe is stalled (does not 
include busy miss) 
#13,u,n,n,n,n,PM_IERAT_MISS,IERAT miss count
IERAT miss count
#14,u,n,n,n,n,PM_TLB_MISS_CYC,TLB miss latency
TLB miss duration latency, time pipe is stalled (for both I and D).
#15,u,n,n,n,n,PM_ABS_BR,Absolute branches
Absolute branches.
#16,u,n,n,n,n,PM_EE_OFF,Cycles MSR(EE) bit off
MSR(EE) = 0 and any interrupt is pending duration.
#17,u,n,n,n,n,PM_EXT_TRC_XFACE_STROBES,Strobes to external interface
Strobes to external trace interface count.
#18,u,n,n,n,n,NUSED,Not used
Not used
#19,u,n,n,n,n,NUSED,Not used
Not used
#20,u,n,n,n,n,NUSED,Not used
Not used
#21,u,n,n,n,n,PM_THRD_SWTCH,Thread switch count
Thread switch count.
#22,u,n,n,n,n,PM_THRD_SWTCH_BY_ERAT_MISS,Thread switch caused by ERAT miss
Thread switch caused by ERAT miss.
#23,u,n,n,n,n,PM_THRD_SWTCH_BY_L2_ST_MISS,Thread switch caused by L2 store miss
Thread switch caused by L2 store miss.
#24,u,n,n,n,n,PM_THRD_SWTCH_BY_MISS_ON_1ST_OF_STD,Thread switch caused by miss on first of store double
Thread switch caused by store double: miss on first of two
#25,u,n,n,n,n,PM_THRD_SWTCH_BY_MISS_ON_1ST_UNALGN_LD,Thread switch caused by miss on first of two unaligned load
Thread switch caused by unaligned load: miss on first of two 
#26,u,n,n,n,n,PM_THRD_SWTCH_BY_MISC,Thread switch caused by MISC
Thread switch caused by MISC: Breakpoint, active thread disabled. 
#27,u,n,n,n,n,PM_THRD_SWTCH_BY_L2DSMULT_MISS,Thread switch caused by L2 DS Multiple miss
Thread switch caused by L2SFMult miss - L2 miss on data store multi-cycle op.
#28,u,n,n,n,n,PM_HI_PRIO_CYC,cycles spent in high priority
cycles spent in high priority.
#29,u,n,n,n,n,PM_L2_MISS_THRD_CYC,L2 miss on both threads duration
L2 miss on both threads duration.
#30,u,n,n,n,n,PM_UNDEF_THRD_SWTCH_CNT,undefined thread switch count
Undefined thread switch count.
#31,u,n,n,n,n,PM_EMPTY_THRD_PREF_BUF,Thread prefetch buffer empty on switch
Thread prefetch buffer empty on switch.
$$$$

{ counter 5 }
#0,u,n,n,n,n,SUSPENDED,Suspended
Suspended
#1,v,n,n,n,n,PM_CYC,Processor cycles
Processor clock cycles.  
If MMCRA[30]=0, this is thread active cycles.
If MMCRA[30]=1, this is thread active run cycles.
#2,u,n,n,n,n,PM_STORAGE_CYC,Storage latency
Cycles lost due to storage delays - global count of all 
storage delays including I cache, D cache, TLB, and SLB miss time, and 
also E=DS load time, counting only time which actually slows down 
instruction execution. Includes sync, vsync, tlbsync, and eieio time after E=DS 
store, Dcbf, Dcbst, Dcbi, Icbi, Tlbie, or Ecowx.
This count increments by a maximum of 1/cycle, i.e. overlapped time only counts 
once. 
((run cycles - storage latency) / inst) = infinite cache CPI = about 1 
IERAT miss/TLB hit miss cycles are not included in this count. To obtain a more 
accurate storage latency, add (ERAT miss count - I TLB miss count) x4 to storage
latency. IERAT miss/TLB miss cycles are included.
Isync after E=DS store, Dcbf, Dcbst, Dcbi, Icbi, Tlbie, or Ecowx are not 
included in this count. Isync duration could also be added to obtain a more 
accurate storage latency.
#3,v,n,n,n,n,PM_INST_CMPL,Instructions completed
Instructions completed (more than 1/cycle)
#4,v,n,n,n,n,PM_INST_CMPL_GATED_BY_IMR,Instructions completed gated by IMR
Instructions completed gated by IMR (more than 1/cycle)
#5,u,n,n,n,n,PM_CYC_NGATED_BY_RUN,Cycles not gated by 'run'
This is thread active cycles.
#6,v,n,n,n,n,PM_CYC_NGATED_BY_RUN_OR_THREAD_ACTIVE,Cycles not gated by 'run' or thread active
Machine cycles during which the counters are enabled.
#7,u,n,n,n,n,PM_1INST_DISP_CYC,Cycles 1 instructions dispatched
Cycles in which 1 instruction is dispatched
#8,u,n,n,n,n,PM_L1_ICACHE_REF,L1 I cache reference count
L1 I cache reference count
#9,u,n,n,n,n,NUSED,Not used
Not used
#10,u,n,n,n,n,PM_L1_ICACHE_MISS_CYC,L1 I cache miss latency
L1 I cache miss duration latency, time pipe is stalled
If MMCRA[0]=1, this event does not continue to count in the background 
thread as other durations normally would.
#11,u,n,n,n,n,NUSED,Not used
Not used
#12,v,n,n,n,n,PM_L1_DCACHE_MISS,L1 D cache miss count
L1 D cache miss count (Ld, St, dcbz, Ld/St for tablewalk, spawn 
not counted)
#13,u,n,n,n,n,PM_IERAT_MISS_CYC,IERAT miss latency
IERAT miss duration latency, time pipe is stalled. 
This count is for comparison only, use count x 4 for duration
#14,u,n,n,n,n,PM_TLB_MISS_CYC,TLB miss latency
TLB miss duration latency, time pipe is stalled (for both I and D).
#15,u,n,n,n,n,PM_UNCOND_BR,Branches unconditional
Branches unconditional.
#16,u,n,n,n,n,NUSED,Not used
Not used
#17,u,n,n,n,n,PM_EXT_TRC_XFACE_STROBES,Strobes to external interface
Strobes to external trace interface count.
#18,u,n,n,n,n,NUSED,Not used
Not used
#19,u,n,n,n,n,NUSED,Not used
Not used
#20,u,n,n,n,n,NUSED,Not used
Not used
#21,u,n,n,n,n,PM_THRD_SWTCH_CYC,Thread switch duration
Thread switch duration (time from switch to instr executed in new thread)
$$$$

{ counter 6 }
#0,u,n,n,n,n,SUSPENDED,Suspended
Suspended
#1,v,n,n,n,n,PM_CYC,Processor cycles
Processor clock cycles.  
If MMCRA[30]=0, this is thread active cycles.
If MMCRA[30]=1, this is thread active run cycles.
#2,u,n,n,n,n,PM_STORAGE_CYC,Storage latency
Cycles lost due to storage delays - global count of all 
storage delays including I cache, D cache, TLB, and SLB miss time, and 
also E=DS load time, counting only time which actually slows down 
instruction execution. Includes sync, vsync, tlbsync, and eieio time after E=DS 
store, Dcbf, Dcbst, Dcbi, Icbi, Tlbie, or Ecowx.
This count increments by a maximum of 1/cycle, i.e. overlapped time only counts 
once. 
((run cycles - storage latency) / inst) = infinite cache CPI = about 1 
IERAT miss/TLB hit miss cycles are not included in this count. To obtain a more 
accurate storage latency, add (ERAT miss count - I TLB miss count) x4 to storage
latency. IERAT miss/TLB miss cycles are included.
Isync after E=DS store, Dcbf, Dcbst, Dcbi, Icbi, Tlbie, or Ecowx are not 
included in this count. Isync duration could also be added to obtain a more 
accurate storage latency.
#3,v,n,n,n,n,PM_INST_CMPL,Instructions completed
Instructions completed (more than 1/cycle)
#4,v,n,n,n,n,PM_INST_CMPL_GATED_BY_IMR,Instructions completed gated by IMR
Instructions completed gated by IMR (more than 1/cycle)
#5,u,n,n,n,n,PM_CYC_NGATED_BY_RUN,Cycles not gated by 'run'
This is thread active cycles.
#6,v,n,n,n,n,PM_CYC_NGATED_BY_RUN_OR_THREAD_ACTIVE,Cycles not gated by 'run' or thread active
Machine cycles during which the counters are enabled.
#7,u,n,n,n,n,PM_2INST_DISP_CYC,Cycles 2 instructions dispatched
Cycles 2 instructions dispatched
#8,u,n,n,n,n,NUSED,Not used
Not used
#9,u,n,n,n,n,NUSED,Not used
Not used
#10,v,n,n,n,n,PM_L1_ICACHE_MISS,L1 I cache miss count
L1 I cache miss count
#11,u,n,n,n,n,PM_IABR_MATCH,IABR match
IABR match - iu_stg1imat count 
#12,u,n,n,n,n,PM_L1_DCACHE_MISS_CYC,L1 D cache miss latency
L1 D cache miss duration latency, time pipe is stalled (does not 
include busy miss) 
#13,u,n,n,n,n,PM_IERAT_MISS,IERAT miss count
IERAT miss count
#14,u,n,n,n,n,PM_TLB_MISS_CYC,TLB miss latency
TLB miss duration latency, time pipe is stalled (for both I and D).
#15,u,n,n,n,n,PM_COND_BR_NTAKEN,Branches conditional not taken
Branches conditional not taken.
#16,u,n,n,n,n,NUSED,Not used
Not used
#17,u,n,n,n,n,PM_EXT_TRC_XFACE_STROBES,Strobes to external interface
Strobes to external trace interface count.
$$$$

{ counter 7 }
#0,u,n,n,n,n,SUSPENDED,Suspended
Suspended
#1,v,n,n,n,n,PM_CYC,Processor cycles
Processor clock cycles.  
If MMCRA[30]=0, this is thread active cycles.
If MMCRA[30]=1, this is thread active run cycles.
#2,u,n,n,n,n,PM_STORAGE_CYC,Storage latency
Cycles lost due to storage delays - global count of all 
storage delays including I cache, D cache, TLB, and SLB miss time, and 
also E=DS load time, counting only time which actually slows down 
instruction execution. Includes sync, vsync, tlbsync, and eieio time after E=DS 
store, Dcbf, Dcbst, Dcbi, Icbi, Tlbie, or Ecowx.
This count increments by a maximum of 1/cycle, i.e. overlapped time only counts 
once. 
((run cycles - storage latency) / inst) = infinite cache CPI = about 1 
IERAT miss/TLB hit miss cycles are not included in this count. To obtain a more 
accurate storage latency, add (ERAT miss count - I TLB miss count) x4 to storage
latency. IERAT miss/TLB miss cycles are included.
Isync after E=DS store, Dcbf, Dcbst, Dcbi, Icbi, Tlbie, or Ecowx are not 
included in this count. Isync duration could also be added to obtain a more 
accurate storage latency.
#3,v,n,n,n,n,PM_INST_CMPL,Instructions completed
Instructions completed (more than 1/cycle)
#4,v,n,n,n,n,PM_INST_CMPL_GATED_BY_IMR,Instructions completed gated by IMR
Instructions completed gated by IMR (more than 1/cycle)
#5,u,n,n,n,n,PM_CYC_NGATED_BY_RUN,Cycles not gated by 'run'
This is thread active cycles.
#6,v,n,n,n,n,PM_CYC_NGATED_BY_RUN_OR_THREAD_ACTIVE,Cycles not gated by 'run' or thread active
Machine cycles during which the counters are enabled.
#7,u,n,n,n,n,PM_3INST_DISP_CYC,Cycles 3 instructions dispatched
Cycles 3 instructions dispatched
#8,u,n,n,n,n,PM_L1CACHE_MISS_ON_BR_TARG_PREFETCH,L1 I cache misses on branch target prefetch
L1 I cache misses on branch target prefetch count
(but may not be actually fetched)
#9,u,n,n,n,n,NUSED,Not used
Not used
#10,u,n,n,n,n,PM_L1_ICACHE_MISS_CYC,L1 I cache miss latency
L1 I cache miss duration latency, time pipe is stalled
If MMCRA[0]=1, this event does not continue to count in the background 
thread as other durations normally would.
#11,u,n,n,n,n,PM_TRACE_CNT0,Selected counts from trace_cnt(0)
trace_cnt(0) - selected counts from trace unit 
#12,v,n,n,n,n,PM_L1_DCACHE_MISS,L1 D cache miss count
L1 D cache miss count (Ld, St, dcbz, Ld/St for tablewalk, spawn 
not counted)
#13,u,n,n,n,n,PM_IERAT_MISS_CYC,IERAT miss latency
IERAT miss duration latency, time pipe is stalled. 
This count is for comparison only, use count x 4 for duration
#14,u,n,n,n,n,PM_TLB_MISS_CYC,TLB miss latency
TLB miss duration latency, time pipe is stalled (for both I and D).
#15,u,n,n,n,n,PM_0CYC_BR_NTAKEN,Zero cycle branch not taken
Zero cycle branch not taken.
#16,u,n,n,n,n,NUSED,Not used
Not used
#17,u,n,n,n,n,PM_EXT_TRC_XFACE_STROBES,Strobes to external interface
Strobes to external trace interface count.
$$$$

{ counter 8 }
#0,u,n,n,n,n,SUSPENDED,Suspended
Suspended
#1,v,n,n,n,n,PM_CYC,Processor cycles
Processor clock cycles.  
If MMCRA[30]=0, this is thread active cycles.
If MMCRA[30]=1, this is thread active run cycles.
#2,u,n,n,n,n,PM_STORAGE_CYC,Storage latency
Cycles lost due to storage delays - global count of all 
storage delays including I cache, D cache, TLB, and SLB miss time, and 
also E=DS load time, counting only time which actually slows down 
instruction execution. Includes sync, vsync, tlbsync, and eieio time after E=DS 
store, Dcbf, Dcbst, Dcbi, Icbi, Tlbie, or Ecowx.
This count increments by a maximum of 1/cycle, i.e. overlapped time only counts 
once. 
((run cycles - storage latency) / inst) = infinite cache CPI = about 1 
IERAT miss/TLB hit miss cycles are not included in this count. To obtain a more 
accurate storage latency, add (ERAT miss count - I TLB miss count) x4 to storage
latency. IERAT miss/TLB miss cycles are included.
Isync after E=DS store, Dcbf, Dcbst, Dcbi, Icbi, Tlbie, or Ecowx are not 
included in this count. Isync duration could also be added to obtain a more 
accurate storage latency.
#3,v,n,n,n,n,PM_INST_CMPL,Instructions completed
Instructions completed (more than 1/cycle)
#4,v,n,n,n,n,PM_INST_CMPL_GATED_BY_IMR,Instructions completed gated by IMR
Instructions completed gated by IMR (more than 1/cycle)
#5,u,n,n,n,n,PM_CYC_NGATED_BY_RUN,Cycles not gated by 'run'
This is thread active cycles.
#6,v,n,n,n,n,PM_CYC_NGATED_BY_RUN_OR_THREAD_ACTIVE,Cycles not gated by 'run' or thread active
Machine cycles during which the counters are enabled.
#7,u,n,n,n,n,PM_4INST_DISP_CYC,Cycles 4 instructions dispatched
Cycles 4 instructions dispatched
#8,u,n,n,n,n,PM_IERAT_REF,IERAT reference count
IERAT reference count
#9,u,n,n,n,n,NUSED,Not used
Not used
#10,v,n,n,n,n,PM_L1_ICACHE_MISS,L1 I cache miss count
L1 I cache miss count
#11,u,n,n,n,n,PM_TRACE_CNT1,Selected counts from trace_cnt(1)
trace_cnt(1) - selected counts from trace unit 
#12,u,n,n,n,n,PM_L1_DCACHE_MISS_CYC,L1 D cache miss latency
L1 D cache miss duration latency, time pipe is stalled (does not 
include busy miss) 
#13,u,n,n,n,n,PM_IERAT_MISS,IERAT miss count
IERAT miss count
#14,u,n,n,n,n,PM_TLB_MISS_CYC,TLB miss latency
TLB miss duration latency, time pipe is stalled (for both I and D).
#15,u,n,n,n,n,PM_0CYC_BR_TAKEN,Zero cycle branch taken
Zero cycle branch taken.