# IBM_PROLOG_BEGIN_TAG # This is an automatically generated prolog. # # bos72X src/bos/usr/sbin/perf/pmapi/events/POWER9.dms 1.4 # # # # OBJECT CODE ONLY SOURCE MATERIALS # # COPYRIGHT International Business Machines Corp. 2017,2021 # All Rights Reserved # # The source code for this program is not published or otherwise # divested of its trade secrets, irrespective of what has been # deposited with the U.S. Copyright Office. # # IBM_PROLOG_END_TAG # Global variables CACHE_LINE_SIZE = 128; MEM_LINE_SIZE = 128; # Derived metric descriptions @PMD_CPI;Cycles per instruction;NA @@PM_CYC / PM_INST_CMPL @@@group=CPI_Breakdown @PMD_L1_ST_Miss_Ratio;Percentage of L1 store misses per L1 store ref;% @@PM_ST_MISS_L1 / PM_ST_FIN * 100 @@@group=General @PMD_dL1_Reload_FROM_L31_SHR_Rate;% of DL1 reloads from Private L3 S state, other core per Inst;% @@PM_DATA_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_L2_ST_Rd_Util;% L2 store disp attempts Cache read Utilization (4 pclks per disp attempt);NA @@((PM_L2_RCST_DISP/2)*4)/ ( PM_RUN_CYC) * 100 @@@group=L2_stats @PMD_L31_SHR_Latency;Marked L31 Load latency;NA @@PM_MRK_DATA_FROM_L31_SHR_CYC/ PM_MRK_DATA_FROM_L31_SHR @@@group=Latency @PMD_dL1_Reload_FROM_L3_Miss_Rate;% of DL1 reloads from sources beyond the local L3 per Inst;% @@PM_DATA_FROM_L3MISS * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_L2_Instr_commands;Percent of instruction reads out of all L2 commands;% @@PM_ISIDE_DISP * 100 / (PM_L2_ST + PM_L2_LD + PM_ISIDE_DISP) @@@group=General @PMD_dL1_Reload_FROM_L2_Rate;% of DL1 reloads from L2 per Inst;% @@PM_DATA_FROM_L2 * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_L2_SN_Usage;Average number of Snoop machines used. 
1 of 8 SN machines is sampled every L2 cycle;NA @@(PM_SN_USAGE / PM_RUN_CYC) * 8 @@@group=General @PMD_dL1_Reload_FROM_L31_Rate;% of DL1 reloads from Private L3, other core per Inst;% @@(PM_DATA_FROM_L31_MOD + PM_DATA_FROM_L31_SHR) * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_Speculation;Instruction dispatch-to-completion ratio;NA @@PM_INST_DISP / PM_INST_CMPL @@@group=General @PMD_DPLONG_STALL_CPI;Finish stall because the NTF instruction was a scalar multi-cycle instruction issued;NA @@PM_CMPLU_STALL_DPLONG/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_L3_Latency;Marked L3 Load latency;NA @@PM_MRK_DATA_FROM_L3_CYC/ PM_MRK_DATA_FROM_L3 @@@group=Latency @PMD_PTEG_FROM_DMEM;% of DERAT reloads from Distant Memory;% @@PM_DPTEG_FROM_DMEM * 100 / PM_DTLB_MISS @@@group=General @PMD_L31_MOD_Latency;Marked L31 Load latency;NA @@PM_MRK_DATA_FROM_L31_MOD_CYC/ PM_MRK_DATA_FROM_L31_MOD @@@group=Latency @PMD_ICT_NOSLOT_DISP_HELD_ISSQ_CPI;Instruction Completion Table empty for this thread due to dispatch hold on this thread ;NA @@PM_ICT_NOSLOT_DISP_HELD_ISSQ/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_dL1_Reload_FROM_DL2L3_SHR;% of DL1 dL1_Reloads from Distant L2 or L3 (Shared);% @@PM_DATA_FROM_DL2L3_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID @@@group=General @PMD_PTEG_FROM_DL4_Rate;% of DERAT reloads from Distant L4 per inst;% @@PM_DPTEG_FROM_DL4 * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_VFXLONG_STALL_CPI;Completion stall due to a long latency vector fixed point instruction (division, square root);NA @@PM_CMPLU_STALL_VFXLONG/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_PM_STALL_CPI;Finish stall because the NTF instruction was issued to the Permute execution pipe and waiting to finish.;NA @@PM_CMPLU_STALL_PM/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_Loads_per_inst;PCT instruction loads;NA @@PM_LD_REF_L1 / PM_RUN_INST_CMPL @@@group=General @PMD_LD_RMEM_PER_LD_DMEM;Number of loads from remote memory per loads from distant memory;NA @@PM_DATA_FROM_RMEM / PM_DATA_FROM_DMEM 
@@@group=Memory @PMD_LSU_STALL_CPI;Completion stall by LSU instruction;NA @@PM_CMPLU_STALL_LSU/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_LOAD_FINISH_STALL_CPI;Finish stall because the NTF instruction was a load instruction with all its dependencies satisfied just going through the LSU pipe to finish;NA @@PM_CMPLU_STALL_LOAD_FINISH/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_INST_FROM_DMEM;% of ICache reloads from Distant Memory;% @@PM_INST_FROM_DMEM * 100 / PM_L1_ICACHE_MISS @@@group=General @PMD_dL1_Reload_FROM_RL2L3_SHR;% of DL1 dL1_Reloads from Remote L2 or L3 (Shared);% @@PM_DATA_FROM_RL2L3_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID @@@group=General @PMD_LHS_STALL_CPI;Finish stall because the NTF instruction was a load that hit on an older store and it was waiting for store data;NA @@PM_CMPLU_STALL_LHS/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_PTEG_FROM_RL4;% of DERAT reloads from Remote L4;% @@PM_DPTEG_FROM_RL4 * 100 / PM_DTLB_MISS @@@group=General @PMD_STORE_PIPE_ARB_STALL_CPI;Finish stall because the NTF instruction was a store waiting for the next relaunch ;NA @@PM_CMPLU_STALL_STORE_PIPE_ARB/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_L21_SHR_Latency;Marked L21 Load latency;NA @@PM_MRK_DATA_FROM_L21_SHR_CYC/ PM_MRK_DATA_FROM_L21_SHR @@@group=Latency @PMD_dL1_Reload_FROM_L21_MOD_Rate;% of DL1 reloads from Private L2, other core per Inst;% @@PM_DATA_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_ICT_NOSLOT_DISP_HELD_SYNC_CPI;Dispatch held due to a synchronizing instruction at dispatch;NA @@PM_ICT_NOSLOT_DISP_HELD_SYNC/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_SPEC_FINISH_STALL_CPI;Finish stall while waiting for the non-speculative finish of either a stcx waiting;NA @@PM_CMPLU_STALL_SPEC_FINISH/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_L2_Dem_LD_Disp;Demand load misses as a % of L2 LD dispatches (per thread);% @@PM_L1_DCACHE_RELOAD_VALID / (PM_L2_LD / 2 ) * 100 @@@group=General @PMD_Stores_per_inst;PCT instruction stores;NA @@PM_ST_FIN / 
PM_RUN_INST_CMPL @@@group=General @PMD_L2_LDMISS_Wr_Util;L2 load misses that require a cache write (4 pclks per disp attempt) % of pclks;NA @@((( PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4)/ ( PM_RUN_CYC) * 100 @@@group=L2_stats @PMD_L2_ST_Wr_Util;L2 stores that require a cache write (4 pclks per disp attempt) % of pclks;NA @@((PM_L2_ST_DISP/2)*4)/ ( PM_RUN_CYC) * 100 @@@group=L2_stats @PMD_IPTEG_FROM_L3_Rate;Rate of IERAT reloads from L3;% @@PM_IPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_PTEG_FROM_L2;% of DERAT reloads from L2;% @@PM_DPTEG_FROM_L2 * 100 / PM_DTLB_MISS @@@group=General @PMD_PTEG_FROM_L3;% of DERAT reloads from L3;% @@PM_DPTEG_FROM_L3 * 100 / PM_DTLB_MISS @@@group=General @PMD_INST_FROM_DL4_Rate;% of ICache reloads from Distant L4 per Inst;% @@PM_INST_FROM_DL4 * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_FXLONG_STALL_CPI;Completion stall due to a long latency scalar fixed point instruction (division, square root);NA @@PM_CMPLU_STALL_FXLONG/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_DMISS_L3MISS_STALL_CPI;Completion stall due to cache miss resolving missed the L3;NA @@PM_CMPLU_STALL_DMISS_L3MISS/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_Write_BW;Total write bandwidth seen by the chiplet for this partition (Centaur + L4), including RWITMs and cast outs;NA @@PM_MEM_CO * 64 * (proc_freq * 1E-9) / PM_CYC @@@group=Memory @PMD_LSTACK_Mispredict_Rate;Link stack branch misprediction;% @@PM_BR_MPRED_LSTACK / PM_RUN_INST_CMPL * 100 @@@group=General @PMD_IPTEG_FROM_LMEM_Rate;Rate of IERAT reloads from local memory;% @@PM_IPTEG_FROM_LMEM * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_PTEG_FROM_RL4_Rate;% of DERAT reloads from Remote L4 per inst;% @@PM_DPTEG_FROM_RL4 * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_LSAQ_ARB_STALL_CPI;Finish stall because the NTF instruction was a load or store that was held in LSAQ;NA @@PM_CMPLU_STALL_LSAQ_ARB/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_INST_FROM_L3_Rate;% of ICache reloads from L3 per Inst;% 
@@PM_INST_FROM_L3 * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_dL1_Reload_FROM_DMEM_Rate;% of DL1 Reloads from Distant Memory per Inst;% @@PM_DATA_FROM_DMEM * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_Average_Completed_Instruction_Set_Size;Avg. more than 1 instructions completed;NA @@PM_INST_CMPL / PM_1PLUS_PPC_CMPL @@@group=General @PMD_Branches_per_inst;% Branches per instruction;NA @@PM_BRU_FIN / PM_RUN_INST_CMPL @@@group=General @PMD_CCACHE_Mispredict_Rate;Count cache branch misprediction per instruction;% @@PM_BR_MPRED_CCACHE / PM_RUN_INST_CMPL * 100 @@@group=General @PMD_L1_ST_Miss_Rate;Percentage of L1 store misses per run instruction;% @@PM_ST_MISS_L1 * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_dL1_Reload_FROM_L21_MOD;% of DL1 reloads from Private L2, other core;% @@PM_DATA_FROM_L21_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID @@@group=General @PMD_PTEG_FROM_RMEM_Rate;% of DERAT reloads from Remote Memory per inst;% @@PM_DPTEG_FROM_RMEM * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_dL1_Reload_FROM_L31_MOD;% of DL1 reloads from Private L3, other core;% @@PM_DATA_FROM_L31_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID @@@group=General @PMD_LD_LL4_PER_LD_DMEM;Ratio of reloads from local L4 to distant L4;NA @@PM_DATA_FROM_LL4 / PM_DATA_FROM_DL4 @@@group=Memory @PMD_L2_Inst_Miss_Rate;L2 Instruction Miss Rate (per instruction)(%);% @@PM_INST_FROM_L2MISS * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_PTEG_FROM_LL4;% of DERAT reloads from Local L4;% @@PM_DPTEG_FROM_LL4 * 100 / PM_DTLB_MISS @@@group=General @PMD_L3_PTEG_Miss_Rate;L3 PTEG Miss Rate (per run instruction)(%);% @@PM_DPTEG_FROM_L3MISS * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_PTEG_FROM_DL4;% of DERAT reloads from Distant L4;% @@PM_DPTEG_FROM_DL4 * 100 / PM_DTLB_MISS @@@group=General @PMD_RMEM_Latency;Marked Rmem Load latency;NA @@PM_MRK_DATA_FROM_RMEM_CYC/ PM_MRK_DATA_FROM_RMEM @@@group=Latency @PMD_INST_FROM_RL2L3_MOD;% of ICache reloads from Remote L2 or L3 (Modified);% @@PM_INST_FROM_RL2L3_MOD * 100 / 
PM_L1_ICACHE_MISS @@@group=General @PMD_L2_Latency;Marked L2 Load latency;NA @@PM_MRK_DATA_FROM_L2_CYC/ PM_MRK_DATA_FROM_L2 @@@group=Latency @PMD_LD_LMEM_PER_LD_MEM;Number of loads from local memory per loads from remote and distant memory;NA @@PM_DATA_FROM_LMEM / (PM_DATA_FROM_DMEM + PM_DATA_FROM_RMEM) @@@group=Memory @PMD_DERAT_4K_Miss_Rate;% DERAT miss rate for 4K page per inst;% @@PM_DERAT_MISS_4K * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_dL1_Reload_FROM_RL4_Rate;% of DL1 Reloads from Remote L4 per Inst;% @@PM_DATA_FROM_RL4 * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_Average_iL1_Miss_Latency;Average icache miss latency;NA @@(PM_IC_DEMAND_CYC / PM_IC_DEMAND_REQ) @@@group=Latency @PMD_L2_RC_ST_Disp_Fail_Rate;Rate of L2 store dispatches that failed per core;% @@100 * ((PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/2) / PM_RUN_INST_CMPL @@@group=General @PMD_L2_ld_hit_frequency;Average number of cycles between L2 Load hits;NA @@(PM_L2_LD_HIT / PM_RUN_CYC) / 2 @@@group=L2_stats @PMD_DTLB_Miss_Rate;% DTLB miss rate per inst;% @@PM_DTLB_MISS / PM_RUN_INST_CMPL *100 @@@group=General @PMD_L2_RC_Usage;Average number of Read/Claim machines used. 
1 of 16 RC machines is sampled every L2 cycle;NA @@(PM_RC_USAGE / PM_RUN_CYC) * 16 @@@group=General @PMD_DL4_Latency;Distant L4 average load latency;NA @@PM_MRK_DATA_FROM_DL4_CYC/ PM_MRK_DATA_FROM_DL4 @@@group=Latency @PMD_DMISS_REMOTE_STALL_CPI;Completion stall by Dcache miss which resolved from remote chip (cache or memory);NA @@PM_CMPLU_STALL_DMISS_REMOTE/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_LSTACK_Misprediction;Link stack branch misprediction;% @@PM_BR_MPRED_LSTACK/ PM_BR_PRED_LSTACK * 100 @@@group=General @PMD_L2_Local_Pred_Correct;L2 local pump prediction success;% @@PM_L2_LOC_GUESS_CORRECT / (PM_L2_LOC_GUESS_CORRECT + PM_L2_LOC_GUESS_WRONG) * 100 @@@group=General @PMD_RUN_CPI;Run cycles per run instruction;NA @@PM_RUN_CYC / PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_INST_FROM_L21_MOD;% of ICache reloads from Private L2, other core;% @@PM_INST_FROM_L21_MOD * 100 / PM_L1_ICACHE_MISS @@@group=General @PMD_L2_Wr_Util;L2 Cache Write Utilization (per core);% @@PMD_L2_LDMISS_Wr_Util + PMD_L2_ST_Wr_Util @@@group=General @PMD_DERAT_4K_Miss_Ratio;DERAT miss ratio for 4K page;NA @@PM_DERAT_MISS_4K / PM_LSU_DERAT_MISS @@@group=Translation @PMD_dL1_Reload_FROM_L2_Miss_Rate;% of DL1 reloads from sources beyond the local L2 per Inst;% @@PM_DATA_FROM_L2MISS * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_ICache_MISS_RELOAD;Icache Fetches per Icache Miss;NA @@(PM_L1_ICACHE_MISS - PM_IC_PREF_WRITE) / PM_L1_ICACHE_MISS @@@group=Instruction_Stats(% per ref) @PMD_DPTEG_FROM_L2_Rate;Rate of DERAT reloads from L2;% @@PM_DPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_PTEG_FROM_DL2L3_MOD;% of DERAT reloads from Distant L2 or L3 (Modified);% @@PM_DPTEG_FROM_DL2L3_MOD * 100 / PM_DTLB_MISS @@@group=General @PMD_dL1_Reload_FROM_RL2L3_SHR_Rate;% of DL1 reloads from Remote L2 or L3 (Shared) per Inst;% @@PM_DATA_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_L2_RC_LD_Disp_Addr_Fail;% of L2 Load RC dispatch attempts that failed because of address collisions and cclass conflicts;% 
@@(PM_L2_RCLD_DISP_FAIL_ADDR )/ ( PM_L2_RCLD_DISP) * 100 @@@group=General @PMD_NTC_ISSUE_HELD_OTHER_CPI;The NTC instruction is being held at dispatch during regular pipeline cycles;NA @@PM_NTC_ISSUE_HELD_OTHER/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_NTC_DISP_FIN_STALL_CPI;Finish stall because the NTF instruction was one that must finish at dispatch.;NA @@PM_CMPLU_STALL_NTC_DISP_FIN/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_DERAT_MISS_RELOAD;% of DERAT misses that result in an ERAT reload;% @@PM_DTLB_MISS * 100 / PM_LSU_DERAT_MISS @@@group=General @PMD_STORE_DATA_STALL_CPI;Finish stall because the next to finish instruction was a store waiting on data;NA @@PM_CMPLU_STALL_STORE_DATA/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_PTEG_FROM_L31_MOD_Rate;% of DERAT reloads from Private L3, other core per inst;% @@PM_DPTEG_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_PTEG_FROM_DL2L3_MOD_Rate;% of DERAT reloads from Distant L2 or L3 (Modified) per inst;% @@PM_DPTEG_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_IPC;Instructions per cycles;NA @@PM_INST_CMPL / PM_CYC @@@group=General @PMD_L2_Shr_CO;L2 COs that were in T,Te,Si,S state as a % of all L2 COs;% @@PM_L2_CASTOUT_SHR / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100 @@@group=General @PMD_L3_ld_hit_frequency;Average number of cycles between L3 Load hits;NA @@(PM_L3_LD_HIT / PM_RUN_CYC) / 2 @@@group=L3_stats @PMD_L2_ld_miss_frequency;Average number of cycles between L2 Load misses;NA @@(PM_L2_LD_MISS / PM_RUN_CYC) / 2 @@@group=L2_stats @PMD_L3_Inst_Miss_Rate;L3 Instruction Miss Rate (per instruction)(%);% @@PM_INST_FROM_L3MISS * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_Fixed_per_inst;Total Fixed point operations;NA @@PM_FXU_FIN/PM_RUN_INST_CMPL @@@group=General @PMD_dL1_Reload_FROM_DL2L3_MOD_Rate;% of DL1 Reloads from Distant L2 or L3 (Modified) per Inst;% @@PM_DATA_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_PTEG_FROM_RMEM;% of DERAT reloads from Remote 
Memory;% @@PM_DPTEG_FROM_RMEM * 100 / PM_DTLB_MISS @@@group=General @PMD_VFXU_STALL_CPI;Finish stall due to a vector fixed point instruction in the execution pipeline. These instructions get routed to the ALU, ALU2, and DIV pipes;NA @@PM_CMPLU_STALL_VFXU/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_VDPLONG_STALL_CPI;Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format.;NA @@PM_CMPLU_STALL_VDPLONG/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_dL1_Miss_Reloads;% of DL1 misses that result in a cache reload;% @@PM_L1_DCACHE_RELOAD_VALID * 100 / PM_LD_MISS_L1 @@@group=General @PMD_INST_FROM_RL2L3_SHR_Rate;% of ICache reloads from Remote L2 or L3 (Shared) per Inst;% @@PM_INST_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_DERAT_64K_Miss_Rate;% DERAT miss ratio for 64K page per inst;% @@PM_DERAT_MISS_64K * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_STORE_FINISH_STALL_CPI;Finish stall because the NTF instruction was a store with all its dependencies met, just waiting to go through the LSU pipe to finish;NA @@PM_CMPLU_STALL_STORE_FINISH/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_FLUSH_ANY_THREAD_STALL_CPI;Cycles in which the NTC instruction is not allowed to complete because any of the 4 threads in the same core suffered a flush, which blocks completion;NA @@PM_CMPLU_STALL_FLUSH_ANY_THREAD/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_dL1_Reload_FROM_DL2L3_SHR_Rate;% of DL1 Reloads from Distant L2 or L3 (Shared) per Inst;% @@PM_DATA_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_COMPLETION_CPI;Cycles in which at least one instruction completes in this thread;NA @@PM_1PLUS_PPC_CMPL/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_dL1_Reload_FROM_L3_MEPF;% of DL1 Reloads that came from L3 and were brought into the L3 by a prefetch;% @@PM_DATA_FROM_L3_MEPF * 100 / 
PM_L1_DCACHE_RELOAD_VALID @@@group=General @PMD_dL1_Reload_FROM_L3;% of DL1 Reloads from L3;% @@PM_DATA_FROM_L3 * 100 / PM_L1_DCACHE_RELOAD_VALID @@@group=General @PMD_dL1_Reload_FROM_L2;% of DL1 reloads from L2;% @@PM_DATA_FROM_L2 * 100 / PM_L1_DCACHE_RELOAD_VALID @@@group=General @PMD_INST_FROM_DL2L3_SHR_Rate;% of ICache reloads from Distant L2 or L3 (Shared) per Inst;% @@PM_INST_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_dL1_Reload_FROM_RMEM_Rate;% of DL1 Reloads from Remote Memory per Inst;% @@PM_DATA_FROM_RMEM * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_Run_Cycles;Run cycles per cycle;% @@PM_RUN_CYC / PM_CYC*100 @@@group=General @PMD_dL1_Reload_FROM_LL4_Rate;% of DL1 Reloads from Local L4 per Inst;% @@PM_DATA_FROM_LL4 * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_INST_FROM_DL2L3_SHR;% of ICache reloads from Distant L2 or L3 (Shared);% @@PM_INST_FROM_DL2L3_SHR * 100 / PM_L1_ICACHE_MISS @@@group=General @PMD_PTEG_FROM_RL2L3_MOD_Rate;% of DERAT reloads from Remote L2 or L3 (Modified) per inst;% @@PM_DPTEG_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_L31_MOD_Latency;Marked L31 Load latency;NA @@PM_MRK_DATA_FROM_L31_MOD_CYC/ PM_MRK_DATA_FROM_L31_MOD @@@group=Latency @PMD_L2_CO_Usage;Average number of Castout machines used. 1 of 16 CO machines is sampled every L2 cycle;NA @@(PM_CO_USAGE / PM_RUN_CYC) * 16 @@@group=General @PMD_PTEG_FROM_L31_SHR_Rate;% of DERAT reloads from Private L3, other core per inst;% @@PM_DPTEG_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_dL1_Reload_FROM_RMEM;% of DL1 dL1_Reloads from Remote Memory;% @@PM_DATA_FROM_RMEM * 100 / PM_L1_DCACHE_RELOAD_VALID @@@group=General @PMD_L3_WI_Usage;Average number of Write-in machines used. 
1 of 8 WI machines is sampled every L3 cycle;NA @@(PM_L3_WI_USAGE / PM_RUN_CYC) * 8 @@@group=L3_stats @PMD_L2_INST_Miss_Ratio;L2 Inst misses as a % of total L2 Inst dispatches (per thread);% @@PM_L2_INST_MISS / PM_L2_INST * 100 @@@group=General @PMD_L2_Disp_Conflict_LDHITST_Latency;Marked L2 Load latency dispatch conflict LD hit ST;NA @@PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST_CYC/ PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST @@@group=Latency @PMD_DERAT_Miss_Rate;DERAT Miss Rate (per run instruction)(%);% @@PM_LSU_DERAT_MISS * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_ICT_NOSLOT_IC_L3_CPI;Instruction Completion Table empty for this thread due to icache misses that were sourced from the local L3;NA @@PM_ICT_NOSLOT_IC_L3/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_LD_LL4_PER_LD_MEM;Ratio of reloads from local L4 to remote+distant L4;NA @@PM_DATA_FROM_LL4 / (PM_DATA_FROM_DL4 + PM_DATA_FROM_RL4) @@@group=Memory @PMD_Cycles_Completed_Instructions_Set;Cycles per instruction group;NA @@PM_CYC / PM_1PLUS_PPC_CMPL @@@group=General @PMD_PTEG_FROM_L21_MOD_Rate;% of DERAT reloads from Private L2, other core per inst;% @@PM_DPTEG_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_NESTED_TBEGIN_STALL_CPI;Completion stall because the ISU is updating the TEXASR to keep track of the nested tbegin.;NA @@PM_CMPLU_STALL_NESTED_TBEGIN/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_PTEG_FROM_RL2L3_SHR;% of DERAT reloads from Remote L2 or L3 (Shared);% @@PM_DPTEG_FROM_RL2L3_SHR * 100 / PM_DTLB_MISS @@@group=General @PMD_SYNC_PMU_INT_STALL_CPI;SYNC_PMU_INT_STALL_CPI;NA @@PM_CMPLU_STALL_SYNC_PMU_INT / PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_dL1_Reload_FROM_DL4;% of DL1 dL1_Reloads from Distant L4;% @@PM_DATA_FROM_DL4 * 100 / PM_L1_DCACHE_RELOAD_VALID @@@group=General @PMD_PTEG_FROM_L31_SHR;% of DERAT reloads from Private L3, other core;% @@PM_DPTEG_FROM_L31_SHR * 100 / PM_DTLB_MISS @@@group=General @PMD_ICT_NOSLOT_DISP_HELD_CPI;Cycles in which the NTC instruction is held at 
dispatch for any reason;NA @@PM_ICT_NOSLOT_DISP_HELD/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_L2_RC_ST_Disp_Addr_Fail;% of L2 Store RC dispatch attempts that failed because of address collisions and cclass conflicts;% @@(PM_L2_RCST_DISP_FAIL_ADDR )/ ( PM_L2_RCST_DISP) * 100 @@@group=General @PMD_L3_ld_miss_frequency;Average number of cycles between L3 Load misses;NA @@(PM_L3_LD_MISS / PM_RUN_CYC) / 2 @@@group=L3_stats @PMD_PTEG_FROM_RL2L3_SHR_Rate;% of DERAT reloads from Remote L2 or L3 (Shared) per inst;% @@PM_DPTEG_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_dL1_Reload_FROM_LL4;% of DL1 dL1_Reloads from Local L4;% @@PM_DATA_FROM_LL4 * 100 / PM_L1_DCACHE_RELOAD_VALID @@@group=General @PMD_NESTED_TEND_STALL_CPI;Completion stall because the ISU is updating the TEXASR to keep track of the nested tend and decrement the TEXASR nested level;NA @@PM_CMPLU_STALL_NESTED_TEND/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_FINISH_TO_CMPL_CPI;Cycles in which the oldest instruction is finished and ready to complete but waiting to get through the completion pipe;NA @@PM_NTC_ALL_FIN / PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_IPTEG_FROM_L2_Rate;Rate of IERAT reloads from L2;% @@PM_IPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_IPTEG_FROM_LL4_Rate;Rate of IERAT reloads from local L4;% @@PM_IPTEG_FROM_LL4 * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_DERAT_64K_Miss_Ratio;DERAT miss ratio for 64K page;NA @@PM_DERAT_MISS_64K / PM_LSU_DERAT_MISS @@@group=Translation @PMD_DSLB_Miss_Rate;% DSLB_Miss_Rate per inst;% @@PM_DSLB_MISS * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_LSU_MFSPR_STALL_CPI;Finish stall because the NTF instruction was a mfspr instruction targeting an LSU SPR and it was waiting for the register data to be returned;NA @@PM_CMPLU_STALL_LSU_MFSPR/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_L2_LD_Miss_Rate;L2 demand Load Miss Rate (per run instruction)(%);% @@PM_DATA_FROM_L2MISS * 100 / PM_RUN_INST_CMPL @@@group=General 
@PMD_L2_IC_Inv_Rate;L2 Icache invalidates per run inst (per core);% @@(PM_L2_IC_INV / 2 ) / PM_RUN_INST_CMPL * 100 @@@group=General @PMD_THREAD_BLOCK_STALL_CPI;Completion Stalled because the thread was blocked;NA @@PM_CMPLU_STALL_THRD/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_DPTEG_FROM_L3_Rate;Rate of DERAT reloads from L3;% @@PM_DPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_RUN_CYC_CPI;Run_cycles;NA @@PM_RUN_CYC/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_Flush_Rate;Flush rate (%);% @@PM_FLUSH * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_EIEIO_STALL_CPI;Finish stall because the NTF instruction is an EIEIO waiting for response from L2;NA @@PM_CMPLU_STALL_EIEIO/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_L3_No_Conflict_Latency;Marked L3 Load latency no conflict;NA @@PM_MRK_DATA_FROM_L3_NO_CONFLICT_CYC/ PM_MRK_DATA_FROM_L3_NO_CONFLICT @@@group=Latency @PMD_FXU_STALL_CPI;Finish stall due to a scalar fixed point or CR instruction in the execution pipeline. These instructions get routed to the ALU, ALU2, and DIV pipes;NA @@PM_CMPLU_STALL_FXU/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_ICT_NOSLOT_DISP_HELD_TBEGIN_CPI;the NTC instruction is being held at dispatch because it is a tbegin instruction and there is an older tbegin in the pipeline that must complete before the younger tbegin can dispatch;NA @@PM_ICT_NOSLOT_DISP_HELD_TBEGIN/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_LD_LMEM_PER_LD_RMEM;Number of loads from local memory per loads from remote memory;NA @@PM_DATA_FROM_LMEM / PM_DATA_FROM_RMEM @@@group=Memory @PMD_EMQ_FULL_STALL_CPI;Finish stall because the next to finish instruction suffered an ERAT miss and the EMQ was full;NA @@PM_CMPLU_STALL_EMQ_FULL/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_Estimated_dL1Miss_Latency;average L1 miss latency using marked events;NA @@PM_MRK_LD_MISS_L1_CYC / PM_MRK_LD_MISS_L1 @@@group=Latency @PMD_INST_FROM_RL4_Rate;% of ICache reloads from Remote L4 per Inst;% @@PM_INST_FROM_RL4 * 100 / 
PM_RUN_INST_CMPL @@@group=General @PMD_TLBIE_STALL_CPI;Finish stall because the NTF instruction was a tlbie waiting for response from L2;NA @@PM_CMPLU_STALL_TLBIE/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_ICT_NOSLOT_IC_L3MISS_CPI;Instruction Completion Table empty for this thread due to icache misses that were sourced from beyond the local L3. The source could be local/remote/distant memory or another core's cache;NA @@PM_ICT_NOSLOT_IC_L3MISS/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_BR_Misprediction;Branch misprediction %;% @@PM_BR_MPRED_CMPL / PM_BR_PRED * 100 @@@group=General @PMD_LSU_FLUSH_NEXT_STALL_CPI;Completion stall of one cycle because the LSU requested to flush the next iop in the sequence. It takes 1 cycle for the ISU to process this request before the LSU instruction is allowed to complete;NA @@PM_CMPLU_STALL_LSU_FLUSH_NEXT/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_L21_MOD_Latency;Marked L21 Load latency;NA @@PM_MRK_DATA_FROM_L21_MOD_CYC/ PM_MRK_DATA_FROM_L21_MOD @@@group=Latency @PMD_Demand_Read_BW;Total demand read bandwidth seen by the chiplet for this partition (Centaur + L4), including data/inst/xlate;NA @@(PM_MEM_READ + PM_MEM_RWITM) * 64 * (proc_freq * 1E-9) / PM_CYC @@@group=Memory @PMD_INST_FROM_LMEM;% of ICache reloads from Local Memory;% @@PM_INST_FROM_LMEM * 100 / PM_L1_ICACHE_MISS @@@group=General @PMD_DMEM_Latency;Marked Dmem Load latency;NA @@PM_MRK_DATA_FROM_DMEM_CYC/ PM_MRK_DATA_FROM_DMEM @@@group=Latency @PMD_L1_Inst_Miss_Rate;Instruction Cache Miss Rate (Per run Instruction)(%);% @@PM_L1_ICACHE_MISS * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_STORE_FIN_ARB_STALL_CPI;Finish stall because the NTF instruction was a store waiting for a slot in the store finish pipe. 
This means the instruction is ready to finish but there are instructions ahead of it, using the finish pipe;NA @@PM_CMPLU_STALL_STORE_FIN_ARB/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_DL2L3_SHR_Latency;Marked L2L3 distant Load latency;NA @@PM_MRK_DATA_FROM_DL2L3_SHR_CYC/ PM_MRK_DATA_FROM_DL2L3_SHR @@@group=Latency @PMD_RL2L3_SHR_Latency;Marked L2L3 remote Load latency;NA @@PM_MRK_DATA_FROM_RL2L3_SHR_CYC/ PM_MRK_DATA_FROM_RL2L3_SHR @@@group=Latency @PMD_dL1_Reload_FROM_L31_SHR;% of DL1 reloads from Private L3, other core;% @@PM_DATA_FROM_L31_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID @@@group=General @PMD_PASTE_STALL_CPI;Finish stall because the NTF instruction was a paste waiting for response from L2;NA @@PM_CMPLU_STALL_PASTE/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_TEND_STALL_CPI;Finish stall because the NTF instruction was a tend instruction awaiting response from L2;NA @@PM_CMPLU_STALL_TEND/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_PTEG_FROM_LMEM_Rate;% of DERAT reloads from Local Memory per inst;% @@PM_DPTEG_FROM_LMEM * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_MEM_LOCALITY;Memory locality;% @@PM_DATA_FROM_LMEM * 100/ (PM_DATA_FROM_LMEM + PM_DATA_FROM_RMEM + PM_DATA_FROM_DMEM) @@@group=General @PMD_DMISS_L2L3_CONFLICT_STALL_CPI;Completion stall due to cache miss that resolves in the L2 or L3 with a conflict;NA @@PM_CMPLU_STALL_DMISS_L2L3_CONFLICT/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_L2_LD_Rd_Util;% L2 load disp attempts Cache read Utilization (4 pclks per disp attempt);NA @@((PM_L2_RCLD_DISP/2)*4)/ ( PM_RUN_CYC) * 100 @@@group=L2_stats @PMD_INST_FROM_DL2L3_MOD_Rate;% of ICache reloads from Distant L2 or L3 (Modified) per Inst;% @@PM_INST_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_elpased_cycles;time base elapsed cycles;NA @@proc_freq * total_time @@@group=General @PMD_INST_FROM_L2_Rate;% of ICache reloads from L2 per Inst;% @@PM_INST_FROM_L2 * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_NTC_FIN_CPI;Cycles in which the oldest 
instruction in the pipeline (NTC) finishes. This event is used to account for cycles in which work is being completed in the CPI stack;NA @@PM_NTC_FIN/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_STALL_CPI;Nothing completed and Instruction Completion Table not empty;NA @@PM_CMPLU_STALL/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_dL1_Reload_FROM_DMEM;% of DL1 dL1_Reloads from Distant Memory;% @@PM_DATA_FROM_DMEM * 100 / PM_L1_DCACHE_RELOAD_VALID @@@group=General @PMD_dL1_Reload_FROM_DL4_Rate;% of DL1 Reloads from Distant L4 per Inst;% @@PM_DATA_FROM_DL4 * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_L1_Prefetch_Rate;L1 Prefetches issued by the prefetch machine per instruction (per thread);% @@PM_L1_PREF / PM_RUN_INST_CMPL * 100 @@@group=General @PMD_L2_Disp_Conflict_Other_Latency;Marked L2 Load latency dispatch conflict other;NA @@PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER_CYC/ PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER @@@group=Latency @PMD_ANY_SYNC_STALL_CPI;ANY_SYNC_STALL_CPI;NA @@PM_CMPLU_STALL_ANY_SYNC / PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_dL1_Reload_FROM_L21_SHR_Rate;% of DL1 reloads from Private L2, other core per Inst;% @@PM_DATA_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_L2_ST_Disp_Rate;Rate of L2 store dispatches per core;% @@100 * (PM_L2_RCST_DISP/2) / PM_RUN_INST_CMPL @@@group=General @PMD_dL1_Reload_FROM_RL2L3_MOD_Rate;% of DL1 reloads from Remote L2 or L3 (Modified) per Inst;% @@PM_DATA_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_SRQ_FULL_STALL_CPI;Finish stall because the NTF instruction was a store that was held in LSAQ because the SRQ was full;NA @@PM_CMPLU_STALL_SRQ_FULL/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_LRQ_OTHER_STALL_CPI;Finish stall due to LRQ miscellaneous reasons, lost arbitration to LMQ slot, bank collisions, set prediction cleanup, set prediction multihit and others;NA @@PM_CMPLU_STALL_LRQ_OTHER/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_LL4_Latency;Local L4 average load latency;NA 
@@PM_MRK_DATA_FROM_LL4_CYC/ PM_MRK_DATA_FROM_LL4 @@@group=Latency @PMD_NTC_FLUSH_STALL_CPI;Completion stall due to ntc flush;NA @@PM_CMPLU_STALL_NTC_FLUSH/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_PTEG_FROM_RL2L3_MOD;% of DERAT reloads from Remote L2 or L3 (Modified);% @@PM_DPTEG_FROM_RL2L3_MOD * 100 / PM_DTLB_MISS @@@group=General @PMD_L2_ST_Miss_Ratio;L2 Store misses as a % of total L2 Store dispatches (per thread);% @@PM_L2_ST_MISS / PM_L2_ST * 100 @@@group=General @PMD_L31_Latency;Marked L31 Load latency;NA @@(PM_MRK_DATA_FROM_L31_SHR_CYC + PM_MRK_DATA_FROM_L31_MOD_CYC) / (PM_MRK_DATA_FROM_L31_SHR + PM_MRK_DATA_FROM_L31_MOD) @@@group=General @PMD_dL1_Reload_FROM_L3_Rate;% of DL1 Reloads from L3 per Inst;% @@PM_DATA_FROM_L3 * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_INST_FROM_L31_MOD_Rate;% of ICache reloads from Private L3, other core per Inst;% @@PM_INST_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_INST_FROM_LL4_Rate;% of ICache reloads from Local L4 per Inst;% @@PM_INST_FROM_LL4 * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_STCX_STALL_CPI;Finish stall because the NTF instruction was a stcx waiting for response from L2;NA @@PM_CMPLU_STALL_STCX/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_PTEG_FROM_L21_SHR_Rate;% of DERAT reloads from Private L2, other core per inst;% @@PM_DPTEG_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_ST_FWD_STALL_CPI;Completion stall due to store forward;NA @@PM_CMPLU_STALL_ST_FWD/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_DP_STALL_CPI;Finish stall because the NTF instruction was a scalar instruction issued to the Double Precision execution pipe and waiting to finish. 
Includes binary floating point instructions in 32 and 64 bit binary floating point format.;NA @@PM_CMPLU_STALL_DP/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_INST_FROM_RL2L3_MOD_Rate;% of ICache reloads from Remote L2 or L3 (Modified) per Inst;% @@PM_INST_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_dL1_Reload_FROM_L3_Miss;% of DL1 Reloads from sources beyond the local L3;% @@PM_DATA_FROM_L3MISS * 100 / PM_L1_DCACHE_RELOAD_VALID @@@group=General @PMD_L2_ST_Disp_Fail_Rate;Rate of L2 store dispatches that failed per core;% @@100 * ((PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/2) / PM_RUN_INST_CMPL @@@group=General @PMD_Total_BW;Total read+write bandwidth seen by the chiplet for this partition (Centaur + L4), including RWITMs and cast outs;NA @@(PM_MEM_READ + PM_MEM_PREF + PM_MEM_RWITM + PM_MEM_CO) * 64 * (proc_freq * 1E-9) / PM_CYC @@@group=Memory @PMD_INST_FROM_LMEM_Rate;% of ICache reloads from Local Memory per Inst;% @@PM_INST_FROM_LMEM * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_PTEG_FROM_LL4_Rate;% of DERAT reloads from Local L4 per inst;% @@PM_DPTEG_FROM_LL4 * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_INST_FROM_RMEM_Rate;% of ICache reloads from Remote Memory per Inst;% @@PM_INST_FROM_RMEM * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_DMISS_L21_L31_STALL_CPI;Completion stall by Dcache miss which resolved on chip ( excluding local L2/L3);NA @@PM_CMPLU_STALL_DMISS_L21_L31/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_dL1_Reload_FROM_RL4;% of DL1 dL1_Reloads from Remote L4;% @@PM_DATA_FROM_RL4 * 100 / PM_L1_DCACHE_RELOAD_VALID @@@group=General @PMD_PTEG_FROM_L31_MOD;% of DERAT reloads from Private L3, other core;% @@PM_DPTEG_FROM_L31_MOD * 100 / PM_DTLB_MISS @@@group=General @PMD_VDP_STALL_CPI;Finish stall because the NTF instruction was a vector instruction issued to the Double Precision execution pipe and waiting to finish.;NA @@PM_CMPLU_STALL_VDP/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_MTFPSCR_STALL_CPI;Completion stall 
because the ISU is updating the register and notifying the Effective Address Table (EAT);NA @@PM_CMPLU_STALL_MTFPSCR/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_dL1_Reload_FROM_RL2L3_MOD;% of DL1 dL1_Reloads from Remote L2 or L3 (Modified);% @@PM_DATA_FROM_RL2L3_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID @@@group=General @PMD_ITLB_Miss_Rate;% ITLB miss rate per inst;% @@PM_ITLB_MISS / PM_RUN_INST_CMPL *100 @@@group=General @PMD_ICT_NOSLOT_BR_MPRED_CPI;Instruction Completion Table empty for this thread due to branch mispred;NA @@PM_ICT_NOSLOT_BR_MPRED/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_LMQ_FULL_STALL_CPI;Finish stall because the NTF instruction was a load that missed in the L1 and the LMQ was unable to accept this load miss request because it was full;NA @@PM_CMPLU_STALL_LMQ_FULL/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_dL1_Reload_FROM_L3_MEPF_Rate;% of DL1 Reloads that came from the L3 and were brought into the L3 by a prefetch, per instruction completed;% @@PM_DATA_FROM_L3_MEPF * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_ERAT_MISS_STALL_CPI;Finish stall because the NTF instruction was a load or store that suffered a translation miss;NA @@PM_CMPLU_STALL_ERAT_MISS/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_ICACHE_PREF;% of ICache reloads due to prefetch;% @@PM_IC_PREF_WRITE * 100 / PM_L1_ICACHE_MISS @@@group=General @PMD_Disp_Flush_Rate;Dispatch flushes per instruction;% @@(PM_FLUSH_DISP / PM_RUN_INST_CMPL) * 100 @@@group=General @PMD_Prefetch_BW;Total prefetch bandwidth seen by the chiplet for this partition (Centaur + L4), including data/inst/xlate;NA @@PM_MEM_PREF * 64 * (proc_freq * 1E-9) / PM_CYC @@@group=Memory @PMD_L2_PTEG_Miss_Rate;L2 PTEG Miss Rate (per run instruction)(%);% @@PM_DPTEG_FROM_L2MISS * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_ICT_NOSLOT_DISP_HELD_HB_FULL_CPI;Instruction Completion Table empty for this thread due to dispatch holds because the History Buffer was full.
Could be GPR/VSR/VMR/FPR/CR/XVF;NA @@PM_ICT_NOSLOT_DISP_HELD_HB_FULL/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_SLB_STALL_CPI;Finish stall because the NTF instruction was awaiting L2 response for an SLB;NA @@PM_CMPLU_STALL_SLB/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_L2_Mod_CO;L2 COs that were in M,Me,Mu state as a % of all L2 COs;% @@PM_L2_CASTOUT_MOD / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100 @@@group=General @PMD_L2_RC_LD_Disp_Fail;% of L2 Load RC dispatch attempts that failed;% @@(PM_L2_RCLD_DISP_FAIL_ADDR + PM_L2_RCLD_DISP_FAIL_OTHER)/ ( PM_L2_RCLD_DISP) * 100 @@@group=General @PMD_Cycles_Atleast_One_Inst_Dispatched;Percentage of cycles in which at least one instruction was dispatched;% @@PM_1PLUS_PPC_DISP / PM_CYC * 100 @@@group=General @PMD_LMEM_Latency;Marked Lmem Load latency;NA @@PM_MRK_DATA_FROM_LMEM_CYC/ PM_MRK_DATA_FROM_LMEM @@@group=Latency @PMD_INST_FROM_L3;% of ICache reloads from L3;% @@PM_INST_FROM_L3 * 100 / PM_L1_ICACHE_MISS @@@group=General @PMD_INST_FROM_L31_SHR;% of ICache reloads from Private L3, other core;% @@PM_INST_FROM_L31_SHR * 100 / PM_L1_ICACHE_MISS @@@group=General @PMD_INST_FROM_L21_SHR_Rate;% of ICache reloads from Private L2, other core per Inst;% @@PM_INST_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_DL2L3_MOD_Latency;Marked L2L3 distant Load latency;NA @@PM_MRK_DATA_FROM_DL2L3_MOD_CYC/ PM_MRK_DATA_FROM_DL2L3_MOD @@@group=Latency @PMD_FXU_All_BUSY;All FXU Busy;NA @@PM_FXU_BUSY / PM_CYC @@@group=General @PMD_dL1_Reload_FROM_LMEM;% of DL1 dL1_Reloads from Local Memory;% @@PM_DATA_FROM_LMEM * 100 / PM_L1_DCACHE_RELOAD_VALID @@@group=General @PMD_L2_DC_Inv_Rate;L2 dcache invalidates per run inst (per core);% @@(PM_L2_DC_INV / 2 ) / PM_RUN_INST_CMPL * 100 @@@group=General @PMD_CRYPTO_STALL_CPI;Finish stall because the NTF instruction was routed to the crypto execution pipe and was waiting to finish;NA @@PM_CMPLU_STALL_CRYPTO/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_BRU_STALL_CPI;Completion stall due to a Branch Unit;NA
@@PM_CMPLU_STALL_BRU/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_PTEG_FROM_DMEM_Rate;% of DERAT reloads from Distant Memory per inst;% @@PM_DPTEG_FROM_DMEM * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_RL2L3_MOD_Latency;Marked L2L3 remote Load latency;NA @@PM_MRK_DATA_FROM_RL2L3_MOD_CYC/ PM_MRK_DATA_FROM_RL2L3_MOD @@@group=Latency @PMD_INST_FROM_L21_SHR;% of ICache reloads from Private L2, other core;% @@PM_INST_FROM_L21_SHR * 100 / PM_L1_ICACHE_MISS @@@group=General @PMD_INST_FROM_L2;% of ICache reloads from L2;% @@PM_INST_FROM_L2 * 100 / PM_L1_ICACHE_MISS @@@group=General @PMD_PTEG_FROM_L21_MOD;% of DERAT reloads from Private L2, other core;% @@PM_DPTEG_FROM_L21_MOD * 100 / PM_DTLB_MISS @@@group=General @PMD_RL4_Latency;Remote L4 average load latency;NA @@PM_MRK_DATA_FROM_RL4_CYC/ PM_MRK_DATA_FROM_RL4 @@@group=Latency @PMD_NTC_ISSUE_HELD_DARQ_FULL_CPI;The NTC instruction is being held at dispatch because there are no slots in the DARQ for it;NA @@PM_NTC_ISSUE_HELD_DARQ_FULL/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_PTEG_FROM_L2_Rate;% of DERAT reloads from L2 per inst;% @@PM_DPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_dL1_Reload_FROM_L2_Miss;% of DL1 Reloads from sources beyond the local L2;% @@PM_DATA_FROM_L2MISS * 100 / PM_L1_DCACHE_RELOAD_VALID @@@group=General @PMD_INST_FROM_LL4;% of ICache reloads from Local L4;% @@PM_INST_FROM_LL4 * 100 / PM_L1_ICACHE_MISS @@@group=General @PMD_L3_Disp_Conflict_Latency;Marked L3 Load latency dispatch conflict;NA @@PM_MRK_DATA_FROM_L3_DISP_CONFLICT_CYC/ PM_MRK_DATA_FROM_L3_DISP_CONFLICT @@@group=Latency @PMD_L2_LD_commands;Percent of loads out of all L2 commands;% @@PM_L2_LD * 100 / (PM_L2_ST + PM_L2_LD + PM_ISIDE_DISP) @@@group=General @PMD_DCACHE_MISS_STALL_CPI;Finish stall because the NTF instruction was a load that missed the L1 and was waiting for the data to return from the nest;NA @@PM_CMPLU_STALL_DCACHE_MISS/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_L2_CO_M_Rd_Util;%L2 Modified CO Cache read 
Utilization (4 pclks per disp attempt);NA @@((PM_L2_CASTOUT_MOD/2)*4)/ ( PM_RUN_CYC) * 100 @@@group=L2_stats @PMD_INST_FROM_RL4;% of ICache reloads from Remote L4;% @@PM_INST_FROM_RL4 * 100 / PM_L1_ICACHE_MISS @@@group=General @PMD_INST_FROM_DL2L3_MOD;% of ICache reloads from Distant L2 or L3 (Modified);% @@PM_INST_FROM_DL2L3_MOD * 100 / PM_L1_ICACHE_MISS @@@group=General @PMD_INST_FROM_RL2L3_SHR;% of ICache reloads from Remote L2 or L3 (Shared);% @@PM_INST_FROM_RL2L3_SHR * 100 / PM_L1_ICACHE_MISS @@@group=General @PMD_L2_LD_Miss_Ratio;L2 Load misses as a % of total L2 Load dispatches (per thread);% @@PM_L2_LD_MISS / PM_L2_LD * 100 @@@group=General @PMD_dL1_Reload_FROM_LMEM_Rate;% of DL1 Reloads from Local Memory per Inst;% @@PM_DATA_FROM_LMEM * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_PTEG_FROM_L3_Rate;% of DERAT reloads from L3 per inst;% @@PM_DPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL @@@group=General @PMD_NOTHING_DISPATCHED_CPI;Number of cycles the Instruction Completion Table has no itags assigned to this thread;NA @@PM_ICT_NOSLOT_CYC/PM_RUN_INST_CMPL @@@group=CPI_Breakdown @PMD_dL1_Reload_FROM_L21_SHR;% of DL1 reloads from Private L2, other core;% @@PM_DATA_FROM_L21_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID @@@group=General @PMD_FXU_All_IDLE;All FXU Idle;NA @@PM_FXU_IDLE / PM_CYC @@@group=General @PMD_LD_LL4_PER_LD_RL4;Ratio of reloads from local L4 to remote L4;NA @@PM_DATA_FROM_LL4 / PM_DATA_FROM_RL4 @@@group=Memory @PMD_CCACHE_Misprediction;Count cache branch misprediction;% @@PM_BR_MPRED_CCACHE / PM_BR_PRED_CCACHE * 100 @@@group=General @PMD_Br_Mpred_Flush_Rate;Branch Mispredict flushes per instruction;% @@PM_FLUSH_MPRED / PM_RUN_INST_CMPL * 100 @@@group=General @PMD_L4_LOCALITY;L4 locality(%);NA @@PM_DATA_FROM_LL4 * 100 / (PM_DATA_FROM_LL4 + PM_DATA_FROM_RL4 + PM_DATA_FROM_DL4) @@@group=Memory @PMD_DMISS_LMEM_STALL_CPI;Completion stall due to cache miss that resolves in local memory;NA @@PM_CMPLU_STALL_DMISS_LMEM/PM_RUN_INST_CMPL @@@group=CPI_Breakdown 
# Record layout used by every entry below:
#   @<metric name>;<description>;<unit>
#   @@<formula over PM_* events and other PMD_* metrics>
#   @@@group=<group name>
@PMD_dL1_Reload_FROM_DL2L3_MOD;% of DL1 dL1_Reloads from Distant L2 or L3 (Modified);%
@@PM_DATA_FROM_DL2L3_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID
@@@group=General
@PMD_NTC_ISSUE_HELD_ARB_CPI;The NTC instruction is being held at dispatch because it lost arbitration onto the issue pipe to another instruction (from the same thread or a different thread);NA
@@PM_NTC_ISSUE_HELD_ARB/PM_RUN_INST_CMPL
@@@group=CPI_Breakdown
@PMD_PTEG_FROM_LMEM;% of DERAT reloads from Local Memory;%
@@PM_DPTEG_FROM_LMEM * 100 / PM_DTLB_MISS
@@@group=General
@PMD_Taken_Branches;% Branches Taken;%
@@PM_BR_TAKEN_CMPL * 100 / PM_BRU_FIN
@@@group=General
@PMD_INST_FROM_DMEM_Rate;% of ICache reloads from Distant Memory per Inst;%
@@PM_INST_FROM_DMEM * 100 / PM_RUN_INST_CMPL
@@@group=General
@PMD_ICT_NOSLOT_IC_MISS_CPI;Instruction Completion Table empty for this thread due to Icache Miss;NA
@@PM_ICT_NOSLOT_IC_MISS/PM_RUN_INST_CMPL
@@@group=CPI_Breakdown
@PMD_PTEG_FROM_DL2L3_SHR_Rate;% of DERAT reloads from Distant L2 or L3 (Shared) per inst;%
@@PM_DPTEG_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL
@@@group=General
@PMD_DMISS_L2L3_STALL_CPI;Completion stall by Dcache miss which resolved in L2/L3;NA
@@PM_CMPLU_STALL_DMISS_L2L3/PM_RUN_INST_CMPL
@@@group=CPI_Breakdown
# NOTE(review): numerator and denominator are the same event, so this ratio
# evaluates to 1 whenever PM_LSU_DERAT_MISS is non-zero. The intended
# denominator is not visible in this file - TODO confirm against the PMU
# documentation before changing the formula.
@PMD_DERAT_Miss_Ratio;DERAT miss ratio;NA
@@PM_LSU_DERAT_MISS / PM_LSU_DERAT_MISS
@@@group=Translation
@PMD_ICT_NOSLOT_BR_MPRED_ICMISS_CPI;Instruction Completion Table empty for this thread due to Icache Miss and branch mispred;NA
@@PM_ICT_NOSLOT_BR_MPRED_ICMISS/PM_RUN_INST_CMPL
@@@group=CPI_Breakdown
@PMD_dL1_Reload_FROM_L31_MOD_Rate;% of DL1 reloads from Private L3 M state, other core per Inst;%
@@PM_DATA_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL
@@@group=General
@PMD_LD_LMEM_PER_LD_DMEM;Number of loads from local memory per loads from distant memory;NA
@@PM_DATA_FROM_LMEM / PM_DATA_FROM_DMEM
@@@group=Memory
@PMD_L2_ST_commands;Percent of stores out of all L2 commands;%
@@PM_L2_ST * 100 / (PM_L2_ST + PM_L2_LD + PM_ISIDE_DISP)
@@@group=General
# Record layout used by every entry below:
#   @<metric name>;<description>;<unit>
#   @@<formula over PM_* events and other PMD_* metrics>
#   @@@group=<group name>
# Each metric name is defined exactly once in this section.
@PMD_INST_FROM_RMEM;% of ICache reloads from Remote Memory;%
@@PM_INST_FROM_RMEM * 100 / PM_L1_ICACHE_MISS
@@@group=General
@PMD_L1_LD_Miss_Ratio;Percentage of L1 load misses per L1 load ref;%
@@PM_LD_MISS_L1 / PM_LD_REF_L1 * 100
@@@group=General
@PMD_RMEM_cpi;estimate of remote memory miss rates with measured RMEM latency as a %of dcache miss cpi;%
@@(((PM_DATA_FROM_RMEM / PM_RUN_INST_CMPL) * PMD_RMEM_Latency) / PMD_DCACHE_MISS_STALL_CPI) *100
@@@group=CPI_Breakdown
@PMD_INST_FROM_L21_MOD_Rate;% of ICache reloads from Private L2, other core per Inst;%
@@PM_INST_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL
@@@group=General
@PMD_L2_No_Conflict_Latency;Marked L2 Load latency no conflict;NA
@@PM_MRK_DATA_FROM_L2_NO_CONFLICT_CYC/ PM_MRK_DATA_FROM_L2_NO_CONFLICT
@@@group=Latency
@PMD_INST_FROM_L31_SHR_Rate;% of ICache reloads from Private L3 other core per Inst;%
@@PM_INST_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL
@@@group=General
@PMD_INST_FROM_L31_MOD;% of ICache reloads from Private L3, other core;%
@@PM_INST_FROM_L31_MOD * 100 / PM_L1_ICACHE_MISS
@@@group=General
@PMD_L1_LD_Miss_Rate;Percentage of L1 demand load misses per run instruction;%
@@PM_LD_MISS_L1 * 100 / PM_RUN_INST_CMPL
@@@group=General
@PMD_DFLONG_STALL_CPI;Finish stall because the NTF instruction was a multi-cycle instruction issued to the Decimal Floating Point execution pipe and waiting to finish.;NA
@@PM_CMPLU_STALL_DFLONG/PM_RUN_INST_CMPL
@@@group=CPI_Breakdown
@PMD_L3_LD_Miss_Rate;L3 demand Load Miss Rate (per run instruction)(%);%
@@PM_DATA_FROM_L3MISS * 100 / PM_RUN_INST_CMPL
@@@group=General
@PMD_ISLB_Miss_Rate;% ISLB miss rate per inst;%
@@PM_ISLB_MISS * 100 / PM_RUN_INST_CMPL
@@@group=General
@PMD_LARX_STALL_CPI;Finish stall because the NTF instruction was a larx waiting to be satisfied;NA
@@PM_CMPLU_STALL_LARX/PM_RUN_INST_CMPL
@@@group=CPI_Breakdown
@PMD_LRQ_FULL_STALL_CPI;Finish stall because the NTF instruction was a load that was held in LSAQ because the LRQ was full;NA
@@PM_CMPLU_STALL_LRQ_FULL/PM_RUN_INST_CMPL
@@@group=CPI_Breakdown
@PMD_L2_RC_ST_Disp_Fail;% of L2 Store RC dispatch attempts that failed;%
@@(PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/ ( PM_L2_RCST_DISP) * 100
@@@group=General
@PMD_INST_FROM_DL4;% of ICache reloads from Distant L4;%
@@PM_INST_FROM_DL4 * 100 / PM_L1_ICACHE_MISS
@@@group=General
@PMD_PTEG_FROM_L21_SHR;% of DERAT reloads from Private L2, other core;%
@@PM_DPTEG_FROM_L21_SHR * 100 / PM_DTLB_MISS
@@@group=General
@PMD_DFU_STALL_CPI;Finish stall because the NTF instruction was issued to the Decimal Floating Point execution pipe and waiting to finish.;NA
@@PM_CMPLU_STALL_DFU/PM_RUN_INST_CMPL
@@@group=CPI_Breakdown
@PMD_PTEG_FROM_DL2L3_SHR;% of DERAT reloads from Distant L2 or L3 (Shared);%
@@PM_DPTEG_FROM_DL2L3_SHR * 100 / PM_DTLB_MISS
@@@group=General
@PMD_Custom_secs;cycles;NA
@@PM_RUN_CYC
@@@group=General
@PMD_L31_MOD_cpi;estimate of dl31 MOD miss rates with measured L31 MOD latency as a %of dcache miss cpi;%
@@(((PM_DATA_FROM_L31_MOD / PM_RUN_INST_CMPL) * PMD_L31_MOD_Latency) / PMD_DCACHE_MISS_STALL_CPI) *100
@@@group=CPI_Breakdown
@PMD_L21_SHR_cpi;estimate of dl21 SHR miss rates with measured L21 SHR latency as a %of dcache miss cpi;%
@@(((PM_DATA_FROM_L21_SHR / PM_RUN_INST_CMPL) * PMD_L21_SHR_Latency) / PMD_DCACHE_MISS_STALL_CPI) *100
@@@group=CPI_Breakdown
@PMD_DL4_cpi;estimate of distant L4 miss rates with measured DL4 latency as a %of dcache miss cpi;%
@@(((PM_DATA_FROM_DL4 / PM_RUN_INST_CMPL) * PMD_DL4_Latency) / PMD_DCACHE_MISS_STALL_CPI) *100
@@@group=CPI_Breakdown
@PMD_dcache_miss_cpi;dL1 miss portion of CPI;%
@@( PMD_DCACHE_MISS_STALL_CPI / PMD_RUN_CPI) * 100
@@@group=CPI_Breakdown
@PMD_DMISS_L2L3_NOCONFLICT_STALL_CPI;Completion stall due to cache miss that resolves in the L2 or L3 without conflict;NA
@@PMD_DMISS_L2L3_STALL_CPI - PMD_DMISS_L2L3_CONFLICT_STALL_CPI
@@@group=CPI_Breakdown
@PMD_DL2L3_MOD_cpi;estimate of dl2l3 distant MOD miss rates with measured DL2L3 MOD latency as a %of dcache miss cpi;%
@@(((PM_DATA_FROM_DL2L3_MOD / PM_RUN_INST_CMPL) * PMD_DL2L3_MOD_Latency) / PMD_DCACHE_MISS_STALL_CPI) *100
@@@group=CPI_Breakdown
@PMD_Run_Latch_Cyc;Percentage of all cycles in which the run latch was set, assuming fixed frequency;%
@@(PM_RUN_CYC / PMD_elpased_cycles) * 100
@@@group=General
@PMD_DMISS_NON_LOCAL_STALL_CPI;Completion stall by Dcache miss which resolved outside of local memory;NA
@@PMD_DMISS_L3MISS_STALL_CPI - PMD_DMISS_L21_L31_STALL_CPI - PMD_DMISS_LMEM_STALL_CPI
@@@group=CPI_Breakdown
@PMD_RL4_cpi;estimate of remote L4 miss rates with measured RL4 latency as a %of dcache miss cpi;%
@@(((PM_DATA_FROM_RL4 / PM_RUN_INST_CMPL) * PMD_RL4_Latency) / PMD_DCACHE_MISS_STALL_CPI) *100
@@@group=CPI_Breakdown
@PMD_L2_Wr_Util;L2 Cache Write Utilization (per core);%
@@PMD_L2_LDMISS_Wr_Util + PMD_L2_ST_Wr_Util
@@@group=General
@PMD_EMQ_STALL_CPI;emq_stall_cpi;NA
@@PMD_ERAT_MISS_STALL_CPI + PMD_EMQ_FULL_STALL_CPI
@@@group=CPI_Breakdown
@PMD_VDP_OTHER_STALL_CPI;Vector stalls due to small latency double precision ops;NA
@@PMD_VDP_STALL_CPI - PMD_VDPLONG_STALL_CPI
@@@group=CPI_Breakdown
@PMD_LSU_FIN_STALL_CPI;Finish stall because the NTF instruction was an LSU op;NA
@@PM_CMPLU_STALL_LSU_FIN/PM_RUN_INST_CMPL
@@@group=CPI_Breakdown
@PMD_EXCEPTION_STALL_CPI;Cycles in which the NTC instruction is not allowed to complete because it was interrupted by ANY exception;NA
@@PM_CMPLU_STALL_EXCEPTION/PM_RUN_INST_CMPL
@@@group=CPI_Breakdown
@PMD_EXEC_UNIT_STALL_CPI;Completion stall due to execution units (FXU/VSU/CRU);NA
@@PM_CMPLU_STALL_EXEC_UNIT/PM_RUN_INST_CMPL
@@@group=CPI_Breakdown
@PMD_L2_cpi;estimate of dl2 miss rates with measured L2 latency as a %of dcache miss cpi;%
@@(((PM_DATA_FROM_L2 / PM_RUN_INST_CMPL) * PMD_L2_Latency) / PMD_DCACHE_MISS_STALL_CPI ) *100
@@@group=CPI_Breakdown
@PMD_LMEM_cpi;estimate of Local memory miss rates with measured LMEM latency as a %of dcache miss cpi;%
@@(((PM_DATA_FROM_LMEM / PM_RUN_INST_CMPL) * PMD_LMEM_Latency) / PMD_DCACHE_MISS_STALL_CPI) *100
@@@group=CPI_Breakdown
@PMD_DMEM_cpi;estimate of distant memory miss rates with measured DMEM latency as a %of dcache miss cpi;%
@@(((PM_DATA_FROM_DMEM / PM_RUN_INST_CMPL) * PMD_DMEM_Latency) / PMD_DCACHE_MISS_STALL_CPI) *100
@@@group=CPI_Breakdown
@PMD_L21_MOD_cpi;estimate of dl21 MOD miss rates with measured L21 MOD latency as a %of dcache miss cpi;%
@@(((PM_DATA_FROM_L21_MOD / PM_RUN_INST_CMPL) * PMD_L21_MOD_Latency) / PMD_DCACHE_MISS_STALL_CPI) *100
@@@group=CPI_Breakdown
@PMD_DMISS_DISTANT_STALL_CPI;Completion stall by Dcache miss which resolved off node memory/cache;NA
@@PMD_DMISS_L3MISS_STALL_CPI - PMD_DMISS_L21_L31_STALL_CPI - PMD_DMISS_LMEM_STALL_CPI - PMD_DMISS_REMOTE_STALL_CPI
@@@group=CPI_Breakdown
# Sum of the per-pipe scalar stall components: FXU, double precision,
# decimal floating point, permute (PMD_PM_STALL_CPI) and crypto. The
# aggregate PMD_STALL_CPI (PM_CMPLU_STALL per run instruction) is
# deliberately not a term of this sum.
@PMD_SCALAR_STALL_CPI;scalar_stall_cpi;NA
@@PMD_FXU_STALL_CPI + PMD_DP_STALL_CPI + PMD_DFU_STALL_CPI + PMD_PM_STALL_CPI + PMD_CRYPTO_STALL_CPI
@@@group=CPI_Breakdown
@PMD_DL2L3_SHR_cpi;estimate of dl2l3 distant SHR miss rates with measured DL2L3 SHR latency as a %of dcache miss cpi;%
@@(((PM_DATA_FROM_DL2L3_SHR / PM_RUN_INST_CMPL) * PMD_DL2L3_SHR_Latency) / PMD_DCACHE_MISS_STALL_CPI) *100
@@@group=CPI_Breakdown
@PMD_VECTOR_STALL_CPI;vector_stall_cpi;NA
@@PMD_VFXU_STALL_CPI + PMD_VDP_STALL_CPI
@@@group=CPI_Breakdown
@PMD_L2_Rd_Util;L2 Cache Read Utilization (per core);%
@@PMD_L2_LD_Rd_Util + PMD_L2_ST_Rd_Util + PMD_L2_CO_M_Rd_Util
@@@group=General
@PMD_RL2L3_SHR_cpi;estimate of dl2l3 shared miss rates with measured RL2L3 SHR latency as a %of dcache miss cpi;%
@@(((PM_DATA_FROM_RL2L3_SHR / PM_RUN_INST_CMPL) * PMD_RL2L3_SHR_Latency) / PMD_DCACHE_MISS_STALL_CPI) * 100
@@@group=CPI_Breakdown
@PMD_L3_cpi;estimate of dl3 miss rates with measured L3 latency as a % of dcache miss cpi;%
@@(((PM_DATA_FROM_L3 / PM_RUN_INST_CMPL) * PMD_L3_Latency) / PMD_DCACHE_MISS_STALL_CPI) * 100
@@@group=CPI_Breakdown
@PMD_FXU_OTHER_STALL_CPI;Stalls due to short latency integer ops;NA
@@PMD_FXU_STALL_CPI - PMD_FXLONG_STALL_CPI
@@@group=CPI_Breakdown
@PMD_SRQ_STALL_CPI;srq_stall_cpi;NA
@@PMD_STORE_DATA_STALL_CPI + PMD_EIEIO_STALL_CPI + PMD_STCX_STALL_CPI + PMD_SLB_STALL_CPI + PMD_TEND_STALL_CPI + PMD_PASTE_STALL_CPI + PMD_TLBIE_STALL_CPI + PMD_STORE_PIPE_ARB_STALL_CPI + PMD_STORE_FIN_ARB_STALL_CPI
@@@group=CPI_Breakdown
@PMD_LRQ_STALL_CPI;lrq_stall_cpi;NA
@@PMD_LMQ_FULL_STALL_CPI + PMD_ST_FWD_STALL_CPI + PMD_LHS_STALL_CPI + PMD_LSU_MFSPR_STALL_CPI + PMD_LARX_STALL_CPI + PMD_LRQ_OTHER_STALL_CPI
@@@group=CPI_Breakdown
@PMD_RL2L3_MOD_cpi;estimate of dl2l3 remote MOD miss rates with measured RL2L3 MOD latency as a %of dcache miss cpi;%
@@(((PM_DATA_FROM_RL2L3_MOD / PM_RUN_INST_CMPL) * PMD_RL2L3_MOD_Latency) / PMD_DCACHE_MISS_STALL_CPI) *100
@@@group=CPI_Breakdown
@PMD_LSAQ_STALL_CPI;lsaq_stall_cpi;NA
@@PMD_LRQ_FULL_STALL_CPI + PMD_SRQ_FULL_STALL_CPI + PMD_LSAQ_ARB_STALL_CPI
@@@group=CPI_Breakdown
@PMD_DP_OTHER_STALL_CPI;Stalls due to short latency double precision ops.;NA
@@PMD_DP_STALL_CPI - PMD_DPLONG_STALL_CPI
@@@group=CPI_Breakdown
@PMD_VFXU_OTHER_STALL_CPI;Vector stalls due to small latency integer ops;NA
@@PMD_VFXU_STALL_CPI - PMD_VFXLONG_STALL_CPI
@@@group=CPI_Breakdown
# Residual ("other") CPI components. Each active formula below combines
# only other PMD_* metrics (PMD_L31_SHR_cpi additionally uses two PM_*
# events).
# Record layout:
#   @<metric name>;<description>;<unit>
#   @@<formula>
#   @@@group=<group name>
@PMD_ICT_NOSLOT_IC_L2_CPI;ICT_NOSLOT_IC_L2_CPI;NA
@@PMD_ICT_NOSLOT_IC_MISS_CPI - PMD_ICT_NOSLOT_IC_L3_CPI - PMD_ICT_NOSLOT_IC_L3MISS_CPI
@@@group=CPI_Breakdown
@PMD_OTHER_CPI;Cycles unaccounted for.;NA
@@PMD_RUN_CPI - PMD_COMPLETION_CPI - PMD_THREAD_BLOCK_STALL_CPI - PMD_STALL_CPI - PMD_NOTHING_DISPATCHED_CPI
@@@group=CPI_Breakdown
@PMD_ISSUE_HOLD_CPI;issue_hold_cpi;NA
@@PMD_NTC_ISSUE_HELD_DARQ_FULL_CPI + PMD_NTC_ISSUE_HELD_ARB_CPI + PMD_NTC_ISSUE_HELD_OTHER_CPI
@@@group=CPI_Breakdown
@PMD_DFU_OTHER_STALL_CPI;Stalls due to short latency decimal floating ops.;NA
@@PMD_DFU_STALL_CPI - PMD_DFLONG_STALL_CPI
@@@group=CPI_Breakdown
@PMD_L31_SHR_cpi;estimate of dl31 SHR miss rates with measured L31 SHR latency as a %of dcache miss cpi;%
@@(((PM_DATA_FROM_L31_SHR / PM_RUN_INST_CMPL) * PMD_L31_SHR_Latency) / PMD_DCACHE_MISS_STALL_CPI) *100
@@@group=CPI_Breakdown
@PMD_ICT_NOSLOT_CYC_OTHER_CPI;Instruction Completion Table other stalls;NA
@@PMD_NOTHING_DISPATCHED_CPI - PMD_ICT_NOSLOT_IC_MISS_CPI - PMD_ICT_NOSLOT_BR_MPRED_ICMISS_CPI - PMD_ICT_NOSLOT_BR_MPRED_CPI - PMD_ICT_NOSLOT_DISP_HELD_CPI
@@@group=CPI_Breakdown
@PMD_LSU_OTHER_STALL_CPI;Completion LSU stall for other reasons;NA
@@PMD_LSU_STALL_CPI - PMD_LSU_FIN_STALL_CPI - PMD_STORE_FINISH_STALL_CPI - PMD_SRQ_STALL_CPI - PMD_LOAD_FINISH_STALL_CPI - PMD_DCACHE_MISS_STALL_CPI - PMD_LRQ_STALL_CPI - PMD_EMQ_STALL_CPI - PMD_LSAQ_STALL_CPI
@@@group=CPI_Breakdown
@PMD_ICT_NOSLOT_DISP_HELD_OTHER_CPI;ICT_NOSLOT_DISP_HELD_OTHER_CPI;NA
@@PMD_ICT_NOSLOT_DISP_HELD_CPI - PMD_ICT_NOSLOT_DISP_HELD_HB_FULL_CPI - PMD_ICT_NOSLOT_DISP_HELD_SYNC_CPI - PMD_ICT_NOSLOT_DISP_HELD_TBEGIN_CPI - PMD_ICT_NOSLOT_DISP_HELD_ISSQ_CPI
@@@group=CPI_Breakdown
# The three definitions below are commented out and therefore inactive.
# NOTE(review): the first disabled formula references its operands without
# the PMD_ prefix used by the active definitions; presumably it would need
# the prefix added before being re-enabled - confirm.
#@PMD_OTHER_THREAD_CMPL_STALL;Completion stall because a different thread was using the completion pipe;NA
#@@THREAD_BLOCK_STALL_CPI - EXCEPTION_STALL_CPI - ANY_SYNC_STALL_CPI - SYNC_PMU_INT_STALL_CPI - SPEC_FINISH_STALL_CPI - FLUSH_ANY_THREAD_STALL_CPI - LSU_FLUSH_NEXT_STALL_CPI - NESTED_TBEGIN_STALL_CPI - NESTED_TEND_STALL_CPI - MTFPSCR_STALL_CPI
#@@@group=General
#@PMD_EXEC_UNIT_OTHER_STALL_CPI;Completion stall due to execution units for other reasons.;NA
#@@PMD_EXEC_UNIT_STALL_CPI-PMD_SCALAR_STALL_CPI-PMD_VECTOR_STALL_CPI
#@@@group=CPI_Breakdown
#@PMD_OTHER_STALL_CPI;Completion stall for other reasons;NA
#@@PMD_STALL_CPI - PMD_NTC_DISP_FIN_STALL_CPI - PMD_NTC_FLUSH_STALL_CPI - PMD_LSU_STALL_CPI - PMD_EXEC_UNIT_STALL_CPI - PMD_BRU_STALL_CPI
#@@@group=CPI_Breakdown