# @(#)M 1.1.1.7 src/bos/usr/sbin/perf/pmapi/events/POWER6.dms, pmapi, bos720 1/23/08 08:51:31 # IBM_PROLOG_BEGIN_TAG # This is an automatically generated prolog. # # bos720 src/bos/usr/sbin/perf/pmapi/events/POWER6.dms 1.1.1.7 # # Licensed Materials - Property of IBM # # COPYRIGHT International Business Machines Corp. 2005,2008 # All Rights Reserved # # US Government Users Restricted Rights - Use, duplication or # disclosure restricted by GSA ADP Schedule Contract with IBM Corp. # # IBM_PROLOG_END_TAG # Global variables CACHE_LINE_SIZE = 128; MEM_LINE_SIZE = 128; # Derived metric descriptions ############################# # Basic Performance Metrics # ############################# @PMD_CPI_BPM;Cycles per instruction @@PM_CYC / PM_INST_CMPL @@@group=basic_performance @PMD_IPC_BPM;Instructions per cycles @@PM_INST_CMPL / PM_CYC @@@group=basic_performance @PMD_RUN_CPI_BPM;Run cycles per run instruction @@PM_RUN_CYC / PM_RUN_INST_CMPL @@@group=basic_performance @PMD_GPG_BPM;Cycles per group @@PM_CYC / PM_1PLUS_PPC_CMPL @@@group=basic_performance @PMD_IPG_BPM;Instructions per group @@PM_INST_CMPL / PM_1PLUS_PPC_CMPL @@@group=basic_performance @PMD_PCT_GRPS_COMPLETE_BPM;Groups completed per group dispatched @@PM_1PLUS_PPC_CMPL / PM_1PLUS_PPC_DISP @@@group=basic_performance @PMD_DSP_BDW_TOT_BPM;Dispatch bandwidth total @@(PM_1PLUS_PPC_DISP + PM_INST_DISP_LLA) / PM_CYC @@@group=basic_performance @PMD_DSP_BDW_DEM_BPM;Dispatch bandwidth demand @@PM_1PLUS_PPC_DISP / PM_CYC @@@group=basic_performance @PMD_PCT_RUN_BPM;Run cycles per cycle @@PM_RUN_CYC / PM_CYC @@@group=basic_performance @PMD_PCT_HV_BPM;Hypervisor cycles per cycles @@PM_HV_CYC / PM_CYC @@@group=basic_performance @PMD_LD_MISS_RATE_BPM;Load miss rate @@PM_LD_MISS_L1 / PM_INST_CMPL @@@group=basic_performance @PMD_LD_MISS_RATIO_BPM;Load miss ratio @@PM_LD_MISS_L1 / PM_LD_REF_L1 @@@group=basic_performance @PMD_LD_MISS_LAT_BPM;Load miss latency @@PM_LD_MISS_L1_CYC / PM_LD_MISS_L1 @@@group=basic_performance 
@PMD_ST_MISS_RATE_BPM;Store miss rate @@PM_ST_MISS_L1 / PM_INST_CMPL @@@group=basic_performance @PMD_ST_MISS_RATIO_BPM;Store miss ratio @@PM_ST_MISS_L1 / PM_ST_REF_L1 @@@group=basic_performance @PMD_PCT_INST_LD_BPM;PCT instruction loads @@PM_LD_REF_L1 / PM_INST_CMPL @@@group=basic_performance @PMD_PCT_INST_ST_BPM;PCT instruction stores @@PM_ST_REF_L1 / PM_INST_CMPL @@@group=basic_performance @PMD_DERAT_MISS_RATE_BPM;DERAT miss rate @@PM_LSU_DERAT_MISS / PM_INST_CMPL @@@group=basic_performance @PMD_LSU_DERAT_MISS_LAT_BPM;DERAT miss latency @@PM_LSU_DERAT_MISS_CYC / PM_LSU_DERAT_MISS @@@group=basic_performance @PMD_DSLB_MISS_RATE_BPM;DSLB_miss_rate @@PM_DSLB_MISS / PM_INST_CMPL @@@group=basic_performance @PMD_DERAT_REF_BPM;DERAT references @@PM_DERAT_REF_4K + PM_DERAT_REF_64K + PM_DERAT_REF_16M + PM_DERAT_REF_16G @@@group=basic_performance @PMD_DERAT_MISS_RATIO_BPM;DERAT miss ratio @@PM_LSU_DERAT_MISS / PMD_DERAT_REF_BPM @@@group=basic_performance @PMD_DERAT_4K_MISS_RATIO_BPM;DERAT miss ratio for 4K page @@PM_DERAT_MISS_4K / PM_DERAT_REF_4K @@@group=basic_performance @PMD_DERAT_64K_MISS_RATIO_BPM;DERAT miss ratio for 64K page @@PM_DERAT_MISS_64K / PM_DERAT_REF_64K @@@group=basic_performance @PMD_DERAT_16M_MISS_RATIO_BPM;DERAT miss ratio for 16M page @@PM_DERAT_MISS_16M / PM_DERAT_REF_16M @@@group=basic_performance @PMD_DERAT_16G_MISS_RATIO_BPM;DERAT miss ratio for 16G page @@PM_DERAT_MISS_16G / PM_DERAT_REF_16G @@@group=basic_performance @PMD_PCT_DERAT_REF_4K_BPM;PCT DERAT References for 4K page @@PM_DERAT_REF_4K / PMD_DERAT_REF_BPM @@@group=basic_performance @PMD_PCT_DERAT_REF_64K_BPM;PCT DERAT References for 64K page @@PM_DERAT_REF_64K / PMD_DERAT_REF_BPM @@@group=basic_performance @PMD_PCT_DERAT_REF_16M_BPM;PCT DERAT References for 16M page @@PM_DERAT_REF_16M / PMD_DERAT_REF_BPM @@@group=basic_performance @PMD_PCT_DERAT_REF_16G_BPM;PCT DERAT References for 16G page @@PM_DERAT_REF_16G / PMD_DERAT_REF_BPM @@@group=basic_performance 
@PMD_INST_MISS_RATE_BPM;Instructions miss rate @@PM_L1_ICACHE_MISS / PM_INST_CMPL @@@group=basic_performance @PMD_IERAT_MISS_RATE_BPM;IERAT miss rate @@PM_IERAT_MISS / PM_INST_CMPL @@@group=basic_performance @PMD_IERAT_4K_MISS_RATE_BPM;IERAT miss rate for 4K page @@ PM_IERAT_MISS_4K / PM_INST_CMPL @@@group=basic_performance @PMD_IERAT_64K_MISS_RATE_BPM;IERAT miss rate for 64K page @@ PM_IERAT_MISS_64K / PM_INST_CMPL @@@group=basic_performance @PMD_IERAT_16M_MISS_RATE_BPM;IERAT miss rate for 16M page @@ PM_IERAT_MISS_16M / PM_INST_CMPL @@@group=basic_performance @PMD_IERAT_16G_MISS_RATE_BPM;IERAT miss rate for 16G page @@ PM_IERAT_MISS_16G / PM_INST_CMPL @@@group=basic_performance @PMD_ISLB_MISS_RATE_BPM;ISLB miss rate @@PM_ISLB_MISS / PM_INST_CMPL @@@group=basic_performance ###################### # load/store metrics # ###################### @PMD_PTEG_FETCH_BPM;Page Table Entry fetches @@PM_PTEG_FROM_L2 + PM_PTEG_FROM_L2MISS @@@group=load_store_performance @PMD_PTEG_REL_PER_DERAT_BPM;Page Table Entry fetches per DERAT @@PMD_PTEG_FETCH_BPM / (PM_LSU_DERAT_MISS) @@@group=load_store_performance @PMD_SYNC_TIME_BPM;average service time for SYNC @@PM_SYNC_CYC / PM_SYNC_COUNT @@@group=load_store_performance @PMD_LSU_REJ_RATE_BPM;LSU reject rate @@PM_LSU_REJECT / PM_INST_CMPL @@@group=load_store_performance @PMD_LHS_REJ_RATE_BPM;LHS reject rate @@PM_LSU_REJECT_LHS / PM_INST_CMPL @@@group=load_store_performance @PMD_EA_GUESS_WRONG_REJ_RATE_BPM;Effective address guess wrong reject rate @@PM_LSU_REJECT_DERAT_MPRED / PM_INST_CMPL @@@group=load_store_performance @PMD_SET_MPRED_REJ_RATE_BPM;Set mispredict reject rate @@PM_LSU_REJECT_SET_MPRED / PM_INST_CMPL @@@group=load_store_performance @PMD_NO_SCRATCH_REJ_RET_BPM;No scratch reject rate @@PM_LSU_REJECT_NO_SCRATCH / PM_INST_CMPL @@@group=load_store_performance @PMD_STQ_FULL_REJ_RATE_BPM;STQ full reject rate @@PM_LSU_REJECT_STQ_FULL / PM_INST_CMPL @@@group=load_store_performance @PMD_PARTIAL_SECT_REJ_RATE_BPM;Partial sector 
reject rate @@PM_LSU_REJECT_PARTIAL_SECTOR / PM_INST_CMPL @@@group=load_store_performance @PMD_ULD_REJ_RATE_BPM;Unaligned load reject rate @@PM_LSU_REJECT_ULD / PM_INST_CMPL @@@group=load_store_performance @PMD_UST_REJ_RATE_BPM;Unaligned store reject rate @@PM_LSU_REJECT_UST / PM_INST_CMPL @@@group=load_store_performance @PMD_LSU_REJ_RATIO_BPM;LSU reject ratio @@PM_LSU_REJECT / (PM_LD_REF_L1 + PM_ST_REF_L1) @@@group=load_store_performance @PMD_LHS_REJ_RATIO_BPM;LHS reject ratio @@PM_LSU_REJECT_LHS / PM_LD_REF_L1 @@@group=load_store_performance @PMD_PARTIAL_SECT_REJ_RATIO_BPM;Partial sector reject ratio @@PM_LSU_REJECT_PARTIAL_SECTOR / PM_LD_REF_L1 @@@group=load_store_performance @PMD_ULD_REJ_RATIO_BPM;Unaligned load reject ratio @@PM_LSU_REJECT_ULD / PM_LD_REF_L1 @@@group=load_store_performance @PMD_UST_REJ_RATIO_BPM;Unaligned store reject ratio @@PM_LSU_REJECT_UST / PM_ST_REF_L1 @@@group=load_store_performance @PMD_STQ_FULL_REJ_RATIO_BPM;STQ full reject ratio @@PM_LSU_REJECT_STQ_FULL / PM_ST_REF_L1 @@@group=load_store_performance @PMD_LD_BYTE_BPM;Load Bytes @@((PM_LD_REF_L1 - PM_LSU_LDF)*4) + (PM_LSU_LDF * 8) @@@group=load_store_performance @PMD_ST_BYTE_BPM;Store Bytes @@((PM_ST_REF_L1 - PM_FPU_STF)*4) + (PM_FPU_STF * 8) @@@group=load_store_performance @PMD_MB_READ_BPM;MBs read @@(PMD_LD_BYTE_BPM / (PM_RUN_CYC / (proc_freq * 1000000))) / 1048576 @@@group=load_store_performance @PMD_MB_STORE_BPM;MBs store @@(PMD_ST_BYTE_BPM / (PM_RUN_CYC / (proc_freq * 1000000))) / 1048576 @@@group=load_store_performance @PMD_MEM_LOCALITY_BPM;Memory locality @@PM_DATA_FROM_LMEM / (PM_DATA_FROM_LMEM + PM_DATA_FROM_RMEM + PM_DATA_FROM_DMEM) @@@group=load_store_performance @PMD_FLUSH_ALIGN_RATE_BPM;Alignment flush rate @@PM_LSU_FLUSH_ALIGN / PM_INST_CMPL @@@group=load_store_performance @PMD_FLUSH_RATE_BPM;Flush rate @@PM_FLUSH / PM_INST_CMPL @@@group=load_store_performance ############################# # branch prediction metrics # ############################# @PMD_BR_PRED_BPM;Branch prediction 
@@1 - (PM_BR_MPRED / PM_BR_PRED) @@@group=branch_prediction_performance @PMD_CR_MISS_PRED_BPM;CR bit branch misprediction @@PM_BR_MPRED_CR / PM_BR_PRED_CR @@@group=branch_prediction_performance @PMD_CCACHE_MISS_PRED_BPM;Count cache branch misprediction @@PM_BR_MPRED_CCACHE / PM_BR_PRED_CCACHE @@@group=branch_prediction_performance @PMD_TA_MISS_PRED_BPM;Target address branch misprediction @@PM_BR_MPRED_TA / (PM_BR_PRED_CCACHE + PM_BR_PRED_LSTACK) @@@group=branch_prediction_performance @PMD_LSTACK_MISS_PRED_BPM;Link stack branch misprediction @@(PM_BR_MPRED_TA - PM_BR_MPRED_CCACHE) / PM_BR_PRED_LSTACK @@@group=branch_prediction_performance @PMD_COUNT_MISS_PRED_BPM;Count prediction branch misprediction @@PM_BR_MPRED_COUNT / (PM_BR_PRED) @@@group=branch_prediction_performance @PMD_BR_PCTT_BPM;Branch PCTT @@PM_BRU_FIN / PM_INST_CMPL @@@group=branch_prediction_performance @PMD_PCT_BR_CR_PRED_BPM;PCT BR CR prediction @@PM_BR_PRED_CR / PM_BR_PRED @@@group=branch_prediction_performance @PMD_PCT_BR_CCACHE_PRED_BPM;PCT BR CCACHE prediction @@PM_BR_PRED_CCACHE / PM_BR_PRED @@@group=branch_prediction_performance @PMD_PCT_BR_LSTACK_PRED_BPM;PCT BR LSTACK prediction @@PM_BR_PRED_LSTACK / PM_BR_PRED @@@group=branch_prediction_performance ################################# # ressource utilization metrics # ################################# @PMD_FPU0_BALANCE_BPM;FPU0 balance @@PM_FPU0_FIN / PM_FPU_FIN @@@group=ressource_utilization_performance @PMD_FXU_BOTH_BUSY_BPM;Both FXU Busy @@PM_FXU_BUSY / PM_CYC @@@group=ressource_utilization_performance @PMD_FXU0_ONLY_BUSY_BPM;FXU0 only Busy @@PM_FXU0_BUSY_FXU1_IDLE / PM_CYC @@@group=ressource_utilization_performance @PMD_FXU1_ONLY_BUSY_BPM;FXU1 only Busy @@PM_FXU1_BUSY_FXU0_IDLE / PM_CYC @@@group=ressource_utilization_performance @PMD_FXU_BOTH_IDLE_BPM;Both FXU Idle @@PM_FXU_IDLE / PM_CYC @@@group=ressource_utilization_performance @PMD_FXU0_BALANCE_BPM;FXU0 balance @@PM_FXU0_FIN / (PM_FXU0_FIN + PM_FXU1_FIN) 
@@@group=ressource_utilization_performance ######################## # mutli-thread metrics # ######################## @PMD_THRD_PRIO_0_BPM;Cycles thread running at priority level 0 @@PM_THRD_PRIO_0_CYC / PM_CYC @@@group=multi_thread_performance @PMD_THRD_PRIO_1_BPM;Cycles thread running at priority level 1 @@PM_THRD_PRIO_1_CYC / PM_CYC @@@group=multi_thread_performance @PMD_THRD_PRIO_2_BPM;Cycles thread running at priority level 2 @@PM_THRD_PRIO_2_CYC / PM_CYC @@@group=multi_thread_performance @PMD_THRD_PRIO_3_BPM;Cycles thread running at priority level 3 @@PM_THRD_PRIO_3_CYC / PM_CYC @@@group=multi_thread_performance @PMD_THRD_PRIO_4_BPM;Cycles thread running at priority level 4 @@PM_THRD_PRIO_4_CYC / PM_CYC @@@group=multi_thread_performance @PMD_THRD_PRIO_5_BPM;Cycles thread running at priority level 5 @@PM_THRD_PRIO_5_CYC / PM_CYC @@@group=multi_thread_performance @PMD_THRD_PRIO_6_BPM;Cycles thread running at priority level 6 @@PM_THRD_PRIO_6_CYC / PM_CYC @@@group=multi_thread_performance @PMD_THRD_PRIO_7_BPM;Cycles thread running at priority level 7 @@PM_THRD_PRIO_7_CYC / PM_CYC @@@group=multi_thread_performance @PMD_THRD_PRIO_DIFF_0_BPM;Cycles no thread priority difference @@PM_THRD_PRIO_DIFF_0_CYC / PM_CYC @@@group=multi_thread_performance @PMD_THRD_PRIO_DIFF_1OR2_BPM;Cycles thread priority difference is 1 or 2 @@PM_THRD_PRIO_DIFF_1or2_CYC / PM_CYC @@@group=multi_thread_performance @PMD_THRD_PRIO_DIFF_3OR4_BPM;Cycles thread priority difference is 3 or 4 @@PM_THRD_PRIO_DIFF_3or4_CYC / PM_CYC @@@group=multi_thread_performance @PMD_THRD_PRIO_DIFF_5OR6_BPM;Cycles thread priority difference is 5 or 6 @@PM_THRD_PRIO_DIFF_5or6_CYC / PM_CYC @@@group=multi_thread_performance @PMD_THRD_PRIO_DIFF_MINUS1OR2_BPM;Cycles thread priority difference is -1 or -2 @@PM_THRD_PRIO_DIFF_minus1or2_CYC / PM_CYC @@@group=multi_thread_performance @PMD_THRD_PRIO_DIFF_MINUS3OR4_BPM;Cycles thread priority difference is -3 or -4 @@PM_THRD_PRIO_DIFF_minus3or4_CYC / PM_CYC 
@@@group=multi_thread_performance @PMD_THRD_PRIO_DIFF_MINUS5OR6_BPM;Cycles thread priority difference is -5 or -6 @@PM_THRD_PRIO_DIFF_minus5or6_CYC / PM_CYC @@@group=multi_thread_performance ########################## # floating point metrics # ########################## @PMD_FLOP_BPM;FLOPs @@PM_FPU_1FLOP + (2 * PM_FPU_FMA) + (4 * PM_FPU_FSQRT_FDIV) @@@group=floating_point_performance @PMD_FLOP_PCT_BPM;FLOP_PCT @@PM_FPU_FLOP / PM_FPU_FIN @@@group=floating_point_performance @PMD_F_1FLOP_PCT_BPM;Single flop instructions @@PM_FPU_1FLOP / PM_FPU_FIN @@@group=floating_point_performance @PMD_FSQRT_FDIV_PCT_BPM;Floating Point divide or SQRT @@PM_FPU_FSQRT_FDIV / PM_FPU_FIN @@@group=floating_point_performance @PMD_FEST_PCT_BPM;Floating Point estimate @@PM_FPU_FEST / PM_FPU_FIN @@@group=floating_point_performance @PMD_FMA_PCT_BPM;Floating Point multiply add @@PM_FPU_FMA / PM_FPU_FIN @@@group=floating_point_performance @PMD_FRSP_PCT_BPM;Floating Point reciprocal @@PM_FPU_FRSP / PM_FPU_FIN @@@group=floating_point_performance @PMD_FCONV_PCT_BPM;Floating Point convert type instruction @@PM_FPU_FCONV / PM_FPU_FIN @@@group=floating_point_performance @PMD_STF_PCT_BPM;Floating Point store @@PM_FPU_STF / PM_FPU_FIN @@@group=floating_point_performance @PMD_FXDIV_PCT_BPM;Fixed Point divides execute on FPU @@PM_FPU_FXDIV / PM_FPU_FIN @@@group=floating_point_performance @PMD_FXMULT_PCT_BPM;Fixed Point multiplies execute on FPU @@PM_FPU_FXMULT / PM_FPU_FIN @@@group=floating_point_performance @PMD_FPU_PCTT_BPM;FPU_PCTT @@PM_FPU_FIN / PM_INST_CMPL @@@group=floating_point_performance @PMD_FLOP_PCTT_BPM;FLOP_PCTT @@PM_FPU_FLOP / PM_INST_CMPL @@@group=floating_point_performance @PMD_F_1FLOP_PCTT_BPM;_F_1FLOP_PCTT @@PM_FPU_1FLOP / PM_INST_CMPL @@@group=floating_point_performance @PMD_FSQRT_FDIV_PCTT_BPM;FSQRT_FDIV_PCTT @@PM_FPU_FSQRT_FDIV / PM_INST_CMPL @@@group=floating_point_performance @PMD_FEST_PCTT_BPM;FEST_PCTT @@PM_FPU_FEST / PM_INST_CMPL @@@group=floating_point_performance 
@PMD_FMA_PCTT_BPM;FMA_PCTT @@PM_FPU_FMA / PM_INST_CMPL @@@group=floating_point_performance @PMD_FRSP_PCTT_BPM;FRSP_PCTT @@PM_FPU_FRSP / PM_INST_CMPL @@@group=floating_point_performance @PMD_FCONV_PCTT_BPM;FCONV_PCTT @@PM_FPU_FCONV / PM_INST_CMPL @@@group=floating_point_performance @PMD_STF_PCTT_BPM;STF_PCTT @@PM_FPU_STF / PM_INST_CMPL @@@group=floating_point_performance @PMD_FXDIV_PCTT_BPM;Fixed Point divides execute on FPU @@PM_FPU_FXDIV / PM_INST_CMPL @@@group=floating_point_performance @PMD_FXMULT_PCTT_BPM;Fixed Point multiplies execute on FPU @@PM_FPU_FXMULT / PM_INST_CMPL @@@group=floating_point_performance ########################### # L2 and L3 cache metrics # ########################### @PMD_L2_ST_RATE_BPM;L2 store rate @@PM_ST_REQ_L2 / PM_INST_CMPL @@@group=l2_l3_cache_performance @PMD_L2_ST_MISS_RATIO_BPM;L2 store miss ratio @@PM_L2_ST_MISS_DATA / PM_ST_REQ_L2 @@@group=l2_l3_cache_performance @PMD_L2_GATHER_RATIO_BPM;L2 gather ratio @@PM_ST_REQ_L2 / PM_ST_REF_L1 @@@group=l2_l3_cache_performance @PMD_L2_LD_RATE_BPM;L2 load rate @@PM_L2_LD_REQ_DATA / PM_INST_CMPL @@@group=l2_l3_cache_performance @PMD_L2_LD_MISS_RATIO_BPM;L2 load miss ratio @@PM_L2_LD_MISS_DATA / PM_L2_LD_REQ_DATA @@@group=l2_l3_cache_performance @PMD_L2_IFETCH_RATE_BPM;L2 instruction fetch rate @@PM_L2_LD_REQ_INST / PM_INST_CMPL @@@group=l2_l3_cache_performance @PMD_L2_IFETCH_MISS_RATIO_BPM;L2 instruction fetch miss ratio @@PM_L2_LD_MISS_INST / PM_L2_LD_REQ_INST @@@group=l2_l3_cache_performance @PMD_L3_MISS_RATIO_BPM;L3 miss ratio @@(PM_L3SA_MISS + PM_L3SB_MISS) / (PM_L3SA_REF + PM_L3SB_REF) @@@group=l2_l3_cache_performance ###################### # performance groups # ###################### @PMD_UTI_RATE;Utilization rate;% @@(PM_CYC * 100 / proc_freq) / total_time @@@group=performance @PMD_MIPS;MIPS;MIPS @@(0.000001 * PM_INST_CMPL) / total_time @@@group=performance @PMD_INST_PER_CYC;Instructions per cycle @@PM_INST_CMPL / PM_CYC @@@group=performance @PMD_FLOPS;Flop;Mflop @@(PM_FPU_1FLOP + (2 * 
PM_FPU_FMA) + (4 * PM_FPU_FSQRT_FDIV)) * 0.000001 @@@description=1FLOP instruction + 2 FMAs + 4 FDIVs + 4 FSQRTs (flops) @@@group=performance,metrics_AEM @PMD_FLOPS_WCT;Flop rate (flops / WCT);Mflop/s @@((PM_FPU_1FLOP + (2 * PM_FPU_FMA) + (4 * PM_FPU_FSQRT_FDIV)) * 0.000001) / total_time @@@group=performance @PMD_FLOPS_UTIME;Flops / user time;Mflop/s @@((PM_FPU_1FLOP + (2 * PM_FPU_FMA) + (4 * PM_FPU_FSQRT_FDIV)) * 0.000001) / (PM_CYC / proc_freq) @@@group=performance @PMD_FP_LD_ST;Floating point load and store operations ;M @@(PM_LSU_LDF + PM_FPU_STF) * 0.000001 @@@group=performance @PMD_INST_PER_FP_LD_ST;Instructions per floating point load/store @@PM_INST_CMPL / (PM_LSU_LDF + PM_FPU_STF) @@@group=performance @PMD_PRC_INST_DISP_CMPL;% Instructions dispatched that completed;% @@PM_INST_CMPL * 100 / PM_INST_DISP @@@group=performance @PMD_DATA_L2;Total L2 data cache accesses;M @@(PM_LD_MISS_L1 + PM_ST_MISS_L1) * 0.000001 @@@group=performance @PMD_PRC_L2_ACCESS;% accesses from L2 per cycle;% @@100 * (PM_LD_MISS_L1 + PM_ST_MISS_L1) / PM_CYC @@@group=performance @PMD_L2_TRAF;L2 traffic;MBytes @@(PM_LD_MISS_L1 + PM_ST_MISS_L1) * CACHE_LINE_SIZE / (1024 * 1024) @@@group=performance @PMD_L2_BDW;L2 bandwidth per processor;MBytes/s @@(PM_LD_MISS_L1 + PM_ST_MISS_L1) * CACHE_LINE_SIZE / (1024 * 1024) / total_time @@@group=performance @PMD_L1_MISS_LD_EST_LAT_AVG;Estimated latency from loads from L2 (Average);s @@HPM_AVG_L2_LATENCY * (PM_ST_MISS_L1 + PM_LD_MISS_L1) / proc_freq @@@group=performance @PMD_LD_ST;Total load and store operations;M @@(PM_LD_REF_L1 + PM_ST_REF_L1) * 0.000001 @@@group=performance @PMD_INST_PER_LD_ST;Instructions per load/store @@PM_INST_CMPL / (PM_ST_REF_L1 + PM_LD_REF_L1) @@@group=performance @PMD_ST_PER_ST_MISS;Number of stores per store miss @@PM_ST_REF_L1 / PM_ST_MISS_L1 @@@group=performance @PMD_LD_PER_LD_MISS;Number of loads per load miss @@PM_LD_REF_L1 / PM_LD_MISS_L1 @@@group=performance @PMD_MEM_LD_TRAF;Memory load traffic;MBytes 
@@((PM_DATA_FROM_LMEM + PM_DATA_FROM_RMEM + PM_DATA_FROM_DMEM) * MEM_LINE_SIZE) / (1024 * 1024) @@@group=performance @PMD_MEM_LD_BDW;Memory load bandwidth per processor;MBytes/s @@((PM_DATA_FROM_LMEM + PM_DATA_FROM_RMEM + PM_DATA_FROM_DMEM) * MEM_LINE_SIZE) / (1024 * 1024) / total_time @@@group=performance @PMD_MEM_LD_EST_LAT;Estimated latency from loads from memory;s @@HPM_MEM_LATENCY * (PM_DATA_FROM_LMEM + PM_DATA_FROM_RMEM + PM_DATA_FROM_DMEM)/ proc_freq @@@group=performance @PMD_LD_LMEM_PER_LD_RMEM;Number of loads from local memory per loads from remote memory @@PM_DATA_FROM_LMEM / PM_DATA_FROM_RMEM @@@group=performance @PMD_LD_LMEM_PER_LD_DMEM;Number of loads from local memory per loads from distant memory @@PM_DATA_FROM_LMEM / PM_DATA_FROM_DMEM @@@group=performance @PMD_LD_LMEM_PER_LD_MEM;Number of loads from local memory per loads from remote and distant memory @@PM_DATA_FROM_LMEM / (PM_DATA_FROM_DMEM + PM_DATA_FROM_RMEM) @@@group=performance @PMD_PRC_MEM_LD;% loads from memory per cycle;% @@100 * (PM_DATA_FROM_LMEM + PM_DATA_FROM_RMEM + PM_DATA_FROM_DMEM) / PM_RUN_CYC @@@group=performance @PMD_LD_RMEM_PER_LD_DMEM;Number of loads from remote memory per loads from distant memory @@PM_DATA_FROM_RMEM / PM_DATA_FROM_DMEM @@@group=performance @PMD_HW_FP;Total HW floating point instructions @@(PM_FPU_FIN) * 0.000001 @@@group=performance @PMD_HW_FP_PER_CYC;HW floating point instructions per Cycle @@PM_FPU_FIN / PM_CYC @@@group=performance @PMD_HW_FP_PER_UTIME;HW floating point instructions / user time;M HWflops/s @@(0.000001 * PM_FPU_FIN) / (PM_CYC / proc_freq) @@@group=performance @PMD_HW_FP_RATE;HW floating point rate (HW Flops / WCT);M HWflops/s @@0.000001 * PM_FPU_FIN / total_time @@@group=performance @PMD_LD_ST_DISP_PER_L1_MISS;Number of load/stores dispatched per L1 miss @@(PM_LD_REF_L1 + PM_ST_REF_L1) / (PM_LD_MISS_L1 + PM_ST_MISS_L1) @@@group=performance @PMD_L1_HIT_RATE;L1 cache hit rate;% @@100 * (1 - (PM_LD_MISS_L1 + 
PM_ST_MISS_L1) / (PM_LD_REF_L1 + PM_ST_REF_L1)) @@@group=performance @PMD_L2_EST_LAT_AVG;Estimated latency from L2 (Average);s @@HPM_AVG_L2_LATENCY * (PM_ST_MISS_L1 + PM_LD_MISS_L1) / proc_freq @@@group=performance @PMD_FX;Total Fixed point operations @@(PM_FXU0_FIN + PM_FXU1_FIN) * 0.000001 @@@group=performance @PMD_FX_PER_CYC;Fixed point operations per Cycle @@(PM_FXU0_FIN + PM_FXU1_FIN) / PM_CYC @@@group=performance @PMD_L2_LD;Total Loads from L2;M @@(PM_DATA_FROM_L2 + PM_DATA_FROM_L21 + PM_DATA_FROM_L25_SHR + PM_DATA_FROM_L25_MOD) * 0.000001 @@@group=performance @PMD_PRC_L2_LD;% loads from L2 per cycle;% @@100 * (PM_DATA_FROM_L2 + PM_DATA_FROM_L21 + PM_DATA_FROM_L25_SHR + PM_DATA_FROM_L25_MOD) / PM_CYC @@@group=performance @PMD_L2_LD_TRAF;L2 load traffic;MBytes/s @@(PM_DATA_FROM_L2 + PM_DATA_FROM_L21 + PM_DATA_FROM_L25_SHR + PM_DATA_FROM_L25_MOD) * CACHE_LINE_SIZE / (1024 * 1024) @@@group=performance @PMD_L2_LD_BDW;L2 load bandwidth per processor;MBytes/s @@(PM_DATA_FROM_L2 + PM_DATA_FROM_L21 + PM_DATA_FROM_L25_SHR + PM_DATA_FROM_L25_MOD) * CACHE_LINE_SIZE / (1024 * 1024) / total_time @@@group=performance @PMD_L2_LD_EST_LAT;Estimated latency from loads from L2;s @@((HPM_L2_LATENCY * PM_DATA_FROM_L2) + (HPM_L25_LATENCY * (PM_DATA_FROM_L25_SHR + PM_DATA_FROM_L25_MOD)) + (HPM_L21_LATENCY * PM_DATA_FROM_L21)) / proc_freq @@@group=performance @PMD_L2_LD_EST_LAT_AVG;Estimated latency from loads from L2 (Average);s @@HPM_AVG_L2_LATENCY * (PM_DATA_FROM_L2 + PM_DATA_FROM_L21 + PM_DATA_FROM_L25_SHR + PM_DATA_FROM_L25_MOD) / proc_freq @@@group=performance @PMD_L3_LD;Total loads from L3;M @@(PM_DATA_FROM_L3 + PM_DATA_FROM_L35_MOD + PM_DATA_FROM_L35_SHR) * 0.000001 @@@group=performance @PMD_PRC_L3_LD;% loads from L3 per cycle;% @@100 * (PM_DATA_FROM_L3 + PM_DATA_FROM_L35_MOD + PM_DATA_FROM_L35_SHR) / PM_CYC @@@group=performance @PMD_L3_LD_TRAF;L3 load traffic;MBytes/s @@(PM_DATA_FROM_L3 + PM_DATA_FROM_L35_MOD + PM_DATA_FROM_L35_SHR) * CACHE_LINE_SIZE / (1024 * 1024) 
@@@group=performance @PMD_L3_LD_BDW;L3 load bandwidth per processor;MBytes/s @@(PM_DATA_FROM_L3 + PM_DATA_FROM_L35_MOD + PM_DATA_FROM_L35_SHR) * CACHE_LINE_SIZE / (1024 * 1024) / total_time @@@group=performance @PMD_L3_LD_EST_LAT;Estimated latency from loads from L3;s @@((HPM_L3_LATENCY * PM_DATA_FROM_L3) + (HPM_L35_LATENCY * (PM_DATA_FROM_L35_MOD + PM_DATA_FROM_L35_SHR))) / proc_freq @@@group=performance @PMD_L3_LD_EST_LAT_AVG;Estimated latency from loads from L3 (Average);s @@HPM_AVG_L3_LATENCY * (PM_DATA_FROM_L3 + PM_DATA_FROM_L35_MOD + PM_DATA_FROM_L35_SHR) / proc_freq @@@group=performance @PMD_FMA_PRC;FMA percentage;% @@(2 * 100 * PM_FPU_FMA) / (PM_FPU_1FLOP + PM_FPU_FMA) @@@group=performance @PMD_FP_COMP;Floating point Computation intensity @@(PM_FPU_1FLOP + PM_FPU_FMA) / (PM_LSU_LDF + PM_FPU_STF) @@@group=performance @PMD_L2_LD_MISS_PERC;L2 load miss percentage;% @@100 * (PM_L2_MISS) / (PM_ST_MISS_L1 + PM_LD_MISS_L1) @@@group=performance @PMD_L3_LD_MISS_PERC;L3 load miss percentage;% @@100 * ((PM_DATA_FROM_L3MISS + PM_INST_FROM_L3MISS) / PM_L2_MISS) @@@group=performance ##################### # CPI Breakdown Model ##################### @PMD_TOTAL_CPI;Total CPI @@PM_CYC / PM_INST_CMPL @@@group=cpi_breakdown:0 @PMD_DPU_WT_CPI;Nothing to Dispatch @@PM_DPU_WT / PM_INST_CMPL @@@group=cpi_breakdown:1 @PMD_DPU_WT_BR_MPRED_CPI;Waiting on Branch Mispredict @@PM_DPU_WT_BR_MPRED / PM_INST_CMPL @@@group=cpi_breakdown:2 @PMD_DPU_WT_IC_MISS_CPI;Waiting on Icache Miss @@PM_DPU_WT_IC_MISS / PM_INST_CMPL @@@group=cpi_breakdown:2 @PMD_DPU_WT_OTHER;Other @@(PM_DPU_WT - PM_DPU_WT_BR_MPRED - PM_DPU_WT_IC_MISS) / PM_INST_CMPL @@@group=cpi_breakdown:2 @PMD_DPU_HELD_CPI;Dispatch Held @@PM_DPU_HELD / PM_INST_CMPL @@@group=cpi_breakdown:1 @PMD_LD_MISS_L1_CPI;Dcache miss pending @@PM_LD_MISS_L1_CYC / PM_INST_CMPL @@@group=cpi_breakdown:2 @PMD_LSU_DERAT_MISS_CPI;DERAT miss pending @@PM_LSU_DERAT_MISS_CYC / PM_INST_CMPL @@@group=cpi_breakdown:2 @PMD_DPU_HELD_OTHER_CPI;Dispatch Held (not 
cache or ERAT) @@(PM_DPU_HELD - PM_LD_MISS_L1_CYC - PM_LSU_DERAT_MISS_CYC) / PM_INST_CMPL @@@group=cpi_breakdown:2 @PMD_DPU_WT_OTHER_CPI;Dispatch Wait (not held) @@(PM_CYC - PM_DPU_WT - PM_DPU_HELD - PM_1PLUS_PPC_DISP - PM_INST_DISP_LLA) / PM_INST_CMPL @@@group=cpi_breakdown:1 @PMD_1PLUS_PPC_DISP_CPI;Dispatch @@PM_1PLUS_PPC_DISP / PM_INST_CMPL @@@group=cpi_breakdown:1 @PMD_FLUSH_REJECT_CPI;Flush/Reject @@(PM_1PLUS_PPC_DISP - PM_1PLUS_PPC_CMPL) / PM_INST_CMPL @@@group=cpi_breakdown:2 @PMD_1PLUS_PPC_CMPL_CPI;Finish CPI @@PM_1PLUS_PPC_CMPL / PM_INST_CMPL @@@group=cpi_breakdown:2 ############################## # CPI Breakdown Extended Model ############################## pm_breakdown_cpi = PM_CYC/PM_INST_CMPL; # # cpu utilization # @PMD_B_CPU_UTI;CPU Utilization @@PM_RUN_CYC/PM_CYC*100 @@@group=cpu_utilization @@@description=CPU Utilization: (ignore if not K+U) @PMD_B_CPI;CPI @@pm_breakdown_cpi @@@group=cbem_cpu_utilization @PMD_B_CPI_LD_MISS_L1;CPI component due to L1 load misses @@PM_LD_MISS_L1_CYC/PM_INST_CMPL @@@group=cbem_cpu_utilization,cbem_data_src @PMD_B_CPI_LD_MISS_L1_PERC;Percentage CPI component due to L1 load misses;% @@PM_LD_MISS_L1_CYC/(PM_INST_CMPL*pm_breakdown_cpi)*100 @@@group=cbem_cpu_utilization,cbem_data_src @PMD_B_CPI_LSU_REJ;CPI component due to LSU rejects @@PM_LSU_REJECT*11/PM_INST_CMPL @@@group=cbem_cpu_utilization,cbem_lsu_rej @PMD_B_CPI_LSU_REJ_PERC;Percentage CPI component due to LSU rejects;% @@PM_LSU_REJECT*11/(PM_INST_CMPL*pm_breakdown_cpi)*100 @@@group=cbem_cpu_utilization,cbem_lsu_rej @PMD_B_CPI_DERAT_MISS;CPI component due to DERAT misses @@PM_LSU_DERAT_MISS_CYC/PM_INST_CMPL @@@group=cbem_cpu_utilization,cbem_translation_overheads @PMD_B_CPI_DERAT_MISS_PERC;Percentage CPI component due to DERAT misses;% @@PM_LSU_DERAT_MISS_CYC/(PM_INST_CMPL*pm_breakdown_cpi)*100 @@@group=cbem_cpu_utilization,cbem_translation_overheads @PMD_B_CPI_SYNC;CPI component due to SYNC @@PM_SYNC_CYC/PM_INST_CMPL @@@group=cbem_cpu_utilization,cbem_barrier_sync 
@PMD_B_CPI_SYNC_PERC;Percentage CPI component due to SYNC;% @@PM_SYNC_CYC/(PM_INST_CMPL*pm_breakdown_cpi)*100 @@@group=cbem_cpu_utilization,cbem_barrier_sync @PMD_B_CPI_BR_MPRED;CPI component due to branch mispredicts @@PM_BR_MPRED*15/PM_CYC*pm_breakdown_cpi @@@group=cbem_cpu_utilization,cbem_branch_pred @PMD_B_CPI_BR_MPRED_PERC;Percentage CPI component due to branch mispredicts;% @@PM_BR_MPRED*15/PM_CYC*100 @@@group=cbem_cpu_utilization,cbem_branch_pred @PMD_B_GRP_SIZE;Avg. group size @@PM_INST_CMPL/PM_1PLUS_PPC_CMPL @@@group=cbem_cpu_utilization @PMD_B_CYC_HV_PERC;Percentage Cycles spent in hypervisor mode;% @@PM_HV_CYC/PM_CYC*100 @@@group=cbem_cpu_utilization # # dispatch # @PMD_B_CYC_DISP_PERC;Percentage Cycles something dispatched;% @@(PM_CYC-(PM_DPU_HELD+PM_DPU_WT))/PM_CYC*100 @@@description=Calculated by total cycles - dispatch held - dispatch waiting @@@group=cbem_dispatch @PMD_B_CYC_GRP_DISP_PERC;Percentage Cycles a group dispatched;% @@PM_1PLUS_PPC_DISP/PM_CYC*100 @@@description=Calculated with PM_1PLUS_PPC_DISP counter @@@group=cbem_dispatch @PMD_B_CYC_GRP_CMPL_PERC;Percentage Cycles a group completed;% @@PM_1PLUS_PPC_CMPL/PM_CYC*100 @@@group=cbem_dispatch @PMD_B_GRP_DISP_PER_CMPL;Group dispatch-to-completion ratio @@PM_1PLUS_PPC_DISP/PM_1PLUS_PPC_CMPL @@@group=cbem_dispatch @PMD_B_INST_DISP_PER_CMPL;Instruction dispatch-to-completion ratio @@PM_INST_DISP/PM_INST_CMPL @@@group=cbem_dispatch @PMD_B_DISP_WT_PERC;Percentage Cycles dispatch was waiting;% @@PM_DPU_WT/PM_CYC*100 @@@group=cbem_dispatch @PMD_B_DISP_WT_IC_MISS_PERC;Percentage Cycles dispatch was waiting due to I-cache miss;% @@PM_DPU_WT_IC_MISS/PM_CYC*100 @@@group=cbem_dispatch @PMD_B_DISP_WT_BR_MPRED_PERC;Percentage Cycles dispatch was waiting due to mispredicted branches;% @@PM_DPU_WT_BR_MPRED/PM_CYC*100 @@@group=cbem_dispatch @PMD_B_DISP_WT_OTHER_PERC;Percentage Cycles dispatch was waiting due to other reasons;% @@(PM_DPU_WT - (PM_DPU_WT_IC_MISS+PM_DPU_WT_BR_MPRED))/PM_CYC*100 
@@@group=cbem_dispatch # # held # @PMD_B_CYC_DISP_HELD_PERC;Percentage Cycles dispatch was held;% @@PM_DPU_HELD/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_RESTART_PERC;Percentage Cycles dispatch was held due to restart coming;% @@PM_DPU_HELD_RESTART/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_SMT_PERC;Percentage Cycles dispatch was held due to SMT conflicts;% @@PM_DPU_HELD_SMT/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_GPR_PERC;Percentage Cycles dispatch was held due to GPR dependencies;% @@PM_DPU_HELD_GPR/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_STCX_PERC;Percentage Cycles dispatch was held due to STCX updating CR;% @@PM_DPU_HELD_STCX_CR/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_CW_PERC;Percentage Cycles dispatch was held due to cache writes;% @@PM_DPU_HELD_CW/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_CR_LOGIC_PERC;Percentage Cycles dispatch was held due to CR, LR or CTR updated by CR logical;% @@PM_DPU_HELD_CR_LOGICAL/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_XTHRD_PERC;Percentage Cycles dispatch was held due to cross-thread resource dependencies;% @@PM_DPU_HELD_XTHRD/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_FPQ_PERC;Percentage Cycles dispatch was held due to FPU issue queue full;% @@PM_DPU_HELD_FPQ/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_XER_PERC;Percentage Cycles dispatch was held due to XER dependency;% @@PM_DPU_HELD_XER/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_ISYNC_PERC;Percentage Cycles dispatch was held due to ISYNC;% @@PM_DPU_HELD_ISYNC/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_RU_WQ_PERC;Percentage Cycles dispatch was held due to RU FXU write queue full;% @@PM_DPU_HELD_RU_WQ/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_FPU_CR_PERC;Percentage Cycles dispatch was held due to FPU updating CR;% @@PM_DPU_HELD_FPU_CR/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_LSU_PERC;Percentage Cycles dispatch was held due to LSU move or invalidate SLR and SR;% 
@@PM_DPU_HELD_LSU/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_ITLB_ISLB_PERC;Percentage Cycles dispatch was held due to TLB or SLB invalidates;% @@PM_DPU_HELD_ITLB_ISLB/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_FXU_MULTI_PERC;Percentage Cycles dispatch was held due to FXU multi-cycle;% @@PM_DPU_HELD_FXU_MULTI/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_FP_FX_MULT_PERC;Percentage Cycles dispatch was held due to non fixed mul/div after fixed mul/div;% @@PM_DPU_HELD_FP_FX_MULT/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_MULT_GPR_PERC;Percentage Cycles dispatch was held due to mul/div mul/div GPR dependencies;% @@PM_DPU_HELD_MULT_GPR/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_COMPLETION_PERC;Percentage Cycles dispatch was held due to completion holding dispatch;% @@PM_DPU_HELD_COMPLETION/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_INT_PERC;Percentage Cycles dispatch was held due to PM_DPU_HELD_INT;% @@PM_DPU_HELD_INT/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_LLA_END_PERC;Percentage Cycles dispatch was held due to LLA ended;% @@PM_DPU_HELD_LLA_END/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_FXU_SOPS_PERC;Percentage Cycles dispatch was held due to FXU slow ops (mtmsr, scv, rfscv);% @@PM_DPU_HELD_FXU_SOPS/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_THRD_PRIO_PERC;Percentage Cycles dispatch was held due to lower priority thread;% @@PM_DPU_HELD_THRD_PRIO/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_SPR_PERC;Percentage Cycles dispatch was held due to MTSPR/MFSPR;% @@PM_DPU_HELD_SPR/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_LSU_SOPS_PERC;Percentage Cycles dispatch was held due to LSU slow ops (sync, tlbie, stcx);% @@PM_DPU_HELD_LSU_SOPS/PM_CYC*100 @@@group=cbem_held @PMD_B_CYC_DISP_HELD_THERMAL_PERC;Percentage Cycles dispatch was held due to thermal conditions;% @@PM_DPU_HELD_THERMAL/PM_CYC*100 @@@group=cbem_held sum_held = 
PM_DPU_HELD_THERMAL+PM_DPU_HELD_GPR+PM_DPU_HELD_SMT+PM_DPU_HELD_CW+PM_DPU_HELD_FPQ+PM_DPU_HELD_XER+PM_DPU_HELD_ISYNC+PM_DPU_HELD_STCX_CR+PM_DPU_HELD_RU_WQ+PM_DPU_HELD_FPU_CR+PM_DPU_HELD_LSU+PM_DPU_HELD_ITLB_ISLB+PM_DPU_HELD_FXU_MULTI+PM_DPU_HELD_FP_FX_MULT+PM_DPU_HELD_MULT_GPR+PM_DPU_HELD_COMPLETION+PM_DPU_HELD_INT+PM_DPU_HELD_XTHRD+PM_DPU_HELD_LLA_END+PM_DPU_HELD_RESTART+PM_DPU_HELD_FXU_SOPS+PM_DPU_HELD_THRD_PRIO+PM_DPU_HELD_SPR+PM_DPU_HELD_CR_LOGICAL+PM_DPU_HELD_LSU_SOPS; @PMD_B_HELD_PER_DISP_HELD_PERC;Percentage held reasons per dispatch holds;% @@(sum_held/PM_DPU_HELD)*100 @@@group=cbem_held @PMD_B_HELD_PER_CYC_PERC;Percentage held reasons per total cycles;% @@sum_held/PM_CYC*100 @@@group=cbem_held # # lla # @PMD_B_LLA_PER_CYC_PERC;Percentage Cycles Load Lookahead have been active;% @@(PM_LSU_DERAT_MISS_CYC+PM_LD_MISS_L1_CYC)/PM_CYC*100 @@@group=cbem_lla @PMD_B_LLA_PER_CYC_LD_MISS_PERC;Percentage Cycles Load Lookahead have been active due to L1 load miss cycles;% @@PM_LD_MISS_L1_CYC/PM_CYC*100 @@@group=cbem_lla @PMD_B_LLA_PER_CYC_DERAT_MISS_PERC;Percentage Cycles Load Lookahead have been active due to DERAT miss cycles;% @@PM_LSU_DERAT_MISS_CYC/PM_CYC*100 @@@group=cbem_lla @PMD_B_LLA_PER_INST_PERC;Percentage of instructions dispatched in LLA mode;% @@PM_INST_DISP_LLA/PM_INST_CMPL*100 @@@group=cbem_lla # # lsu reject # @PMD_B_LSU_REJ_PER_INST_CMPL_PERC;Percentage of instructions completed that were rejected for any reason;% @@PM_LSU_REJECT/PM_INST_CMPL*100 @@@group=cbem_lsu_rej @PMD_B_LSU_REJ_DERAT_PERC;Percentage of instructions completed that were rejected due to DERAT Guess Wrong;% @@PM_LSU_REJECT_DERAT_MPRED/PM_INST_CMPL*100 @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_DERAT;CPI component due to DERAT Guess Wrong @@PM_LSU_REJECT_DERAT_MPRED*11/PM_INST_CMPL @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_DERAT_PERC;Percentage of CPI component due to DERAT Guess Wrong;% @@PM_LSU_REJECT_DERAT_MPRED*11/(PM_INST_CMPL*pm_breakdown_cpi)*100 @@@group=cbem_lsu_rej 
@PMD_B_LSU_REJ_LHS_PERC;Percentage of instructions completed that were rejected due to Load-Hit-Store;% @@PM_LSU_REJECT_LHS/PM_INST_CMPL*100 @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_LHS;CPI component due to Load-Hit-Store @@PM_LSU_REJECT_LHS*11/PM_INST_CMPL @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_LHS_PERC;Percentage of CPI component due to Load-Hit-Store;% @@PM_LSU_REJECT_LHS*11/(PM_INST_CMPL*pm_breakdown_cpi)*100 @@@group=cbem_lsu_rej @PMD_B_LSU_REJ_LHS_BOTH_PERC;Percentage of instructions completed that were rejected due to Load-Hit-Store, both units;% @@PM_LSU_REJECT_LHS_BOTH/PM_INST_CMPL*100 @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_LHS_BOTH;CPI component due to Load-Hit-Store, both units @@PM_LSU_REJECT_LHS_BOTH*11/PM_INST_CMPL @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_LHS_BOTH_PERC;Percentage of CPI component due to Load-Hit-Store, both units;% @@PM_LSU_REJECT_LHS_BOTH*11/(PM_INST_CMPL*pm_breakdown_cpi)*100 @@@group=cbem_lsu_rej @PMD_B_LSU_REJ_NO_SCRATCH_PERC;Percentage of instructions completed that were rejected due to Scratch Register Unavailable;% @@PM_LSU_REJECT_NO_SCRATCH/PM_INST_CMPL*100 @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_NO_SCRATCH;CPI component due to Scratch Register Unavailable @@PM_LSU_REJECT_NO_SCRATCH*11/PM_INST_CMPL @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_NO_SCRATCH_PERC;Percentage of CPI component due to Scratch Register Unavailable;% @@PM_LSU_REJECT_NO_SCRATCH*11/(PM_INST_CMPL*pm_breakdown_cpi)*100 @@@group=cbem_lsu_rej @PMD_B_LSU_REJ_STQ_FULL_PERC;Percentage of instructions completed that were rejected due to Store Queue Full;% @@PM_LSU_REJECT_STQ_FULL/PM_INST_CMPL*100 @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_STQ_FULL;CPI component due to Store Queue Full @@PM_LSU_REJECT_STQ_FULL*11/PM_INST_CMPL @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_STQ_FULL_PERC;Percentage of CPI component due to Store Queue Full;% @@PM_LSU_REJECT_STQ_FULL*11/(PM_INST_CMPL*pm_breakdown_cpi)*100 @@@group=cbem_lsu_rej @PMD_B_LSU_REJ_PART_SECT_PERC;Percentage of 
instructions completed that were rejected due to Partial Sector Valid;% @@PM_LSU_REJECT_PARTIAL_SECTOR/PM_INST_CMPL*100 @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_PART_SECT;CPI component due to Partial Sector Valid @@PM_LSU_REJECT_PARTIAL_SECTOR*11/PM_INST_CMPL @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_PART_SECT_PERC;Percentage of CPI component due to Partial Sector Valid;% @@PM_LSU_REJECT_PARTIAL_SECTOR*11/(PM_INST_CMPL*pm_breakdown_cpi)*100 @@@group=cbem_lsu_rej @PMD_B_LSU_REJ_L2_CORR_PERC;Percentage of instructions completed that were rejected due to L1 Data Cache stale data;% @@PM_LSU_REJECT_L2_CORR/PM_INST_CMPL*100 @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_L2_CORR;CPI component due to L1 Data Cache stale data @@PM_LSU_REJECT_L2_CORR*11/PM_INST_CMPL @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_L2_CORR_PERC;Percentage of CPI component due to L1 Data Cache stale data;% @@PM_LSU_REJECT_L2_CORR*11/(PM_INST_CMPL*pm_breakdown_cpi)*100 @@@group=cbem_lsu_rej @PMD_B_LSU_REJ_ULD_PERC;Percentage of instructions completed that were rejected due to Unaligned Load;% @@PM_LSU_REJECT_ULD/PM_INST_CMPL*100 @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_ULD;CPI component due to Unaligned Load @@PM_LSU_REJECT_ULD*11/PM_INST_CMPL @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_ULD_PERC;Percentage of CPI component due to Unaligned Load;% @@PM_LSU_REJECT_ULD*11/(PM_INST_CMPL*pm_breakdown_cpi)*100 @@@group=cbem_lsu_rej @PMD_B_LSU_REJ_ULD_BOTH_PERC;Percentage of instructions completed that were rejected due to Unaligned Load, both units;% @@PM_LSU_REJECT_ULD_BOTH/PM_INST_CMPL*100 @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_ULD_BOTH;CPI component due to Unaligned Load, both units @@PM_LSU_REJECT_ULD_BOTH*11/PM_INST_CMPL @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_ULD_BOTH_PERC;Percentage of CPI component due to Unaligned Load, both units;% @@PM_LSU_REJECT_ULD_BOTH*11/(PM_INST_CMPL*pm_breakdown_cpi)*100 @@@group=cbem_lsu_rej @PMD_B_LSU_REJ_UST_PERC;Percentage of instructions completed that were rejected due to 
Unaligned Store;% @@PM_LSU_REJECT_UST/PM_INST_CMPL*100 @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_UST;CPI component due to Unaligned Store @@PM_LSU_REJECT_UST*11/PM_INST_CMPL @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_UST_PERC;Percentage of CPI component due to Unaligned Store;% @@PM_LSU_REJECT_UST*11/(PM_INST_CMPL*pm_breakdown_cpi)*100 @@@group=cbem_lsu_rej @PMD_B_LSU_REJ_UST_BOTH_PERC;Percentage of instructions completed that were rejected due to Unaligned Store, both units;% @@PM_LSU_REJECT_UST_BOTH/PM_INST_CMPL*100 @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_UST_BOTH;CPI component due to Unaligned Store, both units @@PM_LSU_REJECT_UST_BOTH*11/PM_INST_CMPL @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_UST_BOTH_PERC;Percentage of CPI component due to Unaligned Store, both units;% @@PM_LSU_REJECT_UST_BOTH*11/(PM_INST_CMPL*pm_breakdown_cpi)*100 @@@group=cbem_lsu_rej @PMD_B_LSU_REJ_EXT_PERC;Percentage of instructions completed that were rejected due to external (FXU or IDU) reject request;% @@PM_LSU_REJECT_EXTERN/PM_INST_CMPL*100 @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_EXT;CPI component due to external (FXU or IDU) reject request @@PM_LSU_REJECT_EXTERN*11/PM_INST_CMPL @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_EXT_PERC;Percentage of CPI component due to external (FXU or IDU) reject request;% @@PM_LSU_REJECT_EXTERN*11/(PM_INST_CMPL*pm_breakdown_cpi)*100 @@@group=cbem_lsu_rej @PMD_B_LSU_REJ_SET_MPRED_PERC;Percentage of instructions completed that were rejected due to Set Predict Wrong;% @@PM_LSU_REJECT_SET_MPRED/PM_INST_CMPL*100 @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_SET_MPRED;CPI component due to Set Predict Wrong @@PM_LSU_REJECT_SET_MPRED*16/PM_INST_CMPL @@@group=cbem_lsu_rej @PMD_B_CPI_LSU_REJ_SET_MPRED_PERC;Percentage of CPI component due to Set Predict Wrong;% @@PM_LSU_REJECT_SET_MPRED*16/(PM_INST_CMPL*pm_breakdown_cpi)*100 @@@group=cbem_lsu_rej @PMD_B_LSU_REJ_STEAL_PERC;Percentage of instructions completed that were rejected due to L1 Dcache Miss;% 
@@PM_LSU_REJECT_STEAL/PM_INST_CMPL*100 @@@group=cbem_lsu_rej sum_rejects = PM_LSU_REJECT_DERAT_MPRED+PM_LSU_REJECT_LHS+PM_LSU_REJECT_NO_SCRATCH+PM_LSU_REJECT_STQ_FULL+PM_LSU_REJECT_PARTIAL_SECTOR+PM_LSU_REJECT_L2_CORR+PM_LSU_REJECT_ULD+PM_LSU_REJECT_UST+PM_LSU_REJECT_EXTERN+PM_LSU_REJECT_SET_MPRED+PM_LSU_REJECT_STEAL; @PMD_B_LIST_REJ_PER_ALL_PERC;Percentage of listed conditions per all rejects (PM_LSU_REJECT);% @@sum_rejects/PM_LSU_REJECT*100 @@@group=cbem_lsu_rej # # data source # @PMD_B_ICACHE_MISS_PER_INST_PERC;Percentage of Icache misses per instruction completed;% @@PM_L1_ICACHE_MISS/PM_INST_CMPL*100 @@@group=cbem_data_src @PMD_B_L1_ST_MISS_PER_ST_REF_PERC;Percentage of L1 store misses per L1 store ref;% @@PM_ST_MISS_L1/PM_ST_REF_L1*100 @@@group=cbem_data_src @PMD_B_L1_LD_MISS_PER_LD_REF_PERC;Percentage of L1 load misses per L1 load ref;% @@PM_LD_MISS_L1/PM_LD_REF_L1*100 @@@group=cbem_data_src @PMD_B_LD_MISS_L1_PER_INST_PERC;Percentage of L1 load misses per instruction;% @@PM_LD_MISS_L1/PM_INST_CMPL*100 @@@group=cbem_data_src @PMD_B_L1_LD_MISS_RES_CYC;Average L1 Load miss resolution cycles;cycles @@PM_LD_MISS_L1_CYC/PM_LD_MISS_L1 @@@group=cbem_data_src @PMD_B_L1_LD_MISS_FROM_L2_PERC;Percentage of L1 load misses that are sourced from L2;% @@PM_DATA_FROM_L2/(PM_LD_MISS_L1)*100 @@@group=cbem_data_src @PMD_B_L2_LAT_AVER;L1 load misses from L2, average L2 latency;cycles @@PM_DATA_FROM_L2_CYC/PM_DATA_FROM_L2 @@@group=cbem_data_src @PMD_B_L2_CYC_PER_INST;L1 load misses from L2, CPI component @@PM_DATA_FROM_L2_CYC/PM_INST_CMPL @@@group=cbem_data_src calc_latency1 = (PM_DATA_FROM_L2_CYC/PM_DATA_FROM_L2)*(PM_DATA_FROM_L2/PM_LD_MISS_L1); @PMD_B_L1_LD_MISS_FROM_L21_PERC;Percentage of L1 load misses that are sourced from L21;% @@PM_DATA_FROM_L21/(PM_LD_MISS_L1)*100 @@@group=cbem_data_src @PMD_B_L21_LAT_AVER;L1 load misses from L21, average L21 latency;cycles @@PM_DATA_FROM_L21_CYC/PM_DATA_FROM_L21 @@@group=cbem_data_src @PMD_B_L21_CYC_PER_INST;L1 load misses from L21, CPI 
component @@PM_DATA_FROM_L21_CYC/PM_INST_CMPL @@@group=cbem_data_src calc_latency2 = (PM_DATA_FROM_L21_CYC/PM_DATA_FROM_L21) * (PM_DATA_FROM_L21/PM_LD_MISS_L1); @PMD_B_L1_LD_MISS_FROM_L25_SHR_PERC;Percentage of L1 load misses that are sourced from L2.5 shared intervention;% @@PM_DATA_FROM_L25_SHR/(PM_LD_MISS_L1)*100 @@@group=cbem_data_src @PMD_B_L25_SHR_LAT_AVER;L1 load misses from L2.5 shared intervention, average L2.5 Shr latency;cycles @@PM_DATA_FROM_L25_SHR_CYC/PM_DATA_FROM_L25_SHR @@@group=cbem_data_src @PMD_B_L25_SHR_CYC_PER_INST;L1 load misses from L2.5 shared intervention, CPI component @@PM_DATA_FROM_L25_SHR_CYC/PM_INST_CMPL @@@group=cbem_data_src calc_latency3 = (PM_DATA_FROM_L25_SHR_CYC/PM_DATA_FROM_L25_SHR) * (PM_DATA_FROM_L25_SHR/PM_LD_MISS_L1); @PMD_B_L1_LD_MISS_FROM_L25_MOD_PERC;Percentage of L1 load misses that are sourced from L2.5 modified intervention;% @@PM_DATA_FROM_L25_MOD/(PM_LD_MISS_L1)*100 @@@group=cbem_data_src @PMD_B_L25_MOD_LAT_AVER;L1 load misses from L2.5 modified intervention, average L2.5 Mod latency;cycles @@PM_DATA_FROM_L25_MOD_CYC/PM_DATA_FROM_L25_MOD @@@group=cbem_data_src @PMD_B_L25_MOD_CYC_PER_INST;L1 load misses from L2.5 modified intervention, CPI component @@PM_DATA_FROM_L25_MOD_CYC/PM_INST_CMPL @@@group=cbem_data_src calc_latency4 = (PM_DATA_FROM_L25_MOD_CYC/PM_DATA_FROM_L25_MOD) * (PM_DATA_FROM_L25_MOD/PM_LD_MISS_L1); @PMD_B_L1_LD_MISS_FROM_L3_PERC;Percentage of L1 load misses that are sourced from L3;% @@PM_DATA_FROM_L3/(PM_LD_MISS_L1)*100 @@@group=cbem_data_src @PMD_B_L3_LAT_AVER;L1 load misses from L3, average L3 latency;cycles @@PM_DATA_FROM_L3_CYC/PM_DATA_FROM_L3 @@@group=cbem_data_src @PMD_B_L3_CYC_PER_INST;L1 load misses from L3, CPI component @@PM_DATA_FROM_L3_CYC/PM_INST_CMPL @@@group=cbem_data_src calc_latency5 = (PM_DATA_FROM_L3_CYC/PM_DATA_FROM_L3) * (PM_DATA_FROM_L3/PM_LD_MISS_L1); @PMD_B_L1_LD_MISS_FROM_L35_SHR_PERC;Percentage of L1 load misses that are sourced from L3.5 shared intervention;% 
@@PM_DATA_FROM_L35_SHR/(PM_LD_MISS_L1)*100 @@@group=cbem_data_src @PMD_B_L35_SHR_LAT_AVER;L1 load misses from L3.5 shared intervention, average L3.5 Shr latency;cycles @@PM_DATA_FROM_L35_SHR_CYC/PM_DATA_FROM_L35_SHR @@@group=cbem_data_src @PMD_B_L35_SHR_CYC_PER_INST;L1 load misses from L3.5 shared intervention, CPI component @@PM_DATA_FROM_L35_SHR_CYC/PM_INST_CMPL @@@group=cbem_data_src calc_latency6 = (PM_DATA_FROM_L35_SHR_CYC/PM_DATA_FROM_L35_SHR) * (PM_DATA_FROM_L35_SHR/PM_LD_MISS_L1); @PMD_B_L1_LD_MISS_FROM_L35_MOD_PERC;Percentage of L1 load misses that are sourced from L3.5 modified intervention;% @@PM_DATA_FROM_L35_MOD/(PM_LD_MISS_L1)*100 @@@group=cbem_data_src @PMD_B_L35_MOD_LAT_AVER;L1 load misses from L3.5 modified intervention, average L3.5 Mod latency;cycles @@PM_DATA_FROM_L35_MOD_CYC/PM_DATA_FROM_L35_MOD @@@group=cbem_data_src @PMD_B_L35_MOD_CYC_PER_INST;L1 load misses from L3.5 modified intervention, CPI component @@PM_DATA_FROM_L35_MOD_CYC/PM_INST_CMPL @@@group=cbem_data_src calc_latency7 = (PM_DATA_FROM_L35_MOD_CYC/PM_DATA_FROM_L35_MOD) * (PM_DATA_FROM_L35_MOD/PM_LD_MISS_L1); @PMD_B_L1_LD_MISS_FROM_RL2L3_SHR_PERC;Percentage of L1 load misses that are sourced from remote L2/L3 shared intervention;% @@PM_DATA_FROM_RL2L3_SHR/(PM_LD_MISS_L1)*100 @@@group=cbem_data_src @PMD_B_RL2L3_SHR_LAT_AVER;L1 load misses from remote L2/L3 shared intervention, average Remote L2/L3 Shr latency;cycles @@PM_DATA_FROM_RL2L3_SHR_CYC/PM_DATA_FROM_RL2L3_SHR @@@group=cbem_data_src @PMD_B_RL2L3_SHR_CYC_PER_INST;L1 load misses from remote L2/L3 shared intervention, CPI component @@PM_DATA_FROM_RL2L3_SHR_CYC/PM_INST_CMPL @@@group=cbem_data_src calc_latency8 = (PM_DATA_FROM_RL2L3_SHR_CYC/PM_DATA_FROM_RL2L3_SHR) * (PM_DATA_FROM_RL2L3_SHR/PM_LD_MISS_L1); @PMD_B_L1_LD_MISS_FROM_RL2L3_MOD_PERC;Percentage of L1 load misses that are sourced from remote L2/L3 modified intervention;% @@PM_DATA_FROM_RL2L3_MOD/(PM_LD_MISS_L1)*100 @@@group=cbem_data_src @PMD_B_RL2L3_MOD_LAT_AVER;L1 load 
misses from remote L2/L3 modified intervention, average Remote L2/L3 Mod latency;cycles @@PM_DATA_FROM_RL2L3_MOD_CYC/PM_DATA_FROM_RL2L3_MOD @@@group=cbem_data_src @PMD_B_RL2L3_MOD_CYC_PER_INST;L1 load misses from remote L2/L3 modified intervention, CPI component @@PM_DATA_FROM_RL2L3_MOD_CYC/PM_INST_CMPL @@@group=cbem_data_src calc_latency9 = (PM_DATA_FROM_RL2L3_MOD_CYC/PM_DATA_FROM_RL2L3_MOD) * (PM_DATA_FROM_RL2L3_MOD/PM_LD_MISS_L1); @PMD_B_L1_LD_MISS_FROM_DL2L3_SHR_PERC;Percentage of L1 load misses that are sourced from distant L2/L3 shared intervention;% @@PM_DATA_FROM_DL2L3_SHR/(PM_LD_MISS_L1)*100 @@@group=cbem_data_src @PMD_B_DL2L3_SHR_LAT_AVER;L1 load misses from distant L2/L3 shared intervention, average Distant L2/L3 Shr latency;cycles @@PM_DATA_FROM_DL2L3_SHR_CYC/PM_DATA_FROM_DL2L3_SHR @@@group=cbem_data_src @PMD_B_DL2L3_SHR_CYC_PER_INST;L1 load misses from distant L2/L3 shared intervention, CPI component @@PM_DATA_FROM_DL2L3_SHR_CYC/PM_INST_CMPL @@@group=cbem_data_src calc_latency10 = (PM_DATA_FROM_DL2L3_SHR_CYC/PM_DATA_FROM_DL2L3_SHR) * (PM_DATA_FROM_DL2L3_SHR/PM_LD_MISS_L1); @PMD_B_L1_LD_MISS_FROM_DL2L3_MOD_PERC;Percentage of L1 load misses that are sourced from distant L2/L3 modified intervention;% @@PM_DATA_FROM_DL2L3_MOD/(PM_LD_MISS_L1)*100 @@@group=cbem_data_src @PMD_B_DL2L3_MOD_LAT_AVER;L1 load misses from distant L2/L3 modified intervention, average Distant L2/L3 Mod latency;cycles @@PM_DATA_FROM_DL2L3_MOD_CYC/PM_DATA_FROM_DL2L3_MOD @@@group=cbem_data_src @PMD_B_DL2L3_MOD_CYC_PER_INST;L1 load misses from distant L2/L3 modified intervention, CPI component @@PM_DATA_FROM_DL2L3_MOD_CYC/PM_INST_CMPL @@@group=cbem_data_src calc_latency11 = (PM_DATA_FROM_DL2L3_MOD_CYC/PM_DATA_FROM_DL2L3_MOD) * (PM_DATA_FROM_DL2L3_MOD/PM_LD_MISS_L1); @PMD_B_L1_LD_MISS_FROM_LMEM_PERC;Percentage of L1 load misses that are sourced from Local memory;% @@PM_DATA_FROM_LMEM/(PM_LD_MISS_L1)*100 @@@group=cbem_data_src @PMD_B_LMEM_LAT_AVER;L1 load misses from Local memory, average 
local memory latency;cycles @@PM_DATA_FROM_LMEM_CYC/PM_DATA_FROM_LMEM @@@group=cbem_data_src @PMD_B_LMEM_CYC_PER_INST;L1 load misses from Local memory, CPI component @@PM_DATA_FROM_LMEM_CYC/PM_INST_CMPL @@@group=cbem_data_src calc_latency12 = (PM_DATA_FROM_LMEM_CYC/PM_DATA_FROM_LMEM) * (PM_DATA_FROM_LMEM/PM_LD_MISS_L1); @PMD_B_L1_LD_MISS_FROM_RMEM_PERC;Percentage of L1 load misses that are sourced from Remote memory;% @@PM_DATA_FROM_RMEM/(PM_LD_MISS_L1)*100 @@@group=cbem_data_src @PMD_B_RMEM_LAT_AVER;L1 load misses from Remote memory, average remote memory latency;cycles @@PM_DATA_FROM_RMEM_CYC/PM_DATA_FROM_RMEM @@@group=cbem_data_src @PMD_B_RMEM_CYC_PER_INST;L1 load misses from Remote memory, CPI component @@PM_DATA_FROM_RMEM_CYC/PM_INST_CMPL @@@group=cbem_data_src calc_latency13 = (PM_DATA_FROM_RMEM_CYC/PM_DATA_FROM_RMEM) * (PM_DATA_FROM_RMEM/PM_LD_MISS_L1); @PMD_B_L1_LD_MISS_FROM_DMEM_PERC;Percentage of L1 load misses that are sourced from Distant memory;% @@PM_DATA_FROM_DMEM/(PM_LD_MISS_L1)*100 @@@group=cbem_data_src @PMD_B_DMEM_LAT_AVER;L1 load misses from Distant memory, average distant memory latency;cycles @@PM_DATA_FROM_DMEM_CYC/PM_DATA_FROM_DMEM @@@group=cbem_data_src @PMD_B_DMEM_CYC_PER_INST;L1 load misses from Distant memory, CPI component @@PM_DATA_FROM_DMEM_CYC/PM_INST_CMPL @@@group=cbem_data_src calc_latency14 = (PM_DATA_FROM_DMEM_CYC/PM_DATA_FROM_DMEM) * (PM_DATA_FROM_DMEM/PM_LD_MISS_L1); @PMD_B_L1_LD_MISS_FROM_MEM_DBL_PERC;Percentage of L1 load misses that are sourced from double pump memory;% @@PM_DATA_FROM_MEM_DP/(PM_LD_MISS_L1)*100 @@@group=cbem_data_src @PMD_B_MEM_DBL_LAT_AVER;L1 load misses from double pump memory, average double pump memory latency;cycles @@PM_DATA_FROM_MEM_DP_CYC/PM_DATA_FROM_MEM_DP @@@group=cbem_data_src @PMD_B_MEM_DBL_CYC_PER_INST;L1 load misses from double pump memory, CPI component @@PM_DATA_FROM_MEM_DP_CYC/PM_INST_CMPL @@@group=cbem_data_src #calc_latency15 = (PM_DATA_FROM_MEM_DBL_CYC/PM_DATA_FROM_MEM_DBL) * 
(PM_DATA_FROM_MEM_DBL/PM_LD_MISS_L1) #calc_latency = calc_latency1 + calc_latency2 + calc_latency3 + calc_latency4 + calc_latency5 + calc_latency6 + calc_latency7 + calc_latency8 + calc_latency9 + calc_latency10 + calc_latency11 + calc_latency12 + calc_latency13 + calc_latency14 + calc_latency15 calc_latency = calc_latency1 + calc_latency2 + calc_latency3 + calc_latency4 + calc_latency5 + calc_latency6 + calc_latency7 + calc_latency8 + calc_latency9 + calc_latency10 + calc_latency11 + calc_latency12 + calc_latency13 + calc_latency14; @PMD_B_l1_MISS_LAT;Average L1 miss latency from breakdown;cycles @@calc_latency @@@group=cbem_data_src # # translation overheads # @PMD_B_DERAT_MISS_PER_LDST_REF_PERC;Percentage DERAT misses per load/store ref;% @@PM_LSU_DERAT_MISS/(PM_LD_REF_L1+PM_ST_REF_L1)*100 @@@group=cbem_translation_overheads @PMD_B_DERAT_MISS_PER_INST_CMPL_PERC;Percentage DERAT misses per instruction completed;% @@PM_LSU_DERAT_MISS/PM_INST_CMPL*100 @@@group=cbem_translation_overheads @PMD_B_DERAT_MISS_LAT;Average DERAT miss latency;cycles @@PM_LSU_DERAT_MISS_CYC/PM_LSU_DERAT_MISS @@@group=cbem_translation_overheads @PMD_B_DERAT_MISS_PTEG_FROM_L2_PERC;On DERAT miss, PTEG source distribution from L2;% @@PM_PTEG_FROM_L2/PM_LSU_DERAT_MISS*100 @@@group=cbem_translation_overheads @PMD_B_DERAT_MISS_PTEG_FROM_L21_PERC;On DERAT miss, PTEG source distribution from L21;% @@PM_PTEG_FROM_L21/PM_LSU_DERAT_MISS*100 @@@group=cbem_translation_overheads @PMD_B_DERAT_MISS_PTEG_FROM_L25_SHR_PERC;On DERAT miss, PTEG source distribution from L2.5 shared intervention;% @@PM_PTEG_FROM_L25_SHR/PM_LSU_DERAT_MISS*100 @@@group=cbem_translation_overheads @PMD_B_DERAT_MISS_PTEG_FROM_L25_MOD_PERC;On DERAT miss, PTEG source distribution from L2.5 modified intervention;% @@PM_PTEG_FROM_L25_MOD/PM_LSU_DERAT_MISS*100 @@@group=cbem_translation_overheads @PMD_B_DERAT_MISS_PTEG_FROM_L3_PERC;On DERAT miss, PTEG source distribution from L3;% @@PM_PTEG_FROM_L3/PM_LSU_DERAT_MISS*100 
@@@group=cbem_translation_overheads @PMD_B_DERAT_MISS_PTEG_FROM_L35_SHR_PERC;On DERAT miss, PTEG source distribution from L3.5 shared intervention;% @@PM_PTEG_FROM_L35_SHR/PM_LSU_DERAT_MISS*100 @@@group=cbem_translation_overheads @PMD_B_DERAT_MISS_PTEG_FROM_L35_MOD_PERC;On DERAT miss, PTEG source distribution from L3.5 modified intervention;% @@PM_PTEG_FROM_L35_MOD/PM_LSU_DERAT_MISS*100 @@@group=cbem_translation_overheads @PMD_B_DERAT_MISS_PTEG_FROM_DL2L3_MOD_PERC;On DERAT miss, PTEG source distribution from Distant L2/L3 modified intervention;% @@PM_PTEG_FROM_DL2L3_MOD/PM_LSU_DERAT_MISS*100 @@@group=cbem_translation_overheads @PMD_B_DERAT_MISS_PTEG_FROM_LMEM_PERC;On DERAT miss, PTEG source distribution from Local memory;% @@PM_PTEG_FROM_LMEM/PM_LSU_DERAT_MISS*100 @@@group=cbem_translation_overheads @PMD_B_DERAT_MISS_PTEG_FROM_RMEM_PERC;On DERAT miss, PTEG source distribution from Remote memory;% @@PM_PTEG_FROM_RMEM/PM_LSU_DERAT_MISS*100 @@@group=cbem_translation_overheads @PMD_B_DERAT_MISS_PTEG_FROM_DMEM_PERC;On DERAT miss, PTEG source distribution from Distant memory;% @@PM_PTEG_FROM_DMEM/PM_LSU_DERAT_MISS*100 @@@group=cbem_translation_overheads @PMD_B_DERAT_MISS_PTEG_FROM_MEM_DBL_PERC;On DERAT miss, PTEG source distribution from Double-pump memory;% @@PM_PTEG_FROM_MEM_DP/PM_LSU_DERAT_MISS*100 @@@group=cbem_translation_overheads @PMD_B_IERAT_MISS_PER_INST_CMP_PERC;Percentage IERAT misses per instruction completed;% @@PM_IERAT_MISS/PM_INST_CMPL*100 @@@group=cbem_translation_overheads @PMD_B_SLB_MISS_PER_INST_CMP_PERC;Percentage SLB misses per instruction completed;% @@PM_SLB_MISS/PM_INST_CMPL*100 @@@group=cbem_translation_overheads @PMD_B_ISLB_MISS_PER_INST_CMP_PERC;Percentage ISLB misses per instruction completed;% @@PM_ISLB_MISS/PM_INST_CMPL*100 @@@group=cbem_translation_overheads @PMD_B_DSLB_MISS_PER_INST_CMP_PERC;Percentage DSLB misses per instruction completed;% @@PM_DSLB_MISS/PM_INST_CMPL*100 @@@group=cbem_translation_overheads # # branch predictions # 
@PMD_B_BR_PRED_PER_INST_CMP_PERC;Percentage of branches per instruction completed;% @@PM_BR_PRED/PM_INST_CMPL*100 @@@group=cbem_branch_pred @PMD_B_BR_MPRED_PER_INST_CMP_PERC;Percentage of mispredicted branches per instruction completed;% @@PM_BR_MPRED/PM_INST_CMPL*100 @@@group=cbem_branch_pred @PMD_B_BR_MPRED_CR_PER_BR_MPRED_PERC;Percentage of mispredicts due to incorrect direction;% @@PM_BR_MPRED_CR/PM_BR_MPRED*100 @@@group=cbem_branch_pred @PMD_B_BR_MPRED_TA_PER_BR_MPRED_PERC;Percentage of mispredicts due to incorrect target address;% @@PM_BR_MPRED_TA/PM_BR_MPRED*100 @@@group=cbem_branch_pred @PMD_B_BR_MPRED_CCACHE_PER_BR_MPRED_PERC;Percentage of mispredicts due to incorrect target address due to count cache;% @@PM_BR_MPRED_CCACHE/PM_BR_MPRED*100 @@@group=cbem_branch_pred @PMD_B_BR_MPRED_COUNT_PER_BR_MPRED_PERC;Percentage of mispredicts due to incorrect target address due to count register;% @@PM_BR_MPRED_COUNT/PM_BR_MPRED*100 @@@group=cbem_branch_pred @PMD_B_BR_MPRED_LINK_PER_BR_MPRED_PERC;Percentage of mispredicts due to incorrect target address due to link stack ;% @@(PM_BR_MPRED_TA - (PM_BR_MPRED_COUNT+PM_BR_MPRED_CCACHE))/PM_BR_MPRED*100 @@@group=cbem_branch_pred @PMD_B_BR_PRED_CR_PER_BR_PRED_PERC;Percentage of branches that are CR predictions;% @@PM_BR_PRED_CR/PM_BR_PRED*100 @@@group=cbem_branch_pred @PMD_B_BR_PRED_CCACHE_PER_BR_PRED_PERC;Percentage of branches that are count cache predictions;% @@PM_BR_PRED_CCACHE/PM_BR_PRED*100 @@@group=cbem_branch_pred @PMD_B_BR_PRED_LSTACK_PER_BR_PRED_PERC;Percentage of branches that are link stack predictions;% @@PM_BR_PRED_LSTACK/PM_BR_PRED*100 @@@group=cbem_branch_pred # # smt thread priority # @PMD_B_CYC_AT_THRD_PRIO_1_PERC;Percentage of run cycles spent at very low thread priority (1);% @@PM_THRD_PRIO_1_CYC/PM_CYC*100 @@@group=cbem_smt_thread_prio @PMD_B_CYC_AT_THRD_PRIO_2_PERC;Percentage of run cycles spent at low thread priority (2);% @@PM_THRD_PRIO_2_CYC/PM_CYC*100 @@@group=cbem_smt_thread_prio 
@PMD_B_CYC_AT_THRD_PRIO_3_PERC;Percentage of run cycles spent at medium low thread priority (3);% @@PM_THRD_PRIO_3_CYC/PM_CYC*100 @@@group=cbem_smt_thread_prio @PMD_B_CYC_AT_THRD_PRIO_4_PERC;Percentage of run cycles spent at normal thread priority (4);% @@PM_THRD_PRIO_4_CYC/PM_CYC*100 @@@group=cbem_smt_thread_prio @PMD_B_CYC_AT_THRD_PRIO_5_PERC;Percentage of run cycles spent at medium high thread priority (5);% @@PM_THRD_PRIO_5_CYC/PM_CYC*100 @@@group=cbem_smt_thread_prio @PMD_B_CYC_AT_THRD_PRIO_6_PERC;Percentage of run cycles spent at high thread priority (6);% @@PM_THRD_PRIO_6_CYC/PM_CYC*100 @@@group=cbem_smt_thread_prio @PMD_B_CYC_AT_THRD_PRIO_7_PERC;Percentage of run cycles spent at extra high thread priority (7);% @@PM_THRD_PRIO_7_CYC/PM_CYC*100 @@@group=cbem_smt_thread_prio @PMD_B_CYC_AT_THRD_PRIO_DIFF_0_PERC;Percentage of run cycles with no thread priority differences between siblings;% @@PM_THRD_PRIO_DIFF_0_CYC/PM_CYC*100 @@@group=cbem_smt_thread_prio @PMD_B_CYC_AT_THRD_PRIO_DIFF_12_PERC;Percentage of run cycles with thread priority differences between siblings equal to 1 or 2;% @@PM_THRD_PRIO_DIFF_1or2_CYC/PM_CYC*100 @@@group=cbem_smt_thread_prio @PMD_B_CYC_AT_THRD_PRIO_DIFF_34_PERC;Percentage of run cycles with thread priority differences between siblings equal to 3 or 4;% @@PM_THRD_PRIO_DIFF_3or4_CYC/PM_CYC*100 @@@group=cbem_smt_thread_prio @PMD_B_CYC_AT_THRD_PRIO_DIFF_56_PERC;Percentage of run cycles with thread priority differences between siblings equal to 5 or 6;% @@PM_THRD_PRIO_DIFF_5or6_CYC/PM_CYC*100 @@@group=cbem_smt_thread_prio # # fabric statistics # @PMD_B_FAB_CMD_ISSUED_PER_INST_CMPL;Fabric commands issued per 100 instructions @@PM_FAB_CMD_ISSUED/(4*PM_INST_CMPL)*100 @@@group=cbem_fab_stat @PMD_B_FAB_DCLAIM_PER_FAB_CMD_PERC;Percentage of fabric commands that are DClaims;% @@PM_FAB_DCLAIM/PM_FAB_CMD_ISSUED*100 @@@group=cbem_fab_stat @PMD_B_FAB_DMA_PER_FAB_CMD_PERC;Percentage of fabric commands that are DMA operations;% 
@@PM_FAB_DMA/PM_FAB_CMD_ISSUED*100 @@@group=cbem_fab_stat @PMD_B_FAB_MMIO_PER_FAB_CMD_PERC;Percentage of fabric commands that are MMIO operations;% @@PM_FAB_MMIO/PM_FAB_CMD_ISSUED*100 @@@group=cbem_fab_stat @PMD_B_FAB_RETRIED_PER_FAB_CMD_PERC;Percentage of fabric commands that are retried;% @@PM_FAB_CMD_RETRIED/PM_FAB_CMD_ISSUED*100 @@@group=cbem_fab_stat @PMD_B_FAB_NODE_PUMP_PER_FAB_CMD_PERC;Percentage of fabric commands that are node pumps;% @@PM_FAB_NODE_PUMP/PM_FAB_CMD_ISSUED*100 @@@group=cbem_fab_stat @PMD_B_FAB_RETRY_NODE_PUMP_PERC;Percentage of node pumps retried;% @@PM_FAB_RETRY_NODE_PUMP/PM_FAB_NODE_PUMP*100 @@@group=cbem_fab_stat @PMD_B_FAB_RETRY_NODE_PUMP_PER_FAB_CMD_PERC;Percentage of fabric retries that are node pumps;% @@PM_FAB_RETRY_NODE_PUMP/PM_FAB_CMD_RETRIED*100 @@@group=cbem_fab_stat @PMD_B_FAB_SYS_PUMP_PER_FAB_CMD_PERC;Percentage of fabric commands that are system pumps;% @@PM_FAB_SYS_PUMP/PM_FAB_CMD_ISSUED*100 @@@group=cbem_fab_stat @PMD_B_FAB_RETRY_SYS_PUMP_PERC;Percentage of system pumps retried;% @@PM_FAB_RETRY_SYS_PUMP/PM_FAB_SYS_PUMP*100 @@@group=cbem_fab_stat @PMD_B_FAB_RETRY_SYS_PUMP_PER_FAB_CMD_PERC;Percentage of fabric retries that are system pumps;% @@PM_FAB_RETRY_SYS_PUMP/PM_FAB_CMD_RETRIED*100 @@@group=cbem_fab_stat @PMD_B_FAB_ADDR_COLL_PER_FAB_CMD_PERC;Percentage of fabric commands issued that have address collisions;% @@PM_FAB_ADDR_COLLISION/PM_FAB_CMD_ISSUED*100 @@@group=cbem_fab_stat ############# # AEM Metrics ############# @PMD_CPI;Cycles Per Instruction (Run) @@PM_RUN_CYC / PM_RUN_INST_CMPL @@@group=metrics_AEM @PMD_CPU_BUSY;CPU Busy;% @@(PM_RUN_CYC * 100) / PM_CYC @@@group=metrics_AEM @PMD_HYPERVISOR_BUSY;Time in Hypervisor Mode;% @@(PM_HV_CYC * 100) / PM_CYC @@@group=metrics_AEM @PMD_RUN_INST_PER_CYC;Instructions per cycle @@PM_RUN_INST_CMPL / PM_CYC @@@group=metrics_AEM @PMD_PRC_RUN_INST_DISP_CMPL;% Instructions dispatched that completed;% @@PM_RUN_INST_CMPL * 100 / PM_INST_DISP @@@group=metrics_AEM 
@PMD_BRANCH_PREDICTION;% Branches correctly predicted;% @@(1-(PM_BR_MPRED / PM_BR_PRED)) * 100 @@@group=metrics_AEM @PMD_PCT_MPRED_CR;% Branch mispredicts due to CR prediction;% @@(PM_BR_MPRED_CR * 100) / PM_BR_MPRED @@@group=metrics_AEM @PMD_PCT_MPRED_TA;% Branch mispredicts due to target prediction;% @@(PM_BR_MPRED_TA * 100) / PM_BR_MPRED @@@group=metrics_AEM @PMD_PCT_BR_TAKEN;% Branches Taken;% @@(PM_BR_TAKEN * 100) / PM_BRU_FIN @@@group=metrics_AEM @PMD_DL1_LD_MISS_RATE;Load Miss Rate (per instruction) @@PM_LD_MISS_L1 / PM_RUN_INST_CMPL @@@group=metrics_AEM @PMD_DL1_LD_MISS_RATIO;Load Miss Ratio (per reference);% @@PM_LD_MISS_L1 * 100 / PM_LD_REF_L1 @@@group=metrics_AEM @PMD_DL1_MISS_LATENCY;Average DL1 miss latency @@PM_LD_MISS_L1_CYC / PM_LD_MISS_L1 @@@group=metrics_AEM @PMD_L2_LD_MISS_RATE;L2 Load Miss Rate (per instruction) @@PM_DATA_FROM_L2MISS / PM_RUN_INST_CMPL @@@group=metrics_AEM @PMD_L3_LD_MISS_RATE;L3 Load Miss Rate (per instruction) @@PM_DATA_FROM_L3MISS / PM_RUN_INST_CMPL @@@group=metrics_AEM @PMD_L1_LD_MISS_RELOAD;% of DL1 misses that result in a cache reload;% @@(PM_L1_DCACHE_RELOAD_VALID * 100) / PM_LD_MISS_L1 @@@group=metrics_AEM @PMD_LD_FROM_L2;% of DL1 reloads from L2;% @@(PM_DATA_FROM_L2 * 100) / PM_L1_DCACHE_RELOAD_VALID @@@group=metrics_AEM @PMD_LD_FROM_L21;% of DL1 reloads from Private L2, other core;% @@(PM_DATA_FROM_L21 * 100) / PM_L1_DCACHE_RELOAD_VALID @@@group=metrics_AEM @PMD_LD_FROM_L25_SHR;% of DL1 reloads from local L2 (Shared);% @@(PM_DATA_FROM_L25_SHR * 100) / PM_L1_DCACHE_RELOAD_VALID @@@group=metrics_AEM @PMD_LD_FROM_L25_MOD;% of DL1 reloads from local L2 (Modified);% @@(PM_DATA_FROM_L25_MOD * 100) / PM_L1_DCACHE_RELOAD_VALID @@@group=metrics_AEM @PMD_LD_FROM_L3;% of DL1 reloads from L3;% @@(PM_DATA_FROM_L3 * 100) / PM_L1_DCACHE_RELOAD_VALID @@@group=metrics_AEM @PMD_LD_FROM_L35_SHR;% of DL1 reloads from Local L3 (Shared) ;% @@(PM_DATA_FROM_L35_SHR * 100) / PM_L1_DCACHE_RELOAD_VALID @@@group=metrics_AEM @PMD_LD_FROM_L35_MOD;% 
of DL1 reloads from Local L3 (Modified) ;% @@(PM_DATA_FROM_L35_MOD * 100) / PM_L1_DCACHE_RELOAD_VALID @@@group=metrics_AEM @PMD_LD_FROM_RL2L3_SHR;% of DL1 reloads from Remote L2 or L3 (Shared) ;% @@(PM_DATA_FROM_RL2L3_SHR * 100) / PM_L1_DCACHE_RELOAD_VALID @@@group=metrics_AEM @PMD_LD_FROM_RL2L3_MOD;% of DL1 reloads from Remote L2 or L3 (Modified) ;% @@(PM_DATA_FROM_RL2L3_MOD * 100) / PM_L1_DCACHE_RELOAD_VALID @@@group=metrics_AEM @PMD_LD_FROM_DL2L3_MOD;% of DL1 reloads from Distant L2 or L3 (Modified) ;% @@(PM_DATA_FROM_DL2L3_MOD * 100) / PM_L1_DCACHE_RELOAD_VALID @@@group=metrics_AEM @PMD_LD_FROM_DL2L3_SHR;% of DL1 reloads from Distant L2 or L3 (Shared) ;% @@(PM_DATA_FROM_DL2L3_SHR * 100) / PM_L1_DCACHE_RELOAD_VALID @@@group=metrics_AEM @PMD_LD_FROM_LMEM;% of DL1 reloads from Local Memory;% @@(PM_DATA_FROM_LMEM * 100) / PM_L1_DCACHE_RELOAD_VALID @@@group=metrics_AEM @PMD_LD_FROM_RMEM;% of DL1 reloads from Remote Memory;% @@(PM_DATA_FROM_RMEM * 100) / PM_L1_DCACHE_RELOAD_VALID @@@group=metrics_AEM @PMD_LD_FROM_DMEM;% of DL1 reloads from Distant Memory;% @@(PM_DATA_FROM_DMEM * 100) / PM_L1_DCACHE_RELOAD_VALID @@@group=metrics_AEM @PMD_DERAT_MISS_RATE;DERAT Miss Rate (per instruction) @@PM_LSU_DERAT_MISS / PM_RUN_INST_CMPL @@@group=metrics_AEM @PMD_DERAT_MISS_LATENCY;Average Data ERAT miss latency @@PM_LSU_DERAT_MISS_CYC / PM_LSU_DERAT_MISS @@@group=metrics_AEM @PMD_DERAT_MISS_RELOAD;% of DERAT misses that result in an ERAT reload;% @@(PM_PTEG_RELOAD_VALID * 100) / PM_LSU_DERAT_MISS @@@group=metrics_AEM @PMD_PTEG_FROM_L2;% of DERAT reloads from L2;% @@(PM_PTEG_FROM_L2 * 100) / PM_PTEG_RELOAD_VALID @@@group=metrics_AEM @PMD_PTEG_FROM_L21;% of DERAT reloads from Private L2, other core;% @@(PM_PTEG_FROM_L21 * 100) / PM_PTEG_RELOAD_VALID @@@group=metrics_AEM @PMD_PTEG_FROM_L25_SHR;% of DERAT reloads from local L2 (Shared);% @@(PM_PTEG_FROM_L25_SHR * 100) / PM_PTEG_RELOAD_VALID @@@group=metrics_AEM @PMD_PTEG_FROM_L25_MOD;% of DERAT reloads from local L2 (Modified);% 
@@(PM_PTEG_FROM_L25_MOD * 100) / PM_PTEG_RELOAD_VALID @@@group=metrics_AEM @PMD_PTEG_FROM_L3;% of DERAT reloads from L3;% @@(PM_PTEG_FROM_L3 * 100) / PM_PTEG_RELOAD_VALID @@@group=metrics_AEM @PMD_PTEG_FROM_L35_SHR;% of DERAT reloads from Local L3 (Shared) ;% @@(PM_PTEG_FROM_L35_SHR * 100) / PM_PTEG_RELOAD_VALID @@@group=metrics_AEM @PMD_PTEG_FROM_L35_MOD;% of DERAT reloads from Local L3 (Modified) ;% @@(PM_PTEG_FROM_L35_MOD * 100) / PM_PTEG_RELOAD_VALID @@@group=metrics_AEM @PMD_PTEG_FROM_RL2L3_SHR;% of DERAT reloads from Remote L2 or L3 (Shared) ;% @@(PM_PTEG_FROM_RL2L3_SHR * 100) / PM_PTEG_RELOAD_VALID @@@group=metrics_AEM @PMD_PTEG_FROM_RL2L3_MOD;% of DERAT reloads from Remote L2 or L3 (Modified) ;% @@(PM_PTEG_FROM_RL2L3_MOD * 100) / PM_PTEG_RELOAD_VALID @@@group=metrics_AEM @PMD_PTEG_FROM_DL2L3_MOD;% of DERAT reloads from Distant L2 or L3 (Modified) ;% @@(PM_PTEG_FROM_DL2L3_MOD * 100) / PM_PTEG_RELOAD_VALID @@@group=metrics_AEM @PMD_PTEG_FROM_DL2L3_SHR;% of DERAT reloads from Distant L2 or L3 (Shared) ;% @@(PM_PTEG_FROM_DL2L3_SHR * 100) / PM_PTEG_RELOAD_VALID @@@group=metrics_AEM @PMD_PTEG_FROM_LMEM;% of DERAT reloads from Local Memory;% @@(PM_PTEG_FROM_LMEM * 100) / PM_PTEG_RELOAD_VALID @@@group=metrics_AEM @PMD_PTEG_FROM_RMEM;% of DERAT reloads from Remote Memory;% @@(PM_PTEG_FROM_RMEM * 100) / PM_PTEG_RELOAD_VALID @@@group=metrics_AEM @PMD_PTEG_FROM_DMEM;% of DERAT reloads from Distant Memory;% @@(PM_PTEG_FROM_DMEM * 100) / PM_PTEG_RELOAD_VALID @@@group=metrics_AEM @PMD_ICache_MISS_RATE;Instruction Cache Miss Rate (Per Instruction) @@PM_L1_ICACHE_MISS / PM_RUN_INST_CMPL @@@group=metrics_AEM @PMD_ICACHE_PREF_PCT;% of ICache reloads due to prefetch;% @@(PM_IC_PREF_WRITE * 100) / PM_INST_FETCH_CYC @@@group=metrics_AEM @PMD_ICache_MISS_RELOAD;Icache Fetchs per Icache Miss @@(PM_INST_FETCH_CYC - PM_IC_PREF_WRITE) / PM_L1_ICACHE_MISS @@@group=metrics_AEM @PMD_INST_FROM_L2;% of ICache reloads from L2;% @@(PM_INST_FROM_L2 * 100) / PM_INST_FETCH_CYC 
@@@group=metrics_AEM @PMD_INST_FROM_L21;% of ICache reloads from Private L2, other core;% @@(PM_INST_FROM_L21 * 100) / PM_INST_FETCH_CYC @@@group=metrics_AEM @PMD_INST_FROM_L25_SHR;% of ICache reloads from local L2 (Shared);% @@(PM_INST_FROM_L25_SHR * 100) / PM_INST_FETCH_CYC @@@group=metrics_AEM @PMD_INST_FROM_L25_MOD;% of ICache reloads from local L2 (Modified);% @@(PM_INST_FROM_L25_MOD * 100) / PM_INST_FETCH_CYC @@@group=metrics_AEM @PMD_INST_FROM_L3;% of ICache reloads from L3;% @@(PM_INST_FROM_L3 * 100) / PM_INST_FETCH_CYC @@@group=metrics_AEM @PMD_INST_FROM_L35_SHR;% of ICache reloads from Local L3 (Shared) ;% @@(PM_INST_FROM_L35_SHR * 100) / PM_INST_FETCH_CYC @@@group=metrics_AEM @PMD_INST_FROM_L35_MOD;% of ICache reloads from Local L3 (Modified) ;% @@(PM_INST_FROM_L35_MOD * 100) / PM_INST_FETCH_CYC @@@group=metrics_AEM @PMD_INST_FROM_RL2L3_SHR;% of ICache reloads from Remote L2 or L3 (Shared) ;% @@(PM_INST_FROM_RL2L3_SHR * 100) / PM_INST_FETCH_CYC @@@group=metrics_AEM @PMD_INST_FROM_RL2L3_MOD;% of ICache reloads from Remote L2 or L3 (Modified) ;% @@(PM_INST_FROM_RL2L3_MOD * 100) / PM_INST_FETCH_CYC @@@group=metrics_AEM @PMD_INST_FROM_DL2L3_MOD;% of ICache reloads from Distant L2 or L3 (Modified) ;% @@(PM_INST_FROM_DL2L3_MOD * 100) / PM_INST_FETCH_CYC @@@group=metrics_AEM @PMD_INST_FROM_DL2L3_SHR;% of ICache reloads from Distant L2 or L3 (Shared) ;% @@(PM_INST_FROM_DL2L3_SHR * 100) / PM_INST_FETCH_CYC @@@group=metrics_AEM @PMD_INST_FROM_LMEM;% of ICache reloads from Local Memory;% @@(PM_INST_FROM_LMEM * 100) / PM_INST_FETCH_CYC @@@group=metrics_AEM @PMD_INST_FROM_RMEM;% of ICache reloads from Remote Memory;% @@(PM_INST_FROM_RMEM * 100) / PM_INST_FETCH_CYC @@@group=metrics_AEM @PMD_INST_FROM_DMEM;% of ICache reloads from Distant Memory;% @@(PM_INST_FROM_DMEM * 100) / PM_INST_FETCH_CYC @@@group=metrics_AEM ######################### # Dispatch Hold Metrics # ######################### @PMD_Dispatch_Held_CPI;Total cycles @@PM_DPU_HELD @@@group=disp_hold 
@PMD_Miss_related;Dispatch holds waiting for a cache or DERAT miss after LLA ends @@PM_DPU_HELD_RESTART + PM_DPU_HELD_LLA_END + PM_DPU_HELD_CW @@@group=disp_hold @PMD_Restart_coming;Waiting for restart from LSU @@PM_DPU_HELD_RESTART @@@group=disp_hold @PMD_LLA_Ending;Load Look Ahead (LLA) ending @@PM_DPU_HELD_LLA_END @@@group=disp_hold @PMD_Cache_write;Data Cache write blocks dispatch @@PM_DPU_HELD_CW @@@group=disp_hold @PMD_SMT;Secondary thread stall due to not enough execution units available @@PM_DPU_HELD_SMT @@@group=disp_hold @PMD_STCX_CR;Stall instructions that use the CR after a stcx @@PM_DPU_HELD_STCX_CR @@@group=disp_hold @PMD_GPR;Holds due to GPR conflicts @@PM_DPU_HELD_GPR @@@group=disp_hold @PMD_LSU_Slow_Ops;Holds due to LSU Slow Ops @@PM_DPU_HELD_LSU_SOPS - PM_DPU_HELD_STCX_CR @@@group=disp_hold @PMD_Completion;Dispatch held due to Completion @@PM_DPU_HELD_COMPLETION @@@group=disp_hold @PMD_SPR;Holds due to SPR conflicts @@PM_DPU_HELD_SPR @@@group=disp_hold @PMD_FXU_Slow_Ops;Holds due to Slow FXU instructions @@PM_DPU_HELD_FXU_SOPS @@@group=disp_hold @PMD_Cross_Thread;Holds due to Cross Thread conflicts @@PM_DPU_HELD_XTHRD @@@group=disp_hold @PMD_XER;Holds due to XER conflicts @@PM_DPU_HELD_XER @@@group=disp_hold @PMD_CR_Logicals;Holds due to CR Logical instructions @@PM_DPU_HELD_CR_LOGICAL @@@group=disp_hold @PMD_LSU;Holds due to LSU instructions @@PM_DPU_HELD_LSU @@@group=disp_hold @PMD_Interrupt;Holds due to Interrupts pending @@PM_DPU_HELD_INT @@@group=disp_hold @PMD_ISync;Holds due to ISYNC instructions @@PM_DPU_HELD_ISYNC @@@group=disp_hold @PMD_Thread_Priority;Holds due to Thread Priority @@PM_DPU_HELD_THRD_PRIO @@@group=disp_hold @PMD_Multi_Cycle_FXU;Holds due to a non-Multiple or Divide instruction following a Multi-Cycle FXU instruction @@PM_DPU_HELD_FXU_MULTI @@@group=disp_hold @PMD_Multiple_GPR;Holds due to a Multi-Cycle FXU instruction @@PM_DPU_HELD_MULT_GPR @@@group=disp_hold @PMD_FPU_CR;Instructions waiting for CR after a Floating Point 
instruction with record format @@PM_DPU_HELD_FPU_CR @@@group=disp_hold @PMD_FPU_FX_Multiply;Dispatch Held due to a Fixed Point Multiply @@PM_DPU_HELD_FP_FX_MULT @@@group=disp_hold @PMD_FP_Issue_Queue;Instructions waiting for FP Issue Queue @@PM_DPU_HELD_FPQ @@@group=disp_hold @PMD_Instruction_TLB_SLB;Dispatch Held due to Instruction ITLB or ISLB @@PM_DPU_HELD_ITLB_ISLB @@@group=disp_hold @PMD_Recovery_Unit;Dispatch Held due to Recovery Unit @@PM_DPU_HELD_RU_WQ @@@group=disp_hold @PMD_Thermal;Dispatch Held due to Thermal @@PM_DPU_HELD_THERMAL @@@group=disp_hold @PMD_Power;Dispatch Held due to Power @@PM_DPU_HELD_POWER @@@group=disp_hold ############### # HPC Metrics # ############### # # Group count metrics # # number of flops in a fp divide CM_FDIV = 4; # number of flops in a fp sqrt CM_FSQRT = 4; # number of flops in a fp multiply-add CM_FMA = 2; @PMD_FLOP_HPC;fp ops counted @@((CM_FDIV * PM_FPU_FSQRT_FDIV) + (CM_FMA * PM_FPU_FMA) + PM_FPU_1FLOP) @@@group=hpc_metrics_grp_counts @@@description=floating point operations counted @PMD_GFLOPS_HPC;Floating point performance in gigaflops per second @@PMD_FLOP_HPC / total_time / 1000000000 @@@group=hpc_metrics_grp_counts @@@description=floating point performance in gigaflops per second (do not use PM_RUN_CYC here) @PMD_RUN_CYC_HPC;Total run cycles for thread. 
@@PM_RUN_CYC @@@group=hpc_metrics_grp_counts @@@description=Total run cycles for thread (under run latch control) @PMD_CYC_INST_CMPL_HPC;Cycles needed to handle all instruction groups @@PM_INST_CMPL / 5 @@@group=hpc_metrics_grp_counts @@@description=Assuming perfect scheduling, how many cycles would be needed to handle all instruction groups @PMD_CPI_HPC;cycles per completed instruction @@PM_RUN_CYC / PM_INST_CMPL @@@group=hpc_metrics_grp_counts,hpc_metrics_commercial,hpc_metrics_utilization @@@description=cycles per completed instruction @PMD_IPC_HPC;Completed instructions per cycle @@PM_INST_CMPL / PM_RUN_CYC @@@group=hpc_metrics_grp_counts @@@description=Completed instructions per cycle - this increases as performance increases, so it is a more intuitive metric than CPI # # GCT metrics # # # Stall metrics # # # Commercial metrics # @PMD_DATA_L1_MISS_HPC;Load and store L1 Data misses per instruction @@(PM_ST_MISS_L1 + PM_LD_MISS_L1) / PM_INST_CMPL @@@group=hpc_metrics_commercial @@@description=Load and store L1 Data misses per instruction @PMD_LSU_DERAT_MISS_HPC;Data ERAT misses per instruction @@PM_LSU_DERAT_MISS / PM_INST_CMPL @@@group=hpc_metrics_commercial @@@description=Data ERAT misses per instruction @PMD_PM_BR_MPRED_CR_HPC;Branch mispredictions due to CR per instruction @@PM_BR_MPRED_CR / PM_INST_CMPL @@@group=hpc_metrics_commercial @@@description=Branch mispredictions due to CR per instruction @PMD_PM_BR_MPRED_TA_HPC;Branch mispredictions due to TA per instruction @@PM_BR_MPRED_TA / PM_INST_CMPL @@@group=hpc_metrics_commercial @@@description=Branch mispredictions due to TA per instruction @PMD_INST_FROM_L2MISS_HPC;L2 Instruction requests missed per instruction @@PM_INST_FROM_L2MISS / PM_INST_CMPL @@@group=hpc_metrics_commercial @@@description=L2 Instruction requests missed per instruction @PMD_DATA_FROM_L2MISS_HPC;L2 Data requests missed per instruction @@PM_DATA_FROM_L2MISS / PM_INST_CMPL @@@group=hpc_metrics_commercial @@@description=L2 Data requests 
missed per instruction @PMD_MEMORY_HITS_HPC;Memory hits per instruction @@(PM_DATA_FROM_LMEM + PM_DATA_FROM_RMEM) / PM_INST_CMPL @@@group=hpc_metrics_commercial @@@description=Memory hits per instruction ( Larger => slower ) @PMD_RUN_NORM_HPC;Fraction of cycles spent on this (SMT) thread = 1.0 for ST @@PM_RUN_CYC / PM_CYC @@@group=hpc_metrics_utilization @@@description=Fraction of cycles spent on this (SMT) thread = 1.0 for ST # # LSU metrics # # # FXU metrics # @PMD_FXU_FIN_HPC;Fraction of cycles an FXU delivers a result @@(PM_FXU0_FIN + PM_FXU1_FIN) / PM_RUN_CYC @@@group=hpc_metrics_fxu @@@description=Fraction of cycles an FXU delivers a result @PMD_FXU0_FIN_HPC;Fraction of cycles that FXU0 is in use @@PM_FXU0_FIN / PM_RUN_CYC @@@group=hpc_metrics_fxu @@@description=Fraction of cycles that FXU0 is in use @PMD_FXU1_FIN_HPC;Fraction of cycles that FXU1 is in use @@PM_FXU1_FIN / PM_RUN_CYC @@@group=hpc_metrics_fxu @@@description=Fraction of cycles that FXU1 is in use # # FDU metrics # @PMD_FPU_FIN_HPC;Fraction of cycles an FPU delivers a result assuming 1 cycle per op @@PM_FPU_FIN / PM_RUN_CYC @@@group=hpc_metrics_fdu @@@description=Fraction of cycles an FPU delivers a result assuming 1 cycle per op # # Memory Bandwidth metrics #