# @(#)M 1.8 src/bos/usr/sbin/perf/pmapi/libpmapi/POWER4.dms, pmapi, bos720 1/23/08 08:51:25
# IBM_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# bos720 src/bos/usr/sbin/perf/pmapi/libpmapi/POWER4.dms 1.8
#
# Licensed Materials - Property of IBM
#
# COPYRIGHT International Business Machines Corp. 2005,2008
# All Rights Reserved
#
# US Government Users Restricted Rights - Use, duplication or
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
#
# IBM_PROLOG_END_TAG
#
# Global variables
# Line sizes that the traffic and bandwidth formulas multiply event counts by
# before dividing by (1024 * 1024).
CACHE_LINE_SIZE = 128;
MEM_LINE_SIZE = 128;
L1_LINE_SIZE = 128;
#
# Derived metric descriptions
# Every metric is made of three records:
#   @NAME;description[;unit]
#   @@formula
#   @@@group=<group name>
#
# Cycle count normalised by proc_freq and total_time, as a percentage.
@PMD_UTI_RATE;Utilization rate;%
@@(PM_CYC * 100 / proc_freq) / total_time
@@@group=performance
#
# Floating point loads plus stores, scaled by 1e-6.
@PMD_FP_LD_ST;Floating point load and store operations;M
@@(PM_LSU_LDF + PM_FPU_STF) * 0.000001
@@@group=performance
#
# Completed instructions per floating point load/store. The label names the
# floating point scope so it cannot be confused with PMD_INST_PER_LD_ST,
# which divides by all L1 load/store references.
@PMD_INST_PER_FP_LD_ST;Instructions per floating point load/store
@@PM_INST_CMPL / (PM_LSU_LDF + PM_FPU_STF)
@@@group=performance
#
# Completed instructions (scaled by 1e-6) per unit of total_time.
@PMD_MIPS;MIPS;MIPS
@@(0.000001 * PM_INST_CMPL) / total_time
@@@group=performance
#
@PMD_INST_PER_CYC;Instructions per cycle
@@PM_INST_CMPL / PM_CYC
@@@group=performance
#
# "units sum" variants add the FPU0 and FPU1 finish counters.
@PMD_HW_FP_PER_CYC_SUM;HW floating point instructions per Cycle (units sum)
@@(PM_FPU0_FIN + PM_FPU1_FIN) / PM_CYC
@@@group=performance
#
# User time is PM_CYC / proc_freq. The 0.000001 factor is required for the
# result to match the declared "M HWflops/s" unit, as in PMD_HW_FP_PER_UTIME
# and PMD_FLIPS_UTIME.
@PMD_HW_FP_PER_UTIME_SUM;HW floating point instructions / user time (units sum);M HWflops/s
@@(0.000001 * (PM_FPU0_FIN + PM_FPU1_FIN)) / (PM_CYC / proc_freq)
@@@group=performance
#
@PMD_HW_FP_RATE_SUM;HW floating point rate (units sum);M HWflops/s
@@0.000001 * (PM_FPU0_FIN + PM_FPU1_FIN) / total_time
@@@group=performance
#
# "flips" = FPU0 + FPU1 finishes + FMAs - floating point stores, scaled by 1e-6.
@PMD_FLIPS;Total floating point instructions + FMAs (flips);Mflip
@@ (PM_FPU0_FIN + PM_FPU1_FIN + PM_FPU_FMA - PM_FPU_STF) * 0.000001
@@@group=performance
#
# Flips per unit of total_time (labelled WCT in the description).
@PMD_FLIPS_WCT;Flip rate (flips / WCT);Mflip/s
@@((PM_FPU0_FIN + PM_FPU1_FIN + PM_FPU_FMA - PM_FPU_STF) * 0.000001) / total_time
@@@group=performance
#
# Flips per user time (PM_CYC / proc_freq).
@PMD_FLIPS_UTIME;Flips / user time;Mflip/s
@@((PM_FPU0_FIN + PM_FPU1_FIN + PM_FPU_FMA - PM_FPU_STF) * 0.000001) / (PM_CYC / proc_freq)
@@@group=performance
#
# FMA share of the flip count; each FMA is weighted by 2 in the numerator.
@PMD_FMA_PRC;FMA percentage;%
@@(2 * 100 * PM_FPU_FMA) / (PM_FPU0_FIN + PM_FPU1_FIN + PM_FPU_FMA - PM_FPU_STF)
@@@group=performance
#
# Flips per floating point load/store.
@PMD_FP_COMP;Floating point Computation intensity
@@(PM_FPU0_FIN + PM_FPU1_FIN + PM_FPU_FMA - PM_FPU_STF) / (PM_LSU_LDF + PM_FPU_STF)
@@@group=performance
#
# Flips with each divide counted HPM_DIV_WEIGHT times instead of once.
# HPM_DIV_WEIGHT is not defined in this file.
@PMD_FP_WEIGHT;Weighted floating point instructions;M
@@((PM_FPU0_FIN + PM_FPU1_FIN + PM_FPU_FMA - PM_FPU_STF) + (HPM_DIV_WEIGHT - 1) * PM_FPU_FDIV) * 0.000001
@@@group=performance
#
@PMD_FP_WEIGHT_RATE;Weighted float point instructions rate;M Wflip/s
@@(((PM_FPU0_FIN + PM_FPU1_FIN + PM_FPU_FMA - PM_FPU_STF) + (HPM_DIV_WEIGHT - 1) * PM_FPU_FDIV) * 0.000001) / total_time
@@@group=performance
#
# L2 accesses are taken to be the L1 load misses plus L1 store misses.
@PMD_DATA_L2;Total L2 data cache accesses;M
@@(PM_LD_MISS_L1 + PM_ST_MISS_L1) * 0.000001
@@@group=performance
#
@PMD_PRC_L2_ACCESS;% accesses from L2 per cycle;%
@@100 * (PM_LD_MISS_L1 + PM_ST_MISS_L1) / PM_CYC
@@@group=performance
#
# L1 misses times L1_LINE_SIZE, divided by 1024 * 1024.
@PMD_L2_TRAF;L2 traffic;MBytes
@@(PM_LD_MISS_L1 + PM_ST_MISS_L1) * L1_LINE_SIZE / (1024 * 1024)
@@@group=performance
#
@PMD_L2_BDW;L2 bandwidth per processor;MBytes/s
@@(PM_LD_MISS_L1 + PM_ST_MISS_L1) * L1_LINE_SIZE / (1024 * 1024) / total_time
@@@group=performance
#
@PMD_LD_ST;Total load and store operations;M
@@(PM_LD_REF_L1 + PM_ST_REF_L1) * 0.000001
@@@group=performance
#
@PMD_INST_PER_LD_ST;Instructions per load/store
@@PM_INST_CMPL / (PM_ST_REF_L1 + PM_LD_REF_L1)
@@@group=performance
#
@PMD_LD_PER_LD_MISS;Number of loads per load miss
@@PM_LD_REF_L1 / PM_LD_MISS_L1
@@@group=performance
#
@PMD_ST_PER_ST_MISS;Number of stores per store miss
@@PM_ST_REF_L1 / PM_ST_MISS_L1
@@@group=performance
#
@PMD_LD_ST_DISP_PER_L1_MISS;Number of load/stores dispatched per L1 miss
@@(PM_LD_REF_L1 + PM_ST_REF_L1) / (PM_LD_MISS_L1 + PM_ST_MISS_L1)
@@@group=performance
#
# 100 * (1 - misses / references) over loads and stores combined.
@PMD_L1_HIT_RATE;L1 cache hit rate;%
@@100 * (1 - (PM_LD_MISS_L1 + PM_ST_MISS_L1) / (PM_LD_REF_L1 + PM_ST_REF_L1))
@@@group=performance
#
# Latency estimates multiply an HPM_*_LATENCY constant (not defined in this
# file) by an event count and divide by proc_freq.
@PMD_L2_EST_LAT_AVG;Estimated latency from L2 (Average);s
@@HPM_AVG_L2_LATENCY * (PM_ST_MISS_L1 + PM_LD_MISS_L1) / proc_freq
@@@group=performance
#
@PMD_LD_PER_TLB;Number of loads per TLB miss
@@PM_LD_REF_L1 / PM_DTLB_MISS
@@@group=performance
#
@PMD_LD_ST_PER_TLB;Number of load/store per TLB miss
@@(PM_LD_REF_L1 + PM_ST_REF_L1) / PM_DTLB_MISS
@@@group=performance
#
@PMD_TLB_EST_LAT;Estimated latency from TLB miss;s
@@HPM_TLB_LATENCY * PM_DTLB_MISS / proc_freq
@@@group=performance
#
@PMD_PRC_INST_DISP_CMPL;% Instructions dispatched that completed;%
@@PM_INST_CMPL * 100 / PM_INST_DISP
@@@group=performance
#
# The count is scaled by 1e-6, so it carries the "M" unit like PMD_LD_ST.
@PMD_FX;Total Fixed point operations;M
@@(PM_FXU0_FIN + PM_FXU1_FIN) * 0.000001
@@@group=performance
#
@PMD_FX_PER_CYC;Fixed point operations per Cycle
@@(PM_FXU0_FIN + PM_FXU1_FIN) / PM_CYC
@@@group=performance
#
# Sum of the two mispredict counters over issued branches.
@PMD_MBR_PRC;Branches mispredicated percentage;%
@@((PM_BR_MPRED_CR + PM_BR_MPRED_TA) * 100) / PM_BR_ISSUED
@@@group=performance
#
# Variants based on the single PM_FPU_FIN counter instead of FPU0 + FPU1.
@PMD_HW_FP_PER_CYC;HW floating point instructions per Cycle
@@PM_FPU_FIN / PM_CYC
@@@group=performance
#
@PMD_HW_FP_PER_UTIME;HW floating point instructions / user time;M HWflops/s
@@(0.000001 * PM_FPU_FIN) / (PM_CYC / proc_freq)
@@@group=performance
#
@PMD_HW_FP_RATE;HW floating point rate;M HWflops/s
@@0.000001 * PM_FPU_FIN / total_time
@@@group=performance
#
# "Loads from L2" sums the L2, L2.5 (shared/modified) and L2.75
# (shared/modified) data source counters.
@PMD_L2_LD;Total Loads from L2;M
@@(PM_DATA_FROM_L2 + PM_DATA_FROM_L25_SHR + PM_DATA_FROM_L25_MOD + PM_DATA_FROM_L275_SHR + PM_DATA_FROM_L275_MOD) * 0.000001
@@@group=performance
#
# Traffic is a volume (no division by total_time), so the unit is MBytes,
# consistent with PMD_L2_TRAF and PMD_MEM_LD_TRAF.
@PMD_L2_LD_TRAF;L2 load traffic;MBytes
@@(PM_DATA_FROM_L2 + PM_DATA_FROM_L25_SHR + PM_DATA_FROM_L25_MOD + PM_DATA_FROM_L275_SHR + PM_DATA_FROM_L275_MOD) * CACHE_LINE_SIZE / (1024 * 1024)
@@@group=performance
#
@PMD_L2_LD_BDW;L2 load bandwidth per processor;MBytes/s
@@(PM_DATA_FROM_L2 + PM_DATA_FROM_L25_SHR + PM_DATA_FROM_L25_MOD + PM_DATA_FROM_L275_SHR + PM_DATA_FROM_L275_MOD) * CACHE_LINE_SIZE / (1024 * 1024) / total_time
@@@group=performance
#
# Loads satisfied beyond L2 (memory, L3, L3.5) over loads from all sources.
@PMD_L2_LD_MISS_RATE;L2 load miss rate;%
@@100 * (PM_DATA_FROM_MEM + PM_DATA_FROM_L3 + PM_DATA_FROM_L35) / (PM_DATA_FROM_L2 + PM_DATA_FROM_L25_SHR + PM_DATA_FROM_L25_MOD + PM_DATA_FROM_L275_SHR + PM_DATA_FROM_L275_MOD + PM_DATA_FROM_MEM + PM_DATA_FROM_L3 + PM_DATA_FROM_L35)
@@@group=performance
#
# Per-source latency constants applied to each L2-level data source.
@PMD_L2_LD_EST_LAT;Estimated latency from loads from L2;s
@@((HPM_L2_LATENCY * PM_DATA_FROM_L2) + (HPM_L25_LATENCY * (PM_DATA_FROM_L25_SHR + PM_DATA_FROM_L25_MOD)) + (HPM_L275_LATENCY * (PM_DATA_FROM_L275_SHR + PM_DATA_FROM_L275_MOD))) / proc_freq
@@@group=performance
#
@PMD_L2_LD_EST_LAT_AVG;Estimated latency from loads from L2 (Average);s
@@HPM_AVG_L2_LATENCY * (PM_DATA_FROM_L2 + PM_DATA_FROM_L25_SHR + PM_DATA_FROM_L25_MOD + PM_DATA_FROM_L275_SHR + PM_DATA_FROM_L275_MOD) / proc_freq
@@@group=performance
#
# "Loads from L3" sums the L3 and L3.5 data source counters.
@PMD_L3_LD;Total loads from L3;M
@@(PM_DATA_FROM_L3 + PM_DATA_FROM_L35) * 0.000001
@@@group=performance
#
# Volume, not rate: the unit is MBytes (see PMD_L3_LD_BDW for the rate).
@PMD_L3_LD_TRAF;L3 load traffic;MBytes
@@ (PM_DATA_FROM_L3 + PM_DATA_FROM_L35) * CACHE_LINE_SIZE / (1024 * 1024)
@@@group=performance
#
@PMD_L3_LD_BDW;L3 load bandwidth per processor;MBytes/s
@@(PM_DATA_FROM_L3 + PM_DATA_FROM_L35) * CACHE_LINE_SIZE / (1024 * 1024) / total_time
@@@group=performance
#
@PMD_L3_LD_MISS_RATE;L3 load miss rate;%
@@100 * (PM_DATA_FROM_MEM / (PM_DATA_FROM_L3 + PM_DATA_FROM_L35 + PM_DATA_FROM_MEM))
@@@group=performance
#
@PMD_L3_LD_EST_LAT;Estimated latency from loads from L3;s
@@((HPM_L3_LATENCY * PM_DATA_FROM_L3) + (HPM_L35_LATENCY * PM_DATA_FROM_L35)) / proc_freq
@@@group=performance
#
@PMD_L3_LD_EST_LAT_AVG;Estimated latency from loads from L3 (Average);s
@@HPM_AVG_L3_LATENCY * (PM_DATA_FROM_L3 + PM_DATA_FROM_L35) / proc_freq
@@@group=performance
#
# Memory loads use MEM_LINE_SIZE rather than CACHE_LINE_SIZE.
@PMD_MEM_LD_TRAF;Memory load traffic;MBytes
@@(PM_DATA_FROM_MEM * MEM_LINE_SIZE) / (1024 * 1024)
@@@group=performance
#
@PMD_MEM_LD_BDW;Memory load bandwidth per processor;MBytes/s
@@(PM_DATA_FROM_MEM * MEM_LINE_SIZE) / (1024 * 1024) / total_time
@@@group=performance
#
@PMD_MEM_LD_EST_LAT;Estimated latency from loads from memory;s
@@HPM_MEM_LATENCY * PM_DATA_FROM_MEM / proc_freq
@@@group=performance
#
# Loads per cycle from each level, as percentages.
@PMD_PRC_L2_LD;% loads from L2 per cycle;%
@@100 * (PM_DATA_FROM_L2 + PM_DATA_FROM_L25_SHR + PM_DATA_FROM_L25_MOD + PM_DATA_FROM_L275_SHR + PM_DATA_FROM_L275_MOD) / PM_CYC
@@@group=performance
#
@PMD_PRC_L3_LD;% loads from L3 per cycle;%
@@100 * (PM_DATA_FROM_L3 + PM_DATA_FROM_L35) / PM_CYC
@@@group=performance
#
@PMD_PRC_MEM_LD;% loads from memory per cycle;%
@@100 * PM_DATA_FROM_MEM / PM_CYC
@@@group=performance
#
# Alternative performance metric group
# Most metrics below are event counts divided by PM_INST_CMPL.
@PMD_TOTAL_CPI;Total CPI
@@PM_CYC / PM_INST_CMPL
@@@group=performance2
#
@PMD_CMPL_CYC;Completion cycles
@@PM_GRP_CMPL / PM_INST_CMPL
@@@group=performance2
#
@PMD_INST_GRP;Instructions per group
@@PM_INST_CMPL / PM_GRP_CMPL
@@@group=performance2
#
@PMD_GCT_EMPTY;Completion Table empty (GCT empty)
@@PM_GCT_EMPTY_CYC / PM_INST_CMPL
@@@group=performance2
#
@PMD_LD_REF_CMPL;Loads per Instruction
@@PM_LD_REF_L1 / PM_INST_CMPL
@@@group=performance2
#
@PMD_ST_REF_CMPL;Stores per Instruction
@@PM_ST_REF_L1 / PM_INST_CMPL
@@@group=performance2
#
@PMD_LD_MISS_CMPL;DL1 Load Miss per Instruction
@@PM_LD_MISS_L1 / PM_INST_CMPL
@@@group=performance2
#
@PMD_LD_MISS_REF;DL1 Load Miss per Reference
@@PM_LD_MISS_L1 / PM_LD_REF_L1
@@@group=performance2
#
@PMD_ST_MISS_CMPL;DL1 Store Miss per Instruction
@@PM_ST_MISS_L1 / PM_INST_CMPL
@@@group=performance2
#
# Store misses are divided by store references (PM_ST_REF_L1), mirroring
# PMD_LD_MISS_REF, which divides load misses by load references.
@PMD_ST_MISS_REF;DL1 Store Miss per Reference
@@PM_ST_MISS_L1 / PM_ST_REF_L1
@@@group=performance2
#
# Translation miss rates, each per completed instruction.
@PMD_ITLB_RATE;ITLB Miss rate
@@PM_ITLB_MISS / PM_INST_CMPL
@@@group=performance2
#
@PMD_ISLB_RATE;ISLB Miss rate
@@PM_ISLB_MISS / PM_INST_CMPL
@@@group=performance2
#
@PMD_DTLB_RATE;DTLB Miss rate
@@PM_DTLB_MISS / PM_INST_CMPL
@@@group=performance2
#
@PMD_DSLB_RATE;DSLB Miss rate
@@PM_DSLB_MISS / PM_INST_CMPL
@@@group=performance2
#
@PMD_DERAT_RATE;DERAT Miss rate
@@PM_LSU_DERAT_MISS / PM_INST_CMPL
@@@group=performance2
#
@PMD_IERAT_RATE;IERAT Miss rate
@@PM_IERAT_XLATE_WR / PM_INST_CMPL
@@@group=performance2
#
# Tablewalk cycles per TLB miss (instruction and data misses combined).
@PMD_DTBL_AVER;Average Tablewalk
@@PM_DATA_TABLEWALK_CYC / (PM_ITLB_MISS + PM_DTLB_MISS)
@@@group=performance2
#
@PMD_LRQ_RES;LRQ residence time
@@PM_LSU_LRQ_S0_VALID / PM_LSU_LRQ_S0_ALLOC
@@@group=performance2
#
# NOTE(review): this formula is the reciprocal of (allocations * 32) per
# cycle, i.e. cycles per allocation; confirm that this is the intended
# definition of "arrival rate". The same applies to PMD_SRQ_RATE below.
@PMD_LRQ_RATE;LRQ arrival rate
@@1 / ((PM_LSU_LRQ_S0_ALLOC * 32) / PM_CYC)
@@@group=performance2
#
# Built from two other derived metrics rather than from raw counters.
@PMD_LRQ_QUEUE;LRQ average entries in queue
@@PMD_LRQ_RATE * PMD_LRQ_RES
@@@group=performance2
#
# Residence time = slot-0 valid count over slot-0 allocation count.
@PMD_LMQ_RES;LMQ residence time
@@PM_LSU_LMQ_S0_VALID / PM_LSU_LMQ_S0_ALLOC
@@@group=performance2
#
@PMD_SRQ_RES;SRQ residence time
@@PM_LSU_SRQ_S0_VALID / PM_LSU_SRQ_S0_ALLOC
@@@group=performance2
#
@PMD_SRQ_RATE;SRQ arrival rate
@@1 / ((PM_LSU_SRQ_S0_ALLOC * 32) / PM_CYC)
@@@group=performance2
#
@PMD_SRQ_QUEUE;SRQ average entries in queue
@@PMD_SRQ_RATE * PMD_SRQ_RES
@@@group=performance2
#
# Store path: L1 store references, then store requests summed over the
# three L2 slice counters (SA, SB, SC).
@PMD_ST_CIU;Store Sent to CIU
@@PM_ST_REF_L1
@@@group=performance2
#
@PMD_ST_L2_REQ;Stores Sent to L2 slices
@@PM_L2SA_ST_REQ + PM_L2SB_ST_REQ + PM_L2SC_ST_REQ
@@@group=performance2
#
# Per-slice requests minus hits, summed over the three slices.
@PMD_ST_L2_MISS;Store miss in L2
@@(PM_L2SA_ST_REQ - PM_L2SA_ST_HIT) + (PM_L2SB_ST_REQ - PM_L2SB_ST_HIT) + (PM_L2SC_ST_REQ - PM_L2SC_ST_HIT)
@@@group=performance2
#
# Ratio of the two derived store metrics defined just above.
@PMD_GAT_RATIO;Gather ratio
@@PMD_ST_CIU / PMD_ST_L2_REQ
@@@group=performance2
#
# Sum of every L2-level and L3-level data source counter (memory excluded).
@PMD_LD_REQ;Loads Sent to GPS
@@PM_DATA_FROM_L2 + PM_DATA_FROM_L25_SHR + PM_DATA_FROM_L25_MOD + PM_DATA_FROM_L275_SHR + PM_DATA_FROM_L275_MOD + PM_DATA_FROM_L3 + PM_DATA_FROM_L35
@@@group=performance2
#
# Branch mispredicts per completed instruction, one metric per counter.
@PMD_MPRED_DIR;Branch Mispredict Rate (Direction)
@@PM_BR_MPRED_CR / PM_INST_CMPL
@@@group=performance2
#
@PMD_MPRED_CNT;Branch Mispredict Rate (Count/Link)
@@PM_BR_MPRED_TA / PM_INST_CMPL
@@@group=performance2
#
# CPI Breakdown Model
# Group names here carry a ":N" suffix (0, 1 or 2); presumably the depth of
# the entry in the breakdown tree - TODO confirm against the PMAPI docs.
@PMD_CPI_TOTAL_CYC;Total CPI
@@PM_CYC / PM_INST_CMPL
@@@group=cpi_breakdown:0
#
@PMD_CPI_CMPL_CYC;Completion Cycles
@@PM_GRP_CMPL / PM_INST_CMPL
@@@group=cpi_breakdown:1
#
# NOTE(review): numerator and denominator are the same counter, so this
# evaluates to 1 whenever PM_INST_CMPL is non-zero; confirm it is intended.
@PMD_CPI_BASE_CMPL_CYC;Base Completion Cycles
@@PM_INST_CMPL / PM_INST_CMPL
@@@group=cpi_breakdown:2
#
# Completion cycles minus base completion cycles.
@PMD_CPI_OVERHEAD;Overhead of Grouping Restrictions
@@PMD_CPI_CMPL_CYC - PMD_CPI_BASE_CMPL_CYC
@@@group=cpi_breakdown:2
#
@PMD_CPI_GCT_EMPTY_CYC;GCT Empty Cycles
@@PM_GCT_EMPTY_CYC / PM_INST_CMPL
@@@group=cpi_breakdown:1
# Remainder of the CPI breakdown: total CPI minus the completion and
# GCT-empty components.
@PMD_CPI_OTHER;Other
@@PMD_CPI_TOTAL_CYC - PMD_CPI_CMPL_CYC - PMD_CPI_GCT_EMPTY_CYC
@@@group=cpi_breakdown:1