# @(#)M 1.4 src/bos/usr/sbin/perf/pmapi/libpmapi/POWER3.dms, pmapi, bos720 3/30/07 03:05:42
# IBM_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# bos720 src/bos/usr/sbin/perf/pmapi/libpmapi/POWER3.dms 1.4
#
# Licensed Materials - Property of IBM
#
# COPYRIGHT International Business Machines Corp. 2005,2007
# All Rights Reserved
#
# US Government Users Restricted Rights - Use, duplication or
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
#
# IBM_PROLOG_END_TAG

# Record layout used throughout this file (one element per line):
#   @<metric name>;<description>[;<unit>]
#   @@<formula over event counters, globals, proc_freq and total_time>
#   @@@group=<group name>

# Global variables
# Both sizes are multiplied by miss counts and divided by (1024 * 1024)
# in the traffic/bandwidth formulas below.
# NOTE(review): MEM_LINE_SIZE is not referenced by any record visible here;
# the memory traffic metrics use CACHE_LINE_SIZE (same value) - confirm intent.
CACHE_LINE_SIZE = 128;
MEM_LINE_SIZE = 128;

# Derived metric descriptions

# --- Utilization and TLB ---
@PMD_UTI_RATE;Utilization rate;%
@@(PM_CYC * 100 / proc_freq) / total_time
@@@group=performance

@PMD_PRC_TLB_MISS;% TLB misses per cycle;%
@@100 * (PM_TLB_MISS / PM_CYC)
@@@group=performance

@PMD_LD_PER_TLB;Number of loads per TLB miss
@@PM_LD_CMPL / PM_TLB_MISS
@@@group=performance

@PMD_LD_ST_PER_TLB;Number of load/store per TLB miss
@@(PM_LD_CMPL + PM_ST_CMPL) / PM_TLB_MISS
@@@group=performance

# --- Completed loads/stores and instruction throughput ---
@PMD_LD_ST;Total load and store operations;M
@@(PM_ST_CMPL + PM_LD_CMPL) * 0.000001
@@@group=performance

@PMD_INST_PER_LD_ST;Instructions per load/store
@@PM_INST_CMPL / (PM_ST_CMPL + PM_LD_CMPL)
@@@group=performance

@PMD_MIPS;MIPS;MIPS
@@(0.000001 * PM_INST_CMPL) / total_time
@@@group=performance

@PMD_INST_PER_CYC;Instructions per cycle
@@PM_INST_CMPL / PM_CYC
@@@group=performance

# --- Floating point ---
@PMD_HW_FP_PER_CYC;HW floating point instructions per Cycle
@@(PM_FPU0_CMPL + PM_FPU1_CMPL) / PM_CYC
@@@group=performance

# The 0.000001 factor matches the "M HWflops/s" unit, in line with
# PMD_HW_FP_RATE and PMD_FLIPS_UTIME which scale the same way.
@PMD_HW_FP_PER_UTIME;HW floating point instructions / user time;M HWflops/s
@@0.000001 * (PM_FPU0_CMPL + PM_FPU1_CMPL) / (PM_CYC / proc_freq)
@@@group=performance

@PMD_HW_FP_RATE;HW floating point rate (HW Flops / WCT);M HWflops/s
@@0.000001 * (PM_FPU0_CMPL + PM_FPU1_CMPL) / total_time
@@@group=performance

@PMD_FLIPS;Total floating point instructions + FMAs (flips);Mflip
@@(PM_FPU0_CMPL + PM_FPU1_CMPL + PM_FPU_FMA) * 0.000001
@@@group=performance

@PMD_FLIPS_WCT;Flip rate (flips / WCT);Mflip/s
@@((PM_FPU0_CMPL + PM_FPU1_CMPL + PM_FPU_FMA) * 0.000001) / total_time
@@@group=performance

@PMD_FLIPS_UTIME;Flips / user time;Mflip/s
@@((PM_FPU0_CMPL + PM_FPU1_CMPL + PM_FPU_FMA) * 0.000001) / (PM_CYC / proc_freq)
@@@group=performance

@PMD_COMP;Computation intensity
@@(PM_FPU0_CMPL + PM_FPU1_CMPL + PM_FPU_FMA) / (PM_LD_CMPL + PM_ST_CMPL)
@@@group=performance

@PMD_FMA_PRC;FMA percentage;%
@@(2 * 100 * PM_FPU_FMA) / (PM_FPU0_CMPL + PM_FPU1_CMPL + PM_FPU_FMA)
@@@group=performance

# --- TLB metrics based on dispatched operations ---
# NOTE(review): HPM_TLB_LATENCY is not defined among the globals visible in
# this file; presumably supplied by the consumer of this file - confirm.
@PMD_TLB_EST_LAT;Estimated latency from TLB miss;s
@@HPM_TLB_LATENCY * PM_TLB_MISS / proc_freq
@@@group=performance

@PMD_LD_DISP_PER_TLB;Number of loads dispatched per TLB miss
@@PM_LD_DISP / PM_TLB_MISS
@@@group=performance

@PMD_LD_ST_DISP_PER_TLB;Number of load/store dispatched per TLB miss
@@(PM_LD_DISP + PM_ST_DISP) / PM_TLB_MISS
@@@group=performance

# --- L1 misses / L2 accesses ---
@PMD_DATA_L2;Total L2 data cache accesses;M
@@(PM_LD_MISS_L1 + PM_ST_MISS_L1) * 0.000001
@@@group=performance

@PMD_PRC_L2_ACCESS;% accesses from L2 per cycle;%
@@100 * (PM_LD_MISS_L1 + PM_ST_MISS_L1) / PM_CYC
@@@group=performance

@PMD_L2_TRAF;L2 traffic;MBytes
@@(PM_LD_MISS_L1 + PM_ST_MISS_L1) * CACHE_LINE_SIZE / (1024 * 1024)
@@@group=performance

@PMD_L2_BDW;L2 bandwidth per processor;MBytes/s
@@(PM_LD_MISS_L1 + PM_ST_MISS_L1) * CACHE_LINE_SIZE / (1024 * 1024) / total_time
@@@group=performance

@PMD_LD_ST_DISP;Total load and store dispatched operations;M
@@(PM_ST_DISP + PM_LD_DISP) * 0.000001
@@@group=performance

@PMD_LD_DISP_PER_LD_MISS;Number of loads dispatched per load miss
@@PM_LD_DISP / PM_LD_MISS_L1
@@@group=performance

@PMD_ST_DISP_PER_ST_MISS;Number of stores dispatched per store miss
@@PM_ST_DISP / PM_ST_MISS_L1
@@@group=performance

@PMD_LD_ST_DISP_PER_L1_MISS;Number of load/stores dispatched per L1 miss
@@(PM_ST_DISP + PM_LD_DISP) / (PM_ST_MISS_L1 + PM_LD_MISS_L1)
@@@group=performance

@PMD_L1_HIT_RATE_LDST_DISP;L1 cache hit rate (versus load/store dispatched);%
@@100 * (1 - ((PM_LD_MISS_L1 + PM_ST_MISS_L1) / (PM_LD_DISP + PM_ST_DISP)))
@@@group=performance

@PMD_PRC_LSU_IDLE;% Cycles LSU is idle ;%
@@(PM_LSU_IDLE * 100) / PM_CYC
@@@group=performance

# --- L2 misses and memory traffic ---
@PMD_LD_DISP_PER_LD_MISS_L2HIT;Number of loads dispatched per load miss (L2 hit)
@@PM_LD_DISP / PM_LD_MISS_L2HIT
@@@group=performance

@PMD_ST_DISP_PER_L2_ST_MISS;Number of stores dispatched per L2 store miss
@@PM_ST_DISP / PM_ST_MISS_L2
@@@group=performance

@PMD_LD_DISP_PER_L2_LD_MISS;Number of loads dispatched per L2 load miss
@@PM_LD_DISP / PM_BIU_LD_NORTRY
@@@group=performance

# NOTE(review): the numerator counts loads and stores, but the denominator is
# the load-only PM_BIU_LD_NORTRY; confirm whether store misses should be added.
@PMD_LD_ST_DISP_PER_L2_MISS;Number of load/stores dispatched per L2 miss
@@(PM_LD_DISP + PM_ST_DISP) / PM_BIU_LD_NORTRY
@@@group=performance

@PMD_MEM_TRAF;Memory traffic;MBytes
@@(PM_BIU_LD_NORTRY + PM_BIU_ST_NORTRY) * CACHE_LINE_SIZE / (1024 * 1024)
@@@group=performance

@PMD_MEM_BDW;Memory bandwidth per processor ;MBytes/s
@@(PM_BIU_LD_NORTRY + PM_BIU_ST_NORTRY) * CACHE_LINE_SIZE / (1024 * 1024) / total_time
@@@group=performance

# --- Completion stalls and fixed point ---
@PMD_PRC_0INST_CMPL;% of cycles with 0 instructions completed;%
@@(PM_0INST_CMPL * 100) / PM_CYC
@@@group=performance

@PMD_FX;Total Fixed point operations;M
@@(PM_FXU0_PROD_RESULT + PM_FXU1_PROD_RESULT + PM_FXU2_PROD_RESULT) * 0.000001
@@@group=performance

@PMD_FX_PER_CYC;Fixed point operations per Cycle
@@(PM_FXU0_PROD_RESULT + PM_FXU1_PROD_RESULT + PM_FXU2_PROD_RESULT) / PM_CYC
@@@group=performance

@PMD_FX_PER_LDST;Fixed point operations per load/stores
@@(PM_FXU0_PROD_RESULT + PM_FXU1_PROD_RESULT + PM_FXU2_PROD_RESULT) / (PM_LD_CMPL + PM_ST_CMPL)
@@@group=performance

# --- Branches, instruction cache, dispatch ---
@PMD_MBR_PRC;Branches mispredicted percentage;%
@@(PM_BR_MPRED_GC * 100) / PM_BR_PRED
@@@group=performance

@PMD_INST_PER_IC_MISS;Instructions per I Cache Miss
@@PM_INST_CMPL / PM_IC_MISS
@@@group=performance

@PMD_PRC_INST_DISP_CMPL;% Instructions dispatched that completed;%
@@PM_INST_CMPL * 100 / PM_INST_DISP
@@@group=performance

# Plain ratio (no "* 100" in the formula), so no unit is declared.
@PMD_ST_L1_MISS_PER_L2_ST_MISS;L1 store miss per L2 store miss
@@PM_ST_MISS_L1 / PM_ST_MISS_L2
@@@group=performance

# The formula scales by 100, so the value is reported as a percentage.
@PMD_SNOOP_HIT_PER_SNOOP;Snoop hit ratio;%
@@(PM_SNOOP_L2HIT * 100) / PM_SNOOP
@@@group=performance