# @(#)M 1.5 src/bos/usr/sbin/perf/pmapi/libpmapi/POWER5.dms, pmapi, bos720 1/23/08 08:51:26
# IBM_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# bos720 src/bos/usr/sbin/perf/pmapi/libpmapi/POWER5.dms 1.5
#
# Licensed Materials - Property of IBM
#
# COPYRIGHT International Business Machines Corp. 2005,2008
# All Rights Reserved
#
# US Government Users Restricted Rights - Use, duplication or
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
#
# IBM_PROLOG_END_TAG
#
# Record layout used throughout this file (one directive per line):
#   @NAME, then description, then an optional unit, separated by semicolons
#   @@formula over event counters, globals and previously defined metrics
#   @@@key=value attributes (description=..., group=...)
#
# The formulas reference proc_freq, total_time, HPM_AVG_L2_LATENCY,
# HPM_TLB_LATENCY and HPM_MEM_LATENCY, none of which are defined in this
# file. Presumably they are supplied by the consumer of this file - confirm.
#
# Global variables
CACHE_LINE_SIZE = 128;
MEM_LINE_SIZE = 128;
#
# Derived metric descriptions
#
# --- Utilization and instruction throughput ---
@PMD_UTI_RATE;Utilization rate;%
@@(PM_CYC * 100 / proc_freq) / total_time
@@@group=performance
@PMD_MIPS;MIPS;MIPS
@@(0.000001 * PM_INST_CMPL) / total_time
@@@group=performance
@PMD_INST_PER_CYC;Instructions per cycle
@@PM_INST_CMPL / PM_CYC
@@@group=performance
#
# --- Floating point ---
@PMD_HW_FP_PER_CYC;HW floating point instructions per Cycle
@@PM_FPU_FIN / PM_CYC
@@@group=performance
@PMD_HW_FP_PER_UTIME;HW floating point instructions / user time;M HWflops/s
@@(0.000001 * PM_FPU_FIN) / (PM_CYC / proc_freq)
@@@group=performance
@PMD_HW_FP_RATE;HW floating point rate (HW Flops / WCT);M HWflops/s
@@0.000001 * PM_FPU_FIN / total_time
@@@group=performance
#
# --- Fixed point ---
@PMD_FX;Total Fixed point operations;M
@@PM_FXU_FIN * 0.000001
@@@group=performance
# Per-cycle ratio, so it carries no unit, like the other per-cycle ratios.
@PMD_FX_PER_CYC;Fixed point operations per Cycle
@@PM_FXU_FIN / PM_CYC
@@@group=performance
#
# --- Floating point load/store ---
@PMD_FP_LD_ST;Floating point load and store operations;M
@@(PM_LSU_LDF + PM_FPU_STF) * 0.000001
@@@group=performance
@PMD_INST_PER_FP_LD_ST;Instructions per floating point load/store
@@PM_INST_CMPL / (PM_LSU_LDF + PM_FPU_STF)
@@@group=performance
#
# --- Dispatch efficiency ---
@PMD_PRC_INST_DISP_CMPL;% Instructions dispatched that completed;%
@@PM_INST_CMPL * 100 / PM_INST_DISP
@@@group=performance
#
# --- L2 cache: every L1 load or store miss is counted as one L2 access ---
@PMD_DATA_L2;Total L2 data cache accesses;M
@@(PM_LD_MISS_L1 + PM_ST_MISS_L1) * 0.000001
@@@group=performance
@PMD_PRC_L2_ACCESS;% accesses from L2 per cycle;%
@@100 * (PM_LD_MISS_L1 + PM_ST_MISS_L1) / PM_CYC
@@@group=performance
@PMD_L2_TRAF;L2 traffic;MBytes
@@(PM_LD_MISS_L1 + PM_ST_MISS_L1) * CACHE_LINE_SIZE / (1024 * 1024)
@@@group=performance
@PMD_L2_BDW;L2 bandwidth per processor;MBytes/s
@@(PM_LD_MISS_L1 + PM_ST_MISS_L1) * CACHE_LINE_SIZE / (1024 * 1024) / total_time
@@@group=performance
@PMD_L2_LD_EST_LAT_AVG;Estimated latency from loads from L2 (Average);s
@@HPM_AVG_L2_LATENCY * (PM_ST_MISS_L1 + PM_LD_MISS_L1) / proc_freq
@@@group=performance
#
# --- L1 loads and stores ---
@PMD_LD_ST;Total load and store operations;M
@@(PM_LD_REF_L1 + PM_ST_REF_L1) * 0.000001
@@@group=performance
@PMD_INST_PER_LD_ST;Instructions per load/store
@@PM_INST_CMPL / (PM_ST_REF_L1 + PM_LD_REF_L1)
@@@group=performance
@PMD_LD_PER_LD_MISS;Number of loads per load miss
@@PM_LD_REF_L1 / PM_LD_MISS_L1
@@@group=performance
@PMD_LD_PER_DTLB;Number of loads per DTLB miss
@@PM_LD_REF_L1 / PM_DTLB_MISS
@@@group=performance
@PMD_ST_PER_ST_MISS;Number of stores per store miss
@@PM_ST_REF_L1 / PM_ST_MISS_L1
@@@group=performance
#
# --- TLB ---
@PMD_LD_PER_TLB;Number of loads per TLB miss
@@PM_LD_REF_L1 / PM_TLB_MISS
@@@group=performance
@PMD_LD_ST_PER_TLB;Number of load/store per TLB miss
@@(PM_LD_REF_L1 + PM_ST_REF_L1) / PM_TLB_MISS
@@@group=performance
@PMD_TLB_EST_LAT;Estimated latency from TLB miss;s
@@HPM_TLB_LATENCY * PM_TLB_MISS / proc_freq
@@@group=performance
#
# --- Memory ---
# NOTE(review): the two traffic/bandwidth formulas below scale the load count
# by MEM_LINE_SIZE and by CACHE_LINE_SIZE (128 * 128 with the globals above),
# whereas the L2 traffic formulas scale by CACHE_LINE_SIZE only. Confirm
# whether MEM_LINE_SIZE is meant to be a byte count or a multiplier of cache
# lines per memory transfer. Formulas are left unchanged pending confirmation.
@PMD_MEM_LD_TRAF;Memory load traffic;MBytes
@@((PM_DATA_FROM_LMEM + PM_DATA_FROM_RMEM) * MEM_LINE_SIZE) * CACHE_LINE_SIZE / (1024 * 1024)
@@@group=performance
@PMD_MEM_BDW;Memory bandwidth per processor;MBytes/s
@@((PM_DATA_FROM_LMEM + PM_DATA_FROM_RMEM) * MEM_LINE_SIZE) * CACHE_LINE_SIZE / (1024 * 1024) / total_time
@@@group=performance
@PMD_MEM_LD_EST_LAT;Estimated latency from loads from memory;s
@@HPM_MEM_LATENCY * (PM_DATA_FROM_LMEM + PM_DATA_FROM_RMEM) / proc_freq
@@@group=performance
@PMD_LD_LMEM_PER_LD_RMEM;Number of loads from local memory per loads from remote memory
@@PM_DATA_FROM_LMEM / PM_DATA_FROM_RMEM
@@@group=performance
@PMD_PRC_MEM_LD;% loads from memory per cycle;%
@@100 * (PM_DATA_FROM_LMEM + PM_DATA_FROM_RMEM) / PM_CYC
@@@group=performance
#
# CPI Breakdown Model
#
# Every component is expressed in cycles per completed instruction. Residual
# ("other") components are derived by subtracting the measured children from
# their parent metric. The number after "cpi_breakdown:" appears to be the
# nesting depth of the component in the breakdown tree - confirm.
@PMD_TOTAL_CPI;Total CPI
@@PM_CYC / PM_INST_CMPL
@@@group=cpi_breakdown:0
@PMD_CPI_CMPL_CYC;Completion cycles
@@PM_GRP_CMPL / PM_INST_CMPL
@@@group=cpi_breakdown:1
#
# --- GCT empty ---
@PMD_CPI_GCT_EMPTY;Completion Table empty (GCT empty)
@@PM_GCT_NOSLOT_CYC / PM_INST_CMPL
@@@group=cpi_breakdown:1
@PMD_CPI_GCT_EMPTY_IC_MISS;I-Cache Miss Penalty
@@PM_GCT_NOSLOT_IC_MISS / PM_INST_CMPL
@@@group=cpi_breakdown:2
@PMD_CPI_GCT_EMPTY_BR_MPRED;Branch Misprediction Penalty
@@PM_GCT_NOSLOT_BR_MPRED / PM_INST_CMPL
@@@group=cpi_breakdown:2
@PMD_CPI_GCT_EMPTY_OTHER;Others GCT stalls
@@PMD_CPI_GCT_EMPTY - PMD_CPI_GCT_EMPTY_IC_MISS - PMD_CPI_GCT_EMPTY_BR_MPRED
@@@description=GCT Empty - I-Cache Miss Penalty - Branch Misprediction Penalty
@@@group=cpi_breakdown:2
#
# --- Completion stalls ---
@PMD_CPI_STALL_CYC;Completion Stall cycles
@@PMD_TOTAL_CPI - PMD_CPI_CMPL_CYC - PMD_CPI_GCT_EMPTY
@@@description=Total cycles - Completion cycles - GCT empty
@@@group=cpi_breakdown:1
#
# --- Completion stalls: LSU ---
@PMD_CPI_STALL_LSU;Stall by LSU instruction
@@PM_CMPLU_STALL_LSU / PM_INST_CMPL
@@@group=cpi_breakdown:2
@PMD_CPI_STALL_LSU_REJECT;Stall by LSU Reject
@@PM_CMPLU_STALL_REJECT / PM_INST_CMPL
@@@group=cpi_breakdown:3
@PMD_CPI_STALL_LSU_ERAT_MISS;Stall by LSU Translation Reject
@@PM_CMPLU_STALL_ERAT_MISS / PM_INST_CMPL
@@@group=cpi_breakdown:4
@PMD_CPI_STALL_LSU_REJECT_OTHERS;Stall by LSU Other Reject
@@PMD_CPI_STALL_LSU_REJECT - PMD_CPI_STALL_LSU_ERAT_MISS
@@@description=Stall by LSU Reject - Stall by LSU Translation Reject
@@@group=cpi_breakdown:4
@PMD_CPI_STALL_LSU_DCACHE_MISS;Stall by LSU D-cache miss
@@PM_CMPLU_STALL_DCACHE_MISS / PM_INST_CMPL
@@@group=cpi_breakdown:3
@PMD_CPI_STALL_LSU_OTHERS;Stall by LSU basic latency, LSU Flush penalty
@@PMD_CPI_STALL_LSU - PMD_CPI_STALL_LSU_REJECT - PMD_CPI_STALL_LSU_DCACHE_MISS
@@@description=Stall by LSU Instruction - Stall by LSU Reject- Stall by LSU D-cache miss
@@@group=cpi_breakdown:3
#
# --- Completion stalls: FXU ---
@PMD_CPI_STALL_FXU;Stall by FXU instruction
@@PM_CMPLU_STALL_FXU / PM_INST_CMPL
@@@group=cpi_breakdown:2
@PMD_CPI_STALL_FXU_DIV;Stall by any form of DIV/MTSPR/MFSPR instruction
@@PM_CMPLU_STALL_DIV / PM_INST_CMPL
@@@group=cpi_breakdown:3
@PMD_CPI_STALL_FXU_OTHERS;Stall by FXU basic latency
@@PMD_CPI_STALL_FXU - PMD_CPI_STALL_FXU_DIV
@@@description=Stall by FXU Instruction - Stall by any form of DIV/MTSPR/MFSPR instruction
@@@group=cpi_breakdown:3
#
# --- Completion stalls: FPU ---
@PMD_CPI_STALL_FPU;Stall by FPU instruction
@@PM_CMPLU_STALL_FPU / PM_INST_CMPL
@@@group=cpi_breakdown:2
@PMD_CPI_STALL_FPU_DIV;Stall by any form of FDIV/FSQRT instruction
@@PM_CMPLU_STALL_FDIV / PM_INST_CMPL
@@@group=cpi_breakdown:3
@PMD_CPI_STALL_FPU_OTHERS;Stall by FPU basic latency
@@PMD_CPI_STALL_FPU - PMD_CPI_STALL_FPU_DIV
@@@description=Stall by FPU Instruction - Stall by any form of FDIV/FSQRT instruction
@@@group=cpi_breakdown:3
#
# --- Completion stalls: remainder ---
@PMD_CPI_STALL_OTHERS;Stall by others
@@PMD_CPI_STALL_CYC - PMD_CPI_STALL_LSU - PMD_CPI_STALL_FXU - PMD_CPI_STALL_FPU
@@@description=Completion Stall cycles - Stall by LSU Instruction - Stall by FXU Instruction - Stall by FPU Instruction
@@@group=cpi_breakdown:2