# @(#)M 1.9 src/bos/usr/sbin/perf/pmapi/events/POWER5-II.dms, pmapi, bos720 1/23/08 08:51:29
# IBM_PROLOG_BEGIN_TAG
# This is an automatically generated prolog.
#
# bos720 src/bos/usr/sbin/perf/pmapi/events/POWER5-II.dms 1.9
#
# Licensed Materials - Property of IBM
#
# COPYRIGHT International Business Machines Corp. 2005,2008
# All Rights Reserved
#
# US Government Users Restricted Rights - Use, duplication or
# disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
#
# IBM_PROLOG_END_TAG

# Record layout used throughout this file:
#   NAME = value;            global constant that formulas may reference
#   @ID;label[;unit]         starts a derived metric (unit is optional)
#   @@expression             formula; may reference PM_* events, globals,
#                            proc_freq, total_time and other PMD_* metrics
#   @@@key=value             metric attribute (description=..., group=...)

# Global variables
# Line sizes are in bytes: the traffic formulas multiply an event count by a
# line size and divide by (1024 * 1024) to report MBytes.
CACHE_LINE_SIZE = 128;
MEM_LINE_SIZE = 128;

# Derived metric descriptions

# --- Utilization and instruction throughput ---
@PMD_UTI_RATE;Utilization rate;%
@@(PM_CYC * 100 / proc_freq) / total_time
@@@group=performance

@PMD_MIPS;MIPS;MIPS
@@(0.000001 * PM_INST_CMPL) / total_time
@@@group=performance

@PMD_INST_PER_CYC;Instructions per cycle
@@PM_INST_CMPL / PM_CYC
@@@group=performance

# --- Floating point / fixed point unit activity ---
@PMD_HW_FP_PER_CYC;HW floating point instructions per Cycle
@@PM_FPU_FIN / PM_CYC
@@@group=performance

# "User time" is computed as PM_CYC / proc_freq in the *_UTIME metrics.
@PMD_HW_FP_PER_UTIME;HW floating point instructions / user time;M HWflops/s
@@(0.000001 * PM_FPU_FIN) / (PM_CYC / proc_freq)
@@@group=performance

@PMD_HW_FP_RATE;HW floating point rate (HW Flops / WCT);M HWflops/s
@@0.000001 * PM_FPU_FIN / total_time
@@@group=performance

@PMD_FP;Total Fixed point operations;M
@@PM_FXU_FIN * 0.000001
@@@group=performance

@PMD_FP_PER_CYC;Fixed point operations per Cycle
@@PM_FXU_FIN / PM_CYC
@@@group=performance

@PMD_FP_LD_ST;Floating point load and store operations ;M
@@ (PM_LSU_LDF + PM_FPU_STF) * 0.000001
@@@group=performance

@PMD_INST_PER_FP_LD_ST;Instructions per floating point load/store
@@PM_INST_CMPL / (PM_LSU_LDF + PM_FPU_STF)
@@@group=performance

@PMD_PRC_INST_DISP_CMPL;% Instructions dispatched that completed;%
@@PM_INST_CMPL * 100 / PM_INST_DISP
@@@group=performance

# --- L2 accesses (counted as L1 load misses + L1 store misses) ---
@PMD_DATA_L2;Total L2 data cache accesses;M
@@(PM_LD_MISS_L1 + PM_ST_MISS_L1) * 0.000001
@@@group=performance

@PMD_PRC_L2_ACCESS;% accesses from L2 per cycle;%
@@100 * (PM_LD_MISS_L1 + PM_ST_MISS_L1) / PM_CYC
@@@group=performance

@PMD_L2_TRAF;L2 traffic;MBytes
@@(PM_LD_MISS_L1 + PM_ST_MISS_L1) * CACHE_LINE_SIZE / (1024 * 1024)
@@@group=performance

@PMD_L2_BDW;L2 bandwidth per processor;MBytes/s
@@(PM_LD_MISS_L1 + PM_ST_MISS_L1) * CACHE_LINE_SIZE / (1024 * 1024) / total_time
@@@group=performance

# HPM_AVG_L2_LATENCY is not defined in this file; it must be supplied externally.
@PMD_L2_LD_EST_LAT_AVG;Estimated latency from loads from L2 (Average);s
@@HPM_AVG_L2_LATENCY * (PM_ST_MISS_L1 + PM_LD_MISS_L1) / proc_freq
@@@group=performance

# --- L1 load/store references and TLB behaviour ---
@PMD_LD_ST;Total load and store operations;M
@@(PM_LD_REF_L1 + PM_ST_REF_L1) * 0.000001
@@@group=performance

@PMD_INST_PER_LD_ST;Instructions per load/store
@@PM_INST_CMPL / (PM_ST_REF_L1 + PM_LD_REF_L1)
@@@group=performance

@PMD_LD_PER_LD_MISS;Number of loads per load miss
@@PM_LD_REF_L1 / PM_LD_MISS_L1
@@@group=performance

@PMD_LD_PER_DTLB;Number of loads per DTLB miss
@@PM_LD_REF_L1 / PM_DTLB_MISS
@@@group=performance

@PMD_ST_PER_ST_MISS;Number of stores per store miss
@@PM_ST_REF_L1 / PM_ST_MISS_L1
@@@group=performance

@PMD_LD_PER_TLB;Number of loads per TLB miss
@@PM_LD_REF_L1 / PM_TLB_MISS
@@@group=performance

@PMD_LD_ST_PER_TLB;Number of load/store per TLB miss
@@(PM_LD_REF_L1 + PM_ST_REF_L1) / PM_TLB_MISS
@@@group=performance

# HPM_TLB_LATENCY is not defined in this file; it must be supplied externally.
@PMD_TLB_EST_LAT;Estimated latency from TLB miss;s
@@HPM_TLB_LATENCY * PM_TLB_MISS / proc_freq
@@@group=performance

# --- Memory traffic (loads satisfied from local + remote memory) ---
# Bytes moved = load count * MEM_LINE_SIZE, mirroring the L2 traffic formulas
# which use a single line-size factor. The formulas previously multiplied by
# MEM_LINE_SIZE *and* CACHE_LINE_SIZE, which is not a byte count and inflated
# the reported MBytes by a factor of 128.
# NOTE(review): assumes one MEM_LINE_SIZE-byte line per counted event - confirm
# against the POWER5 event definitions.
@PMD_MEM_LD_TRAF;Memory load traffic;MBytes
@@(PM_DATA_FROM_LMEM + PM_DATA_FROM_RMEM) * MEM_LINE_SIZE / (1024 * 1024)
@@@group=performance

@PMD_MEM_BDW;Memory bandwidth per processor ;MBytes/s
@@(PM_DATA_FROM_LMEM + PM_DATA_FROM_RMEM) * MEM_LINE_SIZE / (1024 * 1024) / total_time
@@@group=performance

# HPM_MEM_LATENCY is not defined in this file; it must be supplied externally.
@PMD_MEM_LD_EST_LAT;Estimated latency from loads from memory;s
@@HPM_MEM_LATENCY * (PM_DATA_FROM_LMEM + PM_DATA_FROM_RMEM)/ proc_freq
@@@group=performance

@PMD_LD_LMEM_PER_LD_RMEM;Number of loads from local memory per loads from remote memory
@@PM_DATA_FROM_LMEM / PM_DATA_FROM_RMEM
@@@group=performance

# NOTE(review): this ratio divides by PM_RUN_CYC whereas PMD_PRC_L2_ACCESS
# divides by PM_CYC - confirm the difference is intentional.
@PMD_PRC_MEM_LD;% loads from memory per cycle;%
@@100 * (PM_DATA_FROM_LMEM + PM_DATA_FROM_RMEM) / PM_RUN_CYC
@@@group=performance

# --- Flips: FPU0 + FPU1 finished instructions + FMAs - FP stores ---
@PMD_FLIPS;Total floating point instructions + FMAs (flips);Mflip
@@(PM_FPU0_FIN + PM_FPU1_FIN + PM_FPU_FMA - PM_FPU_STF) * 0.000001
@@@group=performance

@PMD_FLIPS_WCT;Flip rate (flips / WCT);Mflip/s
@@((PM_FPU0_FIN + PM_FPU1_FIN + PM_FPU_FMA - PM_FPU_STF) * 0.000001) / total_time
@@@group=performance

@PMD_FLIPS_UTIME;Flips / user time;Mflip/s
@@((PM_FPU0_FIN + PM_FPU1_FIN + PM_FPU_FMA - PM_FPU_STF) * 0.000001) / (PM_CYC / proc_freq)
@@@group=performance

# --- Flops: weighted sum 1FLOP + 2*FMA + 4*FDIV + 4*FSQRT ---
@PMD_FLOPS;Flop;Mflop
@@(PM_FPU_1FLOP + (2 * PM_FPU_FMA) + (4 * PM_FPU_FDIV) + (4 * PM_FPU_FSQRT)) * 0.000001
@@@description=1FLOP instruction + 2 FMAs + 4 FDIVs + 4 FSQRTs (flops)
@@@group=performance

@PMD_FLOPS_WCT;Flop rate (flops / WCT);Mflop/s
@@((PM_FPU_1FLOP + (2 * PM_FPU_FMA) + (4 * PM_FPU_FDIV) + (4 * PM_FPU_FSQRT)) * 0.000001) / total_time
@@@group=performance

@PMD_FLOPS_UTIME;Flops / user time;Mflop/s
@@((PM_FPU_1FLOP + (2 * PM_FPU_FMA) + (4 * PM_FPU_FDIV) + (4 * PM_FPU_FSQRT)) * 0.000001) / (PM_CYC / proc_freq)
@@@group=performance

# CPI Breakdown Model
# Every component is expressed per completed instruction (PM_INST_CMPL).
# "Others"/"Other" entries are residuals: the parent component minus its
# separately measured children.
# NOTE(review): the number after "cpi_breakdown:" looks like the nesting depth
# of the component in the breakdown tree (0 = total) - confirm with the
# PMAPI tooling that consumes this file.
@PMD_TOTAL_CPI;Total CPI
@@PM_CYC / PM_INST_CMPL
@@@group=cpi_breakdown:0

@PMD_CPI_CMPL_CYC;Completion cycles
@@PM_GRP_CMPL / PM_INST_CMPL
@@@group=cpi_breakdown:1

# --- GCT empty and its components ---
@PMD_CPI_GCT_EMPTY;Completion Table empty (GCT empty)
@@PM_GCT_NOSLOT_CYC / PM_INST_CMPL
@@@group=cpi_breakdown:1

@PMD_CPI_GCT_EMPTY_IC_MISS;I-Cache Miss Penalty
@@PM_GCT_NOSLOT_IC_MISS / PM_INST_CMPL
@@@group=cpi_breakdown:2

@PMD_CPI_GCT_EMPTY_BR_MPRED;Branch Misprediction Penalty
@@PM_GCT_NOSLOT_BR_MPRED / PM_INST_CMPL
@@@group=cpi_breakdown:2

@PMD_CPI_GCT_EMPTY_OTHER;Others GCT stalls
@@PMD_CPI_GCT_EMPTY - PMD_CPI_GCT_EMPTY_IC_MISS - PMD_CPI_GCT_EMPTY_BR_MPRED
@@@description=GCT Empty - I-Cache Miss Penalty - Branch Misprediction Penalty
@@@group=cpi_breakdown:2

# --- Completion stall cycles and their components ---
@PMD_CPI_STALL_CYC;Completion Stall cycles
@@PMD_TOTAL_CPI - PMD_CPI_CMPL_CYC - PMD_CPI_GCT_EMPTY
@@@description=Total cycles - Completion cycles - GCT empty
@@@group=cpi_breakdown:1

# LSU stalls
@PMD_CPI_STALL_LSU;Stall by LSU instruction
@@PM_CMPLU_STALL_LSU / PM_INST_CMPL
@@@group=cpi_breakdown:2

@PMD_CPI_STALL_LSU_REJECT;Stall by LSU Reject
@@PM_CMPLU_STALL_REJECT / PM_INST_CMPL
@@@group=cpi_breakdown:3

@PMD_CPI_STALL_LSU_ERAT_MISS;Stall by LSU Translation Reject
@@PM_CMPLU_STALL_ERAT_MISS / PM_INST_CMPL
@@@group=cpi_breakdown:4

@PMD_CPI_STALL_LSU_REJECT_OTHERS;Stall by LSU Other Reject
@@PMD_CPI_STALL_LSU_REJECT - PMD_CPI_STALL_LSU_ERAT_MISS
@@@description=Stall by LSU Reject - Stall by LSU Translation Reject
@@@group=cpi_breakdown:4

@PMD_CPI_STALL_LSU_DCACHE_MISS;Stall by LSU D-cache miss
@@PM_CMPLU_STALL_DCACHE_MISS / PM_INST_CMPL
@@@group=cpi_breakdown:3

@PMD_CPI_STALL_LSU_OTHERS;Stall by LSU basic latency, LSU Flush penalty
@@PMD_CPI_STALL_LSU - PMD_CPI_STALL_LSU_REJECT - PMD_CPI_STALL_LSU_DCACHE_MISS
@@@description=Stall by LSU Instruction - Stall by LSU Reject- Stall by LSU D-cache miss
@@@group=cpi_breakdown:3

# FXU stalls
@PMD_CPI_STALL_FXU;Stall by FXU instruction
@@PM_CMPLU_STALL_FXU / PM_INST_CMPL
@@@group=cpi_breakdown:2

@PMD_CPI_STALL_FXU_DIV;Stall by any form of DIV/MTSPR/MFSPR instruction
@@PM_CMPLU_STALL_DIV / PM_INST_CMPL
@@@group=cpi_breakdown:3

@PMD_CPI_STALL_FXU_OTHERS;Stall by FXU basic latency
@@PMD_CPI_STALL_FXU - PMD_CPI_STALL_FXU_DIV
@@@description=Stall by FXU Instruction - Stall by any form of DIV/MTSPR/MFSPR instruction
@@@group=cpi_breakdown:3

# FPU stalls
@PMD_CPI_STALL_FPU;Stall by FPU instruction
@@PM_CMPLU_STALL_FPU / PM_INST_CMPL
@@@group=cpi_breakdown:2

@PMD_CPI_STALL_FPU_DIV;Stall by any form of FDIV/FSQRT instruction
@@PM_CMPLU_STALL_FDIV / PM_INST_CMPL
@@@group=cpi_breakdown:3

@PMD_CPI_STALL_FPU_OTHERS;Stall by FPU basic latency
@@PMD_CPI_STALL_FPU - PMD_CPI_STALL_FPU_DIV
@@@description=Stall by FPU Instruction - Stall by any form of FDIV/FSQRT instruction
@@@group=cpi_breakdown:3

# Remaining stall cycles not attributed to LSU, FXU or FPU
@PMD_CPI_STALL_OTHERS;Stall by others
@@PMD_CPI_STALL_CYC - PMD_CPI_STALL_LSU - PMD_CPI_STALL_FXU - PMD_CPI_STALL_FPU
@@@description=Completion Stall cycles - Stall by LSU Instruction - Stall by FXU Instruction - Stall by FPU Instruction
@@@group=cpi_breakdown:2