hsx-metrics.json 23.6 KB
Newer Older
1 2
[
    {
3
        "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
4
        "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)",
5
        "MetricGroup": "TopdownL1",
6
        "MetricName": "Frontend_Bound",
7
        "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound."
8 9 10
    },
    {
        "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
11
        "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
12
        "MetricGroup": "TopdownL1_SMT",
13
        "MetricName": "Frontend_Bound_SMT",
14
        "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
15 16 17
    },
    {
        "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
18
        "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD)",
19
        "MetricGroup": "TopdownL1",
20 21
        "MetricName": "Bad_Speculation",
        "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example."
22 23 24
    },
    {
        "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.",
25
        "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
26
        "MetricGroup": "TopdownL1_SMT",
27 28
        "MetricName": "Bad_Speculation_SMT",
        "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU."
29 30 31
    },
    {
        "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
32 33
        "MetricConstraint": "NO_NMI_WATCHDOG",
        "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) )",
34
        "MetricGroup": "TopdownL1",
35 36
        "MetricName": "Backend_Bound",
        "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound."
37 38 39
    },
    {
        "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
40
        "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) )",
41
        "MetricGroup": "TopdownL1_SMT",
42 43
        "MetricName": "Backend_Bound_SMT",
        "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
44 45 46
    },
    {
        "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
47
        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)",
48
        "MetricGroup": "TopdownL1",
49
        "MetricName": "Retiring",
50
        "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category.  Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved.  Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance.  For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. "
51 52 53
    },
    {
        "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.",
54
        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
55
        "MetricGroup": "TopdownL1_SMT",
56
        "MetricName": "Retiring_SMT",
57
        "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category.  Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved.  Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance.  For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. SMT version; use when SMT is enabled and measuring per logical CPU."
58 59
    },
    {
60
        "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
61
        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
62
        "MetricGroup": "Ret;Summary",
63 64 65
        "MetricName": "IPC"
    },
    {
66
        "BriefDescription": "Uops Per Instruction",
67
        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
68
        "MetricGroup": "Pipeline;Ret;Retire",
69 70 71
        "MetricName": "UPI"
    },
    {
72
        "BriefDescription": "Instruction per taken branch",
73 74 75
        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / BR_INST_RETIRED.NEAR_TAKEN",
        "MetricGroup": "Branches;Fed;FetchBW",
        "MetricName": "UpTB"
76 77
    },
    {
78
        "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
79 80
        "MetricExpr": "1 / (INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD)",
        "MetricGroup": "Pipeline;Mem",
81 82 83
        "MetricName": "CPI"
    },
    {
84
        "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
85
        "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
86
        "MetricGroup": "Pipeline",
87 88 89
        "MetricName": "CLKS"
    },
    {
90 91 92
        "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
        "MetricExpr": "4 * CPU_CLK_UNHALTED.THREAD",
        "MetricGroup": "TmaL1",
93 94 95
        "MetricName": "SLOTS"
    },
    {
96 97 98
        "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
        "MetricExpr": "4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
        "MetricGroup": "TmaL1_SMT",
99 100 101
        "MetricName": "SLOTS_SMT"
    },
    {
102 103 104
        "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
        "MetricGroup": "Ret;SMT;TmaL1",
105 106 107
        "MetricName": "CoreIPC"
    },
    {
108 109 110
        "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
        "MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
        "MetricGroup": "Ret;SMT;TmaL1_SMT",
111 112 113
        "MetricName": "CoreIPC_SMT"
    },
    {
114
        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
115
        "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) ) if #SMT_on else UOPS_EXECUTED.CORE / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 ) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@)",
116
        "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
117 118 119
        "MetricName": "ILP"
    },
    {
120
        "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
121
        "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
122 123 124 125
        "MetricGroup": "SMT",
        "MetricName": "CORE_CLKS"
    },
    {
126 127 128 129
        "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
        "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS",
        "MetricGroup": "InsType",
        "MetricName": "IpLoad"
130 131
    },
    {
132 133 134 135
        "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
        "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES",
        "MetricGroup": "InsType",
        "MetricName": "IpStore"
136 137
    },
    {
138 139 140 141
        "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
        "MetricGroup": "Branches;Fed;InsType",
        "MetricName": "IpBranch"
142 143
    },
    {
144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
        "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
        "MetricGroup": "Branches;Fed;PGO",
        "MetricName": "IpCall"
    },
    {
        "BriefDescription": "Instruction per taken branch",
        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
        "MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO",
        "MetricName": "IpTB"
    },
    {
        "BriefDescription": "Branch instructions per taken branch. ",
        "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
        "MetricGroup": "Branches;Fed;PGO",
        "MetricName": "BpTkBranch"
    },
    {
        "BriefDescription": "Total number of retired Instructions, Sample with: INST_RETIRED.PREC_DIST",
        "MetricExpr": "INST_RETIRED.ANY",
        "MetricGroup": "Summary;TmaL1",
        "MetricName": "Instructions"
    },
167 168 169 170 171 172
    {
        "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
        "MetricGroup": "Pipeline;Ret",
        "MetricName": "Retire"
    },
173 174 175 176 177 178 179
    {
        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
        "MetricExpr": "IDQ.DSB_UOPS / (( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS ) )",
        "MetricGroup": "DSB;Fed;FetchBW",
        "MetricName": "DSB_Coverage"
    },
    {
180 181 182 183 184 185 186
        "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
        "MetricGroup": "Bad;BadSpec;BrMispredicts",
        "MetricName": "IpMispredict"
    },
    {
        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
187 188
        "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
        "MetricGroup": "Mem;MemoryBound;MemoryLat",
189
        "MetricName": "Load_Miss_Real_Latency"
190 191 192 193 194 195
    },
    {
        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
        "MetricGroup": "Mem;MemoryBound;MemoryBW",
        "MetricName": "MLP"
196 197 198
    },
    {
        "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
199
        "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
200
        "MetricGroup": "Mem;CacheMisses",
201 202 203 204
        "MetricName": "L1MPKI"
    },
    {
        "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
205
        "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY",
206
        "MetricGroup": "Mem;Backend;CacheMisses",
207 208 209
        "MetricName": "L2MPKI"
    },
    {
210 211 212 213
        "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
        "MetricExpr": "1000 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY",
        "MetricGroup": "Mem;CacheMisses",
        "MetricName": "L3MPKI"
214 215
    },
    {
216 217 218 219 220
        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
        "MetricConstraint": "NO_NMI_WATCHDOG",
        "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / CPU_CLK_UNHALTED.THREAD",
        "MetricGroup": "Mem;MemoryTLB",
        "MetricName": "Page_Walks_Utilization"
221 222
    },
    {
223 224 225 226
        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
        "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
        "MetricGroup": "Mem;MemoryTLB_SMT",
        "MetricName": "Page_Walks_Utilization_SMT"
227
    },
228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269
    {
        "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
        "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
        "MetricGroup": "Mem;MemoryBW",
        "MetricName": "L1D_Cache_Fill_BW"
    },
    {
        "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
        "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
        "MetricGroup": "Mem;MemoryBW",
        "MetricName": "L2_Cache_Fill_BW"
    },
    {
        "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
        "MetricGroup": "Mem;MemoryBW",
        "MetricName": "L3_Cache_Fill_BW"
    },
    {
        "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
        "MetricExpr": "(64 * L1D.REPLACEMENT / 1000000000 / duration_time)",
        "MetricGroup": "Mem;MemoryBW",
        "MetricName": "L1D_Cache_Fill_BW_1T"
    },
    {
        "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
        "MetricExpr": "(64 * L2_LINES_IN.ALL / 1000000000 / duration_time)",
        "MetricGroup": "Mem;MemoryBW",
        "MetricName": "L2_Cache_Fill_BW_1T"
    },
    {
        "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
        "MetricExpr": "(64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time)",
        "MetricGroup": "Mem;MemoryBW",
        "MetricName": "L3_Cache_Fill_BW_1T"
    },
    {
        "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
        "MetricExpr": "0",
        "MetricGroup": "Mem;MemoryBW;Offcore",
        "MetricName": "L3_Cache_Access_BW_1T"
    },
270 271
    {
        "BriefDescription": "Average CPU Utilization",
272
        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
273
        "MetricGroup": "HPC;Summary",
274 275
        "MetricName": "CPU_Utilization"
    },
276 277 278 279 280 281
    {
        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
        "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time",
        "MetricGroup": "Summary;Power",
        "MetricName": "Average_Frequency"
    },
282
    {
283
        "BriefDescription": "Average Frequency Utilization relative nominal frequency",
284
        "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
285 286 287 288
        "MetricGroup": "Power",
        "MetricName": "Turbo_Utilization"
    },
    {
289
        "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
290 291
        "MetricExpr": "1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
        "MetricGroup": "SMT",
292 293 294
        "MetricName": "SMT_2T_Utilization"
    },
    {
295 296 297
        "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
        "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
        "MetricGroup": "OS",
298 299
        "MetricName": "Kernel_Utilization"
    },
300 301 302 303 304 305
    {
        "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
        "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
        "MetricGroup": "OS",
        "MetricName": "Kernel_CPI"
    },
306
    {
307
        "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
308
        "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time",
309
        "MetricGroup": "HPC;Mem;MemoryBW;SoC",
310 311 312 313
        "MetricName": "DRAM_BW_Use"
    },
    {
        "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches",
314
        "MetricExpr": "1000000000 * ( cbox@event\\=0x36\\,umask\\=0x3\\,filter_opc\\=0x182@ / cbox@event\\=0x35\\,umask\\=0x3\\,filter_opc\\=0x182@ ) / ( cbox_0@event\\=0x0@ / duration_time )",
315 316
        "MetricGroup": "Mem;MemoryLat;SoC",
        "MetricName": "MEM_Read_Latency"
317 318 319
    },
    {
        "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
320
        "MetricExpr": "cbox@event\\=0x36\\,umask\\=0x3\\,filter_opc\\=0x182@ / cbox@event\\=0x36\\,umask\\=0x3\\,filter_opc\\=0x182\\,thresh\\=1@",
321 322
        "MetricGroup": "Mem;MemoryBW;SoC",
        "MetricName": "MEM_Parallel_Reads"
323 324 325
    },
    {
        "BriefDescription": "Socket actual clocks when any core is active on that socket",
326
        "MetricExpr": "cbox_0@event\\=0x0@",
327
        "MetricGroup": "SoC",
328 329
        "MetricName": "Socket_CLKS"
    },
330 331 332 333 334 335
    {
        "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
        "MetricGroup": "Branches;OS",
        "MetricName": "IpFarBranch"
    },
336
    {
337
        "BriefDescription": "C3 residency percent per core",
338 339 340 341 342
        "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
        "MetricGroup": "Power",
        "MetricName": "C3_Core_Residency"
    },
    {
343
        "BriefDescription": "C6 residency percent per core",
344 345 346 347 348
        "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
        "MetricGroup": "Power",
        "MetricName": "C6_Core_Residency"
    },
    {
349
        "BriefDescription": "C7 residency percent per core",
350 351 352 353 354
        "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
        "MetricGroup": "Power",
        "MetricName": "C7_Core_Residency"
    },
    {
355
        "BriefDescription": "C2 residency percent per package",
356 357 358 359 360
        "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
        "MetricGroup": "Power",
        "MetricName": "C2_Pkg_Residency"
    },
    {
361
        "BriefDescription": "C3 residency percent per package",
362 363 364 365 366
        "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
        "MetricGroup": "Power",
        "MetricName": "C3_Pkg_Residency"
    },
    {
367
        "BriefDescription": "C6 residency percent per package",
368 369 370 371 372
        "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
        "MetricGroup": "Power",
        "MetricName": "C6_Pkg_Residency"
    },
    {
373
        "BriefDescription": "C7 residency percent per package",
374 375 376 377 378
        "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
        "MetricGroup": "Power",
        "MetricName": "C7_Pkg_Residency"
    }
]