// maprobe.h: basic microarchitectural probe

#ifndef PROBE_H
#define PROBE_H

#include <stdint.h>

#include <klib.h>
#include <csr.h>
#include "bitutils.h"
#include "resultmat.h"

// config
// #define PERF_SIM // probe runs in simulator, disable perf counters

// perf const
// Size units, expressed in bytes. Every product below has type int:
// GB (2^30) still fits a 32-bit int, but a larger multiple such as (4 * GB)
// would overflow -- widen first, e.g. ((uint64_t)4 * GB).
#define BYTE (1)
#define KB (1024*BYTE)
#define MB (1024*KB)
#define GB (1024*MB)

// platform dependent const
// Base address used by the perf tests. The #ifndef guard lets the build
// override it (e.g. -D_PERF_TEST_ADDR_BASE=...) before this header is seen.
#ifndef _PERF_TEST_ADDR_BASE
#define _PERF_TEST_ADDR_BASE 0x80400000
// #define _PERF_TEST_ADDR_BASE 0x2000400000
#endif
#define _PERF_CACHELINE_SIZE_BYTE (64 * BYTE)
#define _PERF_PAGE_SIZE_BYTE (4 * KB)
// Equals _PERF_L1_NUM_SETS * _PERF_CACHELINE_SIZE_BYTE (the size of one way).
// NOTE(review): presumably the largest range without L1 set aliasing -- confirm.
#define _PERF_L1_NOALIAS_SIZE_BYTE (16 * KB)
// Consistent with the geometry below: 4 ways * 256 sets * 64 B = 64 KB.
#define _PERF_L1_SIZE_BYTE (64 * KB)
#define _PERF_L2_SIZE_BYTE (1 * MB)
#define _PERF_L3_SIZE_BYTE (6 * MB)
#define _PERF_MEM_SIZE_BYTE (1024 * MB)
#define _PERF_L1_NUM_WAYS 4
#define _PERF_L1_NUM_SETS 256
#define _PERF_L2_NUM_SLICES 4
// #define _PERF_L2_NUM_SETS 512

// Address strides, in bytes, derived from the geometry above.
// The names state the intended effect of each stride; the mapping to
// banks/sets/slices depends on the target's indexing -- verify per platform.
#define _PERF_ADDR_STRIDE_L1_SAME_BANK _PERF_CACHELINE_SIZE_BYTE
#define _PERF_ADDR_STRIDE_L1_SAME_SET (_PERF_L1_NUM_SETS * _PERF_CACHELINE_SIZE_BYTE)
#define _PERF_ADDR_STRIDE_L2_SAME_SLICE (_PERF_L2_NUM_SLICES * _PERF_CACHELINE_SIZE_BYTE)
// #define _PERF_ADDR_STRIDE_L2_SAME_SET (_PERF_L2_NUM_SETS * _PERF_CACHELINE_SIZE_BYTE)
#define _PERF_ADDR_STRIDE_NEXT_PAGE (_PERF_PAGE_SIZE_BYTE)

// probe const
// Alias of the test base address.
// NOTE(review): presumably the sink address written by _perf_blackhole() -- confirm.
#define _PERF_BLACKHOLE _PERF_TEST_ADDR_BASE

// Calibration constants and timer values shared by the probe.
struct perf
{
    // const to be calibrated at run time
    uint64_t csr_read_cycle; // # of cycles to read mcycle
    uint64_t csr_read_ninst; // # of inst needed to read minstret

    // timer
    // NOTE(review): presumably filled in by the timer routines declared in
    // this header -- confirm against their implementation.
    uint64_t cycle;
    uint64_t instrcnt;
};
// Declaration only: the object must be defined in exactly one .c file.
extern struct perf perf;

// Declaration only; defined elsewhere.
extern uint64_t _perf_g_total_samples;

// common perf tools
// All of these are defined elsewhere; only their signatures are visible here.
// The parameterless ones are declared with (void) so that they are real
// prototypes: before C23 an empty () leaves the parameters unspecified and
// the compiler cannot diagnose stray arguments at call sites.
extern void _perf_start_timer(void);
extern void _perf_end_timer(void);
extern void _perf_print_timer(void);
extern void _perf_calibrate(void);
extern void _perf_blackhole(uint64_t value);

// latency test
// Declarations only; the implementations live in another translation unit.
// In the test_* entry points, `iter` and `to_csv` are plain ints.
// NOTE(review): presumably `iter` is a repeat count and `to_csv` a boolean
// that selects CSV output -- confirm in the implementation.
// NOTE(review): test_pointer_tracing_latency takes `int step` while the
// linear tests take `uint64_t step`; kept as-is to preserve the interface.
extern uint64_t setup_pointer_tracing_linklist(uint64_t base_addr, uint64_t end_addr, uint64_t step);
extern uint64_t read_pointer_tracing_linklist(uint64_t base_addr, uint64_t num_valid_node);
extern void latency_test_warmup(uint64_t base_addr, uint64_t end_addr);
extern float test_pointer_tracing_latency(uint64_t size, int step, int iter, int to_csv);
extern float test_linear_access_latency(uint64_t size, uint64_t step, int iter, int to_csv);
extern float test_random_access_latency(uint64_t num_access, uint64_t test_range, uint64_t test_align, int pregen_addr, int iter, int to_csv);
extern float test_same_address_load_latency(int iter, int to_csv);
extern float test_read_after_write_latency(int iter, int to_csv);
extern float test_linear_write_latency(uint64_t size, uint64_t step, int iter, int to_csv);

// bandwidth test
// Declarations only; defined elsewhere. All three share the same signature
// and return a float.
// NOTE(review): units of the returned value and of `size` are not visible
// from this header -- confirm in the implementation.
extern float test_l1_load_bandwidth(uint64_t size, int iter, int to_csv);
extern float test_l1_store_bandwidth(uint64_t size, int iter, int to_csv);
extern float test_l1_store_wcb_bandwidth(uint64_t size, int iter, int to_csv);

// key parameter matrix generate
// Declarations only; defined elsewhere. Declared with (void) so they are
// real prototypes (an empty () leaves the parameters unspecified before C23),
// and with `extern` to match the other declarations in this header.
extern void generate_linear_access_latency_matrix(void);
extern void generate_pointer_tracing_latency_matrix(void);
extern void generate_random_access_latency_matrix(void);

// legacy test
// Declarations only; defined elsewhere.
// NOTE(review): `iter` is presumably a repeat count -- confirm in the implementation.
extern void legacy_test_mem_throughput(uint64_t iter);
extern void legacy_test_mem_throughput_same_set(uint64_t iter);

#endif