// main.c
#include <klib.h>
#include "maprobe.h"

// ---- typical test sets (run together by typical_latency_test) ----
void typical_linear_load_test_set()
{
    _perf_calibrate();
    printf("------------- linear load test set -------------\n");
    printf("page size linear double word load:\n");
    test_linear_access_latency(_PERF_PAGE_SIZE_BYTE, sizeof(uint64_t), 1, 0);
    test_linear_access_latency(_PERF_PAGE_SIZE_BYTE, sizeof(uint64_t), 2, 0);
    printf("page size linear cache line load:\n");
    test_linear_access_latency(_PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 1, 0);
    test_linear_access_latency(_PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
    printf("dcache/2 linear double word load:\n");
    test_linear_access_latency(_PERF_L1_SIZE_BYTE / 2, sizeof(uint64_t), 1, 0);
    test_linear_access_latency(_PERF_L1_SIZE_BYTE / 2, sizeof(uint64_t), 2, 0);
    printf("dcache/2 linear cache line load:\n");
    test_linear_access_latency(_PERF_L1_SIZE_BYTE / 2, _PERF_CACHELINE_SIZE_BYTE, 1, 0);
    test_linear_access_latency(_PERF_L1_SIZE_BYTE / 2, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
    printf("dcache linear double word load:\n");
    test_linear_access_latency(_PERF_L1_SIZE_BYTE, sizeof(uint64_t), 1, 0);
    test_linear_access_latency(_PERF_L1_SIZE_BYTE, sizeof(uint64_t), 2, 0);
    printf("dcache linear cache line load:\n");
    test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 1, 0);
    test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
    printf("L2 linear cache line load:\n");
    test_linear_access_latency(_PERF_L2_SIZE_BYTE / 2, _PERF_CACHELINE_SIZE_BYTE, 1, 0);
    test_linear_access_latency(_PERF_L2_SIZE_BYTE / 2, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
    printf("L1 (L1 same set) linear cache line load:\n");
    test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_L1_SAME_SET, 10, 0);
31
    test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_L1_SAME_SET, 100, 0);
32 33
    printf("L2 (L1 same set) linear cache line load:\n");
    test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_L1_SAME_SET, 2, 0);
34
    test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_L1_SAME_SET, 4, 0);
35 36 37 38 39 40 41 42
    printf("L1 (L2 same slice) linear cache line load:\n");
    test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_L2_SAME_SLICE, 1, 0);
    test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_L2_SAME_SLICE, 2, 0);
    printf("L2 (L2 same slice) linear cache line load:\n");
    test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_L2_SAME_SLICE, 1, 0);
    test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_L2_SAME_SLICE, 2, 0);
    printf("L1 (page traverse) linear cache line load:\n");
    test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_NEXT_PAGE, 10, 0);
43
    test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_NEXT_PAGE, 100, 0);
44 45
    printf("L2 (page traverse) linear cache line load:\n");
    test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_NEXT_PAGE, 2, 0);
46
    test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_NEXT_PAGE, 4, 0);
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
    printf("total samples: %ld\n", _perf_g_total_samples);
}

// Random-load latency test set.
//
// For each labelled configuration, test_random_access_latency() is called
// twice: first with its 4th argument set to 1, then with it set to 0. All
// other arguments are the same within a pair. The running value of
// _perf_g_total_samples is printed at the end.
//
// NOTE(review): the meaning of the 4th argument is not visible in this file;
// confirm against the declaration of test_random_access_latency.
void typical_random_load_test_set()
{
    printf("------------- random load test set -------------\n");

    printf("from page size random load (word):\n");
    for (int variant = 1; variant >= 0; variant--) {
        test_random_access_latency(1024, _PERF_PAGE_SIZE_BYTE, 8*BYTE, variant, 1, 0);
    }

    printf("from page size random load (cache line):\n");
    for (int variant = 1; variant >= 0; variant--) {
        test_random_access_latency(1024, _PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, variant, 1, 0);
    }

    printf("from dcache/2 size random load (word):\n");
    for (int variant = 1; variant >= 0; variant--) {
        test_random_access_latency(1024, _PERF_L1_SIZE_BYTE/2, 8*BYTE, variant, 1, 0);
    }

    printf("from dcache/2 size random load (cache line):\n");
    for (int variant = 1; variant >= 0; variant--) {
        test_random_access_latency(1024, _PERF_L1_SIZE_BYTE/2, _PERF_CACHELINE_SIZE_BYTE, variant, 1, 0);
    }

    // From here on the first argument is derived from the L1 and cache-line
    // size macros instead of being the constant 1024.
    printf("from dcache size random load (word):\n");
    for (int variant = 1; variant >= 0; variant--) {
        test_random_access_latency(_PERF_L1_SIZE_BYTE/_PERF_CACHELINE_SIZE_BYTE*2, _PERF_L1_SIZE_BYTE, 8*BYTE, variant, 1, 0);
    }

    printf("from dcache size random load (cache line):\n");
    for (int variant = 1; variant >= 0; variant--) {
        test_random_access_latency(_PERF_L1_SIZE_BYTE/_PERF_CACHELINE_SIZE_BYTE*2, _PERF_L1_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, variant, 1, 0);
    }

    printf("from l2 size random load (word):\n");
    for (int variant = 1; variant >= 0; variant--) {
        test_random_access_latency(_PERF_L1_SIZE_BYTE/_PERF_CACHELINE_SIZE_BYTE*2, _PERF_L2_SIZE_BYTE, 8*BYTE, variant, 1, 0);
    }

    printf("from l2 size random load (cache line):\n");
    for (int variant = 1; variant >= 0; variant--) {
        test_random_access_latency(_PERF_L1_SIZE_BYTE/_PERF_CACHELINE_SIZE_BYTE*2, _PERF_L2_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, variant, 1, 0);
    }

    printf("total samples: %ld\n", _perf_g_total_samples);
}

// Block-local shorthands for the two call shapes used below; both expand to a
// plain test_pointer_tracing_latency() call and are #undef'd after the
// function.
#define PTR_TRACE_CACHELINE_STEP(range, iters) \
    test_pointer_tracing_latency((range), _PERF_CACHELINE_SIZE_BYTE, (iters), 0)
#define PTR_TRACE_PAGE_STEP(range) \
    test_pointer_tracing_latency((range), _PERF_PAGE_SIZE_BYTE, 10, 0)

// Pointer-tracing latency test set.
//
// Runs test_pointer_tracing_latency() first with _PERF_CACHELINE_SIZE_BYTE as
// the second argument over six ranges, then with _PERF_PAGE_SIZE_BYTE as the
// second argument over five ranges, and prints _perf_g_total_samples.
void typical_pointer_tracing_load_test_set()
{
    printf("------------- pointer tracing load test set -------------\n");

    printf("cacheline by cacheline tracing:\n");
    PTR_TRACE_CACHELINE_STEP(_PERF_PAGE_SIZE_BYTE, 10);
    PTR_TRACE_CACHELINE_STEP(_PERF_L1_SIZE_BYTE/2, 2);
    PTR_TRACE_CACHELINE_STEP(_PERF_L1_SIZE_BYTE, 2);
    PTR_TRACE_CACHELINE_STEP(_PERF_L2_SIZE_BYTE/2, 2);
    PTR_TRACE_CACHELINE_STEP(_PERF_L2_SIZE_BYTE, 1);
    PTR_TRACE_CACHELINE_STEP(_PERF_L3_SIZE_BYTE/2, 1);

    printf("page by page, tracing:\n");
    PTR_TRACE_PAGE_STEP(_PERF_PAGE_SIZE_BYTE*2);
    PTR_TRACE_PAGE_STEP(_PERF_L1_SIZE_BYTE/2);
    PTR_TRACE_PAGE_STEP(_PERF_L1_SIZE_BYTE);
    PTR_TRACE_PAGE_STEP(_PERF_L2_SIZE_BYTE/2);
    PTR_TRACE_PAGE_STEP(_PERF_L2_SIZE_BYTE);

    printf("total samples: %ld\n", _perf_g_total_samples);
}

#undef PTR_TRACE_CACHELINE_STEP
#undef PTR_TRACE_PAGE_STEP

// Memory-disambiguation test set.
//
// Runs test_same_address_load_latency(1024, 0) three times, then
// test_read_after_write_latency(1024, 0) three times, printing a label before
// each group.
//
// NOTE(review): the second label says "load then store" while the helper is
// named read-after-write; the string is left unchanged in case tooling parses
// it -- confirm which wording is intended.
void typical_memory_disambiuation_test_set()
{
    printf("------------- memory disambiuation test set -------------\n");

    printf("load from the same address:\n");
    for (int run = 0; run < 3; run++) {
        test_same_address_load_latency(1024, 0);
    }

    printf("load then store to the same address:\n");
    for (int run = 0; run < 3; run++) {
        test_read_after_write_latency(1024, 0);
    }

    // more to be added
}

// typical latency test for fast regression
void typical_latency_test()
{
    _perf_g_total_samples = 0;
    typical_linear_load_test_set();
    typical_random_load_test_set();
    typical_pointer_tracing_load_test_set();
    typical_memory_disambiuation_test_set();
}

// Emits data for a pointer-tracing latency graph.
//
// Resets _perf_g_total_samples, calls _perf_calibrate(), prints a header row,
// and then calls test_pointer_tracing_latency() with the last argument set to
// 1 over four consecutive sweeps of the range argument:
//   [1 KB, 64 KB)     in 1 KB steps,  third argument 2
//   [64 KB, 1024 KB)  in 64 KB steps, third argument 1
//   1024 KB           single point,   third argument 1
//   [1 MB, 8 MB)      in 1 MB steps,  third argument 1
//
// NOTE(review): if 1*MB equals 1024*KB (the macro definitions are not visible
// here), the 1 MB point is measured twice -- confirm whether that is intended.
void pointer_tracing_graph()
{
    int range;

    _perf_g_total_samples = 0;
    _perf_calibrate();
    printf("data for pointer tracing latency graph:\n");
    printf("range (B), read latency, iters, samples\n");

    range = 1*KB;
    while (range < 64*KB) {
        test_pointer_tracing_latency(range, _PERF_CACHELINE_SIZE_BYTE, 2, 1);
        range += 1*KB;
    }

    range = 64*KB;
    while (range < 1024*KB) {
        test_pointer_tracing_latency(range, _PERF_CACHELINE_SIZE_BYTE, 1, 1);
        range += 64*KB;
    }

    test_pointer_tracing_latency(1024*KB, _PERF_CACHELINE_SIZE_BYTE, 1, 1);

    range = 1*MB;
    while (range < 8*MB) {
        test_pointer_tracing_latency(range, _PERF_CACHELINE_SIZE_BYTE, 1, 1);
        range += 1*MB;
    }

    printf("total samples: %ld\n", _perf_g_total_samples);
}

// a simple test set used to check if test is working correctly
void latency_test_example()
{
    _perf_calibrate();
    printf("latency test example:\n");
    test_pointer_tracing_latency(_PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 5, 0);
148
    test_linear_access_latency(_PERF_PAGE_SIZE_BYTE, sizeof(uint64_t), 5, 0);
149 150 151 152
    test_linear_access_latency(_PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 5, 0);
    test_random_access_latency(4096, 1024*MB, _PERF_CACHELINE_SIZE_BYTE, 0, 1, 0);
    test_random_access_latency(4096, 1024*MB, _PERF_CACHELINE_SIZE_BYTE, 1, 1, 0);
    test_same_address_load_latency(1024, 0);
153
    test_read_after_write_latency(1024, 0);
154 155 156 157
    printf("total samples: %ld\n", _perf_g_total_samples);
}

void legacy_latency_throughput_test()
158 159 160
{
    _perf_calibrate();
    printf("Memory throughput:\n");
161
    legacy_test_mem_throughput(1024);
162
    printf("L1 latency:\n");
163 164 165 166
    test_pointer_tracing_latency(_PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 5, 0);
    test_pointer_tracing_latency(_PERF_L1_NOALIAS_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
    test_pointer_tracing_latency(_PERF_L1_SIZE_BYTE/2, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
    test_pointer_tracing_latency(_PERF_L1_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
167
    printf("L2 latency:\n");
168 169
    test_pointer_tracing_latency(_PERF_L2_SIZE_BYTE/2, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
    // test_pointer_tracing_latency(_PERF_L2_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
170
    printf("L3 latency:\n");
171 172
    test_pointer_tracing_latency(_PERF_L3_SIZE_BYTE/2, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
    // test_pointer_tracing_latency(_PERF_L3_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE,2, 0);
173
    // printf("MEM:\n");
174 175 176
    // test_pointer_tracing_latency(_PERF_L3_SIZE_BYTE*2, _PERF_CACHELINE_SIZE_BYTE,2, 0);
    printf("total samples: %ld\n", _perf_g_total_samples);
}

// ---- benchmark selection ----
// Runs latency_test_example() followed by typical_latency_test() and
// returns 0. The other test drivers are kept as commented-out alternatives.
int main()
{
    latency_test_example();
    typical_latency_test();
    // pointer_tracing_graph();
    // latency_test();
    // legacy_latency_throughput_test();
    return 0;
}