maprobe: let all tests return their results

d9b43ff8 · William Wang · b27f2968 · d9b43ff8 · d9b43ff8 · d9b43ff8
Showing 3 changed files with 51 additions and 33 deletions

apps/maprobe/bandwidth-test.c apps/maprobe/bandwidth-test.c +11 -5

apps/maprobe/include/maprobe.h apps/maprobe/include/maprobe.h +8 -8

apps/maprobe/latency-test.c apps/maprobe/latency-test.c +32 -20

未找到文件。
--- a/apps/maprobe/bandwidth-test.c
+++ b/apps/maprobe/bandwidth-test.c
 #include "maprobe.h"

-void test_l1_load_bandwidth(uint64_t size, int iter, int to_csv)
+float test_l1_load_bandwidth(uint64_t size, int iter, int to_csv)
 {
    // printf("stride %d linear access latency test\n", step);
    // printf("range (B), read latency, iters, samples, cycles\n");
@@ -23,17 +23,19 @@ void test_l1_load_bandwidth(uint64_t size, int iter, int to_csv)
    _perf_end_timer();
    // _perf_print_timer();
    uint64_t total_access = size / _PERF_CACHELINE_SIZE_BYTE * 8 * iter;
+    float bandwidth = total_access * 8 * BYTE / (float)perf.cycle;
    if (to_csv) {
        printf("%ld, %f, %d, %ld, %ld\n", size, (float)perf.cycle / total_access, iter, total_access, perf.cycle);
    } else {
        printf("range %ldKB (%d iters) dcache linear (8Byte) read, latency %f, throughput %f B/cycle (%ld samples, %ld cycles), stride %dB\n", 
-            size/KB, iter, (float)perf.cycle / total_access, total_access * 8 * BYTE / (float)perf.cycle, total_access, perf.cycle, 8
+            size/KB, iter, (float)perf.cycle / total_access, bandwidth, total_access, perf.cycle, 8
        );
    }
    _perf_g_total_samples += total_access;
+    return bandwidth;
 }

-void test_l1_store_bandwidth(uint64_t size, int iter, int to_csv)
+float test_l1_store_bandwidth(uint64_t size, int iter, int to_csv)
 {
    // printf("stride %d linear access latency test\n", step);
    // printf("range (B), read latency, iters, samples, cycles\n");
@@ -56,17 +58,19 @@ void test_l1_store_bandwidth(uint64_t size, int iter, int to_csv)
    _perf_end_timer();
    // _perf_print_timer();
    uint64_t total_access = size / _PERF_CACHELINE_SIZE_BYTE * 8 * iter;
+    float bandwidth = total_access * 8 * BYTE / (float)perf.cycle;
    if (to_csv) {
        printf("%ld, %f, %d, %ld, %ld\n", size, (float)perf.cycle / total_access, iter, total_access, perf.cycle);
    } else {
        printf("range %ldKB (%d iters) dcache linear (8Byte) store latency %f, throughput %f B/cycle (%ld samples, %ld cycles), stride %dB\n", 
-            size/KB, iter, (float)perf.cycle / total_access, total_access * 8 * BYTE / (float)perf.cycle, total_access, perf.cycle, 8
+            size/KB, iter, (float)perf.cycle / total_access, bandwidth, total_access, perf.cycle, 8
        );
    }
    _perf_g_total_samples += total_access;
+    return bandwidth;
 }

-void test_l1_store_wcb_bandwidth(uint64_t size, int iter, int to_csv)
+float test_l1_store_wcb_bandwidth(uint64_t size, int iter, int to_csv)
 {
    // printf("stride %d linear access latency test\n", step);
    // printf("range (B), read latency, iters, samples, cycles\n");
@@ -82,6 +86,7 @@ void test_l1_store_wcb_bandwidth(uint64_t size, int iter, int to_csv)
    _perf_end_timer();
    // _perf_print_timer();
    uint64_t total_access = size / _PERF_CACHELINE_SIZE_BYTE * iter;
+    float bandwidth = total_access * _PERF_CACHELINE_SIZE_BYTE / (float)perf.cycle;
    if (to_csv) {
        printf("%ld, %f, %d, %ld, %ld\n", size, (float)perf.cycle / total_access, iter, total_access, perf.cycle);
    } else {
@@ -90,4 +95,5 @@ void test_l1_store_wcb_bandwidth(uint64_t size, int iter, int to_csv)
        );
    }
    _perf_g_total_samples += total_access;
+    return bandwidth;
 }
\ No newline at end of file
--- a/apps/maprobe/include/maprobe.h
+++ b/apps/maprobe/include/maprobe.h
@@ -68,16 +68,16 @@ extern void _perf_blackhole(uint64_t value);
 extern uint64_t setup_pointer_tracing_linklist(uint64_t base_addr, uint64_t end_addr, uint64_t step);
 extern uint64_t read_pointer_tracing_linklist(uint64_t base_addr, uint64_t num_valid_node);
 extern void latency_test_warmup(uint64_t base_addr, uint64_t end_addr);
-extern void test_pointer_tracing_latency(uint64_t size, int step, int iter, int to_csv);
-extern void test_linear_access_latency(uint64_t size, uint64_t step, int iter, int to_csv);
-extern void test_random_access_latency(uint64_t num_access, uint64_t test_range, uint64_t test_align, int pregen_addr, int iter, int to_csv);
-extern void test_same_address_load_latency(int iter, int to_csv);
-extern void test_read_after_write_latency(int iter, int to_csv);
+extern float test_pointer_tracing_latency(uint64_t size, int step, int iter, int to_csv);
+extern float test_linear_access_latency(uint64_t size, uint64_t step, int iter, int to_csv);
+extern float test_random_access_latency(uint64_t num_access, uint64_t test_range, uint64_t test_align, int pregen_addr, int iter, int to_csv);
+extern float test_same_address_load_latency(int iter, int to_csv);
+extern float test_read_after_write_latency(int iter, int to_csv);

 // bandwidth test
-extern void test_l1_load_bandwidth(uint64_t size, int iter, int to_csv);
-extern void test_l1_store_bandwidth(uint64_t size, int iter, int to_csv);
-extern void test_l1_store_wcb_bandwidth(uint64_t size, int iter, int to_csv);
+extern float test_l1_load_bandwidth(uint64_t size, int iter, int to_csv);
+extern float test_l1_store_bandwidth(uint64_t size, int iter, int to_csv);
+extern float test_l1_store_wcb_bandwidth(uint64_t size, int iter, int to_csv);

 extern void legacy_test_mem_throughput(uint64_t iter);
 extern void legacy_test_mem_throughput_same_set(uint64_t iter);

--- a/apps/maprobe/latency-test.c
+++ b/apps/maprobe/latency-test.c
@@ -42,7 +42,7 @@ void latency_test_warmup(uint64_t base_addr, uint64_t end_addr)
    setup_pointer_tracing_linklist(base_addr, end_addr, _PERF_CACHELINE_SIZE_BYTE);
 }

-void test_pointer_tracing_latency(uint64_t size, int step, int iter, int to_csv)
+float test_pointer_tracing_latency(uint64_t size, int step, int iter, int to_csv)
 {
    // printf("pointer tracing latency test\n");
    // printf("range (B), read latency, iters, samples, cycles\n");
@@ -59,19 +59,21 @@ void test_pointer_tracing_latency(uint64_t size, int step, int iter, int to_csv)
    }
    _perf_end_timer();
    // _perf_print_timer();
+    float acpa = (float)perf.cycle / total_node; // average cycle per access
    if (to_csv) {
-        printf("%ld, %f, %d, %ld, %ld\n", size, (float)perf.cycle / total_node, iter, total_node, perf.cycle);
+        printf("%ld, %f, %d, %ld, %ld\n", size, acpa, iter, total_node, perf.cycle);
    } else {
        printf("range %ldKB (%d iters) pointer tracing read latency %f, throughput %f B/cycle (%ld samples, %ld cycles)\n",
-            size/KB, iter, (float)perf.cycle / total_node, total_node * 8 * BYTE / (float)perf.cycle, total_node, perf.cycle
+            size/KB, iter, acpa, total_node * 8 * BYTE / (float)perf.cycle, total_node, perf.cycle
        );
    }

    _perf_blackhole(result);
    _perf_g_total_samples += total_node;
+    return acpa;
 }

-void test_same_address_load_latency(int iter, int to_csv)
+float test_same_address_load_latency(int iter, int to_csv)
 {
    // printf("same address load latency test\n", step);
    // printf("range (B), read latency, iters, samples, cycles\n");
@@ -86,19 +88,21 @@ void test_same_address_load_latency(int iter, int to_csv)
    _perf_end_timer();
    // _perf_print_timer();
    uint64_t total_access = iter;
+    float acpa = (float)perf.cycle / total_access; // average cycle per access
    if (to_csv) {
-        printf("%ld, %f, %d, %ld, %ld\n", 0, (float)perf.cycle / total_access, iter, total_access, perf.cycle);
+        printf("%ld, %f, %d, %ld, %ld\n", 0, acpa, iter, total_access, perf.cycle);
    } else {
        printf("same address read latency %f, throughput %f B/cycle (%ld samples, %ld cycles)\n", 
-            (float)perf.cycle / total_access, total_access * 8 * BYTE / (float)perf.cycle, total_access, perf.cycle
+            acpa, total_access * 8 * BYTE / (float)perf.cycle, total_access, perf.cycle
        );
    }

    _perf_blackhole(result);
    _perf_g_total_samples += total_access;
+    return acpa;
 }

-void test_read_after_write_latency(int iter, int to_csv)
+float test_read_after_write_latency(int iter, int to_csv)
 {
    // printf("same address store-load latency test\n", step);
    // printf("range (B), read latency, iters, samples, cycles\n");
@@ -114,19 +118,21 @@ void test_read_after_write_latency(int iter, int to_csv)
    _perf_end_timer();
    // _perf_print_timer();
    uint64_t total_access = iter;
+    float acpa = (float)perf.cycle / total_access; // average cycle per access
    if (to_csv) {
-        printf("%ld, %f, %d, %ld, %ld\n", 0, (float)perf.cycle / total_access, iter, total_access, perf.cycle);
+        printf("%ld, %f, %d, %ld, %ld\n", 0, acpa, iter, total_access, perf.cycle);
    } else {
        printf("read after write latency %f, throughput %f B/cycle (%ld samples, %ld cycles)\n", 
-            (float)perf.cycle / total_access, total_access * 8 * BYTE / (float)perf.cycle, total_access, perf.cycle
+            acpa, total_access * 8 * BYTE / (float)perf.cycle, total_access, perf.cycle
        );
    }

    _perf_blackhole(result);
    _perf_g_total_samples += total_access;
+    return acpa;
 }

-void test_linear_access_latency_simple(uint64_t size, uint64_t step, int iter, int to_csv)
+float test_linear_access_latency_simple(uint64_t size, uint64_t step, int iter, int to_csv)
 {
    // printf("stride %d linear access latency test\n", step);
    // printf("range (B), read latency, iters, samples, cycles\n");
@@ -145,19 +151,21 @@ void test_linear_access_latency_simple(uint64_t size, uint64_t step, int iter, i
    _perf_end_timer();
    // _perf_print_timer();
    uint64_t total_access = num_access * iter;
+    float acpa = (float)perf.cycle / total_access; // average cycle per access
    if (to_csv) {
-        printf("%ld, %f, %d, %ld, %ld\n", size, (float)perf.cycle / total_access, iter, total_access, perf.cycle);
+        printf("%ld, %f, %d, %ld, %ld\n", size, acpa, iter, total_access, perf.cycle);
    } else {
        printf("range %ldKB (%d iters) simple linear read latency %f, throughput %f B/cycle (%ld samples, %ld cycles), stride %dB\n", 
-            size/KB, iter, (float)perf.cycle / total_access, total_access * 8 * BYTE / (float)perf.cycle, total_access, perf.cycle, step
+            size/KB, iter, acpa, total_access * 8 * BYTE / (float)perf.cycle, total_access, perf.cycle, step
        );
    }

    _perf_blackhole(result);
    _perf_g_total_samples += total_access;
+    return acpa;
 }

-void test_linear_access_latency_batch8(uint64_t size, uint64_t step, int iter, int to_csv)
+float test_linear_access_latency_batch8(uint64_t size, uint64_t step, int iter, int to_csv)
 {
    // printf("stride %d linear access latency test\n", step);
    // printf("range (B), read latency, iters, samples, cycles\n");
@@ -203,24 +211,26 @@ void test_linear_access_latency_batch8(uint64_t size, uint64_t step, int iter, i
    _perf_end_timer();
    // _perf_print_timer();
    uint64_t total_access = num_access * iter;
+    float acpa = (float)perf.cycle / total_access; // average cycle per access
    if (to_csv) {
-        printf("%ld, %f, %d, %ld, %ld\n", size, (float)perf.cycle / total_access, iter, total_access, perf.cycle);
+        printf("%ld, %f, %d, %ld, %ld\n", size, acpa, iter, total_access, perf.cycle);
    } else {
        printf("range %ldKB (%d iters) batch(8) linear read latency %f, throughput %f B/cycle (%ld samples, %ld cycles), stride %dB\n", 
-            size/KB, iter, (float)perf.cycle / total_access, total_access * 8 * BYTE / (float)perf.cycle, total_access, perf.cycle, step
+            size/KB, iter, acpa, total_access * 8 * BYTE / (float)perf.cycle, total_access, perf.cycle, step
        );
    }

    _perf_blackhole(result);
    _perf_g_total_samples += total_access;
+    return acpa;
 }

-void test_linear_access_latency(uint64_t size, uint64_t step, int iter, int to_csv)
+float test_linear_access_latency(uint64_t size, uint64_t step, int iter, int to_csv)
 {
-    test_linear_access_latency_batch8(size, step, iter, to_csv);
+    return test_linear_access_latency_batch8(size, step, iter, to_csv);
 }

-void test_random_access_latency(uint64_t num_access, uint64_t test_range, uint64_t test_align, int pregen_addr, int iter, int to_csv)
+float test_random_access_latency(uint64_t num_access, uint64_t test_range, uint64_t test_align, int pregen_addr, int iter, int to_csv)
 {
    // printf("align %d random access (cache line) latency test, %s\n",
    //     test_align, pregen_addr ? "use pregen addr array" : "gen rand addr at run time"
@@ -255,17 +265,19 @@ void test_random_access_latency(uint64_t num_access, uint64_t test_range, uint64
    }
    // _perf_print_timer();
    uint64_t total_access = num_access * iter;
+    float acpa = (float)perf.cycle / total_access; // average cycle per access
    if (to_csv) {
-        printf("%ld, %f, %d, %ld, %ld\n", test_range, (float)perf.cycle / total_access, iter, total_access, perf.cycle);
+        printf("%ld, %f, %d, %ld, %ld\n", test_range, acpa, iter, total_access, perf.cycle);
    } else {
        printf("range %ldKB, access cover %ldKB (%d iters) random read latency %f, throughput %f B/cycle (%ld samples, %ld cycles), align %ldB, %s\n", 
-            test_range/KB, total_access*8*_PERF_CACHELINE_SIZE_BYTE/KB, iter, (float)perf.cycle / (total_access), total_access * 8 * BYTE / (float)perf.cycle, total_access, perf.cycle, test_align,
+            test_range/KB, total_access*8*_PERF_CACHELINE_SIZE_BYTE/KB, iter, acpa, total_access * 8 * BYTE / (float)perf.cycle, total_access, perf.cycle, test_align,
            pregen_addr ? "pregen addr" : "runtime addr"
        );
    }

    _perf_blackhole(result);
    _perf_g_total_samples += total_access;
+    return acpa;
 }

 void legacy_test_mem_throughput(uint64_t iter)