提交 47fe2bc9 编写于 作者: W William Wang

maprobe: fix usage of volatile to avoid extra store

上级 421b3b8f
...@@ -71,6 +71,7 @@ extern void test_pointer_tracing_latency(uint64_t size, int step, int iter, int ...@@ -71,6 +71,7 @@ extern void test_pointer_tracing_latency(uint64_t size, int step, int iter, int
extern void test_linear_access_latency(uint64_t size, uint64_t step, int iter, int to_csv); extern void test_linear_access_latency(uint64_t size, uint64_t step, int iter, int to_csv);
extern void test_random_access_latency(uint64_t num_access, uint64_t test_range, uint64_t test_align, int pregen_addr, int iter, int to_csv); extern void test_random_access_latency(uint64_t num_access, uint64_t test_range, uint64_t test_align, int pregen_addr, int iter, int to_csv);
extern void test_same_address_load_latency(int iter, int to_csv); extern void test_same_address_load_latency(int iter, int to_csv);
extern void test_read_after_write_latency(int iter, int to_csv);
extern void legacy_test_mem_throughput(uint64_t iter); extern void legacy_test_mem_throughput(uint64_t iter);
extern void legacy_test_mem_throughput_same_set(uint64_t iter); extern void legacy_test_mem_throughput_same_set(uint64_t iter);
......
...@@ -50,7 +50,7 @@ void test_pointer_tracing_latency(uint64_t size, int step, int iter, int to_csv) ...@@ -50,7 +50,7 @@ void test_pointer_tracing_latency(uint64_t size, int step, int iter, int to_csv)
{ {
// printf("pointer tracing latency test\n"); // printf("pointer tracing latency test\n");
// printf("range (B), read latency, iters, samples, cycles\n"); // printf("range (B), read latency, iters, samples, cycles\n");
volatile uint64_t result = 0; // make sure compiler will not opt read_pointer_tracing_linklist register uint64_t result = 0; // make sure compiler will not opt read_pointer_tracing_linklist
_perf_start_timer(); _perf_start_timer();
uint64_t nnode = setup_pointer_tracing_linklist(_PERF_TEST_ADDR_BASE, _PERF_TEST_ADDR_BASE + size, step); uint64_t nnode = setup_pointer_tracing_linklist(_PERF_TEST_ADDR_BASE, _PERF_TEST_ADDR_BASE + size, step);
_perf_end_timer(); _perf_end_timer();
...@@ -79,13 +79,13 @@ void test_same_address_load_latency(int iter, int to_csv) ...@@ -79,13 +79,13 @@ void test_same_address_load_latency(int iter, int to_csv)
{ {
// printf("same address load latency test\n", step); // printf("same address load latency test\n", step);
// printf("range (B), read latency, iters, samples, cycles\n"); // printf("range (B), read latency, iters, samples, cycles\n");
volatile uint64_t result = 0; // make sure compiler will not opt read_pointer_tracing_linklist register uint64_t result = 0; // make sure compiler will not opt read_pointer_tracing_linklist
// _perf_print_timer(); // _perf_print_timer();
_perf_start_timer(); _perf_start_timer();
uint64_t address = _PERF_TEST_ADDR_BASE; uint64_t address = _PERF_TEST_ADDR_BASE;
for (int i = 0; i < iter; i++) { for (int i = 0; i < iter; i++) {
result += *((uint64_t*) (address)); result += *((volatile uint64_t*) (address));
} }
_perf_end_timer(); _perf_end_timer();
// _perf_print_timer(); // _perf_print_timer();
...@@ -102,11 +102,39 @@ void test_same_address_load_latency(int iter, int to_csv) ...@@ -102,11 +102,39 @@ void test_same_address_load_latency(int iter, int to_csv)
_perf_g_total_samples += total_access; _perf_g_total_samples += total_access;
} }
/*
 * Time a loop that, on every iteration, loads one uint64_t from memory and
 * adds it to a volatile accumulator, then report the average cycles per
 * iteration.
 *
 * The loads walk consecutive uint64_t slots starting at _PERF_TEST_ADDR_BASE.
 * Because `result` is volatile, every `+=` reads `result` and writes it back,
 * so each iteration's read of `result` follows the previous iteration's write
 * to it.
 * NOTE(review): presumably that write -> read dependency on `result` is the
 * "read after write" being measured -- confirm with the author.
 *
 * iter:   number of loop iterations (also reported as the sample count).
 * to_csv: non-zero prints one CSV row; zero prints a human-readable line.
 *
 * Side effects: prints via printf and adds `iter` to _perf_g_total_samples.
 */
void test_read_after_write_latency(int iter, int to_csv)
{
    // printf("same address store-load latency test\n", step);
    // printf("range (B), read latency, iters, samples, cycles\n");
    volatile uint64_t result = 0; // make sure compiler will store data to memory
    // _perf_print_timer();
    _perf_start_timer();
    uint64_t address = _PERF_TEST_ADDR_BASE;
    for (int i = 0; i < iter; i++) {
        result += *((uint64_t*) (address));
        address += sizeof(uint64_t);
    }
    _perf_end_timer();
    // _perf_print_timer();
    uint64_t total_access = iter;
    if (to_csv) {
        // Every %ld must receive a long: the literal 0 is an int, so pass 0L,
        // and cast the counters explicitly.
        // NOTE(review): assumes perf.cycle is an integer counter -- confirm.
        printf("%ld, %f, %d, %ld, %ld\n", 0L, (float)perf.cycle / total_access, iter, (long)total_access, (long)perf.cycle);
    } else {
        printf("read after write latency %f, throughput %f B/cycle (%ld samples, %ld cycles)\n",
            (float)perf.cycle / total_access, total_access * 8 * BYTE / (float)perf.cycle, (long)total_access, (long)perf.cycle
        );
    }
    // Hand the accumulated value to the project's sink helper.
    _perf_blackhole(result);
    _perf_g_total_samples += total_access;
}
void test_linear_access_latency(uint64_t size, uint64_t step, int iter, int to_csv) void test_linear_access_latency(uint64_t size, uint64_t step, int iter, int to_csv)
{ {
// printf("stride %d linear access latency test\n", step); // printf("stride %d linear access latency test\n", step);
// printf("range (B), read latency, iters, samples, cycles\n"); // printf("range (B), read latency, iters, samples, cycles\n");
volatile uint64_t result = 0; // make sure compiler will not opt read_pointer_tracing_linklist register uint64_t result = 0; // make sure compiler will not opt read_pointer_tracing_linklist
uint64_t num_access = size / step; uint64_t num_access = size / step;
// _perf_print_timer(); // _perf_print_timer();
...@@ -139,7 +167,7 @@ void test_random_access_latency(uint64_t num_access, uint64_t test_range, uint64 ...@@ -139,7 +167,7 @@ void test_random_access_latency(uint64_t num_access, uint64_t test_range, uint64
// test_align, pregen_addr ? "use pregen addr array" : "gen rand addr at run time" // test_align, pregen_addr ? "use pregen addr array" : "gen rand addr at run time"
// ); // );
// printf("range (B), read latency, iters, samples, cycles\n"); // printf("range (B), read latency, iters, samples, cycles\n");
volatile uint64_t result = 0; // make sure compiler will not opt read_pointer_tracing_linklist register uint64_t result = 0; // make sure compiler will not opt read_pointer_tracing_linklist
// _perf_print_timer(); // _perf_print_timer();
// alloc memory for random access addr array and data // alloc memory for random access addr array and data
......
...@@ -27,11 +27,11 @@ void typical_linear_load_test_set() ...@@ -27,11 +27,11 @@ void typical_linear_load_test_set()
test_linear_access_latency(_PERF_L2_SIZE_BYTE / 2, _PERF_CACHELINE_SIZE_BYTE, 1, 0); test_linear_access_latency(_PERF_L2_SIZE_BYTE / 2, _PERF_CACHELINE_SIZE_BYTE, 1, 0);
test_linear_access_latency(_PERF_L2_SIZE_BYTE / 2, _PERF_CACHELINE_SIZE_BYTE, 2, 0); test_linear_access_latency(_PERF_L2_SIZE_BYTE / 2, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
printf("L1 (L1 same set) linear cache line load:\n"); printf("L1 (L1 same set) linear cache line load:\n");
test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_L1_SAME_SET, 1, 0);
test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_L1_SAME_SET, 10, 0); test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_L1_SAME_SET, 10, 0);
test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_L1_SAME_SET, 100, 0);
printf("L2 (L1 same set) linear cache line load:\n"); printf("L2 (L1 same set) linear cache line load:\n");
test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_L1_SAME_SET, 1, 0);
test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_L1_SAME_SET, 2, 0); test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_L1_SAME_SET, 2, 0);
test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_L1_SAME_SET, 4, 0);
printf("L1 (L2 same slice) linear cache line load:\n"); printf("L1 (L2 same slice) linear cache line load:\n");
test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_L2_SAME_SLICE, 1, 0); test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_L2_SAME_SLICE, 1, 0);
test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_L2_SAME_SLICE, 2, 0); test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_L2_SAME_SLICE, 2, 0);
...@@ -39,11 +39,11 @@ void typical_linear_load_test_set() ...@@ -39,11 +39,11 @@ void typical_linear_load_test_set()
test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_L2_SAME_SLICE, 1, 0); test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_L2_SAME_SLICE, 1, 0);
test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_L2_SAME_SLICE, 2, 0); test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_L2_SAME_SLICE, 2, 0);
printf("L1 (page traverse) linear cache line load:\n"); printf("L1 (page traverse) linear cache line load:\n");
test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_NEXT_PAGE, 1, 0);
test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_NEXT_PAGE, 10, 0); test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_NEXT_PAGE, 10, 0);
test_linear_access_latency(_PERF_L1_SIZE_BYTE, _PERF_ADDR_STRIDE_NEXT_PAGE, 100, 0);
printf("L2 (page traverse) linear cache line load:\n"); printf("L2 (page traverse) linear cache line load:\n");
test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_NEXT_PAGE, 1, 0);
test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_NEXT_PAGE, 2, 0); test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_NEXT_PAGE, 2, 0);
test_linear_access_latency(_PERF_L2_SIZE_BYTE, _PERF_ADDR_STRIDE_NEXT_PAGE, 4, 0);
printf("total samples: %ld\n", _perf_g_total_samples); printf("total samples: %ld\n", _perf_g_total_samples);
} }
...@@ -103,6 +103,10 @@ void typical_memory_disambiuation_test_set() ...@@ -103,6 +103,10 @@ void typical_memory_disambiuation_test_set()
test_same_address_load_latency(1024, 0); test_same_address_load_latency(1024, 0);
test_same_address_load_latency(1024, 0); test_same_address_load_latency(1024, 0);
test_same_address_load_latency(1024, 0); test_same_address_load_latency(1024, 0);
printf("load then store to the same address:\n");
test_read_after_write_latency(1024, 0);
test_read_after_write_latency(1024, 0);
test_read_after_write_latency(1024, 0);
// more to be added // more to be added
} }
...@@ -141,10 +145,12 @@ void latency_test_example() ...@@ -141,10 +145,12 @@ void latency_test_example()
_perf_calibrate(); _perf_calibrate();
printf("latency test example:\n"); printf("latency test example:\n");
test_pointer_tracing_latency(_PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 5, 0); test_pointer_tracing_latency(_PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 5, 0);
test_linear_access_latency(_PERF_PAGE_SIZE_BYTE, sizeof(uint64_t), 5, 0);
test_linear_access_latency(_PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 5, 0); test_linear_access_latency(_PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 5, 0);
test_random_access_latency(4096, 1024*MB, _PERF_CACHELINE_SIZE_BYTE, 0, 1, 0); test_random_access_latency(4096, 1024*MB, _PERF_CACHELINE_SIZE_BYTE, 0, 1, 0);
test_random_access_latency(4096, 1024*MB, _PERF_CACHELINE_SIZE_BYTE, 1, 1, 0); test_random_access_latency(4096, 1024*MB, _PERF_CACHELINE_SIZE_BYTE, 1, 1, 0);
test_same_address_load_latency(1024, 0); test_same_address_load_latency(1024, 0);
test_read_after_write_latency(1024, 0);
printf("total samples: %ld\n", _perf_g_total_samples); printf("total samples: %ld\n", _perf_g_total_samples);
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册