Commit f77baf5f authored by William Wang

Merge branch 'southlake'

......@@ -19,6 +19,7 @@ extern "C" {
enum {
_EVENT_NULL = 0,
_EVENT_ERROR,
_EVENT_IRQ_SOFT,
_EVENT_IRQ_TIMER,
_EVENT_IRQ_IODEV,
_EVENT_PAGEFAULT,
......
......@@ -23,7 +23,10 @@ AM_SRCS := noop/isa/riscv/trm_flash.c \
xs/isa/riscv/cache.c \
nemu/isa/riscv/boot/start_flash.S
CFLAGS += -I$(AM_HOME)/am/src/nemu/include -I$(AM_HOME)/am/src/xs/include -DISA_H=\"riscv.h\" -DNOPRINT
CFLAGS += -I$(AM_HOME)/am/src/nemu/include -I$(AM_HOME)/am/src/xs/include -DISA_H=\"riscv.h\"
ifdef NOPRINT
CFLAGS += -DNOPRINT
endif
ASFLAGS += -DMAINARGS=\"$(mainargs)\"
.PHONY: $(AM_HOME)/am/src/nemu/common/mainargs.S
......
......@@ -23,7 +23,10 @@ AM_SRCS := noop/isa/riscv/trm.c \
xs/isa/riscv/cache.c \
nemu/isa/riscv/boot/start.S
CFLAGS += -I$(AM_HOME)/am/src/nemu/include -I$(AM_HOME)/am/src/xs/include -DISA_H=\"riscv.h\" -DNOPRINT
CFLAGS += -I$(AM_HOME)/am/src/nemu/include -I$(AM_HOME)/am/src/xs/include -DISA_H=\"riscv.h\"
ifdef NOPRINT
CFLAGS += -DNOPRINT
endif
ASFLAGS += -DMAINARGS=\"$(mainargs)\"
.PHONY: $(AM_HOME)/am/src/nemu/common/mainargs.S
......
......@@ -4,10 +4,40 @@
#define MSTATUS_FS 0x00006000
.macro set_reg_zero reg_idx
mv x\reg_idx, zero
.endm
.macro set_freg_zero freg_idx
fmv.w.x f\freg_idx, zero
.endm
.macro init_regs
.altmacro
.set i, 1
.rept 31
set_reg_zero %i
.set i, i+1
.endr
.endm
.macro init_fregs
.set i, 0
.rept 32
set_freg_zero %i
.set i, i+1
.endr
.endm
_start:
init_regs
mv s0, zero
la sp, _stack_pointer
li a0, MSTATUS_FS & (MSTATUS_FS >> 1)
csrs mstatus, a0
csrwi fcsr, 0
init_fregs # init fregs after fp enable
jal _trm_init
......@@ -3,6 +3,7 @@
#include <klib.h>
// static _Context* (*user_handler)(_Event, _Context*) = NULL;
static _Context* (*custom_soft_handler)(_Event, _Context*) = NULL;
static _Context* (*custom_timer_handler)(_Event, _Context*) = NULL;
static _Context* (*custom_external_handler)(_Event, _Context*) = NULL;
static _Context* (*custom_secall_handler)(_Event, _Context*) = NULL;
......@@ -26,6 +27,27 @@ _Context* __am_irq_default_handler(_Event *ev, _Context *c) {
return c;
}
/*
* default handler for Supervisor Software Interrupt
* set event to IRQ_SOFT
* may call custom soft handler if registered
*/
_Context* __am_irq_SSIP_handler(_Event *ev, _Context *c) {
#if __riscv_xlen == 64
asm volatile ("csrwi sip, 0");
#endif
// printf("inside irq SSIP handler\n");
ev->event = _EVENT_IRQ_SOFT;
if (custom_soft_handler != NULL) {
// printf("dive into custom soft handler");
custom_soft_handler(*ev, c);
}
// machine mode will clear stip
asm volatile("csrs mie, 0");
// printf("SSIP handler finished\n");
return c;
}
/*
* default handler for Supervisor Timer Interrupt
* set event to IRQ_TIMER
......@@ -57,7 +79,7 @@ _Context* __am_irq_SEIP_handler(_Event *ev, _Context *c) {
// It's not deleted because we want to test sip write mask.
asm volatile ("csrwi sip, 0");
ev->event = _EVENT_IRQ_IODEV;
// printf("inside irq SSIP handler\n");
printf("inside irq SEIP handler\n");
if (custom_external_handler != NULL)
custom_external_handler(*ev, c);
return c;
......@@ -108,6 +130,14 @@ _Context* __am_irq_handle(_Context *c) {
extern void __am_asm_trap(void);
/*
* Supervisor soft interrupt custom handler register function
* handler: the function to be registered
*/
void ssip_handler_reg(_Context*(*handler)(_Event, _Context*)) {
custom_soft_handler = handler;
}
/*
* Supervisor timer interrupt custom handler register function
* handler: the function to be registered
......@@ -141,6 +171,8 @@ void custom_handler_reg(uintptr_t code, _Context*(*handler)(_Event, _Context*))
switch (code) {
#if __riscv_xlen == 64
case INTR_BIT | SCAUSE_SSIP:
ssip_handler_reg(handler);
break;
#endif
case INTR_BIT | SCAUSE_STIP:
stip_handler_reg(handler);
......@@ -195,10 +227,9 @@ int _cte_init(_Context *(*handler)(_Event ev, _Context *ctx)) {
}
#if __riscv_xlen == 64
interrupt_handler[SCAUSE_SSIP] = __am_irq_STIP_handler;
#else
interrupt_handler[SCAUSE_STIP] = __am_irq_STIP_handler;
interrupt_handler[SCAUSE_SSIP] = __am_irq_SSIP_handler;
#endif
interrupt_handler[SCAUSE_STIP] = __am_irq_STIP_handler;
interrupt_handler[SCAUSE_SEIP] = __am_irq_SEIP_handler;
exception_handler[SCAUSE_SECALL] = __am_irq_SECALL_handler;
......
......@@ -14,7 +14,7 @@ extern void enable_timer();
extern void init_pmp();
extern void enable_pmp(uintptr_t pmp_reg, uintptr_t pmp_addr, uintptr_t pmp_size, uint8_t lock, uint8_t permission);
extern void enable_pmp_TOR(uintptr_t pmp_reg, uintptr_t pmp_addr, uintptr_t pmp_size, bool lock, uint8_t permission);
#include <pmp.h>
#include <csr.h>
static void init_eip() {
// enable machine external interrupt (mie.meip and mstatus.mie)
......
......@@ -20,7 +20,7 @@ static const _Area segments[] = { // Kernel memory mappings
RANGE_LEN(0x3c000000, 0x4000000), // PLIC
RANGE_LEN(0xc0000000, 0x100000), // page table test allocates from this position
#elif defined(__ARCH_RISCV64_XS_SOUTHLAKE) || defined(__ARCH_RISCV64_XS_SOUTHLAKE_FLASH)
RANGE_LEN(0x2000000000, 0x8000000), // PMEM
RANGE_LEN(0x2000000000, 0x800000), // PMEM
RANGE_LEN(0x1f00050000, 0x1000), // uart
// RANGE_LEN(CLINT_MMIO, 0x10000), // clint/timer
// RANGE_LEN(0x1f0c000000, 0x4000000), // PLIC
......
#ifndef __PMP_H__
#define __PMP_H__
#ifndef __CSR_H__
#define __CSR_H__
// csr addr const
// pmp csr
#define PMPCFG_BASE 0x3a0
#define PMPADDR_BASE 0x3b0
#define PMP_R 1
......@@ -15,7 +18,12 @@
// currently XiangShan only support 16 PMP entries
#define PMP_COUNT 16
#define CSR_PMPCFG0 0x3a0
#define CSR_PMPADDR0 0x3b0
// hpm csr
#define CSR_MCYCLE 0xb00
#define CSR_MINSTRET 0xb02
// csr R/W borrowed from OpenSBI project under BSD 2-clause license
#define __ASM_STR(x) #x
......@@ -37,9 +45,6 @@
: "rK"(__v) \
: "memory"); \
})
#define CSR_PMPCFG0 0x3a0
#define CSR_PMPADDR0 0x3b0
#define csr_set(csr, val) \
({ \
......@@ -61,4 +66,4 @@
#endif // __PMP_H__
\ No newline at end of file
#endif // __CSR_H__
\ No newline at end of file
......@@ -7,15 +7,17 @@
#include ISA_H // "x86.h", "mips32.h", ...
#define MAX_EXTERNAL_INTR 256UL
#define MAX_INTERNAL_INTR 10UL
#if defined(__ARCH_RISCV64_NOOP) || defined(__ARCH_RISCV32_NOOP) || defined(__ARCH_RISCV64_XS) || defined(__ARCH_RISCV64_XS_FLASH)
#define INTR_GEN_ADDR (0x40070000UL)
#define INTR_RANDOM (0x40070008UL)
#define INTR_RANDOM_MASK (0x40070010UL)
#define INTR_RANDOM (INTR_GEN_ADDR + MAX_EXTERNAL_INTR)
#define INTR_RANDOM_MASK (INTR_GEN_ADDR + MAX_EXTERNAL_INTR*2)
#define PLIC_BASE_ADDR (0x3c000000UL)
#elif defined(__ARCH_RISCV64_XS_SOUTHLAKE) || defined(__ARCH_RISCV64_XS_SOUTHLAKE_FLASH)
#define INTR_GEN_ADDR (0x1f00060000UL)
#define INTR_RANDOM (0x1f00060008UL)
#define INTR_RANDOM_MASK (0x1f00060010UL)
#define INTR_RANDOM (INTR_GEN_ADDR + MAX_EXTERNAL_INTR)
#define INTR_RANDOM_MASK (INTR_GEN_ADDR + MAX_EXTERNAL_INTR*2)
#define PLIC_BASE_ADDR (0x1f1c000000UL)
#endif
......
#include <xs.h>
#include <cache.h>
#include <pmp.h>
#include <csr.h>
#include <klib.h>
// these functions are interface for cache
......
......@@ -29,9 +29,6 @@
// External interrupts start with index PLIC_EXT_INTR_OFFSET
#define PLIC_EXT_INTR_OFFSET 1
#define MAX_EXTERNAL_INTR 64
#define MAX_INTERNAL_INTR 10
// CSR.MIE
#define MEIE 11
#define SEIE 9
......
#include <xs.h>
#include <pmp.h>
#include <csr.h>
#include <klib.h>
// these functions are interface for pma
......
#include <xs.h>
#include <pmp.h>
#include <csr.h>
#include <klib.h>
/*
* Note that PMP should only be set on machine mode
......
......@@ -62,6 +62,7 @@ void enable_timer();
void set_timer_inc(uintptr_t inc);
// =========== Interrupt handler registration =======
void ssip_handler_reg(_Context*(*handler)(_Event, _Context*));
void stip_handler_reg(_Context*(*handler)(_Event, _Context*));
void seip_handler_reg(_Context*(*handler)(_Event, _Context*));
void secall_handler_reg(_Context*(*handler)(_Event, _Context*));
......
......@@ -353,11 +353,17 @@
#include <am.h>
#include <klib.h>
#include <klib-macros.h>
#include <csr.h> // read csr for ipc
#define Start_Timer() Begin_Time = uptime()
#define Stop_Timer() End_Time = uptime()
#if defined(__ARCH_RISCV64_XS_SOUTHLAKE) || defined(__ARCH_RISCV64_XS_SOUTHLAKE_FLASH)
#define NUMBER_OF_RUNS 1000 /* Default number of runs, reduced for fast test */
#else
#define NUMBER_OF_RUNS 500000 /* Default number of runs */
#endif
#define PASS2
#ifdef NOSTRUCTASSIGN
......@@ -743,6 +749,10 @@ Boolean check(int cond) {
if (!cond) pass = false;
return cond;
}
uint64_t instrcnt = 0;
uint64_t cycle = 0;
int main ()
/*****/
......@@ -761,7 +771,7 @@ int main ()
_ioe_init();
Number_Of_Runs = NUMBER_OF_RUNS;
Number_Of_Runs = NUMBER_OF_RUNS;
/* Initializations */
......@@ -794,6 +804,8 @@ int main ()
/***************/
Start_Timer();
instrcnt = csr_read(CSR_MINSTRET);
cycle = csr_read(CSR_MCYCLE);
for (Run_Index = 1; Run_Index <= Number_Of_Runs; ++Run_Index)
{
......@@ -852,6 +864,11 @@ int main ()
Done = true;
}
cycle = csr_read(CSR_MCYCLE) - cycle;
instrcnt = csr_read(CSR_MINSTRET) - instrcnt;
printf("cycle %d inst %d ipc %lf\n", cycle, instrcnt, (float)instrcnt/cycle);
if (!check(Int_Glob == 5)) {
printf("Int_Glob: %d\n", Int_Glob);
printf(" should be: %d\n", 5);
......@@ -937,11 +954,19 @@ int main ()
printf(" should be: DHRYSTONE PROGRAM, 2'ND STRING\n");
}
printf ("Finished in %d ms\n", (int)User_Time);
float HZ = 2000000000;
float Dhrystones_Per_Second = ((float) HZ * (float) Number_Of_Runs)
/ (float) cycle;
float Dhrystones_Per_MHZ = (1000000 * (float) Number_Of_Runs)
/ (float) cycle;
printf("Freq is set to %f HZ\n", HZ);
printf("==================================================\n");
printf("Dhrystone %s %d Marks\n", pass ? "PASS" : "FAIL",
880900 / (int)User_Time * NUMBER_OF_RUNS/ 500000);
printf(" vs. 100000 Marks (i7-7700K @ 4.20GHz)\n");
printf("Dhrystone %s\n", pass ? "PASS" : "FAIL");
printf("%f Dhrystones_Per_Second\n", Dhrystones_Per_Second);
printf("%f DMIPS\n", Dhrystones_Per_Second / 1757);
printf("%f DMIPS/MHZ\n", Dhrystones_Per_MHZ / 1757);
return 0;
}
......
NAME = hpmdriver
SRCS = hpmdriver.c
include $(AM_HOME)/Makefile.app
# HPM driver usage
## Environment
&emsp; &emsp; [nexus-am repository](https://github.com/OpenXiangShan/nexus-am/tree/southlake), using the `southlake` branch. For environment setup, see the [development environment documentation](https://xiangshan-doc.readthedocs.io/zh_CN/latest/tools/xsenv/).
```shell
nexus-am/apps/hpmdriver
├── Makefile
├── events.h    : performance event encoding table
├── hpmdriver.h : header file with the performance event register layout and helper functions
├── hpmdriver.c : main function
```
&emsp; &emsp; Build with `make ARCH=riscv64-xs-southlake`, or build and run in one step with:
```shell
make ARCH=riscv64-xs-southlake && $NOOP_HOME/build/emu --no-diff -i ./build/*.bin 2>&1 | tee log | less
```
## Usage
&emsp; &emsp; Write the performance event to be counted, together with the combination mode between multiple events, into the performance event register `mhpmevent` (the macros in the header file can be used). Then read the corresponding `mhpmcounter` register to obtain the count.
&emsp; &emsp; The mode field selects the privilege level(s) in which the performance events are counted.
&emsp; &emsp; Performance events are combined as follows:
- Event0 \<Optype0\> Event1 = T1
- Event2 \<Optype1\> Event3 = T2
- T1 \<Optype2\> T2 = Result
&emsp; &emsp; **Note in particular that each performance event must be counted with its corresponding performance event register**; see the table below. The `Hardware Performance Monitor` of each block is wired only to the `mhpmevent` CSRs in the corresponding range, so only those `mhpmevent` registers can be used to count its events. Moreover, the event encodings of every block start from 0, so using the wrong `mhpmevent` register produces incorrect results.
| Event source | Registers |
| ------------- | ---- |
| Frontend | `mhpmevent3~10` |
| CtrlBlock | `mhpmevent11~18` |
| MemBlock | `mhpmevent19~26` |
| Cache | `mhpmevent27~31` |
&emsp; &emsp; `set_event_quad/set_event_double/set_event_single` are provided to configure a combination of four events, a combination of two events, or a single event. `se_cc_*` sets the event and clears the counter. `print_counter` prints a counter value.
&emsp; &emsp; For a complete example, see `hpmdriver.c`.
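&emsp; &emsp; A minimal sketch of this flow (assuming M-mode execution and the `se_cc_single`/`print_event`/`print_counter` macros defined in `hpmdriver.h`; the workload loop is only illustrative):
```c
#include "hpmdriver.h"  // provides se_cc_single, print_event, print_counter, MODE_M and event encodings

int main() {
    // program mhpmevent3 to count Frontend_frontendFlush in M mode, and clear mhpmcounter3
    se_cc_single(3, MODE_M, Frontend_frontendFlush);

    // workload to be measured
    volatile uint64_t sum = 0;
    for (uint64_t i = 0; i < 1000; i++) sum += i;

    // read back the event configuration and the accumulated count
    print_event(3);    // prints mhpmevent3
    print_counter(3);  // prints mhpmcounter3
    return 0;
}
```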
#ifndef __EVENTS_H__
#define __EVENTS_H__
// Performance event encodings may change in the future;
// it is recommended to run parse_encoding again
// to regenerate this event encoding table.
#define noEvent 0
#define Frontend_noEvent 0
#define Frontend_frontendFlush 1
#define Frontend_ifu_req 2
#define Frontend_ifu_miss 3
#define Frontend_ifu_req_cacheline_0 4
#define Frontend_ifu_req_cacheline_1 5
#define Frontend_ifu_req_cacheline_0_hit 6
#define Frontend_ifu_req_cacheline_1_hit 7
#define Frontend_only_0_hit 8
#define Frontend_only_0_miss 9
#define Frontend_hit_0_hit_1 10
#define Frontend_hit_0_miss_1 11
#define Frontend_miss_0_hit_1 12
#define Frontend_miss_0_miss_1 13
#define Frontend_IBuffer_Flushed 14
#define Frontend_IBuffer_hungry 15
#define Frontend_IBuffer_1_4_valid 16
#define Frontend_IBuffer_2_4_valid 17
#define Frontend_IBuffer_3_4_valid 18
#define Frontend_IBuffer_4_4_valid 19
#define Frontend_IBuffer_full 20
#define Frontend_Front_Bubble 21
#define Frontend_icache_miss_cnt 22
#define Frontend_icache_miss_penalty 23
#define Frontend_bpu_s2_redirect 24
#define Frontend_bpu_s3_redirect 25
#define Frontend_bpu_to_ftq_stall 26
#define Frontend_mispredictRedirect 27
#define Frontend_replayRedirect 28
#define Frontend_predecodeRedirect 29
#define Frontend_to_ifu_bubble 30
#define Frontend_from_bpu_real_bubble 31
#define Frontend_BpInstr 32
#define Frontend_BpBInstr 33
#define Frontend_BpRight 34
#define Frontend_BpWrong 35
#define Frontend_BpBRight 36
#define Frontend_BpBWrong 37
#define Frontend_BpJRight 38
#define Frontend_BpJWrong 39
#define Frontend_BpIRight 40
#define Frontend_BpIWrong 41
#define Frontend_BpCRight 42
#define Frontend_BpCWrong 43
#define Frontend_BpRRight 44
#define Frontend_BpRWrong 45
#define Frontend_ftb_false_hit 46
#define Frontend_ftb_hit 47
#define Frontend_tage_tht_hit 48
#define Frontend_sc_update_on_mispred 49
#define Frontend_sc_update_on_unconf 50
#define Frontend_ftb_commit_hits 51
#define Frontend_ftb_commit_misses 52
#define CSR_Hc 0
#define CSR_Hc_1 1
#define CSR_Hc_2 2
#define CSR_Hc_3 3
#define CSR_Hc_4 4
#define CSR_Hc_5 5
#define CSR_Hc_6 6
#define CSR_Hc_7 7
#define CSR_Hc_8 8
#define CSR_Hc_9 9
#define CSR_Hc_10 10
#define CSR_Hc_11 11
#define CSR_Hc_12 12
#define CSR_Hc_13 13
#define CSR_Hc_14 14
#define CSR_Hc_15 15
#define CSR_Hc_16 16
#define CSR_Hc_17 17
#define CSR_Hc_18 18
#define CSR_Hc_19 19
#define CSR_Hc_20 20
#define CSR_Hc_21 21
#define CSR_Hc_22 22
#define CSR_Hc_23 23
#define CSR_Hc_24 24
#define MemBlock_noEvent 0
#define MemBlock_lu0_load_s0_in_fire 1
#define MemBlock_lu0_load_to_load_forward 2
#define MemBlock_lu0_stall_dcache 3
#define MemBlock_lu0_addr_spec_success 4
#define MemBlock_lu0_addr_spec_failed 5
#define MemBlock_lu0_load_s1_in_fire 6
#define MemBlock_lu0_load_s1_tlb_miss 7
#define MemBlock_lu0_load_s2_in_fire 8
#define MemBlock_lu0_load_s2_dcache_miss 9
#define MemBlock_lu0_load_s2_replay 10
#define MemBlock_lu0_load_s2_replay_tlb_miss 11
#define MemBlock_lu0_load_s2_replay_cache 12
#define MemBlock_lu1_load_s0_in_fire 13
#define MemBlock_lu1_load_to_load_forward 14
#define MemBlock_lu1_stall_dcache 15
#define MemBlock_lu1_addr_spec_success 16
#define MemBlock_lu1_addr_spec_failed 17
#define MemBlock_lu1_load_s1_in_fire 18
#define MemBlock_lu1_load_s1_tlb_miss 19
#define MemBlock_lu1_load_s2_in_fire 20
#define MemBlock_lu1_load_s2_dcache_miss 21
#define MemBlock_lu1_load_s2_replay 22
#define MemBlock_lu1_load_s2_replay_tlb_miss 23
#define MemBlock_lu1_load_s2_replay_cache 24
#define MemBlock_lu1_sbuffer_req_valid 25
#define MemBlock_sbuffer_req_fire 26
#define MemBlock_sbuffer_merge 27
#define MemBlock_sbuffer_newline 28
#define MemBlock_dcache_req_valid 29
#define MemBlock_dcache_req_fire 30
#define MemBlock_sbuffer_idle 31
#define MemBlock_sbuffer_flush 32
#define MemBlock_sbuffer_replace 33
#define MemBlock_mpipe_resp_valid 34
#define MemBlock_refill_resp_valid 35
#define MemBlock_replay_resp_valid 36
#define MemBlock_coh_timeout 37
#define MemBlock_sbuffer_1_4_valid 38
#define MemBlock_sbuffer_2_4_valid 39
#define MemBlock_sbuffer_3_4_valid 40
#define MemBlock_sbuffer_full_valid 41
#define MemBlock_lq_rollback 42
#define MemBlock_lq_mmioCycle 43
#define MemBlock_lq_mmio_Cnt 44
#define MemBlock_lq_refill 45
#define MemBlock_lq_writeback_success 46
#define MemBlock_lq_writeback_blocked 47
#define MemBlock_ltq_1_4_valid 48
#define MemBlock_ltq_2_4_valid 49
#define MemBlock_ltq_3_4_valid 50
#define MemBlock_ltq_4_4_valid 51
#define MemBlock_sq_mmioCycle 52
#define MemBlock_sq_mmioCnt 53
#define MemBlock_sq_mmio_wb_success 54
#define MemBlock_sq_mmio_wb_blocked 55
#define MemBlock_stq_1_4_valid 56
#define MemBlock_stq_2_4_valid 57
#define MemBlock_stq_3_4_valid 58
#define MemBlock_stq_4_4_valid 59
#define MemBlock_dcache_wbq_req 60
#define MemBlock_dcache_wbq_1_4_valid 61
#define MemBlock_dcache_wbq_2_4_valid 62
#define MemBlock_dcache_wbq_3_4_valid 63
#define MemBlock_dcache_wbq_4_4_valid 64
#define MemBlock_dcache_mp_req 65
#define MemBlock_dcache_mp_total_penalty 66
#define MemBlock_dcache_missq_req 67
#define MemBlock_dcache_missq_1_4_valid 68
#define MemBlock_dcache_missq_2_4_valid 69
#define MemBlock_dcache_missq_3_4_valid 70
#define MemBlock_dcache_missq_4_4_valid 71
#define MemBlock_dcache_probq_req 72
#define MemBlock_dcache_probq_1_4_valid 73
#define MemBlock_dcache_probq_2_4_valid 74
#define MemBlock_dcache_probq_3_4_valid 75
#define MemBlock_dcache_probq_4_4_valid 76
#define MemBlock_loadpipe0_load_req 77
#define MemBlock_loadpipe0_load_replay 78
#define MemBlock_loadpipe0_load_replay_for_data_nack 79
#define MemBlock_loadpipe0_load_replay_for_no_mshr 80
#define MemBlock_loadpipe0_load_replay_for_conflict 81
#define MemBlock_loadpipe1_load_req 82
#define MemBlock_loadpipe1_load_replay 83
#define MemBlock_loadpipe1_load_replay_for_data_nack 84
#define MemBlock_loadpipe1_load_replay_for_no_mshr 85
#define MemBlock_loadpipe1_load_replay_for_conflict 86
#define MemBlock_perfEventsPTW_0 87
#define MemBlock_perfEventsPTW_1 88
#define MemBlock_perfEventsPTW_2 89
#define MemBlock_perfEventsPTW_3 90
#define MemBlock_perfEventsPTW_4 91
#define MemBlock_perfEventsPTW_5 92
#define MemBlock_perfEventsPTW_6 93
#define MemBlock_perfEventsPTW_7 94
#define MemBlock_perfEventsPTW_8 95
#define MemBlock_perfEventsPTW_9 96
#define MemBlock_perfEventsPTW_10 97
#define MemBlock_perfEventsPTW_11 98
#define MemBlock_perfEventsPTW_12 99
#define MemBlock_perfEventsPTW_13 100
#define MemBlock_perfEventsPTW_14 101
#define MemBlock_perfEventsPTW_15 102
#define MemBlock_perfEventsPTW_16 103
#define MemBlock_perfEventsPTW_17 104
#define MemBlock_perfEventsPTW_18 105
#define MemBlock_ldDeqCount 106
#define MemBlock_stDeqCount 107
#define CtrlBlock_noEvent 0
#define CtrlBlock_decoder_fused_instr 1
#define CtrlBlock_decoder_waitInstr 2
#define CtrlBlock_decoder_stall_cycle 3
#define CtrlBlock_decoder_utilization 4
#define CtrlBlock_rename_in 5
#define CtrlBlock_rename_waitinstr 6
#define CtrlBlock_rename_stall_cycle_dispatch 7
#define CtrlBlock_rename_stall_cycle_fp 8
#define CtrlBlock_rename_stall_cycle_int 9
#define CtrlBlock_rename_stall_cycle_walk 10
#define CtrlBlock_me_freelist_1_4_valid 11
#define CtrlBlock_me_freelist_2_4_valid 12
#define CtrlBlock_me_freelist_3_4_valid 13
#define CtrlBlock_me_freelist_4_4_valid 14
#define CtrlBlock_std_freelist_1_4_valid 15
#define CtrlBlock_std_freelist_2_4_valid 16
#define CtrlBlock_std_freelist_3_4_valid 17
#define CtrlBlock_std_freelist_4_4_valid 18
#define CtrlBlock_dispatch_in 19
#define CtrlBlock_dispatch_empty 20
#define CtrlBlock_dispatch_utili 21
#define CtrlBlock_dispatch_waitinstr 22
#define CtrlBlock_dispatch_stall_cycle_lsq 23
#define CtrlBlock_dispatch_stall_cycle_rob 24
#define CtrlBlock_dispatch_stall_cycle_int_dq 25
#define CtrlBlock_dispatch_stall_cycle_fp_dq 26
#define CtrlBlock_dispatch_stall_cycle_ls_dq 27
#define CtrlBlock_intdq_dispatchq_in 28
#define CtrlBlock_intdq_dispatchq_out 29
#define CtrlBlock_intdq_dispatchq_out_try 30
#define CtrlBlock_intdq_dispatchq_fake_block 31
#define CtrlBlock_intdq_dispatchq_1_4_valid 32
#define CtrlBlock_intdq_dispatchq_2_4_valid 33
#define CtrlBlock_intdq_dispatchq_3_4_valid 34
#define CtrlBlock_intdq_dispatchq_4_4_valid 35
#define CtrlBlock_fpdq_dispatchq_in 36
#define CtrlBlock_fpdq_dispatchq_out 37
#define CtrlBlock_fpdq_dispatchq_out_try 38
#define CtrlBlock_fpdq_dispatchq_fake_block 39
#define CtrlBlock_fpdq_dispatchq_1_4_valid 40
#define CtrlBlock_fpdq_dispatchq_2_4_valid 41
#define CtrlBlock_fpdq_dispatchq_3_4_valid 42
#define CtrlBlock_fpdq_dispatchq_4_4_valid 43
#define CtrlBlock_lsdq_dispatchq_in 44
#define CtrlBlock_lsdq_dispatchq_out 45
#define CtrlBlock_lsdq_dispatchq_out_try 46
#define CtrlBlock_lsdq_dispatchq_fake_block 47
#define CtrlBlock_lsdq_dispatchq_1_4_valid 48
#define CtrlBlock_lsdq_dispatchq_2_4_valid 49
#define CtrlBlock_lsdq_dispatchq_3_4_valid 50
#define CtrlBlock_lsdq_dispatchq_4_4_valid 51
#define CtrlBlock_rob_interrupt_num 52
#define CtrlBlock_rob_exception_num 53
#define CtrlBlock_rob_flush_pipe_num 54
#define CtrlBlock_rob_replay_inst_num 55
#define CtrlBlock_rob_commitUop 56
#define CtrlBlock_rob_commitInstr 57
#define CtrlBlock_rob_commitInstrMove 58
#define CtrlBlock_rob_commitInstrFused 59
#define CtrlBlock_rob_commitInstrLoad 60
#define CtrlBlock_rob_commitInstrLoad_1 61
#define CtrlBlock_rob_commitInstrLoadWait 62
#define CtrlBlock_rob_commitInstrStore 63
#define CtrlBlock_rob_walkInstr 64
#define CtrlBlock_rob_walkCycle 65
#define CtrlBlock_rob_1_4_valid 66
#define CtrlBlock_rob_2_4_valid 67
#define CtrlBlock_rob_3_4_valid 68
#define CtrlBlock_rob_4_4_valid 69
#define CtrlBlock_perfEventsEu0_0 70
#define CtrlBlock_perfEventsEu0_1 71
#define CtrlBlock_perfEventsEu0_2 72
#define CtrlBlock_perfEventsEu0_3 73
#define CtrlBlock_perfEventsEu0_4 74
#define CtrlBlock_perfEventsEu0_5 75
#define CtrlBlock_perfEventsEu1_0 76
#define CtrlBlock_perfEventsEu1_1 77
#define CtrlBlock_perfEventsEu1_2 78
#define CtrlBlock_perfEventsEu1_3 79
#define CtrlBlock_perfEventsEu1_4 80
#define CtrlBlock_perfEventsEu1_5 81
#define CtrlBlock_perfEventsRs_0 82
#define CtrlBlock_perfEventsRs_1 83
#define CtrlBlock_perfEventsRs_2 84
#define CtrlBlock_perfEventsRs_3 85
#define CtrlBlock_perfEventsRs_4 86
#define CtrlBlock_perfEventsRs_5 87
#define CtrlBlock_perfEventsRs_6 88
#define CtrlBlock_perfEventsRs_7 89
#endif // __EVENTS_H__
#include "hpmdriver.h"
int main() {
printf("Hello, XiangShan!\n");
printu_csr(marchid);
printx_csr(mcountinhibit);
printx_csr(mcounteren);
printx_csr(scounteren);
printu_csr(mcycle);
printu_csr(minstret);
se_cc_single(3, MODE_M, Frontend_frontendFlush);
se_cc_single(11, MODE_M, CtrlBlock_decoder_waitInstr);
se_cc_double(19, MODE_M, OPTYPE_ADD, MemBlock_loadpipe0_load_req, MemBlock_loadpipe1_load_req);
// === tmp workload ===
volatile uint64_t a = 0;
for(uint64_t i = 0; i < 100; i++) {
a += a + i;
}
printf("%lu\n",a);
print_event(3);
print_counter(3);
print_event(11);
print_counter(11);
print_event(19);
print_counter(19);
return 0;
}
\ No newline at end of file
#ifndef __HPMDRIVER_H_
#define __HPMDRIVER_H_
#include <klib.h>
#include <csr.h> // nexus-am/am/src/xs/include/csr.h
#include "events.h"
#define MODE_OFFSET 59
#define MODE_MASK 0x1F
#define MODE_M 0x10
#define MODE_H 0x08
#define MODE_S 0x04
#define MODE_U 0x02
#define MODE_D 0x01
#define OPTYPE2_OFFSET 50
#define OPTYPE2_MASK 0x1F
#define OPTYPE1_OFFSET 45
#define OPTYPE1_MASK 0x1F
#define OPTYPE0_OFFSET 40
#define OPTYPE0_MASK 0x1F
#define OPTYPE_OR 0x0
#define OPTYPE_AND 0x1
#define OPTYPE_XOR 0x2
#define OPTYPE_ADD 0x4
// Operations
// Event0 <Optype0> Event1 = T1
// Event2 <Optype1> Event3 = T2
// T1 <Optype2> T2 = Result
#define EVENT3_OFFSET 30
#define EVENT3_MASK 0x3FF
#define EVENT2_OFFSET 20
#define EVENT2_MASK 0x3FF
#define EVENT1_OFFSET 10
#define EVENT1_MASK 0x3FF
#define EVENT0_OFFSET 0
#define EVENT0_MASK 0x3FF
#define SET(reg, field, value) (reg) = ((reg) & ~((uint64_t)(field##_MASK) << (field##_OFFSET))) | ((uint64_t)(value) << (field##_OFFSET));
#define clear_event(id) csr_write(mhpmevent##id, 0x0UL)
#define print_event(id) printf("mhpmevent%d: %lx\n", id, csr_read(mhpmevent##id))
#define clear_counter(id) csr_write(mhpmcounter##id, 0x0UL)
#define print_counter(id) printf("mhpmcounter%d: %lu\n", id, csr_read(mhpmcounter##id))
#define printd_csr(csr) printf(#csr": %ld\n", csr_read(csr))
#define printu_csr(csr) printf(#csr": %lu\n", csr_read(csr))
#define printx_csr(csr) printf(#csr": %lx\n", csr_read(csr))
#define set_event_quad(csr_id, mode, optype2, optype1, optype0, event3, event2, event1, event0) \
{ \
uint64_t value = csr_read(mhpmevent##csr_id); \
SET(value, MODE, mode); \
SET(value, OPTYPE2, optype2); \
SET(value, OPTYPE1, optype1); \
SET(value, OPTYPE0, optype0); \
SET(value, EVENT3, event3); \
SET(value, EVENT2, event2); \
SET(value, EVENT1, event1); \
SET(value, EVENT0, event0); \
csr_write(mhpmevent##csr_id, value); \
}
#define set_event_double(csr_id, mode, optype0, event1, event0) \
set_event_quad(csr_id, mode, OPTYPE_OR, OPTYPE_OR, optype0, noEvent, noEvent, event1, event0)
#define set_event_single(csr_id, mode, event)\
set_event_quad(csr_id, mode, OPTYPE_OR, OPTYPE_OR, OPTYPE_OR, noEvent, noEvent, noEvent, event)
// set event and clear counter
#define se_cc_quad(csr_id, mode, optype2, optype1, optype0, event3, event2, event1, event0) \
{set_event_quad(csr_id, mode, optype2, optype1, optype0, event3, event2, event1, event0);clear_counter(csr_id);}
#define se_cc_double(csr_id, mode, optype0, event1, event0) \
{set_event_double(csr_id, mode, optype0, event1, event0);clear_counter(csr_id);}
#define se_cc_single(csr_id, mode, event) \
{set_event_single(csr_id, mode, event);clear_counter(csr_id);}
#endif /* __HPMDRIVER_H_ */
\ No newline at end of file
import os
import re
xs = os.environ['NOOP_HOME']
am = os.environ['AM_HOME']
assert(xs)
assert(am)
# grep -r -n generatePerfEvent src > generatePerfEventlog
generatePerfEventlog = os.path.join(xs, 'generatePerfEvent_log')
# make emu EMU_THREADS=8 -j24 > generate_log 2>&1 &
generate_log = os.path.join(xs,'generate_log')
events_header = os.path.join(am, 'apps/hpmdriver/events.h')
# open src file that has perfEvents and parse
def parse_events_src():
with open(generatePerfEventlog, 'r') as f:
c = f.read()
regexp = '(.*scala):'
paths = re.findall(regexp, c)
all_events = []
for path in paths:
module = path.split('/')[-1].split('.')[0]
with open(os.path.join(xs, path), 'r') as f:
c = f.read()
# the following is to match content in Seq()
pf = 'val perfEvents = Seq'
st = c.find(pf)
if st == -1:
continue
st = st + len(pf)
cnt = 0
for i in range(10000):
ch = c[st+i]
if ch == '(':
cnt += 1
elif ch == ')':
cnt -= 1
if cnt == 0:
content = c[st:st+i+1]
break
# turn Seq(...) to (module, name, signal, coding)
for eve in re.findall(r'\("(.*)", (.*)\),',content):
name = eve[0].strip()
signal = eve[1].strip()
# search for coding in generate_log
with open(generate_log, 'r') as f:
g = f.read()
regexp = f'\((\w+) perfEvents Set,({name})\s*,(.+),(\d+)'
matches = re.findall(regexp,g)
coding = 'NA'
block = 'NA'
if len(matches) == 1:
coding = matches[0][3]
block = matches[0][0]
elif len(matches) > 1:
coding = ','.join([x[3] for x in matches])
block = matches[0][0]
all_events.append((block, module, name, signal, coding))
for event in all_events:
print('@'.join([x for x in event]))
# use excel to export this, split = '@'
def parse_encoding():
with open(generate_log, 'r') as f:
c = f.read()
regexp = '\((\w+) perfEvents Set,(\w+)\s*,(.+),(\d+)'
matches = re.findall(regexp,c)
for match in matches:
block = match[0]
name = match[1]
coding = match[3]
print(f'#define {block}_{name} {coding}')
if __name__ == '__main__':
parse_events_src()
NAME = maprobe
SRCS = maprobe.c
include $(AM_HOME)/Makefile.app
// basic microarchitectural probe
#ifndef PROBE_H
#define PROBE_H
#include <klib.h>
#include <csr.h>
// perf const
#define BYTE (1)
#define KB (1024*BYTE)
#define MB (1024*KB)
#define GB (1024*MB)
// platform dependent const
// #define _PERF_TEST_ADDR_BASE 0x80400000
#define _PERF_TEST_ADDR_BASE 0x2000400000
#define _PERF_CACHELINE_SIZE_BYTE (64 * BYTE)
#define _PERF_L1_NOALIAS_SIZE_BYTE (32 * KB)
#define _PERF_L1_SIZE_BYTE (128 * KB)
#define _PERF_L2_SIZE_BYTE (512 * KB)
#define _PERF_L3_SIZE_BYTE (2 * MB)
#define _PERF_L1_NUM_WAYS 8
#define _PERF_SET_SIZE_BYTE (_PERF_L1_SIZE_BYTE / _PERF_L1_NUM_WAYS)
// probe const
#define _PERF_BLACKHOLE _PERF_TEST_ADDR_BASE
struct perf
{
// const to be calibrated at run time
uint64_t csr_read_cycle; //# of cycles to read mcycle
uint64_t csr_read_ninst; // # of inst needed to read minstret
// timer
uint64_t cycle;
uint64_t instrcnt;
} perf;
void _perf_start_timer()
{
perf.cycle = csr_read(CSR_MCYCLE);
perf.instrcnt = csr_read(CSR_MINSTRET);
}
void _perf_end_timer()
{
perf.cycle = csr_read(CSR_MCYCLE) - perf.cycle;
perf.instrcnt = csr_read(CSR_MINSTRET) - perf.instrcnt;
}
void _perf_print_timer()
{
printf("cycle %d inst %d ipc %lf\n", perf.cycle, perf.instrcnt, (float)perf.instrcnt/perf.cycle);
}
void _perf_calibrate()
{
// csr read delay
uint64_t cycle_1 = csr_read(CSR_MCYCLE);
uint64_t cycle_2 = csr_read(CSR_MCYCLE);
perf.csr_read_cycle = cycle_2-cycle_1;
printf("perf_calibrate: csr_read_cycle %d\n", perf.csr_read_cycle);
// csr read inst cost
uint64_t inst_1 = csr_read(CSR_MINSTRET);
uint64_t inst_2 = csr_read(CSR_MINSTRET);
perf.csr_read_ninst = inst_2-inst_1;
printf("perf_calibrate: csr_read_ninst %d\n", perf.csr_read_ninst);
}
void _perf_blackhole(uint64_t value)
{
*(uint64_t*) _PERF_BLACKHOLE = value;
}
uint64_t setup_latency_test_linklist(uint64_t base_addr, uint64_t end_addr, uint64_t step)
{
uint64_t num_valid_node = 0;
assert(step % 8 == 0);
assert(step >= 8);
for (uint64_t cur_addr = base_addr; cur_addr < end_addr;) {
uint64_t next_addr = cur_addr + step;
*((uint64_t*)cur_addr) = next_addr;
cur_addr = next_addr;
num_valid_node++;
}
return num_valid_node;
}
uint64_t read_latency_test_linklist(uint64_t base_addr, uint64_t num_valid_node)
{
uint64_t cur_addr = base_addr;
for (int i = 0; i < num_valid_node; i++) {
cur_addr = (*(uint64_t*)cur_addr);
}
return cur_addr;
}
void warmup(uint64_t base_addr, uint64_t end_addr)
{
setup_latency_test_linklist(base_addr, end_addr, _PERF_CACHELINE_SIZE_BYTE);
}
void test_latency(uint64_t size, int iter)
{
volatile uint64_t result = 0; // make sure compiler will not opt read_latency_test_linklist
printf("range 0x%xB (%d iters) latency test\n", size, iter);
_perf_start_timer();
uint64_t nnode = setup_latency_test_linklist(_PERF_TEST_ADDR_BASE, _PERF_TEST_ADDR_BASE + size, _PERF_CACHELINE_SIZE_BYTE);
_perf_end_timer();
uint64_t total_node = nnode * iter;
// _perf_print_timer();
_perf_start_timer();
for (int i = 0; i < iter; i++) {
result += read_latency_test_linklist(_PERF_TEST_ADDR_BASE, nnode);
}
_perf_end_timer();
// _perf_print_timer();
printf("range 0x%xB (%d intrs) read latency %f (%d samples)\n", size, iter, (float)perf.cycle / total_node, total_node);
_perf_blackhole(result);
}
void test_mem_throughput(uint64_t iter)
{
uint64_t remain = iter;
uint64_t result = 0;
uint64_t access_addr = _PERF_TEST_ADDR_BASE;
_perf_start_timer();
while (remain--) {
result += *(uint64_t*) access_addr;
access_addr += _PERF_CACHELINE_SIZE_BYTE;
}
_perf_end_timer();
*(uint64_t*) _PERF_BLACKHOLE = result;
printf("mem band width %f B/cycle (%d samples)\n", (float)iter * _PERF_CACHELINE_SIZE_BYTE / perf.cycle, iter);
}
void test_mem_throughput_same_set(uint64_t iter)
{
uint64_t remain = iter;
uint64_t result = 0;
uint64_t access_addr = _PERF_TEST_ADDR_BASE;
_perf_start_timer();
while (remain--) {
result += *(uint64_t*) access_addr;
access_addr += _PERF_SET_SIZE_BYTE;
}
_perf_end_timer();
*(uint64_t*) _PERF_BLACKHOLE = result;
printf("mem band width %f B/cycle (%d samples)\n", (float)iter * _PERF_CACHELINE_SIZE_BYTE / perf.cycle, iter);
}
#endif
\ No newline at end of file
#include <klib.h>
#include "maprobe.h"
int main()
{
_perf_calibrate();
printf("Memory throughput:\n");
test_mem_throughput(512);
printf("L1 latency:\n");
test_latency(4 * KB, 5);
test_latency(_PERF_L1_NOALIAS_SIZE_BYTE, 2);
test_latency(_PERF_L1_SIZE_BYTE/2, 2);
test_latency(_PERF_L1_SIZE_BYTE, 2);
printf("L2 latency:\n");
test_latency(_PERF_L2_SIZE_BYTE/2, 2);
// test_latency(_PERF_L2_SIZE_BYTE, 2);
printf("L3 latency:\n");
test_latency(_PERF_L3_SIZE_BYTE/2, 2);
// test_latency(_PERF_L3_SIZE_BYTE,2);
// printf("MEM:\n");
// test_latency(_PERF_L3_SIZE_BYTE*2,2);
return 0;
}
\ No newline at end of file
......@@ -9,6 +9,7 @@
#define IOE ({ _ioe_init(); })
#define CTE(h) ({ _Context *h(_Event, _Context *); _cte_init(h); })
#define RSEH(h) ({ _Context *h(_Event, _Context *); ssip_handler_reg(h);})
#define REEH(h) ({ _Context *h(_Event, _Context *); seip_handler_reg(h);})
#define RTEH(h) ({ _Context *h(_Event, _Context *); stip_handler_reg(h);})
#define RCEH(h) ({ _Context *h(_Event, _Context *); secall_handler_reg(h);})
......
......@@ -16,6 +16,8 @@ static const char *tests[256] = {
['p'] = "x86 virtual memory test",
['c'] = "risc-v physical memory protection test",
['s'] = "risc-v virtual memory test",
['r'] = "risc-v RTC tick test",
['z'] = "risc-v soft intr test",
};
int main(const char *args) {
......@@ -23,7 +25,9 @@ int main(const char *args) {
CASE('x', dma_test);
CASE('h', hello);
CASE('i', hello_intr, IOE, CTE(simple_trap), REEH(simple_trap), RCEH(simple_trap), RTEH(simple_trap));
CASE('z', soft_intr, IOE, CTE(soft_trap), RSEH(soft_trap), REEH(soft_trap), RCEH(soft_trap), RTEH(soft_trap));
CASE('e', external_intr, IOE, NOTIMEINT(), CTE(external_trap), REEH(external_trap), RTEH(external_trap));
CASE('u', test_BEU, IOE, NOTIMEINT(), CTE(handle_external_trap), REEH(handle_external_trap), RTEH(handle_external_trap));
CASE('d', devscan, IOE);
CASE('m', finalize, PRE_MPE(args[1]), MPE(mp_print));
CASE('t', rtc_test, IOE);
......@@ -34,6 +38,7 @@ int main(const char *args) {
CASE('c', pmp_test, CTE(simple_trap));
CASE('s', sv39_test, IOE, CTE(simple_trap));
CASE('b', cache_test);
CASE('r', rtc_accuracy_test);
case 'H':
default:
printf("Usage: make run mainargs=*\n");
......
#include <amtest.h>
#include <xs.h>
#define READ_WORD(addr) (*((volatile uint32_t *)(addr)))
#define WRITE_WORD(addr, data) (*((volatile uint32_t *)(addr)) = (data))
#define CONTEXT_M 0
#define CONTEXT_S 1
#define PLIC_PRIORITY (PLIC_BASE_ADDR + 0x4UL)
#define PLIC_PENDING (PLIC_BASE_ADDR + 0x1000UL)
#define PLIC_ENABLE(c) (PLIC_BASE_ADDR + 0x2000UL + c*0x80UL)
#define PLIC_THRESHOLD(c) (PLIC_BASE_ADDR + 0x200000UL + c*0x1000UL)
#define PLIC_CLAIM(c) (PLIC_BASE_ADDR + 0x200004UL + c*0x1000UL)
// External interrupts start with index PLIC_EXT_INTR_OFFSET(interrupt source 0 is not used)
#define PLIC_EXT_INTR_OFFSET 1
#define BUS_ERROR_INTERRUPT 256
#define L3_ERROR_INTERRUPT 257
// set `TEST_L3` to test l3 cache error
// unset `TEST_L3` to test flow of BEU
#define TEST_L3
// BEU constants
#define BEU_BASE 0x1f10010000UL
#define BEU_ENABLE_REG (BEU_BASE + 0x10UL)
#define BEU_PLIC_INTERRUPT_REG (BEU_BASE + 0x18UL)
// error handle
#define error(msg) {printf(msg); _halt(1);}
static volatile uint32_t should_claim = -1;
static volatile bool should_trigger = false;
static volatile int current_context = CONTEXT_S;
#ifndef TEST_L3
static void enable_plic_bus_error_interrupt() {
// enable bus error interrupt 0
plic_enable(CONTEXT_S, BUS_ERROR_INTERRUPT + PLIC_EXT_INTR_OFFSET);
}
static void disable_plic_bus_error_interrupt() {
// disable bus error interrupt 0
plic_disable(CONTEXT_S, BUS_ERROR_INTERRUPT + PLIC_EXT_INTR_OFFSET);
}
static void config_BEU() {
// enable icache , dcache , l2 cache ecc; 7 means 0b111
// 0b1(l2 cache ecc)1(dcache ecc)1(icache ecc)
WRITE_WORD(BEU_ENABLE_REG, READ_WORD(BEU_ENABLE_REG) | 7);
WRITE_WORD(BEU_PLIC_INTERRUPT_REG, READ_WORD(BEU_PLIC_INTERRUPT_REG) | 7);
}
#else
static void enable_plic_l3_cache_error_interrupt() {
// enable l3 cache error interrupt
plic_enable(CONTEXT_S, L3_ERROR_INTERRUPT + PLIC_EXT_INTR_OFFSET);
}
static void disable_plic_l3_cache_error_interrupt() {
// disable l3 cache error interrupt
plic_disable(CONTEXT_S, L3_ERROR_INTERRUPT + PLIC_EXT_INTR_OFFSET);
}
#endif
void handle_ext_intr() {
if (!should_trigger) {
error("should not trigger\n");
}
uint32_t claim = plic_get_claim(current_context); // READ_WORD(PLIC_CLAIM(current_context));
printf("an interrupt is detected, plic claim is: %d\n", claim);
if (claim) {
if (claim != should_claim) {
error("ERROR: is the external interrupt bit in PLIC cleared correctly?\n");
}
plic_clear_intr(claim); // CLEAR_INTR(claim - PLIC_EXT_INTR_OFFSET);
plic_clear_claim(current_context, claim); // WRITE_WORD(PLIC_CLAIM(current_context), claim);
// NOTE: claim will always be 257 as beu always sends ecc error to plic
should_claim = -1;
// disable ecc error interrupt to continue
#ifndef TEST_L3
disable_plic_bus_error_interrupt();
#else
disable_plic_l3_cache_error_interrupt();
#endif
}
else {
error("ERROR: no claim?\n");
}
}
_Context *handle_external_trap(_Event ev, _Context *ctx) {
switch(ev.event) {
case _EVENT_IRQ_TIMER:
printf("t"); break;
case _EVENT_IRQ_IODEV:
printf("d"); handle_ext_intr(); break;
case _EVENT_YIELD:
printf("y"); break;
default:
printf("u"); _halt(1);
}
return ctx;
}
static void plic_intr_init() {
for (int i = 0; i < MAX_EXTERNAL_INTR + MAX_INTERNAL_INTR + PLIC_EXT_INTR_OFFSET; i++) {
// WRITE_WORD(PLIC_PRIORITY + i * sizeof(uint32_t), 0x1);
plic_set_priority(i, 0x1);
}
for (int i = 0; i < MAX_EXTERNAL_INTR + PLIC_EXT_INTR_OFFSET + MAX_INTERNAL_INTR; i += 32) {
plic_disable_word(CONTEXT_M, i); // WRITE_WORD(PLIC_ENABLE(CONTEXT_M) + i/8, 0);
plic_disable_word(CONTEXT_S, i); // WRITE_WORD(PLIC_ENABLE(CONTEXT_S) + i/8, 0);
}
plic_set_threshold(CONTEXT_M, 0x0); // WRITE_WORD(PLIC_THRESHOLD(CONTEXT_M), 0x0);
plic_set_threshold(CONTEXT_S, 0x0); // WRITE_WORD(PLIC_THRESHOLD(CONTEXT_S), 0x0);
}
static inline void __attribute__((optimize("O0"))) wait_time(int cnt) {
char blocks[20][512];
while(cnt--) {
for(int i = 0; i < 20; i++) {
blocks[i][0] = 1;
blocks[i][1] = blocks[i][0];
}
}
}
void test_BEU() {
// NOTE: we are under S mode during testing
// enable supervisor external interrupts
asm volatile("csrs sie, %0" : : "r"((1 << 9)));
asm volatile("csrs sstatus, 2");
plic_intr_init();
#ifndef TEST_L3
// config BEU to enable cache ecc error interrupt
config_BEU();
should_trigger = true;
should_claim = BUS_ERROR_INTERRUPT + PLIC_EXT_INTR_OFFSET;
// enable plic source of BEU
enable_plic_bus_error_interrupt();
// we expect an interrupt from now on
wait_time(100);
#else
// test l3 cache error
should_trigger = true;
should_claim = L3_ERROR_INTERRUPT + PLIC_EXT_INTR_OFFSET;
// enable plic source of l3 cache error
enable_plic_l3_cache_error_interrupt();
// we expect an interrupt from now on
wait_time(100);
#endif
if(should_claim != -1) {
#ifndef TEST_L3
error("beu interrupt is not triggered or not handled correctly\n");
#else
error("l3 cache error interrupt is not triggered or not handled correctly\n");
#endif
}
printf("beu test passed!!!\n");
_halt(0);
}
......@@ -59,8 +59,12 @@ void dma_test() {
for (int i = 0; i < 64; i++) {
uint64_t offset = random_memory_offset();
uint64_t rand_num = random_number();
memory[offset] = rand_num;
ref_memory[offset] = rand_num;
// fetch memory to cache hierarchy randomly
if (rand_num & 1) {
memory[offset] = random_number();
}
// memory[offset] = rand_num;
// ref_memory[offset] = rand_num;
mshr[i].data[3] = rand_num;
mshr[i].data[5] = rand_num;
mshr[i].state.value = s_write;
......@@ -85,15 +89,39 @@ void dma_test() {
}
}
}
printf("Finished DMA write. Starting CPU read.\n");
riscv_fence();
printf("Finished DMA write. Starting DMA read.\n");
for (int i = 0; i < 64; i++) {
uint64_t rand_num = random_number();
mshr[i].data[3] = rand_num;
mshr[i].data[5] = rand_num;
mshr[i].state.value = s_read;
// do not touch mshr.address/mask
}
riscv_fence();
*mshr_valid = 0xffffffffffffffffUL;
mshr_cleared = false;
while (!mshr_cleared) {
mshr_cleared = true;
for (int i = 0; i < 64; i++) {
if (mshr[i].state.value) {
mshr_cleared = false;
break;
}
}
}
riscv_fence();
printf("Finished DMA read. Starting CPU read.\n");
for (int i = 0; i < 64; i++) {
uint64_t base_offset = (uint64_t *)mshr[i].address - (uint64_t *)memory;
volatile uint64_t *golden = (uint64_t *)ref_memory + base_offset;
volatile uint64_t *dut = (uint64_t *)memory + base_offset;
volatile uint64_t *dut = (uint64_t *)memory + base_offset;
for (int j = 0; j < 8; j++) {
// only difftest for masked written data because original value in memory may be non-zero
if (j != 3 && j != 5) continue;
uint64_t dut_data = dut[j];
uint64_t ref_data = golden[j];
if (dut_data != ref_data) {
if (dut_data != ref_data || mshr[i].data[j] != ref_data) {
printf("[ERROR ] Test %d at offset %d: DUT(0x%016lx) != REF(0x%016lx) at address 0x%lx and 0x%lx\n",
i, j, dut_data, ref_data, dut + j, golden + j);
_halt(1);
......
......@@ -29,9 +29,6 @@
// External interrupts start with index PLIC_EXT_INTR_OFFSET
#define PLIC_EXT_INTR_OFFSET 1
#define MAX_EXTERNAL_INTR 64
#define MAX_INTERNAL_INTR 10
// CSR.MIE
#define MEIE 11
#define SEIE 9
......@@ -169,7 +166,7 @@ void random_trigger() {
for (int i = 0; i < (MAX_EXTERNAL_INTR + 31) / 32; i++) {
WRITE_WORD(INTR_RANDOM_ADDR(i), 0xffffffff);
}
for (int i = 0; i < (MAX_EXTERNAL_INTR + 32) / 32; i++) {
for (int i = 0; i < (MAX_EXTERNAL_INTR + 31) / 32; i++) {
WRITE_WORD(PLIC_ENABLE(CONTEXT_S) + i * 4, 0xffffffff);
}
void hello_intr_n(int n);
......
#include <amtest.h>
#include <pmp.h>
#include <csr.h>
#include <xsextra.h>
/*
......
#include <amtest.h>
#include <nemu.h>
void rtc_accuracy_test() {
printf("RTC test\n");
// read rtc, get start
volatile uint64_t start = *(uint64_t *)(RTC_ADDR);
// do some work
volatile uint64_t a = 0;
for (int i = 0; i < 20000; i++) {
a += 1;
}
// read rtc, get end
volatile uint64_t end = *(uint64_t *)(RTC_ADDR);
printf("RTC time %llu\n", end - start);
if (end - start > 10000) printf("RTC tick seems to be too fast!\n");
}
#include <amtest.h>
#include <xs.h>
#include <nemu.h>
#if defined(__ARCH_RISCV64_NOOP) || defined(__ARCH_RISCV64_XS) || defined(__ARCH_RISCV64_XS_SOUTHLAKE) || defined(__ARCH_RISCV64_XS_SOUTHLAKE_FLASH)
#define CLINT_SOFT_ADDRESS (RTC_ADDR - 0xbff8)
#else
#define CLINT_SOFT_ADDRESS 0xa2000000
#endif
_Context *soft_trap(_Event ev, _Context *ctx) {
switch(ev.event) {
case _EVENT_IRQ_SOFT:
printf("s"); break;
case _EVENT_IRQ_TIMER:
printf("t"); break;
case _EVENT_IRQ_IODEV:
printf("d"); read_key(); break;
case _EVENT_YIELD:
printf("y"); break;
}
return ctx;
}
static void soft_intr_init() {
// enable supervisor software interrupt (sie.ssie and sstatus.sie)
asm volatile("csrs sstatus, %0" : : "r"(1 << 1));
asm volatile("csrs sie, %0" : : "r"(1 << 1));
}
void soft_intr() {
soft_intr_init();
*(uint32_t *)(CLINT_SOFT_ADDRESS) = 1;
asm volatile("csrw sip, 2");
}
\ No newline at end of file
#include <amtest.h>
#include <pmp.h>
#include <csr.h>
#include <xsextra.h>
/*
......@@ -86,7 +86,7 @@ void sv39_test() {
#endif
irq_handler_reg(EXCEPTION_STORE_PAGE_FAULT, &store_page_fault_handler);
irq_handler_reg(EXCEPTION_LOAD_PAGE_FAULT, &load_page_fault_handler);
asm volatile("sfence.vma");
printf("test sv39 data write\n");
*w_ptr = 'a';
......
......@@ -20,10 +20,12 @@
#define CTRL_DATA_OFFSET 24
#define CTRL_DIR_OFFSET 32
#define TEST_BUFFER_SIZE 128
#define L3_SIZE_KB (3 * 1024)
#define L3_NR_WAY 6
#define L3_SIZE_KB (2 * 1024)
#define L3_NR_WAY 8
#define L3_NR_BANK 4
#define OFFSET_LEN 6
#define CACHE_LINE_SIZE_BIT 512
#define CACHE_LINE_SIZE_BYTE (CACHE_LINE_SIZE_BIT / 8)
unsigned int log2(unsigned int n) {
unsigned int result = 0;
......@@ -97,6 +99,52 @@ void test3() {
}
}
// Flush a cacheline (512 bit) to memory
void flush_to_memory(uint64_t paddr) {
// printf("l3 size is set to %d KB, nr_way is set to %d, nr_bank is set to %d, ", L3_SIZE_KB, L3_NR_WAY, L3_NR_BANK);
unsigned int set_size = L3_SIZE_KB * 1024 / L3_NR_BANK / L3_NR_WAY / 64;
unsigned int set_len = log2(set_size);
// printf("nr_set is %u, set_len is %u\n", set_size, set_len);
/* In our LLC design, full address is passed by CtrlUnit to one of the SliceCtrls according to BankBits
* Afterwards, BankBits are truncated in SliceCtrl to generate real MSHR request
* So we should provide full address here
*/
uint64_t tag = (paddr >> OFFSET_LEN) >> set_len; // paddr to l3 tag
uint64_t set = (paddr >> OFFSET_LEN) & (set_size-1); // paddr to l3 set
*(uint64_t*)(CACHE_CTRL_BASE + CTRL_TAG_OFFSET) = tag;
*(uint64_t*)(CACHE_CTRL_BASE + CTRL_SET_OFFSET) = set;
// printf("flush to memory: addr 0x%llx tag 0x%llx set 0x%llx\n", &test_buffer, tag, set);
asm("fence\n");
(*(uint64_t*)CACHE_CMD_BASE) = CMD_CMO_CLEAN; // or CMD_CMO_FLUSH
wait(100);
}
// Flush an n*512 bit address region to memory
void flush_region_to_memory(uint64_t start_paddr, uint64_t size_in_byte) {
// pre-calculated constants
unsigned int set_size = L3_SIZE_KB * 1024 / L3_NR_BANK / L3_NR_WAY / 64;
unsigned int set_len = log2(set_size);
// printf("l3 size is set to %d KB, nr_way is set to %d, nr_bank is set to %d, ", L3_SIZE_KB, L3_NR_WAY, L3_NR_BANK);
// printf("nr_set is %u, set_len is %u\n", set_size, set_len);
// flush sq and sbuffer
asm("fence\n");
// send l3 cache flush op to l3 cache controller
for(uint64_t current_paddr = start_paddr; current_paddr < (start_paddr + size_in_byte); current_paddr += CACHE_LINE_SIZE_BYTE){
uint64_t tag = (current_paddr >> OFFSET_LEN) >> set_len; // paddr to l3 tag
uint64_t set = (current_paddr >> OFFSET_LEN) & (set_size-1); // paddr to l3 set
*(uint64_t*)(CACHE_CTRL_BASE + CTRL_TAG_OFFSET) = tag;
*(uint64_t*)(CACHE_CTRL_BASE + CTRL_SET_OFFSET) = set;
// printf("flush to memory: addr 0x%llx tag 0x%llx set 0x%llx\n", &test_buffer, tag, set);
(*(uint64_t*)CACHE_CMD_BASE) = CMD_CMO_CLEAN; // or CMD_CMO_FLUSH
}
// wait for the last cache op to finish
wait(100);
}
int main() {
printf("HuanCun op (mmio based) test. Note that --no-diff is required!\n");
printf("HuanCun l3 size is set to %d KB, nr_way is set to %d, nr_bank is set to %d, ", L3_SIZE_KB, L3_NR_WAY, L3_NR_BANK);
......
NAME := memscantest
SRCS := $(shell find -L ./src/ -name "*.[cS]")
include $(AM_HOME)/Makefile.app
This diff has been collapsed.
#ifndef __AMUNIT_H__
#define __AMUNIT_H__
#include <am.h>
#include <klib.h>
#include <klib-macros.h>
#include <encoding.h>
#include <xsextra.h>
#endif
// See LICENSE for license details.
// the memory space scan test
// scan all the physical memory address space to see whether physical memory attributes are correct
// start end description priv
// MemMap("0x00_8000_0000", "0x00_FFFF_FFFF", "h0", "PCIe Slave Space Low PCIe", "RWX"),
// MemMap("0x01_0000_0000", "0x07_FFFF_FFFF", "h0", "PCIe Slave Space High PCIe", "RWX"),
// MemMap("0x08_0000_0000", "0x1E_FFFF_FFFF", "h0", "Reserved", "R"),
// MemMap("0x1F_0000_0000", "0x1F_0FFF_FFFF", "h0", "CPUSS Perfipheral", "RW"),
// MemMap("0x1F_1000_0000", "0x1F_1FFF_FFFF", "h0", "Reserved", "R"),
// MemMap("0x1F_2000_0000", "0x1F_201F_FFFF", "h0", "DDR Config", "RW"),
// MemMap("0x1F_2020_0000", "0x1F_203F_FFFF", "h0", "PCIe PHY", "RW"),
// MemMap("0x1F_2040_0000", "0x1F_2047_FFFF", "h0", "APGC Config", "RW"),
// MemMap("0x1F_2048_0000", "0x1F_2048_FFFF", "h0", "SOC TOP Register", "RW"),
// MemMap("0x1F_2049_0000", "0x1F_2049_FFFF", "h0", "DTS", "RW"),
// MemMap("0x1F_204A_0000", "0x1F_204A_FFFF", "h0", "GPIO PAR0", "RW"),
// MemMap("0x1F_204B_0000", "0x1F_204B_FFFF", "h0", "GPIO PAR1", "RW"),
// MemMap("0x1F_204C_0000", "0x1F_204C_FFFF", "h0", "PLL0", "RW"),
// MemMap("0x1F_204D_0000", "0x1F_204D_FFFF", "h0", "PLL1", "RW"),
// MemMap("0x1F_204E_0000", "0x1F_204E_FFFF", "h0", "PLL2", "RW"),
// MemMap("0x1F_204F_0000", "0x1F_204F_03FF", "h0", "Fuse0", "RW"),
// MemMap("0x1F_204F_0400", "0x1F_2049_07FF", "h0", "Fuse1", "RW"),
// MemMap("0x1F_204F_0800", "0x1F_2049_0BFF", "h0", "RTC Register", "RW"),
// MemMap("0x1F_204F_0C00", "0x1F_7FFF_FFFF", "h0", "Reserved", "R"), // NOTE: not aligned to 4KB
// MemMap("0x1F_8000_0000", "0x1F_BFFF_FFFF", "h0", "Peripheral SS", "RWX"),
// MemMap("0x1F_C000_0000", "0x1F_DFFF_FFFF", "h0", "PCIe Slave Space", "RW"),
// MemMap("0x1F_E000_0000", "0x1F_E1FF_FFFF", "h0", "PCI SS Config Space", "RW"),
// MemMap("0x1F_E200_0000", "0x1F_E21F_FFFF", "h0", "Shared SRAM", "RWX"),
// MemMap("0x1F_E220_0000", "0x1F_FFF7_FFFF", "h0", "Reserved", "R"),
// MemMap("0x1F_FFF8_0000", "0x1F_FFFB_FFFF", "h0", "BOOT ROM", "RWX"),
// MemMap("0x1F_FFFC_0000", "0x1F_FFFF_FFFF", "h0", "Reserved", "R"),
// MemMap("0x20_0000_0000", "0x23_FFFF_FFFF", "h0", "MEM SS[DDR]", "RWX")
// how to run:
// 1. cd memscantest
// 2. make ARCH=riscv64-xs-southlake-flash
// 3. ./emu --i path/to/memscantest-riscv64-xs-southlake-flash.bin --no-diff -I 5000000 --force-dump-result 2>temp
// 4. pass if HIT GOOD TRAP appears
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <test.h>
#define VM_END 0x2400000000UL
#define VM_START 0x80000000UL
#define VM_RANGE 0x00080000
#define error(msg) {printf(msg); _halt(1);}
volatile int trap_expected;
volatile int granule;
volatile int expect_excep;
// for exec test
volatile int total_unexec_segment = 0;
volatile int cur_unexec_id = 0;
volatile int cur_unexec_num = 0;
enum CMD {
READ, WRITE, EXEC
};
struct MemAttr {
uintptr_t start;
char description[512];
uint8_t read;
uint8_t write;
uint8_t exec;
};
struct MemAttr MemAttrs[28] = {
{0x80000000UL, "PCIe Slave Space Low PCIe", 1, 1, 1},
{0x100000000UL, "PCIe Slave Space High PCIe", 1, 1, 1},
{0x800000000UL, "Reserved", 1, 0, 0},
{0x1f00000000UL, "CPUSS Perfipheral", 1, 1, 0},
{0x1f10000000UL, "Reserved", 1, 0, 0},
{0x1f20000000UL, "DDR Config", 1, 1, 0},
{0x1f20200000UL, "PCIe PHY", 1, 1, 0},
{0x1f20400000UL, "APGC Config", 1, 1, 0},
{0x1f20480000UL, "SOC TOP Register", 1, 1, 0},
{0x1f20490000UL, "DTS", 1, 1, 0},
{0x1f204a0000UL, "GPIO PAR0", 1, 1, 0},
{0x1f204b0000UL, "GPIO PAR1", 1, 1, 0},
{0x1f204c0000UL, "PLL0", 1, 1, 0},
{0x1f204d0000UL, "PLL1", 1, 1, 0},
{0x1f204e0000UL, "PLL2", 1, 1, 0},
{0x1f204f0000UL, "Fuse0", 1, 1, 0},
{0x1f204f0400UL, "Fuse1", 1, 1, 0},
{0x1f204f0800UL, "RTC Register", 1, 1, 0},
{0x1f204f0c00UL, "Reserved", 1, 0, 0},
{0x1f80000000UL, "Peripheral SS", 1, 1, 1},
{0x1fc0000000UL, "PCIe Slave Space", 1, 1, 0},
{0x1fe0000000UL, "PCI SS Config Space", 1, 1, 0},
{0x1fe2000000UL, "Shared SRAM", 1, 1, 1},
{0x1fe2200000UL, "Reserved", 1, 0, 0},
{0x1ffff80000UL, "BOOT ROM", 1, 1, 1},
{0x1ffffc0000UL, "Reserved", 1, 0, 0},
{0x2000000000UL, "MEM SS[DDR]", 1, 1, 1},
{0x2400000000UL, "MEM sentinel(end of DDR)", 0, 0, 0},
};
void exec_test();
static uintptr_t insn_len(uintptr_t pc)
{
return (*(unsigned short*)pc & 3) ? 4 : 2;
}
int pma_check(uintptr_t address, enum CMD cmd) {
int idx = 0;
for(;;idx++) {
if(address >= MemAttrs[idx].start && address < MemAttrs[idx + 1].start) {
break;
}
}
if( (cmd == READ && MemAttrs[idx].read) || (cmd == WRITE && MemAttrs[idx].write) || (cmd == EXEC && MemAttrs[idx].exec) ) {
return 1;
}
return 0;
}
// #define INLINE inline __attribute__((always_inline))
#define INLINE __attribute__((noinline))
INLINE void reset_mpp() __attribute__((optimize("O3")));
INLINE void reset_mpp() {
uintptr_t mpp_s = MSTATUS_MPP;
asm volatile ("mv t0, %0; csrs mstatus, t0; jr ra" : : "r" (mpp_s));
}
// exception handler
uintptr_t handle_trap(uintptr_t cause, uintptr_t epc, uintptr_t regs[32])
{
if (cause == CAUSE_ILLEGAL_INSTRUCTION) {
reset_mpp();
return epc + insn_len(epc);
}
if (cause != CAUSE_LOAD_ACCESS && cause != CAUSE_FETCH_ACCESS && cause != CAUSE_STORE_ACCESS)
error("unexpected exception type(actual exception)\n")
if (!trap_expected)
error("operation should not trigger an exception\n")
if (!(expect_excep == CAUSE_LOAD_ACCESS || expect_excep == CAUSE_FETCH_ACCESS || expect_excep == CAUSE_STORE_ACCESS))
error("unexpected exception type(expected exception)\n")
if (cause != expect_excep) {
error("exception type is not as expected\n")
}
trap_expected = 0;
if (cause == CAUSE_FETCH_ACCESS) {
cur_unexec_id++;
cur_unexec_num++;
return (uintptr_t)exec_test;
}
else
return epc + insn_len(epc);
}
// l1 2G super pages
uintptr_t l1pt[RISCV_PGSIZE / sizeof(uintptr_t)] __attribute__((aligned(RISCV_PGSIZE)));
static void init_pt()
{
uint64_t HIGHEST_PPN_SHITF = (RISCV_PGSHIFT + RISCV_PGLEVEL_BITS + RISCV_PGLEVEL_BITS);
// make 2G super page tables between 0x80000000L to 0x2400000000L
for(uintptr_t start_addr = VM_START; start_addr <= VM_END ; start_addr += (1 << HIGHEST_PPN_SHITF)) {
l1pt[start_addr >> HIGHEST_PPN_SHITF] = ((uintptr_t)start_addr >> RISCV_PGSHIFT << PTE_PPN_SHIFT) | PTE_V | PTE_X | PTE_R | PTE_W | PTE_A | PTE_D;
}
#if __riscv_xlen == 64
uintptr_t vm_choice = SATP_MODE_SV39;
#else
uintptr_t vm_choice = SATP_MODE_SV32;
#endif
write_csr(sptbr, ((uintptr_t)l1pt >> RISCV_PGSHIFT) |
(vm_choice * (SATP_MODE & ~(SATP_MODE<<1))));
}
INLINE void test_one_st(uintptr_t addr, uintptr_t size)
{
uintptr_t new_mstatus = (read_csr(mstatus) & ~MSTATUS_MPP) | (MSTATUS_MPP & (MSTATUS_MPP >> 1)) | MSTATUS_MPRV;
switch (size) {
case 1: asm volatile ("csrrw %0, mstatus, %0; sb x0, (%1); csrw mstatus, %0" : "+&r" (new_mstatus) : "r" (addr)); break;
case 2: asm volatile ("csrrw %0, mstatus, %0; sh x0, (%1); csrw mstatus, %0" : "+&r" (new_mstatus) : "r" (addr)); break;
case 4: asm volatile ("csrrw %0, mstatus, %0; sw x0, (%1); csrw mstatus, %0" : "+&r" (new_mstatus) : "r" (addr)); break;
#if __riscv_xlen >= 64
case 8: asm volatile ("csrrw %0, mstatus, %0; sd x0, (%1); csrw mstatus, %0" : "+&r" (new_mstatus) : "r" (addr)); break;
#endif
default: __builtin_unreachable();
}
}
INLINE void test_one_ld(uintptr_t addr, uintptr_t size)
{
uintptr_t new_mstatus = (read_csr(mstatus) & ~MSTATUS_MPP) | (MSTATUS_MPP & (MSTATUS_MPP >> 1)) | MSTATUS_MPRV;
switch (size) {
case 1: asm volatile ("csrrw %0, mstatus, %0; lb x0, (%1); csrw mstatus, %0" : "+&r" (new_mstatus) : "r" (addr)); break;
case 2: asm volatile ("csrrw %0, mstatus, %0; lh x0, (%1); csrw mstatus, %0" : "+&r" (new_mstatus) : "r" (addr)); break;
case 4: asm volatile ("csrrw %0, mstatus, %0; lw x0, (%1); csrw mstatus, %0" : "+&r" (new_mstatus) : "r" (addr)); break;
#if __riscv_xlen >= 64
case 8: asm volatile ("csrrw %0, mstatus, %0; ld x0, (%1); csrw mstatus, %0" : "+&r" (new_mstatus) : "r" (addr)); break;
#endif
default: __builtin_unreachable();
}
}
INLINE void test_one_ldst(uintptr_t addr, int size) {
expect_excep = CAUSE_LOAD_ACCESS;
trap_expected = !pma_check(addr, READ);
test_one_ld(addr, size);
if (trap_expected)
error("exception should be handled correctly or should not be triggered\n")
expect_excep = CAUSE_STORE_ACCESS;
trap_expected = !pma_check(addr, WRITE);
test_one_st(addr, size);
if (trap_expected)
error("exception should be handled correctly or should not be triggered\n")
}
INLINE void test_all_sizes(uintptr_t addr)
{
for (size_t size = 1; size <= sizeof(uintptr_t); size *= 2) {
if (addr & (size - 1))
continue;
test_one_ldst(addr, size);
}
}
INLINE void test_range_once(uintptr_t base, uintptr_t range)
{
for (uintptr_t addr = base; addr < base + range; addr += granule)
test_all_sizes(addr);
}
INLINE void nothing() __attribute__((optimize("O0")));
INLINE void nothing() {
return ;
}
INLINE void turn_to_smode() __attribute__((optimize("O3")));
INLINE void turn_to_smode() {
uintptr_t mpp_s = MSTATUS_MPP & (MSTATUS_MPP >> 1);
asm volatile ("mv t0, %0; csrs mstatus, t0; csrw mepc, ra; mret" : : "r" (mpp_s));
}
void get_all_unexec_segments() {
for(int i=0;i<27;i++) {
if(MemAttrs[i].exec == 0)
total_unexec_segment++;
}
}
void exec_test() {
for(;cur_unexec_id<27;) {
if(MemAttrs[cur_unexec_id].exec == 0) {
expect_excep = CAUSE_FETCH_ACCESS;
trap_expected = 1;
void(*ptr)();
ptr = (void(*)())(MemAttrs[cur_unexec_id].start);
ptr();
}else {
trap_expected = 0;
cur_unexec_id++;
}
}
if(total_unexec_segment == cur_unexec_num) {
_halt(0);
}
}
void naive_rw_mem_scan_test(uintptr_t range) {
test_range_once(0x80000000UL, range);
test_range_once(0x100000000UL, range);
test_range_once(0x800000000UL, range);
test_range_once(0x1F00000000UL, range);
// test_range_once(0x1F10000000UL, range); // error
test_range_once(0x1F20000000UL, range);
test_range_once(0x1F20200000UL, range);
test_range_once(0x1F20400000UL, range);
test_range_once(0x1F20480000UL, range);
test_range_once(0x1F20490000UL, range);
test_range_once(0x1F204A0000UL, range);
test_range_once(0x1F204B0000UL, range);
test_range_once(0x1F204C0000UL, range);
test_range_once(0x1F204D0000UL, range);
test_range_once(0x1F204E0000UL, range);
test_range_once(0x1F204F0000UL, range);
test_range_once(0x1F80000000UL, range);
test_range_once(0x1FC0000000UL, range);
test_range_once(0x1FE0000000UL, range);
test_range_once(0x1FE2000000UL, range);
test_range_once(0x1FE2200000UL, range);
test_range_once(0x1FFFF80000UL, range);
test_range_once(0x1FFFFC0000UL, range);
}
void init() {
extern void trap_entry(void);
// register M mode exception handler
asm volatile("csrw mtvec, %0" : : "r"(trap_entry));
}
int main() __attribute__((optimize("O0")));
int main()
{
// note: this test is under M mode
init();
init_pmp();
granule = 8;
init_pt();
naive_rw_mem_scan_test(8);
uintptr_t mprv = MSTATUS_MPRV;
asm volatile ("csrc mstatus, %0" : : "r" (mprv));
get_all_unexec_segments();
exec_test();
return 0;
}
#include "encoding.h"
#if __riscv_xlen == 64
# define LREG ld
# define SREG sd
# define REGBYTES 8
#else
# define LREG lw
# define SREG sw
# define REGBYTES 4
#endif
.global trap_entry
.align 2
trap_entry:
addi sp, sp, -272
SREG x1, 1*REGBYTES(sp)
SREG x2, 2*REGBYTES(sp)
SREG x3, 3*REGBYTES(sp)
SREG x4, 4*REGBYTES(sp)
SREG x5, 5*REGBYTES(sp)
SREG x6, 6*REGBYTES(sp)
SREG x7, 7*REGBYTES(sp)
SREG x8, 8*REGBYTES(sp)
SREG x9, 9*REGBYTES(sp)
SREG x10, 10*REGBYTES(sp)
SREG x11, 11*REGBYTES(sp)
SREG x12, 12*REGBYTES(sp)
SREG x13, 13*REGBYTES(sp)
SREG x14, 14*REGBYTES(sp)
SREG x15, 15*REGBYTES(sp)
SREG x16, 16*REGBYTES(sp)
SREG x17, 17*REGBYTES(sp)
SREG x18, 18*REGBYTES(sp)
SREG x19, 19*REGBYTES(sp)
SREG x20, 20*REGBYTES(sp)
SREG x21, 21*REGBYTES(sp)
SREG x22, 22*REGBYTES(sp)
SREG x23, 23*REGBYTES(sp)
SREG x24, 24*REGBYTES(sp)
SREG x25, 25*REGBYTES(sp)
SREG x26, 26*REGBYTES(sp)
SREG x27, 27*REGBYTES(sp)
SREG x28, 28*REGBYTES(sp)
SREG x29, 29*REGBYTES(sp)
SREG x30, 30*REGBYTES(sp)
SREG x31, 31*REGBYTES(sp)
csrr a0, mcause
csrr a1, mepc
mv a2, sp
jal handle_trap
csrw mepc, a0
# Remain in M-mode after eret
li t0, MSTATUS_MPP
csrs mstatus, t0
LREG x1, 1*REGBYTES(sp)
LREG x2, 2*REGBYTES(sp)
LREG x3, 3*REGBYTES(sp)
LREG x4, 4*REGBYTES(sp)
LREG x5, 5*REGBYTES(sp)
LREG x6, 6*REGBYTES(sp)
LREG x7, 7*REGBYTES(sp)
LREG x8, 8*REGBYTES(sp)
LREG x9, 9*REGBYTES(sp)
LREG x10, 10*REGBYTES(sp)
LREG x11, 11*REGBYTES(sp)
LREG x12, 12*REGBYTES(sp)
LREG x13, 13*REGBYTES(sp)
LREG x14, 14*REGBYTES(sp)
LREG x15, 15*REGBYTES(sp)
LREG x16, 16*REGBYTES(sp)
LREG x17, 17*REGBYTES(sp)
LREG x18, 18*REGBYTES(sp)
LREG x19, 19*REGBYTES(sp)
LREG x20, 20*REGBYTES(sp)
LREG x21, 21*REGBYTES(sp)
LREG x22, 22*REGBYTES(sp)
LREG x23, 23*REGBYTES(sp)
LREG x24, 24*REGBYTES(sp)
LREG x25, 25*REGBYTES(sp)
LREG x26, 26*REGBYTES(sp)
LREG x27, 27*REGBYTES(sp)
LREG x28, 28*REGBYTES(sp)
LREG x29, 29*REGBYTES(sp)
LREG x30, 30*REGBYTES(sp)
LREG x31, 31*REGBYTES(sp)
addi sp, sp, 272
mret
\ No newline at end of file