diff --git a/.github/workflows/check-usage.sh b/.github/workflows/check-usage.sh new file mode 100644 index 0000000000000000000000000000000000000000..aab1dd1951e946142f2ce35d6174dd93d6f648cb --- /dev/null +++ b/.github/workflows/check-usage.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +echo $1 +echo $2 +grep -rn $1 $2/src/main/scala/xiangshan +if [[ $? == 0 ]]; +then + exit 1 +fi +exit 0 diff --git a/.github/workflows/emu.yml b/.github/workflows/emu.yml index 6b624c545517936c6c6e4cf198ec264d75b8d70a..b506e55977807512edbc0368a355d0a0236767ed 100644 --- a/.github/workflows/emu.yml +++ b/.github/workflows/emu.yml @@ -3,7 +3,7 @@ name: EMU Test on: push: - branches: [ master, update-ci ] + branches: [ master, update-ci] pull_request: branches: [ master ] @@ -15,6 +15,8 @@ jobs: - uses: actions/checkout@v2 with: submodules: 'recursive' + - name: Check Wiring + run: bash .github/workflows/check-usage.sh "BoringUtils" $GITHUB_WORKSPACE - name: Set env run: | echo ::set-env name=NEMU_HOME::/home/ci-runner/xsenv/NEMU @@ -70,7 +72,7 @@ jobs: echo $AM_HOME echo $NEMU_HOME echo $NOOP_HOME - make -C $AM_HOME/apps/microbench ARCH=riscv64-noop AM_HOME=$AM_HOME NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME mainargs=test run + make -C $AM_HOME/apps/microbench ARCH=riscv64-noop AM_HOME=$AM_HOME NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME mainargs=test run 2> /dev/null riscv-tests: runs-on: self-hosted @@ -87,5 +89,5 @@ jobs: echo $NEMU_HOME echo $NOOP_HOME echo $RVTEST_HOME - make -C $RVTEST_HOME/isa/ SUITES+=rv64ui SUITES+=rv64um SUITES+=rv64ua NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME noop_run + make -C $RVTEST_HOME/isa/ SUITES+=rv64ui SUITES+=rv64um SUITES+=rv64ua NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME noop_run 2> /dev/null diff --git a/Makefile b/Makefile index 5d0fbc527531d6c2d82365134efa4894216e7c50..4fbe1fab941ee2b89ce4b9622be6794a4e93108c 100644 --- a/Makefile +++ b/Makefile @@ -57,7 +57,7 @@ EMU_CXXFLAGS += -DVERILATOR -Wno-maybe-uninitialized EMU_LDFLAGS = -lpthread -lSDL2 -ldl 
EMU_THREADS = 1 ifeq ($(EMU_THREADS), 1) - VTHREAD_FLAGS = + VTHREAD_FLAGS = --threads 1 else VTHREAD_FLAGS = --threads $(EMU_THREADS) --threads-dpi none endif @@ -69,8 +69,8 @@ VERILATOR_FLAGS = --top-module $(SIM_TOP) \ +define+RANDOMIZE_REG_INIT \ +define+RANDOMIZE_MEM_INIT \ $(VTHREAD_FLAGS) \ - --assert \ --trace \ + --assert \ --savable \ --stats-vars \ --output-split 5000 \ diff --git a/debug/Makefile b/debug/Makefile index 8a09ef7f7596e711a520cdfc053ac4b1d68ee2f9..848b83b1b13992b343cc591122172a6e562dd1c9 100644 --- a/debug/Makefile +++ b/debug/Makefile @@ -48,7 +48,7 @@ microbench_train: cat microbench.log | grep IPC coremark: - $(MAKE) -C $(AM_HOME)/apps/coremark $(ARCH) $(EMU_ARGS) mainargs=test run + $(MAKE) -C $(AM_HOME)/apps/coremark $(ARCH) $(EMU_ARGS) mainargs=test run #2 > coremark.log cat coremark.log | grep IPC diff --git a/dummy-riscv64-noop.txt b/dummy-riscv64-noop.txt new file mode 100644 index 0000000000000000000000000000000000000000..ac422ec5bea4ded01fb23c1127fcfd46cb1145c2 --- /dev/null +++ b/dummy-riscv64-noop.txt @@ -0,0 +1,785 @@ + +/home/zzf/RISCVERS/nexus-am/tests/cputest/build/dummy-riscv64-noop.elf: file format elf64-littleriscv + + +Disassembly of section .text: + +0000000080000000 <_start>: + 80000000: 00000413 li s0,0 + 80000004: 00009117 auipc sp,0x9 + 80000008: ffc10113 addi sp,sp,-4 # 80009000 <_end> + 8000000c: 034000ef jal ra,80000040 <_trm_init> + +Disassembly of section .text.startup: + +0000000080000010
: + 80000010: 00000513 li a0,0 + 80000014: 00008067 ret + +Disassembly of section .text._putc: + +0000000080000018 <_putc>: + 80000018: 0540006f j 8000006c <__am_uartlite_putchar> + +Disassembly of section .text._halt: + +000000008000001c <_halt>: + 8000001c: ff010113 addi sp,sp,-16 + 80000020: 00113423 sd ra,8(sp) + 80000024: 00050593 mv a1,a0 + 80000028: 00050513 mv a0,a0 + 8000002c: 0005006b 0x5006b + 80000030: 00001517 auipc a0,0x1 + 80000034: b5850513 addi a0,a0,-1192 # 80000b88 + 80000038: 2d5000ef jal ra,80000b0c + 8000003c: 0000006f j 8000003c <_halt+0x20> + +Disassembly of section .text._trm_init: + +0000000080000040 <_trm_init>: + 80000040: ff010113 addi sp,sp,-16 + 80000044: 00113423 sd ra,8(sp) + 80000048: 014000ef jal ra,8000005c <__am_init_uartlite> + 8000004c: 00001517 auipc a0,0x1 + 80000050: b5150513 addi a0,a0,-1199 # 80000b9d <__am_mainargs> + 80000054: fbdff0ef jal ra,80000010
+ 80000058: fc5ff0ef jal ra,8000001c <_halt> + +Disassembly of section .text.__am_init_uartlite: + +000000008000005c <__am_init_uartlite>: + 8000005c: 406007b7 lui a5,0x40600 + 80000060: 00300713 li a4,3 + 80000064: 00e78623 sb a4,12(a5) # 4060000c <_start-0x3f9ffff4> + 80000068: 00008067 ret + +Disassembly of section .text.__am_uartlite_putchar: + +000000008000006c <__am_uartlite_putchar>: + 8000006c: ff010113 addi sp,sp,-16 + 80000070: 00813023 sd s0,0(sp) + 80000074: 00113423 sd ra,8(sp) + 80000078: 00a00793 li a5,10 + 8000007c: 00050413 mv s0,a0 + 80000080: 02f50463 beq a0,a5,800000a8 <__am_uartlite_putchar+0x3c> + 80000084: 40600737 lui a4,0x40600 + 80000088: 00874783 lbu a5,8(a4) # 40600008 <_start-0x3f9ffff8> + 8000008c: 0087f793 andi a5,a5,8 + 80000090: fe079ce3 bnez a5,80000088 <__am_uartlite_putchar+0x1c> + 80000094: 00870223 sb s0,4(a4) + 80000098: 00813083 ld ra,8(sp) + 8000009c: 00013403 ld s0,0(sp) + 800000a0: 01010113 addi sp,sp,16 + 800000a4: 00008067 ret + 800000a8: 00d00513 li a0,13 + 800000ac: fc1ff0ef jal ra,8000006c <__am_uartlite_putchar> + 800000b0: fd5ff06f j 80000084 <__am_uartlite_putchar+0x18> + +Disassembly of section .text.__putch: + +00000000800000b4 <__putch>: + 800000b4: 08058c63 beqz a1,8000014c <__putch+0x98> + 800000b8: fd010113 addi sp,sp,-48 + 800000bc: 02813023 sd s0,32(sp) + 800000c0: 00913c23 sd s1,24(sp) + 800000c4: 01213823 sd s2,16(sp) + 800000c8: 01313423 sd s3,8(sp) + 800000cc: 02113423 sd ra,40(sp) + 800000d0: fff5849b addiw s1,a1,-1 + 800000d4: 00060413 mv s0,a2 + 800000d8: 00050993 mv s3,a0 + 800000dc: fff00913 li s2,-1 + 800000e0: 0280006f j 80000108 <__putch+0x54> + 800000e4: 01843703 ld a4,24(s0) + 800000e8: 00e78663 beq a5,a4,800000f4 <__putch+0x40> + 800000ec: 00d43823 sd a3,16(s0) + 800000f0: 01378023 sb s3,0(a5) + 800000f4: 02043783 ld a5,32(s0) + 800000f8: fff4849b addiw s1,s1,-1 + 800000fc: 00178793 addi a5,a5,1 + 80000100: 02f43023 sd a5,32(s0) + 80000104: 03248663 beq s1,s2,80000130 <__putch+0x7c> + 
80000108: 01043783 ld a5,16(s0) + 8000010c: 00178693 addi a3,a5,1 + 80000110: fc079ae3 bnez a5,800000e4 <__putch+0x30> + 80000114: 00098513 mv a0,s3 + 80000118: f01ff0ef jal ra,80000018 <_putc> + 8000011c: 02043783 ld a5,32(s0) + 80000120: fff4849b addiw s1,s1,-1 + 80000124: 00178793 addi a5,a5,1 + 80000128: 02f43023 sd a5,32(s0) + 8000012c: fd249ee3 bne s1,s2,80000108 <__putch+0x54> + 80000130: 02813083 ld ra,40(sp) + 80000134: 02013403 ld s0,32(sp) + 80000138: 01813483 ld s1,24(sp) + 8000013c: 01013903 ld s2,16(sp) + 80000140: 00813983 ld s3,8(sp) + 80000144: 03010113 addi sp,sp,48 + 80000148: 00008067 ret + 8000014c: 00008067 ret + +Disassembly of section .text.vsnprintf_internal: + +0000000080000150 : + 80000150: f1010113 addi sp,sp,-240 + 80000154: 0e813023 sd s0,224(sp) + 80000158: 0d413023 sd s4,192(sp) + 8000015c: 0e113423 sd ra,232(sp) + 80000160: 0c913c23 sd s1,216(sp) + 80000164: 0d213823 sd s2,208(sp) + 80000168: 0d313423 sd s3,200(sp) + 8000016c: 0b513c23 sd s5,184(sp) + 80000170: 0b613823 sd s6,176(sp) + 80000174: 0b713423 sd s7,168(sp) + 80000178: 0b813023 sd s8,160(sp) + 8000017c: 09913c23 sd s9,152(sp) + 80000180: 09a13823 sd s10,144(sp) + 80000184: 09b13423 sd s11,136(sp) + 80000188: 00013c23 sd zero,24(sp) + 8000018c: 02013023 sd zero,32(sp) + 80000190: 02013c23 sd zero,56(sp) + 80000194: 02a13423 sd a0,40(sp) + 80000198: 00060413 mv s0,a2 + 8000019c: 00068a13 mv s4,a3 + 800001a0: 00050663 beqz a0,800001ac + 800001a4: 00b50533 add a0,a0,a1 + 800001a8: 58058863 beqz a1,80000738 + 800001ac: 02a13823 sd a0,48(sp) + 800001b0: 00001917 auipc s2,0x1 + 800001b4: 9f090913 addi s2,s2,-1552 # 80000ba0 <__am_mainargs+0x3> + 800001b8: 03900a93 li s5,57 + 800001bc: 00001997 auipc s3,0x1 + 800001c0: b5498993 addi s3,s3,-1196 # 80000d10 <__am_mainargs+0x173> + 800001c4: 00044503 lbu a0,0(s0) + 800001c8: 02050263 beqz a0,800001ec + 800001cc: 02500793 li a5,37 + 800001d0: 00140413 addi s0,s0,1 + 800001d4: 06f50263 beq a0,a5,80000238 + 800001d8: 01810613 addi 
a2,sp,24 + 800001dc: 00100593 li a1,1 + 800001e0: ed5ff0ef jal ra,800000b4 <__putch> + 800001e4: 00044503 lbu a0,0(s0) + 800001e8: fe0512e3 bnez a0,800001cc + 800001ec: 02813783 ld a5,40(sp) + 800001f0: 00078463 beqz a5,800001f8 + 800001f4: 00078023 sb zero,0(a5) + 800001f8: 0e813083 ld ra,232(sp) + 800001fc: 0e013403 ld s0,224(sp) + 80000200: 03812503 lw a0,56(sp) + 80000204: 0d813483 ld s1,216(sp) + 80000208: 0d013903 ld s2,208(sp) + 8000020c: 0c813983 ld s3,200(sp) + 80000210: 0c013a03 ld s4,192(sp) + 80000214: 0b813a83 ld s5,184(sp) + 80000218: 0b013b03 ld s6,176(sp) + 8000021c: 0a813b83 ld s7,168(sp) + 80000220: 0a013c03 ld s8,160(sp) + 80000224: 09813c83 ld s9,152(sp) + 80000228: 09013d03 ld s10,144(sp) + 8000022c: 08813d83 ld s11,136(sp) + 80000230: 0f010113 addi sp,sp,240 + 80000234: 00008067 ret + 80000238: 02000793 li a5,32 + 8000023c: 00f10e23 sb a5,28(sp) + 80000240: 00012c23 sw zero,24(sp) + 80000244: 02012023 sw zero,32(sp) + 80000248: 00040793 mv a5,s0 + 8000024c: 00000b93 li s7,0 + 80000250: 00000b13 li s6,0 + 80000254: 00000c13 li s8,0 + 80000258: 05800693 li a3,88 + 8000025c: 06c00e13 li t3,108 + 80000260: 06800313 li t1,104 + 80000264: 02f00593 li a1,47 + 80000268: 02d00e93 li t4,45 + 8000026c: 00900613 li a2,9 + 80000270: 02000813 li a6,32 + 80000274: 00100513 li a0,1 + 80000278: 02b00893 li a7,43 + 8000027c: 02b00f13 li t5,43 + 80000280: 0007c703 lbu a4,0(a5) + 80000284: 00140413 addi s0,s0,1 + 80000288: fe07079b addiw a5,a4,-32 + 8000028c: 0ff7f793 andi a5,a5,255 + 80000290: f2f6eae3 bltu a3,a5,800001c4 + 80000294: 00279793 slli a5,a5,0x2 + 80000298: 012787b3 add a5,a5,s2 + 8000029c: 0007a783 lw a5,0(a5) + 800002a0: 012787b3 add a5,a5,s2 + 800002a4: 00078067 jr a5 + 800002a8: fd070c1b addiw s8,a4,-48 + 800002ac: 00044703 lbu a4,0(s0) + 800002b0: 00140413 addi s0,s0,1 + 800002b4: 02e5f463 bgeu a1,a4,800002dc + 800002b8: fceae8e3 bltu s5,a4,80000288 + 800002bc: 002c179b slliw a5,s8,0x2 + 800002c0: 01878c3b addw s8,a5,s8 + 800002c4: 001c1c1b 
slliw s8,s8,0x1 + 800002c8: 00140413 addi s0,s0,1 + 800002cc: 00ec0c3b addw s8,s8,a4 + 800002d0: fff44703 lbu a4,-1(s0) + 800002d4: fd0c0c1b addiw s8,s8,-48 + 800002d8: fee5e0e3 bltu a1,a4,800002b8 + 800002dc: fbd716e3 bne a4,t4,80000288 + 800002e0: 41800c3b negw s8,s8 + 800002e4: 00040793 mv a5,s0 + 800002e8: f99ff06f j 80000280 + 800002ec: 00200793 li a5,2 + 800002f0: 008a0513 addi a0,s4,8 + 800002f4: 7afb8c63 beq s7,a5,80000aac + 800002f8: 4b77da63 bge a5,s7,800007ac + 800002fc: 00300793 li a5,3 + 80000300: 00fb8663 beq s7,a5,8000030c + 80000304: 00400793 li a5,4 + 80000308: 7afb9663 bne s7,a5,80000ab4 + 8000030c: 000a3483 ld s1,0(s4) + 80000310: 01810c93 addi s9,sp,24 + 80000314: 00050a13 mv s4,a0 + 80000318: 06010fa3 sb zero,127(sp) + 8000031c: 07f10713 addi a4,sp,127 + 80000320: 00f00613 li a2,15 + 80000324: 0080006f j 8000032c + 80000328: 00068493 mv s1,a3 + 8000032c: 00f4f793 andi a5,s1,15 + 80000330: 00f987b3 add a5,s3,a5 + 80000334: 0007c783 lbu a5,0(a5) + 80000338: fff70713 addi a4,a4,-1 + 8000033c: 0044d693 srli a3,s1,0x4 + 80000340: 00f70023 sb a5,0(a4) + 80000344: fe9662e3 bltu a2,s1,80000328 + 80000348: 08010793 addi a5,sp,128 + 8000034c: 40e7873b subw a4,a5,a4 + 80000350: fff70d1b addiw s10,a4,-1 + 80000354: ffe7079b addiw a5,a4,-2 + 80000358: fffd4493 not s1,s10 + 8000035c: 00f13023 sd a5,0(sp) + 80000360: 08010793 addi a5,sp,128 + 80000364: 009784b3 add s1,a5,s1 + 80000368: 00000b93 li s7,0 + 8000036c: 136d5a63 bge s10,s6,800004a0 + 80000370: 41ab083b subw a6,s6,s10 + 80000374: 01780b3b addw s6,a6,s7 + 80000378: 01ab073b addw a4,s6,s10 + 8000037c: 00000d93 li s11,0 + 80000380: 01875463 bge a4,s8,80000388 + 80000384: 40ec0dbb subw s11,s8,a4 + 80000388: 01813683 ld a3,24(sp) + 8000038c: fff00713 li a4,-1 + 80000390: 01875713 srli a4,a4,0x18 + 80000394: 00e6f6b3 and a3,a3,a4 + 80000398: 00100713 li a4,1 + 8000039c: 02571713 slli a4,a4,0x25 + 800003a0: 3ae68063 beq a3,a4,80000740 + 800003a4: 3a0b9c63 bnez s7,8000075c + 800003a8: 01813683 ld a3,24(sp) 
+ 800003ac: fff00713 li a4,-1 + 800003b0: 01875713 srli a4,a4,0x18 + 800003b4: 00e6f6b3 and a3,a3,a4 + 800003b8: 00300713 li a4,3 + 800003bc: 02471713 slli a4,a4,0x24 + 800003c0: 3ce68863 beq a3,a4,80000790 + 800003c4: 000c8613 mv a2,s9 + 800003c8: 00080593 mv a1,a6 + 800003cc: 03000513 li a0,48 + 800003d0: ce5ff0ef jal ra,800000b4 <__putch> + 800003d4: 020d0463 beqz s10,800003fc + 800003d8: 00016b03 lwu s6,0(sp) + 800003dc: 001b0b13 addi s6,s6,1 + 800003e0: 01648b33 add s6,s1,s6 + 800003e4: 00148493 addi s1,s1,1 + 800003e8: fff4c503 lbu a0,-1(s1) + 800003ec: 000c8613 mv a2,s9 + 800003f0: 00100593 li a1,1 + 800003f4: cc1ff0ef jal ra,800000b4 <__putch> + 800003f8: ff6496e3 bne s1,s6,800003e4 + 800003fc: 01812783 lw a5,24(sp) + 80000400: dc0782e3 beqz a5,800001c4 + 80000404: 01c14503 lbu a0,28(sp) + 80000408: 000c8613 mv a2,s9 + 8000040c: 000d8593 mv a1,s11 + 80000410: ca5ff0ef jal ra,800000b4 <__putch> + 80000414: db1ff06f j 800001c4 + 80000418: 01812783 lw a5,24(sp) + 8000041c: ec0794e3 bnez a5,800002e4 + 80000420: 03000793 li a5,48 + 80000424: 00f10e23 sb a5,28(sp) + 80000428: 00040793 mv a5,s0 + 8000042c: e55ff06f j 80000280 + 80000430: 00200793 li a5,2 + 80000434: 66fb8463 beq s7,a5,80000a9c + 80000438: 3d77da63 bge a5,s7,8000080c + 8000043c: 00300793 li a5,3 + 80000440: 64fb8063 beq s7,a5,80000a80 + 80000444: 00400793 li a5,4 + 80000448: 00a00713 li a4,10 + 8000044c: 60fb9663 bne s7,a5,80000a58 + 80000450: 00800613 li a2,8 + 80000454: 008a0693 addi a3,s4,8 + 80000458: 000a3783 ld a5,0(s4) + 8000045c: 46c70a63 beq a4,a2,800008d0 + 80000460: 00a00613 li a2,10 + 80000464: 00068a13 mv s4,a3 + 80000468: 68c70e63 beq a4,a2,80000b04 + 8000046c: fff00793 li a5,-1 + 80000470: 00f13023 sd a5,0(sp) + 80000474: fff00493 li s1,-1 + 80000478: 00000b93 li s7,0 + 8000047c: 00000d13 li s10,0 + 80000480: 08010793 addi a5,sp,128 + 80000484: 009784b3 add s1,a5,s1 + 80000488: 01810c93 addi s9,sp,24 + 8000048c: ee0b80e3 beqz s7,8000036c + 80000490: 02012b83 lw s7,32(sp) + 80000494: 
01810c93 addi s9,sp,24 + 80000498: 01703bb3 snez s7,s7 + 8000049c: ed6d4ae3 blt s10,s6,80000370 + 800004a0: 000b8b13 mv s6,s7 + 800004a4: 00000813 li a6,0 + 800004a8: ed1ff06f j 80000378 + 800004ac: 008a0c93 addi s9,s4,8 + 800004b0: 000a3483 ld s1,0(s4) + 800004b4: 540b0263 beqz s6,800009f8 + 800004b8: fffb0b9b addiw s7,s6,-1 + 800004bc: 000c8a13 mv s4,s9 + 800004c0: 00000d13 li s10,0 + 800004c4: 018b5463 bge s6,s8,800004cc + 800004c8: 416c0d3b subw s10,s8,s6 + 800004cc: 01812783 lw a5,24(sp) + 800004d0: 3a078c63 beqz a5,80000888 + 800004d4: 01810c93 addi s9,sp,24 + 800004d8: 020b0a63 beqz s6,8000050c + 800004dc: 020b9b13 slli s6,s7,0x20 + 800004e0: 020b5b13 srli s6,s6,0x20 + 800004e4: 001b0b13 addi s6,s6,1 + 800004e8: 01648b33 add s6,s1,s6 + 800004ec: 00148493 addi s1,s1,1 + 800004f0: fff4c503 lbu a0,-1(s1) + 800004f4: 000c8613 mv a2,s9 + 800004f8: 00100593 li a1,1 + 800004fc: bb9ff0ef jal ra,800000b4 <__putch> + 80000500: fe9b16e3 bne s6,s1,800004ec + 80000504: 01812783 lw a5,24(sp) + 80000508: ca078ee3 beqz a5,800001c4 + 8000050c: 01c14503 lbu a0,28(sp) + 80000510: 000c8613 mv a2,s9 + 80000514: 000d0593 mv a1,s10 + 80000518: b9dff0ef jal ra,800000b4 <__putch> + 8000051c: ca9ff06f j 800001c4 + 80000520: 02a12023 sw a0,32(sp) + 80000524: 03110223 sb a7,36(sp) + 80000528: 00040793 mv a5,s0 + 8000052c: d55ff06f j 80000280 + 80000530: 01810613 addi a2,sp,24 + 80000534: 00100593 li a1,1 + 80000538: 02500513 li a0,37 + 8000053c: b79ff0ef jal ra,800000b4 <__putch> + 80000540: c85ff06f j 800001c4 + 80000544: 02012783 lw a5,32(sp) + 80000548: 34079e63 bnez a5,800008a4 + 8000054c: 02a12023 sw a0,32(sp) + 80000550: 03010223 sb a6,36(sp) + 80000554: 00040793 mv a5,s0 + 80000558: d29ff06f j 80000280 + 8000055c: 00044703 lbu a4,0(s0) + 80000560: 00140413 addi s0,s0,1 + 80000564: fd07079b addiw a5,a4,-48 + 80000568: 0ff7f793 andi a5,a5,255 + 8000056c: 02f66663 bltu a2,a5,80000598 + 80000570: 002b179b slliw a5,s6,0x2 + 80000574: 01678b3b addw s6,a5,s6 + 80000578: 001b1b1b slliw 
s6,s6,0x1 + 8000057c: 00140413 addi s0,s0,1 + 80000580: 00eb0b3b addw s6,s6,a4 + 80000584: fff44703 lbu a4,-1(s0) + 80000588: fd0b0b1b addiw s6,s6,-48 + 8000058c: fd07079b addiw a5,a4,-48 + 80000590: 0ff7f793 andi a5,a5,255 + 80000594: fcf67ee3 bgeu a2,a5,80000570 + 80000598: 01010e23 sb a6,28(sp) + 8000059c: cedff06f j 80000288 + 800005a0: 00a12c23 sw a0,24(sp) + 800005a4: 01010e23 sb a6,28(sp) + 800005a8: 00040793 mv a5,s0 + 800005ac: cd5ff06f j 80000280 + 800005b0: 00044703 lbu a4,0(s0) + 800005b4: 00140413 addi s0,s0,1 + 800005b8: 53c70663 beq a4,t3,80000ae4 + 800005bc: 00300b93 li s7,3 + 800005c0: cc9ff06f j 80000288 + 800005c4: 00044703 lbu a4,0(s0) + 800005c8: 00140413 addi s0,s0,1 + 800005cc: 52670263 beq a4,t1,80000af0 + 800005d0: 00200b93 li s7,2 + 800005d4: cb5ff06f j 80000288 + 800005d8: 00200793 li a5,2 + 800005dc: 44fb8e63 beq s7,a5,80000a38 + 800005e0: 2577dc63 bge a5,s7,80000838 + 800005e4: 00300793 li a5,3 + 800005e8: 00fb8663 beq s7,a5,800005f4 + 800005ec: 00400793 li a5,4 + 800005f0: 40fb9c63 bne s7,a5,80000a08 + 800005f4: 000a3783 ld a5,0(s4) + 800005f8: 008a0a13 addi s4,s4,8 + 800005fc: 2a07ce63 bltz a5,800008b8 + 80000600: 00100b93 li s7,1 + 80000604: 06010fa3 sb zero,127(sp) + 80000608: 07f10693 addi a3,sp,127 + 8000060c: 00a00613 li a2,10 + 80000610: 00900513 li a0,9 + 80000614: 0080006f j 8000061c + 80000618: 00058793 mv a5,a1 + 8000061c: 02c7f733 remu a4,a5,a2 + 80000620: fff68693 addi a3,a3,-1 + 80000624: 00e98733 add a4,s3,a4 + 80000628: 00074703 lbu a4,0(a4) + 8000062c: 02c7d5b3 divu a1,a5,a2 + 80000630: 00e68023 sb a4,0(a3) + 80000634: fef562e3 bltu a0,a5,80000618 + 80000638: 08010793 addi a5,sp,128 + 8000063c: 40d786bb subw a3,a5,a3 + 80000640: fff68d1b addiw s10,a3,-1 + 80000644: ffe6879b addiw a5,a3,-2 + 80000648: fffd4493 not s1,s10 + 8000064c: 00f13023 sd a5,0(sp) + 80000650: e31ff06f j 80000480 + 80000654: 000a2783 lw a5,0(s4) + 80000658: 00000b93 li s7,0 + 8000065c: 008a0a13 addi s4,s4,8 + 80000660: 04f10023 sb a5,64(sp) + 
80000664: 04010493 addi s1,sp,64 + 80000668: 00100b13 li s6,1 + 8000066c: e55ff06f j 800004c0 + 80000670: 000a3483 ld s1,0(s4) + 80000674: 008a0a13 addi s4,s4,8 + 80000678: 34049c63 bnez s1,800009d0 + 8000067c: 02800513 li a0,40 + 80000680: 00000497 auipc s1,0x0 + 80000684: 68848493 addi s1,s1,1672 # 80000d08 <__am_mainargs+0x16b> + 80000688: 01810c93 addi s9,sp,24 + 8000068c: 00000b17 auipc s6,0x0 + 80000690: 681b0b13 addi s6,s6,1665 # 80000d0d <__am_mainargs+0x170> + 80000694: 0080006f j 8000069c + 80000698: 0004c503 lbu a0,0(s1) + 8000069c: 00148493 addi s1,s1,1 + 800006a0: 000c8613 mv a2,s9 + 800006a4: 00100593 li a1,1 + 800006a8: a0dff0ef jal ra,800000b4 <__putch> + 800006ac: ff6496e3 bne s1,s6,80000698 + 800006b0: b15ff06f j 800001c4 + 800006b4: 00200793 li a5,2 + 800006b8: 2efb8e63 beq s7,a5,800009b4 + 800006bc: 1b77de63 bge a5,s7,80000878 + 800006c0: 00300793 li a5,3 + 800006c4: 3cfb8663 beq s7,a5,80000a90 + 800006c8: 00400793 li a5,4 + 800006cc: 00800713 li a4,8 + 800006d0: d8fb80e3 beq s7,a5,80000450 + 800006d4: 000a2703 lw a4,0(s4) + 800006d8: 008a0a13 addi s4,s4,8 + 800006dc: 00777793 andi a5,a4,7 + 800006e0: 00f987b3 add a5,s3,a5 + 800006e4: 0007c783 lbu a5,0(a5) + 800006e8: 06010fa3 sb zero,127(sp) + 800006ec: 07f10693 addi a3,sp,127 + 800006f0: 00700593 li a1,7 + 800006f4: 0140006f j 80000708 + 800006f8: 00767793 andi a5,a2,7 + 800006fc: 00f987b3 add a5,s3,a5 + 80000700: 0007c783 lbu a5,0(a5) + 80000704: 0006071b sext.w a4,a2 + 80000708: fff68693 addi a3,a3,-1 + 8000070c: 00f68023 sb a5,0(a3) + 80000710: 0037561b srliw a2,a4,0x3 + 80000714: fee5e2e3 bltu a1,a4,800006f8 + 80000718: 08010793 addi a5,sp,128 + 8000071c: 40d786bb subw a3,a5,a3 + 80000720: fff68d1b addiw s10,a3,-1 + 80000724: ffe6879b addiw a5,a3,-2 + 80000728: fffd4493 not s1,s10 + 8000072c: 00f13023 sd a5,0(sp) + 80000730: 01810c93 addi s9,sp,24 + 80000734: c2dff06f j 80000360 + 80000738: 00000513 li a0,0 + 8000073c: a71ff06f j 800001ac + 80000740: 01c14503 lbu a0,28(sp) + 80000744: 
000c8613 mv a2,s9 + 80000748: 000d8593 mv a1,s11 + 8000074c: 01013423 sd a6,8(sp) + 80000750: 965ff0ef jal ra,800000b4 <__putch> + 80000754: 00813803 ld a6,8(sp) + 80000758: c40b88e3 beqz s7,800003a8 + 8000075c: 02414503 lbu a0,36(sp) + 80000760: 000c8613 mv a2,s9 + 80000764: 00100593 li a1,1 + 80000768: 01013423 sd a6,8(sp) + 8000076c: 949ff0ef jal ra,800000b4 <__putch> + 80000770: 01813683 ld a3,24(sp) + 80000774: fff00713 li a4,-1 + 80000778: 01875713 srli a4,a4,0x18 + 8000077c: 00e6f6b3 and a3,a3,a4 + 80000780: 00300713 li a4,3 + 80000784: 02471713 slli a4,a4,0x24 + 80000788: 00813803 ld a6,8(sp) + 8000078c: c2e69ce3 bne a3,a4,800003c4 + 80000790: 01c14503 lbu a0,28(sp) + 80000794: 000c8613 mv a2,s9 + 80000798: 000d8593 mv a1,s11 + 8000079c: 01013423 sd a6,8(sp) + 800007a0: 915ff0ef jal ra,800000b4 <__putch> + 800007a4: 00813803 ld a6,8(sp) + 800007a8: c1dff06f j 800003c4 + 800007ac: 00100793 li a5,1 + 800007b0: 30fb9263 bne s7,a5,80000ab4 + 800007b4: 000a4703 lbu a4,0(s4) + 800007b8: 06010fa3 sb zero,127(sp) + 800007bc: 07f10693 addi a3,sp,127 + 800007c0: 00f00593 li a1,15 + 800007c4: 0080006f j 800007cc + 800007c8: 00060713 mv a4,a2 + 800007cc: 00f77793 andi a5,a4,15 + 800007d0: 00f987b3 add a5,s3,a5 + 800007d4: 0007c783 lbu a5,0(a5) + 800007d8: fff68693 addi a3,a3,-1 + 800007dc: 0047561b srliw a2,a4,0x4 + 800007e0: 00f68023 sb a5,0(a3) + 800007e4: fee5e2e3 bltu a1,a4,800007c8 + 800007e8: 08010793 addi a5,sp,128 + 800007ec: 40d786bb subw a3,a5,a3 + 800007f0: fff68d1b addiw s10,a3,-1 + 800007f4: ffe6879b addiw a5,a3,-2 + 800007f8: fffd4493 not s1,s10 + 800007fc: 00f13023 sd a5,0(sp) + 80000800: 00050a13 mv s4,a0 + 80000804: 01810c93 addi s9,sp,24 + 80000808: b59ff06f j 80000360 + 8000080c: 00100713 li a4,1 + 80000810: 00a00793 li a5,10 + 80000814: 24eb9263 bne s7,a4,80000a58 + 80000818: 00800613 li a2,8 + 8000081c: 008a0693 addi a3,s4,8 + 80000820: 000a4703 lbu a4,0(s4) + 80000824: 18c78c63 beq a5,a2,800009bc + 80000828: 00a00613 li a2,10 + 8000082c: 2cc78863 
beq a5,a2,80000afc + 80000830: 00068a13 mv s4,a3 + 80000834: c39ff06f j 8000046c + 80000838: 00100793 li a5,1 + 8000083c: 1cfb9663 bne s7,a5,80000a08 + 80000840: 000a2703 lw a4,0(s4) + 80000844: 008a0693 addi a3,s4,8 + 80000848: 0187179b slliw a5,a4,0x18 + 8000084c: 4187d79b sraiw a5,a5,0x18 + 80000850: 0c07ca63 bltz a5,80000924 + 80000854: 0ff77713 andi a4,a4,255 + 80000858: 00a00793 li a5,10 + 8000085c: 02f777bb remuw a5,a4,a5 + 80000860: 00068a13 mv s4,a3 + 80000864: 02079793 slli a5,a5,0x20 + 80000868: 0207d793 srli a5,a5,0x20 + 8000086c: 00f987b3 add a5,s3,a5 + 80000870: 0007c783 lbu a5,0(a5) + 80000874: 0e80006f j 8000095c + 80000878: 00100713 li a4,1 + 8000087c: 00800793 li a5,8 + 80000880: f8eb8ce3 beq s7,a4,80000818 + 80000884: e51ff06f j 800006d4 + 80000888: 01c14503 lbu a0,28(sp) + 8000088c: 01810c93 addi s9,sp,24 + 80000890: 000c8613 mv a2,s9 + 80000894: 000d0593 mv a1,s10 + 80000898: 81dff0ef jal ra,800000b4 <__putch> + 8000089c: c40b10e3 bnez s6,800004dc + 800008a0: c65ff06f j 80000504 + 800008a4: 02414783 lbu a5,36(sp) + 800008a8: a3e78ee3 beq a5,t5,800002e4 + 800008ac: 02a12023 sw a0,32(sp) + 800008b0: 03010223 sb a6,36(sp) + 800008b4: ca1ff06f j 80000554 + 800008b8: 02d00713 li a4,45 + 800008bc: 02e10223 sb a4,36(sp) + 800008c0: 00100713 li a4,1 + 800008c4: 02e12023 sw a4,32(sp) + 800008c8: 40f007b3 neg a5,a5 + 800008cc: d35ff06f j 80000600 + 800008d0: 00068a13 mv s4,a3 + 800008d4: 06010fa3 sb zero,127(sp) + 800008d8: 07f10693 addi a3,sp,127 + 800008dc: 00700593 li a1,7 + 800008e0: 0080006f j 800008e8 + 800008e4: 00060793 mv a5,a2 + 800008e8: 0077f713 andi a4,a5,7 + 800008ec: 00e98733 add a4,s3,a4 + 800008f0: 00074703 lbu a4,0(a4) + 800008f4: fff68693 addi a3,a3,-1 + 800008f8: 0037d613 srli a2,a5,0x3 + 800008fc: 00e68023 sb a4,0(a3) + 80000900: fef5e2e3 bltu a1,a5,800008e4 + 80000904: 08010793 addi a5,sp,128 + 80000908: 40d786bb subw a3,a5,a3 + 8000090c: fff68d1b addiw s10,a3,-1 + 80000910: ffe6879b addiw a5,a3,-2 + 80000914: fffd4493 not s1,s10 + 
80000918: 00f13023 sd a5,0(sp) + 8000091c: 01810c93 addi s9,sp,24 + 80000920: a41ff06f j 80000360 + 80000924: fff74713 not a4,a4 + 80000928: 0ff77713 andi a4,a4,255 + 8000092c: 02d00793 li a5,45 + 80000930: 02f10223 sb a5,36(sp) + 80000934: 03712023 sw s7,32(sp) + 80000938: 0017071b addiw a4,a4,1 + 8000093c: 00068a13 mv s4,a3 + 80000940: 00a00793 li a5,10 + 80000944: 02f777bb remuw a5,a4,a5 + 80000948: 02079793 slli a5,a5,0x20 + 8000094c: 0207d793 srli a5,a5,0x20 + 80000950: 00f987b3 add a5,s3,a5 + 80000954: 0007c783 lbu a5,0(a5) + 80000958: 00100b93 li s7,1 + 8000095c: 06010fa3 sb zero,127(sp) + 80000960: 07f10693 addi a3,sp,127 + 80000964: 00a00613 li a2,10 + 80000968: 00900593 li a1,9 + 8000096c: 01c0006f j 80000988 + 80000970: 0007871b sext.w a4,a5 + 80000974: 02c7f7bb remuw a5,a5,a2 + 80000978: 02079793 slli a5,a5,0x20 + 8000097c: 0207d793 srli a5,a5,0x20 + 80000980: 00f987b3 add a5,s3,a5 + 80000984: 0007c783 lbu a5,0(a5) + 80000988: fff68693 addi a3,a3,-1 + 8000098c: 00f68023 sb a5,0(a3) + 80000990: 02c757bb divuw a5,a4,a2 + 80000994: fce5eee3 bltu a1,a4,80000970 + 80000998: 08010793 addi a5,sp,128 + 8000099c: 40d786bb subw a3,a5,a3 + 800009a0: fff68d1b addiw s10,a3,-1 + 800009a4: ffe6879b addiw a5,a3,-2 + 800009a8: fffd4493 not s1,s10 + 800009ac: 00f13023 sd a5,0(sp) + 800009b0: ad1ff06f j 80000480 + 800009b4: 000a5703 lhu a4,0(s4) + 800009b8: 008a0693 addi a3,s4,8 + 800009bc: 00777793 andi a5,a4,7 + 800009c0: 00f987b3 add a5,s3,a5 + 800009c4: 0007c783 lbu a5,0(a5) + 800009c8: 00068a13 mv s4,a3 + 800009cc: d1dff06f j 800006e8 + 800009d0: 01810c93 addi s9,sp,24 + 800009d4: 000c8613 mv a2,s9 + 800009d8: 00100593 li a1,1 + 800009dc: 03000513 li a0,48 + 800009e0: ed4ff0ef jal ra,800000b4 <__putch> + 800009e4: 000c8613 mv a2,s9 + 800009e8: 00100593 li a1,1 + 800009ec: 07800513 li a0,120 + 800009f0: ec4ff0ef jal ra,800000b4 <__putch> + 800009f4: 925ff06f j 80000318 + 800009f8: 00048513 mv a0,s1 + 800009fc: 15c000ef jal ra,80000b58 + 80000a00: 00050b1b sext.w s6,a0 
+ 80000a04: ab5ff06f j 800004b8 + 80000a08: 000a2783 lw a5,0(s4) + 80000a0c: 008a0a13 addi s4,s4,8 + 80000a10: 0007871b sext.w a4,a5 + 80000a14: f207d6e3 bgez a5,80000940 + 80000a18: 02d00693 li a3,45 + 80000a1c: 40e0073b negw a4,a4 + 80000a20: 00a00793 li a5,10 + 80000a24: 02d10223 sb a3,36(sp) + 80000a28: 00100693 li a3,1 + 80000a2c: 02f777bb remuw a5,a4,a5 + 80000a30: 02d12023 sw a3,32(sp) + 80000a34: f15ff06f j 80000948 + 80000a38: 000a2703 lw a4,0(s4) + 80000a3c: 008a0693 addi a3,s4,8 + 80000a40: 03071793 slli a5,a4,0x30 + 80000a44: 0607cc63 bltz a5,80000abc + 80000a48: 03071713 slli a4,a4,0x30 + 80000a4c: 03075713 srli a4,a4,0x30 + 80000a50: 00100b93 li s7,1 + 80000a54: e05ff06f j 80000858 + 80000a58: 000a2703 lw a4,0(s4) + 80000a5c: 00a00793 li a5,10 + 80000a60: 008a0a13 addi s4,s4,8 + 80000a64: 02f777bb remuw a5,a4,a5 + 80000a68: 00000b93 li s7,0 + 80000a6c: 02079793 slli a5,a5,0x20 + 80000a70: 0207d793 srli a5,a5,0x20 + 80000a74: 00f987b3 add a5,s3,a5 + 80000a78: 0007c783 lbu a5,0(a5) + 80000a7c: ee1ff06f j 8000095c + 80000a80: 000a3783 ld a5,0(s4) + 80000a84: 00000b93 li s7,0 + 80000a88: 008a0a13 addi s4,s4,8 + 80000a8c: b79ff06f j 80000604 + 80000a90: 000a3783 ld a5,0(s4) + 80000a94: 008a0a13 addi s4,s4,8 + 80000a98: e3dff06f j 800008d4 + 80000a9c: 008a0693 addi a3,s4,8 + 80000aa0: 000a5703 lhu a4,0(s4) + 80000aa4: 00000b93 li s7,0 + 80000aa8: db1ff06f j 80000858 + 80000aac: 000a5703 lhu a4,0(s4) + 80000ab0: d09ff06f j 800007b8 + 80000ab4: 000a2703 lw a4,0(s4) + 80000ab8: d01ff06f j 800007b8 + 80000abc: fff74713 not a4,a4 + 80000ac0: 02d00793 li a5,45 + 80000ac4: 0107171b slliw a4,a4,0x10 + 80000ac8: 0107571b srliw a4,a4,0x10 + 80000acc: 02f10223 sb a5,36(sp) + 80000ad0: 00100793 li a5,1 + 80000ad4: 02f12023 sw a5,32(sp) + 80000ad8: 0017071b addiw a4,a4,1 + 80000adc: 00068a13 mv s4,a3 + 80000ae0: e61ff06f j 80000940 + 80000ae4: 00400b93 li s7,4 + 80000ae8: 00040793 mv a5,s0 + 80000aec: f94ff06f j 80000280 + 80000af0: 00100b93 li s7,1 + 80000af4: 00040793 
mv a5,s0 + 80000af8: f88ff06f j 80000280 + 80000afc: 00000b93 li s7,0 + 80000b00: d59ff06f j 80000858 + 80000b04: 00000b93 li s7,0 + 80000b08: afdff06f j 80000604 + +Disassembly of section .text.printf: + +0000000080000b0c : + 80000b0c: fa010113 addi sp,sp,-96 + 80000b10: 02810313 addi t1,sp,40 + 80000b14: 02b13423 sd a1,40(sp) + 80000b18: 02c13823 sd a2,48(sp) + 80000b1c: 02d13c23 sd a3,56(sp) + 80000b20: 00050613 mv a2,a0 + 80000b24: 00030693 mv a3,t1 + 80000b28: 00000593 li a1,0 + 80000b2c: 00000513 li a0,0 + 80000b30: 00113c23 sd ra,24(sp) + 80000b34: 04e13023 sd a4,64(sp) + 80000b38: 04f13423 sd a5,72(sp) + 80000b3c: 05013823 sd a6,80(sp) + 80000b40: 05113c23 sd a7,88(sp) + 80000b44: 00613423 sd t1,8(sp) + 80000b48: e08ff0ef jal ra,80000150 + 80000b4c: 01813083 ld ra,24(sp) + 80000b50: 06010113 addi sp,sp,96 + 80000b54: 00008067 ret + +Disassembly of section .text.strlen: + +0000000080000b58 : + 80000b58: 00054783 lbu a5,0(a0) + 80000b5c: 02078063 beqz a5,80000b7c + 80000b60: 00000793 li a5,0 + 80000b64: 00178793 addi a5,a5,1 + 80000b68: 00f50733 add a4,a0,a5 + 80000b6c: 00074703 lbu a4,0(a4) + 80000b70: fe071ae3 bnez a4,80000b64 + 80000b74: 00078513 mv a0,a5 + 80000b78: 00008067 ret + 80000b7c: 00000793 li a5,0 + 80000b80: 00078513 mv a0,a5 + 80000b84: 00008067 ret diff --git a/src/main/scala/bus/simplebus/Crossbar.scala b/src/main/scala/bus/simplebus/Crossbar.scala deleted file mode 100644 index 96b4271e7d607cfbcaf02118a9ae202505c1ed17..0000000000000000000000000000000000000000 --- a/src/main/scala/bus/simplebus/Crossbar.scala +++ /dev/null @@ -1,139 +0,0 @@ -/************************************************************************************** -* Copyright (c) 2020 Institute of Computing Technology, CAS -* Copyright (c) 2020 University of Chinese Academy of Sciences -* -* NutShell is licensed under Mulan PSL v2. -* You can use this software according to the terms and conditions of the Mulan PSL v2. 
-* You may obtain a copy of Mulan PSL v2 at: -* http://license.coscl.org.cn/MulanPSL2 -* -* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR -* FIT FOR A PARTICULAR PURPOSE. -* -* See the Mulan PSL v2 for more details. -***************************************************************************************/ - -package bus.simplebus - -import chisel3._ -import chisel3.util._ - -import utils._ - -class SimpleBusCrossbar1toN(addressSpace: List[(Long, Long)]) extends Module { - val io = IO(new Bundle { - val in = Flipped(new SimpleBusUC) - val out = Vec(addressSpace.length, new SimpleBusUC) - }) - - val s_idle :: s_resp :: s_error :: Nil = Enum(3) - val state = RegInit(s_idle) - - // select the output channel according to the address - val addr = io.in.req.bits.addr - val outSelVec = VecInit(addressSpace.map( - range => (addr >= range._1.U && addr < (range._1 + range._2).U))) - val outSelIdx = PriorityEncoder(outSelVec) - val outSel = io.out(outSelIdx) - val outSelIdxResp = RegEnable(outSelIdx, outSel.req.fire() && (state === s_idle)) - val outSelResp = io.out(outSelIdxResp) - val reqInvalidAddr = io.in.req.valid && !outSelVec.asUInt.orR - - when(!(!io.in.req.valid || outSelVec.asUInt.orR) || !(!(io.in.req.valid && outSelVec.asUInt.andR))){printf("[ERROR] bad addr %x, time %d\n", addr, GTimer())} - // assert(!io.in.req.valid || outSelVec.asUInt.orR, "address decode error, bad addr = 0x%x\n", addr) - assert(!(io.in.req.valid && outSelVec.asUInt.andR), "address decode error, bad addr = 0x%x\n", addr) - - // bind out.req channel - (io.out zip outSelVec).map { case (o, v) => { - o.req.bits := io.in.req.bits - o.req.valid := v && (io.in.req.valid && (state === s_idle)) - o.resp.ready := v - }} - - switch (state) { - is (s_idle) { - when (outSel.req.fire()) { state := s_resp } - when (reqInvalidAddr) { state := s_error } - } - is (s_resp) { when 
(outSelResp.resp.fire()) { state := s_idle } } - is (s_error) { when(io.in.resp.fire()){ state := s_idle } } - } - - io.in.resp.valid := outSelResp.resp.fire() || state === s_error - io.in.resp.bits <> outSelResp.resp.bits - // io.in.resp.bits.exc.get := state === s_error - outSelResp.resp.ready := io.in.resp.ready - io.in.req.ready := outSel.req.ready || reqInvalidAddr - - Debug() { - when (state === s_idle && io.in.req.valid) { - printf(p"${GTimer()}: xbar: in.req: ${io.in.req.bits}\n") - } - - when (outSel.req.fire()) { - printf(p"${GTimer()}: xbar: outSelIdx = ${outSelIdx}, outSel.req: ${outSel.req.bits}\n") - } - when (outSel.resp.fire()) { - printf(p"${GTimer()}: xbar: outSelIdx= ${outSelIdx}, outSel.resp: ${outSel.resp.bits}\n") - } - - when (io.in.resp.fire()) { - printf(p"${GTimer()}: xbar: in.resp: ${io.in.resp.bits}\n") - } - } -} - -class SimpleBusCrossbarNto1(n: Int, userBits:Int = 0) extends Module { - val io = IO(new Bundle { - val in = Flipped(Vec(n, new SimpleBusUC(userBits))) - val out = new SimpleBusUC(userBits) - }) - - val s_idle :: s_readResp :: s_writeResp :: Nil = Enum(3) - val state = RegInit(s_idle) - - val lockWriteFun = ((x: SimpleBusReqBundle) => x.isWrite() && x.isBurst()) - val inputArb = Module(new LockingArbiter(chiselTypeOf(io.in(0).req.bits), n, 8, Some(lockWriteFun))) - (inputArb.io.in zip io.in.map(_.req)).map{ case (arb, in) => arb <> in } - val thisReq = inputArb.io.out - assert(!(thisReq.valid && !thisReq.bits.isRead() && !thisReq.bits.isWrite())) - val inflightSrc = Reg(UInt(log2Up(n).W)) - - io.out.req.bits := thisReq.bits - // bind correct valid and ready signals - io.out.req.valid := thisReq.valid && (state === s_idle) - thisReq.ready := io.out.req.ready && (state === s_idle) - - io.in.map(_.resp.bits := io.out.resp.bits) - io.in.map(_.resp.valid := false.B) - (io.in(inflightSrc).resp, io.out.resp) match { case (l, r) => { - l.valid := r.valid - r.ready := l.ready - }} - - switch (state) { - is (s_idle) { - when 
(thisReq.fire()) { - inflightSrc := inputArb.io.chosen - when (thisReq.bits.isRead()) { state := s_readResp } - .elsewhen (thisReq.bits.isWriteLast() || thisReq.bits.isWriteSingle()) { state := s_writeResp } - } - } - is (s_readResp) { when (io.out.resp.fire() && io.out.resp.bits.isReadLast()) { state := s_idle } } - is (s_writeResp) { when (io.out.resp.fire()) { state := s_idle } } - } -} - -class SimpleBusCrossbar(n: Int, addressSpace: List[(Long, Long)]) extends Module { - val io = IO(new Bundle { - val in = Flipped(Vec(n, new SimpleBusUC)) - val out = Vec(addressSpace.length, new SimpleBusUC) - }) - - val inXbar = Module(new SimpleBusCrossbarNto1(n)) - val outXbar = Module(new SimpleBusCrossbar1toN(addressSpace)) - inXbar.io.in <> io.in - outXbar.io.in <> inXbar.io.out - io.out <> outXbar.io.out -} diff --git a/src/main/scala/bus/simplebus/DistributedMem.scala b/src/main/scala/bus/simplebus/DistributedMem.scala deleted file mode 100644 index 4fd469da142086bf7be0f980d7e2985bcfda42b3..0000000000000000000000000000000000000000 --- a/src/main/scala/bus/simplebus/DistributedMem.scala +++ /dev/null @@ -1,62 +0,0 @@ -package bus.simplebus - -import chisel3._ -import chisel3.util._ -import chisel3.util.experimental.loadMemoryFromFile - -import noop.HasNOOPParameter - -class DistributedMem(memByte: Int, dualPort: Boolean, delayCycles: Int = 0, dataFile: String = "") - extends Module with HasNOOPParameter { - val io = IO(new Bundle { - val rw = Flipped(new SimpleBusUC) - val ro = Flipped(new SimpleBusUC) - }) - - val wordNum = memByte / 8 - val nBank = XLEN / 8 - val memAddrBits = log2Up(wordNum) - def Index(addr: UInt): UInt = addr(memAddrBits + 2 - 1, 2) - - val rwIdx = Index(io.rw.req.bits.addr) - val roIdx = Index(io.ro.req.bits.addr) - val wen = io.rw.isWrite() - val wdataVec = VecInit.tabulate(nBank) { i => io.rw.req.bits.wdata(8 * (i + 1) - 1, 8 * i) } - val wmask = VecInit.tabulate(nBank) { i => io.rw.req.bits.wmask(i).asBool() } - - val rwData = 
Wire(UInt(XLEN.W)) - val roData = Wire(UInt(XLEN.W)) - - val mem = Mem(wordNum, Vec(nBank, UInt(8.W))) - if (dataFile != "") - loadMemoryFromFile(mem, dataFile) - - rwData := Cat(mem.read(rwIdx).reverse) - roData := Cat(mem.read(roIdx).reverse) - when (wen) { mem.write(rwIdx, wdataVec, wmask) } - - def readPort(p: SimpleBusUC, rdata: UInt) = { - val s_idle :: s_reading :: Nil = Enum(2) - val state = RegInit(s_idle) - switch (state) { - is (s_idle) { - when (p.req.fire()) { state := Mux(p.resp.fire(), s_idle, s_reading) } - } - is (s_reading) { - when (p.resp.fire()) { state := s_idle } - } - } - - p.req.ready := state === s_idle - p.resp.bits.rdata := rdata - p.resp.valid := (if (delayCycles == 0) p.req.fire() else Counter(state === s_reading, delayCycles)._2) - } - - readPort(io.rw, rwData) - if (dualPort) { - readPort(io.ro, roData) - } - else { - io.ro := DontCare - } -} diff --git a/src/main/scala/bus/simplebus/SimpleBus.scala b/src/main/scala/bus/simplebus/SimpleBus.scala deleted file mode 100644 index 74c48376c6ad6ef0a7d42c5b7feb22f5b6042666..0000000000000000000000000000000000000000 --- a/src/main/scala/bus/simplebus/SimpleBus.scala +++ /dev/null @@ -1,100 +0,0 @@ -package bus.simplebus - -import chisel3._ -import chisel3.util._ - -import noop.HasNOOPParameter -import utils._ -import bus.axi4._ - -sealed abstract class SimpleBusBundle extends Bundle with HasNOOPParameter - -object SimpleBusCmd { - // req - // hit | miss - def read = "b0000".U // read | refill - def write = "b0001".U // write | refill - def readBurst = "b0010".U // read | refill - def writeBurst = "b0011".U // write | refill - def writeLast = "b0111".U // write | refill - def probe = "b1000".U // read | do nothing - def prefetch = "b0100".U // read | refill - - // resp - def readLast = "b0110".U - def writeResp = "b0101".U - def probeHit = "b1100".U - def probeMiss = "b1000".U - - def apply() = UInt(4.W) -} - -class SimpleBusReqBundle(val userBits: Int = 0, val addrBits: Int = 32) extends 
SimpleBusBundle { - val addr = Output(UInt(addrBits.W)) - val size = Output(UInt(3.W)) - val cmd = Output(SimpleBusCmd()) - val wmask = Output(UInt((DataBits / 8).W)) - val wdata = Output(UInt(DataBits.W)) - val user = if (userBits > 0) Some(Output(UInt(userBits.W))) else None - - override def toPrintable: Printable = { - p"addr = 0x${Hexadecimal(addr)}, cmd = ${cmd}, size = ${size}, " + - p"wmask = 0x${Hexadecimal(wmask)}, wdata = 0x${Hexadecimal(wdata)}" - } - - def apply(addr: UInt, cmd: UInt, size: UInt, wdata: UInt, wmask: UInt, user: UInt = 0.U) { - this.addr := addr - this.cmd := cmd - this.size := size - this.wdata := wdata - this.wmask := wmask - this.user.map(_ := user) - } - - def isRead() = !cmd(0) && !cmd(3) - def isWrite() = cmd(0) - def isBurst() = cmd(1) - def isReadBurst() = cmd === SimpleBusCmd.readBurst - def isWriteSingle() = cmd === SimpleBusCmd.write - def isWriteLast() = cmd === SimpleBusCmd.writeLast - def isProbe() = cmd === SimpleBusCmd.probe - def isPrefetch() = cmd === SimpleBusCmd.prefetch -} - -class SimpleBusRespBundle(val userBits: Int = 0) extends SimpleBusBundle { - val cmd = Output(SimpleBusCmd()) - val rdata = Output(UInt(DataBits.W)) - val user = if (userBits > 0) Some(Output(UInt(userBits.W))) else None - - override def toPrintable: Printable = p"rdata = ${Hexadecimal(rdata)}, cmd = ${cmd}" - - def isReadLast() = cmd === SimpleBusCmd.readLast - def isProbeHit() = cmd === SimpleBusCmd.probeHit - def isProbeMiss() = cmd === SimpleBusCmd.probeMiss - def isWriteResp() = cmd === SimpleBusCmd.writeResp - def isPrefetch() = cmd === SimpleBusCmd.prefetch -} - -// Uncache -class SimpleBusUC(val userBits: Int = 0, val addrBits: Int = 32) extends SimpleBusBundle { - val req = Decoupled(new SimpleBusReqBundle(userBits, addrBits)) - val resp = Flipped(Decoupled(new SimpleBusRespBundle(userBits))) - - def isWrite() = req.valid && req.bits.isWrite() - def isRead() = req.valid && req.bits.isRead() - def toAXI4Lite() = 
SimpleBus2AXI4Converter(this, new AXI4Lite) - def toAXI4() = SimpleBus2AXI4Converter(this, new AXI4) - - def dump(name: String) = { - when (req.fire()) { printf(p"${GTimer()},[${name}] ${req.bits}\n") } - when (resp.fire()) { printf(p"${GTimer()},[${name}] ${resp.bits}\n") } - } -} - -// Cache -class SimpleBusC(val userBits: Int = 0) extends SimpleBusBundle { - val mem = new SimpleBusUC(userBits) - val coh = Flipped(new SimpleBusUC(userBits)) - - def memtoAXI4() = this.mem.toAXI4 -} diff --git a/src/main/scala/bus/simplebus/ToAXI4.scala b/src/main/scala/bus/simplebus/ToAXI4.scala deleted file mode 100644 index c6d8af32b9a682dd3298663cc71aeca74e0d9313..0000000000000000000000000000000000000000 --- a/src/main/scala/bus/simplebus/ToAXI4.scala +++ /dev/null @@ -1,182 +0,0 @@ -package bus.simplebus - -import chisel3._ -import chisel3.util._ - -import bus.axi4._ -import utils._ - -class AXI42SimpleBusConverter() extends Module { - val io = IO(new Bundle { - val in = Flipped(new AXI4(idBits = 18)) - val out = new SimpleBusUC() - }) - - val (axi, mem) = (io.in, io.out) - val (ar, aw, w, r, b) = (axi.ar.bits, axi.aw.bits, axi.w.bits, axi.r.bits, axi.b.bits) - val (req, resp) = (mem.req.bits, mem.resp.bits) - - // Default value - - val inflight_id_reg = RegInit(0.U) - val axi_na :: axi_read :: axi_write :: Nil = Enum(3) - val inflight_type = RegInit(axi_na) - private def setState(axi_type: UInt, id: UInt) = { - inflight_id_reg := id - inflight_type := axi_type; - } - private def resetState() = { - inflight_type := axi_na - inflight_id_reg := 0.U - } - private def is_inflight() = { - inflight_type =/= axi_na - } - - // Default - val default_mem = 0.U.asTypeOf(new SimpleBusUC) - val default_axi = 0.U.asTypeOf(new AXI4) - req := default_mem.req.bits - r := default_axi.r.bits - b := default_axi.b.bits - - - // Read Path - when (axi.ar.valid) { - mem.req.valid := true.B - req.addr := ar.addr - req.cmd := Mux(ar.len === 0.U, SimpleBusCmd.read, SimpleBusCmd.readBurst) - // TODO: 
consider ar.burst - req.size := ar.size - req.user.foreach(_ := ar.user) - req.wmask := 0.U - req.wdata := 0.U - - when (mem.req.fire) { - setState(axi_read, ar.id) - } - } - - when (mem.resp.valid) { - axi.r.valid := true.B - r.data := resp.rdata - r.id := inflight_id_reg - // TODO: r.resp handling - r.resp := AXI4Parameters.RESP_OKAY - r.last := resp.isReadLast - resp.user.foreach(r.user := _) - - when (axi.r.fire && resp.isReadLast) { - resetState() - } - } - - // Write Path - val aw_reg = Reg(new AXI4BundleA(AXI4Parameters.idBits)) - val bresp_en = RegInit(false.B) - - when (axi.aw.valid && !axi.ar.valid) { - aw_reg := aw - - when (axi.aw.fire) { - setState(axi_write, aw.id) - } - } - - when (axi.w.valid) { - mem.req.valid := true.B - req.cmd := Mux(aw_reg.len === 0.U, SimpleBusCmd.write, - Mux(w.last, SimpleBusCmd.writeLast, SimpleBusCmd.writeBurst)) - req.addr := aw_reg.addr - req.size := aw_reg.size - req.wmask := w.strb - req.wdata := w.data - req.user.foreach(_ := aw.user) - - when (axi.w.fire && w.last) { - bresp_en := true.B - } - } - - when (axi.b.fire) { - bresp_en := false.B - resetState() - } - - // Arbitration - // Slave's ready maybe generated according to valid signal, so let valid signals go through. - mem.req.valid := axi.ar.valid || axi.w.valid - mem.resp.ready := true.B || (inflight_type === axi_read && axi.r.ready) || (inflight_type === axi_write && axi.b.ready) - axi.ar.ready := !is_inflight && mem.req.ready - axi.r.valid := inflight_type === axi_read && mem.resp.valid - // AW should be buffered so no ready is considered. 
- axi.aw.ready := !is_inflight && !axi.ar.valid - axi.w.ready := inflight_type === axi_write && mem.req.ready - axi.b.valid := bresp_en && mem.resp.valid - axi.b.bits.resp := AXI4Parameters.RESP_OKAY -} - - -class SimpleBus2AXI4Converter[OT <: AXI4Lite](outType: OT) extends Module { - val io = IO(new Bundle { - val in = Flipped(new SimpleBusUC) - val out = Flipped(Flipped(outType)) - }) - - val toAXI4Lite = !(io.in.req.valid && io.in.req.bits.isBurst()) && (outType.getClass == classOf[AXI4Lite]).B - val toAXI4 = (outType.getClass == classOf[AXI4]).B - assert(toAXI4Lite || toAXI4) - - val (mem, axi) = (io.in, io.out) - val (ar, aw, w, r, b) = (axi.ar.bits, axi.aw.bits, axi.w.bits, axi.r.bits, axi.b.bits) - - ar.addr := mem.req.bits.addr - ar.prot := AXI4Parameters.PROT_PRIVILEDGED - w.data := mem.req.bits.wdata - w.strb := mem.req.bits.wmask - - def LineBeats = 8 - val wlast = WireInit(true.B) - val rlast = WireInit(true.B) - if (outType.getClass == classOf[AXI4]) { - val axi4 = io.out.asInstanceOf[AXI4] - axi4.ar.bits.id := 0.U - axi4.ar.bits.len := Mux(mem.req.bits.isBurst(), (LineBeats - 1).U, 0.U) - axi4.ar.bits.size := mem.req.bits.size - axi4.ar.bits.burst := AXI4Parameters.BURST_WRAP - axi4.ar.bits.lock := false.B - axi4.ar.bits.cache := 0.U - axi4.ar.bits.qos := 0.U - axi4.ar.bits.user := 0.U - axi4.w.bits.last := mem.req.bits.isWriteLast() || mem.req.bits.isWriteSingle() - wlast := axi4.w.bits.last - rlast := axi4.r.bits.last - } - - aw := ar - mem.resp.bits.rdata := r.data - mem.resp.bits.cmd := Mux(rlast, SimpleBusCmd.readLast, 0.U) - - val wSend = Wire(Bool()) - val awAck = BoolStopWatch(axi.aw.fire(), wSend) - val wAck = BoolStopWatch(axi.w.fire() && wlast, wSend) - wSend := (axi.aw.fire() && axi.w.fire() && wlast) || (awAck && wAck) - val wen = RegEnable(mem.req.bits.isWrite(), mem.req.fire()) - - axi.ar.valid := mem.isRead() - axi.aw.valid := mem.isWrite() && !awAck - axi.w .valid := mem.isWrite() && !wAck - mem.req.ready := 
Mux(mem.req.bits.isWrite(), !wAck && axi.w.ready, axi.ar.ready) - - axi.r.ready := mem.resp.ready - axi.b.ready := mem.resp.ready - mem.resp.valid := Mux(wen, axi.b.valid, axi.r.valid) -} - -object SimpleBus2AXI4Converter { - def apply[OT <: AXI4Lite](in: SimpleBusUC, outType: OT): OT = { - val bridge = Module(new SimpleBus2AXI4Converter(outType)) - bridge.io.in <> in - bridge.io.out - } -} diff --git a/src/main/scala/bus/tilelink/NaiveTL1toN.scala b/src/main/scala/bus/tilelink/NaiveTL1toN.scala deleted file mode 100644 index 345ad2a8817d55aa68e2a642d933037708f6a4e2..0000000000000000000000000000000000000000 --- a/src/main/scala/bus/tilelink/NaiveTL1toN.scala +++ /dev/null @@ -1,89 +0,0 @@ -package bus.tilelink - -import chisel3._ -import chisel3.util._ -import utils.{Debug, GTimer} - -// Only support A and D channel, very naive... - -class NaiveTL1toN -( - addressSpace: List[(Long, Long)], - para: TLParameters -) extends Module{ - val io = IO(new Bundle() { - val in = Flipped(TLCached(para)) - val out = Vec(addressSpace.length, TLCached(para)) - }) - - io.in <> DontCare - io.out <> DontCare - - val s_idle :: s_resp :: s_error :: Nil = Enum(3) - val state = RegInit(s_idle) - - // select the output channel according to the address - val addr = io.in.a.bits.address - val outSelVec = VecInit(addressSpace.map( - range => addr >= range._1.U && addr < (range._1 + range._2).U - )) - val outSelIdx = PriorityEncoder(outSelVec) - val outSel = io.out(outSelIdx) - val outSelIdxResp = RegEnable(outSelIdx, outSel.a.fire() && (state === s_idle)) - val outSelResp = io.out(outSelIdxResp) - val reqInvalidAddr = io.in.a.valid && !outSelVec.asUInt.orR - - when( - !(!io.in.a.valid || outSelVec.asUInt.orR) || (io.in.a.valid && outSelVec.asUInt.andR) - ){ - printf("[ERROR] bad addr %x, time %d\n", addr, GTimer()) - } - // assert(!io.in.req.valid || outSelVec.asUInt.orR, "address decode error, bad addr = 0x%x\n", addr) - assert( - !(io.in.a.valid && outSelVec.asUInt.andR), - "address 
decode error, bad addr = 0x%x\n", addr - ) - - // bind out.req channel - (io.out zip outSelVec).foreach { case (o, v) => - o.a.bits := io.in.a.bits - o.a.valid := v && (io.in.a.valid && (state === s_idle)) - o.d.ready := v - } - - switch (state) { - is (s_idle) { - when (outSel.a.fire()) { state := s_resp } - when (reqInvalidAddr) { state := s_error } - } - is (s_resp) { when (outSelResp.d.fire()) { state := s_idle } } - is (s_error) { when(io.in.d.fire()){ state := s_idle } } - } - - io.in.d.valid := outSelResp.d.fire() || state === s_error - io.in.d.bits <> outSelResp.d.bits - // io.in.resp.bits.exc.get := state === s_error - outSelResp.d.ready := io.in.d.ready - io.in.a.ready := outSel.a.ready || reqInvalidAddr - - Debug() { - when (state === s_idle && io.in.a.valid) { - printf(p"${GTimer()}: req: ") - io.in.a.bits.dump() - } - - when (outSel.a.fire()) { - printf(p"${GTimer()}: xbar: outSelIdx = $outSelIdx, outSel.req: ") - outSel.a.bits.dump() - } - when (outSel.d.fire()) { - printf(p"${GTimer()}: xbar: outSelIdx= $outSelIdx, outSel.resp: ") - outSel.d.bits.dump() - } - - when (io.in.d.fire()) { - printf(p"${GTimer()}: xbar: in.resp: ") - io.in.d.bits.dump() - } - } -} diff --git a/src/main/scala/device/AXI4Timer.scala b/src/main/scala/device/AXI4Timer.scala index 5bcb798b8ddd9ce85e3ca95d81133f260327840d..ee7cd091f58397cfafa7c59dba3f612bc882951a 100644 --- a/src/main/scala/device/AXI4Timer.scala +++ b/src/main/scala/device/AXI4Timer.scala @@ -1,7 +1,6 @@ package device import chisel3._ -import chisel3.util.experimental.BoringUtils import chipsalliance.rocketchip.config.Parameters import freechips.rocketchip.diplomacy.AddressSet import utils._ @@ -31,12 +30,6 @@ class AXI4Timer val tick = (nextCnt === freq) when (tick) { mtime := mtime + inc } - if (sim) { - val isWFI = WireInit(false.B) - BoringUtils.addSink(isWFI, "isWFI") - when (isWFI) { mtime := mtime + 100000.U } - } - val mapping = Map( RegMap(0x4000, mtimecmp), RegMap(0x8000, freq), diff --git 
a/src/main/scala/device/AXI4UART.scala b/src/main/scala/device/AXI4UART.scala index ae2b168f4e9315b137df60fef8cb94ba20f16190..6c57efe04f6a8917ca8fa8db5e0b4c0d234288f5 100644 --- a/src/main/scala/device/AXI4UART.scala +++ b/src/main/scala/device/AXI4UART.scala @@ -4,7 +4,6 @@ import chisel3._ import chisel3.util._ import bus.axi4._ import chipsalliance.rocketchip.config.Parameters -import chisel3.util.experimental.BoringUtils import freechips.rocketchip.diplomacy.AddressSet import utils._ diff --git a/src/main/scala/device/TLTimer.scala b/src/main/scala/device/TLTimer.scala index 1137fa1a7516c7bc4b2310bb4c6b02c2cb0cda83..3f94050a683380fef44640f8803584acbf48ed0d 100644 --- a/src/main/scala/device/TLTimer.scala +++ b/src/main/scala/device/TLTimer.scala @@ -4,7 +4,6 @@ import chisel3._ import chisel3.util._ import freechips.rocketchip.tilelink._ import chipsalliance.rocketchip.config._ -import chisel3.util.experimental.BoringUtils import freechips.rocketchip.diplomacy._ import freechips.rocketchip.regmapper.RegField import utils.{HasTLDump, XSDebug} @@ -35,12 +34,6 @@ class TLTimer(address: Seq[AddressSet], sim: Boolean)(implicit p: Parameters) ex val tick = (nextCnt === freq) when (tick) { mtime := mtime + inc } - if (sim) { - val isWFI = WireInit(false.B) - ExcitingUtils.addSink(isWFI, "isWFI") - when (isWFI) { mtime := mtime + 100000.U } - } - node.regmap( mapping = 0x0000 -> RegField.bytes(msip), 0x4000 -> RegField.bytes(mtimecmp), diff --git a/src/main/scala/fpu/FPUSubModule.scala b/src/main/scala/fpu/FPUSubModule.scala deleted file mode 100644 index 414884dff54fbb4d1c0a8399b1a2038ff6f8e674..0000000000000000000000000000000000000000 --- a/src/main/scala/fpu/FPUSubModule.scala +++ /dev/null @@ -1,57 +0,0 @@ -package fpu - -import chisel3._ -import chisel3.util._ - - -class FPUSubModuleInput extends Bundle{ - val op = UInt(3.W) - val isDouble = Bool() - val a, b, c = UInt(64.W) - val rm = UInt(3.W) -} - -class FPUSubModuleOutput extends Bundle{ - val fflags = new 
Fflags - val result = UInt(64.W) -} - -class FPUSubModuleIO extends Bundle{ - val in = Flipped(DecoupledIO(new FPUSubModuleInput)) - val out = DecoupledIO(new FPUSubModuleOutput) -} - -trait HasPipelineReg { this: FPUSubModule => - def latency: Int - - val ready = Wire(Bool()) - val cnt = RegInit(0.U((log2Up(latency)+1).W)) - - ready := (cnt < latency.U) || (cnt === latency.U && io.out.ready) - cnt := cnt + io.in.fire() - io.out.fire() - - val valids = io.in.valid +: Array.fill(latency)(RegInit(false.B)) - for(i <- 1 to latency){ - when(ready){ valids(i) := valids(i-1) } - } - - def PipelineReg[T<:Data](i: Int)(next: T) = RegEnable(next, enable = valids(i-1) && ready) - def S1Reg[T<:Data](next: T):T = PipelineReg[T](1)(next) - def S2Reg[T<:Data](next: T):T = PipelineReg[T](2)(next) - def S3Reg[T<:Data](next: T):T = PipelineReg[T](3)(next) - def S4Reg[T<:Data](next: T):T = PipelineReg[T](4)(next) - def S5Reg[T<:Data](next: T):T = PipelineReg[T](5)(next) - - io.in.ready := ready - io.out.valid := valids.last -} - -trait HasUIntToSIntHelper { - implicit class UIntToSIntHelper(x: UInt){ - def toSInt: SInt = Cat(0.U(1.W), x).asSInt() - } -} - -abstract class FPUSubModule extends Module with HasUIntToSIntHelper { - val io = IO(new FPUSubModuleIO) -} diff --git a/src/main/scala/fpu/package.scala b/src/main/scala/fpu/package.scala deleted file mode 100644 index e32b2d8a13a6a3e91c6b5831ce8077f16fd4937e..0000000000000000000000000000000000000000 --- a/src/main/scala/fpu/package.scala +++ /dev/null @@ -1,121 +0,0 @@ -import chisel3._ -import chisel3.util._ - -package object fpu { - - object FPUOpType { - def funcWidth = 6 - def FpuOp(fu: String, op: String): UInt = ("b" + fu + op).U(funcWidth.W) - - // FMA - def fadd:UInt = FpuOp("000", "000") - def fsub:UInt = FpuOp("000", "001") - def fmadd:UInt = FpuOp("000", "100") - def fmsub:UInt = FpuOp("000", "101") - def fnmsub:UInt = FpuOp("000", "110") - def fnmadd:UInt = FpuOp("000", "111") - def fmul:UInt = FpuOp("000", "010") - - 
// FCMP - def fmin:UInt = FpuOp("001", "000") - def fmax:UInt = FpuOp("001", "001") - def fle:UInt = FpuOp("001", "010") - def flt:UInt = FpuOp("001", "011") - def feq:UInt = FpuOp("001", "100") - - // FMV - def fmv_f2i:UInt= FpuOp("010", "000") - def fmv_i2f:UInt= FpuOp("010", "001") - def fclass:UInt = FpuOp("010", "010") - def fsgnj:UInt = FpuOp("010", "110") - def fsgnjn:UInt = FpuOp("010", "101") - def fsgnjx:UInt = FpuOp("010", "100") - - // FloatToInt - def f2w:UInt = FpuOp("011", "000") - def f2wu:UInt = FpuOp("011", "001") - def f2l:UInt = FpuOp("011", "010") - def f2lu:UInt = FpuOp("011", "011") - - // IntToFloat - def w2f:UInt = FpuOp("100", "000") - def wu2f:UInt = FpuOp("100", "001") - def l2f:UInt = FpuOp("100", "010") - def lu2f:UInt = FpuOp("100", "011") - - // FloatToFloat - def s2d:UInt = FpuOp("101", "000") - def d2s:UInt = FpuOp("110", "000") - - // Div/Sqrt - def fdiv:UInt = FpuOp("111", "000") - def fsqrt:UInt = FpuOp("111", "001") - } - - object FPUIOFunc { - def in_raw = 0.U(1.W) - def in_unbox = 1.U(1.W) - - def out_raw = 0.U(2.W) - def out_box = 1.U(2.W) - def out_sext = 2.U(2.W) - def out_zext = 3.U(2.W) - - def apply(inputFunc: UInt, outputFunc:UInt) = Cat(inputFunc, outputFunc) - } - - class Fflags extends Bundle { - val invalid = Bool() // 4 - val infinite = Bool() // 3 - val overflow = Bool() // 2 - val underflow = Bool() // 1 - val inexact = Bool() // 0 - } - - object RoudingMode { - val RNE = "b000".U(3.W) - val RTZ = "b001".U(3.W) - val RDN = "b010".U(3.W) - val RUP = "b011".U(3.W) - val RMM = "b100".U(3.W) - } - - class FloatPoint(val expWidth: Int, val mantWidth:Int) extends Bundle{ - val sign = Bool() - val exp = UInt(expWidth.W) - val mant = UInt(mantWidth.W) - def defaultNaN: UInt = Cat(0.U(1.W), Fill(expWidth+1,1.U(1.W)), Fill(mantWidth-1,0.U(1.W))) - def posInf: UInt = Cat(0.U(1.W), Fill(expWidth, 1.U(1.W)), 0.U(mantWidth.W)) - def negInf: UInt = Cat(1.U(1.W), posInf.tail(1)) - def maxNorm: UInt = Cat(0.U(1.W), 
Fill(expWidth-1, 1.U(1.W)), 0.U(1.W), Fill(mantWidth, 1.U(1.W))) - def expBias: UInt = Fill(expWidth-1, 1.U(1.W)) - def expBiasInt: Int = (1 << (expWidth-1)) - 1 - def mantExt: UInt = Cat(exp=/=0.U, mant) - def apply(x: UInt): FloatPoint = x.asTypeOf(new FloatPoint(expWidth, mantWidth)) - } - - object Float32 extends FloatPoint(8, 23) - object Float64 extends FloatPoint(11, 52) - - def expOverflow(sexp: SInt, expWidth: Int): Bool = sexp >= Cat(0.U(1.W), Fill(expWidth, 1.U(1.W))).asSInt() - def expOverflow(uexp: UInt, expWidth: Int): Bool = expOverflow(Cat(0.U(1.W), uexp).asSInt(), expWidth) - - def boxF32ToF64(x: UInt): UInt = Cat(Fill(32, 1.U(1.W)), x(31, 0)) - def unboxF64ToF32(x: UInt): UInt = Mux(x(63, 32)===Fill(32, 1.U(1.W)), x(31, 0), Float32.defaultNaN) - - def extF32ToF64(x: UInt): UInt = { - val f32 = Float32(x) - Cat( - f32.sign, - Mux(f32.exp === 0.U, - 0.U(Float64.expWidth.W), - Mux((~f32.exp).asUInt() === 0.U, - Cat("b111".U(3.W), f32.exp), - Cat("b0111".U(4.W) + f32.exp.head(1), f32.exp.tail(1)) - ) - ), - Cat(f32.mant, 0.U((Float64.mantWidth - Float32.mantWidth).W)) - ) - } -} - diff --git a/src/main/scala/noop/BPU.scala b/src/main/scala/noop/BPU.scala deleted file mode 100644 index 8722596fac7c60ccf4a0d8f789d0974e09b89198..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/BPU.scala +++ /dev/null @@ -1,229 +0,0 @@ -package noop - -import chisel3._ -import chisel3.util._ -import chisel3.util.experimental.BoringUtils - -import utils._ - -class TableAddr(val idxBits: Int) extends NOOPBundle { - def tagBits = VAddrBits - 2 - idxBits - - //val res = UInt((AddrBits - VAddrBits).W) - val tag = UInt(tagBits.W) - val idx = UInt(idxBits.W) - val pad = UInt(2.W)//TODO - - def fromUInt(x: UInt) = x.asTypeOf(UInt(VAddrBits.W)).asTypeOf(this) - def getTag(x: UInt) = fromUInt(x).tag - def getIdx(x: UInt) = fromUInt(x).idx -} - -object BTBtype { - def B = "b00".U // branch - def J = "b01".U // jump - def I = "b10".U // indirect - def R = "b11".U // 
return - - def apply() = UInt(2.W) -} - -class BPUUpdateReq extends NOOPBundle { - val valid = Output(Bool()) - val pc = Output(UInt(VAddrBits.W)) - val isMissPredict = Output(Bool()) - val actualTarget = Output(UInt(VAddrBits.W)) - val actualTaken = Output(Bool()) // for branch - val fuOpType = Output(FuOpType()) - val btbType = Output(BTBtype()) - val isRVC = Output(Bool()) // for ras, save PC+2 to stack if is RVC -} - -class BPU1 extends NOOPModule { - val io = IO(new Bundle { - val in = new Bundle { val pc = Flipped(Valid((UInt(VAddrBits.W)))) } - val out = new RedirectIO - val flush = Input(Bool()) - val brIdx = Output(UInt(3.W)) - val lateJump = Output(Bool()) - }) - - val flush = BoolStopWatch(io.flush, io.in.pc.valid, startHighPriority = true) - - // BTB - val NRbtb = 512 - val btbAddr = new TableAddr(log2Up(NRbtb)) - def btbEntry() = new Bundle { - val tag = UInt(btbAddr.tagBits.W) - val _type = UInt(2.W) - val target = UInt(VAddrBits.W) - val brIdx = UInt(3.W) - val valid = Bool() - } - - val btb = Module(new SRAMTemplate(btbEntry(), set = NRbtb, shouldReset = true, holdRead = true, singlePort = true)) - // flush BTB when executing fence.i - val flushBTB = WireInit(false.B) - val flushTLB = WireInit(false.B) - BoringUtils.addSink(flushBTB, "MOUFlushICache") - BoringUtils.addSink(flushTLB, "MOUFlushTLB") - btb.reset := reset.asBool || (flushBTB || flushTLB) - - Debug(false) { - when (reset.asBool || (flushBTB || flushTLB)) { - printf("[BPU-RESET] %d bpu-reset flushBTB:%d flushTLB:%d\n", GTimer(), flushBTB, flushTLB) - } - } - - btb.io.r.req.valid := io.in.pc.valid - btb.io.r.req.bits.setIdx := btbAddr.getIdx(io.in.pc.bits) - - - val btbRead = Wire(btbEntry()) - btbRead := btb.io.r.resp.data(0) - // since there is one cycle latency to read SyncReadMem, - // we should latch the input pc for one cycle - val pcLatch = RegEnable(io.in.pc.bits, io.in.pc.valid) - val btbHit = btbRead.tag === btbAddr.getTag(pcLatch) && !flush && RegNext(btb.io.r.req.fire(), init = 
false.B) && !(pcLatch(1) && btbRead.brIdx(0)) && btbRead.valid - // btbHit will ignore pc(1,0). pc(1,0) is used to build brIdx - // !(pcLatch(1) && btbRead.brIdx(0)) is used to deal with the following case: - // ------------------------------------------------- - // 0 jump rvc // marked as "take branch" in BTB - // 2 xxx rvc <-- jump to here - // ------------------------------------------------- - val lateJump = btbRead.brIdx(2) && btbHit - io.lateJump := lateJump - // val lateJumpLatch = RegNext(lateJump) - // val lateJumpTarget = RegEnable(btbRead.target, lateJump) - Debug(false){ - //printf("[BTBHT] lateJump %x lateJumpLatch %x lateJumpTarget %x\n", lateJump, lateJumpLatch, lateJumpTarget) - when(btbHit){ - printf("[BTBHT1] %d pc=%x tag=%x,%x index=%x bridx=%x tgt=%x,%x flush %x type:%x\n", GTimer(), pcLatch, btbRead.tag, btbAddr.getTag(pcLatch), btbAddr.getIdx(pcLatch), btbRead.brIdx, btbRead.target, io.out.target, flush,btbRead._type) - printf("[BTBHT2] btbRead.brIdx %x mask %x\n", btbRead.brIdx, Cat(lateJump, Fill(2, io.out.valid))) - printf("[BTBHT5] btbReqValid:%d btbReqSetIdx:%x\n",btb.io.r.req.valid, btb.io.r.req.bits.setIdx) - } - } - - // PHT - val pht = Mem(NRbtb, UInt(2.W)) - val phtTaken = RegEnable(pht.read(btbAddr.getIdx(io.in.pc.bits))(1), io.in.pc.valid) - - // RAS - - val NRras = 16 - val ras = Mem(NRras, UInt(VAddrBits.W)) - // val raBrIdxs = Mem(NRras, UInt(2.W)) - val sp = Counter(NRras) - val rasTarget = RegEnable(ras.read(sp.value), io.in.pc.valid) - // val rasBrIdx = RegEnable(raBrIdxs.read(sp.value), io.in.pc.valid) - - // update - val req = WireInit(0.U.asTypeOf(new BPUUpdateReq)) - val btbWrite = WireInit(0.U.asTypeOf(btbEntry())) - BoringUtils.addSink(req, "bpuUpdateReq") - - Debug(false){ - when(req.valid){ - printf("[BTBUP] pc=%x tag=%x index=%x bridx=%x tgt=%x type=%x\n", req.pc, btbAddr.getTag(req.pc), btbAddr.getIdx(req.pc), Cat(req.pc(1), ~req.pc(1)), req.actualTarget, req.btbType) - } - } - - //val fflag = req.btbType===3.U && 
btb.io.w.req.valid && btb.io.w.req.bits.setIdx==="hc9".U - //when(fflag && GTimer()>2888000.U) { - // printf("%d\n", GTimer()) - // printf("[BTBHT6] btbWrite.type is BTBtype.R/RET!!! Inpc:%x btbWrite.brIdx:%x setIdx:%x\n", io.in.pc.bits, btbWrite.brIdx, btb.io.w.req.bits.setIdx) - // printf("[BTBHT6] tag:%x target:%x _type:%x bridx:%x\n", btbWrite.tag,btbWrite.target,btbWrite._type,btbWrite.brIdx) - // printf(p"[BTBHT6] req:${req} \n") - //} - //printf("[BTBHT5] tag: target:%x type:%d brIdx:%d\n", req.actualTarget, req.btbType, Cat(req.pc(2,0)==="h6".U && !req.isRVC, req.pc(1), ~req.pc(1))) - - btbWrite.tag := btbAddr.getTag(req.pc) - btbWrite.target := req.actualTarget - btbWrite._type := req.btbType - btbWrite.brIdx := Cat(req.pc(2,0)==="h6".U && !req.isRVC, req.pc(1), ~req.pc(1)) - btbWrite.valid := true.B - // NOTE: We only update BTB at a miss prediction. - // If a miss prediction is found, the pipeline will be flushed - // in the next cycle. Therefore it is safe to use single-port - // SRAM to implement BTB, since write requests have higher priority - // than read request. Again, since the pipeline will be flushed - // in the next cycle, the read request will be useless. 
- btb.io.w.req.valid := req.isMissPredict && req.valid - btb.io.w.req.bits.setIdx := btbAddr.getIdx(req.pc) - btb.io.w.req.bits.data := btbWrite - - //Debug(true) { - //when (btb.io.w.req.valid && btbWrite.tag === btbAddr.getTag("hffffffff803541a4".U)) { - // printf("[BTBWrite] %d setIdx:%x req.valid:%d pc:%x target:%x bridx:%x\n", GTimer(), btbAddr.getIdx(req.pc), req.valid, req.pc, req.actualTarget, btbWrite.brIdx) - //} - //} - - //when (GTimer() > 77437484.U && btb.io.w.req.valid) { - // printf("[BTBWrite-ALL] %d setIdx:%x req.valid:%d pc:%x target:%x bridx:%x\n", GTimer(), btbAddr.getIdx(req.pc), req.valid, req.pc, req.actualTarget, btbWrite.brIdx) - //} - - val cnt = RegNext(pht.read(btbAddr.getIdx(req.pc))) - val reqLatch = RegNext(req) - when (reqLatch.valid && ALUOpType.isBranch(reqLatch.fuOpType)) { - val taken = reqLatch.actualTaken - val newCnt = Mux(taken, cnt + 1.U, cnt - 1.U) - val wen = (taken && (cnt =/= "b11".U)) || (!taken && (cnt =/= "b00".U)) - when (wen) { - pht.write(btbAddr.getIdx(reqLatch.pc), newCnt) - //Debug(){ - //printf("BPUPDATE: pc %x cnt %x\n", reqLatch.pc, newCnt) - //} - } - } - when (req.valid) { - when (req.fuOpType === ALUOpType.call) { - ras.write(sp.value + 1.U, Mux(req.isRVC, req.pc + 2.U, req.pc + 4.U)) - // raBrIdxs.write(sp.value + 1.U, Mux(req.pc(1), 2.U, 1.U)) - sp.value := sp.value + 1.U - } - .elsewhen (req.fuOpType === ALUOpType.ret) { - when(sp.value === 0.U) { - //printf("ATTTTT: sp.value is 0.U\n") //TODO: sp.value may equal to 0.U - } - sp.value := Mux(sp.value===0.U, 0.U, sp.value - 1.U) //TODO: sp.value may less than 0.U - } - } - - io.out.target := Mux(btbRead._type === BTBtype.R, rasTarget, btbRead.target) - // io.out.target := Mux(lateJumpLatch && !flush, lateJumpTarget, Mux(btbRead._type === BTBtype.R, rasTarget, btbRead.target)) - // io.out.brIdx := btbRead.brIdx & Fill(3, io.out.valid) - io.brIdx := btbRead.brIdx & Cat(true.B, lateJump, Fill(2, io.out.valid)) - io.out.valid := btbHit && Mux(btbRead._type 
=== BTBtype.B, phtTaken, true.B && rasTarget=/=0.U) //TODO: add rasTarget=/=0.U, need fix - // io.out.valid := btbHit && Mux(btbRead._type === BTBtype.B, phtTaken, true.B) && !lateJump || lateJumpLatch && !flush && !lateJump - // Note: - // btbHit && Mux(btbRead._type === BTBtype.B, phtTaken, true.B) && !lateJump : normal branch predict - // lateJumpLatch && !flush && !lateJump : cross line branch predict, bpu will require imem to fetch the next 16bit of current inst in next instline - // `&& !lateJump` is used to make sure this logic will run correctly when imem stalls (pcUpdate === false) - // by using `instline`, we mean a 64 bit instfetch result from imem - // ROCKET uses a 32 bit instline, and its IDU logic is more simple than this implentation. -} - -class BPU2 extends NOOPModule { - val io = IO(new Bundle { - val in = Flipped(Valid(new CtrlFlowIO)) - val out = new RedirectIO - }) - - val instr = io.in.bits.instr - val immJ = SignExt(Cat(instr(31), instr(19, 12), instr(20), instr(30, 21), 0.U(1.W)), XLEN) - val immB = SignExt(Cat(instr(31), instr(7), instr(30, 25), instr(11, 8), 0.U(1.W)), XLEN) - val table = Array( - RV32I_BRUInstr.JAL -> List(immJ, true.B), - RV32I_BRUInstr.BNE -> List(immB, instr(31)), - RV32I_BRUInstr.BEQ -> List(immB, instr(31)), - RV32I_BRUInstr.BLT -> List(immB, instr(31)), - RV32I_BRUInstr.BGE -> List(immB, instr(31)), - RV32I_BRUInstr.BLTU -> List(immB, instr(31)), - RV32I_BRUInstr.BGEU -> List(immB, instr(31)) - ) - val default = List(immB, false.B) - val offset :: predict :: Nil = ListLookup(instr, default, table) - - io.out.target := io.in.bits.pc + offset - io.out.valid := io.in.valid && predict(0) -} diff --git a/src/main/scala/noop/Bundle.scala b/src/main/scala/noop/Bundle.scala deleted file mode 100644 index 071168c2a391028ca95f220255aab9ea32be59a8..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/Bundle.scala +++ /dev/null @@ -1,127 +0,0 @@ -package noop - -import chisel3._ -import chisel3.util._ - -class 
CtrlSignalIO extends NOOPBundle { - val src1Type = Output(SrcType()) - val src2Type = Output(SrcType()) - val src3Type = Output(SrcType()) - val fuType = Output(FuType()) - val fuOpType = Output(FuOpType()) - val rfSrc1 = Output(UInt(5.W)) - val rfSrc2 = Output(UInt(5.W)) - val rfWen = Output(Bool()) - val fpWen = Output(Bool()) - val fpInputFunc = Output(UInt(1.W)) - val fpOutputFunc = Output(UInt(2.W)) - val rfDest = Output(UInt(5.W)) - val isNoopTrap = Output(Bool()) - val isSrc1Forward = Output(Bool()) - val isSrc2Forward = Output(Bool()) -} - -class DataSrcIO extends NOOPBundle { - val src1 = Output(UInt(XLEN.W)) - val src2 = Output(UInt(XLEN.W)) - val imm = Output(UInt(XLEN.W)) -} - -class RedirectIO extends NOOPBundle { - val target = Output(UInt(VAddrBits.W)) - // val brIdx = Output(UInt(3.W)) // for RVC - val valid = Output(Bool()) -} - -// class IRIDCtrlFlowIO extends NOOPBundle { -// val instr = Output(UInt(64.W)) -// val pc = Output(UInt(VAddrBits.W)) -// val pnpc = Output(UInt(VAddrBits.W)) -// val brIdx = Output(UInt(3.W)) -// val redirect = new RedirectIO -// } - -class CtrlFlowIO extends NOOPBundle { - val instr = Output(UInt(64.W)) - val pc = Output(UInt(VAddrBits.W)) - val pnpc = Output(UInt(VAddrBits.W)) - val redirect = new RedirectIO - val exceptionVec = Output(Vec(16, Bool())) - val intrVec = Output(Vec(12, Bool())) - val brIdx = Output(UInt(4.W)) - val crossPageIPFFix = Output(Bool()) -} - -class DecodeIO extends NOOPBundle { - val cf = new CtrlFlowIO - val ctrl = new CtrlSignalIO - val data = new DataSrcIO -} - -class WriteBackIO extends NOOPBundle { - val rfWen = Output(Bool()) - val fpWen = Output(Bool()) - val rfDest = Output(UInt(5.W)) - val rfData = Output(UInt(XLEN.W)) -} - -class CommitIO extends NOOPBundle { - val decode = new DecodeIO - val isMMIO = Output(Bool()) - val intrNO = Output(UInt(XLEN.W)) - val commits = Output(Vec(FuType.num, UInt(XLEN.W))) -} - -class FunctionUnitIO extends NOOPBundle { - val in = Flipped(Decoupled(new 
Bundle { - val src1 = Output(UInt(XLEN.W)) - val src2 = Output(UInt(XLEN.W)) - val func = Output(FuOpType()) - })) - val out = Decoupled(Output(UInt(XLEN.W))) -} - -class ForwardIO extends NOOPBundle { - val valid = Output(Bool()) - val wb = new WriteBackIO - val fuType = Output(FuType()) -} - -class MMUIO extends NOOPBundle { - // val ptev = Output(Bool()) - // val pteu = Output(Bool()) - // val ptex = Output(Bool()) - // val valid = Output(Bool()) - // val isStore = Output(Bool()) - - val priviledgeMode = Input(UInt(2.W)) - val status_sum = Input(Bool()) - val status_mxr = Input(Bool()) - - val loadPF = Output(Bool()) - val storePF = Output(Bool()) - val addr = Output(UInt(VAddrBits.W)) - - def isPF() = loadPF || storePF -} - -class MemMMUIO extends NOOPBundle { - val imem = new MMUIO - val dmem = new MMUIO -} - -class TLBExuIO extends NOOPBundle { - val satp = Output(UInt(XLEN.W)) - val sfence = new Bundle { - val valid = Output(Bool()) - val asid = Output(UInt(9.W)) - val vaddr = Output(UInt(XLEN.W)) - } - - def access(valid: Bool, src1: UInt, src2: UInt, func: UInt, satp: UInt) = {//func no use here for just sfence.vma only - this.sfence.valid := valid - this.sfence.vaddr := src1 - this.sfence.asid := src2(8,0) - this.satp := satp - } -} \ No newline at end of file diff --git a/src/main/scala/noop/Cache.scala b/src/main/scala/noop/Cache.scala deleted file mode 100644 index fb173b1e0f14b169ae75311b03e955f4787a0c2e..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/Cache.scala +++ /dev/null @@ -1,561 +0,0 @@ -package noop - -import chisel3._ -import chisel3.util._ -import chisel3.util.experimental.BoringUtils - -import bus.simplebus._ -import bus.axi4._ -import utils._ - -case class CacheConfig ( - ro: Boolean = false, - name: String = "cache", - userBits: Int = 0, - cacheLevel: Int = 1, - - totalSize: Int = 32, // Kbytes - ways: Int = 4 -) - -sealed trait HasCacheConst { - implicit val cacheConfig: CacheConfig - - val PAddrBits: Int - val XLEN: 
Int - - val cacheName = cacheConfig.name - val userBits = cacheConfig.userBits - - val ro = cacheConfig.ro - val hasCoh = !ro - val hasCohInt = (if (hasCoh) 1 else 0) - val hasPrefetch = cacheName == "l2cache" - - val cacheLevel = cacheConfig.cacheLevel - val TotalSize = cacheConfig.totalSize - val Ways = cacheConfig.ways - val LineSize = XLEN // byte - val LineBeats = LineSize / 8 //DATA WIDTH 64 - val Sets = TotalSize * 1024 / LineSize / Ways - val OffsetBits = log2Up(LineSize) - val IndexBits = log2Up(Sets) - val WordIndexBits = log2Up(LineBeats) - val TagBits = PAddrBits - OffsetBits - IndexBits - - val debug = true - - def addrBundle = new Bundle { - val tag = UInt(TagBits.W) - val index = UInt(IndexBits.W) - val wordIndex = UInt(WordIndexBits.W) - val byteOffset = UInt((if (XLEN == 64) 3 else 2).W) - } - - def CacheMetaArrayReadBus() = new SRAMReadBus(new MetaBundle, set = Sets, way = Ways) - def CacheDataArrayReadBus() = new SRAMReadBus(new DataBundle, set = Sets * LineBeats, way = Ways) - def CacheMetaArrayWriteBus() = new SRAMWriteBus(new MetaBundle, set = Sets, way = Ways) - def CacheDataArrayWriteBus() = new SRAMWriteBus(new DataBundle, set = Sets * LineBeats, way = Ways) - - def getMetaIdx(addr: UInt) = addr.asTypeOf(addrBundle).index - def getDataIdx(addr: UInt) = Cat(addr.asTypeOf(addrBundle).index, addr.asTypeOf(addrBundle).wordIndex) - - def isSameWord(a1: UInt, a2: UInt) = ((a1 >> 2) === (a2 >> 2)) - def isSetConflict(a1: UInt, a2: UInt) = (a1.asTypeOf(addrBundle).index === a2.asTypeOf(addrBundle).index) -} - -sealed abstract class CacheBundle(implicit cacheConfig: CacheConfig) extends Bundle with HasNOOPParameter with HasCacheConst -sealed abstract class CacheModule(implicit cacheConfig: CacheConfig) extends Module with HasNOOPParameter with HasCacheConst - -sealed class MetaBundle(implicit val cacheConfig: CacheConfig) extends CacheBundle { - val tag = Output(UInt(TagBits.W)) - val valid = Output(Bool()) - val dirty = Output(Bool()) - - def 
apply(tag: UInt, valid: Bool, dirty: Bool) = { - this.tag := tag - this.valid := valid - this.dirty := dirty - this - } -} - -sealed class DataBundle(implicit val cacheConfig: CacheConfig) extends CacheBundle { - val data = Output(UInt(DataBits.W)) - - def apply(data: UInt) = { - this.data := data - this - } -} - -sealed class Stage1IO(implicit val cacheConfig: CacheConfig) extends CacheBundle { - val req = new SimpleBusReqBundle(userBits = userBits) -} - -// meta read -sealed class CacheStage1(implicit val cacheConfig: CacheConfig) extends CacheModule { - val io = IO(new Bundle { - val in = Flipped(Decoupled(new SimpleBusReqBundle(userBits = userBits))) - val out = Decoupled(new Stage1IO) - val metaReadBus = CacheMetaArrayReadBus() - val dataReadBus = CacheDataArrayReadBus() - - val s2s3Empty = Input(Bool()) // FIXME: remove me when do not use nut's cache - }) - - if (ro) when (io.in.fire()) { assert(!io.in.bits.isWrite()) } - Debug(){ - if (debug) { - when(io.in.fire()){ - printf("[L1$] " +name+" cache stage1, addr in: %x, user: %x\n", io.in.bits.addr, io.in.bits.user.getOrElse(0.U)) - } - } - } - - // read meta array and data array - val readBusValid = io.in.valid && io.out.ready - io.metaReadBus.apply(valid = readBusValid, setIdx = getMetaIdx(io.in.bits.addr)) - io.dataReadBus.apply(valid = readBusValid, setIdx = getDataIdx(io.in.bits.addr)) - - io.out.bits.req := io.in.bits - io.out.valid := io.in.valid && io.metaReadBus.req.ready && io.dataReadBus.req.ready && io.s2s3Empty // FIXME: remove me when do not use nut's cache - io.in.ready := (!io.in.valid || io.out.fire()) && io.metaReadBus.req.ready && io.dataReadBus.req.ready && io.s2s3Empty // FIXME: remove me when do not use nut's cache - - Debug() { - if (debug) { - printf("%d: [" + cacheName + " stage1]: in.ready = %d, in.valid = %d, out.valid = %d, out.ready = %d, addr = %x, cmd = %x, dataReadBus.req.valid = %d\n", - GTimer(), io.in.ready, io.in.valid, io.out.valid, io.out.ready, io.in.bits.addr, 
io.in.bits.cmd, io.dataReadBus.req.valid) - } - } -} - -sealed class Stage2IO(implicit val cacheConfig: CacheConfig) extends CacheBundle { - val req = new SimpleBusReqBundle(userBits = userBits) - val metas = Vec(Ways, new MetaBundle) - val datas = Vec(Ways, new DataBundle) - val hit = Output(Bool()) - val waymask = Output(UInt(Ways.W)) - val mmio = Output(Bool()) - val isForwardData = Output(Bool()) - val forwardData = Output(CacheDataArrayWriteBus().req.bits) -} - -// check -sealed class CacheStage2(implicit val cacheConfig: CacheConfig) extends CacheModule { - val io = IO(new Bundle { - val in = Flipped(Decoupled(new Stage1IO)) - val out = Decoupled(new Stage2IO) - val metaReadResp = Flipped(Vec(Ways, new MetaBundle)) - val dataReadResp = Flipped(Vec(Ways, new DataBundle)) - val metaWriteBus = Input(CacheMetaArrayWriteBus()) - val dataWriteBus = Input(CacheDataArrayWriteBus()) - }) - - val req = io.in.bits.req - val addr = req.addr.asTypeOf(addrBundle) - - val isForwardMeta = io.in.valid && io.metaWriteBus.req.valid && io.metaWriteBus.req.bits.setIdx === getMetaIdx(req.addr) - val isForwardMetaReg = RegInit(false.B) - when (isForwardMeta) { isForwardMetaReg := true.B } - when (io.in.fire() || !io.in.valid) { isForwardMetaReg := false.B } - val forwardMetaReg = RegEnable(io.metaWriteBus.req.bits, isForwardMeta) - - val metaWay = Wire(Vec(Ways, chiselTypeOf(forwardMetaReg.data))) - forwardMetaReg.waymask.getOrElse("b1".U).asBools.zipWithIndex.map { case (w, i) => - metaWay(i) := Mux(isForwardMetaReg && w, forwardMetaReg.data, io.metaReadResp(i)) - } - - val hitVec = VecInit(metaWay.map(m => m.valid && (m.tag === addr.tag) && io.in.valid)).asUInt - val victimWaymask = if (Ways > 1) (1.U << LFSR64()(log2Up(Ways)-1,0)) else "b1".U - - val invalidVec = VecInit(metaWay.map(m => !m.valid)).asUInt - val hasInvalidWay = invalidVec.orR - val refillInvalidWaymask = Mux(invalidVec >= 8.U, "b1000".U, - Mux(invalidVec >= 4.U, "b0100".U, - Mux(invalidVec >= 2.U, "b0010".U, 
"b0001".U))) - - // val waymask = Mux(io.out.bits.hit, hitVec, victimWaymask) - val waymask = Mux(io.out.bits.hit, hitVec, Mux(hasInvalidWay, refillInvalidWaymask, victimWaymask)) - assert(!(io.in.valid && PopCount(waymask) > 1.U)) - - io.out.bits.metas := metaWay - io.out.bits.hit := io.in.valid && hitVec.orR - io.out.bits.waymask := waymask - io.out.bits.datas := io.dataReadResp - io.out.bits.mmio := xiangshan.AddressSpace.isMMIO(ZeroExt(req.addr, 40)) // FIXME: isMMIO should have PAddrBits Length ?? - - val isForwardData = io.in.valid && (io.dataWriteBus.req match { case r => - r.valid && r.bits.setIdx === getDataIdx(req.addr) - }) - val isForwardDataReg = RegInit(false.B) - when (isForwardData) { isForwardDataReg := true.B } - when (io.in.fire() || !io.in.valid) { isForwardDataReg := false.B } - val forwardDataReg = RegEnable(io.dataWriteBus.req.bits, isForwardData) - io.out.bits.isForwardData := isForwardDataReg || isForwardData - io.out.bits.forwardData := Mux(isForwardData, io.dataWriteBus.req.bits, forwardDataReg) - - io.out.bits.req <> req - io.out.valid := io.in.valid - io.in.ready := !io.in.valid || io.out.fire() - - Debug() { - if (debug) { - printf("%d: [" + cacheName + " S2]: isFD:%d isFDreg:%d inFire:%d invalid:%d \n", GTimer(), isForwardData, isForwardDataReg, io.in.fire(), io.in.valid) - } - } -} - -// writeback -sealed class CacheStage3(implicit val cacheConfig: CacheConfig) extends CacheModule { - val io = IO(new Bundle { - val in = Flipped(Decoupled(new Stage2IO)) - val out = Decoupled(new SimpleBusRespBundle(userBits = userBits)) - val isFinish = Output(Bool()) - val flush = Input(Bool()) - val dataReadBus = CacheDataArrayReadBus() - val dataWriteBus = CacheDataArrayWriteBus() - val metaWriteBus = CacheMetaArrayWriteBus() - - val mem = new SimpleBusUC - val mmio = new SimpleBusUC - val cohResp = Decoupled(new SimpleBusRespBundle) - - // use to distinguish prefetch request and normal request - val dataReadRespToL1 = Output(Bool()) - }) - - val 
metaWriteArb = Module(new Arbiter(CacheMetaArrayWriteBus().req.bits, 2)) - val dataWriteArb = Module(new Arbiter(CacheDataArrayWriteBus().req.bits, 2)) - - val req = io.in.bits.req - val addr = req.addr.asTypeOf(addrBundle) - val mmio = io.in.valid && io.in.bits.mmio - val hit = io.in.valid && io.in.bits.hit - val miss = io.in.valid && !io.in.bits.hit - val probe = io.in.valid && hasCoh.B && req.isProbe() - val hitReadBurst = hit && req.isReadBurst() - val meta = Mux1H(io.in.bits.waymask, io.in.bits.metas) - assert(!(mmio && hit), "MMIO request should not hit in cache") - - // this is ugly - if (cacheName == "dcache") { - BoringUtils.addSource(mmio, "lsuMMIO") - } - - val useForwardData = io.in.bits.isForwardData && io.in.bits.waymask === io.in.bits.forwardData.waymask.getOrElse("b1".U) - val dataReadArray = Mux1H(io.in.bits.waymask, io.in.bits.datas).data - val dataRead = Mux(useForwardData, io.in.bits.forwardData.data.data, dataReadArray) - val wordMask = Mux(!ro.B && req.isWrite(), MaskExpand(req.wmask), 0.U(DataBits.W)) - - val writeL2BeatCnt = Counter(LineBeats) - when(io.out.fire() && (req.cmd === SimpleBusCmd.writeBurst || req.isWriteLast())) { - writeL2BeatCnt.inc() - } - - val hitWrite = hit && req.isWrite() - val dataHitWriteBus = Wire(CacheDataArrayWriteBus()).apply( - data = Wire(new DataBundle).apply(MaskData(dataRead, req.wdata, wordMask)), - valid = hitWrite, setIdx = Cat(addr.index, Mux(req.cmd === SimpleBusCmd.writeBurst || req.isWriteLast(), writeL2BeatCnt.value, addr.wordIndex)), waymask = io.in.bits.waymask) - - val metaHitWriteBus = Wire(CacheMetaArrayWriteBus()).apply( - valid = hitWrite && !meta.dirty, setIdx = getMetaIdx(req.addr), waymask = io.in.bits.waymask, - data = Wire(new MetaBundle).apply(tag = meta.tag, valid = true.B, dirty = (!ro).B) - ) - - val s_idle :: s_memReadReq :: s_memReadResp :: s_memWriteReq :: s_memWriteResp :: s_mmioReq :: s_mmioResp :: s_wait_resp :: s_release :: Nil = Enum(9) - val state = RegInit(s_idle) - val 
needFlush = RegInit(false.B) - - when (io.flush && (state =/= s_idle)) { needFlush := true.B } - when (io.out.fire() && needFlush) { needFlush := false.B } - - val readBeatCnt = Counter(LineBeats) - val writeBeatCnt = Counter(LineBeats) - - val s2_idle :: s2_dataReadWait :: s2_dataOK :: Nil = Enum(3) - val state2 = RegInit(s2_idle) - - io.dataReadBus.apply(valid = (state === s_memWriteReq || state === s_release) && (state2 === s2_idle), - setIdx = Cat(addr.index, Mux(state === s_release, readBeatCnt.value, writeBeatCnt.value))) - val dataWay = RegEnable(io.dataReadBus.resp.data, state2 === s2_dataReadWait) - val dataHitWay = Mux1H(io.in.bits.waymask, dataWay).data - - switch (state2) { - is (s2_idle) { when (io.dataReadBus.req.fire()) { state2 := s2_dataReadWait } } - is (s2_dataReadWait) { state2 := s2_dataOK } - is (s2_dataOK) { when (io.mem.req.fire() || io.cohResp.fire() || hitReadBurst && io.out.ready) { state2 := s2_idle } } - } - - // critical word first read - val raddr = (if (XLEN == 64) Cat(req.addr(PAddrBits-1,3), 0.U(3.W)) - else Cat(req.addr(PAddrBits-1,2), 0.U(2.W))) - // dirty block addr - val waddr = Cat(meta.tag, addr.index, 0.U(OffsetBits.W)) - val cmd = Mux(state === s_memReadReq, SimpleBusCmd.readBurst, - Mux((writeBeatCnt.value === (LineBeats - 1).U), SimpleBusCmd.writeLast, SimpleBusCmd.writeBurst)) - io.mem.req.bits.apply(addr = Mux(state === s_memReadReq, raddr, waddr), - cmd = cmd, size = (if (XLEN == 64) "b11".U else "b10".U), - wdata = dataHitWay, wmask = Fill(DataBytes, 1.U)) - - io.mem.resp.ready := true.B - io.mem.req.valid := (state === s_memReadReq) || ((state === s_memWriteReq) && (state2 === s2_dataOK)) - - // mmio - io.mmio.req.bits := req - io.mmio.resp.ready := true.B - io.mmio.req.valid := (state === s_mmioReq) - - val afterFirstRead = RegInit(false.B) - val alreadyOutFire = RegEnable(true.B, init = false.B, io.out.fire()) - val readingFirst = !afterFirstRead && io.mem.resp.fire() && (state === s_memReadResp) - val 
inRdataRegDemand = RegEnable(Mux(mmio, io.mmio.resp.bits.rdata, io.mem.resp.bits.rdata), - Mux(mmio, state === s_mmioResp, readingFirst)) - - // probe - io.cohResp.valid := ((state === s_idle) && probe) || - ((state === s_release) && (state2 === s2_dataOK)) - io.cohResp.bits.rdata := dataHitWay - val releaseLast = Counter(state === s_release && io.cohResp.fire(), LineBeats)._2 - io.cohResp.bits.cmd := Mux(state === s_release, Mux(releaseLast, SimpleBusCmd.readLast, 0.U), - Mux(hit, SimpleBusCmd.probeHit, SimpleBusCmd.probeMiss)) - - val respToL1Fire = hitReadBurst && io.out.ready && state2 === s2_dataOK - val respToL1Last = Counter((state === s_idle || state === s_release && state2 === s2_dataOK) && hitReadBurst && io.out.ready, LineBeats)._2 - - switch (state) { - is (s_idle) { - afterFirstRead := false.B - alreadyOutFire := false.B - - when (probe) { - when (io.cohResp.fire()) { - state := Mux(hit, s_release, s_idle) - readBeatCnt.value := addr.wordIndex - } - } .elsewhen (hitReadBurst && io.out.ready) { - state := s_release - readBeatCnt.value := Mux(addr.wordIndex === (LineBeats - 1).U, 0.U, (addr.wordIndex + 1.U)) - } .elsewhen ((miss || mmio) && !io.flush) { - state := Mux(mmio, s_mmioReq, Mux(!ro.B && meta.dirty, s_memWriteReq, s_memReadReq)) - } - } - - is (s_mmioReq) { when (io.mmio.req.fire()) { state := s_mmioResp } } - is (s_mmioResp) { when (io.mmio.resp.fire()) { state := s_wait_resp } } - - is (s_release) { - when (io.cohResp.fire() || respToL1Fire) { readBeatCnt.inc() } - when (probe && io.cohResp.fire() && releaseLast || respToL1Fire && respToL1Last) { state := s_idle } - } - - is (s_memReadReq) { when (io.mem.req.fire()) { - state := s_memReadResp - readBeatCnt.value := addr.wordIndex - }} - - is (s_memReadResp) { - when (io.mem.resp.fire()) { - afterFirstRead := true.B - readBeatCnt.inc() - when (req.cmd === SimpleBusCmd.writeBurst) { writeL2BeatCnt.value := 0.U } - when (io.mem.resp.bits.isReadLast()) { state := s_wait_resp } - } - } - - is 
(s_memWriteReq) { - when (io.mem.req.fire()) { writeBeatCnt.inc() } - when (io.mem.req.bits.isWriteLast() && io.mem.req.fire()) { state := s_memWriteResp } - } - - is (s_memWriteResp) { when (io.mem.resp.fire()) { state := s_memReadReq } } - is (s_wait_resp) { when (io.out.fire() || needFlush || alreadyOutFire) { state := s_idle } } - } - - val dataRefill = MaskData(io.mem.resp.bits.rdata, req.wdata, Mux(readingFirst, wordMask, 0.U(DataBits.W))) - val dataRefillWriteBus = Wire(CacheDataArrayWriteBus).apply( - valid = (state === s_memReadResp) && io.mem.resp.fire(), setIdx = Cat(addr.index, readBeatCnt.value), - data = Wire(new DataBundle).apply(dataRefill), waymask = io.in.bits.waymask) - - dataWriteArb.io.in(0) <> dataHitWriteBus.req - dataWriteArb.io.in(1) <> dataRefillWriteBus.req - io.dataWriteBus.req <> dataWriteArb.io.out - - val metaRefillWriteBus = Wire(CacheMetaArrayWriteBus()).apply( - valid = (state === s_memReadResp) && io.mem.resp.fire() && io.mem.resp.bits.isReadLast(), - data = Wire(new MetaBundle).apply(valid = true.B, tag = addr.tag, dirty = !ro.B && req.isWrite()), - setIdx = getMetaIdx(req.addr), waymask = io.in.bits.waymask - ) - - metaWriteArb.io.in(0) <> metaHitWriteBus.req - metaWriteArb.io.in(1) <> metaRefillWriteBus.req - io.metaWriteBus.req <> metaWriteArb.io.out - - if (cacheLevel == 2) { - when ((state === s_memReadResp) && io.mem.resp.fire() && req.isReadBurst()) { - // readBurst request miss - io.out.bits.rdata := dataRefill - io.out.bits.cmd := Mux(io.mem.resp.bits.isReadLast(), SimpleBusCmd.readLast, SimpleBusCmd.readBurst) - }.elsewhen (req.isWriteLast() || req.cmd === SimpleBusCmd.writeBurst) { - // writeBurst/writeLast request, no matter hit or miss - io.out.bits.rdata := Mux(hit, dataRead, inRdataRegDemand) - io.out.bits.cmd := DontCare - }.elsewhen (hitReadBurst && state === s_release) { - // readBurst request hit - io.out.bits.rdata := dataHitWay - io.out.bits.cmd := Mux(respToL1Last, SimpleBusCmd.readLast, 
SimpleBusCmd.readBurst) - }.otherwise { - io.out.bits.rdata := Mux(hit, dataRead, inRdataRegDemand) - io.out.bits.cmd := req.cmd - } - } else { - io.out.bits.rdata := Mux(hit, dataRead, inRdataRegDemand) - io.out.bits.cmd := Mux(io.in.bits.req.isRead(), SimpleBusCmd.readLast, Mux(io.in.bits.req.isWrite(), SimpleBusCmd.writeResp, DontCare))//DontCare, added by lemover - } - io.out.bits.user.zip(req.user).map { case (o,i) => o := i } - - io.out.valid := io.in.valid && Mux(req.isBurst() && (cacheLevel == 2).B, - Mux(req.isWrite() && (hit || !hit && state === s_wait_resp), true.B, (state === s_memReadResp && io.mem.resp.fire() && req.cmd === SimpleBusCmd.readBurst)) || (respToL1Fire && respToL1Last && state === s_release), - Mux(probe, false.B, Mux(hit, true.B, Mux(req.isWrite() || mmio, state === s_wait_resp, afterFirstRead && !alreadyOutFire))) - ) - - // With critical-word first, the pipeline registers between - // s2 and s3 can not be overwritten before a missing request - // is totally handled. We use io.isFinish to indicate when the - // request really ends. 
- io.isFinish := Mux(probe, io.cohResp.fire() && Mux(miss, state === s_idle, (state === s_release) && releaseLast), - Mux(hit || req.isWrite(), io.out.fire(), (state === s_wait_resp) && (io.out.fire() || alreadyOutFire)) - ) - - io.in.ready := io.out.ready && (state === s_idle && !hitReadBurst) && !miss && !probe - io.dataReadRespToL1 := hitReadBurst && (state === s_idle && io.out.ready || state === s_release && state2 === s2_dataOK) - - assert(!(metaHitWriteBus.req.valid && metaRefillWriteBus.req.valid)) - assert(!(dataHitWriteBus.req.valid && dataRefillWriteBus.req.valid)) - assert(!(!ro.B && io.flush), "only allow to flush icache") - Debug() { - if (debug) { - printf("%d: [" + cacheName + " S3]: in.ready = %d, in.valid = %d, hit = %x, state = %d, addr = %x cmd:%d probe:%d isFinish:%d\n", - GTimer(), io.in.ready, io.in.valid, hit, state, req.addr, req.cmd, probe, io.isFinish) - printf("%d: [" + cacheName + " S3]: out.valid:%d rdata:%x cmd:%d user:%x \n", - GTimer(), io.out.valid, io.out.bits.rdata, io.out.bits.cmd, io.out.bits.user.getOrElse(0.U)) - printf("%d: [" + cacheName + " S3]: DHW: (%d, %d), data:%x MHW:(%d, %d)\n", - GTimer(), dataHitWriteBus.req.valid, dataHitWriteBus.req.ready, dataHitWriteBus.req.bits.data.asUInt, metaHitWriteBus.req.valid, metaHitWriteBus.req.ready) - printf("%d: [" + cacheName + " S3]: useFD:%d isFD:%d FD:%x DreadArray:%x dataRead:%x inwaymask:%x FDwaymask:%x \n", - GTimer(), useForwardData, io.in.bits.isForwardData, io.in.bits.forwardData.data.data, dataReadArray, dataRead, io.in.bits.waymask, io.in.bits.forwardData.waymask.getOrElse("b1".U)) - } - } -} - -class Cache(implicit val cacheConfig: CacheConfig) extends CacheModule { - val io = IO(new Bundle { - val in = Flipped(new SimpleBusUC(userBits = userBits)) - val flush = Input(UInt(2.W)) - val out = new SimpleBusC - val mmio = new SimpleBusUC - val empty = Output(Bool()) - }) - - // cpu pipeline - val s1 = Module(new CacheStage1) - val s2 = Module(new CacheStage2) - val s3 = 
Module(new CacheStage3) - val metaArray = Module(new SRAMTemplateWithArbiter(nRead = 1, new MetaBundle, set = Sets, way = Ways, shouldReset = true)) - val dataArray = Module(new SRAMTemplateWithArbiter(nRead = 2, new DataBundle, set = Sets * LineBeats, way = Ways)) - - if (cacheName == "icache") { - // flush icache when executing fence.i - val flushICache = WireInit(false.B) - BoringUtils.addSink(flushICache, "MOUFlushICache") - metaArray.reset := reset.asBool || flushICache - } - - val arb = Module(new Arbiter(new SimpleBusReqBundle(userBits = userBits), hasCohInt + 1)) - arb.io.in(hasCohInt + 0) <> io.in.req - - s1.io.in <> arb.io.out - /* - val s2BlockByPrefetch = if (cacheLevel == 2) { - s2.io.out.valid && s3.io.in.valid && s3.io.in.bits.req.isPrefetch() && !s3.io.in.ready - } else { false.B } - */ - PipelineConnect(s1.io.out, s2.io.in, s2.io.out.fire(), io.flush(0)) - PipelineConnect(s2.io.out, s3.io.in, s3.io.isFinish, io.flush(1) || s2.io.out.bits.mmio && s2.io.out.bits.req.isPrefetch()/* || s2BlockByPrefetch*/) - io.in.resp <> s3.io.out - s3.io.flush := io.flush(1) - io.out.mem <> s3.io.mem - io.mmio <> s3.io.mmio - io.empty := !s2.io.in.valid && !s3.io.in.valid - s1.io.s2s3Empty := io.empty // FIXME: remove me when do not use nut's cache - - io.in.resp.valid := Mux(s3.io.out.valid && s3.io.out.bits.isPrefetch(), false.B, s3.io.out.valid || s3.io.dataReadRespToL1) - - if (hasCoh) { - val cohReq = io.out.coh.req.bits - // coh does not have user signal, any better code? 
- val coh = Wire(new SimpleBusReqBundle(userBits = userBits)) - coh.apply(addr = cohReq.addr, cmd = cohReq.cmd, size = cohReq.cmd, wdata = cohReq.wdata, wmask = cohReq.wmask) - arb.io.in(0).bits := coh - arb.io.in(0).valid := io.out.coh.req.valid - io.out.coh.req.ready := arb.io.in(0).ready - io.out.coh.resp <> s3.io.cohResp - } else { - io.out.coh.req.ready := true.B - io.out.coh.resp := DontCare - io.out.coh.resp.valid := false.B - s3.io.cohResp.ready := true.B - } - - metaArray.io.r(0) <> s1.io.metaReadBus - dataArray.io.r(0) <> s1.io.dataReadBus - dataArray.io.r(1) <> s3.io.dataReadBus - - metaArray.io.w <> s3.io.metaWriteBus - dataArray.io.w <> s3.io.dataWriteBus - - s2.io.metaReadResp := s1.io.metaReadBus.resp.data - s2.io.dataReadResp := s1.io.dataReadBus.resp.data - s2.io.dataWriteBus := s3.io.dataWriteBus - s2.io.metaWriteBus := s3.io.metaWriteBus - - BoringUtils.addSource(s3.io.in.valid && s3.io.in.bits.hit, "perfCntCondM" + cacheName + "Hit") - - Debug() { - if (debug) { - when(true.B) { - io.in.dump(cacheName + ".in") - printf("%d:" + cacheName + "InReq(%d, %d) InResp(%d, %d) \n", GTimer(), io.in.req.valid, io.in.req.ready, io.in.resp.valid, io.in.resp.ready) - printf("%d:" + cacheName + " {IN s1:(%d,%d), s2:(%d,%d), s3:(%d,%d)} {OUT s1:(%d,%d), s2:(%d,%d), s3:(%d,%d)}\n", - GTimer(), s1.io.in.valid, s1.io.in.ready, s2.io.in.valid, s2.io.in.ready, s3.io.in.valid, s3.io.in.ready, s1.io.out.valid, s1.io.out.ready, s2.io.out.valid, s2.io.out.ready, s3.io.out.valid, s3.io.out.ready) - when (s1.io.in.valid) { printf("%d ", GTimer()) ; printf(p"[${cacheName}.S1]: ${s1.io.in.bits}\n") } - when (s2.io.in.valid) { printf("%d ", GTimer()) ; printf(p"[${cacheName}.S2]: ${s2.io.in.bits.req}\n") } - when (s3.io.in.valid) { printf("%d ", GTimer()) ; printf(p"[${cacheName}.S3]: ${s3.io.in.bits.req}\n") } - // s3.io.mem.dump(cacheName + ".mem") - }} - } -} - -object Cache { - def apply(in: SimpleBusUC, mmio: Seq[SimpleBusUC], flush: UInt, empty: Bool, enable: Boolean = 
true)(implicit cacheConfig: CacheConfig) = { - if (enable) { - val cache = Module(new Cache) - cache.io.flush := flush - cache.io.in <> in - mmio(0) <> cache.io.mmio - empty := cache.io.empty - cache.io.out - } else { - assert(false, "XiangShan should not reach here!") - val addrspace = List(AddressSpace.dram) ++ AddressSpace.mmio - val xbar = Module(new SimpleBusCrossbar1toN(addrspace)) - val busC = WireInit(0.U.asTypeOf(new SimpleBusC)) - busC.mem <> xbar.io.out(0) - xbar.io.in <> in - (mmio zip xbar.io.out.drop(1)) foreach { case (mmio_in, xbar_out) => - mmio_in <> xbar_out - } - empty := false.B - busC - } - } -} diff --git a/src/main/scala/noop/Decode.scala b/src/main/scala/noop/Decode.scala deleted file mode 100644 index 9c4a9c7fafdcfce46f2bbfedd2cfe763b9bd27cb..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/Decode.scala +++ /dev/null @@ -1,74 +0,0 @@ -package noop - -import chisel3._ -import chisel3.util._ -import noop.isa.{RVDInstr, RVFInstr} - -trait HasInstrType { - def InstrN = "b0000".U - def InstrI = "b0100".U - def InstrR = "b0101".U - def InstrS = "b0010".U - def InstrB = "b0001".U - def InstrU = "b0110".U - def InstrJ = "b0111".U - def InstrA = "b1110".U - def InstrSA = "b1111".U // Atom Inst: SC - - def isrfWen(instrType : UInt): Bool = instrType(2) -} - -// trait CompInstConst { -// val RVCRegNumTable = Array( -// BitPat("b000") -> 8.U, -// BitPat("b001") -> 9.U, -// BitPat("b010") -> 10.U, -// BitPat("b011") -> 11.U, -// BitPat("b100") -> 12.U, -// BitPat("b101") -> 13.U, -// BitPat("b110") -> 14.U, -// BitPat("b111") -> 15.U -// ) -// } - -object SrcType { - def reg = "b00".U - def pc = "b01".U - def imm = "b01".U - def fp = "b10".U - def apply() = UInt(2.W) -} - -object FuType { - def num = 6 - def alu = "b000".U - def lsu = "b001".U - def mdu = "b010".U - def csr = "b011".U - def mou = "b100".U - def fpu = "b101".U - def apply() = UInt(log2Up(num).W) -} - -object FuOpType { - def apply() = UInt(6.W) -} - -object 
Instructions extends HasInstrType with HasNOOPParameter { - def NOP = 0x00000013.U - val DecodeDefault = List(InstrN, FuType.csr, CSROpType.jmp) - def DecodeTable = RVIInstr.table ++ NOOPTrap.table ++ - (if (HasMExtension) RVMInstr.table else Nil) ++ - (if (HasCExtension) RVCInstr.table else Nil) ++ - (if (HasFPU) RVFInstr.table ++ RVDInstr.table else Nil) ++ - Priviledged.table ++ - RVAInstr.table ++ - RVZicsrInstr.table ++ RVZifenceiInstr.table -} - -object CInstructions extends HasInstrType with HasNOOPParameter{ - def NOP = 0x00000013.U - val DecodeDefault = List(RVCInstr.ImmNone, RVCInstr.DtCare, RVCInstr.DtCare, RVCInstr.DtCare) - // val DecodeDefault = List(InstrN, FuType.csr, CSROpType.jmp) - def CExtraDecodeTable = RVCInstr.cExtraTable -} diff --git a/src/main/scala/noop/EXU.scala b/src/main/scala/noop/EXU.scala deleted file mode 100644 index 47902ebde861cb7abd4307d11f41758e52d5b125..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/EXU.scala +++ /dev/null @@ -1,151 +0,0 @@ -package noop - -import chisel3._ -import chisel3.util._ -import chisel3.util.experimental.BoringUtils -import utils._ -import bus.simplebus._ -import noop.fu.FPU - -class EXU(implicit val p: NOOPConfig) extends NOOPModule { - val io = IO(new Bundle { - val in = Flipped(Decoupled(new DecodeIO)) - val out = Decoupled(new CommitIO) - val flush = Input(Bool()) - val dmem = new SimpleBusUC(addrBits = VAddrBits) - val forward = new ForwardIO - val memMMU = Flipped(new MemMMUIO) - }) - - val src1 = io.in.bits.data.src1 - val src2 = io.in.bits.data.src2 - - val (fuType, fuOpType) = (io.in.bits.ctrl.fuType, io.in.bits.ctrl.fuOpType) - - val fuValids = Wire(Vec(FuType.num, Bool())) - (0 until FuType.num).map (i => fuValids(i) := (fuType === i.U) && io.in.valid && !io.flush) - - val alu = Module(new ALU) - val aluOut = alu.access(valid = fuValids(FuType.alu), src1 = src1, src2 = src2, func = fuOpType) - alu.io.cfIn := io.in.bits.cf - alu.io.offset := io.in.bits.data.imm - 
alu.io.out.ready := true.B - - val lsu = Module(new LSU) - val lsuTlbPF = WireInit(false.B) - val lsuOut = lsu.access(valid = fuValids(FuType.lsu), src1 = src1, src2 = io.in.bits.data.imm, func = fuOpType, dtlbPF = lsuTlbPF) - lsu.io.wdata := src2 - lsu.io.instr := io.in.bits.cf.instr - io.out.bits.isMMIO := lsu.io.isMMIO || (AddressSpace.isMMIO(io.in.bits.cf.pc) && io.out.valid) - io.dmem <> lsu.io.dmem - lsu.io.out.ready := true.B - - val mdu = Module(new MDU) - val mduOut = mdu.access(valid = fuValids(FuType.mdu), src1 = src1, src2 = src2, func = fuOpType) - mdu.io.out.ready := true.B - - val csr = Module(new CSR) - val csrOut = csr.access(valid = fuValids(FuType.csr), src1 = src1, src2 = src2, func = fuOpType) - csr.io.cfIn := io.in.bits.cf - csr.io.cfIn.exceptionVec(loadAddrMisaligned) := lsu.io.loadAddrMisaligned - csr.io.cfIn.exceptionVec(storeAddrMisaligned) := lsu.io.storeAddrMisaligned - csr.io.instrValid := io.in.valid && !io.flush - io.out.bits.intrNO := csr.io.intrNO - csr.io.out.ready := true.B - - csr.io.imemMMU <> io.memMMU.imem - csr.io.dmemMMU <> io.memMMU.dmem - - val mou = Module(new MOU) - // mou does not write register - mou.access(valid = fuValids(FuType.mou), src1 = src1, src2 = src2, func = fuOpType) - mou.io.cfIn := io.in.bits.cf - mou.io.out.ready := true.B - - val (fpuOut,fpuOutValid) = if(HasFPU){ - val fpu = Module(new FPU) - Debug(){ - when(io.in.valid){ - printf(p"[EXU] at pc=${Hexadecimal(io.in.bits.cf.pc)} " + - p"fpu in valid=${fpu.io.in.valid} " + - p"fpu out valid=${fpu.io.out.valid}\n") - } - } - fpu.io.out.ready := true.B - csr.io.fpu_csr <> fpu.io.fpu_csr - fpu.io.fpWen := io.in.bits.ctrl.fpWen - fpu.io.inputFunc := io.in.bits.ctrl.fpInputFunc - fpu.io.outputFunc := io.in.bits.ctrl.fpOutputFunc - fpu.io.instr := io.in.bits.cf.instr - (fpu.access(fuValids(FuType.fpu), src1, src2, io.in.bits.data.imm, io.in.bits.ctrl.fuOpType), fpu.io.out.valid) - } else { - csr.io.fpu_csr <> DontCare - (0.U,false.B) - } - - - 
io.out.bits.decode := DontCare - (io.out.bits.decode.ctrl, io.in.bits.ctrl) match { case (o, i) => - o.rfWen := i.rfWen && (!lsuTlbPF && !lsu.io.loadAddrMisaligned && !lsu.io.storeAddrMisaligned || !fuValids(FuType.lsu)) && !(csr.io.wenFix && fuValids(FuType.csr)) - o.rfDest := i.rfDest - o.fuType := i.fuType - o.fpWen := i.fpWen && (!lsuTlbPF && !lsu.io.loadAddrMisaligned && !lsu.io.storeAddrMisaligned || !fuValids(FuType.lsu)) && !(csr.io.wenFix && fuValids(FuType.csr)) - } - io.out.bits.decode.cf.pc := io.in.bits.cf.pc - - io.out.bits.decode.cf.instr := io.in.bits.cf.instr - io.out.bits.decode.cf.redirect <> - Mux(mou.io.redirect.valid, mou.io.redirect, - Mux(csr.io.redirect.valid, csr.io.redirect, alu.io.redirect)) - Debug(){ - //when(mou.io.redirect.valid || csr.io.redirect.valid || alu.io.redirect.valid){ - printf("[REDIRECT] inValid:%d mou %x csr %x alu %x \n", io.in.valid, mou.io.redirect.valid, csr.io.redirect.valid, alu.io.redirect.valid) - printf("[REDIRECT] flush: %d mou %x csr %x alu %x\n", io.flush, mou.io.redirect.target, csr.io.redirect.target, alu.io.redirect.target) - //} - } - - // FIXME: should handle io.out.ready == false - io.out.valid := io.in.valid && MuxLookup(fuType, true.B, List( - FuType.lsu -> lsu.io.out.valid, - FuType.mdu -> mdu.io.out.valid, - FuType.fpu -> fpuOutValid - )) - - io.out.bits.commits(FuType.alu) := aluOut - io.out.bits.commits(FuType.lsu) := lsuOut - io.out.bits.commits(FuType.csr) := csrOut - io.out.bits.commits(FuType.mdu) := mduOut - io.out.bits.commits(FuType.mou) := 0.U - io.out.bits.commits(FuType.fpu) := fpuOut - - io.in.ready := !io.in.valid || io.out.fire() - - io.forward.valid := io.in.valid - io.forward.wb.rfWen := io.in.bits.ctrl.rfWen - io.forward.wb.fpWen := io.in.bits.ctrl.fpWen - io.forward.wb.rfDest := io.in.bits.ctrl.rfDest - io.forward.wb.rfData := Mux(alu.io.out.fire(), aluOut, lsuOut) - io.forward.fuType := io.in.bits.ctrl.fuType - - val isBru = ALUOpType.isBru(fuOpType) - 
BoringUtils.addSource(alu.io.out.fire() && !isBru, "perfCntCondMaluInstr") - BoringUtils.addSource(alu.io.out.fire() && isBru, "perfCntCondMbruInstr") - BoringUtils.addSource(lsu.io.out.fire(), "perfCntCondMlsuInstr") - BoringUtils.addSource(mdu.io.out.fire(), "perfCntCondMmduInstr") - BoringUtils.addSource(csr.io.out.fire(), "perfCntCondMcsrInstr") - - if (!p.FPGAPlatform) { - val nooptrap = io.in.bits.ctrl.isNoopTrap && io.in.valid - val cycleCnt = WireInit(0.U(XLEN.W)) - val instrCnt = WireInit(0.U(XLEN.W)) - - BoringUtils.addSink(cycleCnt, "simCycleCnt") - BoringUtils.addSink(instrCnt, "simInstrCnt") - - BoringUtils.addSource(nooptrap, "trapValid") - BoringUtils.addSource(io.in.bits.data.src1, "trapCode") - BoringUtils.addSource(io.in.bits.cf.pc, "trapPC") - BoringUtils.addSource(cycleCnt, "trapCycleCnt") - BoringUtils.addSource(instrCnt, "trapInstrCnt") - } -} diff --git a/src/main/scala/noop/IDU1.scala b/src/main/scala/noop/IDU1.scala deleted file mode 100644 index 54ac5e19a78a017126451bb89d2fc07ab934df4b..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/IDU1.scala +++ /dev/null @@ -1,192 +0,0 @@ -package noop - -import chisel3._ -import chisel3.util._ -import chisel3.util.experimental.BoringUtils - -import utils._ - -class IDU1 extends NOOPModule with HasInstrType with HasExceptionNO { - val io = IO(new Bundle { - val in = Flipped(Decoupled(new CtrlFlowIO)) - val out = Decoupled(new CtrlFlowIO) - val flush = Input(Bool()) - val redirect = new RedirectIO - }) - - val instr = Wire(UInt(32.W)) - val isRVC = instr(1,0) =/= "b11".U - - //RVC support FSM - //only ensure pnpc given by this FSM is right. 
May need flush after 6 offset 32 bit inst - val s_idle :: s_extra :: s_waitnext :: s_waitnext_thenj :: Nil = Enum(4) - val state = RegInit(UInt(2.W), s_idle) - val pcOffsetR = RegInit(UInt(3.W), 0.U) - val pcOffset = Mux(state === s_idle, io.in.bits.pc(2,0), pcOffsetR) - val instIn = Cat(0.U(16.W), io.in.bits.instr) - // val nextState = WireInit(0.U(2.W)) - val canGo = WireInit(false.B) - val canIn = WireInit(false.B) - val brIdx = io.in.bits.brIdx - // val brIdx = 0.U - val rvcFinish = pcOffset === 0.U && (!isRVC || brIdx(0)) || pcOffset === 4.U && (!isRVC || brIdx(0)) || pcOffset === 2.U && (isRVC || brIdx(1)) || pcOffset === 6.U && isRVC - // if brIdx(0) (branch taken at inst with offest 0), ignore the rest part of this instline - // just get next pc and instline from IFU - val rvcNext = pcOffset === 0.U && (isRVC && !brIdx(0)) || pcOffset === 4.U && (isRVC && !brIdx(0)) || pcOffset === 2.U && !isRVC && !brIdx(1) - val rvcSpecial = pcOffset === 6.U && !isRVC && !brIdx(2) - val rvcSpecialJump = pcOffset === 6.U && !isRVC && brIdx(2) - val pnpcIsSeq = brIdx(3) - // val pnpcIsSeqRight = io.in.bits.pnpc === (Cat(io.in.bits.pc(VAddrBits-1,2), 0.U(2.W)) + 4.U) // TODO: add a new user bit bpRight to do this - // assert(pnpcIsSeq === pnpcIsSeqRight) - val flushIFU = (state === s_idle || state === s_extra) && rvcSpecial && io.in.valid && !pnpcIsSeq - when(flushIFU){printf("flushIFU at pc %x offset %x timer:%d\n", io.in.bits.pc, pcOffset, GTimer())} - assert(!flushIFU) - val loadNextInstline = (state === s_idle || state === s_extra) && (rvcSpecial || rvcSpecialJump) && io.in.valid && pnpcIsSeq - // val loadNextInstline =false.B - val pcOut = WireInit(0.U(VAddrBits.W)) - val pnpcOut = WireInit(0.U(VAddrBits.W)) - val specialPCR = Reg(UInt(VAddrBits.W)) // reg for full inst that cross 2 inst line - val specialNPCR = Reg(UInt(VAddrBits.W)) // reg for pnc for full inst jump that cross 2 inst line - val specialInstR = Reg(UInt(16.W)) - val specialIPFR = RegInit(Bool(), 
false.B) - val redirectPC = Cat(io.in.bits.pc(VAddrBits-1,3), 0.U(3.W))+"b1010".U // IDU can got get full inst from a single inst line - val rvcForceLoadNext = (pcOffset === 2.U && !isRVC && io.in.bits.pnpc(2,0) === 4.U && !brIdx(1)) - //------------------------------------------------------ - // rvcForceLoadNext is used to deal with: - // case 1: - // 8010004a: 406007b7 lui a5,0x40600 - // 8010004e: 470d li a4,3 - // 80100050: 00e78623 sb a4,12(a5) # 4060000c <_start-0x3faffff4> - // For icache req inst in seq, if there is no rvcForceLoadNext, - // after 8010004e there will be 8010004c instead of 80100050 - //------------------------------------------------------ - // case 2: - // 80100046: 406007b7 lui a5,0x40600 - // 8010004a: 470d li a4,3 - // force load next instline into ID stage, if bp wrong, it will be flushed by flushIFU - //------------------------------------------------------ - // if there is a j inst in current inst line, a redirect req will be sent by ALU before invalid inst exception being committed - // when brIdx(1), next instline will just be branch target, eatline is no longer needed - - // only for test, add this to pipeline when do real implementation - // val predictBranch = io.in.valid && Mux(io.in.bits.pc(1), io.in.bits.pc + 2.U === io.in.bits.pnpc, io.in.bits.pc + 4.U === io.in.bits.pnpc) - // val flush = rvcSpecial - instr := Mux((state === s_waitnext || state === s_waitnext_thenj), Cat(instIn(15,0), specialInstR), LookupTree(pcOffset, List( - "b000".U -> instIn(31,0), - "b010".U -> instIn(31+16,16), - "b100".U -> instIn(63,32), - "b110".U -> instIn(63+16,32+16) - ))) - - io.redirect.target := redirectPC - io.redirect.valid := flushIFU - - when(!io.flush){ - switch(state){ - is(s_idle){//decode current pc in pipeline - canGo := rvcFinish || rvcNext - canIn := rvcFinish || rvcForceLoadNext - pcOut := io.in.bits.pc - pnpcOut := Mux(rvcFinish, io.in.bits.pnpc, Mux(isRVC, io.in.bits.pc+2.U, io.in.bits.pc+4.U)) - when(io.out.fire() && 
rvcFinish){state := s_idle} - when(io.out.fire() && rvcNext){ - state := s_extra - pcOffsetR := pcOffset + Mux(isRVC, 2.U, 4.U) - } - when(rvcSpecial && io.in.valid){ - state := s_waitnext - specialPCR := pcOut - specialInstR := io.in.bits.instr(63,63-16+1) - specialIPFR := io.in.bits.exceptionVec(instrPageFault) - } - when(rvcSpecialJump && io.in.valid){ - state := s_waitnext_thenj - specialPCR := pcOut - specialNPCR := io.in.bits.pnpc - specialInstR := io.in.bits.instr(63,63-16+1) - specialIPFR := io.in.bits.exceptionVec(instrPageFault) - } - } - is(s_extra){//get 16 aligned inst, pc controled by this FSM - canGo := rvcFinish || rvcNext - canIn := rvcFinish || rvcForceLoadNext - pcOut := Cat(io.in.bits.pc(VAddrBits-1,3), pcOffsetR(2,0)) - pnpcOut := Mux(rvcFinish, io.in.bits.pnpc, Mux(isRVC, pcOut+2.U, pcOut+4.U)) - when(io.out.fire() && rvcFinish){state := s_idle} - when(io.out.fire() && rvcNext){ - state := s_extra - pcOffsetR := pcOffset + Mux(isRVC, 2.U, 4.U) - } - when(rvcSpecial && io.in.valid){ - state := s_waitnext - specialPCR := pcOut - specialInstR := io.in.bits.instr(63,63-16+1) - specialIPFR := io.in.bits.exceptionVec(instrPageFault) - } - when(rvcSpecialJump && io.in.valid){ - state := s_waitnext_thenj - specialPCR := pcOut - specialNPCR := io.in.bits.pnpc - specialInstR := io.in.bits.instr(63,63-16+1) - specialIPFR := io.in.bits.exceptionVec(instrPageFault) - } - } - is(s_waitnext){//require next 64bits, for this inst has size 32 and offset 6 - //ignore bp result, use pc+4 instead - pcOut := specialPCR - pnpcOut := specialPCR + 4.U - // pnpcOut := Mux(rvcFinish, io.in.bits.pnpc, Mux(isRVC, pcOut+2.U, pcOut+4.U)) - canGo := io.in.valid - canIn := false.B - when(io.out.fire()){ - state := s_extra - pcOffsetR := "b010".U - } - } - is(s_waitnext_thenj){//require next 64bits, for this inst has size 32 and offset 6 - //use bp result - pcOut := specialPCR - pnpcOut := specialNPCR - // pnpcOut := Mux(rvcFinish, io.in.bits.pnpc, Mux(isRVC, pcOut+2.U, 
pcOut+4.U)) - canGo := io.in.valid - canIn := true.B - when(io.out.fire()){ - state := s_idle - } - } - // is(s_readnext){//npc right, get next 64 inst bits, flush pipeline is not needed - // //ignore bp result, use pc+4 instead - // pcOut := specialPCR - // pnpcOut := specialPCR + 4.U - // // pnpcOut := Mux(rvcFinish, io.in.bits.pnpc, Mux(isRVC, pcOut+2.U, pcOut+4.U)) - // canGo := io.in.valid - // canIn := false.B - // when(io.out.fire()){ - // state := s_extra - // pcOffsetR := "b010".U - // } - // } - } - }.otherwise{ - state := s_idle - canGo := DontCare - canIn := DontCare - pcOut := DontCare - pnpcOut := DontCare - } - - //output signals - io.out.bits := DontCare - io.out.bits.redirect.valid := false.B - io.out.bits.pc := pcOut - io.out.bits.pnpc := pnpcOut - io.out.bits.instr := instr - io.out.bits.brIdx := io.in.bits.brIdx - - io.out.valid := io.in.valid && canGo - io.in.ready := (!io.in.valid || (io.out.fire() && canIn) || loadNextInstline) - - io.out.bits.exceptionVec := io.in.bits.exceptionVec/*.map(_ := false.B)*/ //Fix by zhangzifei from false.B - io.out.bits.exceptionVec(instrPageFault) := io.in.bits.exceptionVec(instrPageFault) || specialIPFR && (state === s_waitnext_thenj || state === s_waitnext) - io.out.bits.crossPageIPFFix := io.in.bits.exceptionVec(instrPageFault) && (state === s_waitnext_thenj || state === s_waitnext) && !specialIPFR -} diff --git a/src/main/scala/noop/IDU2.scala b/src/main/scala/noop/IDU2.scala deleted file mode 100644 index 041b5018e9ed2d4aabf6172e274050e2ec4d922d..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/IDU2.scala +++ /dev/null @@ -1,217 +0,0 @@ -package noop - -import chisel3._ -import chisel3.util._ -import chisel3.util.experimental.BoringUtils -import noop.isa.{RVDInstr, RVFInstr, RVF_LSUInstr, RVD_LSUInstr} -import utils._ - -class IDU2(implicit val p: NOOPConfig) extends NOOPModule with HasInstrType { - val io = IO(new Bundle { - val in = Flipped(Decoupled(new CtrlFlowIO)) - val out = 
Decoupled(new DecodeIO) - val flush = Input(Bool()) - }) - - val hasIntr = Wire(Bool()) - val hasIntrOrExceptino = hasIntr || io.in.bits.exceptionVec(instrPageFault) - val instr = io.in.bits.instr(31, 0) - val decodeList = ListLookup(instr, Instructions.DecodeDefault, Instructions.DecodeTable) - val commonInstrType :: commonFuType :: commonFuOpType :: Nil = decodeList - - val intrInstrType :: intrFuType :: intrFuOpType :: Nil = Instructions.DecodeDefault - - //(isFp, src1Type, src2Type, src3Type, rfWen, fpWen, fuOpType, inputFunc, outputFunc) - val fpExtraDecodeTable = RVFInstr.extraTable ++ RVDInstr.extraTable - val isFp :: fpSrc1Type :: fpSrc2Type :: fpSrc3Type :: fpRfWen :: fpWen :: fpFuOpType :: fpInputFunc :: fpOutputFunc :: Nil = - if(HasFPU) ListLookup(instr, RVFInstr.extraTableDefault, fpExtraDecodeTable) else RVFInstr.extraTableDefault - - val floatLdStInstrs = List( - RVF_LSUInstr.FLW, - RVF_LSUInstr.FSW, - RVD_LSUInstr.FLD, - RVCInstr.C_FLD, - RVCInstr.C_FLDSP, - RVD_LSUInstr.FSD, - RVCInstr.C_FSD, - RVCInstr.C_FSDSP - ) - - def treeCmp(key: UInt, cmpList: List[BitPat]): Bool = { - cmpList.size match { - case 1 => - key === cmpList.head - case n => - treeCmp(key, cmpList take n/2) || treeCmp(key, cmpList drop n/2) - } - } - - val isFloatLdSd = if(HasFPU) treeCmp(instr, floatLdStInstrs) else false.B - - val isRVFD = isFp.asBool() - val instrType = Mux(hasIntrOrExceptino, - intrInstrType, - commonInstrType - ) - val fuType = Mux(hasIntrOrExceptino, - intrFuType, - Mux(isRVFD && !isFloatLdSd, - FuType.fpu, - commonFuType - ) - ) - val fuOpType = Mux(hasIntrOrExceptino, - intrFuOpType, - Mux(isRVFD, fpFuOpType, commonFuOpType) - ) - - - val isRVC = instr(1,0) =/= "b11".U - val rvcImmType :: rvcSrc1Type :: rvcSrc2Type :: rvcDestType :: Nil = - ListLookup(instr, CInstructions.DecodeDefault, CInstructions.CExtraDecodeTable) - - io.out.bits := DontCare - - io.out.bits.ctrl.fuType := fuType - io.out.bits.ctrl.fuOpType := fuOpType - io.out.bits.ctrl.fpInputFunc := 
fpInputFunc - io.out.bits.ctrl.fpOutputFunc := fpOutputFunc - - val SrcTypeTable = List( - InstrI -> (SrcType.reg, SrcType.imm), - InstrR -> (SrcType.reg, SrcType.reg), - InstrS -> (SrcType.reg, SrcType.reg), - InstrSA-> (SrcType.reg, SrcType.reg), - InstrB -> (SrcType.reg, SrcType.reg), - InstrU -> (SrcType.pc , SrcType.imm), - InstrJ -> (SrcType.pc , SrcType.imm), - InstrN -> (SrcType.pc , SrcType.imm) - ) - val src1Type = Mux(isRVFD, - fpSrc1Type, - LookupTree(instrType, SrcTypeTable.map(p => (p._1, p._2._1))) - ) - val src2Type = Mux(isRVFD, - fpSrc2Type, - LookupTree(instrType, SrcTypeTable.map(p => (p._1, p._2._2))) - ) - - val (rs, rt, rd) = (instr(19, 15), instr(24, 20), instr(11, 7)) - // see riscv-spec vol1, Table 16.1: Compressed 16-bit RVC instruction formats. - val rs1 = instr(11,7) - val rs2 = instr(6,2) - val rs1p = LookupTree(instr(9,7), RVCInstr.RVCRegNumTable.map(p => (p._1, p._2))) - val rs2p = LookupTree(instr(4,2), RVCInstr.RVCRegNumTable.map(p => (p._1, p._2))) - val rvc_shamt = Cat(instr(12),instr(6,2)) - // val rdp_rs1p = LookupTree(instr(9,7), RVCRegNumTable) - // val rdp = LookupTree(instr(4,2), RVCRegNumTable) - - val RegLookUpTable = List( - RVCInstr.DtCare -> 0.U, - RVCInstr.REGrs -> rs, - RVCInstr.REGrt -> rt, - RVCInstr.REGrd -> rd, - RVCInstr.REGrs1 -> rs1, - RVCInstr.REGrs2 -> rs2, - RVCInstr.REGrs1p -> rs1p, - RVCInstr.REGrs2p -> rs2p, - RVCInstr.REGx1 -> 1.U, - RVCInstr.REGx2 -> 2.U - ) - - val rvc_src1 = LookupTree(rvcSrc1Type, RegLookUpTable.map(p => (p._1, p._2))) - val rvc_src2 = LookupTree(rvcSrc2Type, RegLookUpTable.map(p => (p._1, p._2))) - val rvc_dest = LookupTree(rvcDestType, RegLookUpTable.map(p => (p._1, p._2))) - - val rfSrc1 = Mux(isRVC, rvc_src1, rs) - val rfSrc2 = Mux(isRVC, rvc_src2, rt) - val rfDest = Mux(isRVC, rvc_dest, rd) - - val rfWen = !hasIntrOrExceptino && Mux(isRVFD, fpRfWen.asBool(), isrfWen(instrType)) - - // TODO: refactor decode logic - // make non-register addressing to zero, since isu.sb.isBusy(0) 
=== false.B - io.out.bits.ctrl.rfSrc1 := Mux(src1Type === SrcType.pc, 0.U, rfSrc1) - io.out.bits.ctrl.rfSrc2 := Mux(src2Type === SrcType.imm, 0.U, rfSrc2) - io.out.bits.ctrl.rfWen := rfWen - io.out.bits.ctrl.fpWen := fpWen.asBool() - io.out.bits.ctrl.rfDest := Mux(fpWen.asBool() || rfWen, rfDest, 0.U) - - io.out.bits.data := DontCare - val imm = LookupTree(instrType, List( - InstrI -> SignExt(instr(31, 20), XLEN), - InstrS -> SignExt(Cat(instr(31, 25), instr(11, 7)), XLEN), - InstrSA -> SignExt(Cat(instr(31, 25), instr(11, 7)), XLEN), - InstrB -> SignExt(Cat(instr(31), instr(7), instr(30, 25), instr(11, 8), 0.U(1.W)), XLEN), - InstrU -> SignExt(Cat(instr(31, 12), 0.U(12.W)), XLEN),//fixed - InstrJ -> SignExt(Cat(instr(31), instr(19, 12), instr(20), instr(30, 21), 0.U(1.W)), XLEN) - )) - val immrvc = LookupTree(rvcImmType, List( - // InstrIW -> Cat(Fill(20+32, instr(31)), instr(31, 20)),//fixed - RVCInstr.ImmNone -> 0.U(XLEN.W), - RVCInstr.ImmLWSP -> ZeroExt(Cat(instr(3,2), instr(12), instr(6,4), 0.U(2.W)), XLEN), - RVCInstr.ImmLDSP -> ZeroExt(Cat(instr(4,2), instr(12), instr(6,5), 0.U(3.W)), XLEN), - RVCInstr.ImmSWSP -> ZeroExt(Cat(instr(8,7), instr(12,9), 0.U(2.W)), XLEN), - RVCInstr.ImmSDSP -> ZeroExt(Cat(instr(9,7), instr(12,10), 0.U(3.W)), XLEN), - RVCInstr.ImmSW -> ZeroExt(Cat(instr(5), instr(12,10), instr(6), 0.U(2.W)), XLEN), - RVCInstr.ImmSD -> ZeroExt(Cat(instr(6,5), instr(12,10), 0.U(3.W)), XLEN), - RVCInstr.ImmLW -> ZeroExt(Cat(instr(5), instr(12,10), instr(6), 0.U(2.W)), XLEN), - RVCInstr.ImmLD -> ZeroExt(Cat(instr(6,5), instr(12,10), 0.U(3.W)), XLEN), - RVCInstr.ImmJ -> SignExt(Cat(instr(12), instr(8), instr(10,9), instr(6), instr(7), instr(2), instr(11), instr(5,3), 0.U(1.W)), XLEN), - RVCInstr.ImmB -> SignExt(Cat(instr(12), instr(6,5), instr(2), instr(11,10), instr(4,3), 0.U(1.W)), XLEN), - RVCInstr.ImmLI -> SignExt(Cat(instr(12), instr(6,2)), XLEN), - RVCInstr.ImmLUI -> SignExt(Cat(instr(12), instr(6,2), 0.U(12.W)), XLEN), - RVCInstr.ImmADDI -> 
SignExt(Cat(instr(12), instr(6,2)), XLEN), - RVCInstr.ImmADDI16SP-> SignExt(Cat(instr(12), instr(4,3), instr(5), instr(2), instr(6), 0.U(4.W)), XLEN), - RVCInstr.ImmADD4SPN-> ZeroExt(Cat(instr(10,7), instr(12,11), instr(5), instr(6), 0.U(2.W)), XLEN) - // ImmFLWSP -> - // ImmFLDSP -> - )) - io.out.bits.data.imm := Mux(isRVC, immrvc, imm) - - when (fuType === FuType.alu) { - def isLink(reg: UInt) = (reg === 1.U || reg === 5.U) - when (isLink(rfDest) && fuOpType === ALUOpType.jal) { io.out.bits.ctrl.fuOpType := ALUOpType.call } - when (fuOpType === ALUOpType.jalr) { - when (isLink(rfSrc1)) { io.out.bits.ctrl.fuOpType := ALUOpType.ret } - when (isLink(rfDest)) { io.out.bits.ctrl.fuOpType := ALUOpType.call } - } - } - // fix LUI - io.out.bits.ctrl.src1Type := Mux(instr(6,0) === "b0110111".U, SrcType.reg, src1Type) - io.out.bits.ctrl.src2Type := src2Type - io.out.bits.ctrl.src3Type := fpSrc3Type - - // io.out.bits.ctrl.isInvOpcode := (instrType === InstrN) && io.in.valid - io.out.bits.ctrl.isNoopTrap := (instr(31,0) === NOOPTrap.TRAP) && io.in.valid - - //output signals - - io.out.valid := io.in.valid - io.in.ready := !io.in.valid || io.out.fire() && !hasIntr - io.out.bits.cf <> io.in.bits - - Debug(){ - when(io.out.fire()){printf("[IDU] issue: pc %x npc %x instr %x\n", io.out.bits.cf.pc, io.out.bits.cf.pnpc, io.out.bits.cf.instr)} - } - - val intrVec = WireInit(0.U(12.W)) - BoringUtils.addSink(intrVec, "intrVecIDU") - io.out.bits.cf.intrVec.zip(intrVec.asBools).map{ case(x, y) => x := y } - hasIntr := intrVec.orR - - io.out.bits.cf.exceptionVec.map(_ := false.B) - io.out.bits.cf.exceptionVec(illegalInstr) := (!isRVFD && instrType === InstrN && !hasIntr) && io.in.valid - io.out.bits.cf.exceptionVec(instrPageFault) := io.in.bits.exceptionVec(instrPageFault) - - io.out.bits.ctrl.isNoopTrap := (instr === NOOPTrap.TRAP) && io.in.valid - - if (!p.FPGAPlatform) { - val isWFI = (instr === Priviledged.WFI) && io.in.valid - BoringUtils.addSource(isWFI, "isWFI") - } -} - -// Note 
-// C.LWSP is only valid when rd̸=x0; the code points with rd=x0 are reserved -// C.LDSP is only valid when rd̸=x0; the code points with rd=x0 are reserved. diff --git a/src/main/scala/noop/IFU.scala b/src/main/scala/noop/IFU.scala deleted file mode 100644 index 1c363088e91ebc604a39248c1d3587fd420ac825..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/IFU.scala +++ /dev/null @@ -1,115 +0,0 @@ -package noop - -import chisel3._ -import chisel3.util._ -import chisel3.util.experimental.BoringUtils - -import utils._ -import bus.simplebus._ - -trait HasResetVector { - val resetVector = 0x40000000L//TODO: set reset vec -} - -class IFU extends NOOPModule with HasResetVector { - val io = IO(new Bundle { - - val imem = new SimpleBusUC(userBits = VAddrBits*2 + 4, addrBits = VAddrBits) - // val pc = Input(UInt(VAddrBits.W)) - val out = Decoupled(new CtrlFlowIO) - - val redirect = Flipped(new RedirectIO) - val flushVec = Output(UInt(4.W)) - val bpFlush = Output(Bool()) - val ipf = Input(Bool()) - }) - - // pc - val pc = RegInit(resetVector.U(VAddrBits.W)) - val pcUpdate = io.redirect.valid || io.imem.req.fire() - val snpc = Mux(pc(1), pc + 2.U, pc + 4.U) // sequential next pc - - val bp1 = Module(new BPU1) - - // - val lateJump = bp1.io.lateJump - val lateJumpLatch = RegInit(false.B) - when(pcUpdate || bp1.io.flush) { - lateJumpLatch := Mux(bp1.io.flush, false.B, lateJump && !lateJumpLatch) - } - val lateJumpTarget = RegEnable(bp1.io.out.target, lateJump) - val lateJumpForceSeq = lateJump && bp1.io.out.valid - val lateJumpForceTgt = lateJumpLatch && !bp1.io.flush - - // predicted next pc - val pnpc = Mux(lateJump, snpc, bp1.io.out.target) - val pbrIdx = bp1.io.brIdx - val npc = Mux(io.redirect.valid, io.redirect.target, Mux(lateJumpLatch, lateJumpTarget, Mux(bp1.io.out.valid, pnpc, snpc))) - val npcIsSeq = Mux(io.redirect.valid , false.B, Mux(lateJumpLatch, false.B, Mux(lateJump, true.B, Mux(bp1.io.out.valid, false.B, true.B)))) - // Debug(){ - // 
printf("[NPC] %x %x %x %x %x %x\n",lateJumpLatch, lateJumpTarget, lateJump, bp1.io.out.valid, pnpc, snpc) - // } - - // val npc = Mux(io.redirect.valid, io.redirect.target, Mux(io.redirectRVC.valid, io.redirectRVC.target, snpc)) - val brIdx = Wire(UInt(4.W)) - // brIdx(0) -> branch at pc offset 0 (mod 4) - // brIdx(1) -> branch at pc offset 2 (mod 4) - // brIdx(2) -> branch at pc offset 6 (mod 8), and this inst is not rvc inst - brIdx := Cat(npcIsSeq, Mux(io.redirect.valid, 0.U, pbrIdx)) - //TODO: BP will be disabled shortly after a redirect request - - bp1.io.in.pc.valid := io.imem.req.fire() // only predict when Icache accepts a request - bp1.io.in.pc.bits := npc // predict one cycle early - // bp1.io.flush := io.redirect.valid - bp1.io.flush := io.redirect.valid - //val bp2 = Module(new BPU2) - //bp2.io.in.bits := io.out.bits - //bp2.io.in.valid := io.imem.resp.fire() - - when (pcUpdate) { - pc := npc - // printf("[IF1] pc=%x\n", pc) - } - - Debug(){ - when(pcUpdate) { - printf("[IFUPC] pc:%x pcUpdate:%d npc:%x RedValid:%d RedTarget:%x LJL:%d LJTarget:%x LJ:%d snpc:%x bpValid:%d pnpn:%x \n",pc, pcUpdate, npc, io.redirect.valid,io.redirect.target,lateJumpLatch,lateJumpTarget,lateJump,snpc,bp1.io.out.valid,pnpc) - //printf(p"[IFUIN] redirect: ${io.redirect} \n") - } - } - - io.flushVec := Mux(io.redirect.valid, "b1111".U, 0.U) - io.bpFlush := false.B - - io.imem.req.bits.apply(addr = Cat(pc(VAddrBits-1,1),0.U(1.W)), //cache will treat it as Cat(pc(63,3),0.U(3.W)) - size = "b11".U, cmd = SimpleBusCmd.read, wdata = 0.U, wmask = 0.U, user = Cat(brIdx(3,0), npc(VAddrBits-1, 0), pc(VAddrBits-1, 0))) - io.imem.req.valid := io.out.ready - //TODO: add ctrlFlow.exceptionVec - io.imem.resp.ready := io.out.ready || io.flushVec(0) - - io.out.bits := DontCare - //inst path only uses 32bit inst, get the right inst according to pc(2) - - Debug(){ - when(io.imem.req.fire()){ - printf("[IFI] pc=%x user=%x %x %x %x \n", io.imem.req.bits.addr, io.imem.req.bits.user.getOrElse(0.U), 
io.redirect.valid, pbrIdx, brIdx) - } - when (io.out.fire()) { - printf("[IFO] pc=%x inst=%x\n", io.out.bits.pc, io.out.bits.instr) - } - } - - // io.out.bits.instr := (if (XLEN == 64) io.imem.resp.bits.rdata.asTypeOf(Vec(2, UInt(32.W)))(io.out.bits.pc(2)) - // else io.imem.resp.bits.rdata) - io.out.bits.instr := io.imem.resp.bits.rdata - io.imem.resp.bits.user.map{ case x => - io.out.bits.pc := x(VAddrBits-1,0) - io.out.bits.pnpc := x(VAddrBits*2-1,VAddrBits) - io.out.bits.brIdx := x(VAddrBits*2 + 3, VAddrBits*2) - } - io.out.bits.exceptionVec(instrPageFault) := io.ipf - io.out.valid := io.imem.resp.valid && !io.flushVec(0) - - BoringUtils.addSource(BoolStopWatch(io.imem.req.valid, io.imem.resp.fire()), "perfCntCondMimemStall") - BoringUtils.addSource(io.flushVec.orR, "perfCntCondMifuFlush") -} diff --git a/src/main/scala/noop/ISU.scala b/src/main/scala/noop/ISU.scala deleted file mode 100644 index 25e137eb201c2ed39ecb7fb62e8b5b1baf47a090..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/ISU.scala +++ /dev/null @@ -1,159 +0,0 @@ -package noop - -import chisel3._ -import chisel3.util._ -import chisel3.util.experimental.BoringUtils - -import utils._ - -trait HasRegFileParameter { - val NRReg = 32 -} - -class RegFile(width:Int, hasZero:Boolean = true) extends HasRegFileParameter with HasNOOPParameter { - val rf = Mem(NRReg, UInt(width.W)) - def read(addr: UInt) : UInt = if(hasZero) Mux(addr === 0.U, 0.U, rf(addr)) else rf(addr) - def write(addr: UInt, data: UInt) = { rf(addr) := data } -} - -class ScoreBoard(hasZero:Boolean = true) extends HasRegFileParameter { - val busy = RegInit(0.U(NRReg.W)) - def isBusy(idx: UInt): Bool = busy(idx) - def mask(idx: UInt) = (1.U(NRReg.W) << idx)(NRReg-1, 0) - def update(setMask: UInt, clearMask: UInt) = { - // When clearMask(i) and setMask(i) are both set, setMask(i) wins. - // This can correctly record the busy bit when reg(i) is written - // and issued at the same cycle. 
- // Note that rf(0) is always free when hasZero==true. - if(hasZero) busy := Cat(((busy & ~clearMask) | setMask)(NRReg-1, 1), 0.U(1.W)) - else busy := ((busy & ~clearMask) | setMask) - } -} - -class ISU(implicit val p: NOOPConfig) extends NOOPModule with HasRegFileParameter { - val io = IO(new Bundle { - val in = Flipped(Decoupled(new DecodeIO)) - val out = Decoupled(new DecodeIO) - val wb = Flipped(new WriteBackIO) - val flush = Input(Bool()) - val forward = Flipped(new ForwardIO) - }) - - io.out.bits := DontCare - val rfSrc1 = io.in.bits.ctrl.rfSrc1 - val rfSrc2 = io.in.bits.ctrl.rfSrc2 - val rfDest = io.in.bits.ctrl.rfDest - - def isDepend(rfSrc: UInt, rfDest: UInt, wen: Bool): Bool = (rfSrc =/= 0.U) && (rfSrc === rfDest) && wen - - val forwardRfWen = io.forward.wb.rfWen && io.forward.valid - val dontForward = (io.forward.fuType =/= FuType.alu) && (io.forward.fuType =/= FuType.lsu) - val src1DependEX = isDepend(rfSrc1, io.forward.wb.rfDest, forwardRfWen) - val src2DependEX = isDepend(rfSrc2, io.forward.wb.rfDest, forwardRfWen) - val src1DependWB = isDepend(rfSrc1, io.wb.rfDest, io.wb.rfWen) - val src2DependWB = isDepend(rfSrc2, io.wb.rfDest, io.wb.rfWen) - - val src1ForwardNextCycle = src1DependEX && !dontForward - val src2ForwardNextCycle = src2DependEX && !dontForward - val src1Forward = src1DependWB && Mux(dontForward, !src1DependEX, true.B) - val src2Forward = src2DependWB && Mux(dontForward, !src2DependEX, true.B) - - val sb = new ScoreBoard - val src1Ready = !sb.isBusy(rfSrc1) || src1ForwardNextCycle || src1Forward - val src2Ready = !sb.isBusy(rfSrc2) || src2ForwardNextCycle || src2Forward - - val fpr = new RegFile(width = XLEN, hasZero = false) - - val (fprSrcReady,fprSrcData):(Bool,Array[UInt]) = if(HasFPU){ - val fsb = new ScoreBoard(hasZero = false) - val forwardFpWen = io.forward.wb.fpWen && io.forward.valid - - when (io.wb.fpWen) { - fpr.write(io.wb.rfDest, io.wb.rfData) - } - - val fsbClearMask = Mux(io.wb.fpWen && !isDepend(io.wb.rfDest, 
io.forward.wb.rfDest, forwardFpWen), - fsb.mask(io.wb.rfDest), 0.U(NRReg.W)) - val fsbSetMask = Mux(io.out.fire() && io.in.bits.ctrl.fpWen, fsb.mask(rfDest), 0.U) - when (io.flush) { fsb.update(0.U, Fill(NRReg, 1.U(1.W))) } - .otherwise { fsb.update(fsbSetMask, fsbClearMask) } - - val instr = io.in.bits.cf.instr - - val (fpSrc1,fpSrc2,fpSrc3) = (rfSrc1, rfSrc2, instr(31, 27)) - val srcs = Seq(fpSrc1, fpSrc2, fpSrc3).zip(Seq( - io.in.bits.ctrl.src1Type, - io.in.bits.ctrl.src2Type, - io.in.bits.ctrl.src3Type - )) - val dataVec = Array.fill(3)(Wire(UInt(XLEN.W))) - // result - (srcs.zipWithIndex.map({ - case ((src, t),i) => - val dependEX = isDepend(src, io.forward.wb.rfDest, forwardFpWen) - val dependWB = isDepend(src, io.wb.rfDest, io.wb.fpWen) - val forwardEX = dependEX && !dontForward - val forwardWB = dependWB && Mux(dontForward, !dependEX, true.B) - dataVec(i) := MuxCase(fpr.read(src), Seq( - forwardEX -> io.forward.wb.rfData, - forwardWB -> io.wb.rfData - )) - (!fsb.busy(src) || forwardEX || forwardWB) || (t =/= SrcType.fp) - }).reduceLeft(_ && _), dataVec) - } else (true.B, Array.fill(3)(0.U)) - - io.out.valid := io.in.valid && src1Ready && src2Ready && fprSrcReady - - val rf = new RegFile(XLEN) -// io.out.bits.data.src1 := Mux1H(List( -// (io.in.bits.ctrl.src1Type === SrcType.pc) -> SignExt(io.in.bits.cf.pc, AddrBits), -// src1ForwardNextCycle -> io.forward .wb.rfData, -// (src1Forward && !src1ForwardNextCycle) -> io.wb.rfData, -// ((io.in.bits.ctrl.src1Type =/= SrcType.pc) && !src1ForwardNextCycle && !src1Forward) -> rf.read(rfSrc1) -// )) -// io.out.bits.data.src2 := Mux1H(List( -// (io.in.bits.ctrl.src2Type =/= SrcType.reg) -> io.in.bits.data.imm, -// src2ForwardNextCycle -> io.forward.wb.rfData, -// (src2Forward && !src2ForwardNextCycle) -> io.wb.rfData, -// ((io.in.bits.ctrl.src2Type === SrcType.reg) && !src2ForwardNextCycle && !src2Forward) -> rf.read(rfSrc2) -// )) - - io.out.bits.data.src1 := MuxCase(rf.read(rfSrc1), Seq( - (io.in.bits.ctrl.src1Type 
=== SrcType.fp) -> fprSrcData(0), - (io.in.bits.ctrl.src1Type === SrcType.pc) -> SignExt(io.in.bits.cf.pc, AddrBits), - src1ForwardNextCycle -> io.forward.wb.rfData, - src1Forward -> io.wb.rfData - )) - io.out.bits.data.src2 := MuxCase(rf.read(rfSrc2), Seq( - (io.in.bits.ctrl.src2Type === SrcType.fp) -> fprSrcData(1), - (io.in.bits.ctrl.src2Type =/= SrcType.reg) -> io.in.bits.data.imm, - src2ForwardNextCycle -> io.forward.wb.rfData, - src2Forward -> io.wb.rfData - )) - - io.out.bits.data.imm := Mux(io.in.bits.ctrl.src3Type===SrcType.fp, fprSrcData(2), io.in.bits.data.imm) - - io.out.bits.cf <> io.in.bits.cf - io.out.bits.ctrl := io.in.bits.ctrl - io.out.bits.ctrl.isSrc1Forward := src1ForwardNextCycle - io.out.bits.ctrl.isSrc2Forward := src2ForwardNextCycle - - when (io.wb.rfWen) { rf.write(io.wb.rfDest, io.wb.rfData) } - - val wbClearMask = Mux(io.wb.rfWen && !isDepend(io.wb.rfDest, io.forward.wb.rfDest, forwardRfWen), sb.mask(io.wb.rfDest), 0.U(NRReg.W)) - val isuFireSetMask = Mux(io.out.fire() && io.in.bits.ctrl.rfWen, sb.mask(rfDest), 0.U) - when (io.flush) { sb.update(0.U, Fill(NRReg, 1.U(1.W))) } - .otherwise { sb.update(isuFireSetMask, wbClearMask) } - - io.in.ready := !io.in.valid || io.out.fire() - - // read after write - BoringUtils.addSource(io.in.valid && !io.out.valid, "perfCntCondMrawStall") - BoringUtils.addSource(io.out.valid && !io.out.fire(), "perfCntCondMexuBusy") - - if (!p.FPGAPlatform) { - val gRegs = (0 until NRReg).map(i => rf.read(i.U)) - val fRegs = (0 until NRReg).map(i => if(HasFPU) fpr.read(i.U) else 0.U) - BoringUtils.addSource(VecInit(gRegs ++ fRegs), "difftestRegs") - } -} diff --git a/src/main/scala/noop/NOOP.scala b/src/main/scala/noop/NOOP.scala deleted file mode 100644 index eadde5b5e97488c285e57895f492f3172d927785..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/NOOP.scala +++ /dev/null @@ -1,117 +0,0 @@ -package noop - -import chisel3._ -import chisel3.util._ -import chisel3.util.experimental.BoringUtils 
-import bus.simplebus._ -import bus.axi4._ -import utils._ - -trait HasNOOPParameter { - val XLEN = 64 - val HasMExtension = true - val HasCExtension = true - val HasDiv = true - val HasIcache = true - val HasDcache = true - val EnableStoreQueue = false - val AddrBits = 64 // AddrBits is used in some cases - val VAddrBits = 39 // VAddrBits is Virtual Memory addr bits - val PAddrBits = 32 // PAddrBits is Phyical Memory addr bits - val AddrBytes = AddrBits / 8 // unused - val DataBits = XLEN - val DataBytes = DataBits / 8 - val HasFPU = true -} - -abstract class NOOPModule extends Module with HasNOOPParameter with HasExceptionNO -abstract class NOOPBundle extends Bundle with HasNOOPParameter - -case class NOOPConfig ( - FPGAPlatform: Boolean = true, - EnableDebug: Boolean = false -) - -object AddressSpace { - // (start, size) - def mmio = List((0x0000000040000000L, 0x0000000010000000L)) - def dram = (0x0000000080000000L, 0x0000000010000000L) - - //def isMMIO(addr: UInt) = mmio.map(range => ((addr & ~((range._2 - 1).U(32.W))) === range._1.U)).reduce(_ || _) - def isMMIO(addr: UInt) = addr(31,28) === "h4".U -} - -class NOOP(implicit val p: NOOPConfig) extends NOOPModule { - val io = IO(new Bundle { - val imem = new SimpleBusC - val dmem = new SimpleBusC - val mmio = new SimpleBusUC - val frontend = Flipped(new SimpleBusUC) - }) - - val ifu = Module(new IFU) - val idu1 = Module(new IDU1) - val idu2 = Module(new IDU2) - val isu = Module(new ISU) - val exu = Module(new EXU) - val wbu = Module(new WBU) - - def pipelineConnect2[T <: Data](left: DecoupledIO[T], right: DecoupledIO[T], - isFlush: Bool, entries: Int = 4, pipe: Boolean = false) = { - right <> FlushableQueue(left, isFlush, entries = entries, pipe = pipe) - } - - pipelineConnect2(ifu.io.out, idu1.io.in, ifu.io.flushVec(0)) - PipelineConnect(idu1.io.out, idu2.io.in, idu2.io.out.fire(), ifu.io.flushVec(1)) - PipelineConnect(idu2.io.out, isu.io.in, isu.io.out.fire(), ifu.io.flushVec(1)) - PipelineConnect(isu.io.out, 
exu.io.in, exu.io.out.fire(), ifu.io.flushVec(2)) - PipelineConnect(exu.io.out, wbu.io.in, true.B, ifu.io.flushVec(3)) - idu1.io.flush := ifu.io.flushVec(1) - idu2.io.flush := ifu.io.flushVec(1) - isu.io.flush := ifu.io.flushVec(2) - exu.io.flush := ifu.io.flushVec(3) - - Debug() { - printf("------------------------ TIMER: %d ------------------------\n", GTimer()) - printf("flush = %b, ifu:(%d,%d), idu1:(%d,%d), idu2:(%d,%d), isu:(%d,%d), exu:(%d,%d), wbu: (%d,%d)\n", - ifu.io.flushVec.asUInt, ifu.io.out.valid, ifu.io.out.ready, - idu1.io.in.valid, idu1.io.in.ready, idu2.io.in.valid, idu2.io.in.ready, isu.io.in.valid, isu.io.in.ready, - exu.io.in.valid, exu.io.in.ready, wbu.io.in.valid, wbu.io.in.ready) - when (ifu.io.out.valid) { printf("IFU: pc = 0x%x, instr = 0x%x, pnpc = 0x%x\n", ifu.io.out.bits.pc, ifu.io.out.bits.instr, ifu.io.out.bits.pnpc)} ; - when (idu1.io.in.valid) { printf("ID1: pc = 0x%x, instr = 0x%x, pnpc = 0x%x\n", idu1.io.in.bits.pc, idu1.io.in.bits.instr, idu1.io.in.bits.pnpc) } - when (idu2.io.in.valid) { printf("ID2: pc = 0x%x, instr = 0x%x, pnpc = 0x%x\n", idu2.io.in.bits.pc, idu2.io.in.bits.instr, idu2.io.in.bits.pnpc) } - when (isu.io.in.valid) { printf("ISU: pc = 0x%x, pnpc = 0x%x\n", isu.io.in.bits.cf.pc, isu.io.in.bits.cf.pnpc)} ; - when (exu.io.in.valid) { printf("EXU: pc = 0x%x, pnpc = 0x%x\n", exu.io.in.bits.cf.pc, exu.io.in.bits.cf.pnpc)} ; - when (wbu.io.in.valid) { printf("WBU: pc = 0x%x rfWen:%d rfDest:%d rfData:%x Futype:%x\n", wbu.io.in.bits.decode.cf.pc, wbu.io.in.bits.decode.ctrl.rfWen, wbu.io.in.bits.decode.ctrl.rfDest, wbu.io.wb.rfData, wbu.io.in.bits.decode.ctrl.fuType )} - // when (io.in.valid) { printf("TIMER: %d WBU: pc = 0x%x wen %x wdata %x mmio %x intrNO %x\n", GTimer(), io.in.bits.decode.cf.pc, io.wb.rfWen, io.wb.rfData, io.in.bits.isMMIO, io.in.bits.intrNO) } - - // printf(p"IFUO: redirectIO:${ifu.io.out.bits.redirect}\n") ; printf("IFUO: exceptionVec: %x\n", ifu.io.out.bits.exceptionVec.asUInt)} - // printf(p"IDUO: 
redirectIO:${idu.io.out.bits.cf.redirect} redirectIOC:${idu.io.redirect}\n") ; printf("IDUO: exceptionVec:%x\n", idu.io.out.bits.cf.exceptionVec.asUInt)} - // printf(p"ISUO: ${isu.io.out.bits.cf.redirect}\n") ; printf("ISUO: exceptionVec:%x\n", isu.io.out.bits.cf.exceptionVec.asUInt)} - when (exu.io.out.bits.decode.cf.redirect.valid) { printf("EXUO: redirect valid:%d target:%x\n", exu.io.out.bits.decode.cf.redirect.valid, exu.io.out.bits.decode.cf.redirect.target) } - // when (wbu.io.in.valid) { printf("WBU: pc = 0x%x rfWen:%d rfDest:%d rfData:%x Futype:%x commits(0):%x commits(1):%x commits(3):%x\n", wbu.io.in.bits.decode.cf.pc, wbu.io.in.bits.decode.ctrl.rfWen, wbu.io.in.bits.decode.ctrl.rfDest, wbu.io.wb.rfData, wbu.io.in.bits.decode.ctrl.fuType, wbu.io.in.bits.commits(0), wbu.io.in.bits.commits(1), wbu.io.in.bits.commits(3)) } - - } - - isu.io.wb <> wbu.io.wb - ifu.io.redirect <> wbu.io.redirect - // forward - isu.io.forward <> exu.io.forward - - val mmioXbar = Module(new SimpleBusCrossbarNto1(if (HasDcache) 2 else 3)) - val dmemXbar = Module(new SimpleBusCrossbarNto1(4)) - - val itlb = TLB(in = ifu.io.imem, mem = dmemXbar.io.in(1), flush = ifu.io.flushVec(0) | ifu.io.bpFlush, csrMMU = exu.io.memMMU.imem)(TLBConfig(name = "itlb", userBits = VAddrBits*2 + 4, totalEntry = 4)) - ifu.io.ipf := itlb.io.ipf - io.imem <> Cache(in = itlb.io.out, mmio = mmioXbar.io.in.take(1), flush = Fill(2, ifu.io.flushVec(0) | ifu.io.bpFlush), empty = itlb.io.cacheEmpty)( - CacheConfig(ro = true, name = "icache", userBits = VAddrBits*2 + 4)) - - val dtlb = TLB(in = exu.io.dmem, mem = dmemXbar.io.in(2), flush = false.B, csrMMU = exu.io.memMMU.dmem)(TLBConfig(name = "dtlb", totalEntry = 64)) - dmemXbar.io.in(0) <> dtlb.io.out - io.dmem <> Cache(in = dmemXbar.io.out, mmio = mmioXbar.io.in.drop(1), flush = "b00".U, empty = dtlb.io.cacheEmpty, enable = HasDcache)(CacheConfig(ro = false, name = "dcache")) - - // Make DMA access through L1 DCache to keep coherence - dmemXbar.io.in(3) <> 
io.frontend - - io.mmio <> mmioXbar.io.out -} diff --git a/src/main/scala/noop/NOOPTrap.scala b/src/main/scala/noop/NOOPTrap.scala deleted file mode 100644 index efb4d0dc8a072f01a8b94ce4bf17ed150bfa9a56..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/NOOPTrap.scala +++ /dev/null @@ -1,14 +0,0 @@ -package noop - -import chisel3._ -import chisel3.util._ - -object NOOPTrap extends HasInstrType { - def StateGoodTrap = 0.U - def StateBadTrap = 1.U - def StateInvOpcode = 2.U - def StateRunning = 3.U - - def TRAP = BitPat("b????????????_?????_000_?????_1101011") - val table = Array(TRAP -> List(InstrI, FuType.alu, ALUOpType.add)) -} diff --git a/src/main/scala/noop/TLB.scala b/src/main/scala/noop/TLB.scala deleted file mode 100644 index d6fb58ee2676486949fcd68f0c55c0b0f8a17924..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/TLB.scala +++ /dev/null @@ -1,609 +0,0 @@ -package noop - -import chisel3._ -import chisel3.util._ -import chisel3.util.experimental.BoringUtils - -import bus.simplebus._ -import bus.axi4._ -import utils._ - -trait Sv39Const extends HasNOOPParameter{ - val Level = 3 - val offLen = 12 - val ppn0Len = 9 - val ppn1Len = 9 - val ppn2Len = PAddrBits - offLen - ppn0Len - ppn1Len // 2 - val ppnLen = ppn2Len + ppn1Len + ppn0Len - val vpn2Len = 9 - val vpn1Len = 9 - val vpn0Len = 9 - val vpnLen = vpn2Len + vpn1Len + vpn0Len - - //val paddrLen = PAddrBits - //val vaddrLen = VAddrBits - val satpLen = XLEN - val satpModeLen = 4 - val asidLen = 16 - val flagLen = 8 - - val ptEntryLen = XLEN - val satpResLen = XLEN - ppnLen - satpModeLen - asidLen - //val vaResLen = 25 // unused - //val paResLen = 25 // unused - val pteResLen = XLEN - ppnLen - 2 - flagLen - - def vaBundle = new Bundle { - val vpn2 = UInt(vpn2Len.W) - val vpn1 = UInt(vpn1Len.W) - val vpn0 = UInt(vpn0Len.W) - val off = UInt( offLen.W) - } - - def vaBundle2 = new Bundle { - val vpn = UInt(vpnLen.W) - val off = UInt(offLen.W) - } - - def vaBundle3 = new Bundle { - 
val vpn = UInt(vpnLen.W) - val off = UInt(offLen.W) - } - - def vpnBundle = new Bundle { - val vpn2 = UInt(vpn2Len.W) - val vpn1 = UInt(vpn1Len.W) - val vpn0 = UInt(vpn0Len.W) - } - - def paBundle = new Bundle { - val ppn2 = UInt(ppn2Len.W) - val ppn1 = UInt(ppn1Len.W) - val ppn0 = UInt(ppn0Len.W) - val off = UInt( offLen.W) - } - - def paBundle2 = new Bundle { - val ppn = UInt(ppnLen.W) - val off = UInt(offLen.W) - } - - def paddrApply(ppn: UInt, vpnn: UInt):UInt = { - Cat(Cat(ppn, vpnn), 0.U(3.W)) - } - - def pteBundle = new Bundle { - val reserved = UInt(pteResLen.W) - val ppn = UInt(ppnLen.W) - val rsw = UInt(2.W) - val flag = new Bundle { - val d = UInt(1.W) - val a = UInt(1.W) - val g = UInt(1.W) - val u = UInt(1.W) - val x = UInt(1.W) - val w = UInt(1.W) - val r = UInt(1.W) - val v = UInt(1.W) - } - } - - def satpBundle = new Bundle { - val mode = UInt(satpModeLen.W) - val asid = UInt(asidLen.W) - val res = UInt(satpResLen.W) - val ppn = UInt(ppnLen.W) - } - - def flagBundle = new Bundle { - val d = Bool()//UInt(1.W) - val a = Bool()//UInt(1.W) - val g = Bool()//UInt(1.W) - val u = Bool()//UInt(1.W) - val x = Bool()//UInt(1.W) - val w = Bool()//UInt(1.W) - val r = Bool()//UInt(1.W) - val v = Bool()//UInt(1.W) - } - - def maskPaddr(ppn:UInt, vaddr:UInt, mask:UInt) = { - MaskData(vaddr, Cat(ppn, 0.U(offLen.W)), Cat(Fill(ppn2Len, 1.U(1.W)), mask, 0.U(offLen.W))) - } - - def MaskEQ(mask: UInt, pattern: UInt, vpn: UInt) = { - (Cat("h1ff".U(vpn2Len.W), mask) & pattern) === (Cat("h1ff".U(vpn2Len.W), mask) & vpn) - } - -} - -case class TLBConfig ( - name: String = "tlb", - userBits: Int = 0, - - totalEntry: Int = 4, - ways: Int = 4 -) - -sealed trait HasTlbConst extends Sv39Const{ - implicit val tlbConfig: TLBConfig - - val AddrBits: Int - val PAddrBits: Int - val VAddrBits: Int - val XLEN: Int - - val tlbname = tlbConfig.name - val userBits = tlbConfig.userBits - - val maskLen = vpn0Len + vpn1Len // 18 - val metaLen = vpnLen + asidLen + maskLen + flagLen // 27 + 16 
+ 18 + 8 = 69, is asid necessary - val dataLen = ppnLen + PAddrBits // - val tlbLen = metaLen + dataLen - val Ways = tlbConfig.ways - val TotalEntry = tlbConfig.totalEntry - val Sets = TotalEntry / Ways - val IndexBits = log2Up(Sets) - val TagBits = vpnLen - IndexBits - - val debug = true //&& tlbname == "dtlb" - - def vaddrTlbBundle = new Bundle { - val tag = UInt(TagBits.W) - val index = UInt(IndexBits.W) - val off = UInt(offLen.W) - } - - def metaBundle = new Bundle { - val vpn = UInt(vpnLen.W) - val asid = UInt(asidLen.W) - val mask = UInt(maskLen.W) // to support super page - val flag = UInt(flagLen.W) - } - - def dataBundle = new Bundle { - val ppn = UInt(ppnLen.W) - val pteaddr = UInt(PAddrBits.W) // pte addr, used to write back pte when flag changes (flag.d, flag.v) - } - - def tlbBundle = new Bundle { - val vpn = UInt(vpnLen.W) - val asid = UInt(asidLen.W) - val mask = UInt(maskLen.W) - val flag = UInt(flagLen.W) - val ppn = UInt(ppnLen.W) - val pteaddr = UInt(PAddrBits.W) - } - - def tlbBundle2 = new Bundle { - val meta = UInt(metaLen.W) - val data = UInt(dataLen.W) - } - - def getIndex(vaddr: UInt) : UInt = { - vaddr.asTypeOf(vaddrTlbBundle).index - } -} - -sealed abstract class TlbBundle(implicit tlbConfig: TLBConfig) extends Bundle with HasNOOPParameter with HasTlbConst with Sv39Const -sealed abstract class TlbModule(implicit tlbConfig: TLBConfig) extends Module with HasNOOPParameter with HasTlbConst with Sv39Const with HasCSRConst - -class TLBMDWriteBundle (val IndexBits: Int, val Ways: Int, val tlbLen: Int) extends Bundle with HasNOOPParameter with Sv39Const { - val wen = Output(Bool()) - val windex = Output(UInt(IndexBits.W)) - val waymask = Output(UInt(Ways.W)) - val wdata = Output(UInt(tlbLen.W)) - - def apply(wen: UInt, windex: UInt, waymask: UInt, vpn: UInt, asid: UInt, mask: UInt, flag: UInt, ppn: UInt, pteaddr: UInt) { - this.wen := wen - this.windex := windex - this.waymask := waymask - this.wdata := Cat(vpn, asid, mask, flag, ppn, pteaddr) - 
} -} - -class TLBMD(implicit val tlbConfig: TLBConfig) extends TlbModule { - val io = IO(new Bundle { - val tlbmd = Output(Vec(Ways, UInt(tlbLen.W))) - val write = Flipped(new TLBMDWriteBundle(IndexBits = IndexBits, Ways = Ways, tlbLen = tlbLen)) - val rindex = Input(UInt(IndexBits.W)) - val ready = Output(Bool()) - }) - - //val tlbmd = Reg(Vec(Ways, UInt(tlbLen.W))) - val tlbmd = Mem(Sets, Vec(Ways, UInt(tlbLen.W))) - io.tlbmd := tlbmd(io.rindex) - - //val reset = WireInit(false.B) - val resetState = RegInit(true.B)//RegEnable(true.B, init = true.B, reset) - val (resetSet, resetFinish) = Counter(resetState, Sets) - when (resetFinish) { resetState := false.B } - - val writeWen = io.write.wen//WireInit(false.B) - val writeSetIdx = io.write.windex - val writeWayMask = io.write.waymask - val writeData = io.write.wdata - - val wen = Mux(resetState, true.B, writeWen) - val setIdx = Mux(resetState, resetSet, writeSetIdx) - val waymask = Mux(resetState, Fill(Ways, "b1".U), writeWayMask) - val dataword = Mux(resetState, 0.U, writeData) - val wdata = VecInit(Seq.fill(Ways)(dataword)) - - when (wen) { tlbmd.write(setIdx, wdata, waymask.asBools) } - - io.ready := !resetState - def rready() = !resetState - def wready() = !resetState -} - -class TLB(implicit val tlbConfig: TLBConfig) extends TlbModule{ - val io = IO(new Bundle { - val in = Flipped(new SimpleBusUC(userBits = userBits, addrBits = VAddrBits)) - val out = new SimpleBusUC(userBits = userBits) - - val mem = new SimpleBusUC(userBits = userBits) - val flush = Input(Bool()) - val csrMMU = new MMUIO - val cacheEmpty = Input(Bool()) - val ipf = Output(Bool()) - }) - - val satp = WireInit(0.U(XLEN.W)) - BoringUtils.addSink(satp, "CSRSATP") - - // tlb exec - val tlbExec = Module(new TLBExec) - val tlbEmpty = Module(new TLBEmpty) - val mdTLB = Module(new TLBMD) - val mdUpdate = Wire(Bool()) - - tlbExec.io.flush := io.flush - tlbExec.io.satp := satp - tlbExec.io.mem <> io.mem - tlbExec.io.pf <> io.csrMMU - tlbExec.io.md <> 
RegEnable(mdTLB.io.tlbmd, mdUpdate) - tlbExec.io.mdReady := mdTLB.io.ready - mdTLB.io.rindex := getIndex(io.in.req.bits.addr) - mdTLB.io.write <> tlbExec.io.mdWrite - - io.ipf := false.B - - // meta reset - val flushTLB = WireInit(false.B) - BoringUtils.addSink(flushTLB, "MOUFlushTLB") - mdTLB.reset := reset.asBool || flushTLB - Debug() { - when(flushTLB && GTimer() > 77437080.U) { - printf("%d sfence_vma req.pc:%x valid:%d\n", GTimer(), io.in.req.bits.addr, io.in.req.valid) - } - } - - // VM enable && io - val vmEnable = satp.asTypeOf(satpBundle).mode === 8.U && (io.csrMMU.priviledgeMode < ModeM) - - def PipelineConnectTLB[T <: Data](left: DecoupledIO[T], right: DecoupledIO[T], update: Bool, rightOutFire: Bool, isFlush: Bool, vmEnable: Bool) = { - val valid = RegInit(false.B) - when (rightOutFire) { valid := false.B } - when (left.valid && right.ready && vmEnable) { valid := true.B } - when (isFlush) { valid := false.B } - - left.ready := right.ready - right.bits <> RegEnable(left.bits, left.valid && right.ready) - right.valid := valid //&& !isFlush - - update := left.valid && right.ready - } - - tlbEmpty.io.in <> DontCare - tlbEmpty.io.out.ready := DontCare - PipelineConnectTLB(io.in.req, tlbExec.io.in, mdUpdate, tlbExec.io.isFinish, io.flush, vmEnable) - if(tlbname == "dtlb") { - PipelineConnect(tlbExec.io.out, tlbEmpty.io.in, tlbEmpty.io.out.fire(), io.flush) - } - when(!vmEnable) { - tlbExec.io.out.ready := true.B // let existed request go out - if( tlbname == "dtlb") { tlbEmpty.io.out.ready := true.B } - io.out.req.valid := io.in.req.valid - io.in.req.ready := io.out.req.ready - io.out.req.bits.addr := io.in.req.bits.addr(PAddrBits-1, 0) - io.out.req.bits.size := io.in.req.bits.size - io.out.req.bits.cmd := io.in.req.bits.cmd - io.out.req.bits.wmask := io.in.req.bits.wmask - io.out.req.bits.wdata := io.in.req.bits.wdata - io.out.req.bits.user.map(_ := io.in.req.bits.user.getOrElse(0.U)) - }.otherwise { - if (tlbname == "dtlb") { io.out.req <> tlbEmpty.io.out} 
- else { io.out.req <> tlbExec.io.out } - } - io.out.resp <> io.in.resp - - // lsu need dtlb signals - if(tlbname == "dtlb") { - val alreadyOutFinish = RegEnable(true.B, init=false.B, tlbExec.io.out.valid && !tlbExec.io.out.ready) - when(alreadyOutFinish && tlbExec.io.out.fire()) { alreadyOutFinish := false.B} - val tlbFinish = (tlbExec.io.out.valid && !alreadyOutFinish) || tlbExec.io.pf.isPF() - BoringUtils.addSource(tlbFinish, "DTLBFINISH") - BoringUtils.addSource(io.csrMMU.isPF(), "DTLBPF") - BoringUtils.addSource(vmEnable, "DTLBENABLE") - } - - // instruction page fault - if (tlbname == "itlb") { - when (tlbExec.io.ipf && vmEnable) { - tlbExec.io.out.ready := io.cacheEmpty && io.in.resp.ready - io.out.req.valid := false.B - } - - when (tlbExec.io.ipf && vmEnable && io.cacheEmpty) { - io.in.resp.valid := true.B - io.in.resp.bits.rdata := 0.U - io.in.resp.bits.cmd := SimpleBusCmd.readLast - io.in.resp.bits.user.map(_ := tlbExec.io.in.bits.user.getOrElse(0.U)) - io.ipf := tlbExec.io.ipf - } - } - - Debug() { - if (debug) { - printf("[TLB-" + tlbname+ "]: Timer:%d---------\n", GTimer()) - printf("[TLB-" + tlbname+ "]: InReq(%d, %d) InResp(%d, %d) OutReq(%d, %d) OutResp(%d, %d) vmEnable:%d mode:%d\n", io.in.req.valid, io.in.req.ready, io.in.resp.valid, io.in.resp.ready, io.out.req.valid, io.out.req.ready, io.out.resp.valid, io.out.resp.ready, vmEnable, io.csrMMU.priviledgeMode) - printf("[TLB-" + tlbname+ "]: InReq: addr:%x cmd:%d wdata:%x OutReq: addr:%x cmd:%x wdata:%x\n", io.in.req.bits.addr, io.in.req.bits.cmd, io.in.req.bits.wdata, io.out.req.bits.addr, io.out.req.bits.cmd, io.out.req.bits.wdata) - printf("[TLB-" + tlbname+ "]: OutResp: rdata:%x cmd:%x Inresp: rdata:%x cmd:%x\n", io.out.resp.bits.rdata, io.out.resp.bits.cmd, io.in.resp.bits.rdata, io.in.resp.bits.cmd) - printf("[TLB-" + tlbname+ "]: satp:%x flush:%d cacheEmpty:%d instrPF:%d loadPF:%d storePF:%d \n", satp, io.flush, io.cacheEmpty, io.ipf, io.csrMMU.loadPF, io.csrMMU.storePF) - } - } - -} - 
-class TLBExec(implicit val tlbConfig: TLBConfig) extends TlbModule{ - val io = IO(new Bundle { - val in = Flipped(Decoupled(new SimpleBusReqBundle(userBits = userBits, addrBits = VAddrBits))) - val out = Decoupled(new SimpleBusReqBundle(userBits = userBits)) - - val md = Input(Vec(Ways, UInt(tlbLen.W))) - val mdWrite = new TLBMDWriteBundle(IndexBits = IndexBits, Ways = Ways, tlbLen = tlbLen) - val mdReady = Input(Bool()) - - val mem = new SimpleBusUC(userBits = userBits) - val flush = Input(Bool()) - val satp = Input(UInt(XLEN.W)) - val pf = new MMUIO - val ipf = Output(Bool()) - val isFinish = Output(Bool()) - }) - - val md = io.md//RegEnable(mdTLB.io.tlbmd, io.in.ready) - - // lazy renaming - val req = io.in.bits - val vpn = req.addr.asTypeOf(vaBundle2).vpn.asTypeOf(vpnBundle) - val pf = io.pf - val satp = io.satp.asTypeOf(satpBundle) - - // pf init - pf.loadPF := false.B - pf.storePF := false.B - pf.addr := req.addr - - // check hit or miss - val hitVec = VecInit(md.map(m => m.asTypeOf(tlbBundle).flag.asTypeOf(flagBundle).v && (m.asTypeOf(tlbBundle).asid === satp.asid) && MaskEQ(m.asTypeOf(tlbBundle).mask, m.asTypeOf(tlbBundle).vpn, vpn.asUInt))).asUInt - val hit = io.in.valid && hitVec.orR - val miss = io.in.valid && !hitVec.orR - - val victimWaymask = if (Ways > 1) (1.U << LFSR64()(log2Up(Ways)-1,0)) else "b1".U - val waymask = Mux(hit, hitVec, victimWaymask) - - val loadPF = WireInit(false.B) - val storePF = WireInit(false.B) - - // hit - val hitMeta = Mux1H(waymask, md).asTypeOf(tlbBundle2).meta.asTypeOf(metaBundle) - val hitData = Mux1H(waymask, md).asTypeOf(tlbBundle2).data.asTypeOf(dataBundle) - val hitFlag = hitMeta.flag.asTypeOf(flagBundle) - val hitMask = hitMeta.mask - // hit write back pte.flag - val hitinstrPF = WireInit(false.B) - val hitWB = hit && (!hitFlag.a || !hitFlag.d && req.isWrite()) && !hitinstrPF && !(loadPF || storePF || io.pf.isPF()) - val hitRefillFlag = Cat(req.isWrite().asUInt, 1.U(1.W), 0.U(6.W)) | hitFlag.asUInt - val hitWBStore 
= RegEnable(Cat(0.U(10.W), hitData.ppn, 0.U(2.W), hitRefillFlag), hitWB) - - // hit permission check - val hitCheck = hit /*&& hitFlag.v */&& !(pf.priviledgeMode === ModeU && !hitFlag.u) && !(pf.priviledgeMode === ModeS && hitFlag.u && !pf.status_sum) - val hitExec = hitCheck && hitFlag.x - val hitLoad = hitCheck && (hitFlag.r || pf.status_mxr && hitFlag.x) - val hitStore = hitCheck && hitFlag.w - - val isAMO = WireInit(false.B) - if (tlbname == "dtlb") { - BoringUtils.addSink(isAMO, "ISAMO") - } - - io.pf.loadPF := RegNext(loadPF, init =false.B) - io.pf.storePF := RegNext(storePF, init = false.B) - - if (tlbname == "itlb") { hitinstrPF := !hitExec && hit} - if (tlbname == "dtlb") { - loadPF := !hitLoad && req.isRead() && hit && !isAMO - storePF := (!hitStore && req.isWrite() && hit) || (!hitLoad && req.isRead() && hit && isAMO) - } - - // miss - val s_idle :: s_memReadReq :: s_memReadResp :: s_write_pte :: s_wait_resp :: s_miss_slpf :: Nil = Enum(6) - val state = RegInit(s_idle) - val level = RegInit(Level.U(log2Up(Level).W)) - - val memRespStore = Reg(UInt(XLEN.W)) - val missMask = WireInit("h3ffff".U(maskLen.W)) - val missMaskStore = Reg(UInt(maskLen.W)) - val missMetaRefill = WireInit(false.B) - val missRefillFlag = WireInit(0.U(8.W)) - val memRdata = io.mem.resp.bits.rdata.asTypeOf(pteBundle) - val raddr = Reg(UInt(PAddrBits.W)) - val alreadyOutFire = RegEnable(true.B, init = false.B, io.out.fire) - - //handle flush - val needFlush = RegInit(false.B) - val ioFlush = io.flush - val isFlush = needFlush || ioFlush - when (ioFlush && (state =/= s_idle)) { needFlush := true.B} - when (io.out.fire() && needFlush) { needFlush := false.B} - - val missIPF = RegInit(false.B) - - // state machine to handle miss(ptw) and pte-writing-back - switch (state) { - is (s_idle) { - when (!ioFlush && hitWB) { - state := s_write_pte - needFlush := false.B - alreadyOutFire := false.B - }.elsewhen (miss && !ioFlush) { - state := s_memReadReq - raddr := paddrApply(satp.ppn, vpn.vpn2) 
// - level := Level.U - needFlush := false.B - alreadyOutFire := false.B - } - } - - is (s_memReadReq) { - when (isFlush) { - state := s_idle - needFlush := false.B - }.elsewhen (io.mem.req.fire()) { state := s_memReadResp} - } - - is (s_memReadResp) { - val missflag = memRdata.flag.asTypeOf(flagBundle) - when (io.mem.resp.fire()) { - when (isFlush) { - state := s_idle - needFlush := false.B - }.elsewhen (!(missflag.r || missflag.x) && (level===3.U || level===2.U)) { - when(!missflag.v || (!missflag.r && missflag.w)) { //TODO: fix needflush - if(tlbname == "itlb") { state := s_wait_resp } else { state := s_miss_slpf } - if(tlbname == "itlb") { missIPF := true.B } - if(tlbname == "dtlb") { - loadPF := req.isRead() && !isAMO - storePF := req.isWrite() || isAMO - } - Debug() { - if(debug) { - printf("%d " + tlbname +" tlbException!!! ", GTimer()) - printf(p" req:${req} Memreq:${io.mem.req} MemResp:${io.mem.resp}") - printf(" level:%d",level) - printf("\n") - } - } - }.otherwise { - state := s_memReadReq - raddr := paddrApply(memRdata.ppn, Mux(level === 3.U, vpn.vpn1, vpn.vpn0)) - } - }.elsewhen (level =/= 0.U) { //TODO: fix needFlush - val permCheck = missflag.v && !(pf.priviledgeMode === ModeU && !missflag.u) && !(pf.priviledgeMode === ModeS && missflag.u && !pf.status_sum) - val permExec = permCheck && missflag.x - val permLoad = permCheck && (missflag.r || pf.status_mxr && missflag.x) - val permStore = permCheck && missflag.w - val updateAD = !missflag.a || (!missflag.d && req.isWrite()) - val updateData = Cat( 0.U(56.W), req.isWrite(), 1.U(1.W), 0.U(6.W) ) - missRefillFlag := Cat(req.isWrite(), 1.U(1.W), 0.U(6.W)) | missflag.asUInt - memRespStore := io.mem.resp.bits.rdata | updateData - if(tlbname == "itlb") { - when (!permExec) { missIPF := true.B ; state := s_wait_resp} - .otherwise { - state := Mux(updateAD, s_write_pte, s_wait_resp) - missMetaRefill := true.B - } - } - if(tlbname == "dtlb") { - when((!permLoad && req.isRead()) || (!permStore && req.isWrite())) 
{ - state := s_miss_slpf - loadPF := req.isRead() && !isAMO - storePF := req.isWrite() || isAMO - }.otherwise { - state := Mux(updateAD, s_write_pte, s_wait_resp) - missMetaRefill := true.B - } - } - missMask := Mux(level===3.U, 0.U(maskLen.W), Mux(level===2.U, "h3fe00".U(maskLen.W), "h3ffff".U(maskLen.W))) - missMaskStore := missMask - } - level := level - 1.U - } - } - - is (s_write_pte) { - when (isFlush) { - state := s_idle - needFlush := false.B - }.elsewhen (io.mem.req.fire()) { state := s_wait_resp } - } - - is (s_wait_resp) { when (io.out.fire() || ioFlush || alreadyOutFire){ - state := s_idle - missIPF := false.B - alreadyOutFire := false.B - }} - - is (s_miss_slpf) { - state := s_idle - } - } - - // mem - val cmd = Mux(state === s_write_pte, SimpleBusCmd.write, SimpleBusCmd.read) - io.mem.req.bits.apply(addr = Mux(hitWB, hitData.pteaddr, raddr), cmd = cmd, size = (if (XLEN == 64) "b11".U else "b10".U), wdata = Mux( hitWB, hitWBStore, memRespStore), wmask = 0xff.U) - io.mem.req.valid := ((state === s_memReadReq || state === s_write_pte) && !isFlush) - io.mem.resp.ready := true.B - - // tlb refill - io.mdWrite.apply(wen = RegNext((missMetaRefill && !isFlush) || (hitWB && state === s_idle && !isFlush), init = false.B), - windex = RegNext(getIndex(req.addr)), waymask = RegNext(waymask), vpn = RegNext(vpn.asUInt), - asid = RegNext(Mux(hitWB, hitMeta.asid, satp.asid)), mask = RegNext(Mux(hitWB, hitMask, missMask)), - flag = RegNext(Mux(hitWB, hitRefillFlag, missRefillFlag)), ppn = RegNext(Mux(hitWB, hitData.ppn, memRdata.ppn)), - pteaddr = RegNext((Mux(hitWB, hitData.pteaddr, raddr)))) - - // io - io.out.bits := req - io.out.bits.addr := Mux(hit, maskPaddr(hitData.ppn, req.addr(PAddrBits-1, 0), hitMask), maskPaddr(memRespStore.asTypeOf(pteBundle).ppn, req.addr(PAddrBits-1, 0), missMaskStore)) - io.out.valid := io.in.valid && Mux(hit && !hitWB, !(io.pf.isPF() || loadPF || storePF), state === s_wait_resp)// && !alreadyOutFire - - io.in.ready := io.out.ready && 
(state === s_idle) && !miss && !hitWB && io.mdReady && (!io.pf.isPF() && !loadPF && !storePF)//maybe be optimized - - io.ipf := Mux(hit, hitinstrPF, missIPF) - io.isFinish := io.out.fire() || io.pf.isPF() - - Debug() { - if (debug) { - printf("[TLBExec-" + tlbname+ "]: Timer:%d---------\n", GTimer()) - printf("[TLBExec-" + tlbname+ "]: In(%d, %d) Out(%d, %d) InAddr:%x OutAddr:%x cmd:%d \n", io.in.valid, io.in.ready, io.out.valid, io.out.ready, req.addr, io.out.bits.addr, req.cmd) - printf("[TLBExec-" + tlbname+ "]: isAMO:%d io.Flush:%d needFlush:%d alreadyOutFire:%d isFinish:%d\n",isAMO, io.flush, needFlush, alreadyOutFire, io.isFinish) - printf("[TLBExec-" + tlbname+ "]: hit:%d hitWB:%d hitVPN:%x hitFlag:%x hitPPN:%x hitRefillFlag:%x hitWBStore:%x hitCheck:%d hitExec:%d hitLoad:%d hitStore:%d\n", hit, hitWB, hitMeta.vpn, hitFlag.asUInt, hitData.ppn, hitRefillFlag, hitWBStore, hitCheck, hitExec, hitLoad, hitStore) - printf("[TLBExec-" + tlbname+ "]: miss:%d state:%d level:%d raddr:%x memRdata:%x missMask:%x missRefillFlag:%x missMetaRefill:%d\n", miss, state, level, raddr, memRdata.asUInt, missMask, missRefillFlag, missMetaRefill) - printf("[TLBExec-" + tlbname+ "]: meta/data: (0)%x|%b|%x (1)%x|%b|%x (2)%x|%b|%x (3)%x|%b|%x rread:%d\n", md(0).asTypeOf(tlbBundle).vpn, md(0).asTypeOf(tlbBundle).flag, md(0).asTypeOf(tlbBundle).ppn, md(1).asTypeOf(tlbBundle).vpn, md(1).asTypeOf(tlbBundle).flag, md(1).asTypeOf(tlbBundle).ppn, md(2).asTypeOf(tlbBundle).vpn, md(2).asTypeOf(tlbBundle).flag, md(2).asTypeOf(tlbBundle).ppn, md(3).asTypeOf(tlbBundle).vpn, md(3).asTypeOf(tlbBundle).flag, md(3).asTypeOf(tlbBundle).ppn, io.mdReady) - printf("[TLBExec-" + tlbname+ "]: md: wen:%d windex:%x waymask:%x vpn:%x asid:%x mask:%x flag:%x asid:%x ppn:%x pteaddr:%x\n", io.mdWrite.wen, io.mdWrite.windex, io.mdWrite.waymask, io.mdWrite.wdata.asTypeOf(tlbBundle).vpn, io.mdWrite.wdata.asTypeOf(tlbBundle).asid, io.mdWrite.wdata.asTypeOf(tlbBundle).mask, io.mdWrite.wdata.asTypeOf(tlbBundle).flag, 
io.mdWrite.wdata.asTypeOf(tlbBundle).asid, io.mdWrite.wdata.asTypeOf(tlbBundle).ppn, io.mdWrite.wdata.asTypeOf(tlbBundle).pteaddr) - printf("[TLBExec-" + tlbname+ "]: MemReq(%d, %d) MemResp(%d, %d) addr:%x cmd:%d rdata:%x cmd:%d\n", io.mem.req.valid, io.mem.req.ready, io.mem.resp.valid, io.mem.resp.ready, io.mem.req.bits.addr, io.mem.req.bits.cmd, io.mem.resp.bits.rdata, io.mem.resp.bits.cmd) - printf("[TLBExec-" + tlbname+ "]: io.ipf:%d hitinstrPF:%d missIPF:%d pf.loadPF:%d pf.storePF:%d loadPF:%d storePF:%d\n", io.ipf, hitinstrPF, missIPF, io.pf.loadPF, io.pf.storePF, loadPF, storePF) - } - } -} - -class TLBEmpty(implicit val tlbConfig: TLBConfig) extends TlbModule { - val io = IO(new Bundle { - val in = Flipped(Decoupled(new SimpleBusReqBundle(userBits = userBits))) - val out = Decoupled(new SimpleBusReqBundle(userBits = userBits)) - }) - - io.out <> io.in -} - -object TLB { - def apply(in: SimpleBusUC, mem: SimpleBusUC, flush: Bool, csrMMU: MMUIO)(implicit tlbConfig: TLBConfig) = { - val tlb = Module(new TLB) - tlb.io.in <> in - tlb.io.mem <> mem - tlb.io.flush := flush - tlb.io.csrMMU <> csrMMU - tlb - } -} \ No newline at end of file diff --git a/src/main/scala/noop/WBU.scala b/src/main/scala/noop/WBU.scala deleted file mode 100644 index c70a79270cac9bed9da537efe1a2189e4b7d468d..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/WBU.scala +++ /dev/null @@ -1,43 +0,0 @@ -package noop - -import chisel3._ -import chisel3.util._ -import chisel3.util.experimental.BoringUtils -import utils._ - -class WBU(implicit val p: NOOPConfig) extends NOOPModule{ - val io = IO(new Bundle { - val in = Flipped(Decoupled(new CommitIO)) - val wb = new WriteBackIO - val redirect = new RedirectIO - }) - - io.wb.rfWen := io.in.bits.decode.ctrl.rfWen && io.in.valid - io.wb.fpWen := io.in.bits.decode.ctrl.fpWen && io.in.valid - io.wb.rfDest := io.in.bits.decode.ctrl.rfDest - io.wb.rfData := io.in.bits.commits(io.in.bits.decode.ctrl.fuType) - io.in.ready := true.B - - 
io.redirect := io.in.bits.decode.cf.redirect - io.redirect.valid := io.in.bits.decode.cf.redirect.valid && io.in.valid - - Debug(){ - when (io.in.valid) { printf("[COMMIT] TIMER: %d WBU: pc = 0x%x inst %x wen %x wdata %x mmio %x intrNO %x\n", GTimer(), io.in.bits.decode.cf.pc, io.in.bits.decode.cf.instr, io.wb.rfWen, io.wb.rfData, io.in.bits.isMMIO, io.in.bits.intrNO) } - } - - BoringUtils.addSource(io.in.valid, "perfCntCondMinstret") - if (!p.FPGAPlatform) { - BoringUtils.addSource(RegNext(io.in.valid), "difftestCommit") - BoringUtils.addSource(RegNext(SignExt(io.in.bits.decode.cf.pc, AddrBits)), "difftestThisPC") - BoringUtils.addSource(RegNext(io.in.bits.decode.cf.instr), "difftestThisINST") - BoringUtils.addSource(RegNext(io.in.bits.isMMIO), "difftestIsMMIO") - BoringUtils.addSource(RegNext(io.in.bits.decode.cf.instr(1,0)=/="b11".U), "difftestIsRVC") - BoringUtils.addSource(RegNext(io.in.bits.intrNO), "difftestIntrNO") - } else { - BoringUtils.addSource(io.in.valid, "ilaWBUvalid") - BoringUtils.addSource(io.in.bits.decode.cf.pc, "ilaWBUpc") - BoringUtils.addSource(io.wb.rfWen, "ilaWBUrfWen") - BoringUtils.addSource(io.wb.rfDest, "ilaWBUrfDest") - BoringUtils.addSource(io.wb.rfData, "ilaWBUrfData") - } -} diff --git a/src/main/scala/noop/fu/ALU.scala b/src/main/scala/noop/fu/ALU.scala deleted file mode 100644 index 3a3f74354b40b105d9e65a540aeecd100344b8fa..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/fu/ALU.scala +++ /dev/null @@ -1,170 +0,0 @@ -package noop - -import chisel3._ -import chisel3.util._ -import chisel3.util.experimental.BoringUtils - -import utils._ - -object ALUOpType { - def add = "b000000".U - def sll = "b000001".U - def slt = "b000010".U - def sltu = "b000011".U - def xor = "b000100".U - def srl = "b000101".U - def or = "b000110".U - def and = "b000111".U - def sub = "b001000".U - def sra = "b001101".U - - def addw = "b100000".U - def subw = "b101000".U - def sllw = "b100001".U - def srlw = "b100101".U - def sraw = 
"b101101".U - - def isWordOp(func: UInt) = func(5) - - def jal = "b011000".U - def jalr = "b011010".U - // def cjalr= "b111010".U // pc + 2 instead of 4 - def beq = "b010000".U - def bne = "b010001".U - def blt = "b010100".U - def bge = "b010101".U - def bltu = "b010110".U - def bgeu = "b010111".U - - // for RAS - def call = "b011100".U - def ret = "b011110".U - - def isBru(func: UInt) = func(4)//[important] - def pcPlus2(func: UInt) = func(5)//[important] - def isBranch(func: UInt) = !func(3) - def isJump(func: UInt) = isBru(func) && !isBranch(func) - def getBranchType(func: UInt) = func(2, 1) - def isBranchInvert(func: UInt) = func(0) -} - -class ALUIO extends FunctionUnitIO { - val cfIn = Flipped(new CtrlFlowIO) - val redirect = new RedirectIO - val offset = Input(UInt(XLEN.W)) -} - -class ALU extends NOOPModule { - val io = IO(new ALUIO) - - val (valid, src1, src2, func) = (io.in.valid, io.in.bits.src1, io.in.bits.src2, io.in.bits.func) - def access(valid: Bool, src1: UInt, src2: UInt, func: UInt): UInt = { - this.valid := valid - this.src1 := src1 - this.src2 := src2 - this.func := func - io.out.bits - } - - val isAdderSub = (func =/= ALUOpType.add) && (func =/= ALUOpType.addw) && !ALUOpType.isJump(func) - val adderRes = (src1 +& (src2 ^ Fill(XLEN, isAdderSub))) + isAdderSub - val xorRes = src1 ^ src2 - val sltu = !adderRes(XLEN) - val slt = xorRes(XLEN-1) ^ sltu - - val shsrc1 = LookupTreeDefault(func, src1, List( - ALUOpType.srlw -> ZeroExt(src1(31,0), 64), - ALUOpType.sraw -> SignExt(src1(31,0), 64) - )) - val shamt = Mux(ALUOpType.isWordOp(func), src2(4, 0), src2(5, 0)) - val res = LookupTreeDefault(func(3, 0), adderRes, List( - ALUOpType.sll -> ((shsrc1 << shamt)(XLEN-1, 0)), - ALUOpType.slt -> ZeroExt(slt, XLEN), - ALUOpType.sltu -> ZeroExt(sltu, XLEN), - ALUOpType.xor -> xorRes, - ALUOpType.srl -> (shsrc1 >> shamt), - ALUOpType.or -> (src1 | src2), - ALUOpType.and -> (src1 & src2), - ALUOpType.sra -> ((shsrc1.asSInt >> shamt).asUInt) - )) - val aluRes = 
Mux(ALUOpType.isWordOp(func), SignExt(res(31,0), 64), res) - - val branchOpTable = List( - ALUOpType.getBranchType(ALUOpType.beq) -> !xorRes.orR, - ALUOpType.getBranchType(ALUOpType.blt) -> slt, - ALUOpType.getBranchType(ALUOpType.bltu) -> sltu - ) - - val isBranch = ALUOpType.isBranch(func) - val isBru = ALUOpType.isBru(func) - // val pcPlus2 = ALUOpType.pcPlus2(func) - val taken = LookupTree(ALUOpType.getBranchType(func), branchOpTable) ^ ALUOpType.isBranchInvert(func) - val target = Mux(isBranch, io.cfIn.pc + io.offset, adderRes)(VAddrBits-1,0) - val predictWrong = (io.redirect.target =/= io.cfIn.pnpc) - val isRVC = (io.cfIn.instr(1,0) =/= "b11".U) - io.redirect.target := Mux(!taken && isBranch, Mux(isRVC, io.cfIn.pc + 2.U, io.cfIn.pc + 4.U), target) - // with branch predictor, this is actually to fix the wrong prediction - io.redirect.valid := valid && isBru && predictWrong - // may be can be moved to ISU to calculate pc + 4 - // this is actually for jal and jalr to write pc + 4/2 to rd - io.out.bits := Mux(isBru, Mux(!isRVC, SignExt(io.cfIn.pc, AddrBits) + 4.U, SignExt(io.cfIn.pc, AddrBits) + 2.U), aluRes) - // when(pcPlus2 && isBru){ - // printf("CJALR %x %x \n ", io.cfIn.instr, io.cfIn.pc) - // } - - Debug(){ - when(valid && isBru){ - printf("[BRU] tgt %x, valid:%d, npc: %x, pdwrong: %x\n", io.redirect.target, io.redirect.valid, io.cfIn.pnpc, predictWrong) - printf("[BRU] taken:%d addrRes:%x src1:%x src2:%x func:%x\n", taken, adderRes, src1, src2, func) - } - } - - Debug(false){ - when(valid && isBru){ - printf("[BPW] pc %x tgt %x, npc: %x, pdwrong: %x type: %x%x%x%x\n", io.cfIn.pc, io.redirect.target, io.cfIn.pnpc, predictWrong, isBranch, (func === ALUOpType.jal || func === ALUOpType.call), func === ALUOpType.jalr, func === ALUOpType.ret) - } - - when(true.B) { - printf("[ALUIN0] valid:%d isBru:%d isBranch:%d \n", valid, isBru, isBranch) - printf("[ALUIN1] pc %x instr %x tgt %x, npc: %x, pdwrong: %x type: %x%x%x%x\n", io.cfIn.pc, io.cfIn.instr, 
io.redirect.target, io.cfIn.pnpc, predictWrong, isBranch, (func === ALUOpType.jal || func === ALUOpType.call), func === ALUOpType.jalr, func === ALUOpType.ret) - printf("[ALUIN2] func:%b ", func) - printf(" bpuUpdateReq: valid:%d pc:%x isMissPredict:%d actualTarget:%x actualTaken:%x fuOpType:%x btbType:%x isRVC:%d \n", valid && isBru, io.cfIn.pc, predictWrong, target, taken, func, LookupTree(func, RV32I_BRUInstr.bruFuncTobtbTypeTable), isRVC) - printf("[ALUIN3]tgt %x, npc: %x, pdwrong: %x\n", io.redirect.target, io.cfIn.pnpc, predictWrong) - printf("[ALUIN4]taken:%d addrRes:%x src1:%x src2:%x func:%x\n", taken, adderRes, src1, src2, func) - } - } - - io.in.ready := true.B - io.out.valid := valid - - val bpuUpdateReq = WireInit(0.U.asTypeOf(new BPUUpdateReq)) - bpuUpdateReq.valid := valid && isBru - bpuUpdateReq.pc := io.cfIn.pc - bpuUpdateReq.isMissPredict := predictWrong - bpuUpdateReq.actualTarget := target - bpuUpdateReq.actualTaken := taken - bpuUpdateReq.fuOpType := func - bpuUpdateReq.btbType := LookupTree(func, RV32I_BRUInstr.bruFuncTobtbTypeTable) - bpuUpdateReq.isRVC := isRVC - - BoringUtils.addSource(RegNext(bpuUpdateReq), "bpuUpdateReq") - - val right = valid && isBru && !predictWrong - val wrong = valid && isBru && predictWrong - BoringUtils.addSource(right && isBranch, "MbpBRight") - BoringUtils.addSource(wrong && isBranch, "MbpBWrong") - BoringUtils.addSource(wrong && isBranch && io.cfIn.pc(2,0)==="h0".U && isRVC, "Custom1") - BoringUtils.addSource(wrong && isBranch && io.cfIn.pc(2,0)==="h0".U && !isRVC, "Custom2") - BoringUtils.addSource(wrong && isBranch && io.cfIn.pc(2,0)==="h2".U && isRVC, "Custom3") - BoringUtils.addSource(wrong && isBranch && io.cfIn.pc(2,0)==="h2".U && !isRVC, "Custom4") - BoringUtils.addSource(wrong && isBranch && io.cfIn.pc(2,0)==="h4".U && isRVC, "Custom5") - BoringUtils.addSource(wrong && isBranch && io.cfIn.pc(2,0)==="h4".U && !isRVC, "Custom6") - BoringUtils.addSource(wrong && isBranch && io.cfIn.pc(2,0)==="h6".U && 
isRVC, "Custom7") - BoringUtils.addSource(wrong && isBranch && io.cfIn.pc(2,0)==="h6".U && !isRVC, "Custom8") - BoringUtils.addSource(right && (func === ALUOpType.jal || func === ALUOpType.call), "MbpJRight") - BoringUtils.addSource(wrong && (func === ALUOpType.jal || func === ALUOpType.call), "MbpJWrong") - BoringUtils.addSource(right && func === ALUOpType.jalr, "MbpIRight") - BoringUtils.addSource(wrong && func === ALUOpType.jalr, "MbpIWrong") - BoringUtils.addSource(right && func === ALUOpType.ret, "MbpRRight") - BoringUtils.addSource(wrong && func === ALUOpType.ret, "MbpRWrong") -} diff --git a/src/main/scala/noop/fu/CSR.scala b/src/main/scala/noop/fu/CSR.scala deleted file mode 100644 index 9d28b1fc2361fa3a97a1d01a32bba07cd478b11b..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/fu/CSR.scala +++ /dev/null @@ -1,830 +0,0 @@ -package noop - -import chisel3._ -import chisel3.util._ -import chisel3.util.experimental.BoringUtils -import noop.fu.FpuCsrIO -import utils._ - -object CSROpType { - def jmp = "b000".U - def wrt = "b001".U - def set = "b010".U - def clr = "b011".U - def wrti = "b101".U - def seti = "b110".U - def clri = "b111".U -} - -trait HasCSRConst { - // User Trap Setup - val Ustatus = 0x000 - val Uie = 0x004 - val Utvec = 0x005 - - // User Trap Handling - val Uscratch = 0x040 - val Uepc = 0x041 - val Ucause = 0x042 - val Utval = 0x043 - val Uip = 0x044 - - // User Floating-Point CSRs (not implemented) - val Fflags = 0x001 - val Frm = 0x002 - val Fcsr = 0x003 - - // User Counter/Timers - val Cycle = 0xC00 - val Time = 0xC01 - val Instret = 0xC02 - - // Supervisor Trap Setup - val Sstatus = 0x100 - val Sedeleg = 0x102 - val Sideleg = 0x103 - val Sie = 0x104 - val Stvec = 0x105 - val Scounteren = 0x106 - - // Supervisor Trap Handling - val Sscratch = 0x140 - val Sepc = 0x141 - val Scause = 0x142 - val Stval = 0x143 - val Sip = 0x144 - - // Supervisor Protection and Translation - val Satp = 0x180 - - // Machine Information Registers - 
val Mvendorid = 0xF11 - val Marchid = 0xF12 - val Mimpid = 0xF13 - val Mhartid = 0xF14 - - // Machine Trap Setup - val Mstatus = 0x300 - val Misa = 0x301 - val Medeleg = 0x302 - val Mideleg = 0x303 - val Mie = 0x304 - val Mtvec = 0x305 - val Mcounteren = 0x306 - - // Machine Trap Handling - val Mscratch = 0x340 - val Mepc = 0x341 - val Mcause = 0x342 - val Mtval = 0x343 - val Mip = 0x344 - - // Machine Memory Protection - // TBD - val Pmpcfg0 = 0x3A0 - val Pmpcfg1 = 0x3A1 - val Pmpcfg2 = 0x3A2 - val Pmpcfg3 = 0x3A3 - val PmpaddrBase = 0x3B0 - - // Machine Counter/Timers - // Currently, NOOP uses perfcnt csr set instead of standard Machine Counter/Timers - // 0xB80 - 0x89F are also used as perfcnt csr - - // Machine Counter Setup (not implemented) - // Debug/Trace Registers (shared with Debug Mode) (not implemented) - // Debug Mode Registers (not implemented) - - def privEcall = 0x000.U - def privMret = 0x302.U - def privSret = 0x102.U - def privUret = 0x002.U - - def ModeM = 0x3.U - def ModeH = 0x2.U - def ModeS = 0x1.U - def ModeU = 0x0.U - - def IRQ_UEIP = 0 - def IRQ_SEIP = 1 - def IRQ_MEIP = 3 - - def IRQ_UTIP = 4 - def IRQ_STIP = 5 - def IRQ_MTIP = 7 - - def IRQ_USIP = 8 - def IRQ_SSIP = 9 - def IRQ_MSIP = 11 - - val IntPriority = Seq( - IRQ_MEIP, IRQ_MSIP, IRQ_MTIP, - IRQ_SEIP, IRQ_SSIP, IRQ_STIP, - IRQ_UEIP, IRQ_USIP, IRQ_UTIP - ) -} - -trait HasExceptionNO { - def instrAddrMisaligned = 0 - def instrAccessFault = 1 - def illegalInstr = 2 - def breakPoint = 3 - def loadAddrMisaligned = 4 - def loadAccessFault = 5 - def storeAddrMisaligned = 6 - def storeAccessFault = 7 - def ecallU = 8 - def ecallS = 9 - def ecallM = 11 - def instrPageFault = 12 - def loadPageFault = 13 - def storePageFault = 15 - - val ExcPriority = Seq( - breakPoint, // TODO: different BP has different priority - instrPageFault, - instrAccessFault, - illegalInstr, - instrAddrMisaligned, - ecallM, ecallS, ecallU, - storeAddrMisaligned, - loadAddrMisaligned, - storePageFault, - loadPageFault, 
- storeAccessFault, - loadAccessFault - ) -} - - -class CSRIO extends FunctionUnitIO { - val cfIn = Flipped(new CtrlFlowIO) - val redirect = new RedirectIO - val fpu_csr = Flipped(new FpuCsrIO) - // for exception check - val instrValid = Input(Bool()) - // for differential testing - val intrNO = Output(UInt(XLEN.W)) - val imemMMU = Flipped(new MMUIO) - val dmemMMU = Flipped(new MMUIO) - val wenFix = Output(Bool()) -} - -class CSR(implicit val p: NOOPConfig) extends NOOPModule with HasCSRConst{ - val io = IO(new CSRIO) - - val (valid, src1, src2, func) = (io.in.valid, io.in.bits.src1, io.in.bits.src2, io.in.bits.func) - def access(valid: Bool, src1: UInt, src2: UInt, func: UInt): UInt = { - this.valid := valid - this.src1 := src1 - this.src2 := src2 - this.func := func - io.out.bits - } - - // CSR define - - class Priv extends Bundle { - val m = Output(Bool()) - val h = Output(Bool()) - val s = Output(Bool()) - val u = Output(Bool()) - } - - val csrNotImplemented = RegInit(UInt(XLEN.W), 0.U) - - class MstatusStruct extends Bundle { - val sd = Output(UInt(1.W)) - val pad1 = Output(UInt((XLEN-37).W)) - val sxl = Output(UInt(2.W)) - val uxl = Output(UInt(2.W)) - val pad0 = Output(UInt(9.W)) - val tsr = Output(UInt(1.W)) - val tw = Output(UInt(1.W)) - val tvm = Output(UInt(1.W)) - val mxr = Output(UInt(1.W)) - val sum = Output(UInt(1.W)) - val mprv = Output(UInt(1.W)) - val xs = Output(UInt(2.W)) - val fs = Output(UInt(2.W)) - val mpp = Output(UInt(2.W)) - val hpp = Output(UInt(2.W)) - val spp = Output(UInt(1.W)) - val pie = new Priv - val ie = new Priv - assert(this.getWidth == XLEN) - } - - class Interrupt extends Bundle { - val e = new Priv - val t = new Priv - val s = new Priv - } - - // Machine-Level CSRs - - val mtvec = RegInit(UInt(XLEN.W), 0.U) - val mcounteren = RegInit(UInt(XLEN.W), 0.U) - val mcause = RegInit(UInt(XLEN.W), 0.U) - val mtval = RegInit(UInt(XLEN.W), 0.U) - val mepc = Reg(UInt(XLEN.W)) - - val mie = RegInit(0.U(XLEN.W)) - val mipWire = 
WireInit(0.U.asTypeOf(new Interrupt)) - val mipReg = RegInit(0.U.asTypeOf(new Interrupt).asUInt) - val mipFixMask = "h777".U - val mip = (mipWire.asUInt | mipReg).asTypeOf(new Interrupt) - - def getMisaMxl(mxl: Int): UInt = {mxl.U << (XLEN-2)} - def getMisaExt(ext: Char): UInt = {1.U << (ext.toInt - 'a'.toInt)} - var extList = List('a', 's', 'i', 'u') - if(HasMExtension){ extList = extList :+ 'm'} - if(HasCExtension){ extList = extList :+ 'c'} - if(HasFPU){ extList = extList ++ List('f', 'd')} - val misaInitVal = getMisaMxl(2) | extList.foldLeft(0.U)((sum, i) => sum | getMisaExt(i)) //"h8000000000141105".U - val misa = RegInit(UInt(XLEN.W), misaInitVal) - // MXL = 2 | 0 | EXT = b 00 0000 0100 0001 0001 0000 0101 - // (XLEN-1, XLEN-2) | |(25, 0) ZY XWVU TSRQ PONM LKJI HGFE DCBA - - val mvendorid = RegInit(UInt(XLEN.W), 0.U) // this is a non-commercial implementation - val marchid = RegInit(UInt(XLEN.W), 0.U) // return 0 to indicate the field is not implemented - val mimpid = RegInit(UInt(XLEN.W), 0.U) // provides a unique encoding of the version of the processor implementation - val mhartid = RegInit(UInt(XLEN.W), 0.U) // the hardware thread running the code - val mstatus = RegInit(UInt(XLEN.W), "h00001800".U) - // val mstatus = RegInit(UInt(XLEN.W), "h8000c0100".U) - // mstatus Value Table - // | sd | - // | pad1 | - // | sxl | hardlinked to 10, use 00 to pass xv6 test - // | uxl | hardlinked to 00 - // | pad0 | - // | tsr | - // | tw | - // | tvm | - // | mxr | - // | sum | - // | mprv | - // | xs | 00 | - // | fs | - // | mpp | 00 | - // | hpp | 00 | - // | spp | 0 | - // | pie | 0000 | - // | ie | 0000 | uie hardlinked to 0, as N ext is not implemented - val mstatusStruct = mstatus.asTypeOf(new MstatusStruct) - def mstatusUpdateSideEffect(mstatus: UInt): UInt = { - val mstatusOld = WireInit(mstatus.asTypeOf(new MstatusStruct)) - val mstatusNew = Cat(mstatusOld.fs === "b11".U, mstatus(XLEN-2, 0)) - mstatusNew - } - - val medeleg = RegInit(UInt(XLEN.W), 0.U) - val 
mideleg = RegInit(UInt(XLEN.W), 0.U) - val mscratch = RegInit(UInt(XLEN.W), 0.U) - - val pmpcfg0 = RegInit(UInt(XLEN.W), 0.U) - val pmpcfg1 = RegInit(UInt(XLEN.W), 0.U) - val pmpcfg2 = RegInit(UInt(XLEN.W), 0.U) - val pmpcfg3 = RegInit(UInt(XLEN.W), 0.U) - val pmpaddr0 = RegInit(UInt(XLEN.W), 0.U) - val pmpaddr1 = RegInit(UInt(XLEN.W), 0.U) - val pmpaddr2 = RegInit(UInt(XLEN.W), 0.U) - val pmpaddr3 = RegInit(UInt(XLEN.W), 0.U) - - // Superviser-Level CSRs - - // val sstatus = RegInit(UInt(XLEN.W), "h00000000".U) - val sstatusWmask = "hc6122".U - // Sstatus Write Mask - // ------------------------------------------------------- - // 19 9 5 2 - // 0 1100 0000 0001 0010 0010 - // 0 c 0 1 2 2 - // ------------------------------------------------------- - val sstatusRmask = sstatusWmask | "h8000000300018000".U - // Sstatus Read Mask = (SSTATUS_WMASK | (0xf << 13) | (1ull << 63) | (3ull << 32)) - val stvec = RegInit(UInt(XLEN.W), 0.U) - // val sie = RegInit(0.U(XLEN.W)) - val sieMask = "h222".U & mideleg - val sipMask = "h222".U & mideleg - //val satp = RegInit(UInt(XLEN.W), "h8000000000087fbe".U) - val satp = RegInit(UInt(XLEN.W), 0.U) - val sepc = RegInit(UInt(XLEN.W), 0.U) - val scause = RegInit(UInt(XLEN.W), 0.U) - val stval = Reg(UInt(XLEN.W)) - val sscratch = RegInit(UInt(XLEN.W), 0.U) - val scounteren = RegInit(UInt(XLEN.W), 0.U) - BoringUtils.addSource(satp, "CSRSATP") - - // User-Level CSRs - val uepc = Reg(UInt(XLEN.W)) - - // fcsr - class FcsrStruct extends Bundle{ - val reserved = UInt((XLEN-3-5).W) - val frm = UInt(3.W) - val fflags = UInt(5.W) - assert(this.getWidth == XLEN) - } - val fcsr = RegInit(0.U(XLEN.W)) - // set mstatus->sd and mstatus->fs when true - val csrw_dirty_fp_state = WireInit(false.B) - - def frm_wfn(wdata: UInt): UInt = { - val fcsrOld = WireInit(fcsr.asTypeOf(new FcsrStruct)) - csrw_dirty_fp_state := true.B - fcsrOld.frm := wdata(2,0) - fcsrOld.asUInt() - } - def frm_rfn(rdata: UInt): UInt = rdata(7,5) - - def fflags_wfn(wdata: UInt): 
UInt = { - val fcsrOld = WireInit(fcsr.asTypeOf(new FcsrStruct)) - csrw_dirty_fp_state := true.B - fcsrOld.fflags := wdata(4,0) - fcsrOld.asUInt() - } - def fflags_rfn(rdata:UInt): UInt = rdata(4,0) - - def fcsr_wfn(wdata: UInt): UInt = { - val fcsrOld = WireInit(fcsr.asTypeOf(new FcsrStruct)) - csrw_dirty_fp_state := true.B - Cat(fcsrOld.reserved, wdata.asTypeOf(fcsrOld).frm, wdata.asTypeOf(fcsrOld).fflags) - } - - val fcsrMapping = Map( - MaskedRegMap(Fflags, fcsr, wfn = fflags_wfn, rfn = fflags_rfn), - MaskedRegMap(Frm, fcsr, wfn = frm_wfn, rfn = frm_rfn), - MaskedRegMap(Fcsr, fcsr, wfn = fcsr_wfn) - ) - - // Atom LR/SC Control Bits - val setLr = WireInit(Bool(), false.B) - val setLrVal = WireInit(Bool(), false.B) - val setLrAddr = WireInit(UInt(AddrBits.W), DontCare) //TODO : need check - val lr = RegInit(Bool(), false.B) - val lrAddr = RegInit(UInt(AddrBits.W), 0.U) - BoringUtils.addSink(setLr, "set_lr") - BoringUtils.addSink(setLrVal, "set_lr_val") - BoringUtils.addSink(setLrAddr, "set_lr_addr") - BoringUtils.addSource(lr, "lr") - BoringUtils.addSource(lrAddr, "lr_addr") - - when(setLr){ - lr := setLrVal - lrAddr := setLrAddr - } - - // Hart Priviledge Mode - val priviledgeMode = RegInit(UInt(2.W), ModeM) - - // perfcnt - val hasPerfCnt = !p.FPGAPlatform - val nrPerfCnts = if (hasPerfCnt) 0x80 else 0x3 - val perfCnts = List.fill(nrPerfCnts)(RegInit(0.U(XLEN.W))) - val perfCntsLoMapping = (0 until nrPerfCnts).map { case i => MaskedRegMap(0xb00 + i, perfCnts(i)) } - val perfCntsHiMapping = (0 until nrPerfCnts).map { case i => MaskedRegMap(0xb80 + i, perfCnts(i)(63, 32)) } - - // CSR reg map - val mapping = Map( - - // User Trap Setup - // MaskedRegMap(Ustatus, ustatus), - // MaskedRegMap(Uie, uie, 0.U, MaskedRegMap.Unwritable), - // MaskedRegMap(Utvec, utvec), - - // User Trap Handling - // MaskedRegMap(Uscratch, uscratch), - // MaskedRegMap(Uepc, uepc), - // MaskedRegMap(Ucause, ucause), - // MaskedRegMap(Utval, utval), - // MaskedRegMap(Uip, uip), - - // User 
Counter/Timers - // MaskedRegMap(Cycle, cycle), - // MaskedRegMap(Time, time), - // MaskedRegMap(Instret, instret), - - // Supervisor Trap Setup - MaskedRegMap(Sstatus, mstatus, sstatusWmask, mstatusUpdateSideEffect, sstatusRmask), - - // MaskedRegMap(Sedeleg, Sedeleg), - // MaskedRegMap(Sideleg, Sideleg), - MaskedRegMap(Sie, mie, sieMask, MaskedRegMap.NoSideEffect, sieMask), - MaskedRegMap(Stvec, stvec), - MaskedRegMap(Scounteren, scounteren), - - // Supervisor Trap Handling - MaskedRegMap(Sscratch, sscratch), - MaskedRegMap(Sepc, sepc), - MaskedRegMap(Scause, scause), - MaskedRegMap(Stval, stval), - MaskedRegMap(Sip, mip.asUInt, sipMask, MaskedRegMap.Unwritable, sipMask), - - // Supervisor Protection and Translation - MaskedRegMap(Satp, satp), - - // Machine Information Registers - MaskedRegMap(Mvendorid, mvendorid, 0.U, MaskedRegMap.Unwritable), - MaskedRegMap(Marchid, marchid, 0.U, MaskedRegMap.Unwritable), - MaskedRegMap(Mimpid, mimpid, 0.U, MaskedRegMap.Unwritable), - MaskedRegMap(Mhartid, mhartid, 0.U, MaskedRegMap.Unwritable), - - // Machine Trap Setup - // MaskedRegMap(Mstatus, mstatus, "hffffffffffffffee".U, (x=>{printf("mstatus write: %x time: %d\n", x, GTimer()); x})), - MaskedRegMap(Mstatus, mstatus, "hffffffffffffffff".U, mstatusUpdateSideEffect), - MaskedRegMap(Misa, misa), // now MXL, EXT is not changeable - MaskedRegMap(Medeleg, medeleg, "hbbff".U), - MaskedRegMap(Mideleg, mideleg, "h222".U), - MaskedRegMap(Mie, mie), - MaskedRegMap(Mtvec, mtvec), - MaskedRegMap(Mcounteren, mcounteren), - - // Machine Trap Handling - MaskedRegMap(Mscratch, mscratch), - MaskedRegMap(Mepc, mepc), - MaskedRegMap(Mcause, mcause), - MaskedRegMap(Mtval, mtval), - MaskedRegMap(Mip, mip.asUInt, 0.U, MaskedRegMap.Unwritable), - - // Machine Memory Protection - MaskedRegMap(Pmpcfg0, pmpcfg0), - MaskedRegMap(Pmpcfg1, pmpcfg1), - MaskedRegMap(Pmpcfg2, pmpcfg2), - MaskedRegMap(Pmpcfg3, pmpcfg3), - MaskedRegMap(PmpaddrBase + 0, pmpaddr0), - MaskedRegMap(PmpaddrBase + 1, 
pmpaddr1), - MaskedRegMap(PmpaddrBase + 2, pmpaddr2), - MaskedRegMap(PmpaddrBase + 3, pmpaddr3) - - ) ++ - perfCntsLoMapping ++ (if (XLEN == 32) perfCntsHiMapping else Nil) ++ - (if(HasFPU) fcsrMapping else Nil) - - val addr = src2(11, 0) - val rdata = Wire(UInt(XLEN.W)) - val csri = ZeroExt(io.cfIn.instr(19,15), XLEN) //unsigned imm for csri. [TODO] - val wdata = LookupTree(func, List( - CSROpType.wrt -> src1, - CSROpType.set -> (rdata | src1), - CSROpType.clr -> (rdata & ~src1), - CSROpType.wrti -> csri,//TODO: csri --> src2 - CSROpType.seti -> (rdata | csri), - CSROpType.clri -> (rdata & ~csri) - )) - - val wen = (valid && func =/= CSROpType.jmp) - // Debug(){when(wen){printf("[CSR] addr %x wdata %x func %x rdata %x\n", addr, wdata, func, rdata)}} - MaskedRegMap.generate(mapping, addr, rdata, wen, wdata) - val isIllegalAddr = MaskedRegMap.isIllegalAddr(mapping, addr) - val resetSatp = addr === Satp.U && wen // write to satp will cause the pipeline be flushed - io.out.bits := rdata - - // Fix Mip/Sip write - val fixMapping = Map( - MaskedRegMap(Mip, mipReg.asUInt, mipFixMask), - MaskedRegMap(Sip, mipReg.asUInt, sipMask, MaskedRegMap.NoSideEffect, sipMask) - ) - val rdataDummy = Wire(UInt(XLEN.W)) - MaskedRegMap.generate(fixMapping, addr, rdataDummy, wen, wdata) - - when(io.fpu_csr.fflags.asUInt() =/= 0.U){ - fcsr := fflags_wfn(io.fpu_csr.fflags.asUInt()) - } - // set fs and sd in mstatus - when(csrw_dirty_fp_state || io.fpu_csr.dirty_fs){ - val mstatusNew = WireInit(mstatus.asTypeOf(new MstatusStruct)) - mstatusNew.fs := "b11".U - mstatusNew.sd := true.B - mstatus := mstatusNew.asUInt() - } - io.fpu_csr.frm := fcsr.asTypeOf(new FcsrStruct).frm - - // CSR inst decode - val ret = Wire(Bool()) - val isEcall = addr === privEcall && func === CSROpType.jmp - val isMret = addr === privMret && func === CSROpType.jmp - val isSret = addr === privSret && func === CSROpType.jmp - val isUret = addr === privUret && func === CSROpType.jmp - - Debug(false){ - when(wen){ - 
printf("[CSR] csr write: pc %x addr %x rdata %x wdata %x func %x\n", io.cfIn.pc, addr, rdata, wdata, func) - printf("[MST] time %d pc %x mstatus %x mideleg %x medeleg %x mode %x\n", GTimer(), io.cfIn.pc, mstatus, mideleg , medeleg, priviledgeMode) - } - } - - // MMU Permission Check - - // def MMUPermissionCheck(ptev: Bool, pteu: Bool): Bool = ptev && !(priviledgeMode === ModeU && !pteu) && !(priviledgeMode === ModeS && pteu && mstatusStruct.sum.asBool) - // def MMUPermissionCheckLoad(ptev: Bool, pteu: Bool): Bool = ptev && !(priviledgeMode === ModeU && !pteu) && !(priviledgeMode === ModeS && pteu && mstatusStruct.sum.asBool) && (pter || (mstatusStruct.mxr && ptex)) - // imem - // val imemPtev = true.B - // val imemPteu = true.B - // val imemPtex = true.B - // val imemReq = true.B - // val imemPermissionCheckPassed = MMUPermissionCheck(imemPtev, imemPteu) - // val hasInstrPageFault = imemReq && !(imemPermissionCheckPassed && imemPtex) - // assert(!hasInstrPageFault) - - // dmem - // val dmemPtev = true.B - // val dmemPteu = true.B - // val dmemReq = true.B - // val dmemPermissionCheckPassed = MMUPermissionCheck(dmemPtev, dmemPteu) - // val dmemIsStore = true.B - - // val hasLoadPageFault = dmemReq && !dmemIsStore && !(dmemPermissionCheckPassed) - // val hasStorePageFault = dmemReq && dmemIsStore && !(dmemPermissionCheckPassed) - // assert(!hasLoadPageFault) - // assert(!hasStorePageFault) - - //TODO: Havn't test if io.dmemMMU.priviledgeMode is correct yet - io.imemMMU.priviledgeMode := priviledgeMode - io.dmemMMU.priviledgeMode := Mux(mstatusStruct.mprv.asBool, mstatusStruct.mpp, priviledgeMode) - io.imemMMU.status_sum := mstatusStruct.sum.asBool - io.dmemMMU.status_sum := mstatusStruct.sum.asBool - io.imemMMU.status_mxr := DontCare - io.dmemMMU.status_mxr := mstatusStruct.mxr.asBool - - val hasInstrPageFault = io.cfIn.exceptionVec(instrPageFault) && valid - val hasLoadPageFault = io.dmemMMU.loadPF - val hasStorePageFault = io.dmemMMU.storePF - val 
hasStoreAddrMisaligned = io.cfIn.exceptionVec(storeAddrMisaligned) - val hasLoadAddrMisaligned = io.cfIn.exceptionVec(loadAddrMisaligned) - - when(hasInstrPageFault || hasLoadPageFault || hasStorePageFault){ - val tval = Mux(hasInstrPageFault, Mux(io.cfIn.crossPageIPFFix, SignExt(io.cfIn.pc + 2.U, XLEN), SignExt(io.cfIn.pc, XLEN)), SignExt(io.dmemMMU.addr, XLEN)) - when(priviledgeMode === ModeM){ - mtval := tval - }.otherwise{ - stval := tval - } - } - - val lsuAddr = WireInit(0.U(64.W)) - BoringUtils.addSink(lsuAddr, "LSUADDR") - when(hasLoadAddrMisaligned || hasStoreAddrMisaligned) - { - mtval := SignExt(lsuAddr, XLEN) - } - - // Exception and Intr - - // interrupts - - val ideleg = (mideleg & mip.asUInt) - def priviledgedEnableDetect(x: Bool): Bool = Mux(x, ((priviledgeMode === ModeS) && mstatusStruct.ie.s) || (priviledgeMode < ModeS), - ((priviledgeMode === ModeM) && mstatusStruct.ie.m) || (priviledgeMode < ModeM)) - - val intrVecEnable = Wire(Vec(12, Bool())) - intrVecEnable.zip(ideleg.asBools).map{case(x,y) => x := priviledgedEnableDetect(y)} - val intrVec = mie(11,0) & mip.asUInt & intrVecEnable.asUInt - BoringUtils.addSource(intrVec, "intrVecIDU") - // val intrNO = PriorityEncoder(intrVec) - - val intrNO = IntPriority.foldRight(0.U)((i: Int, sum: UInt) => Mux(io.cfIn.intrVec(i), i.U, sum)) - // val intrNO = PriorityEncoder(io.cfIn.intrVec) - val raiseIntr = io.cfIn.intrVec.asUInt.orR - - val mtip = WireInit(false.B) - val meip = WireInit(false.B) - BoringUtils.addSink(mtip, "mtip") - BoringUtils.addSink(meip, "meip") - mipWire.t.m := mtip - mipWire.e.m := meip - - // exceptions - - // TODO: merge iduExceptionVec, csrExceptionVec as raiseExceptionVec - val csrExceptionVec = Wire(Vec(16, Bool())) - csrExceptionVec.map(_ := false.B) - csrExceptionVec(ecallM) := priviledgeMode === ModeM && io.in.valid && isEcall - csrExceptionVec(ecallS) := priviledgeMode === ModeS && io.in.valid && isEcall - csrExceptionVec(ecallU) := priviledgeMode === ModeU && io.in.valid && 
isEcall - // csrExceptionVec(instrPageFault) := hasInstrPageFault - csrExceptionVec(illegalInstr) := isIllegalAddr && wen // Trigger an illegal instr exception when unimplemented csr is being read/written - csrExceptionVec(loadPageFault) := hasLoadPageFault - csrExceptionVec(storePageFault) := hasStorePageFault - val iduExceptionVec = io.cfIn.exceptionVec - val raiseExceptionVec = csrExceptionVec.asUInt() | iduExceptionVec.asUInt() - val raiseException = raiseExceptionVec.orR - val exceptionNO = ExcPriority.foldRight(0.U)((i: Int, sum: UInt) => Mux(raiseExceptionVec(i), i.U, sum)) - io.wenFix := raiseException - - val causeNO = (raiseIntr << (XLEN-1)) | Mux(raiseIntr, intrNO, exceptionNO) - io.intrNO := Mux(raiseIntr, causeNO, 0.U) - - val raiseExceptionIntr = (raiseException || raiseIntr) && io.instrValid - val retTarget = Wire(UInt(VAddrBits.W)) - val trapTarget = Wire(UInt(VAddrBits.W)) - io.redirect.valid := (valid && func === CSROpType.jmp) || raiseExceptionIntr || resetSatp - io.redirect.target := Mux(resetSatp, io.cfIn.pnpc, Mux(raiseExceptionIntr, trapTarget, retTarget)) - - Debug(){ - when(raiseExceptionIntr){ - printf("[CSR] int/exc: pc %x int (%d):%x exc: (%d):%x\n",io.cfIn.pc, intrNO, io.cfIn.intrVec.asUInt, exceptionNO, raiseExceptionVec.asUInt) - printf("[MST] time %d pc %x mstatus %x mideleg %x medeleg %x mode %x\n", GTimer(), io.cfIn.pc, mstatus, mideleg , medeleg, priviledgeMode) - } - when(io.redirect.valid){ - printf("[CSR] redirect to %x\n", io.redirect.target) - } - } - - // Debug(false){ - // when(raiseExceptionIntr){ - // printf("[CSR] raiseExceptionIntr!\n[CSR] int/exc: pc %x int (%d):%x exc: (%d):%x\n",io.cfIn.pc, intrNO, io.cfIn.intrVec.asUInt, exceptionNO, raiseExceptionVec.asUInt) - // printf("[MST] time %d pc %x mstatus %x mideleg %x medeleg %x mode %x\n", GTimer(), io.cfIn.pc, mstatus, mideleg , medeleg, priviledgeMode) - // } - - // when(valid && isMret){ - // printf("[CSR] Mret to %x!\n[CSR] int/exc: pc %x int (%d):%x exc: 
(%d):%x\n",retTarget, io.cfIn.pc, intrNO, io.cfIn.intrVec.asUInt, exceptionNO, raiseExceptionVec.asUInt) - // printf("[MST] time %d pc %x mstatus %x mideleg %x medeleg %x mode %x\n", GTimer(), io.cfIn.pc, mstatus, mideleg , medeleg, priviledgeMode) - // } - - // when(valid && isSret){ - // printf("[CSR] Sret to %x!\n[CSR] int/exc: pc %x int (%d):%x exc: (%d):%x\n",retTarget, io.cfIn.pc, intrNO, io.cfIn.intrVec.asUInt, exceptionNO, raiseExceptionVec.asUInt) - // printf("[MST] time %d pc %x mstatus %x mideleg %x medeleg %x mode %x\n", GTimer(), io.cfIn.pc, mstatus, mideleg , medeleg, priviledgeMode) - // } - //printf("[CSR] Red(%d, %x) raiseExcepIntr:%d valid:%d instrValid:%x \n", io.redirect.valid, io.redirect.target, raiseExceptionIntr, valid, io.instrValid) - // } - - // Branch control - - val deleg = Mux(raiseIntr, mideleg , medeleg) - // val delegS = ((deleg & (1 << (causeNO & 0xf))) != 0) && (priviledgeMode < ModeM); - val delegS = (deleg(causeNO(3,0))) && (priviledgeMode < ModeM) - val tvalWen = !(hasInstrPageFault || hasLoadPageFault || hasStorePageFault || hasLoadAddrMisaligned || hasStoreAddrMisaligned) || raiseIntr // in noop-riscv64, no exception will come together with PF - - ret := isMret || isSret || isUret - trapTarget := Mux(delegS, stvec, mtvec)(VAddrBits-1, 0) - retTarget := DontCare - // TODO redirect target - // val illegalEret = TODO - - when (valid && isMret) { - val mstatusOld = WireInit(mstatus.asTypeOf(new MstatusStruct)) - val mstatusNew = WireInit(mstatus.asTypeOf(new MstatusStruct)) - // mstatusNew.mpp.m := ModeU //TODO: add mode U - mstatusNew.ie.m := mstatusOld.pie.m - priviledgeMode := mstatusOld.mpp - mstatusNew.pie.m := true.B - mstatusNew.mpp := ModeU - mstatus := mstatusNew.asUInt - lr := false.B - retTarget := mepc(VAddrBits-1, 0) - } - - when (valid && isSret) { - val mstatusOld = WireInit(mstatus.asTypeOf(new MstatusStruct)) - val mstatusNew = WireInit(mstatus.asTypeOf(new MstatusStruct)) - // mstatusNew.mpp.m := ModeU //TODO: 
add mode U - mstatusNew.ie.s := mstatusOld.pie.s - priviledgeMode := Cat(0.U(1.W), mstatusOld.spp) - mstatusNew.pie.s := true.B - mstatusNew.spp := ModeU - mstatus := mstatusNew.asUInt - lr := false.B - retTarget := sepc(VAddrBits-1, 0) - } - - when (valid && isUret) { - val mstatusOld = WireInit(mstatus.asTypeOf(new MstatusStruct)) - val mstatusNew = WireInit(mstatus.asTypeOf(new MstatusStruct)) - // mstatusNew.mpp.m := ModeU //TODO: add mode U - mstatusNew.ie.u := mstatusOld.pie.u - priviledgeMode := ModeU - mstatusNew.pie.u := true.B - mstatus := mstatusNew.asUInt - retTarget := uepc(VAddrBits-1, 0) - } - - when (raiseExceptionIntr) { - val mstatusOld = WireInit(mstatus.asTypeOf(new MstatusStruct)) - val mstatusNew = WireInit(mstatus.asTypeOf(new MstatusStruct)) - - when (delegS) { - scause := causeNO - sepc := SignExt(io.cfIn.pc, XLEN) - mstatusNew.spp := priviledgeMode - mstatusNew.pie.s := mstatusOld.ie.s - mstatusNew.ie.s := false.B - priviledgeMode := ModeS - when(tvalWen){stval := 0.U} // TODO: should not use =/= - // printf("[*] mstatusNew.spp %x\n", mstatusNew.spp) - // trapTarget := stvec(VAddrBits-1. 0) - }.otherwise { - mcause := causeNO - mepc := SignExt(io.cfIn.pc, XLEN) - mstatusNew.mpp := priviledgeMode - mstatusNew.pie.m := mstatusOld.ie.m - mstatusNew.ie.m := false.B - priviledgeMode := ModeM - when(tvalWen){mtval := 0.U} // TODO: should not use =/= - // trapTarget := mtvec(VAddrBits-1. 
0) - } - // mstatusNew.pie.m := LookupTree(priviledgeMode, List( - // ModeM -> mstatusOld.ie.m, - // ModeH -> mstatusOld.ie.h, //ERROR - // ModeS -> mstatusOld.ie.s, - // ModeU -> mstatusOld.ie.u - // )) - - mstatus := mstatusNew.asUInt - } - - io.in.ready := true.B - io.out.valid := valid - - Debug(false) { - printf("[CSR2] Red(%d, %x) raiseExcepIntr:%d isSret:%d retTarget:%x sepc:%x delegs:%d deleg:%x cfInpc:%x valid:%d instrValid:%x \n", io.redirect.valid, io.redirect.target, raiseExceptionIntr, isSret, retTarget, sepc, delegS, deleg, io.cfIn.pc, valid, io.instrValid) - } - - Debug(false) { - when(raiseExceptionIntr && delegS ) { - printf("[CSR2] Red(%d, %x) raiseExcepIntr:%d isSret:%d retTarget:%x sepc:%x delegs:%d deleg:%x cfInpc:%x valid:%d instrValid:%x \n", io.redirect.valid, io.redirect.target, raiseExceptionIntr, isSret, retTarget, sepc, delegS, deleg, io.cfIn.pc, valid, io.instrValid) - printf("[CSR3] sepc is writen!!! pc:%x time:%d\n", io.cfIn.pc, GTimer()) - } - } - - // perfcnt - - val perfCntList = Map( - "Mcycle" -> (0xb00, "perfCntCondMcycle" ), - "Minstret" -> (0xb02, "perfCntCondMinstret" ), - "MimemStall" -> (0xb03, "perfCntCondMimemStall" ), - "MaluInstr" -> (0xb04, "perfCntCondMaluInstr" ), - "MbruInstr" -> (0xb05, "perfCntCondMbruInstr" ), - "MlsuInstr" -> (0xb06, "perfCntCondMlsuInstr" ), - "MmduInstr" -> (0xb07, "perfCntCondMmduInstr" ), - "McsrInstr" -> (0xb08, "perfCntCondMcsrInstr" ), - "MloadInstr" -> (0xb09, "perfCntCondMloadInstr" ), - "MloadStall" -> (0xb0a, "perfCntCondMloadStall" ), - "MstoreStall" -> (0xb0b, "perfCntCondMstoreStall"), - "MmmioInstr" -> (0xb0c, "perfCntCondMmmioInstr" ), - "MicacheHit" -> (0xb0d, "perfCntCondMicacheHit" ), - "MdcacheHit" -> (0xb0e, "perfCntCondMdcacheHit" ), - "MmulInstr" -> (0xb0f, "perfCntCondMmulInstr" ), - "MifuFlush" -> (0xb10, "perfCntCondMifuFlush" ), - "MrawStall" -> (0xb11, "perfCntCondMrawStall" ), - "MexuBusy" -> (0xb12, "perfCntCondMexuBusy" ), - "MbpBRight" -> (0xb13, "MbpBRight" ), - 
"MbpBWrong" -> (0xb14, "MbpBWrong" ), - "MbpJRight" -> (0xb15, "MbpJRight" ), - "MbpJWrong" -> (0xb16, "MbpJWrong" ), - "MbpIRight" -> (0xb17, "MbpIRight" ), - "MbpIWrong" -> (0xb18, "MbpIWrong" ), - "MbpRRight" -> (0xb19, "MbpRRight" ), - "MbpRWrong" -> (0xb1a, "MbpRWrong" ), - "Custom1" -> (0xb1b, "Custom1" ), - "Custom2" -> (0xb1c, "Custom2" ), - "Custom3" -> (0xb1d, "Custom3" ), - "Custom4" -> (0xb1e, "Custom4" ), - "Custom5" -> (0xb1f, "Custom5" ), - "Custom6" -> (0xb20, "Custom6" ), - "Custom7" -> (0xb21, "Custom7" ), - "Custom8" -> (0xb22, "Custom8" ), - "Ml2cacheHit" -> (0xb23, "perfCntCondMl2cacheHit") - ) - val perfCntCond = List.fill(0x80)(WireInit(false.B)) - (perfCnts zip perfCntCond).map { case (c, e) => { when (e) { c := c + 1.U } } } - - BoringUtils.addSource(WireInit(true.B), "perfCntCondMcycle") - perfCntList.map { case (name, (addr, boringId)) => { - BoringUtils.addSink(perfCntCond(addr & 0x7f), boringId) - if (!hasPerfCnt) { - // do not enable perfcnts except for Mcycle and Minstret - if (addr != perfCntList("Mcycle")._1 && addr != perfCntList("Minstret")._1) { - perfCntCond(addr & 0x7f) := false.B - } - } - }} - - val nooptrap = WireInit(false.B) - BoringUtils.addSink(nooptrap, "nooptrap") - def readWithScala(addr: Int): UInt = mapping(addr)._1 - - if (!p.FPGAPlatform) { - // to monitor - BoringUtils.addSource(readWithScala(perfCntList("Mcycle")._1), "simCycleCnt") - BoringUtils.addSource(readWithScala(perfCntList("Minstret")._1), "simInstrCnt") - - // display all perfcnt when nooptrap is executed - when (nooptrap) { - printf("======== PerfCnt =========\n") - perfCntList.toSeq.sortBy(_._2._1).map { case (name, (addr, boringId)) => - printf("%d <- " + name + "\n", readWithScala(addr)) } - } - - // for differential testing - BoringUtils.addSource(RegNext(priviledgeMode), "difftestMode") - BoringUtils.addSource(RegNext(mstatus), "difftestMstatus") - BoringUtils.addSource(RegNext(mstatus & sstatusRmask), "difftestSstatus") - 
BoringUtils.addSource(RegNext(mepc), "difftestMepc") - BoringUtils.addSource(RegNext(sepc), "difftestSepc") - BoringUtils.addSource(RegNext(mcause), "difftestMcause") - BoringUtils.addSource(RegNext(scause), "difftestScause") - } else { - BoringUtils.addSource(readWithScala(perfCntList("Minstret")._1), "ilaInstrCnt") - } -} diff --git a/src/main/scala/noop/fu/FPU.scala b/src/main/scala/noop/fu/FPU.scala deleted file mode 100644 index b2ceffe884108404d8c6242a4f199789fbea7e84..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/fu/FPU.scala +++ /dev/null @@ -1,126 +0,0 @@ -package noop.fu - -import chisel3.{util, _} -import chisel3.util._ -import utils._ -import noop._ -import fpu._ -import fpu.FPUIOFunc._ -import fpu.divsqrt.DivSqrt -import fpu.fma.FMA - -class FpInstr extends NOOPBundle { - val func5 = UInt(5.W) - val fmt = UInt(2.W) - val rs2 = UInt(5.W) - val rs1 = UInt(5.W) - val rm = UInt(3.W) - val rd = UInt(5.W) - val op = UInt(7.W) - assert(this.getWidth == 32) -} - -class FpuCsrIO extends NOOPBundle { - val fflags = Output(new Fflags) - val isIllegal = Output(Bool()) - val dirty_fs = Output(Bool()) - val frm = Input(UInt(3.W)) -} - -class FPUIO extends FunctionUnitIO{ - // use XLEN because fpu share data path with cpu - val src3 = Input(UInt(XLEN.W)) - val fpu_csr = new FpuCsrIO - val fpWen = Input(Bool()) - val instr = Input(UInt(32.W)) - val inputFunc = Input(UInt(1.W)) - val outputFunc = Input(UInt(2.W)) -} - - - - -class FPU extends NOOPModule{ -// require(XLEN >= FLEN) - val io = IO(new FPUIO) - val (valid, src1, src2, src3, func) = (io.in.valid, io.in.bits.src1, io.in.bits.src2, io.src3, io.in.bits.func) - def access(valid: Bool, src1: UInt, src2: UInt, src3: UInt, func: UInt): UInt = { - this.valid := valid - this.src1 := src1 - this.src2 := src2 - this.src3 := src3 - this.func := func - io.out.bits - } - - val instr = io.instr.asTypeOf(new FpInstr) - val isRVD = instr.fmt(0) - val src = VecInit(Seq(src1, src2, src3)).map(x => - 
Mux(io.inputFunc === in_unbox, unboxF64ToF32(x), x) - ) - - val roudingMode = Mux(instr.rm===7.U, io.fpu_csr.frm, instr.rm) - val op = func(2, 0) - val fu = func(5, 3) - - val s_ready :: s_wait :: Nil = Enum(2) - val state = RegInit(s_ready) - switch(state){ - is(s_ready){ - when(io.in.valid){ - state := s_wait - } - } - is(s_wait){ - when(io.out.fire()){ - state := s_ready - } - } - } - - val subModuleInput = Wire(new FPUSubModuleInput) - subModuleInput.a := src(0) - subModuleInput.b := src(1) - subModuleInput.c := src(2) - subModuleInput.op := op - subModuleInput.isDouble := isRVD - subModuleInput.rm := roudingMode - - val subModules = Array[FPUSubModule]( - Module(new FMA), // 0 - Module(new FCMP), // 1 - Module(new FMV(XLEN)), // 2 - Module(new FloatToInt), // 3 - Module(new IntToFloat), // 4 - Module(new F32toF64), // 5 - Module(new F64toF32), // 6 - Module(new DivSqrt) //7 - ) - val outFuncReg = RegEnable(io.outputFunc, io.in.fire()) - val fuReg = RegEnable(fu, io.in.fire()) - for((module, idx) <- subModules.zipWithIndex){ - module.io.in.bits := subModuleInput - module.io.in.valid := io.in.fire() && idx.U===fu - module.io.out.ready := true.B - } - - val subModuleOutput = Wire(Decoupled(new FPUSubModuleOutput)) - subModuleOutput := LookupTree(fuReg, subModules.zipWithIndex.map({ - case (module, idx) => - idx.U -> module.io.out - })) - val result = subModuleOutput.bits.result - - io.in.ready := state===s_ready - io.out.valid := subModuleOutput.valid - io.out.bits := MuxLookup(outFuncReg, result, Seq( - out_sext -> SignExt(result(31, 0), XLEN), - out_box -> boxF32ToF64(result) - )) - - //TODO: check illegal rounding mode exception - io.fpu_csr.isIllegal := false.B - io.fpu_csr.dirty_fs := io.in.fire() && io.fpWen - io.fpu_csr.fflags := Mux(io.out.valid, subModuleOutput.bits.fflags, 0.U.asTypeOf(new Fflags)) -} - diff --git a/src/main/scala/noop/fu/LSU.scala b/src/main/scala/noop/fu/LSU.scala deleted file mode 100644 index 
2933faebfbff590fda5ae82f11040cc0cf87c170..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/fu/LSU.scala +++ /dev/null @@ -1,470 +0,0 @@ -package noop -import chisel3._ -import chisel3.util._ -import chisel3.util.experimental.BoringUtils -import utils._ -import bus.simplebus._ -import fpu.boxF32ToF64 - -object LSUOpType { - def lb = "b000000".U - def lh = "b000001".U - def lw = "b000010".U - def ld = "b000011".U - def lbu = "b000100".U - def lhu = "b000101".U - def lwu = "b000110".U - def flw = "b010110".U // box 32-bit data to 64-bit with 1s - def sb = "b001000".U - def sh = "b001001".U - def sw = "b001010".U - def sd = "b001011".U - - def lr = "b100000".U - def sc = "b100001".U - def amoswap = "b100010".U - def amoadd = "b100011".U - def amoxor = "b100100".U - def amoand = "b100101".U - def amoor = "b100110".U - def amomin = "b110111".U - def amomax = "b110000".U - def amominu = "b110001".U - def amomaxu = "b110010".U - - def isStore(func: UInt): Bool = func(3) - def isAtom(func: UInt): Bool = func(5) - def isLoad(func: UInt): Bool = !isStore(func) & !isAtom(func) - def isLR(func: UInt): Bool = func === lr - def isSC(func: UInt): Bool = func === sc - def isAMO(func: UInt): Bool = isAtom(func) && !isLR(func) && !isSC(func) - - def atomW = "010".U - def atomD = "011".U -} - -class LSUIO extends FunctionUnitIO { - val wdata = Input(UInt(XLEN.W)) - val instr = Input(UInt(32.W)) // Atom insts need aq rl funct3 bit from instr - val dmem = new SimpleBusUC(addrBits = VAddrBits) - val isMMIO = Output(Bool()) - val dtlbPF = Output(Bool()) - val loadAddrMisaligned = Output(Bool()) - val storeAddrMisaligned = Output(Bool()) -} - -class StoreQueueEntry extends NOOPBundle{ - val src1 = UInt(XLEN.W) - val src2 = UInt(XLEN.W) - val wdata = UInt(XLEN.W) - val func = UInt(6.W) -} - -class AtomALU extends NOOPModule { - val io = IO(new NOOPBundle{ - val src1 = Input(UInt(XLEN.W)) - val src2 = Input(UInt(XLEN.W)) - val func = Input(UInt(6.W)) - val isWordOp = 
Input(Bool()) - val result = Output(UInt(XLEN.W)) - }) - - // src1: load result - // src2: reg result - val src1 = io.src1 - val src2 = io.src2 - val func = io.func - val isAdderSub = (func =/= LSUOpType.amoadd) - val adderRes = (src1 +& (src2 ^ Fill(XLEN, isAdderSub))) + isAdderSub - val xorRes = src1 ^ src2 - val sltu = !adderRes(XLEN) - val slt = xorRes(XLEN-1) ^ sltu - - val res = LookupTreeDefault(func(5, 0), adderRes, List( - LSUOpType.amoswap -> src2, - LSUOpType.amoadd -> adderRes, - LSUOpType.amoxor -> xorRes, - LSUOpType.amoand -> (src1 & src2), - LSUOpType.amoor -> (src1 | src2), - LSUOpType.amomin -> Mux(slt(0), src1, src2), - LSUOpType.amomax -> Mux(slt(0), src2, src1), - LSUOpType.amominu -> Mux(sltu(0), src1, src2), - LSUOpType.amomaxu -> Mux(sltu(0), src2, src1) - )) - - io.result := Mux(io.isWordOp, SignExt(res(31,0), 64), res) -} - -class LSU extends NOOPModule { - val io = IO(new LSUIO) - val (valid, src1, src2, func) = (io.in.valid, io.in.bits.src1, io.in.bits.src2, io.in.bits.func) - def access(valid: Bool, src1: UInt, src2: UInt, func: UInt, dtlbPF: Bool): UInt = { - this.valid := valid - this.src1 := src1 - this.src2 := src2 - this.func := func - dtlbPF := io.dtlbPF - io.out.bits - } - val lsExecUnit = Module(new LSExecUnit) - lsExecUnit.io.instr := DontCare - io.dtlbPF := lsExecUnit.io.dtlbPF - - val storeReq = valid & LSUOpType.isStore(func) - val loadReq = valid & LSUOpType.isLoad(func) - val atomReq = valid & LSUOpType.isAtom(func) - val amoReq = valid & LSUOpType.isAMO(func) - val lrReq = valid & LSUOpType.isLR(func) - val scReq = valid & LSUOpType.isSC(func) - BoringUtils.addSource(amoReq, "ISAMO") - BoringUtils.addSource(amoReq, "ISAMO2") - - val aq = io.instr(26) - val rl = io.instr(25) - val funct3 = io.instr(14, 12) - - val atomWidthW = !funct3(0) - val atomWidthD = funct3(0) - - // Atom LR/SC Control Bits - val setLr = Wire(Bool()) - val setLrVal = Wire(Bool()) - val setLrAddr = Wire(UInt(AddrBits.W)) - val lr = WireInit(Bool(), 
false.B) - val lrAddr = WireInit(UInt(AddrBits.W), DontCare) - BoringUtils.addSource(setLr, "set_lr") - BoringUtils.addSource(setLrVal, "set_lr_val") - BoringUtils.addSource(setLrAddr, "set_lr_addr") - BoringUtils.addSink(lr, "lr") - BoringUtils.addSink(lrAddr, "lr_addr") - - val scInvalid = !(src1 === lrAddr) && scReq - - // PF signal from TLB - val dtlbFinish = WireInit(false.B) - val dtlbPF = WireInit(false.B) - val dtlbEnable = WireInit(false.B) - BoringUtils.addSink(dtlbFinish, "DTLBFINISH") - BoringUtils.addSink(dtlbPF, "DTLBPF") - BoringUtils.addSink(dtlbEnable, "DTLBENABLE") - - // LSU control FSM state - val s_idle :: s_load :: s_lr :: s_sc :: s_amo_l :: s_amo_a :: s_amo_s :: Nil = Enum(7) - - // LSU control FSM - val state = RegInit(s_idle) - val atomMemReg = Reg(UInt(XLEN.W)) - val atomRegReg = Reg(UInt(XLEN.W)) - val atomALU = Module(new AtomALU) - atomALU.io.src1 := atomMemReg - atomALU.io.src2 := io.wdata - atomALU.io.func := func - atomALU.io.isWordOp := atomWidthW - - // StoreQueue - // TODO: inst fence needs storeQueue to be finished - val enableStoreQueue = EnableStoreQueue // StoreQueue is disabled for page fault detection - val storeQueue = Module(new Queue(new StoreQueueEntry, 4)) - storeQueue.io.enq.valid := state === s_idle && storeReq - storeQueue.io.enq.bits.src1 := src1 - storeQueue.io.enq.bits.src2 := src2 - storeQueue.io.enq.bits.wdata := io.wdata - storeQueue.io.enq.bits.func := func - storeQueue.io.deq.ready := lsExecUnit.io.out.fire() - - lsExecUnit.io.in.valid := false.B - lsExecUnit.io.out.ready := DontCare - lsExecUnit.io.in.bits.src1 := DontCare - lsExecUnit.io.in.bits.src2 := DontCare - lsExecUnit.io.in.bits.func := DontCare - lsExecUnit.io.wdata := DontCare - io.out.valid := false.B - io.in.ready := false.B - - switch (state) { - is(s_idle){ - if(enableStoreQueue){ - lsExecUnit.io.in.valid := Mux(storeQueue.io.deq.valid, storeQueue.io.deq.valid, io.in.valid) - lsExecUnit.io.out.ready := io.out.ready - lsExecUnit.io.in.bits.src1 
:= Mux(storeQueue.io.deq.valid, storeQueue.io.deq.bits.src1, src1) - lsExecUnit.io.in.bits.src2 := Mux(storeQueue.io.deq.valid, storeQueue.io.deq.bits.src2, src2) - lsExecUnit.io.in.bits.func := Mux(storeQueue.io.deq.valid, storeQueue.io.deq.bits.func, func) - lsExecUnit.io.wdata := Mux(storeQueue.io.deq.valid, storeQueue.io.deq.bits.wdata, io.wdata) - io.in.ready := Mux(storeReq, storeQueue.io.enq.ready, false.B) || scInvalid - io.out.valid := Mux(storeReq, storeQueue.io.enq.ready, false.B) || scInvalid - }else{ - lsExecUnit.io.in.valid := io.in.valid && !atomReq - lsExecUnit.io.out.ready := io.out.ready - lsExecUnit.io.in.bits.src1 := src1 - lsExecUnit.io.in.bits.src2 := src2 - lsExecUnit.io.in.bits.func := func - lsExecUnit.io.wdata := io.wdata - io.in.ready := lsExecUnit.io.out.fire() || scInvalid - io.out.valid := lsExecUnit.io.out.valid || scInvalid - } - - // when(storeReq){ - // state := s_idle - // } - if(enableStoreQueue){ - when(loadReq){state := Mux(storeQueue.io.deq.valid, s_idle, s_load)} - when(amoReq){state := Mux(storeQueue.io.deq.valid, s_idle, s_amo_l)} - when(lrReq){state := Mux(storeQueue.io.deq.valid, s_idle, s_lr)} - when(scReq){state := Mux(storeQueue.io.deq.valid, s_idle, s_sc)} - }else{ - when(amoReq){state := s_amo_l} - when(lrReq){state := s_lr} - when(scReq){state := Mux(scInvalid, s_idle, s_sc)} - } - } - - is(s_load){ - lsExecUnit.io.in.valid := true.B - lsExecUnit.io.out.ready := io.out.ready - lsExecUnit.io.in.bits.src1 := src1 - lsExecUnit.io.in.bits.src2 := src2 - lsExecUnit.io.in.bits.func := func - lsExecUnit.io.wdata := DontCare - io.in.ready := lsExecUnit.io.out.fire() - io.out.valid := lsExecUnit.io.out.valid - when(lsExecUnit.io.out.fire()){state := s_idle}//load finished - } - - is(s_amo_l){ - lsExecUnit.io.in.valid := true.B - lsExecUnit.io.out.ready := true.B - lsExecUnit.io.in.bits.src1 := src1 - lsExecUnit.io.in.bits.src2 := 0.U - lsExecUnit.io.in.bits.func := Mux(atomWidthD, LSUOpType.ld, LSUOpType.lw) - 
lsExecUnit.io.wdata := DontCare - io.in.ready := false.B - io.out.valid := false.B - when(lsExecUnit.io.out.fire()){ - state := s_amo_a; - Debug(){printf("[AMO-L] lsExecUnit.io.out.bits %x addr %x src2 %x\n", lsExecUnit.io.out.bits, lsExecUnit.io.in.bits.src1, io.wdata)} - } - atomMemReg := lsExecUnit.io.out.bits - atomRegReg := lsExecUnit.io.out.bits - } - - is(s_amo_a){ - lsExecUnit.io.in.valid := false.B - lsExecUnit.io.out.ready := false.B - lsExecUnit.io.in.bits.src1 := DontCare - lsExecUnit.io.in.bits.src2 := DontCare - lsExecUnit.io.in.bits.func := DontCare - lsExecUnit.io.wdata := DontCare - io.in.ready := false.B - io.out.valid := false.B - state := s_amo_s - atomMemReg := atomALU.io.result - Debug(){printf("[AMO-A] src1 %x src2 %x res %x\n", atomMemReg, io.wdata, atomALU.io.result)} - } - - is(s_amo_s){ - lsExecUnit.io.in.valid := true.B - lsExecUnit.io.out.ready := io.out.ready - lsExecUnit.io.in.bits.src1 := src1 - lsExecUnit.io.in.bits.src2 := 0.U - lsExecUnit.io.in.bits.func := Mux(atomWidthD, LSUOpType.sd, LSUOpType.sw) - lsExecUnit.io.wdata := atomMemReg - io.in.ready := lsExecUnit.io.out.fire() - io.out.valid := lsExecUnit.io.out.fire() - when(lsExecUnit.io.out.fire()){ - state := s_idle; - Debug(){printf("[AMO-S] atomRegReg %x addr %x\n", atomRegReg, lsExecUnit.io.in.bits.src1)} - } - } - is(s_lr){ - lsExecUnit.io.in.valid := true.B - lsExecUnit.io.out.ready := io.out.ready - lsExecUnit.io.in.bits.src1 := src1 - lsExecUnit.io.in.bits.src2 := 0.U - lsExecUnit.io.in.bits.func := Mux(atomWidthD, LSUOpType.ld, LSUOpType.lw) - lsExecUnit.io.wdata := DontCare - io.in.ready := lsExecUnit.io.out.fire() - io.out.valid := lsExecUnit.io.out.fire() - when(lsExecUnit.io.out.fire()){ - state := s_idle; - Debug(){printf("[LR]\n")} - } - } - is(s_sc){ - lsExecUnit.io.in.valid := true.B - lsExecUnit.io.out.ready := io.out.ready - lsExecUnit.io.in.bits.src1 := src1 - lsExecUnit.io.in.bits.src2 := 0.U - lsExecUnit.io.in.bits.func := Mux(atomWidthD, LSUOpType.sd, 
LSUOpType.sw) - lsExecUnit.io.wdata := io.wdata - io.in.ready := lsExecUnit.io.out.fire() - io.out.valid := lsExecUnit.io.out.fire() - when(lsExecUnit.io.out.fire()){ - state := s_idle; - Debug(){printf("[SC] \n")} - } - } - } - when(dtlbPF || io.loadAddrMisaligned || io.storeAddrMisaligned){ - state := s_idle - io.out.valid := true.B - io.in.ready := true.B - } - - // controled by FSM - // io.in.ready := lsExecUnit.io.in.ready - // lsExecUnit.io.wdata := io.wdata - // io.out.valid := lsExecUnit.io.out.valid - - //Set LR/SC bits - setLr := io.out.fire() && (lrReq || scReq) - setLrVal := lrReq - setLrAddr := src1 - - io.dmem <> lsExecUnit.io.dmem - io.out.bits := Mux(scReq, scInvalid, Mux(state === s_amo_s, atomRegReg, lsExecUnit.io.out.bits)) - - val lsuMMIO = WireInit(false.B) - BoringUtils.addSink(lsuMMIO, "lsuMMIO") - - val mmioReg = RegInit(false.B) - when (!mmioReg) { mmioReg := lsuMMIO } - when (io.out.valid) { mmioReg := false.B } - io.isMMIO := mmioReg && io.out.valid - - io.loadAddrMisaligned := lsExecUnit.io.loadAddrMisaligned - io.storeAddrMisaligned := lsExecUnit.io.storeAddrMisaligned -} - -class LSExecUnit extends NOOPModule { - val io = IO(new LSUIO) - - val (valid, src1, src2, func) = (io.in.valid, io.in.bits.src1, io.in.bits.src2, io.in.bits.func) - def access(valid: Bool, src1: UInt, src2: UInt, func: UInt): UInt = { - this.valid := valid - this.src1 := src1 - this.src2 := src2 - this.func := func - io.out.bits - } - - def genWmask(addr: UInt, sizeEncode: UInt): UInt = { - LookupTree(sizeEncode, List( - "b00".U -> 0x1.U, //0001 << addr(2:0) - "b01".U -> 0x3.U, //0011 - "b10".U -> 0xf.U, //1111 - "b11".U -> 0xff.U //11111111 - )) << addr(2, 0) - } - def genWdata(data: UInt, sizeEncode: UInt): UInt = { - LookupTree(sizeEncode, List( - "b00".U -> Fill(8, data(7, 0)), - "b01".U -> Fill(4, data(15, 0)), - "b10".U -> Fill(2, data(31, 0)), - "b11".U -> data - )) - } - - val dmem = io.dmem - val addr = src1 + src2 - val addrLatch = RegNext(addr) - val 
isStore = valid && LSUOpType.isStore(func) - val partialLoad = !isStore && (func =/= LSUOpType.ld) - - val s_idle :: s_wait_tlb :: s_wait_resp :: s_partialLoad :: Nil = Enum(4) - val state = RegInit(s_idle) - - val dtlbFinish = WireInit(false.B) - val dtlbPF = WireInit(false.B) - val dtlbEnable = WireInit(false.B) - BoringUtils.addSink(dtlbFinish, "DTLBFINISH") - BoringUtils.addSink(dtlbPF, "DTLBPF") - BoringUtils.addSink(dtlbEnable, "DTLBENABLE") - - io.dtlbPF := dtlbPF - - switch (state) { - is (s_idle) { - when (dmem.req.fire() && dtlbEnable) { state := s_wait_tlb } - when (dmem.req.fire() && !dtlbEnable) { state := s_wait_resp } - //when (dmem.req.fire()) { state := Mux(isStore, s_partialLoad, s_wait_resp) } - } - is (s_wait_tlb) { - when (dtlbFinish && dtlbPF ) { state := s_idle } - when (dtlbFinish && !dtlbPF) { state := s_wait_resp/*Mux(isStore, s_partialLoad, s_wait_resp) */} - } - is (s_wait_resp) { when (dmem.resp.fire()) { state := Mux(partialLoad, s_partialLoad, s_idle) } } - is (s_partialLoad) { state := s_idle } - } - - Debug(){ - //when (dmem.req.fire()){ - printf("[LSU] IN(%d, %d) OUT(%d, %d) addr %x, size %x, wdata_raw %x, isStore %x \n", io.in.valid, io.in.ready, io.out.valid, io.out.ready, addr, func(1,0), io.wdata, isStore) - printf("[LSU] dtlbFinish:%d dtlbEnable:%d dtlbPF:%d state:%d addr:%x dmemReqFire:%d dmemRespFire:%d dmemRdata:%x \n",dtlbFinish, dtlbEnable, dtlbPF, state, dmem.req.bits.addr, dmem.req.fire(), dmem.resp.fire(), dmem.resp.bits.rdata) - //} - //when (dtlbFinish && dtlbEnable) { - printf("[LSU] dtlbFinish:%d dtlbEnable:%d dtlbPF:%d state:%d addr:%x dmemReqFire:%d dmemRespFire:%d dmemRdata:%x \n",dtlbFinish, dtlbEnable, dtlbPF, state, dmem.req.bits.addr, dmem.req.fire(), dmem.resp.fire(), dmem.resp.bits.rdata) - //} - } - - val size = func(1,0) - dmem.req.bits.apply(addr = addr(VAddrBits-1, 0), size = size, wdata = genWdata(io.wdata, size), - wmask = genWmask(addr, size), cmd = Mux(isStore, SimpleBusCmd.write, 
SimpleBusCmd.read)) - dmem.req.valid := valid && (state === s_idle) && !io.loadAddrMisaligned && !io.storeAddrMisaligned - dmem.resp.ready := true.B - - io.out.valid := Mux( dtlbPF || io.loadAddrMisaligned || io.storeAddrMisaligned, true.B, Mux(partialLoad, state === s_partialLoad, dmem.resp.fire() && (state === s_wait_resp))) - io.in.ready := (state === s_idle) || dtlbPF - - val rdata = dmem.resp.bits.rdata - val rdataLatch = RegNext(rdata) - val rdataSel = LookupTree(addrLatch(2, 0), List( - "b000".U -> rdataLatch(63, 0), - "b001".U -> rdataLatch(63, 8), - "b010".U -> rdataLatch(63, 16), - "b011".U -> rdataLatch(63, 24), - "b100".U -> rdataLatch(63, 32), - "b101".U -> rdataLatch(63, 40), - "b110".U -> rdataLatch(63, 48), - "b111".U -> rdataLatch(63, 56) - )) - val rdataPartialLoad = LookupTree(func, List( - LSUOpType.lb -> SignExt(rdataSel(7, 0) , XLEN), - LSUOpType.lh -> SignExt(rdataSel(15, 0), XLEN), - LSUOpType.lw -> SignExt(rdataSel(31, 0), XLEN), - LSUOpType.lbu -> ZeroExt(rdataSel(7, 0) , XLEN), - LSUOpType.lhu -> ZeroExt(rdataSel(15, 0), XLEN), - LSUOpType.lwu -> ZeroExt(rdataSel(31, 0), XLEN), - LSUOpType.flw -> boxF32ToF64(rdataSel(31,0)) - )) - val addrAligned = LookupTree(func(1,0), List( - "b00".U -> true.B, //b - "b01".U -> (addr(0) === 0.U), //h - "b10".U -> (addr(1,0) === 0.U), //w - "b11".U -> (addr(2,0) === 0.U) //d - )) - - io.out.bits := Mux(partialLoad, rdataPartialLoad, rdata) - - io.isMMIO := DontCare - - val isAMO = WireInit(false.B) - BoringUtils.addSink(isAMO, "ISAMO2") - BoringUtils.addSource(addr, "LSUADDR") - - io.loadAddrMisaligned := valid && !isStore && !isAMO && !addrAligned - io.storeAddrMisaligned := valid && (isStore || isAMO) && !addrAligned - - when(io.loadAddrMisaligned || io.storeAddrMisaligned) { - //printf("[LSU] misaligned addr detected\n") - } - - BoringUtils.addSource(dmem.isRead() && dmem.req.fire(), "perfCntCondMloadInstr") - BoringUtils.addSource(BoolStopWatch(dmem.isRead(), dmem.resp.fire()), 
"perfCntCondMloadStall") - BoringUtils.addSource(BoolStopWatch(dmem.isWrite(), dmem.resp.fire()), "perfCntCondMstoreStall") - BoringUtils.addSource(io.isMMIO, "perfCntCondMmmioInstr") - Debug() { - when (dmem.req.fire() && (addr === "h80104708".U || genWdata(io.wdata, size)(31,0) === "h80000218".U)){ - //printf("[LSUBP] time %d, addr %x, size %x, wdata_raw %x, wdata %x, isStore %x \n", GTimer(), addr, func(1,0), io.wdata, genWdata(io.wdata, size), isStore) - } - } -} diff --git a/src/main/scala/noop/fu/MDU.scala b/src/main/scala/noop/fu/MDU.scala deleted file mode 100644 index 20fecd768bd608b601b66e6fd05655f46491b681..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/fu/MDU.scala +++ /dev/null @@ -1,170 +0,0 @@ -package noop - -import chisel3._ -import chisel3.util._ -import chisel3.util.experimental.BoringUtils - -import utils._ - -object MDUOpType { - def mul = "b0000".U - def mulh = "b0001".U - def mulhsu = "b0010".U - def mulhu = "b0011".U - def div = "b0100".U - def divu = "b0101".U - def rem = "b0110".U - def remu = "b0111".U - - def mulw = "b1000".U - def divw = "b1100".U - def divuw = "b1101".U - def remw = "b1110".U - def remuw = "b1111".U - - def isDiv(op: UInt) = op(2) - def isDivSign(op: UInt) = isDiv(op) && !op(0) - def isW(op: UInt) = op(3) -} - -class MulDivIO(val len: Int) extends Bundle { - val in = Flipped(DecoupledIO(Vec(2, Output(UInt(len.W))))) - val sign = Input(Bool()) - val out = DecoupledIO(Output(UInt((len * 2).W))) -} - -class Multiplier(len: Int) extends NOOPModule { - val io = IO(new MulDivIO(len)) - val latency = 1 - - def DSPInPipe[T <: Data](a: T) = RegNext(a) - def DSPOutPipe[T <: Data](a: T) = RegNext(RegNext(RegNext(a))) - val mulRes = (DSPInPipe(io.in.bits(0)).asSInt * DSPInPipe(io.in.bits(1)).asSInt) - io.out.bits := DSPOutPipe(mulRes).asUInt - io.out.valid := DSPOutPipe(DSPInPipe(io.in.fire())) - - val busy = RegInit(false.B) - when (io.in.valid && !busy) { busy := true.B } - when (io.out.valid) { busy := 
false.B } - io.in.ready := (if (latency == 0) true.B else !busy) -} - -class Divider(len: Int = 64) extends NOOPModule { - val io = IO(new MulDivIO(len)) - - def abs(a: UInt, sign: Bool): (Bool, UInt) = { - val s = a(len - 1) && sign - (s, Mux(s, -a, a)) - } - - val s_idle :: s_log2 :: s_shift :: s_compute :: s_finish :: Nil = Enum(5) - val state = RegInit(s_idle) - val newReq = (state === s_idle) && io.in.fire() - - val (a, b) = (io.in.bits(0), io.in.bits(1)) - val divBy0 = b === 0.U(len.W) - - val shiftReg = Reg(UInt((1 + len * 2).W)) - val hi = shiftReg(len * 2, len) - val lo = shiftReg(len - 1, 0) - - val (aSign, aVal) = abs(a, io.sign) - val (bSign, bVal) = abs(b, io.sign) - val aSignReg = RegEnable(aSign, newReq) - val qSignReg = RegEnable((aSign ^ bSign) && !divBy0, newReq) - val bReg = RegEnable(bVal, newReq) - val aValx2Reg = RegEnable(Cat(aVal, "b0".U), newReq) - - val cnt = Counter(len) - when (newReq) { - state := s_log2 - } .elsewhen (state === s_log2) { - // `canSkipShift` is calculated as following: - // bEffectiveBit = Log2(bVal, XLEN) + 1.U - // aLeadingZero = 64.U - aEffectiveBit = 64.U - (Log2(aVal, XLEN) + 1.U) - // canSkipShift = aLeadingZero + bEffectiveBit - // = 64.U - (Log2(aVal, XLEN) + 1.U) + Log2(bVal, XLEN) + 1.U - // = 64.U + Log2(bVal, XLEN) - Log2(aVal, XLEN) - // = (64.U | Log2(bVal, XLEN)) - Log2(aVal, XLEN) // since Log2(bVal, XLEN) < 64.U - val canSkipShift = (64.U | Log2(bReg)) - Log2(aValx2Reg) - // When divide by 0, the quotient should be all 1's. - // Therefore we can not shift in 0s here. - // We do not skip any shift to avoid this. 
- cnt.value := Mux(divBy0, 0.U, Mux(canSkipShift >= (len-1).U, (len-1).U, canSkipShift)) - state := s_shift - } .elsewhen (state === s_shift) { - shiftReg := aValx2Reg << cnt.value - state := s_compute - } .elsewhen (state === s_compute) { - val enough = hi.asUInt >= bReg.asUInt - shiftReg := Cat(Mux(enough, hi - bReg, hi)(len - 1, 0), lo, enough) - cnt.inc() - when (cnt.value === (len-1).U) { state := s_finish } - } .elsewhen (state === s_finish) { - state := s_idle - } - - val r = hi(len, 1) - val resQ = Mux(qSignReg, -lo, lo) - val resR = Mux(aSignReg, -r, r) - io.out.bits := Cat(resR, resQ) - - io.out.valid := (if (HasDiv) (state === s_finish) else io.in.valid) // FIXME: should deal with ready = 0 - io.in.ready := (state === s_idle) -} - -class MDUIO extends FunctionUnitIO { -} - -class MDU extends NOOPModule { - val io = IO(new MDUIO) - - val (valid, src1, src2, func) = (io.in.valid, io.in.bits.src1, io.in.bits.src2, io.in.bits.func) - def access(valid: Bool, src1: UInt, src2: UInt, func: UInt): UInt = { - this.valid := valid - this.src1 := src1 - this.src2 := src2 - this.func := func - io.out.bits - } - - val isDiv = MDUOpType.isDiv(func) - val isDivSign = MDUOpType.isDivSign(func) - val isW = MDUOpType.isW(func) - - val mul = Module(new Multiplier(XLEN + 1)) - val div = Module(new Divider(64)) - List(mul.io, div.io).map { case x => - x.sign := isDivSign - x.out.ready := io.out.ready - } - - val signext = SignExt(_: UInt, XLEN+1) - val zeroext = ZeroExt(_: UInt, XLEN+1) - val mulInputFuncTable = List( - MDUOpType.mul -> (zeroext, zeroext), - MDUOpType.mulh -> (signext, signext), - MDUOpType.mulhsu -> (signext, zeroext), - MDUOpType.mulhu -> (zeroext, zeroext) - ) - mul.io.in.bits(0) := LookupTree(func(1,0), mulInputFuncTable.map(p => (p._1(1,0), p._2._1(src1)))) - mul.io.in.bits(1) := LookupTree(func(1,0), mulInputFuncTable.map(p => (p._1(1,0), p._2._2(src2)))) - - val divInputFunc = (x: UInt) => Mux(isW, Mux(isDivSign, SignExt(x(31,0), XLEN), 
ZeroExt(x(31,0), XLEN)), x) - div.io.in.bits(0) := divInputFunc(src1) - div.io.in.bits(1) := divInputFunc(src2) - - mul.io.in.valid := io.in.valid && !isDiv - div.io.in.valid := io.in.valid && isDiv - - val mulRes = Mux(func(1,0) === MDUOpType.mul(1,0), mul.io.out.bits(XLEN-1,0), mul.io.out.bits(2*XLEN-1,XLEN)) - val divRes = Mux(func(1) /* rem */, div.io.out.bits(2*XLEN-1,XLEN), div.io.out.bits(XLEN-1,0)) - val res = Mux(isDiv, divRes, mulRes) - io.out.bits := Mux(isW, SignExt(res(31,0),XLEN), res) - - val isDivReg = Mux(io.in.fire(), isDiv, RegNext(isDiv)) - io.in.ready := Mux(isDiv, div.io.in.ready, mul.io.in.ready) - io.out.valid := Mux(isDivReg, div.io.out.valid, mul.io.out.valid) - - BoringUtils.addSource(mul.io.out.fire(), "perfCntCondMmulInstr") -} diff --git a/src/main/scala/noop/fu/MOU.scala b/src/main/scala/noop/fu/MOU.scala deleted file mode 100644 index ff40bcc293792bdcd2771ddf02ce83198821ec82..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/fu/MOU.scala +++ /dev/null @@ -1,54 +0,0 @@ -package noop - -import chisel3._ -import chisel3.util._ -import chisel3.util.experimental.BoringUtils - -import utils._ - -// memory order unit -object MOUOpType { - def fence = "b00".U - def fencei = "b01".U - def sfence_vma = "b10".U -} - -class MOUIO extends FunctionUnitIO { - val cfIn = Flipped(new CtrlFlowIO) - val redirect = new RedirectIO -} - -class MOU extends NOOPModule { - val io = IO(new MOUIO) - - val (valid, src1, src2, func) = (io.in.valid, io.in.bits.src1, io.in.bits.src2, io.in.bits.func) - def access(valid: Bool, src1: UInt, src2: UInt, func: UInt): UInt = { - this.valid := valid - this.src1 := src1 - this.src2 := src2 - this.func := func - io.out.bits - } - - io.redirect.target := io.cfIn.pc + 4.U - io.redirect.valid := valid - val flushICache = valid && (func === MOUOpType.fencei) - BoringUtils.addSource(flushICache, "MOUFlushICache") - Debug(false){ - when(flushICache){ - printf("%d: [MOU] Flush I$ at %x\n", GTimer(), io.cfIn.pc) - 
} - } - - val flushTLB = valid && (func === MOUOpType.sfence_vma) - BoringUtils.addSource(flushTLB, "MOUFlushTLB") - Debug(false) { - when (flushTLB) { - printf("%d: [MOU] Flush TLB at %x\n", GTimer(), io.cfIn.pc) - } - } - - io.out.bits := 0.U - io.in.ready := true.B - io.out.valid := valid -} diff --git a/src/main/scala/noop/isa/Priviledged.scala b/src/main/scala/noop/isa/Priviledged.scala deleted file mode 100644 index 09faa568935b32f80dfb58c8dd21db16fadb2541..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/isa/Priviledged.scala +++ /dev/null @@ -1,24 +0,0 @@ -package noop - -import chisel3._ -import chisel3.util._ - -object Priviledged extends HasInstrType { - def ECALL = BitPat("b000000000000_00000_000_00000_1110011") - def MRET = BitPat("b001100000010_00000_000_00000_1110011") - def SRET = BitPat("b000100000010_00000_000_00000_1110011") - def SFANCE_VMA = BitPat("b0001001_?????_?????_000_00000_1110011") - def FENCE = BitPat("b????????????_?????_000_?????_0001111") - def WFI = BitPat("b0001000_00101_00000_000_00000_1110011") - - val table = Array( - ECALL -> List(InstrI, FuType.csr, CSROpType.jmp), - MRET -> List(InstrI, FuType.csr, CSROpType.jmp), - SRET -> List(InstrI, FuType.csr, CSROpType.jmp), - SFANCE_VMA -> List(InstrR, FuType.mou, MOUOpType.sfence_vma), - FENCE -> List(InstrS, FuType.alu, ALUOpType.add), // nop InstrS -> !wen - WFI -> List(InstrI, FuType.alu, ALUOpType.add) // nop - // FENCE -> List(InstrB, FuType.mou, MOUOpType.fencei) - - ) -} diff --git a/src/main/scala/noop/isa/RVA.scala b/src/main/scala/noop/isa/RVA.scala deleted file mode 100644 index ba9c0d000f644e125791926e2cfdc4ad046fb32a..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/isa/RVA.scala +++ /dev/null @@ -1,42 +0,0 @@ -package noop - -import chisel3._ -import chisel3.util._ - -object RVAInstr extends HasInstrType { - // Note: use instr(14,12) to distinguish D/W inst - // def LR = BitPat("b00010??00000_?????_???_?????_0101111") - // def SC = 
BitPat("b00011??00000_?????_???_?????_0101111") - def LR_D = BitPat("b00010_??_00000_?????_011_?????_0101111") - def SC_D = BitPat("b00011_??_?????_?????_011_?????_0101111") - def LR_W = BitPat("b00010_??_00000_?????_010_?????_0101111") - def SC_W = BitPat("b00011_??_?????_?????_010_?????_0101111") - def AMOSWAP = BitPat("b00001_??_?????_?????_01?_?????_0101111") - def AMOADD = BitPat("b00000_??_?????_?????_01?_?????_0101111") - def AMOXOR = BitPat("b00100_??_?????_?????_01?_?????_0101111") - def AMOAND = BitPat("b01100_??_?????_?????_01?_?????_0101111") - def AMOOR = BitPat("b01000_??_?????_?????_01?_?????_0101111") - def AMOMIN = BitPat("b10000_??_?????_?????_01?_?????_0101111") - def AMOMAX = BitPat("b10100_??_?????_?????_01?_?????_0101111") - def AMOMINU = BitPat("b11000_??_?????_?????_01?_?????_0101111") - def AMOMAXU = BitPat("b11100_??_?????_?????_01?_?????_0101111") - // funct3 === 010 or 011 - - val table = Array( - // LR -> List(InstrI, FuType.lsu, LSUOpType.lr), - LR_D -> List(InstrI, FuType.lsu, LSUOpType.lr), - LR_W -> List(InstrI, FuType.lsu, LSUOpType.lr), - // SC -> List(InstrS, FuType.lsu, LSUOpType.sc), - SC_D -> List(InstrSA, FuType.lsu, LSUOpType.sc), - SC_W -> List(InstrSA, FuType.lsu, LSUOpType.sc), - AMOSWAP -> List(InstrR, FuType.lsu, LSUOpType.amoswap), - AMOADD -> List(InstrR, FuType.lsu, LSUOpType.amoadd), - AMOXOR -> List(InstrR, FuType.lsu, LSUOpType.amoxor), - AMOAND -> List(InstrR, FuType.lsu, LSUOpType.amoand), - AMOOR -> List(InstrR, FuType.lsu, LSUOpType.amoor), - AMOMIN -> List(InstrR, FuType.lsu, LSUOpType.amomin), - AMOMAX -> List(InstrR, FuType.lsu, LSUOpType.amomax), - AMOMINU -> List(InstrR, FuType.lsu, LSUOpType.amominu), - AMOMAXU -> List(InstrR, FuType.lsu, LSUOpType.amomaxu) - ) -} diff --git a/src/main/scala/noop/isa/RVC.scala b/src/main/scala/noop/isa/RVC.scala deleted file mode 100644 index 2b5a5888947b7af7141c98c864de4eba45531eb0..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/isa/RVC.scala +++ 
/dev/null @@ -1,204 +0,0 @@ -// This package is used to deal with RVC decode -package noop - -import chisel3._ -import chisel3.util._ -import chisel3.util.experimental.BoringUtils - -import utils._ - -trait HasRVCConst { - - val RVCRegNumTable = List( - "b000".U -> 8.U, - "b001".U -> 9.U, - "b010".U -> 10.U, - "b011".U -> 11.U, - "b100".U -> 12.U, - "b101".U -> 13.U, - "b110".U -> 14.U, - "b111".U -> 15.U - ) - - // Imm src - def ImmNone = "b10000".U - def ImmLWSP = "b00000".U - def ImmLDSP = "b00001".U - def ImmSWSP = "b00010".U - def ImmSDSP = "b00011".U - def ImmSW = "b00100".U - def ImmSD = "b00101".U - def ImmLW = "b00110".U - def ImmLD = "b00111".U - def ImmJ = "b01000".U - def ImmB = "b01001".U - def ImmLI = "b01010".U - def ImmLUI = "b01011".U - def ImmADDI = "b01100".U - def ImmADDI16SP = "b01101".U - def ImmADD4SPN = "b01110".U - - // REG src - def DtCare = "b0000".U // reg x0 - def REGrs = "b0011".U - def REGrt = "b0001".U - def REGrd = "b0010".U - def REGrs1 = "b0100".U - def REGrs2 = "b0101".U - def REGrs1p = "b0110".U - def REGrs2p = "b0111".U - def REGx1 = "b1000".U - def REGx2 = "b1001".U -} - -object RVCInstr extends HasInstrType with HasRVCConst { - - // RVC 00 -// def C_XX = BitPat("b????????????????_???_?_10_987_65_432_10") - def C_ILLEGAL = BitPat("b0000000000000000_000_0_00_000_00_000_00") - def C_ADDI4SPN = BitPat("b????????????????_000_?_??_???_??_???_00") - def C_FLD = BitPat("b????????????????_001_?_??_???_??_???_00") -// def C_LQ = BitPat("b????????????????_001_?_??_???_??_???_00") - def C_LW = BitPat("b????????????????_010_?_??_???_??_???_00") -// def C_FLW = BitPat("b????????????????_011_?_??_???_??_???_00") // RV32FC Only - def C_LD = BitPat("b????????????????_011_?_??_???_??_???_00") - // def C_LI = BitPat("b????????????????_100_?_??_???_??_???_00") //reserved - def C_FSD = BitPat("b????????????????_101_?_??_???_??_???_00") -// def C_SQ = BitPat("b????????????????_101_?_??_???_??_???_00") - def C_SW = 
BitPat("b????????????????_110_?_??_???_??_???_00") -// def C_FSW = BitPat("b????????????????_111_?_??_???_??_???_00") // RV32FC Only - def C_SD = BitPat("b????????????????_111_?_??_???_??_???_00") - - // RVC 01 - def C_NOP = BitPat("b????????????????_000_?_00_000_??_???_01") - def C_ADDI = BitPat("b????????????????_000_?_??_???_??_???_01") - def C_JAL = BitPat("b????????????????_001_?_??_???_??_???_01") - def C_ADDIW = BitPat("b????????????????_001_?_??_???_??_???_01") - def C_LI = BitPat("b????????????????_010_?_??_???_??_???_01") - def C_ADDI16SP= BitPat("b????????????????_011_?_00_010_??_???_01") - def C_LUI = BitPat("b????????????????_011_?_??_???_??_???_01") - def C_SRLI = BitPat("b????????????????_100_?_00_???_??_???_01") -// def C_SRLI64 = BitPat("b????????????????_100_0_01_???_00_000_01") - def C_SRAI = BitPat("b????????????????_100_?_01_???_??_???_01") -// def C_SAI64 = BitPat("b????????????????_100_0_01_???_00_000_01") - def C_ANDI = BitPat("b????????????????_100_?_10_???_??_???_01") - def C_SUB = BitPat("b????????????????_100_0_11_???_00_???_01") - def C_XOR = BitPat("b????????????????_100_0_11_???_01_???_01") - def C_OR = BitPat("b????????????????_100_0_11_???_10_???_01") - def C_AND = BitPat("b????????????????_100_0_11_???_11_???_01") - def C_SUBW = BitPat("b????????????????_100_1_11_???_00_???_01") - def C_ADDW = BitPat("b????????????????_100_1_11_???_01_???_01") -// def C_RES = BitPat("b????????????????_100_1_11_???_??_???_01") -// def C_RES = BitPat("b????????????????_100_1_11_???_??_???_01") - def C_J = BitPat("b????????????????_101_?_??_???_??_???_01") - def C_BEQZ = BitPat("b????????????????_110_?_??_???_??_???_01") - def C_BNEZ = BitPat("b????????????????_111_?_??_???_??_???_01") - - //RVC 11 - def C_SLLI = BitPat("b????????????????_000_?_??_???_??_???_10") -// def C_SLLI64 = BitPat("b????????????????_000_0_??_???_00_000_10") - def C_FLDSP = BitPat("b????????????????_001_?_??_???_??_???_10") -// def C_LQSP = 
BitPat("b????????????????_001_?_??_???_??_???_10") - def C_LWSP = BitPat("b????????????????_010_?_??_???_??_???_10") -// def C_FLWSP = BitPat("b????????????????_011_?_??_???_??_???_10") // RV32FC Only - def C_LDSP = BitPat("b????????????????_011_?_??_???_??_???_10") - def C_JR = BitPat("b????????????????_100_0_??_???_00_000_10") - def C_MV = BitPat("b????????????????_100_0_??_???_??_???_10") - def C_EBREAK = BitPat("b????????????????_100_1_00_000_00_000_10") - def C_JALR = BitPat("b????????????????_100_1_??_???_00_000_10") - def C_ADD = BitPat("b????????????????_100_1_??_???_??_???_10") - def C_FSDSP = BitPat("b????????????????_101_?_??_???_??_???_10") -// def C_SQSP = BitPat("b????????????????_101_?_??_???_??_???_10") - def C_SWSP = BitPat("b????????????????_110_?_??_???_??_???_10") -// def C_FSWSP = BitPat("b????????????????_111_?_??_???_??_???_10") // RV32FC Only - def C_SDSP = BitPat("b????????????????_111_?_??_???_??_???_10") - - // TODO: HINT - // TODO: RES - -// def is_C_ADDI4SPN(op: UInt) = op(12,5) =/= 0.U - - val table = Array( - C_ILLEGAL -> List(InstrN, FuType.csr, CSROpType.jmp), - C_ADDI4SPN -> List(InstrI, FuType.alu, ALUOpType.add), - C_LW -> List(InstrI, FuType.lsu, LSUOpType.lw), - C_LD -> List(InstrI, FuType.lsu, LSUOpType.ld), - C_SW -> List(InstrS, FuType.lsu, LSUOpType.sw), - C_SD -> List(InstrS, FuType.lsu, LSUOpType.sd), - C_NOP -> List(InstrI, FuType.alu, ALUOpType.add), - C_ADDI -> List(InstrI, FuType.alu, ALUOpType.add), - // C_JAL -> List(InstrI, FuType.alu, ALUOpType.add),//RV32C only - C_ADDIW -> List(InstrI, FuType.alu, ALUOpType.addw), - C_LI -> List(InstrI, FuType.alu, ALUOpType.add), - C_ADDI16SP -> List(InstrI, FuType.alu, ALUOpType.add), - C_LUI -> List(InstrI, FuType.alu, ALUOpType.add), - C_SRLI -> List(InstrI, FuType.alu, ALUOpType.srl), - C_SRAI -> List(InstrI, FuType.alu, ALUOpType.sra), - C_ANDI -> List(InstrI, FuType.alu, ALUOpType.and), - C_SUB -> List(InstrR, FuType.alu, ALUOpType.sub), - C_XOR -> List(InstrR, 
FuType.alu, ALUOpType.xor), - C_OR -> List(InstrR, FuType.alu, ALUOpType.or), - C_AND -> List(InstrR, FuType.alu, ALUOpType.and), - C_SUBW -> List(InstrR, FuType.alu, ALUOpType.subw), - C_ADDW -> List(InstrR, FuType.alu, ALUOpType.addw), - C_J -> List(InstrJ, FuType.alu, ALUOpType.jal), - C_BEQZ -> List(InstrB, FuType.alu, ALUOpType.beq), - C_BNEZ -> List(InstrB, FuType.alu, ALUOpType.bne), - C_SLLI -> List(InstrI, FuType.alu, ALUOpType.sll), - C_LWSP -> List(InstrI, FuType.lsu, LSUOpType.lw), - C_LDSP -> List(InstrI, FuType.lsu, LSUOpType.ld), - C_JR -> List(InstrI, FuType.alu, ALUOpType.jalr), - C_MV -> List(InstrR, FuType.alu, ALUOpType.add), - C_EBREAK -> List(InstrI, FuType.alu, ALUOpType.add), - C_JALR -> List(InstrI, FuType.alu, ALUOpType.jalr), - C_ADD -> List(InstrR, FuType.alu, ALUOpType.add), - C_SWSP -> List(InstrS, FuType.lsu, LSUOpType.sw), - C_SDSP -> List(InstrS, FuType.lsu, LSUOpType.sd) - ) - - val cExtraTable = Array( - C_ADDI4SPN -> List(ImmADD4SPN, REGx2, DtCare, REGrs2p), - C_FLD -> List(ImmLD, REGrs1p, DtCare, REGrs2p), - C_LW -> List(ImmLW, REGrs1p, DtCare, REGrs2p), - C_LD -> List(ImmLD, REGrs1p, DtCare, REGrs2p), - C_FSD -> List(ImmSD, REGrs1p, REGrs2p, DtCare), - C_SW -> List(ImmSW, REGrs1p, REGrs2p, DtCare), - C_SD -> List(ImmSD, REGrs1p, REGrs2p, DtCare), - C_NOP -> List(ImmNone, DtCare, DtCare, DtCare), - C_ADDI -> List(ImmADDI, REGrd, DtCare, REGrd), - // C_JAL -> List(), - C_ADDIW -> List(ImmADDI, REGrd, DtCare, REGrd), - C_LI -> List(ImmLI, DtCare, DtCare, REGrd), - C_ADDI16SP -> List(ImmADDI16SP, REGx2, DtCare, REGx2), - C_LUI -> List(ImmLUI, DtCare, DtCare, REGrd), - C_SRLI -> List(ImmLI, REGrs1p, DtCare, REGrs1p), - C_SRAI -> List(ImmLI, REGrs1p, DtCare, REGrs1p), - C_ANDI -> List(ImmLI, REGrs1p, DtCare, REGrs1p), - C_SUB -> List(ImmNone, REGrs1p, REGrs2p, REGrs1p), - C_XOR -> List(ImmNone, REGrs1p, REGrs2p, REGrs1p), - C_OR -> List(ImmNone, REGrs1p, REGrs2p, REGrs1p), - C_AND -> List(ImmNone, REGrs1p, REGrs2p, REGrs1p), - C_SUBW 
-> List(ImmNone, REGrs1p, REGrs2p, REGrs1p), - C_ADDW -> List(ImmNone, REGrs1p, REGrs2p, REGrs1p), - C_J -> List(ImmJ, DtCare, DtCare, DtCare), - C_BEQZ -> List(ImmB, REGrs1p, DtCare, DtCare), // rd: x0 - C_BNEZ -> List(ImmB, REGrs1p, DtCare, DtCare), // rd: x0 - C_SLLI -> List(ImmLI, REGrd, DtCare, REGrd), - C_FLDSP -> List(ImmLDSP, REGx2, DtCare, REGrd), - // C_LQSP -> List(), - C_LWSP -> List(ImmLWSP, REGx2, DtCare, REGrd), - C_LDSP -> List(ImmLDSP, REGx2, DtCare, REGrd), - C_JR -> List(ImmNone, REGrs1, DtCare, DtCare), - C_MV -> List(ImmNone, REGrs2, DtCare, REGrd), - C_EBREAK -> List(ImmNone, DtCare, DtCare, DtCare), //not implemented - C_JALR -> List(ImmNone, REGrs1, DtCare, REGx1), - C_ADD -> List(ImmNone, REGrd, REGrs2, REGrd), - C_FSDSP -> List(ImmSDSP, REGx2, REGrs2, DtCare), - // C_SQSP -> List(), - C_SWSP -> List(ImmSWSP, REGx2, REGrs2, DtCare), - C_SDSP -> List(ImmSDSP, REGx2, REGrs2, DtCare) - ) - - //TODO: support pc = 2 aligned address - //TODO: branch predictor support pc = 2 align -} diff --git a/src/main/scala/noop/isa/RVD.scala b/src/main/scala/noop/isa/RVD.scala deleted file mode 100644 index 495b16bf27d37e987389dc575876907a22820560..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/isa/RVD.scala +++ /dev/null @@ -1,107 +0,0 @@ -package noop.isa - -import Chisel.BitPat -import noop._ -import noop.SrcType.{fp, imm, reg} -import RVF_FPUInstr.{Y, N} -import RVCInstr._ -import fpu.FPUIOFunc._ -import fpu.FPUOpType._ - -object RVD_LSUInstr extends HasInstrType{ - def FLD = BitPat("b?????????????????011?????0000111") - def FSD = BitPat("b?????????????????011?????0100111") - val table = Array( - FLD -> List(InstrI, FuType.lsu, LSUOpType.ld), - C_FLD -> List(InstrI, FuType.lsu, LSUOpType.ld), - C_FLDSP -> List(InstrI, FuType.lsu, LSUOpType.ld), - FSD -> List(InstrS, FuType.lsu, LSUOpType.sd), - C_FSD -> List(InstrS, FuType.lsu, LSUOpType.sd), - C_FSDSP -> List(InstrS, FuType.lsu, LSUOpType.sd) - ) -} - -object RVD_FPUInstr extends 
HasNOOPParameter { - def FADD_D = BitPat("b0000001??????????????????1010011") - def FSUB_D = BitPat("b0000101??????????????????1010011") - def FMUL_D = BitPat("b0001001??????????????????1010011") - def FDIV_D = BitPat("b0001101??????????????????1010011") - def FSGNJ_D = BitPat("b0010001??????????000?????1010011") - def FSGNJN_D = BitPat("b0010001??????????001?????1010011") - def FSGNJX_D = BitPat("b0010001??????????010?????1010011") - def FMIN_D = BitPat("b0010101??????????000?????1010011") - def FMAX_D = BitPat("b0010101??????????001?????1010011") - def FCVT_S_D = BitPat("b010000000001?????????????1010011") - def FCVT_D_S = BitPat("b010000100000?????????????1010011") - def FSQRT_D = BitPat("b010110100000?????????????1010011") - def FLE_D = BitPat("b1010001??????????000?????1010011") - def FLT_D = BitPat("b1010001??????????001?????1010011") - def FEQ_D = BitPat("b1010001??????????010?????1010011") - def FCVT_W_D = BitPat("b110000100000?????????????1010011") - def FCVT_WU_D = BitPat("b110000100001?????????????1010011") - def FCVT_L_D = BitPat("b110000100010?????????????1010011") - def FCVT_LU_D = BitPat("b110000100011?????????????1010011") - def FMV_X_D = BitPat("b111000100000?????000?????1010011") - def FCLASS_D = BitPat("b111000100000?????001?????1010011") - def FCVT_D_W = BitPat("b110100100000?????????????1010011") - def FCVT_D_WU = BitPat("b110100100001?????????????1010011") - def FCVT_D_L = BitPat("b110100100010?????????????1010011") - def FCVT_D_LU = BitPat("b110100100011?????????????1010011") - def FMV_D_X = BitPat("b111100100000?????000?????1010011") - def FLD = BitPat("b?????????????????011?????0000111") - def FSD = BitPat("b?????????????????011?????0100111") - def FMADD_D = BitPat("b?????01??????????????????1000011") - def FMSUB_D = BitPat("b?????01??????????????????1000111") - def FNMSUB_D = BitPat("b?????01??????????????????1001011") - def FNMADD_D = BitPat("b?????01??????????????????1001111") - // (isFp, src1Type, src2Type, src3Type, rfWen, fpWen, 
fuOpType, inputFunc, outputFunc) - val table = Array( - FLD -> List(Y, reg, imm, imm, N, Y, LSUOpType.ld, in_raw, out_raw), - C_FLD -> List(Y, reg, imm, imm, N, Y, LSUOpType.ld, in_raw, out_raw), - C_FLDSP -> List(Y, reg, imm, imm, N, Y, LSUOpType.ld, in_raw, out_raw), - FSD -> List(Y, reg, fp, imm, N, N, LSUOpType.sd, in_raw, out_raw), - C_FSD -> List(Y, reg, fp, imm, N, N, LSUOpType.sd, in_raw, out_raw), - C_FSDSP -> List(Y, reg, fp, imm, N, N, LSUOpType.sd, in_raw, out_raw), - // fp fp -> fp - FADD_D -> List(Y, fp, fp, imm, N, Y, fadd, in_raw, out_raw), - FSUB_D -> List(Y, fp, fp, imm, N, Y, fsub, in_raw, out_raw), - FMUL_D -> List(Y, fp, fp, imm, N, Y, fmul, in_raw, out_raw), - FDIV_D -> List(Y, fp, fp, imm, N, Y, fdiv, in_raw, out_raw), - FMIN_D -> List(Y, fp, fp, imm, N, Y, fmin, in_raw, out_raw), - FMAX_D -> List(Y, fp, fp, imm, N, Y, fmax, in_raw, out_raw), - FSGNJ_D -> List(Y, fp, fp, imm, N, Y, fsgnj, in_raw, out_raw), - FSGNJN_D -> List(Y, fp, fp, imm, N, Y, fsgnjn, in_raw, out_raw), - FSGNJX_D -> List(Y, fp, fp, imm, N, Y, fsgnjx, in_raw, out_raw), - // fp -> fp - FSQRT_D -> List(Y, fp, imm, imm, N, Y, fsqrt, in_raw, out_raw), - FCVT_S_D -> List(Y, fp, imm, imm, N, Y, d2s, in_raw, out_box), - FCVT_D_S -> List(Y, fp, imm, imm, N, Y, s2d, in_unbox, out_raw), - // fp fp fp -> fp - FMADD_D -> List(Y, fp, fp, fp, N, Y, fmadd, in_raw, out_raw), - FNMADD_D -> List(Y, fp, fp, fp, N, Y, fnmadd, in_raw, out_raw), - FMSUB_D -> List(Y, fp, fp, fp, N, Y, fmsub, in_raw, out_raw), - FNMSUB_D -> List(Y, fp, fp, fp, N, Y, fnmsub, in_raw, out_raw), - // fp -> gp - FCLASS_D -> List(Y, fp, imm, imm, Y, N, fclass, in_raw, out_raw), - FMV_X_D -> List(Y, fp, imm, imm, Y, N, fmv_f2i, in_raw, out_raw), - FCVT_W_D -> List(Y, fp, imm, imm, Y, N, f2w, in_raw, out_sext), - FCVT_WU_D -> List(Y, fp, imm, imm, Y, N, f2wu, in_raw, out_sext), - FCVT_L_D -> List(Y, fp, imm, imm, Y, N, f2l, in_raw, out_raw), - FCVT_LU_D -> List(Y, fp, imm, imm, Y, N, f2lu, in_raw, out_raw), - // fp fp -> 
gp - FLE_D -> List(Y, fp, fp, imm, Y, N, fle, in_raw, out_raw), - FLT_D -> List(Y, fp, fp, imm, Y, N, flt, in_raw, out_raw), - FEQ_D -> List(Y, fp, fp, imm, Y, N, feq, in_raw, out_raw), - // gp -> fp - FMV_D_X -> List(Y, reg, imm, imm, N, Y, fmv_i2f, in_raw, out_raw), - FCVT_D_W -> List(Y, reg, imm, imm, N, Y, w2f, in_raw, out_raw), - FCVT_D_WU -> List(Y, reg, imm, imm, N, Y, wu2f, in_raw, out_raw), - FCVT_D_L -> List(Y, reg, imm, imm, N, Y, l2f, in_raw, out_raw), - FCVT_D_LU -> List(Y, reg, imm, imm, N, Y, lu2f, in_raw, out_raw) - ) -} - -object RVDInstr { - val table = RVD_LSUInstr.table - val extraTable = RVD_FPUInstr.table -} \ No newline at end of file diff --git a/src/main/scala/noop/isa/RVF.scala b/src/main/scala/noop/isa/RVF.scala deleted file mode 100644 index 6ab8abea10a70a63db850ecca434afaeeb7600e4..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/isa/RVF.scala +++ /dev/null @@ -1,100 +0,0 @@ -package noop.isa - -import Chisel.BitPat -import chisel3._ -import noop._ -import noop.SrcType._ -import fpu.FPUOpType._ -import fpu.FPUIOFunc._ - -object RVF_LSUInstr extends HasInstrType{ - def FLW = BitPat("b?????????????????010?????0000111") - def FSW = BitPat("b?????????????????010?????0100111") - val table = Array( - FLW -> List(InstrI, FuType.lsu, LSUOpType.flw), - FSW -> List(InstrS, FuType.lsu, LSUOpType.sw) - ) -} - -object RVF_FPUInstr extends HasNOOPParameter { - def FADD_S = BitPat("b0000000??????????????????1010011") - def FSUB_S = BitPat("b0000100??????????????????1010011") - def FMUL_S = BitPat("b0001000??????????????????1010011") - def FDIV_S = BitPat("b0001100??????????????????1010011") - def FSGNJ_S = BitPat("b0010000??????????000?????1010011") - def FSGNJN_S = BitPat("b0010000??????????001?????1010011") - def FSGNJX_S = BitPat("b0010000??????????010?????1010011") - def FMIN_S = BitPat("b0010100??????????000?????1010011") - def FMAX_S = BitPat("b0010100??????????001?????1010011") - def FSQRT_S = 
BitPat("b010110000000?????????????1010011") - def FLE_S = BitPat("b1010000??????????000?????1010011") - def FLT_S = BitPat("b1010000??????????001?????1010011") - def FEQ_S = BitPat("b1010000??????????010?????1010011") - def FCVT_W_S = BitPat("b110000000000?????????????1010011") - def FCVT_WU_S = BitPat("b110000000001?????????????1010011") - def FCVT_L_S = BitPat("b110000000010?????????????1010011") - def FCVT_LU_S = BitPat("b110000000011?????????????1010011") - def FMV_X_W = BitPat("b111000000000?????000?????1010011") - def FCLASS_S = BitPat("b111000000000?????001?????1010011") - def FCVT_S_W = BitPat("b110100000000?????????????1010011") - def FCVT_S_WU = BitPat("b110100000001?????????????1010011") - def FCVT_S_L = BitPat("b110100000010?????????????1010011") - def FCVT_S_LU = BitPat("b110100000011?????????????1010011") - def FMV_W_X = BitPat("b111100000000?????000?????1010011") - def FLW = BitPat("b?????????????????010?????0000111") - def FSW = BitPat("b?????????????????010?????0100111") - def FMADD_S = BitPat("b?????00??????????????????1000011") - def FMSUB_S = BitPat("b?????00??????????????????1000111") - def FNMSUB_S = BitPat("b?????00??????????????????1001011") - def FNMADD_S = BitPat("b?????00??????????????????1001111") - - def Y: Bool = true.B - def N: Bool = false.B - - // (isFp, src1Type, src2Type, src3Type, rfWen, fpWen, fuOpType, inputFunc, outputFunc) - val DecodeDefault = List(N, imm, imm, imm, N, N, fadd, in_raw, out_raw) - val table = Array( - FLW -> List(Y, reg, imm, imm, N, Y, LSUOpType.flw, in_raw, out_raw), - FSW -> List(Y, reg, fp, imm, N, N, LSUOpType.sw, in_raw, out_raw), - // fp fp -> fp - FADD_S -> List(Y, fp, fp, imm, N, Y, fadd, in_unbox, out_box), - FSUB_S -> List(Y, fp, fp, imm, N, Y, fsub, in_unbox, out_box), - FMUL_S -> List(Y, fp, fp, imm, N, Y, fmul, in_unbox, out_box), - FDIV_S -> List(Y, fp, fp, imm, N, Y, fdiv, in_unbox, out_box), - FMIN_S -> List(Y, fp, fp, imm, N, Y, fmin, in_unbox, out_box), - FMAX_S -> List(Y, fp, fp, imm, N, 
Y, fmax, in_unbox, out_box), - FSGNJ_S -> List(Y, fp, fp, imm, N, Y, fsgnj, in_unbox, out_box), - FSGNJN_S -> List(Y, fp, fp, imm, N, Y, fsgnjn, in_unbox, out_box), - FSGNJX_S -> List(Y, fp, fp, imm, N, Y, fsgnjx, in_unbox, out_box), - // fp -> fp - FSQRT_S -> List(Y, fp, imm, imm, N, Y, fsqrt, in_unbox, out_box), - // fp fp fp -> fp - FMADD_S -> List(Y, fp, fp, fp, N, Y, fmadd, in_unbox, out_box), - FNMADD_S -> List(Y, fp, fp, fp, N, Y, fnmadd, in_unbox, out_box), - FMSUB_S -> List(Y, fp, fp, fp, N, Y, fmsub, in_unbox, out_box), - FNMSUB_S -> List(Y, fp, fp, fp, N, Y, fnmsub, in_unbox, out_box), - // fp -> gp - FCLASS_S -> List(Y, fp, imm, imm, Y, N, fclass, in_unbox, out_raw), - FMV_X_W -> List(Y, fp, imm, imm, Y, N, fmv_f2i, in_raw, out_sext), - FCVT_W_S -> List(Y, fp, imm, imm, Y, N, f2w, in_unbox, out_sext), - FCVT_WU_S -> List(Y, fp, imm, imm, Y, N, f2wu, in_unbox, out_sext), - FCVT_L_S -> List(Y, fp, imm, imm, Y, N, f2l, in_unbox, out_raw), - FCVT_LU_S -> List(Y, fp, imm, imm, Y, N, f2lu, in_unbox, out_raw) , - // fp fp -> gp - FLE_S -> List(Y, fp, fp, imm, Y, N, fle, in_unbox, out_raw), - FLT_S -> List(Y, fp, fp, imm, Y, N, flt, in_unbox, out_raw), - FEQ_S -> List(Y, fp, fp, imm, Y, N, feq, in_unbox, out_raw), - // gp -> fp - FMV_W_X -> List(Y, reg, imm, imm, N, Y, fmv_i2f, in_raw, out_box), - FCVT_S_W -> List(Y, reg, imm, imm, N, Y, w2f, in_raw, out_box), - FCVT_S_WU -> List(Y, reg, imm, imm, N, Y, wu2f, in_raw, out_box), - FCVT_S_L -> List(Y, reg, imm, imm, N, Y, l2f, in_raw, out_box), - FCVT_S_LU -> List(Y, reg, imm, imm, N, Y, lu2f, in_raw, out_box) - ) -} - -object RVFInstr{ - val table = RVF_LSUInstr.table - val extraTable = RVF_FPUInstr.table - val extraTableDefault = RVF_FPUInstr.DecodeDefault -} diff --git a/src/main/scala/noop/isa/RVI.scala b/src/main/scala/noop/isa/RVI.scala deleted file mode 100644 index f4a70266cf384e6030899481f9af2dca2c7ce602..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/isa/RVI.scala +++ /dev/null @@ 
-1,155 +0,0 @@ -package noop - -import chisel3._ -import chisel3.util._ - -object RV32I_ALUInstr extends HasInstrType with HasNOOPParameter { - def ADDI = BitPat("b????????????_?????_000_?????_0010011") - def SLLI = if (XLEN == 32) BitPat("b0000000?????_?????_001_?????_0010011") - else BitPat("b000000??????_?????_001_?????_0010011") - def SLTI = BitPat("b????????????_?????_010_?????_0010011") - def SLTIU = BitPat("b????????????_?????_011_?????_0010011") - def XORI = BitPat("b????????????_?????_100_?????_0010011") - def SRLI = if (XLEN == 32) BitPat("b0000000?????_?????_101_?????_0010011") - else BitPat("b000000??????_?????_101_?????_0010011") - def ORI = BitPat("b????????????_?????_110_?????_0010011") - def ANDI = BitPat("b????????????_?????_111_?????_0010011") - def SRAI = if (XLEN == 32) BitPat("b0100000?????_?????_101_?????_0010011") - else BitPat("b010000??????_?????_101_?????_0010011") - - def ADD = BitPat("b0000000_?????_?????_000_?????_0110011") - def SLL = BitPat("b0000000_?????_?????_001_?????_0110011") - def SLT = BitPat("b0000000_?????_?????_010_?????_0110011") - def SLTU = BitPat("b0000000_?????_?????_011_?????_0110011") - def XOR = BitPat("b0000000_?????_?????_100_?????_0110011") - def SRL = BitPat("b0000000_?????_?????_101_?????_0110011") - def OR = BitPat("b0000000_?????_?????_110_?????_0110011") - def AND = BitPat("b0000000_?????_?????_111_?????_0110011") - def SUB = BitPat("b0100000_?????_?????_000_?????_0110011") - def SRA = BitPat("b0100000_?????_?????_101_?????_0110011") - - def AUIPC = BitPat("b????????????????????_?????_0010111") - def LUI = BitPat("b????????????????????_?????_0110111") - - val table = Array( - ADDI -> List(InstrI, FuType.alu, ALUOpType.add), - SLLI -> List(InstrI, FuType.alu, ALUOpType.sll), - SLTI -> List(InstrI, FuType.alu, ALUOpType.slt), - SLTIU -> List(InstrI, FuType.alu, ALUOpType.sltu), - XORI -> List(InstrI, FuType.alu, ALUOpType.xor), - SRLI -> List(InstrI, FuType.alu, ALUOpType.srl), - ORI -> List(InstrI, 
FuType.alu, ALUOpType.or ), - ANDI -> List(InstrI, FuType.alu, ALUOpType.and), - SRAI -> List(InstrI, FuType.alu, ALUOpType.sra), - - ADD -> List(InstrR, FuType.alu, ALUOpType.add), - SLL -> List(InstrR, FuType.alu, ALUOpType.sll), - SLT -> List(InstrR, FuType.alu, ALUOpType.slt), - SLTU -> List(InstrR, FuType.alu, ALUOpType.sltu), - XOR -> List(InstrR, FuType.alu, ALUOpType.xor), - SRL -> List(InstrR, FuType.alu, ALUOpType.srl), - OR -> List(InstrR, FuType.alu, ALUOpType.or ), - AND -> List(InstrR, FuType.alu, ALUOpType.and), - SUB -> List(InstrR, FuType.alu, ALUOpType.sub), - SRA -> List(InstrR, FuType.alu, ALUOpType.sra), - - AUIPC -> List(InstrU, FuType.alu, ALUOpType.add), - LUI -> List(InstrU, FuType.alu, ALUOpType.add) - ) -} - -object RV32I_BRUInstr extends HasInstrType { - def JAL = BitPat("b????????????????????_?????_1101111") - def JALR = BitPat("b????????????_?????_000_?????_1100111") - - def BNE = BitPat("b???????_?????_?????_001_?????_1100011") - def BEQ = BitPat("b???????_?????_?????_000_?????_1100011") - def BLT = BitPat("b???????_?????_?????_100_?????_1100011") - def BGE = BitPat("b???????_?????_?????_101_?????_1100011") - def BLTU = BitPat("b???????_?????_?????_110_?????_1100011") - def BGEU = BitPat("b???????_?????_?????_111_?????_1100011") - - val table = Array( - JAL -> List(InstrJ, FuType.alu, ALUOpType.jal), - JALR -> List(InstrI, FuType.alu, ALUOpType.jalr), - - BEQ -> List(InstrB, FuType.alu, ALUOpType.beq), - BNE -> List(InstrB, FuType.alu, ALUOpType.bne), - BLT -> List(InstrB, FuType.alu, ALUOpType.blt), - BGE -> List(InstrB, FuType.alu, ALUOpType.bge), - BLTU -> List(InstrB, FuType.alu, ALUOpType.bltu), - BGEU -> List(InstrB, FuType.alu, ALUOpType.bgeu) - ) - - val bruFuncTobtbTypeTable = List( - ALUOpType.beq -> BTBtype.B, - ALUOpType.bne -> BTBtype.B, - ALUOpType.blt -> BTBtype.B, - ALUOpType.bge -> BTBtype.B, - ALUOpType.bltu -> BTBtype.B, - ALUOpType.bgeu -> BTBtype.B, - ALUOpType.call -> BTBtype.J, - ALUOpType.ret -> BTBtype.R, - 
ALUOpType.jal -> BTBtype.J, - ALUOpType.jalr -> BTBtype.I - ) -} - -object RV32I_LSUInstr extends HasInstrType { - def LB = BitPat("b????????????_?????_000_?????_0000011") - def LH = BitPat("b????????????_?????_001_?????_0000011") - def LW = BitPat("b????????????_?????_010_?????_0000011") - def LBU = BitPat("b????????????_?????_100_?????_0000011") - def LHU = BitPat("b????????????_?????_101_?????_0000011") - def SB = BitPat("b???????_?????_?????_000_?????_0100011") - def SH = BitPat("b???????_?????_?????_001_?????_0100011") - def SW = BitPat("b???????_?????_?????_010_?????_0100011") - - val table = Array( - LB -> List(InstrI, FuType.lsu, LSUOpType.lb ), - LH -> List(InstrI, FuType.lsu, LSUOpType.lh ), - LW -> List(InstrI, FuType.lsu, LSUOpType.lw ), - LBU -> List(InstrI, FuType.lsu, LSUOpType.lbu), - LHU -> List(InstrI, FuType.lsu, LSUOpType.lhu), - SB -> List(InstrS, FuType.lsu, LSUOpType.sb ), - SH -> List(InstrS, FuType.lsu, LSUOpType.sh ), - SW -> List(InstrS, FuType.lsu, LSUOpType.sw) - ) -} - -object RV64IInstr extends HasInstrType { - def ADDIW = BitPat("b???????_?????_?????_000_?????_0011011") - def SLLIW = BitPat("b0000000_?????_?????_001_?????_0011011") - def SRLIW = BitPat("b0000000_?????_?????_101_?????_0011011") - def SRAIW = BitPat("b0100000_?????_?????_101_?????_0011011") - def SLLW = BitPat("b0000000_?????_?????_001_?????_0111011") - def SRLW = BitPat("b0000000_?????_?????_101_?????_0111011") - def SRAW = BitPat("b0100000_?????_?????_101_?????_0111011") - def ADDW = BitPat("b0000000_?????_?????_000_?????_0111011") - def SUBW = BitPat("b0100000_?????_?????_000_?????_0111011") - - def LWU = BitPat("b???????_?????_?????_110_?????_0000011") - def LD = BitPat("b???????_?????_?????_011_?????_0000011") - def SD = BitPat("b???????_?????_?????_011_?????_0100011") - - val table = Array( - ADDIW -> List(InstrI, FuType.alu, ALUOpType.addw), - SLLIW -> List(InstrI, FuType.alu, ALUOpType.sllw), - SRLIW -> List(InstrI, FuType.alu, ALUOpType.srlw), - SRAIW -> 
List(InstrI, FuType.alu, ALUOpType.sraw), - SLLW -> List(InstrR, FuType.alu, ALUOpType.sllw), - SRLW -> List(InstrR, FuType.alu, ALUOpType.srlw), - SRAW -> List(InstrR, FuType.alu, ALUOpType.sraw), - ADDW -> List(InstrR, FuType.alu, ALUOpType.addw), - SUBW -> List(InstrR, FuType.alu, ALUOpType.subw), - - LWU -> List(InstrI, FuType.lsu, LSUOpType.lwu), - LD -> List(InstrI, FuType.lsu, LSUOpType.ld ), - SD -> List(InstrS, FuType.lsu, LSUOpType.sd) - ) -} - -object RVIInstr extends HasNOOPParameter { - val table = RV32I_ALUInstr.table ++ RV32I_BRUInstr.table ++ RV32I_LSUInstr.table ++ - (if (XLEN == 64) RV64IInstr.table else Nil) -} diff --git a/src/main/scala/noop/isa/RVM.scala b/src/main/scala/noop/isa/RVM.scala deleted file mode 100644 index 804c680c951f8f32b5dfd27377c0945dde49c1f5..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/isa/RVM.scala +++ /dev/null @@ -1,57 +0,0 @@ -package noop - -import chisel3._ -import chisel3.util._ - -object RV32MInstr extends HasInstrType with HasNOOPParameter { - def MUL = BitPat("b0000001_?????_?????_000_?????_0110011") - def MULH = BitPat("b0000001_?????_?????_001_?????_0110011") - def MULHSU = BitPat("b0000001_?????_?????_010_?????_0110011") - def MULHU = BitPat("b0000001_?????_?????_011_?????_0110011") - def DIV = BitPat("b0000001_?????_?????_100_?????_0110011") - def DIVU = BitPat("b0000001_?????_?????_101_?????_0110011") - def REM = BitPat("b0000001_?????_?????_110_?????_0110011") - def REMU = BitPat("b0000001_?????_?????_111_?????_0110011") - def MULW = BitPat("b0000001_?????_?????_000_?????_0111011") - def DIVW = BitPat("b0000001_?????_?????_100_?????_0111011") - def DIVUW = BitPat("b0000001_?????_?????_101_?????_0111011") - def REMW = BitPat("b0000001_?????_?????_110_?????_0111011") - def REMUW = BitPat("b0000001_?????_?????_111_?????_0111011") - - val mulTable = Array( - MUL -> List(InstrR, FuType.mdu, MDUOpType.mul), - MULH -> List(InstrR, FuType.mdu, MDUOpType.mulh), - MULHSU -> List(InstrR, 
FuType.mdu, MDUOpType.mulhsu), - MULHU -> List(InstrR, FuType.mdu, MDUOpType.mulhu) - ) - val divTable = Array( - DIV -> List(InstrR, FuType.mdu, MDUOpType.div), - DIVU -> List(InstrR, FuType.mdu, MDUOpType.divu), - REM -> List(InstrR, FuType.mdu, MDUOpType.rem), - REMU -> List(InstrR, FuType.mdu, MDUOpType.remu) - ) - val table = mulTable ++ (if (HasDiv) divTable else Nil) -} - -object RV64MInstr extends HasInstrType with HasNOOPParameter { - def MULW = BitPat("b0000001_?????_?????_000_?????_0111011") - def DIVW = BitPat("b0000001_?????_?????_100_?????_0111011") - def DIVUW = BitPat("b0000001_?????_?????_101_?????_0111011") - def REMW = BitPat("b0000001_?????_?????_110_?????_0111011") - def REMUW = BitPat("b0000001_?????_?????_111_?????_0111011") - - val mulTable = Array( - MULW -> List(InstrR, FuType.mdu, MDUOpType.mulw) - ) - val divTable = Array( - DIVW -> List(InstrR, FuType.mdu, MDUOpType.divw), - DIVUW -> List(InstrR, FuType.mdu, MDUOpType.divuw), - REMW -> List(InstrR, FuType.mdu, MDUOpType.remw), - REMUW -> List(InstrR, FuType.mdu, MDUOpType.remuw) - ) - val table = mulTable ++ (if (HasDiv) divTable else Nil) -} - -object RVMInstr extends HasNOOPParameter { - val table = RV32MInstr.table ++ (if (XLEN == 64) RV64MInstr.table else Nil) -} diff --git a/src/main/scala/noop/isa/RVZicsr.scala b/src/main/scala/noop/isa/RVZicsr.scala deleted file mode 100644 index 8906ee39f571f55c2269affff3a6040442ea523b..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/isa/RVZicsr.scala +++ /dev/null @@ -1,22 +0,0 @@ -package noop - -import chisel3._ -import chisel3.util._ - -object RVZicsrInstr extends HasInstrType { - def CSRRW = BitPat("b????????????_?????_001_?????_1110011") - def CSRRS = BitPat("b????????????_?????_010_?????_1110011") - def CSRRC = BitPat("b????????????_?????_011_?????_1110011") - def CSRRWI = BitPat("b????????????_?????_101_?????_1110011") - def CSRRSI = BitPat("b????????????_?????_110_?????_1110011") - def CSRRCI = 
BitPat("b????????????_?????_111_?????_1110011") - - val table = Array( - CSRRW -> List(InstrI, FuType.csr, CSROpType.wrt), - CSRRS -> List(InstrI, FuType.csr, CSROpType.set), - CSRRC -> List(InstrI, FuType.csr, CSROpType.clr), - CSRRWI -> List(InstrI, FuType.csr, CSROpType.wrti), - CSRRSI -> List(InstrI, FuType.csr, CSROpType.seti), - CSRRCI -> List(InstrI, FuType.csr, CSROpType.clri) - ) -} diff --git a/src/main/scala/noop/isa/RVZifencei.scala b/src/main/scala/noop/isa/RVZifencei.scala deleted file mode 100644 index 22fa15d7d795353eb745ee2ef472649797ea6b55..0000000000000000000000000000000000000000 --- a/src/main/scala/noop/isa/RVZifencei.scala +++ /dev/null @@ -1,12 +0,0 @@ -package noop - -import chisel3._ -import chisel3.util._ - -object RVZifenceiInstr extends HasInstrType { - def FENCEI = BitPat("b000000000000_00000_001_00000_0001111") - - val table = Array( - FENCEI -> List(InstrB, FuType.mou, MOUOpType.fencei) - ) -} diff --git a/src/main/scala/system/Coherence.scala b/src/main/scala/system/Coherence.scala deleted file mode 100644 index 0d5150467387bf9952daf20ef3382b9a177b6c92..0000000000000000000000000000000000000000 --- a/src/main/scala/system/Coherence.scala +++ /dev/null @@ -1,83 +0,0 @@ -package system - -import chisel3._ -import chisel3.util._ - -import utils._ -import bus.simplebus._ -import noop.HasNOOPParameter - -trait HasCoherenceParameter extends HasNOOPParameter { - val supportCoh = HasDcache -} - -class CoherenceManager extends Module with HasCoherenceParameter { - val io = IO(new Bundle { - val in = Flipped(new SimpleBusUC) - val out = new Bundle { - val mem = new SimpleBusUC - val coh = new SimpleBusUC - } - }) - - // state transition: - // write: s_idle -> s_memWriteResp -> s_idle - // read: s_idle -> s_probeResp -> (hit) s_probeForward -> s_idle - // +> (miss) s_memReadReq -> s_memReadResp -> s_idle - - val s_idle :: s_probeResp :: s_probeForward :: s_memReadReq :: s_memReadResp :: s_memWriteResp :: Nil = Enum(6) - val state = 
RegInit(s_idle) - val inflight = state =/= s_idle - - val thisReq = io.in.req - assert(!(thisReq.valid && !thisReq.bits.isRead() && !thisReq.bits.isWrite())) - - // when read, we should first probe dcache - val reqLatch = RegEnable(thisReq.bits, !inflight && thisReq.bits.isRead()) - io.out.coh match { case c => { - c.req.bits := thisReq.bits - c.req.bits.cmd := SimpleBusCmd.probe - c.resp.ready := true.B - }} - - io.out.mem.req.bits := thisReq.bits - // bind correct valid and ready signals - io.out.mem.req.valid := false.B - thisReq.ready := false.B - io.out.coh.req.valid := false.B - when (if (supportCoh) thisReq.bits.isWrite() else true.B) { - io.out.mem.req.valid := thisReq.valid && !inflight - thisReq.ready := io.out.mem.req.ready && !inflight - } .elsewhen (thisReq.bits.isRead()) { - io.out.coh.req.valid := thisReq.valid && !inflight - thisReq.ready := io.out.coh.req.ready && !inflight - } - - io.in.resp <> io.out.mem.resp - - switch (state) { - is (s_idle) { - when (thisReq.fire()) { - when (thisReq.bits.isRead()) { state := Mux(supportCoh.B, s_probeResp, s_memReadResp) } - .elsewhen (thisReq.bits.isWriteLast()) { state := s_memWriteResp } - } - } - is (s_probeResp) { - when (io.out.coh.resp.fire()) { - state := Mux(io.out.coh.resp.bits.isProbeHit(), s_probeForward, s_memReadReq) - } - } - is (s_probeForward) { - val thisResp = io.in.resp - thisResp <> io.out.coh.resp - when (thisResp.fire() && thisResp.bits.isReadLast()) { state := s_idle } - } - is (s_memReadReq) { - io.out.mem.req.bits := reqLatch - io.out.mem.req.valid := true.B - when (io.out.mem.req.fire()) { state := s_memReadResp } - } - is (s_memReadResp) { when (io.out.mem.resp.fire() && io.out.mem.resp.bits.isReadLast()) { state := s_idle } } - is (s_memWriteResp) { when (io.out.mem.resp.fire()) { state := s_idle } } - } -} diff --git a/src/main/scala/system/Prefetcher.scala b/src/main/scala/system/Prefetcher.scala deleted file mode 100644 index 
8d2f45b525ce52e38e3df9736083ac84038a8c3a..0000000000000000000000000000000000000000 --- a/src/main/scala/system/Prefetcher.scala +++ /dev/null @@ -1,47 +0,0 @@ -package system - -import noop.{NOOP, NOOPConfig, HasNOOPParameter, Cache, CacheConfig} -import bus.axi4.{AXI4, AXI4Lite} -import bus.simplebus._ -import utils._ - -import chisel3._ -import chisel3.util._ -import chisel3.util.experimental.BoringUtils - -trait HasPrefetcherParameter extends HasNOOPParameter { - val supportPrefetch = HasDcache -} - -class Prefetcher extends Module with HasPrefetcherParameter { - val io = IO(new Bundle { - val in = Flipped(Decoupled(new SimpleBusReqBundle)) - val out = Decoupled(new SimpleBusReqBundle) - }) - val getNewReq = RegInit(false.B) - val prefetchReq = RegNext(io.in.bits) - prefetchReq.cmd := SimpleBusCmd.prefetch - prefetchReq.addr := io.in.bits.addr + XLEN.U - - val lastReqAddr = (RegEnable(io.in.bits.addr, io.in.fire())) - val thisReqAddr = io.in.bits.addr - val lineMask = Cat(Fill(AddrBits - 6, 1.U(1.W)), 0.U(6.W)) - val neqAddr = (thisReqAddr & lineMask) =/= (lastReqAddr & lineMask) - - when (!getNewReq) { - io.out.bits <> io.in.bits - io.out.valid := io.in.valid - io.in.ready := !io.in.valid || io.out.fire() - getNewReq := io.in.fire() && io.in.bits.isBurst() && neqAddr - }.otherwise { - io.out.bits <> prefetchReq - io.out.valid := true.B - io.in.ready := false.B - getNewReq := !io.out.fire() - } - - Debug() { - printf("%d: [Prefetcher]: in(%d,%d), out(%d,%d), in.bits.addr = %x\n", - GTimer(), io.in.valid, io.in.ready, io.out.valid, io.out.ready, io.in.bits.addr) - } -} diff --git a/src/main/scala/system/SoC.scala b/src/main/scala/system/SoC.scala index 18bf0978b1be9f569b8aed1b2b306e9bb88b4c3f..67205d47d9fdf7a5b66fc9423492ba6c1d42d270 100644 --- a/src/main/scala/system/SoC.scala +++ b/src/main/scala/system/SoC.scala @@ -67,12 +67,9 @@ class XSSoc()(implicit p: Parameters) extends LazyModule with HasSoCParameter { val meip = Input(Bool()) val ila = 
if(env.FPGAPlatform && EnableILA) Some(Output(new ILABundle)) else None }) - val mtipSync = clint.module.io.mtip - val msipSync = clint.module.io.msip - val meipSync = RegNext(RegNext(io.meip)) - ExcitingUtils.addSource(mtipSync, "mtip") - ExcitingUtils.addSource(msipSync, "msip") - ExcitingUtils.addSource(meipSync, "meip") + xsCore.module.io.externalInterrupt.mtip := clint.module.io.mtip + xsCore.module.io.externalInterrupt.msip := clint.module.io.msip + xsCore.module.io.externalInterrupt.meip := RegNext(RegNext(io.meip)) } } diff --git a/src/main/scala/top/XiangShanStage.scala b/src/main/scala/top/XiangShanStage.scala index 765bf9503836b71bd09213bf38c9e99b23ecbe9b..d1f21a39c35007b73a9b687e70ec73ea6a2b439e 100644 --- a/src/main/scala/top/XiangShanStage.scala +++ b/src/main/scala/top/XiangShanStage.scala @@ -5,7 +5,7 @@ import firrtl.AnnotationSeq import firrtl.annotations.NoTargetAnnotation import firrtl.options.{HasShellOptions, Shell, ShellOption} import firrtl.stage.{FirrtlCli, RunFirrtlTransformAnnotation} -import xstransforms.ShowPrintTransform +// import xstransforms.ShowPrintTransform import xstransforms.PrintModuleName case class DisablePrintfAnnotation(m: String) extends NoTargetAnnotation @@ -78,7 +78,7 @@ object XiangShanStage { (new XiangShanStage).execute( args, annotations ++ Seq( - RunFirrtlTransformAnnotation(new ShowPrintTransform), + // RunFirrtlTransformAnnotation(new ShowPrintTransform), RunFirrtlTransformAnnotation(new PrintModuleName) ) ) diff --git a/src/main/scala/utils/BitUtils.scala b/src/main/scala/utils/BitUtils.scala index 375dc038f65f8b1f1d59f1a85b4481b6e47b17da..81c1aa25ad636dc8799d129b3d88bc38387efc6d 100644 --- a/src/main/scala/utils/BitUtils.scala +++ b/src/main/scala/utils/BitUtils.scala @@ -82,9 +82,9 @@ object GenMask { // generate w/r mask def apply(high: Int, low: Int) = { require(high > low) - VecInit(List.fill(high+1)(true.B)).asUInt >> low << low + (VecInit(List.fill(high+1)(true.B)).asUInt >> low << low).asUInt() } def 
apply(pos: Int) = { - 1.U << pos + (1.U << pos).asUInt() } } \ No newline at end of file diff --git a/src/main/scala/utils/Debug.scala b/src/main/scala/utils/Debug.scala deleted file mode 100644 index 9761cdfd5922e13842a2275ca179b76dc2071568..0000000000000000000000000000000000000000 --- a/src/main/scala/utils/Debug.scala +++ /dev/null @@ -1,15 +0,0 @@ -package utils - -import chisel3._ -import chisel3.util._ - -import noop.NOOPConfig - -object Debug { - def apply(flag: Boolean = NOOPConfig().EnableDebug, cond: Bool = true.B)(body: => Unit): Any = - if (flag) { when (cond && GTimer() > 100.U) { body } } -} - -object ShowType { - def apply[T: Manifest](t: T) = println(manifest[T]) -} diff --git a/src/main/scala/utils/Lock.scala b/src/main/scala/utils/Lock.scala deleted file mode 100644 index ee6d6b1a359da1249c599b46d26fe117e3abe558..0000000000000000000000000000000000000000 --- a/src/main/scala/utils/Lock.scala +++ /dev/null @@ -1,43 +0,0 @@ -package utils - -import chisel3._ -import chisel3.util._ - -class LockBundle extends Bundle { - val lock = Input(Bool()) - val unlock = Input(Bool()) - val holding = Output(Bool()) -} - -class Lock(n: Int) extends Module { - val io = IO(new Bundle { - val bundle = Vec(n, new LockBundle) - }) - - val lock = RegInit(0.U(n.W)) - val lockReq = VecInit(io.bundle.map(_.lock)).asUInt - val unlockReq = VecInit(io.bundle.map(_.unlock)).asUInt - - val lockEmpty = lock === 0.U - val hasLockReq = lockReq =/= 0.U - val lockNext = 1.U << PriorityEncoder(lockReq) - when (lockEmpty && hasLockReq) { lock := lockNext } - - val hasUnlockReq = unlockReq =/= 0.U - assert(PopCount(unlockReq) <= 1.U, "only the lock holder can issue unlock request") - assert(!(lockEmpty && hasUnlockReq), "only the lock holder can issue unlock request") - assert((lock & lockReq) === 0.U, "can not issue lock request when holding the lock") - when (!lockEmpty && hasUnlockReq) { - assert(unlockReq === lock, "only the lock holder can issue unlock request") - lock := 0.U - } 
- - val holding = Mux(lockEmpty && hasLockReq, lockNext, lock) - io.bundle.map(_.holding).zip(holding.asBools).map{ case (l, r) => l := r } - assert(PopCount(io.bundle.map(_.holding)) <= 1.U, "there should be only one lock holder") - - Debug() { - when (lockEmpty && hasLockReq) { printf("%d: %d acquire lock\n", GTimer(), PriorityEncoder(lockNext)) } - when (!lockEmpty && hasUnlockReq) { printf("%d: %d release lock\n", GTimer(), PriorityEncoder(lock)) } - } -} diff --git a/src/main/scala/utils/ParallelMux.scala b/src/main/scala/utils/ParallelMux.scala index a41da4769341bdda7971dda528e5877bf6bda260..6aa6d953f40ad621c5d8378dc1280776cb22705a 100644 --- a/src/main/scala/utils/ParallelMux.scala +++ b/src/main/scala/utils/ParallelMux.scala @@ -5,6 +5,7 @@ import chisel3.util._ object ParallelOperation { def apply[T <: Data](xs: Seq[T], func: (T, T) => T): T = { + require(xs.nonEmpty) xs match { case Seq(a) => a case Seq(a, b) => func(a, b) diff --git a/src/main/scala/utils/SRAMTemplate.scala b/src/main/scala/utils/SRAMTemplate.scala index d4769506dfd733a8dac8a8f357e87c81537ff091..ce894d41a73bdb1cc2e1ee57a26d4e3512d8fcde 100644 --- a/src/main/scala/utils/SRAMTemplate.scala +++ b/src/main/scala/utils/SRAMTemplate.scala @@ -85,14 +85,14 @@ class SRAMTemplate[T <: Data](gen: T, set: Int, way: Int = 1, io.r.req.ready := !resetState && (if (singlePort) !wen else true.B) io.w.req.ready := true.B - Debug(false) { - when (wen) { - printf("%d: SRAMTemplate: write %x to idx = %d\n", GTimer(), wdata.asUInt, setIdx) - } - when (RegNext(realRen)) { - printf("%d: SRAMTemplate: read %x at idx = %d\n", GTimer(), VecInit(rdata).asUInt, RegNext(io.r.req.bits.setIdx)) - } - } + // Debug(false) { + // when (wen) { + // printf("%d: SRAMTemplate: write %x to idx = %d\n", GTimer(), wdata.asUInt, setIdx) + // } + // when (RegNext(realRen)) { + // printf("%d: SRAMTemplate: read %x at idx = %d\n", GTimer(), VecInit(rdata).asUInt, RegNext(io.r.req.bits.setIdx)) + // } + // } } class 
SRAMTemplateWithArbiter[T <: Data](nRead: Int, gen: T, set: Int, way: Int = 1, diff --git a/src/main/scala/xiangshan/Bundle.scala b/src/main/scala/xiangshan/Bundle.scala index 32a2acba2b0117e62cdb6eeeed35ac75ae8e4156..12f247778d36333be3b5f39e67cb3bbbf41a2205 100644 --- a/src/main/scala/xiangshan/Bundle.scala +++ b/src/main/scala/xiangshan/Bundle.scala @@ -2,8 +2,8 @@ package xiangshan import chisel3._ import chisel3.util._ -import bus.simplebus._ import xiangshan.backend.brq.BrqPtr +import xiangshan.backend.fu.fpu.Fflags import xiangshan.backend.rename.FreeListPtr import xiangshan.backend.roq.RoqPtr import xiangshan.mem.{LqPtr, SqPtr} @@ -203,20 +203,34 @@ class ExuInput extends XSBundle { class ExuOutput extends XSBundle { val uop = new MicroOp val data = UInt(XLEN.W) + val fflags = new Fflags val redirectValid = Bool() val redirect = new Redirect val brUpdate = new BranchUpdateInfo val debug = new DebugBundle } +class ExternalInterruptIO extends XSBundle { + val mtip = Input(Bool()) + val msip = Input(Bool()) + val meip = Input(Bool()) +} + +class CSRSpecialIO extends XSBundle { + val exception = Flipped(ValidIO(new MicroOp)) + val isInterrupt = Input(Bool()) + val memExceptionVAddr = Input(UInt(VAddrBits.W)) + val trapTarget = Output(UInt(VAddrBits.W)) + val externalInterrupt = new ExternalInterruptIO + val interrupt = Output(Bool()) +} + class ExuIO extends XSBundle { val in = Flipped(DecoupledIO(new ExuInput)) val redirect = Flipped(ValidIO(new Redirect)) val out = DecoupledIO(new ExuOutput) // for csr - val exception = Flipped(ValidIO(new MicroOp)) - // for Lsu - val dmem = new SimpleBusUC + val csrOnly = new CSRSpecialIO val mcommit = Input(UInt(3.W)) } @@ -237,6 +251,8 @@ class FrontendToBackendIO extends XSBundle { val redirect = Flipped(ValidIO(new Redirect)) val outOfOrderBrInfo = Flipped(ValidIO(new BranchUpdateInfo)) val inOrderBrInfo = Flipped(ValidIO(new BranchUpdateInfo)) + val sfence = Input(new SfenceBundle) + val tlbCsrIO = Input(new 
TlbCsrBundle) } class TlbCsrBundle extends XSBundle { diff --git a/src/main/scala/xiangshan/XSCore.scala b/src/main/scala/xiangshan/XSCore.scala index 830ce079093bc660f9799ad8a8b1f18707ca95d0..41f0b9bd38f8f8114431aabe84dd649298435e52 100644 --- a/src/main/scala/xiangshan/XSCore.scala +++ b/src/main/scala/xiangshan/XSCore.scala @@ -2,13 +2,13 @@ package xiangshan import chisel3._ import chisel3.util._ -import noop.{Cache, CacheConfig, HasExceptionNO, TLB, TLBConfig} import top.Parameters import xiangshan.backend._ import xiangshan.backend.dispatch.DispatchParameters import xiangshan.backend.exu.ExuParameters import xiangshan.frontend._ import xiangshan.mem._ +import xiangshan.backend.fu.HasExceptionNO import xiangshan.cache.{ICache, DCache, DCacheParameters, ICacheParameters, L1plusCacheParameters, PTW, Uncache} import chipsalliance.rocketchip.config import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp} @@ -28,7 +28,7 @@ case class XSCoreParameters AddrBits: Int = 64, VAddrBits: Int = 39, PAddrBits: Int = 40, - HasFPU: Boolean = false, + HasFPU: Boolean = true, FectchWidth: Int = 8, EnableBPU: Boolean = true, EnableBPD: Boolean = true, @@ -49,8 +49,8 @@ case class XSCoreParameters CommitWidth: Int = 6, BrqSize: Int = 12, IssQueSize: Int = 8, - NRPhyRegs: Int = 72, - NRIntReadPorts: Int = 8, + NRPhyRegs: Int = 128, + NRIntReadPorts: Int = 14, NRIntWritePorts: Int = 8, NRFpReadPorts: Int = 14, NRFpWritePorts: Int = 8, @@ -76,8 +76,8 @@ case class XSCoreParameters AluCnt = 4, MulCnt = 0, MduCnt = 2, - FmacCnt = 0, - FmiscCnt = 0, + FmacCnt = 4, + FmiscCnt = 2, FmiscDivSqrtCnt = 0, LduCnt = 2, StuCnt = 2 @@ -89,7 +89,8 @@ case class XSCoreParameters TlbEntrySize: Int = 32, TlbL2EntrySize: Int = 256, // or 512 PtwL1EntrySize: Int = 16, - PtwL2EntrySize: Int = 256 + PtwL2EntrySize: Int = 256, + NumPerfCounters: Int = 16 ) trait HasXSParameter { @@ -164,6 +165,7 @@ trait HasXSParameter { val TlbL2EntrySize = core.TlbL2EntrySize val PtwL1EntrySize = 
core.PtwL1EntrySize val PtwL2EntrySize = core.PtwL2EntrySize + val NumPerfCounters = core.NumPerfCounters val l1BusDataWidth = 256 @@ -191,14 +193,18 @@ trait HasXSLog { this: RawModule => implicit val moduleName: String = this.name } -abstract class XSModule extends Module +abstract class XSModule extends MultiIOModule with HasXSParameter with HasExceptionNO with HasXSLog +{ + def io: Record +} //remove this trait after impl module logic -trait NeedImpl { this: Module => +trait NeedImpl { this: RawModule => override protected def IO[T <: Data](iodef: T): T = { + println(s"[Warn]: (${this.name}) please reomve 'NeedImpl' after implement this module") val io = chisel3.experimental.IO(iodef) io <> DontCare io @@ -269,6 +275,9 @@ class XSCore()(implicit p: config.Parameters) extends LazyModule { } class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer) with HasXSParameter { + val io = IO(new Bundle { + val externalInterrupt = new ExternalInterruptIO + }) val front = Module(new Frontend) val backend = Module(new Backend) @@ -279,17 +288,19 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer) with HasXSParameter val icache = outer.icache.module val ptw = outer.ptw.module - // TODO: connect this - front.io.backend <> backend.io.frontend front.io.icacheResp <> icache.io.resp front.io.icacheToTlb <> icache.io.tlb icache.io.req <> front.io.icacheReq icache.io.flush <> front.io.icacheFlush + icache.io.fencei := backend.io.fencei mem.io.backend <> backend.io.mem + io.externalInterrupt <> backend.io.externalInterrupt ptw.io.tlb(0) <> mem.io.ptw ptw.io.tlb(1) <> front.io.ptw + ptw.io.sfence <> backend.io.sfence + ptw.io.csr <> backend.io.tlbCsrIO dcache.io.lsu.load <> mem.io.loadUnitToDcacheVec dcache.io.lsu.lsroq <> mem.io.loadMiss diff --git a/src/main/scala/xiangshan/backend/Backend.scala b/src/main/scala/xiangshan/backend/Backend.scala index 1bdb782f0e6c1cff9f7a88bf06e430dbacd18485..463b647da066525074d339ef47493743f4de26aa 100644 --- 
a/src/main/scala/xiangshan/backend/Backend.scala +++ b/src/main/scala/xiangshan/backend/Backend.scala @@ -1,18 +1,14 @@ package xiangshan.backend -import bus.simplebus.SimpleBusUC import chisel3._ import chisel3.util._ -import chisel3.util.experimental.BoringUtils -import noop.MemMMUIO import xiangshan._ import xiangshan.backend.decode.{DecodeBuffer, DecodeStage} import xiangshan.backend.rename.Rename import xiangshan.backend.brq.Brq import xiangshan.backend.dispatch.Dispatch import xiangshan.backend.exu._ -import xiangshan.backend.fu.FunctionUnit -import xiangshan.backend.issue.{IssueQueue, ReservationStation} +import xiangshan.backend.issue.ReservationStationNew import xiangshan.backend.regfile.{Regfile, RfWritePort} import xiangshan.backend.roq.Roq import xiangshan.mem._ @@ -26,6 +22,10 @@ class Backend extends XSModule val io = IO(new Bundle { val frontend = Flipped(new FrontendToBackendIO) val mem = Flipped(new MemToBackendIO) + val externalInterrupt = new ExternalInterruptIO + val sfence = Output(new SfenceBundle) + val fencei = Output(Bool()) + val tlbCsrIO = Output(new TlbCsrBundle) }) @@ -33,14 +33,16 @@ class Backend extends XSModule val jmpExeUnit = Module(new JmpExeUnit) val mulExeUnits = Array.tabulate(exuParameters.MulCnt)(_ => Module(new MulExeUnit)) val mduExeUnits = Array.tabulate(exuParameters.MduCnt)(_ => Module(new MulDivExeUnit)) - // val fmacExeUnits = Array.tabulate(exuParameters.FmacCnt)(_ => Module(new Fmac)) - // val fmiscExeUnits = Array.tabulate(exuParameters.FmiscCnt)(_ => Module(new Fmisc)) - // val fmiscDivSqrtExeUnits = Array.tabulate(exuParameters.FmiscDivSqrtCnt)(_ => Module(new FmiscDivSqrt)) - val exeUnits = jmpExeUnit +: (aluExeUnits ++ mulExeUnits ++ mduExeUnits) - exeUnits.foreach(_.io.exception := DontCare) - exeUnits.foreach(_.io.dmem := DontCare) + val fmacExeUnits = Array.tabulate(exuParameters.FmacCnt)(_ => Module(new FmacExeUnit)) + val fmiscExeUnits = Array.tabulate(exuParameters.FmiscCnt)(_ => Module(new FmiscExeUnit)) 
+ // val fmiscDivSqrtExeUnits = Array.tabulate(exuParameters.FmiscDivSqrtCnt)(_ => Module(new FmiscDivSqrtExeUnit)) + val exeUnits = jmpExeUnit +: (aluExeUnits ++ mulExeUnits ++ mduExeUnits ++ fmacExeUnits ++ fmiscExeUnits) + exeUnits.foreach(_.io.csrOnly := DontCare) exeUnits.foreach(_.io.mcommit := DontCare) + fmacExeUnits.foreach(_.frm := jmpExeUnit.frm) + fmiscExeUnits.foreach(_.frm := jmpExeUnit.frm) + val decode = Module(new DecodeStage) val brq = Module(new Brq) val decBuf = Module(new DecodeBuffer) @@ -57,12 +59,6 @@ class Backend extends XSModule numWirtePorts = NRFpWritePorts, hasZero = false )) - val memRf = Module(new Regfile( - numReadPorts = 2*exuParameters.StuCnt + exuParameters.LduCnt, - numWirtePorts = NRIntWritePorts, - hasZero = true, - isMemRf = true - )) // backend redirect, flush pipeline val redirect = Mux( @@ -92,93 +88,88 @@ class Backend extends XSModule def needData(a: ExuConfig, b: ExuConfig): Boolean = (a.readIntRf && b.writeIntRf) || (a.readFpRf && b.writeFpRf) - val reservedStations = exeUnits. - zipWithIndex. - map({ case (exu, i) => - - val cfg = exu.config - - val wakeUpDateVec = exuConfigs.zip(exeWbReqs).filter(x => needData(cfg, x._1)).map(_._2) - val bypassCnt = exuConfigs.count(c => c.enableBypass && needData(cfg, c)) - - println(s"exu:${cfg.name} wakeupCnt:${wakeUpDateVec.length} bypassCnt:$bypassCnt") - - val rs = Module(new ReservationStation( - cfg, wakeUpDateVec.length, bypassCnt, cfg.enableBypass, false - )) - rs.io.redirect <> redirect - rs.io.numExist <> dispatch.io.numExist(i) - rs.io.enqCtrl <> dispatch.io.enqIQCtrl(i) - rs.io.enqData <> dispatch.io.enqIQData(i) - for( - (wakeUpPort, exuOut) <- - rs.io.wakeUpPorts.zip(wakeUpDateVec) - ){ - wakeUpPort.bits := exuOut.bits - wakeUpPort.valid := exuOut.valid - } - - exu.io.in <> rs.io.deq - exu.io.redirect <> redirect - rs - }) - - for( rs <- reservedStations){ - rs.io.bypassUops <> reservedStations. - filter(x => x.enableBypass && needData(rs.exuCfg, x.exuCfg)). 
- map(_.io.selectedUop) - - val bypassDataVec = exuConfigs.zip(exeWbReqs). - filter(x => x._1.enableBypass && needData(rs.exuCfg, x._1)).map(_._2) + val reservedStations = exuConfigs.zipWithIndex.map({ case (cfg, i) => + + // NOTE: an exu can have both certain and uncertain latency, + // but it cannot have multiple certain latencies + var certainLatency = -1 + if(cfg.hasCertainLatency) { certainLatency = cfg.latency.latencyVal.get } + + val writeBackedData = exuConfigs.zip(exeWbReqs).filter(x => x._1.hasCertainLatency && needData(cfg, x._1)).map(_._2.bits.data) + val wakeupCnt = writeBackedData.length + + val extraListenPorts = exuConfigs + .zip(exeWbReqs) + .filter(x => x._1.hasUncertainlatency && needData(cfg, x._1)) + .map(_._2) + val extraListenPortsCnt = extraListenPorts.length + + val feedback = (cfg == Exu.ldExeUnitCfg) || (cfg == Exu.stExeUnitCfg) + + println(s"${i}: exu:${cfg.name} wakeupCnt: ${wakeupCnt} extraListenPorts: ${extraListenPortsCnt} delay:${certainLatency} feedback:${feedback}") + + val rs = Module(new ReservationStationNew(cfg, wakeupCnt, extraListenPortsCnt, fixedDelay = certainLatency, feedback = feedback)) + + rs.io.redirect <> redirect + rs.io.numExist <> dispatch.io.numExist(i) + rs.io.enqCtrl <> dispatch.io.enqIQCtrl(i) + rs.io.enqData <> dispatch.io.enqIQData(i) + + rs.io.writeBackedData <> writeBackedData + for((x, y) <- rs.io.extraListenPorts.zip(extraListenPorts)){ + x.valid := y.fire() + x.bits := y.bits + } - for(i <- bypassDataVec.indices){ - rs.io.bypassData(i).valid := bypassDataVec(i).valid - rs.io.bypassData(i).bits := bypassDataVec(i).bits + cfg match { + case Exu.ldExeUnitCfg => + case Exu.stExeUnitCfg => + case otherCfg => + exeUnits(i).io.in <> rs.io.deq + exeUnits(i).io.redirect <> redirect + rs.io.tlbFeedback := DontCare } - } - val issueQueues = exuConfigs. - zipWithIndex. - takeRight(exuParameters.LduCnt + exuParameters.StuCnt).
- map({case (cfg, i) => - val wakeUpDateVec = exuConfigs.zip(exeWbReqs).filter(x => needData(cfg, x._1)).map(_._2) - val bypassUopVec = reservedStations. - filter(r => r.exuCfg.enableBypass && needData(cfg, r.exuCfg)).map(_.io.selectedUop) - val bypassDataVec = exuConfigs.zip(exeWbReqs). - filter(x => x._1.enableBypass && needData(cfg, x._1)).map(_._2) - - val iq = Module(new IssueQueue( - cfg, wakeUpDateVec.length, bypassUopVec.length - )) - println(s"exu:${cfg.name} wakeupCnt:${wakeUpDateVec.length} bypassCnt:${bypassUopVec.length}") - iq.io.redirect <> redirect - iq.io.tlbFeedback := io.mem.tlbFeedback(i - exuParameters.ExuCnt + exuParameters.LduCnt + exuParameters.StuCnt) - iq.io.enq <> dispatch.io.enqIQCtrl(i) - dispatch.io.numExist(i) := iq.io.numExist - for( - (wakeUpPort, exuOut) <- - iq.io.wakeUpPorts.zip(wakeUpDateVec) - ){ - wakeUpPort.bits := exuOut.bits - wakeUpPort.valid := exuOut.fire() // data after arbit - } - iq.io.bypassUops <> bypassUopVec - for(i <- bypassDataVec.indices){ - iq.io.bypassData(i).valid := bypassDataVec(i).valid - iq.io.bypassData(i).bits := bypassDataVec(i).bits - } - iq - }) + rs + }) + + for(rs <- reservedStations){ + rs.io.broadcastedUops <> reservedStations. + filter(x => x.exuCfg.hasCertainLatency && needData(rs.exuCfg, x.exuCfg)). 
+ map(_.io.selectedUop) + } io.mem.commits <> roq.io.commits io.mem.roqDeqPtr := roq.io.roqDeqPtr - io.mem.ldin <> issueQueues.filter(_.exuCfg == Exu.ldExeUnitCfg).map(_.io.deq) - io.mem.stin <> issueQueues.filter(_.exuCfg == Exu.stExeUnitCfg).map(_.io.deq) - jmpExeUnit.io.exception.valid := roq.io.redirect.valid && roq.io.redirect.bits.isException - jmpExeUnit.io.exception.bits := roq.io.exception + + io.mem.ldin <> reservedStations.filter(_.exuCfg == Exu.ldExeUnitCfg).map(_.io.deq) + io.mem.stin <> reservedStations.filter(_.exuCfg == Exu.stExeUnitCfg).map(_.io.deq) + jmpExeUnit.io.csrOnly.exception.valid := roq.io.redirect.valid && roq.io.redirect.bits.isException + jmpExeUnit.io.csrOnly.exception.bits := roq.io.exception + jmpExeUnit.fflags := roq.io.fflags + jmpExeUnit.dirty_fs := roq.io.dirty_fs + jmpExeUnit.io.csrOnly.externalInterrupt := io.externalInterrupt + jmpExeUnit.io.csrOnly.memExceptionVAddr := io.mem.exceptionAddr.vaddr + jmpExeUnit.fenceToSbuffer <> io.mem.fenceToSbuffer + io.mem.sfence <> jmpExeUnit.sfence + io.mem.csr <> jmpExeUnit.tlbCsrIO + + io.mem.exceptionAddr.lsIdx.lsroqIdx := roq.io.exception.lsroqIdx + io.mem.exceptionAddr.lsIdx.lqIdx := roq.io.exception.lqIdx + io.mem.exceptionAddr.lsIdx.sqIdx := roq.io.exception.sqIdx + io.mem.exceptionAddr.isStore := CommitType.lsInstIsStore(roq.io.exception.ctrl.commitType) + + io.mem.tlbFeedback <> reservedStations.filter( + x => x.exuCfg == Exu.ldExeUnitCfg || x.exuCfg == Exu.stExeUnitCfg + ).map(_.io.tlbFeedback) io.frontend.outOfOrderBrInfo <> brq.io.outOfOrderBrInfo io.frontend.inOrderBrInfo <> brq.io.inOrderBrInfo + io.frontend.sfence <> jmpExeUnit.sfence + io.frontend.tlbCsrIO <> jmpExeUnit.tlbCsrIO + + io.fencei := jmpExeUnit.fencei + io.tlbCsrIO := jmpExeUnit.tlbCsrIO decode.io.in <> io.frontend.cfVec brq.io.roqRedirect <> roq.io.redirect @@ -197,9 +188,9 @@ class Backend extends XSModule rename.io.redirect <> redirect rename.io.roqCommits <> roq.io.commits rename.io.in <> decBuf.io.out - 
rename.io.intRfReadAddr <> dispatch.io.readIntRf.map(_.addr) ++ dispatch.io.intMemRegAddr + rename.io.intRfReadAddr <> dispatch.io.readIntRf.map(_.addr) ++ dispatch.io.memIntRf.map(_.addr) rename.io.intPregRdy <> dispatch.io.intPregRdy ++ dispatch.io.intMemRegRdy - rename.io.fpRfReadAddr <> dispatch.io.readFpRf.map(_.addr) ++ dispatch.io.fpMemRegAddr + rename.io.fpRfReadAddr <> dispatch.io.readFpRf.map(_.addr) ++ dispatch.io.memFpRf.map(_.addr) rename.io.fpPregRdy <> dispatch.io.fpPregRdy ++ dispatch.io.fpMemRegRdy rename.io.replayPregReq <> dispatch.io.replayPregReq dispatch.io.redirect <> redirect @@ -208,6 +199,8 @@ class Backend extends XSModule roq.io.memRedirect <> io.mem.replayAll roq.io.brqRedirect <> brq.io.redirect roq.io.dp1Req <> dispatch.io.toRoq + roq.io.intrBitSet := jmpExeUnit.io.csrOnly.interrupt + roq.io.trapTarget := jmpExeUnit.io.csrOnly.trapTarget dispatch.io.roqIdxs <> roq.io.roqIdxs io.mem.dp1Req <> dispatch.io.toLsroq dispatch.io.lsIdxs <> io.mem.lsIdxs @@ -216,9 +209,8 @@ class Backend extends XSModule dispatch.io.dequeueRoqIndex.bits := Mux(io.mem.oldestStore.valid, io.mem.oldestStore.bits, roq.io.commitRoqIndex.bits) - intRf.io.readPorts <> dispatch.io.readIntRf - fpRf.io.readPorts <> dispatch.io.readFpRf ++ issueQueues.flatMap(_.io.readFpRf) - memRf.io.readPorts <> issueQueues.flatMap(_.io.readIntRf) + intRf.io.readPorts <> dispatch.io.readIntRf ++ dispatch.io.memIntRf + fpRf.io.readPorts <> dispatch.io.readFpRf ++ dispatch.io.memFpRf io.mem.redirect <> redirect @@ -235,9 +227,7 @@ class Backend extends XSModule rfWrite.data := x.bits.data rfWrite } - val intRfWrite = wbIntResults.map(exuOutToRfWrite) - intRf.io.writePorts <> intRfWrite - memRf.io.writePorts <> intRfWrite + intRf.io.writePorts <> wbIntResults.map(exuOutToRfWrite) fpRf.io.writePorts <> wbFpResults.map(exuOutToRfWrite) rename.io.wbIntResults <> wbIntResults @@ -247,32 +237,12 @@ class Backend extends XSModule roq.io.exeWbResults.last := brq.io.out - // TODO: Remove sink 
and source - val tmp = WireInit(0.U) - val sinks = Array[String]( - "DTLBFINISH", - "DTLBPF", - "DTLBENABLE", - "perfCntCondMdcacheLoss", - "perfCntCondMl2cacheLoss", - "perfCntCondMdcacheHit", - "lsuMMIO", - "perfCntCondMl2cacheHit", - "perfCntCondMl2cacheReq", - "mtip", - "perfCntCondMdcacheReq", - "meip" - ) - for (s <- sinks) { - BoringUtils.addSink(tmp, s) - } - val debugIntReg, debugFpReg = WireInit(VecInit(Seq.fill(32)(0.U(XLEN.W)))) - BoringUtils.addSink(debugIntReg, "DEBUG_INT_ARCH_REG") - BoringUtils.addSink(debugFpReg, "DEBUG_FP_ARCH_REG") + ExcitingUtils.addSink(debugIntReg, "DEBUG_INT_ARCH_REG", ExcitingUtils.Debug) + ExcitingUtils.addSink(debugFpReg, "DEBUG_FP_ARCH_REG", ExcitingUtils.Debug) val debugArchReg = WireInit(VecInit(debugIntReg ++ debugFpReg)) if (!env.FPGAPlatform) { - BoringUtils.addSource(debugArchReg, "difftestRegs") + ExcitingUtils.addSource(debugArchReg, "difftestRegs", ExcitingUtils.Debug) } } diff --git a/src/main/scala/xiangshan/backend/decode/DecodeHelper.scala b/src/main/scala/xiangshan/backend/decode/DecodeHelper.scala index 0c6ee0d8721a0db7003fb780cdf8bc9a76bfef0f..3797d818b7b120a3d1fa6da471f01a9cee6851ec 100644 --- a/src/main/scala/xiangshan/backend/decode/DecodeHelper.scala +++ b/src/main/scala/xiangshan/backend/decode/DecodeHelper.scala @@ -58,9 +58,6 @@ object Instructions extends HasInstrType with HasXSParameter { (if (HasMExtension) RVMInstr.table else Nil) ++ (if (HasCExtension) RVCInstr.table else Nil) ++ (if (HasFPU) RVFInstr.table ++ RVDInstr.table else Nil) -// Privileged.table ++ -// RVAInstr.table ++ -// RVZicsrInstr.table } object CInstructions extends HasInstrType with HasXSParameter { diff --git a/src/main/scala/xiangshan/backend/decode/Decoder.scala b/src/main/scala/xiangshan/backend/decode/Decoder.scala index cae8a0ce925d7cc179e2cdd1aff6719720d2026b..4887eaac2cf38e5a6ef782bbf2e8f1dc1d929983 100644 --- a/src/main/scala/xiangshan/backend/decode/Decoder.scala +++ 
b/src/main/scala/xiangshan/backend/decode/Decoder.scala @@ -2,11 +2,10 @@ package xiangshan.backend.decode import chisel3._ import chisel3.util._ -import chisel3.util.experimental.BoringUtils import xiangshan._ import utils._ import xiangshan.backend._ -import xiangshan.backend.decode.isa.RVCInstr +import xiangshan.backend.decode.isa.{RVCInstr, RV32I_ALUInstr, RVFInstr, RVDInstr} import xiangshan.{CfCtrl, CtrlFlow} @@ -36,16 +35,19 @@ class Decoder extends XSModule with HasInstrType { io.out.ctrl.fuType := fuType val SrcTypeTable = List( - InstrI -> (SrcType.reg, SrcType.imm), - InstrFI -> (SrcType.reg, SrcType.imm), - InstrR -> (SrcType.reg, SrcType.reg), - InstrS -> (SrcType.reg, SrcType.reg), - InstrFS -> (SrcType.reg, SrcType.fp ), - InstrSA -> (SrcType.reg, SrcType.reg), - InstrB -> (SrcType.reg, SrcType.reg), - InstrU -> (SrcType.pc , SrcType.imm), - InstrJ -> (SrcType.pc , SrcType.imm), - InstrN -> (SrcType.pc , SrcType.imm) + InstrI -> (SrcType.reg, SrcType.imm), + InstrFI -> (SrcType.reg, SrcType.imm), + InstrR -> (SrcType.reg, SrcType.reg), + InstrFR -> (SrcType.fp, SrcType.fp ), + InstrS -> (SrcType.reg, SrcType.reg), + InstrFS -> (SrcType.reg, SrcType.fp ), + InstrSA -> (SrcType.reg, SrcType.reg), + InstrB -> (SrcType.reg, SrcType.reg), + InstrU -> (SrcType.pc , SrcType.imm), + InstrJ -> (SrcType.pc , SrcType.imm), + InstrN -> (SrcType.pc , SrcType.imm), + InstrGtoF -> (SrcType.reg, SrcType.imm), + InstrFtoG -> (SrcType.fp , SrcType.fp) ) val src1Type = LookupTree(instrType, SrcTypeTable.map(p => (p._1, p._2._1))) val src2Type = LookupTree(instrType, SrcTypeTable.map(p => (p._1, p._2._2))) @@ -77,6 +79,7 @@ class Decoder extends XSModule with HasInstrType { val rfSrc1 = Mux(isRVC, rvc_src1, rs) val rfSrc2 = Mux(isRVC, rvc_src2, rt) + val rfSrc3 = instr(31, 27) val rfDest = Mux(isRVC, rvc_dest, rd) // TODO: refactor decode logic @@ -85,6 +88,7 @@ class Decoder extends XSModule with HasInstrType { val fpWen = isfpWen(instrType) io.out.ctrl.lsrc1 := 
Mux(src1Type === SrcType.pc, 0.U, rfSrc1) io.out.ctrl.lsrc2 := Mux(src2Type === SrcType.imm, 0.U, rfSrc2) + io.out.ctrl.lsrc3 := rfSrc3 io.out.ctrl.rfWen := rfWen io.out.ctrl.fpWen := fpWen io.out.ctrl.ldest := Mux(fpWen || rfWen, rfDest, 0.U) @@ -128,11 +132,46 @@ class Decoder extends XSModule with HasInstrType { } } - io.out.ctrl.src1Type := Mux(instr(6,0) === "b0110111".U || instr(15, 13) === "b011".U && instr(1, 0) === "b01".U, SrcType.reg, src1Type) - io.out.ctrl.src2Type := src2Type - // val vmEnable = WireInit(false.B) - // BoringUtils.addSink(vmEnable, "DTLBENABLE") + + + def bitPatLookup(key: UInt, default: UInt, mapping: Seq[(BitPat, UInt)]) = { + mapping.foldLeft(default){case (d, (k, v)) => Mux(k === key, v, d)} + } + + io.out.ctrl.src1Type := bitPatLookup(instr, src1Type, Seq( + RV32I_ALUInstr.LUI -> SrcType.reg // FIX LUI + )) + io.out.ctrl.src2Type := bitPatLookup(instr, src2Type, Seq( + RVFInstr.FSQRT_S -> SrcType.imm, + RVFInstr.FCLASS_S -> SrcType.imm, + RVFInstr.FMV_X_W -> SrcType.imm, + RVFInstr.FCVT_W_S -> SrcType.imm, + RVFInstr.FCVT_WU_S -> SrcType.imm, + RVFInstr.FCVT_L_S -> SrcType.imm, + RVFInstr.FCVT_LU_S -> SrcType.imm, + + RVDInstr.FSQRT_D -> SrcType.imm, + RVDInstr.FCVT_S_D -> SrcType.imm, + RVDInstr.FCVT_D_S -> SrcType.imm, + RVDInstr.FCLASS_D -> SrcType.imm, + RVDInstr.FMV_X_D -> SrcType.imm, + RVDInstr.FCVT_W_D -> SrcType.imm, + RVDInstr.FCVT_WU_D -> SrcType.imm, + RVDInstr.FCVT_L_D -> SrcType.imm, + RVDInstr.FCVT_LU_D -> SrcType.imm + )) + io.out.ctrl.src3Type := bitPatLookup(instr, SrcType.imm, Seq( + RVFInstr.FMADD_S -> SrcType.fp, + RVFInstr.FNMADD_S -> SrcType.fp, + RVFInstr.FMSUB_S -> SrcType.fp, + RVFInstr.FNMSUB_S -> SrcType.fp, + + RVDInstr.FMADD_D -> SrcType.fp, + RVDInstr.FNMADD_D -> SrcType.fp, + RVDInstr.FMSUB_D -> SrcType.fp, + RVDInstr.FNMSUB_D -> SrcType.fp, + )) io.out.cf.exceptionVec.map(_ := false.B) io.out.cf.exceptionVec(illegalInstr) := instrType === InstrN @@ -146,6 +185,8 @@ class Decoder extends XSModule 
with HasInstrType { io.out.ctrl.noSpecExec := io.out.ctrl.isXSTrap || io.out.ctrl.fuType===FuType.csr || io.out.ctrl.fuType===FuType.mou || io.out.ctrl.fuType===FuType.fence/*noSpecExec make it sent to alu0,for roq is empty*/ io.out.ctrl.flushPipe := io.out.ctrl.fuType===FuType.fence + io.out.ctrl.isRVF := instr(26, 25) === 0.U + XSDebug("in: instr=%x pc=%x excepVec=%b intrVec=%b crossPageIPFFix=%d\n", io.in.instr, io.in.pc, io.in.exceptionVec.asUInt, io.in.intrVec.asUInt, io.in.crossPageIPFFix) diff --git a/src/main/scala/xiangshan/backend/decode/isa/RVC.scala b/src/main/scala/xiangshan/backend/decode/isa/RVC.scala index 6331b2acda4a48a49f59c90dc09a2dceb195a76a..834975c91ef12b7e33f375f34e9e3c342c0980b0 100644 --- a/src/main/scala/xiangshan/backend/decode/isa/RVC.scala +++ b/src/main/scala/xiangshan/backend/decode/isa/RVC.scala @@ -57,13 +57,13 @@ object RVCInstr extends HasInstrType with HasRVCConst { // def C_XX = BitPat("b????????????????_???_?_10_987_65_432_10") def C_ILLEGAL = BitPat("b0000000000000000_000_0_00_000_00_000_00") def C_ADDI4SPN = BitPat("b????????????????_000_?_??_???_??_???_00") - // def C_FLD = BitPat("b????????????????_001_?_??_???_??_???_00") + def C_FLD = BitPat("b????????????????_001_?_??_???_??_???_00") // def C_LQ = BitPat("b????????????????_001_?_??_???_??_???_00") def C_LW = BitPat("b????????????????_010_?_??_???_??_???_00") // def C_FLW = BitPat("b????????????????_011_?_??_???_??_???_00") // RV32FC Only def C_LD = BitPat("b????????????????_011_?_??_???_??_???_00") // def C_LI = BitPat("b????????????????_100_?_??_???_??_???_00") //reserved - // def C_FSD = BitPat("b????????????????_101_?_??_???_??_???_00") + def C_FSD = BitPat("b????????????????_101_?_??_???_??_???_00") // def C_SQ = BitPat("b????????????????_101_?_??_???_??_???_00") def C_SW = BitPat("b????????????????_110_?_??_???_??_???_00") // def C_FSW = BitPat("b????????????????_111_?_??_???_??_???_00") // RV32FC Only @@ -97,7 +97,7 @@ object RVCInstr extends HasInstrType with 
HasRVCConst { //RVC 11 def C_SLLI = BitPat("b????????????????_000_?_??_???_??_???_10") // def C_SLLI64 = BitPat("b????????????????_000_0_??_???_00_000_10") - // def C_FLDSP = BitPat("b????????????????_001_?_??_???_??_???_10") + def C_FLDSP = BitPat("b????????????????_001_?_??_???_??_???_10") // def C_LQSP = BitPat("b????????????????_001_?_??_???_??_???_10") def C_LWSP = BitPat("b????????????????_010_?_??_???_??_???_10") // def C_FLWSP = BitPat("b????????????????_011_?_??_???_??_???_10") // RV32FC Only @@ -107,8 +107,8 @@ object RVCInstr extends HasInstrType with HasRVCConst { def C_EBREAK = BitPat("b????????????????_100_1_00_000_00_000_10") def C_JALR = BitPat("b????????????????_100_1_??_???_00_000_10") def C_ADD = BitPat("b????????????????_100_1_??_???_??_???_10") - // def C_FSDSP = BitPat("b????????????????_101_?_??_???_??_???_10") - // def C_SQSP = BitPat("b????????????????_101_?_??_???_??_???_10") + def C_FSDSP = BitPat("b????????????????_101_?_??_???_??_???_10") +// def C_SQSP = BitPat("b????????????????_101_?_??_???_??_???_10") def C_SWSP = BitPat("b????????????????_110_?_??_???_??_???_10") // def C_FSWSP = BitPat("b????????????????_111_?_??_???_??_???_10") // RV32FC Only def C_SDSP = BitPat("b????????????????_111_?_??_???_??_???_10") @@ -121,10 +121,10 @@ object RVCInstr extends HasInstrType with HasRVCConst { val table = Array( C_ILLEGAL -> List(InstrN, FuType.csr, CSROpType.jmp), C_ADDI4SPN -> List(InstrI, FuType.alu, ALUOpType.add), - // C_FLD -> List(InstrFI, FuType.ldu, LSUOpType.ld), + C_FLD -> List(InstrFI, FuType.ldu, LSUOpType.ld), C_LW -> List(InstrI, FuType.ldu, LSUOpType.lw), C_LD -> List(InstrI, FuType.ldu, LSUOpType.ld), - // C_FSD -> List(InstrFS, FuType.stu, LSUOpType.sd), + C_FSD -> List(InstrFS, FuType.stu, LSUOpType.sd), C_SW -> List(InstrS, FuType.stu, LSUOpType.sw), C_SD -> List(InstrS, FuType.stu, LSUOpType.sd), C_NOP -> List(InstrI, FuType.alu, ALUOpType.add), @@ -133,7 +133,7 @@ object RVCInstr extends HasInstrType with HasRVCConst { 
C_ADDIW -> List(InstrI, FuType.alu, ALUOpType.addw), C_LI -> List(InstrI, FuType.alu, ALUOpType.add), C_ADDI16SP -> List(InstrI, FuType.alu, ALUOpType.add), - C_LUI -> List(InstrU, FuType.alu, ALUOpType.add), + C_LUI -> List(InstrI, FuType.alu, ALUOpType.add), C_SRLI -> List(InstrI, FuType.alu, ALUOpType.srl), C_SRAI -> List(InstrI, FuType.alu, ALUOpType.sra), C_ANDI -> List(InstrI, FuType.alu, ALUOpType.and), @@ -147,7 +147,7 @@ object RVCInstr extends HasInstrType with HasRVCConst { C_BEQZ -> List(InstrB, FuType.alu, ALUOpType.beq), C_BNEZ -> List(InstrB, FuType.alu, ALUOpType.bne), C_SLLI -> List(InstrI, FuType.alu, ALUOpType.sll), - // C_FLDSP -> List(InstrI, FuType.alu, ALUOpType.add), + C_FLDSP -> List(InstrFI, FuType.ldu, LSUOpType.ld), C_LWSP -> List(InstrI, FuType.ldu, LSUOpType.lw), // C_FLWSP -> List(InstrI, FuType.alu, ALUOpType.add), C_LDSP -> List(InstrI, FuType.ldu, LSUOpType.ld), @@ -156,18 +156,18 @@ object RVCInstr extends HasInstrType with HasRVCConst { C_EBREAK -> List(InstrI, FuType.alu, ALUOpType.add), C_JALR -> List(InstrI, FuType.jmp, JumpOpType.jalr), C_ADD -> List(InstrR, FuType.alu, ALUOpType.add), - // C_FSDSP -> List(InstrI, FuType.alu, ALUOpType.add), + C_FSDSP -> List(InstrFS, FuType.stu, LSUOpType.sd), C_SWSP -> List(InstrS, FuType.stu, LSUOpType.sw), // C_FSWSP -> List(InstrI, FuType.alu, ALUOpType.add), C_SDSP -> List(InstrS, FuType.stu, LSUOpType.sd) ) - val cExtraTable = Array( + val cExtraTable = Array( C_ADDI4SPN -> List(ImmADD4SPN, REGx2, DtCare, REGrs2p), - // C_FLD -> List(ImmLD, REGrs1p, DtCare, REGrs2p), + C_FLD -> List(ImmLD, REGrs1p, DtCare, REGrs2p), C_LW -> List(ImmLW, REGrs1p, DtCare, REGrs2p), C_LD -> List(ImmLD, REGrs1p, DtCare, REGrs2p), - // C_FSD -> List(ImmSD, REGrs1p, REGrs2p, DtCare), + C_FSD -> List(ImmSD, REGrs1p, REGrs2p, DtCare), C_SW -> List(ImmSW, REGrs1p, REGrs2p, DtCare), C_SD -> List(ImmSD, REGrs1p, REGrs2p, DtCare), C_NOP -> List(ImmNone, DtCare, DtCare, DtCare), @@ -190,7 +190,7 @@ object RVCInstr 
extends HasInstrType with HasRVCConst { C_BEQZ -> List(ImmB, REGrs1p, DtCare, DtCare), // rd: x0 C_BNEZ -> List(ImmB, REGrs1p, DtCare, DtCare), // rd: x0 C_SLLI -> List(ImmLI, REGrd, DtCare, REGrd), - // C_FLDSP -> List(ImmLDSP, REGx2, DtCare, REGrd), + C_FLDSP -> List(ImmLDSP, REGx2, DtCare, REGrd), // C_LQSP -> List(), C_LWSP -> List(ImmLWSP, REGx2, DtCare, REGrd), C_LDSP -> List(ImmLDSP, REGx2, DtCare, REGrd), @@ -199,7 +199,7 @@ object RVCInstr extends HasInstrType with HasRVCConst { C_EBREAK -> List(ImmNone, DtCare, DtCare, DtCare), //not implemented C_JALR -> List(ImmNone, REGrs1, DtCare, REGx1), C_ADD -> List(ImmNone, REGrd, REGrs2, REGrd), - // C_FSDSP -> List(ImmSDSP, REGx2, REGrs2, DtCare), + C_FSDSP -> List(ImmSDSP, REGx2, REGrs2, DtCare), // C_SQSP -> List(), C_SWSP -> List(ImmSWSP, REGx2, REGrs2, DtCare), C_SDSP -> List(ImmSDSP, REGx2, REGrs2, DtCare) diff --git a/src/main/scala/xiangshan/backend/decode/isa/RVD.scala b/src/main/scala/xiangshan/backend/decode/isa/RVD.scala index f0283f36bd653f1b0addb5f44a05f1a5556e3468..b902d1e97c0fa6ac2373d64c296a6522d02666af 100644 --- a/src/main/scala/xiangshan/backend/decode/isa/RVD.scala +++ b/src/main/scala/xiangshan/backend/decode/isa/RVD.scala @@ -1,9 +1,11 @@ package xiangshan.backend.decode.isa import chisel3.util._ -import xiangshan.{FuType, HasXSParameter} +import xiangshan.HasXSParameter +import xiangshan.FuType._ import xiangshan.backend.decode._ import xiangshan.backend.LSUOpType +import xiangshan.backend.fu.fpu.FPUOpType._ object RVDInstr extends HasXSParameter with HasInstrType { @@ -41,54 +43,43 @@ object RVDInstr extends HasXSParameter with HasInstrType { def FNMADD_D = BitPat("b?????01??????????????????1001111") val table = Array( - FLD -> List(InstrFI, FuType.ldu, LSUOpType.ld), - FSD -> List(InstrFS, FuType.stu, LSUOpType.sd) - ) + FLD -> List(InstrFI, ldu, LSUOpType.ld), + FSD -> List(InstrFS, stu, LSUOpType.sd), + + // FR + FADD_D -> List(InstrFR, fmac, fadd), + FSUB_D -> List(InstrFR, fmac, 
fsub), + FMUL_D -> List(InstrFR, fmac, fmul), + FDIV_D -> List(InstrFR, fmisc, fdiv), + FMIN_D -> List(InstrFR, fmisc, fmin), + FMAX_D -> List(InstrFR, fmisc, fmax), + FSGNJ_D -> List(InstrFR, fmisc, fsgnj), + FSGNJN_D -> List(InstrFR, fmisc, fsgnjn), + FSGNJX_D -> List(InstrFR, fmisc, fsgnjx), + FSQRT_D -> List(InstrFR, fmisc, fsqrt), + FMADD_D -> List(InstrFR, fmac, fmadd), + FNMADD_D -> List(InstrFR, fmac, fnmadd), + FMSUB_D -> List(InstrFR, fmac, fmsub), + FNMSUB_D -> List(InstrFR, fmac, fnmsub), + FCVT_S_D -> List(InstrFR, fmisc, d2s), + FCVT_D_S -> List(InstrFR, fmisc, s2d), - // (isFp, src1Type, src2Type, src3Type, rfWen, fpWen, fuOpType, inputFunc, outputFunc) -// val table = Array( + // FtoG + FCLASS_D -> List(InstrFtoG, fmisc, fclass), + FMV_X_D -> List(InstrFtoG, fmisc, fmv_f2i), + FCVT_W_D -> List(InstrFtoG, fmisc, f2w), + FCVT_WU_D -> List(InstrFtoG, fmisc, f2wu), + FCVT_L_D -> List(InstrFtoG, fmisc, f2l), + FCVT_LU_D -> List(InstrFtoG, fmisc, f2lu), + FLE_D -> List(InstrFtoG, fmisc, fle), + FLT_D -> List(InstrFtoG, fmisc, flt), + FEQ_D -> List(InstrFtoG, fmisc, feq), -// FLD -> List(Y, reg, imm, imm, N, Y, LSUOpType.ld, in_raw, out_raw), -// C_FLD -> List(Y, reg, imm, imm, N, Y, LSUOpType.ld, in_raw, out_raw), -// C_FLDSP -> List(Y, reg, imm, imm, N, Y, LSUOpType.ld, in_raw, out_raw), -// FSD -> List(Y, reg, fp, imm, N, N, LSUOpType.sd, in_raw, out_raw), -// C_FSD -> List(Y, reg, fp, imm, N, N, LSUOpType.sd, in_raw, out_raw), -// C_FSDSP -> List(Y, reg, fp, imm, N, N, LSUOpType.sd, in_raw, out_raw), -// // fp fp -> fp -// FADD_D -> List(Y, fp, fp, imm, N, Y, fadd, in_raw, out_raw), -// FSUB_D -> List(Y, fp, fp, imm, N, Y, fsub, in_raw, out_raw), -// FMUL_D -> List(Y, fp, fp, imm, N, Y, fmul, in_raw, out_raw), -// FDIV_D -> List(Y, fp, fp, imm, N, Y, fdiv, in_raw, out_raw), -// FMIN_D -> List(Y, fp, fp, imm, N, Y, fmin, in_raw, out_raw), -// FMAX_D -> List(Y, fp, fp, imm, N, Y, fmax, in_raw, out_raw), -// FSGNJ_D -> List(Y, fp, fp, imm, N, Y, fsgnj, 
in_raw, out_raw), -// FSGNJN_D -> List(Y, fp, fp, imm, N, Y, fsgnjn, in_raw, out_raw), -// FSGNJX_D -> List(Y, fp, fp, imm, N, Y, fsgnjx, in_raw, out_raw), -// // fp -> fp -// FSQRT_D -> List(Y, fp, imm, imm, N, Y, fsqrt, in_raw, out_raw), -// FCVT_S_D -> List(Y, fp, imm, imm, N, Y, d2s, in_raw, out_box), -// FCVT_D_S -> List(Y, fp, imm, imm, N, Y, s2d, in_unbox, out_raw), -// // fp fp fp -> fp -// FMADD_D -> List(Y, fp, fp, fp, N, Y, fmadd, in_raw, out_raw), -// FNMADD_D -> List(Y, fp, fp, fp, N, Y, fnmadd, in_raw, out_raw), -// FMSUB_D -> List(Y, fp, fp, fp, N, Y, fmsub, in_raw, out_raw), -// FNMSUB_D -> List(Y, fp, fp, fp, N, Y, fnmsub, in_raw, out_raw), -// // fp -> gp -// FCLASS_D -> List(Y, fp, imm, imm, Y, N, fclass, in_raw, out_raw), -// FMV_X_D -> List(Y, fp, imm, imm, Y, N, fmv_f2i, in_raw, out_raw), -// FCVT_W_D -> List(Y, fp, imm, imm, Y, N, f2w, in_raw, out_sext), -// FCVT_WU_D -> List(Y, fp, imm, imm, Y, N, f2wu, in_raw, out_sext), -// FCVT_L_D -> List(Y, fp, imm, imm, Y, N, f2l, in_raw, out_raw), -// FCVT_LU_D -> List(Y, fp, imm, imm, Y, N, f2lu, in_raw, out_raw), -// // fp fp -> gp -// FLE_D -> List(Y, fp, fp, imm, Y, N, fle, in_raw, out_raw), -// FLT_D -> List(Y, fp, fp, imm, Y, N, flt, in_raw, out_raw), -// FEQ_D -> List(Y, fp, fp, imm, Y, N, feq, in_raw, out_raw), -// // gp -> fp -// FMV_D_X -> List(Y, reg, imm, imm, N, Y, fmv_i2f, in_raw, out_raw), -// FCVT_D_W -> List(Y, reg, imm, imm, N, Y, w2f, in_raw, out_raw), -// FCVT_D_WU -> List(Y, reg, imm, imm, N, Y, wu2f, in_raw, out_raw), -// FCVT_D_L -> List(Y, reg, imm, imm, N, Y, l2f, in_raw, out_raw), -// FCVT_D_LU -> List(Y, reg, imm, imm, N, Y, lu2f, in_raw, out_raw) -// ) -} \ No newline at end of file + // GtoF + FMV_D_X -> List(InstrGtoF, i2f, fmv_i2f), + FCVT_D_W -> List(InstrGtoF, i2f, w2f), + FCVT_D_WU -> List(InstrGtoF, i2f, wu2f), + FCVT_D_L -> List(InstrGtoF, i2f, l2f), + FCVT_D_LU -> List(InstrGtoF, i2f, lu2f) + ) +} diff --git a/src/main/scala/xiangshan/backend/decode/isa/RVF.scala 
b/src/main/scala/xiangshan/backend/decode/isa/RVF.scala index a153e5f4a3c644a5da1bf57909e228c22cb8fb90..45c7aa76f191f11165ad0c5d572af814bd844f42 100644 --- a/src/main/scala/xiangshan/backend/decode/isa/RVF.scala +++ b/src/main/scala/xiangshan/backend/decode/isa/RVF.scala @@ -2,8 +2,10 @@ package xiangshan.backend.decode.isa import chisel3.util._ import xiangshan.backend._ -import xiangshan.{FuType, HasXSParameter} +import xiangshan.HasXSParameter +import xiangshan.FuType._ import xiangshan.backend.decode._ +import xiangshan.backend.fu.fpu.FPUOpType._ object RVFInstr extends HasXSParameter with HasInstrType { @@ -39,48 +41,41 @@ object RVFInstr extends HasXSParameter with HasInstrType { def FNMADD_S = BitPat("b?????00??????????????????1001111") val table = Array( - FLW -> List(InstrFI, FuType.ldu, LSUOpType.flw), - FSW -> List(InstrFS, FuType.stu, LSUOpType.sw) - ) + FLW -> List(InstrFI, ldu, LSUOpType.flw), + FSW -> List(InstrFS, stu, LSUOpType.sw), + + // FR + FADD_S -> List(InstrFR, fmac, fadd), + FSUB_S -> List(InstrFR, fmac, fsub), + FMUL_S -> List(InstrFR, fmac, fmul), + FDIV_S -> List(InstrFR, fmisc, fdiv), + FMIN_S -> List(InstrFR, fmisc, fmin), + FMAX_S -> List(InstrFR, fmisc, fmax), + FSGNJ_S -> List(InstrFR, fmisc, fsgnj), + FSGNJN_S -> List(InstrFR, fmisc, fsgnjn), + FSGNJX_S -> List(InstrFR, fmisc, fsgnjx), + FSQRT_S -> List(InstrFR, fmisc, fsqrt), + FMADD_S -> List(InstrFR, fmac, fmadd), + FNMADD_S -> List(InstrFR, fmac, fnmadd), + FMSUB_S -> List(InstrFR, fmac, fmsub), + FNMSUB_S -> List(InstrFR, fmac, fnmsub), - // (isFp, src1Type, src2Type, src3Type, rfWen, fpWen, fuOpType, inputFunc, outputFunc) -// val DecodeDefault = List(N, imm, imm, imm, N, N, fadd, in_raw, out_raw) -// val table = Array( -// FLW -> List(Y, reg, imm, imm, N, Y, LSUOpType.flw, in_raw, out_raw), -// FSW -> List(Y, reg, fp, imm, N, N, LSUOpType.sw, in_raw, out_raw), -// // fp fp -> fp -// FADD_S -> List(Y, fp, fp, imm, N, Y, fadd, in_unbox, out_box), -// FSUB_S -> List(Y, fp, fp, 
imm, N, Y, fsub, in_unbox, out_box), -// FMUL_S -> List(Y, fp, fp, imm, N, Y, fmul, in_unbox, out_box), -// FDIV_S -> List(Y, fp, fp, imm, N, Y, fdiv, in_unbox, out_box), -// FMIN_S -> List(Y, fp, fp, imm, N, Y, fmin, in_unbox, out_box), -// FMAX_S -> List(Y, fp, fp, imm, N, Y, fmax, in_unbox, out_box), -// FSGNJ_S -> List(Y, fp, fp, imm, N, Y, fsgnj, in_unbox, out_box), -// FSGNJN_S -> List(Y, fp, fp, imm, N, Y, fsgnjn, in_unbox, out_box), -// FSGNJX_S -> List(Y, fp, fp, imm, N, Y, fsgnjx, in_unbox, out_box), -// // fp -> fp -// FSQRT_S -> List(Y, fp, imm, imm, N, Y, fsqrt, in_unbox, out_box), -// // fp fp fp -> fp -// FMADD_S -> List(Y, fp, fp, fp, N, Y, fmadd, in_unbox, out_box), -// FNMADD_S -> List(Y, fp, fp, fp, N, Y, fnmadd, in_unbox, out_box), -// FMSUB_S -> List(Y, fp, fp, fp, N, Y, fmsub, in_unbox, out_box), -// FNMSUB_S -> List(Y, fp, fp, fp, N, Y, fnmsub, in_unbox, out_box), -// // fp -> gp -// FCLASS_S -> List(Y, fp, imm, imm, Y, N, fclass, in_unbox, out_raw), -// FMV_X_W -> List(Y, fp, imm, imm, Y, N, fmv_f2i, in_raw, out_sext), -// FCVT_W_S -> List(Y, fp, imm, imm, Y, N, f2w, in_unbox, out_sext), -// FCVT_WU_S -> List(Y, fp, imm, imm, Y, N, f2wu, in_unbox, out_sext), -// FCVT_L_S -> List(Y, fp, imm, imm, Y, N, f2l, in_unbox, out_raw), -// FCVT_LU_S -> List(Y, fp, imm, imm, Y, N, f2lu, in_unbox, out_raw) , -// // fp fp -> gp -// FLE_S -> List(Y, fp, fp, imm, Y, N, fle, in_unbox, out_raw), -// FLT_S -> List(Y, fp, fp, imm, Y, N, flt, in_unbox, out_raw), -// FEQ_S -> List(Y, fp, fp, imm, Y, N, feq, in_unbox, out_raw), -// // gp -> fp -// FMV_W_X -> List(Y, reg, imm, imm, N, Y, fmv_i2f, in_raw, out_box), -// FCVT_S_W -> List(Y, reg, imm, imm, N, Y, w2f, in_raw, out_box), -// FCVT_S_WU -> List(Y, reg, imm, imm, N, Y, wu2f, in_raw, out_box), -// FCVT_S_L -> List(Y, reg, imm, imm, N, Y, l2f, in_raw, out_box), -// FCVT_S_LU -> List(Y, reg, imm, imm, N, Y, lu2f, in_raw, out_box) -// ) + // F -> G + FCLASS_S -> List(InstrFtoG, fmisc, fclass), + FMV_X_W -> 
List(InstrFtoG, fmisc, fmv_f2i), + FCVT_W_S -> List(InstrFtoG, fmisc, f2w), + FCVT_WU_S -> List(InstrFtoG, fmisc, f2wu), + FCVT_L_S -> List(InstrFtoG, fmisc, f2l), + FCVT_LU_S -> List(InstrFtoG, fmisc, f2lu), + FLE_S -> List(InstrFtoG, fmisc, fle), + FLT_S -> List(InstrFtoG, fmisc, flt), + FEQ_S -> List(InstrFtoG, fmisc, feq), + + // G -> F + FMV_W_X -> List(InstrGtoF, i2f, fmv_i2f), + FCVT_S_W -> List(InstrGtoF, i2f, w2f), + FCVT_S_WU -> List(InstrGtoF, i2f, wu2f), + FCVT_S_L -> List(InstrGtoF, i2f, l2f), + FCVT_S_LU -> List(InstrGtoF, i2f, lu2f) + ) } diff --git a/src/main/scala/xiangshan/backend/dispatch/Dispatch.scala b/src/main/scala/xiangshan/backend/dispatch/Dispatch.scala index 360eeb69647530101d1c43473a53d4cdc67b3fe8..541dd7b86ea39ca47d2bff033423ca0d22067ae8 100644 --- a/src/main/scala/xiangshan/backend/dispatch/Dispatch.scala +++ b/src/main/scala/xiangshan/backend/dispatch/Dispatch.scala @@ -38,14 +38,14 @@ class Dispatch extends XSModule { val lsIdxs = Input(Vec(RenameWidth, new LSIdx)) val dequeueRoqIndex = Input(Valid(new RoqPtr)) // read regfile - val readIntRf = Vec(NRIntReadPorts, Flipped(new RfReadPort)) + val readIntRf = Vec(NRIntReadPorts - NRMemReadPorts, Flipped(new RfReadPort)) val readFpRf = Vec(NRFpReadPorts - exuParameters.StuCnt, Flipped(new RfReadPort)) // read reg status (busy/ready) - val intPregRdy = Vec(NRIntReadPorts, Input(Bool())) + val intPregRdy = Vec(NRIntReadPorts - NRMemReadPorts, Input(Bool())) val fpPregRdy = Vec(NRFpReadPorts - exuParameters.StuCnt, Input(Bool())) // load + store reg status (busy/ready) - val intMemRegAddr = Vec(NRMemReadPorts, Output(UInt(PhyRegIdxWidth.W))) - val fpMemRegAddr = Vec(exuParameters.StuCnt, Output(UInt(PhyRegIdxWidth.W))) + val memIntRf = Vec(NRMemReadPorts, Flipped(new RfReadPort)) + val memFpRf = Vec(exuParameters.StuCnt, Flipped(new RfReadPort)) val intMemRegRdy = Vec(NRMemReadPorts, Input(Bool())) val fpMemRegRdy = Vec(exuParameters.StuCnt, Input(Bool())) // replay: set preg status to not 
ready @@ -53,7 +53,7 @@ class Dispatch extends XSModule { // to reservation stations val numExist = Input(Vec(exuParameters.ExuCnt, UInt(log2Ceil(IssQueSize).W))) val enqIQCtrl = Vec(exuParameters.ExuCnt, DecoupledIO(new MicroOp)) - val enqIQData = Vec(exuParameters.ExuCnt - exuParameters.LsExuCnt, Output(new ExuInput)) + val enqIQData = Vec(exuParameters.ExuCnt, Output(new ExuInput)) }) val dispatch1 = Module(new Dispatch1) @@ -128,14 +128,15 @@ class Dispatch extends XSModule { fpDq.io.deq <> DontCare io.readFpRf <> DontCare } - + // Load/store dispatch queue to load/store issue queues val lsDispatch = Module(new Dispatch2Ls) lsDispatch.io.fromDq <> lsDq.io.deq - lsDispatch.io.intRegAddr <> io.intMemRegAddr - lsDispatch.io.fpRegAddr <> io.fpMemRegAddr + lsDispatch.io.readIntRf <> io.memIntRf + lsDispatch.io.readFpRf <> io.memFpRf lsDispatch.io.intRegRdy <> io.intMemRegRdy lsDispatch.io.fpRegRdy <> io.fpMemRegRdy lsDispatch.io.numExist.zipWithIndex.map({case (num, i) => num := io.numExist(exuParameters.IntExuCnt + exuParameters.FpExuCnt + i)}) lsDispatch.io.enqIQCtrl.zipWithIndex.map({case (enq, i) => enq <> io.enqIQCtrl(exuParameters.IntExuCnt + exuParameters.FpExuCnt + i)}) + lsDispatch.io.enqIQData.zipWithIndex.map({case (enq, i) => enq <> io.enqIQData(exuParameters.IntExuCnt + exuParameters.FpExuCnt + i)}) } diff --git a/src/main/scala/xiangshan/backend/dispatch/Dispatch2Int.scala b/src/main/scala/xiangshan/backend/dispatch/Dispatch2Int.scala index 8f5b1b70e690c80d6a0a0cbc955770ee84070f5c..b6495026b023a264be04077f623f635e4d53e66c 100644 --- a/src/main/scala/xiangshan/backend/dispatch/Dispatch2Int.scala +++ b/src/main/scala/xiangshan/backend/dispatch/Dispatch2Int.scala @@ -10,8 +10,8 @@ import xiangshan.backend.exu._ class Dispatch2Int extends XSModule { val io = IO(new Bundle() { val fromDq = Flipped(Vec(dpParams.IntDqDeqWidth, DecoupledIO(new MicroOp))) - val readRf = Vec(NRIntReadPorts, Flipped(new RfReadPort)) - val regRdy = Vec(NRIntReadPorts, 
Input(Bool())) + val readRf = Vec(NRIntReadPorts - NRMemReadPorts, Flipped(new RfReadPort)) + val regRdy = Vec(NRIntReadPorts - NRMemReadPorts, Input(Bool())) val numExist = Input(Vec(exuParameters.IntExuCnt, UInt(log2Ceil(IssQueSize).W))) val enqIQCtrl = Vec(exuParameters.IntExuCnt, DecoupledIO(new MicroOp)) val enqIQData = Vec(exuParameters.IntExuCnt, Output(new ExuInput)) diff --git a/src/main/scala/xiangshan/backend/dispatch/Dispatch2Ls.scala b/src/main/scala/xiangshan/backend/dispatch/Dispatch2Ls.scala index 206caa0d6c4141daacf95ec57f623d13eee143c9..a9b58c5c930d9c93a2bc5b2b3704feb2fd49bf60 100644 --- a/src/main/scala/xiangshan/backend/dispatch/Dispatch2Ls.scala +++ b/src/main/scala/xiangshan/backend/dispatch/Dispatch2Ls.scala @@ -10,12 +10,15 @@ import xiangshan.backend.exu._ class Dispatch2Ls extends XSModule { val io = IO(new Bundle() { val fromDq = Flipped(Vec(dpParams.LsDqDeqWidth, DecoupledIO(new MicroOp))) - val intRegAddr = Vec(NRMemReadPorts, Output(UInt(PhyRegIdxWidth.W))) - val fpRegAddr = Vec(exuParameters.StuCnt, Output(UInt(PhyRegIdxWidth.W))) + val readIntRf = Vec(NRMemReadPorts, Flipped(new RfReadPort)) + val readFpRf = Vec(exuParameters.StuCnt, Flipped(new RfReadPort)) + // val intRegAddr = Vec(NRMemReadPorts, Output(UInt(PhyRegIdxWidth.W))) + // val fpRegAddr = Vec(exuParameters.StuCnt, Output(UInt(PhyRegIdxWidth.W))) val intRegRdy = Vec(NRMemReadPorts, Input(Bool())) val fpRegRdy = Vec(exuParameters.StuCnt, Input(Bool())) val numExist = Input(Vec(exuParameters.LsExuCnt, UInt(log2Ceil(IssQueSize).W))) val enqIQCtrl = Vec(exuParameters.LsExuCnt, DecoupledIO(new MicroOp)) + val enqIQData = Vec(exuParameters.LsExuCnt, Output(new ExuInput)) }) /** @@ -70,12 +73,12 @@ class Dispatch2Ls extends XSModule { val readPort = Seq(0, 1, 2, 4) for (i <- 0 until exuParameters.LsExuCnt) { if (i < exuParameters.LduCnt) { - io.intRegAddr(readPort(i)) := io.fromDq(indexVec(i)).bits.psrc1 + io.readIntRf(readPort(i)).addr := io.fromDq(indexVec(i)).bits.psrc1 } 
else { - io.fpRegAddr(i - exuParameters.LduCnt) := io.fromDq(indexVec(i)).bits.psrc2 - io.intRegAddr(readPort(i) ) := io.fromDq(indexVec(i)).bits.psrc1 - io.intRegAddr(readPort(i)+1) := io.fromDq(indexVec(i)).bits.psrc2 + io.readFpRf(i - exuParameters.LduCnt).addr := io.fromDq(indexVec(i)).bits.psrc2 + io.readIntRf(readPort(i) ).addr := io.fromDq(indexVec(i)).bits.psrc1 + io.readIntRf(readPort(i)+1).addr := io.fromDq(indexVec(i)).bits.psrc2 } } @@ -111,4 +114,31 @@ class Dispatch2Ls extends XSModule { XSDebug(io.fromDq(i).valid && !io.fromDq(i).ready, p"pc 0x${Hexadecimal(io.fromDq(i).bits.cf.pc)} waits at Ls dispatch queue with index $i\n") } + + /** + * Part 5: the second stage of dispatch 2 (send data to reservation station) + */ + val uopReg = Reg(Vec(exuParameters.LsExuCnt, new MicroOp)) + val dataValidRegDebug = Reg(Vec(exuParameters.LsExuCnt, Bool())) + for (i <- 0 until exuParameters.LsExuCnt) { + uopReg(i) := io.enqIQCtrl(i).bits + dataValidRegDebug(i) := io.enqIQCtrl(i).fire() + + io.enqIQData(i) := DontCare + // assert(uopReg(i).ctrl.src1Type =/= SrcType.pc) + io.enqIQData(i).src1 := io.readIntRf(readPort(i)).data + if (i >= exuParameters.LduCnt) { + io.enqIQData(i).src2 := Mux( + uopReg(i).ctrl.src2Type === SrcType.imm, + uopReg(i).ctrl.imm, + Mux(uopReg(i).ctrl.src2Type === SrcType.fp, + io.readFpRf(i - exuParameters.LduCnt).data, + io.readIntRf(readPort(i) + 1).data)) + } + + XSDebug(dataValidRegDebug(i), + p"pc 0x${Hexadecimal(uopReg(i).cf.pc)} reads operands from " + + p"(${readPort(i) }, ${uopReg(i).psrc1}, ${Hexadecimal(io.enqIQData(i).src1)}), " + + p"(${readPort(i)+1}, ${uopReg(i).psrc2}, ${Hexadecimal(io.enqIQData(i).src2)})\n") + } } diff --git a/src/main/scala/xiangshan/backend/dispatch/DispatchQueue.scala b/src/main/scala/xiangshan/backend/dispatch/DispatchQueue.scala index 8f734ca73909328eb39d9068809f4424204893bf..5654dddb686528e48a09fa71856859b15f97c39a 100644 --- a/src/main/scala/xiangshan/backend/dispatch/DispatchQueue.scala +++ 
b/src/main/scala/xiangshan/backend/dispatch/DispatchQueue.scala @@ -190,7 +190,16 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int, replayWidth: Int) exten // In case of replay, we need to walk back and recover preg states in the busy table. // We keep track of the number of entries needed to be walked instead of target position to reduce overhead // for 11111111, replayPosition is unuseful. We naively set Cnt to size.U - val dispatchReplayCnt = Mux(allReplay, size.U, Mux(maskedNeedReplay(size - 1), (dispatchPtr + replayPosition).value, (dispatchPtr - replayPosition).value)) + val dispatchReplayCnt = Mux( + allReplay, size.U, + Mux(maskedNeedReplay(size - 1), + // replay makes flag flipped + dispatchPtr.value + replayPosition, + // the new replay does not change the flag + Mux(dispatchPtr.value <= replayPosition, + // but we are currently in a replay that changes the flag + dispatchPtr.value + (size.U - replayPosition), + dispatchPtr.value - replayPosition))) val dispatchReplayCntReg = RegInit(0.U) // actually, if deqIndex points to head uops and they are replayed, there's no need for extraWalk // however, to simplify logic, we simply let it do extra walk now diff --git a/src/main/scala/xiangshan/backend/exu/AluExeUnit.scala b/src/main/scala/xiangshan/backend/exu/AluExeUnit.scala index 57198165a4dd3e2153bfa98153d1e453f4bba09a..826556b21bcfd404ba76284d3bd661c637e35030 100644 --- a/src/main/scala/xiangshan/backend/exu/AluExeUnit.scala +++ b/src/main/scala/xiangshan/backend/exu/AluExeUnit.scala @@ -2,7 +2,6 @@ package xiangshan.backend.exu import chisel3._ import chisel3.util._ -import chisel3.util.experimental.BoringUtils import xiangshan._ import xiangshan.FuType._ import utils._ @@ -77,6 +76,7 @@ class AluExeUnit extends Exu(Exu.aluExeUnitCfg) { io.out.valid := valid io.out.bits.uop <> io.in.bits.uop io.out.bits.data := aluRes + io.csrOnly <> DontCare XSDebug(io.in.valid || io.redirect.valid, "In(%d %d) Out(%d %d) Redirect:(%d %d %d %d) brTag:f:%d 
v:%d\n", @@ -91,8 +91,6 @@ class AluExeUnit extends Exu(Exu.aluExeUnitCfg) { io.redirect.bits.brTag.flag, io.redirect.bits.brTag.value ) - XSDebug(io.in.valid, "src1:%x src2:%x offset:%x func:%b pc:%x\n", - src1, src2, offset, func, pc) - XSDebug(io.out.valid, "res:%x aluRes:%x isRVC:%d isBranch:%d target:%x taken:%d\n", - io.out.bits.data, aluRes, isRVC, isBranch, target, taken) + XSDebug(io.in.valid, p"src1:${Hexadecimal(src1)} src2:${Hexadecimal(src2)} offset:${Hexadecimal(offset)} func:${Binary(func)} pc:${Hexadecimal(pc)} roqIdx:${uop.roqIdx}\n") + XSDebug(io.out.valid, p"res:${Hexadecimal(io.out.bits.data)} aluRes:${Hexadecimal(aluRes)} isRVC:${isRVC} isBranch:${isBranch} target:${Hexadecimal(target)} taken:${taken}\n") } \ No newline at end of file diff --git a/src/main/scala/xiangshan/backend/exu/DivExeUnit.scala b/src/main/scala/xiangshan/backend/exu/DivExeUnit.scala index c9c46b8b7a49a4952f57dfda00116585e86edbc2..8efc0027c48d4f6df2afdea49307eb6363ee1394 100644 --- a/src/main/scala/xiangshan/backend/exu/DivExeUnit.scala +++ b/src/main/scala/xiangshan/backend/exu/DivExeUnit.scala @@ -28,14 +28,17 @@ class DivExeUnit extends Exu(Exu.divExeUnitCfg) { x ) - divider.io.redirect := io.redirect + val dividerInputBits = divider.io.in.bits + val dividerInputCtrl = dividerInputBits.ext.get + + divider.io.redirectIn := io.redirect divider.io.in.valid := io.in.valid - divider.io.in.bits.ctrl.uop := io.in.bits.uop - divider.io.in.bits.ctrl.sign := isDivSign - divider.io.in.bits.ctrl.isW := isW - divider.io.in.bits.ctrl.isHi := isH - divider.io.in.bits.src1 := divInputFunc(src1) - divider.io.in.bits.src2 := divInputFunc(src2) + dividerInputBits.uop := io.in.bits.uop + dividerInputCtrl.sign := isDivSign + dividerInputCtrl.isW := isW + dividerInputCtrl.isHi := isH + dividerInputBits.src(0) := divInputFunc(src1) + dividerInputBits.src(1) := divInputFunc(src2) divider.io.out.ready := io.out.ready io.in.ready := divider.io.in.ready @@ -44,7 +47,7 @@ class DivExeUnit extends 
Exu(Exu.divExeUnitCfg) { io.out.bits.data := divider.io.out.bits.data io.out.bits.redirectValid := false.B io.out.bits.redirect <> DontCare - io.dmem <> DontCare + io.csrOnly <> DontCare io.out.bits.debug <> DontCare XSDebug(io.in.valid || io.redirect.valid, "In(%d %d) Out(%d %d) Redirect:(%d %d %d) brTag:%x\n", diff --git a/src/main/scala/xiangshan/backend/exu/Exu.scala b/src/main/scala/xiangshan/backend/exu/Exu.scala index 3e1b604eceb007339b1981326bea40f70b3cc0c8..5c598f21f5fe4d63d461226c2056f639a7af8c9b 100644 --- a/src/main/scala/xiangshan/backend/exu/Exu.scala +++ b/src/main/scala/xiangshan/backend/exu/Exu.scala @@ -4,7 +4,7 @@ import chisel3._ import chisel3.util._ import xiangshan._ import xiangshan.FuType._ -import xiangshan.backend.fu.FuConfig +import xiangshan.backend.fu.{CertainLatency, FuConfig, HasFuLatency, NexusLatency, UncertainLatency} import utils.ParallelOR import xiangshan.backend.fu.FunctionUnit._ @@ -44,6 +44,27 @@ case class ExuConfig val writeFpRf = supportedFuncUnits.map(_.writeFpRf).reduce(_||_) val hasRedirect = supportedFuncUnits.map(_.hasRedirect).reduce(_||_) + val latency: HasFuLatency = { + val lats = supportedFuncUnits.map(_.latency) + val latencyValue = lats.collectFirst{ + case x if x.latencyVal.nonEmpty => + x.latencyVal.get + } + val hasUncertain = lats.exists(x => x.latencyVal.isEmpty) + if(latencyValue.nonEmpty){ + if(hasUncertain) NexusLatency(latencyValue.get) else CertainLatency(latencyValue.get) + } else UncertainLatency() + } + val hasCertainLatency = latency.latencyVal.nonEmpty + val hasUncertainlatency = latency match { + case _: UncertainLatency => + true + case _: NexusLatency => + true + case _ => + false + } + def canAccept(fuType: UInt): Bool = { ParallelOR(supportedFuncUnits.map(_.fuType === fuType)) } @@ -51,8 +72,8 @@ case class ExuConfig abstract class Exu(val config: ExuConfig) extends XSModule { val io = IO(new ExuIO) - io.dmem <> DontCare io.out.bits.brUpdate <> DontCare + io.out.bits.fflags <> DontCare 
io.out.bits.debug.isMMIO := false.B } @@ -62,6 +83,7 @@ object Exu { val mulExeUnitCfg = ExuConfig("MulExu", Array(mulCfg), enableBypass = false) val divExeUnitCfg = ExuConfig("DivExu", Array(divCfg), enableBypass = false) val fenceExeUnitCfg = ExuConfig("FenceCfg", Array(fenceCfg), enableBypass = false) + val i2fExeUnitCfg = ExuConfig("I2fExu", Array(i2fCfg), enableBypass = false) val mulDivExeUnitCfg = ExuConfig("MulDivExu", Array(mulCfg, divCfg), enableBypass = false) val mulDivFenceExeUnitCfg = ExuConfig("MulDivFenceExu", Array(mulCfg, divCfg, fenceCfg), enableBypass = false) val ldExeUnitCfg = ExuConfig("LoadExu", Array(lduCfg), enableBypass = false) diff --git a/src/main/scala/xiangshan/backend/exu/FmacExeUnit.scala b/src/main/scala/xiangshan/backend/exu/FmacExeUnit.scala new file mode 100644 index 0000000000000000000000000000000000000000..e9c96ece281bd7f56eb470d1dce39ef0b9b2199a --- /dev/null +++ b/src/main/scala/xiangshan/backend/exu/FmacExeUnit.scala @@ -0,0 +1,39 @@ +package xiangshan.backend.exu + +import chisel3._ +import chisel3.util._ +import xiangshan.backend.exu.Exu.fmacExeUnitCfg +import xiangshan.backend.fu.fpu.fma.FMA +import xiangshan.backend.fu.fpu._ + +class FmacExeUnit extends Exu(fmacExeUnitCfg) { + + val frm = IO(Input(UInt(3.W))) + + val fma = Module(new FMA) + + fma.io.in.valid := io.in.valid + val input = io.in.bits + val fmaOut = fma.io.out.bits + val isRVD = !io.in.bits.uop.ctrl.isRVF + fma.io.in.bits.src := VecInit(Seq(input.src1, input.src2, input.src3).map(src => Mux(isRVD, src, unboxF64ToF32(src)))) + fma.io.in.bits.uop := io.in.bits.uop + val extraInput = fma.io.in.bits.ext.get + val instr_rm = io.in.bits.uop.cf.instr(14, 12) + extraInput.rm := Mux(instr_rm =/= 7.U, instr_rm, frm) + extraInput.op := io.in.bits.uop.ctrl.fuOpType(2, 0) + extraInput.isDouble := isRVD + + fma.io.redirectIn := io.redirect + fma.io.out.ready := io.out.ready + + io.in.ready := fma.io.in.ready + io.out.valid := fma.io.out.valid + io.out.bits.uop := 
fmaOut.uop + io.out.bits.data := Mux(fmaOut.uop.ctrl.isRVF, boxF32ToF64(fmaOut.data), fmaOut.data) + io.out.bits.fflags := fma.io.out.bits.ext.get + io.out.bits.redirectValid := false.B + io.out.bits.redirect <> DontCare + io.csrOnly <> DontCare + +} diff --git a/src/main/scala/xiangshan/backend/exu/FmiscExeUnit.scala b/src/main/scala/xiangshan/backend/exu/FmiscExeUnit.scala new file mode 100644 index 0000000000000000000000000000000000000000..094324722b84e82a219a96a01515611977dbd67f --- /dev/null +++ b/src/main/scala/xiangshan/backend/exu/FmiscExeUnit.scala @@ -0,0 +1,81 @@ +package xiangshan.backend.exu + + +import chisel3._ +import chisel3.util._ +import utils._ +import xiangshan.backend.exu.Exu.fmiscExeUnitCfg +import xiangshan.backend.fu.fpu.{F32toF64, F64toF32, FCMP, FMV, FPUSubModuleOutput, FloatToInt} +import xiangshan.backend.fu.fpu.divsqrt.DivSqrt +import xiangshan.backend.fu.fpu.FPUOpType._ +import xiangshan.backend.fu.fpu._ + +class FmiscExeUnit extends Exu(fmiscExeUnitCfg){ + + val frm = IO(Input(UInt(3.W))) + + val fcmp = Module(new FCMP) + val fmv = Module(new FMV(XLEN)) + val f2i = Module(new FloatToInt) + val f32toF64 = Module(new F32toF64) + val f64toF32 = Module(new F64toF32) + val fdivSqrt = Module(new DivSqrt) + + val subModules = Array( + (fcmp, FU_FCMP), + (fmv, FU_FMV), + (f2i, FU_F2I), + (f32toF64, FU_S2D), + (f64toF32, FU_D2S), + (fdivSqrt, FU_DIVSQRT) + ).map(x => (x._1, ("b" + x._2).U)) + + val fuOp = io.in.bits.uop.ctrl.fuOpType + assert(fuOp.getWidth == 7) // when fuOp's WIDTH change, here must change too + val fu = fuOp.head(4) + val op = fuOp.tail(4) + val isRVF = io.in.bits.uop.ctrl.isRVF + val (src1, src2) = (io.in.bits.src1, io.in.bits.src2) + + io.in.ready := Cat(subModules.map(x => fu===x._2 && x._1.io.in.ready)).orR() + + val instr_rm = io.in.bits.uop.cf.instr(14, 12) + subModules.foreach{ + case (module, fuSel) => + module.io.in.valid := io.in.valid && fu===fuSel + module.io.in.bits.uop := io.in.bits.uop + 
module.io.in.bits.src(0) := Mux( + (isRVF && fuOp=/=d2s && fuOp=/=fmv_f2i) || fuOp===s2d, + unboxF64ToF32(src1), + src1 + ) + module.io.in.bits.src(1) := Mux(isRVF, unboxF64ToF32(src2), src2) + val extraInput = module.io.in.bits.ext.get + extraInput.rm := Mux(instr_rm =/= 7.U, instr_rm, frm) + extraInput.isDouble := !isRVF + extraInput.op := op + module.io.redirectIn := io.redirect + } + + val wbArb = Module(new Arbiter(chiselTypeOf(subModules(0)._1.io.out.bits), subModules.length)) + + wbArb.io.in <> VecInit(subModules.map(_._1.io.out)) + + val out = wbArb.io.out + + out.ready := io.out.ready + io.out.valid := out.valid + io.out.bits.uop := out.bits.uop + io.out.bits.fflags := out.bits.ext.get + val outCtrl = out.bits.uop.ctrl + io.out.bits.data := Mux(outCtrl.isRVF && outCtrl.fpWen, + boxF32ToF64(out.bits.data), + Mux( (outCtrl.isRVF && outCtrl.fuOpType===fmv_f2i) || outCtrl.fuOpType===f2w || outCtrl.fuOpType===f2wu, + SignExt(out.bits.data(31, 0), XLEN), + out.bits.data + ) + ) + io.out.bits.redirectValid := DontCare + io.out.bits.redirect := DontCare + io.csrOnly <> DontCare +} diff --git a/src/main/scala/xiangshan/backend/exu/I2fExeUnit.scala b/src/main/scala/xiangshan/backend/exu/I2fExeUnit.scala new file mode 100644 index 0000000000000000000000000000000000000000..e086b37207bfcce4e2f3bda8be8ad61970a9aca8 --- /dev/null +++ b/src/main/scala/xiangshan/backend/exu/I2fExeUnit.scala @@ -0,0 +1,46 @@ +package xiangshan.backend.exu + +import chisel3._ +import chisel3.util._ +import xiangshan.backend.fu.fpu._ +import xiangshan.backend.fu.fpu.IntToFloatSingleCycle +import xiangshan.backend.fu.fpu.FPUOpType._ + +class I2fExeUnit extends Exu(Exu.i2fExeUnitCfg){ + + val frm = IO(Input(UInt(3.W))) + + val uopIn = io.in.bits.uop + val isDouble = !uopIn.ctrl.isRVF + val fuOp = uopIn.ctrl.fuOpType + val fu = fuOp.head(4) + val op = fuOp.tail(4) + + val valid = io.in.valid && !uopIn.roqIdx.needFlush(io.redirect) + val intToFloat = Module(new IntToFloatSingleCycle) + val 
extraInput = intToFloat.io.in.bits.ext.get + val instr_rm = io.in.bits.uop.cf.instr(14, 12) + extraInput.isDouble := isDouble + extraInput.rm := Mux(instr_rm =/= 7.U, instr_rm, frm) + extraInput.op := op + intToFloat.io.out.ready := io.out.ready + intToFloat.io.in.valid := valid && fu===("b"+FU_I2F).U + intToFloat.io.in.bits.src(0) := io.in.bits.src1 + intToFloat.io.in.bits.uop := uopIn + intToFloat.io.redirectIn := io.redirect + io.out.valid := valid + io.out.bits.data := Mux(intToFloat.io.out.valid, + Mux(isDouble, intToFloat.io.out.bits.data, boxF32ToF64(intToFloat.io.out.bits.data)), + Mux(isDouble, io.in.bits.src1, boxF32ToF64(io.in.bits.src1)) + ) + io.out.bits.fflags := Mux(intToFloat.io.out.valid, + intToFloat.io.out.bits.ext.get, + 0.U.asTypeOf(new Fflags) + ) + io.in.ready := true.B + io.out.bits.uop := uopIn + io.out.bits.redirect <> DontCare + io.out.bits.redirectValid := false.B + io.out.bits.debug <> DontCare + io.csrOnly <> DontCare +} diff --git a/src/main/scala/xiangshan/backend/exu/JmpExeUnit.scala b/src/main/scala/xiangshan/backend/exu/JmpExeUnit.scala index 1984513fa2a7854ea7f82d66b0dcb6e6916facdc..179639110906e439b0881ff7f46dc8b5b6fb3307 100644 --- a/src/main/scala/xiangshan/backend/exu/JmpExeUnit.scala +++ b/src/main/scala/xiangshan/backend/exu/JmpExeUnit.scala @@ -1,74 +1,141 @@ package xiangshan.backend.exu import chisel3._ -import xiangshan.{ExuOutput, FuType} +import xiangshan.{ExuOutput, FuType, SfenceBundle, TlbCsrBundle} import xiangshan.backend.fu.{CSR, Jump} -import xiangshan.backend.decode.isa._ +import xiangshan.backend.fu.fpu.Fflags import utils._ class JmpExeUnit extends Exu(Exu.jmpExeUnitCfg) { + val fflags = IO(Input(new Fflags)) + val dirty_fs = IO(Input(Bool())) + val frm = IO(Output(UInt(3.W))) + + val fenceToSbuffer = IO(new FenceToSbuffer) + val sfence = IO(Output(new SfenceBundle)) + val fencei = IO(Output(Bool())) + + val tlbCsrIO = IO(Output(new TlbCsrBundle)) + val (valid, src1, src2, uop, fuType, func) = (io.in.valid, 
io.in.bits.src1, io.in.bits.src2, io.in.bits.uop, io.in.bits.uop.ctrl.fuType, io.in.bits.uop.ctrl.fuOpType) val jmp = Module(new Jump) val csr = Module(new CSR) val fence = Module(new FenceExeUnit) + val i2f = Module(new I2fExeUnit) + + fenceToSbuffer <> fence.toSbuffer + sfence <> fence.sfence + fencei := fence.fencei + + fence.io.csrOnly <> DontCare + i2f.io.csrOnly <> DontCare val isJmp = fuType === FuType.jmp val isCsr = fuType === FuType.csr val isFence = fuType === FuType.fence + val isI2f = fuType === FuType.i2f + jmp.io <> DontCare jmp.io.in.valid := io.in.valid && isJmp - jmp.io.in.bits := io.in.bits jmp.io.out.ready := io.out.ready - jmp.io.exception <> DontCare - jmp.io.dmem <> DontCare - jmp.io.mcommit := DontCare - jmp.io.redirect := io.redirect + jmp.io.in.bits.connectToExuInput(io.in.bits) + jmp.io.redirectIn := io.redirect + + val jumpExuOut = Wire(new ExuOutput) + val jumpExtraOut = jmp.io.out.bits.ext.get + + jumpExuOut.uop := uop + jumpExuOut.data := jmp.io.out.bits.data + jumpExuOut.brUpdate := jumpExtraOut.brUpdate + jumpExuOut.fflags := DontCare + jumpExuOut.redirect := jumpExtraOut.redirect + jumpExuOut.redirectValid := jumpExtraOut.redirectValid + jumpExuOut.debug := DontCare + + + frm := csr.io.fpu_csr.frm + tlbCsrIO := csr.io.tlbCsrIO csr.io.cfIn := io.in.bits.uop.cf - csr.io.fpu_csr := DontCare - csr.io.exception <> io.exception + csr.io.fpu_csr.fflags := fflags + csr.io.fpu_csr.isIllegal := false.B // TODO: check illegal rounding mode + csr.io.fpu_csr.dirty_fs := dirty_fs csr.io.instrValid := DontCare csr.io.out.ready := io.out.ready - csr.io.in.bits.src3 := DontCare - val csrOut = csr.access( - valid = io.in.valid && fuType === FuType.csr, - src1 = io.in.bits.src1, - src2 = io.in.bits.src2, - func = io.in.bits.uop.ctrl.fuOpType - ) + csr.io.in.valid := io.in.valid && isCsr + csr.io.in.bits.ext.get := io.in.bits.uop.ctrl.fuOpType + csr.io.in.bits.connectToExuInput(io.in.bits) + csr.io.redirectIn := io.redirect + val csrOut = 
csr.io.out.bits.data + + + csr.io.perf <> DontCare + + csr.io.exception := io.csrOnly.exception + csr.io.isInterrupt := io.redirect.bits.isFlushPipe + csr.io.memExceptionVAddr := io.csrOnly.memExceptionVAddr + io.csrOnly.trapTarget := csr.io.trapTarget + csr.io.mtip := io.csrOnly.externalInterrupt.mtip + csr.io.msip := io.csrOnly.externalInterrupt.msip + csr.io.meip := io.csrOnly.externalInterrupt.meip + io.csrOnly.interrupt := csr.io.interrupt + // val uop = io.in.bits.uop val csrExuOut = Wire(new ExuOutput) csrExuOut.uop := uop csrExuOut.uop.cf := csr.io.cfOut csrExuOut.uop.ctrl.flushPipe := csr.io.flushPipe csrExuOut.data := csrOut - csrExuOut.redirectValid := csr.io.redirectValid + csrExuOut.fflags := DontCare + csrExuOut.redirectValid := csr.io.redirectOutValid csrExuOut.redirect.brTag := uop.brTag csrExuOut.redirect.isException := false.B csrExuOut.redirect.isMisPred := false.B csrExuOut.redirect.isFlushPipe := false.B csrExuOut.redirect.isReplay := false.B csrExuOut.redirect.roqIdx := uop.roqIdx - csrExuOut.redirect.target := csr.io.redirect.target + csrExuOut.redirect.target := csr.io.redirectOut.target csrExuOut.redirect.pc := uop.cf.pc csrExuOut.debug := DontCare csrExuOut.brUpdate := DontCare + fence.io <> DontCare fence.io.in.valid := valid && isFence fence.io.in.bits := io.in.bits fence.io.redirect <> DontCare // io.redirect // No need for fence is the first instr fence.io.mcommit <> DontCare - fence.io.exception <> DontCare - fence.io.dmem <> DontCare fence.io.out.ready := io.out.ready + i2f.io.in.valid := valid && isI2f + i2f.io.in.bits := io.in.bits + i2f.io.redirect <> io.redirect + i2f.io.mcommit <> DontCare + i2f.io.out.ready := io.out.ready + i2f.frm := frm + // NOTE: just one instr in this module at the same time - io.in.ready := jmp.io.in.ready && csr.io.in.ready && fence.io.in.ready - io.out.bits := Mux(jmp.io.out.valid, jmp.io.out.bits, Mux(csr.io.out.valid, csrExuOut, fence.io.out.bits)) - io.out.valid := jmp.io.out.valid || 
csr.io.out.valid || fence.io.out.valid + io.in.ready := jmp.io.in.ready && csr.io.in.ready && fence.io.in.ready && i2f.io.in.ready + io.out.bits := Mux(jmp.io.out.valid, + jumpExuOut, + Mux(csr.io.out.valid, + csrExuOut, + Mux(fence.io.out.valid, + fence.io.out.bits, + i2f.io.out.bits + ) + ) + ) + io.out.valid := jmp.io.out.valid || csr.io.out.valid || fence.io.out.valid || i2f.io.out.valid - XSDebug(io.in.valid, p"In(${io.in.valid} ${io.in.ready} ${jmp.io.in.ready}${csr.io.in.ready}${fence.io.in.ready}) pc:0x${Hexadecimal(io.in.bits.uop.cf.pc)} roqIdx:${io.in.bits.uop.roqIdx} fuType:b${Binary(io.in.bits.uop.ctrl.fuType)} fuOpType:b${Binary(io.in.bits.uop.ctrl.fuOpType)} isJmp:${isJmp} isCsr${isCsr} isFence:${isFence}\n") - XSDebug(io.out.valid, p"Out(${io.out.valid} ${io.out.ready} ${jmp.io.out.valid}${csr.io.out.valid}${fence.io.out.valid}) pc:0x${Hexadecimal(io.out.bits.uop.cf.pc)} roqIdx:${io.out.bits.uop.roqIdx} fuType:b${Binary(io.out.bits.uop.ctrl.fuType)} fuOpType:b${Binary(io.out.bits.uop.ctrl.fuOpType)}\n") -} \ No newline at end of file + XSDebug(io.in.valid, + p"In(${io.in.valid} ${io.in.ready} ${jmp.io.in.ready}${csr.io.in.ready}${fence.io.in.ready}${i2f.io.in.ready}) " + + p"pc:0x${Hexadecimal(io.in.bits.uop.cf.pc)} roqIdx:${io.in.bits.uop.roqIdx} " + + p"fuType:b${Binary(io.in.bits.uop.ctrl.fuType)} fuOpType:b${Binary(io.in.bits.uop.ctrl.fuOpType)} " + + p"isJmp:$isJmp isCsr$isCsr isFence:$isFence isI2f:$isI2f\n") + XSDebug(io.out.valid, + p"Out(${io.out.valid} ${io.out.ready} ${jmp.io.out.valid}${csr.io.out.valid}${fence.io.out.valid}${i2f.io.out.valid}) " + + p"pc:0x${Hexadecimal(io.out.bits.uop.cf.pc)} roqIdx:${io.out.bits.uop.roqIdx} " + + p"fuType:b${Binary(io.out.bits.uop.ctrl.fuType)} fuOpType:b${Binary(io.out.bits.uop.ctrl.fuOpType)}\n") +} diff --git a/src/main/scala/xiangshan/backend/exu/LsExeUnit.scala b/src/main/scala/xiangshan/backend/exu/LsExeUnit.scala deleted file mode 100644 index 
75f7a43920a40819abad0062527428cc0d942586..0000000000000000000000000000000000000000 --- a/src/main/scala/xiangshan/backend/exu/LsExeUnit.scala +++ /dev/null @@ -1,232 +0,0 @@ -//package xiangshan.backend.exu -// -//import chisel3._ -//import chisel3.util._ -//import chisel3.util.experimental.BoringUtils -//import xiangshan._ -//import utils._ -//import bus.simplebus._ -//import xiangshan.AddressSpace -//import xiangshan.backend._ -//import xiangshan.backend.brq.BrqPtr -//import fpu.boxF32ToF64 -// -// -//class StoreQueueEntry extends XSBundle{ -// val src1 = UInt(XLEN.W) -// val src2 = UInt(XLEN.W) -// val addr = UInt(XLEN.W) -// val src3 = UInt(XLEN.W) -// val wdata = UInt(XLEN.W) -// val func = UInt(6.W) -// val pc = UInt(VAddrBits.W) //for debug -// val brTag = new BrqPtr //FIXIT -//} -// -//// Multi-cycle LSU ported from NOOP -//class LsExeUnit extends Exu(Exu.lsuExeUnitCfg){ -// -// // store buffer -// val stqData = Reg(Vec(8, new StoreQueueEntry)) -// val stqValid = RegInit(VecInit(List.fill(8)(false.B))) -// val stqPtr = Reg(Vec(8, UInt(3.W))) -// val stqHead = RegInit(0.U(3.W)) -// val stqTail = stqPtr(0) -// val stqCommited = RegInit(0.U(3.W)) -// val stqFull = stqHead === 7.U //stq_valid.reduce(_.valid && _.valid) -// val emptySlot = PriorityMux(~stqValid.asUInt, VecInit(List.tabulate(8)(_.U))) -// -// // when retiringStore, block all input insts -// val isStoreIn = io.in.valid && LSUOpType.isStore(io.in.bits.uop.ctrl.fuOpType) -// val retiringStore = RegInit(false.B) -// val (validIn, src1In, src2In, src3In, funcIn) = (io.in.valid, io.in.bits.src1, io.in.bits.uop.ctrl.imm, io.in.bits.src2, io.in.bits.uop.ctrl.fuOpType) -// val (valid, src1, src2, wdata, func) = -// ( -// Mux(retiringStore, stqValid(stqTail), validIn && !isStoreIn), -// Mux(retiringStore, stqData(stqTail).src1, src1In), -// Mux(retiringStore, stqData(stqTail).src2, src2In), -// Mux(retiringStore, stqData(stqTail).src3, src3In), -// Mux(retiringStore, stqData(stqTail).func, funcIn) -// ) 
-// // assert(!(retiringStore && !stqValid(stqTail))) -// -// def genWmask(addr: UInt, sizeEncode: UInt): UInt = { -// LookupTree(sizeEncode, List( -// "b00".U -> 0x1.U, //0001 << addr(2:0) -// "b01".U -> 0x3.U, //0011 -// "b10".U -> 0xf.U, //1111 -// "b11".U -> 0xff.U //11111111 -// )) << addr(2, 0) -// } -// def genWdata(data: UInt, sizeEncode: UInt): UInt = { -// LookupTree(sizeEncode, List( -// "b00".U -> Fill(8, data(7, 0)), -// "b01".U -> Fill(4, data(15, 0)), -// "b10".U -> Fill(2, data(31, 0)), -// "b11".U -> data -// )) -// } -// -// val dmem = io.dmem -// val addr = src1 + src2 -// val addrLatch = RegNext(addr) -// val isStore = valid && LSUOpType.isStore(func) -// val partialLoad = !isStore && (func =/= LSUOpType.ld) -// -// val s_idle :: s_wait_resp :: s_partialLoad :: Nil = Enum(3) -// val state = RegInit(s_idle) -// -// switch (state) { -// is (s_idle) { when (dmem.req.fire()) { state := Mux(isStore, s_partialLoad, s_wait_resp) } } -// is (s_wait_resp) { when (dmem.resp.fire()) { state := Mux(partialLoad, s_partialLoad, s_idle) } } -// is (s_partialLoad) { state := s_idle } -// } -// -// val size = func(1,0) -// dmem.req.bits.apply(addr = addr, size = size, wdata = genWdata(wdata, size), -// wmask = genWmask(addr, size), cmd = Mux(isStore, SimpleBusCmd.write, SimpleBusCmd.read)) -// dmem.req.valid := valid && (state === s_idle) -// dmem.resp.ready := true.B -// -// XSDebug("state %x req.valid/ready %x/%x resp.valid/ready %x/%x addr %x size %x data %x mask %x cmd %x\n", -// state, dmem.req.valid, dmem.req.ready, dmem.resp.valid, dmem.resp.ready, -// addr, size, genWdata(wdata, size), genWmask(addr, size), Mux(isStore, SimpleBusCmd.write, SimpleBusCmd.read) -// ) -// -// val rdata = Wire(UInt(XLEN.W)) -// val rdataLatch = RegNext(rdata) -// val rdataSel = LookupTree(addrLatch(2, 0), List( -// "b000".U -> rdataLatch(63, 0), -// "b001".U -> rdataLatch(63, 8), -// "b010".U -> rdataLatch(63, 16), -// "b011".U -> rdataLatch(63, 24), -// "b100".U -> 
rdataLatch(63, 32), -// "b101".U -> rdataLatch(63, 40), -// "b110".U -> rdataLatch(63, 48), -// "b111".U -> rdataLatch(63, 56) -// )) -// val rdataPartialLoad = LookupTree(func, List( -// LSUOpType.lb -> SignExt(rdataSel(7, 0) , XLEN), -// LSUOpType.lh -> SignExt(rdataSel(15, 0), XLEN), -// LSUOpType.lw -> SignExt(rdataSel(31, 0), XLEN), -// LSUOpType.lbu -> ZeroExt(rdataSel(7, 0) , XLEN), -// LSUOpType.lhu -> ZeroExt(rdataSel(15, 0), XLEN), -// LSUOpType.lwu -> ZeroExt(rdataSel(31, 0), XLEN), -// LSUOpType.flw -> boxF32ToF64(rdataSel(31,0)) -// )) -// -// // pop store queue if insts have been commited and dmem req fired successfully -// val storeFinish = retiringStore && dmem.resp.fire()//state === s_partialLoad -// val stqDequeue = storeFinish || !stqValid(stqTail) && stqHead > 0.U -// when(stqDequeue){ -// stqValid(stqTail) := false.B -// // update stq ptr -// for(i <- 1 until 8){ -// stqPtr(i-1) := stqPtr(i) -// } -// } -// -// // if store, add it to store queue -// val stqEnqueue = validIn && isStoreIn && !stqFull && !retiringStore && !io.redirect.valid && state === s_idle -// when(stqEnqueue){ -// stqPtr(stqHead - stqDequeue) := emptySlot -// stqData(emptySlot).src1 := src1In -// stqData(emptySlot).src2 := src2In -// stqData(emptySlot).addr := src1In + src2In -// stqData(emptySlot).src3 := genWdata(src3In, funcIn(1, 0)) -// stqData(emptySlot).pc := io.in.bits.uop.cf.pc -// stqData(emptySlot).func := funcIn -// stqData(emptySlot).brTag := io.in.bits.uop.brTag -// stqValid(emptySlot) := true.B -// } -// -// // if store insts have been commited, send dmem req -// // have to say it seems better to rebuild FSM instead of using such ugly wrapper -// val needRetireStore = stqCommited > 0.U && stqValid(stqTail) -// when( -// needRetireStore && !retiringStore && state === s_idle && (!io.in.valid || isStoreIn) -// ){ -// retiringStore := true.B -// } -// when(dmem.resp.fire() && retiringStore){ -// retiringStore := false.B -// } -// -// // update stqTail, stqCommited 
-// stqCommited := stqCommited + io.mcommit - storeFinish -// stqHead := stqHead + stqEnqueue - stqDequeue -// -// // Store addr forward match -// // If match, get data from store queue -// val dataBackVec = Wire(Vec(XLEN/8, (UInt((XLEN/8).W)))) -// for(j <- (0 to (XLEN/8 - 1))){ -// dataBackVec(j) := dmem.resp.bits.rdata(8*(j+1)-1, 8*j) -// } -// -// for(i <- 0 until 8){ -// when(stqValid(stqPtr(i)) && i.U < stqHead){ -// when(addr(PAddrBits-1, log2Up(XLEN/8)) === stqData(stqPtr(i)).addr(PAddrBits-1, log2Up(XLEN/8))){ -// for(j <- (0 to (XLEN/8 - 1))){ -// when(genWmask(stqData(stqPtr(i)).addr, stqData(stqPtr(i)).func(1, 0))(j)){ -// dataBackVec(j) := stqData(stqPtr(i)).src3(8*(j+1)-1, 8*j) -// XSDebug("forwarding data from stq, addr %x stqpos %d bitpos %d data %x\n", addr, i.U, j.U, stqData(stqPtr(i)).src3(8*(j+1)-1, 8*j)) -// } -// } -// } -// XSDebug("sbuffer id %d ptr %d pc %x addr %x data %x func %x wmask %b\n", -// i.U, stqPtr(i), stqData(stqPtr(i)).pc, stqData(stqPtr(i)).src1 + stqData(stqPtr(i)).src2, stqData(stqPtr(i)).src3, stqData(stqPtr(i)).func, genWmask(stqData(stqPtr(i)).addr, stqData(stqPtr(i)).func(1, 0)) -// ) -// } -// } -// rdata := dataBackVec.asUInt -// -// val expRedirect = io.redirect.valid && io.redirect.bits.isException -// val brRedirect = io.redirect.valid && io.redirect.bits.isMisPred -// for(i <- 0 until 8){ -// when((i.U >= stqCommited && i.U < stqHead) && (expRedirect || brRedirect && stqData(stqPtr(i)).brTag.needBrFlush(io.redirect.bits.brTag) && stqValid(stqPtr(i)))){ -// stqValid(stqPtr(i)) := false.B -// } -// XSDebug("sptrtable: id %d ptr %d valid %d\n", i.U, stqPtr(i), stqValid(stqPtr(i))) -// } -// when(expRedirect){ -// //invalidate uncommited store -// //FIXME -// } -// -// io.in.ready := io.out.fire() -// -// val validLoad = RegInit(false.B) -// when(state =/= s_idle && !io.in.valid) { validLoad := false.B } -// when(state === s_idle && io.in.valid && !retiringStore && dmem.req.fire()) { validLoad := true.B } -// 
io.out.valid := (!isStoreIn && !retiringStore && validLoad && Mux(partialLoad, state === s_partialLoad, dmem.resp.fire() && (state === s_wait_resp)) || stqEnqueue) && io.in.valid -// io.out.bits.uop <> io.in.bits.uop -// io.out.bits.data := Mux(partialLoad, rdataPartialLoad, rdata) -// // io.out.bits.debug.isMMIO := AddressSpace.isMMIO(addr) && io.out.valid -// io.out.bits.debug.isMMIO := AddressSpace.isMMIO(addr) //for debug -// io.out.bits.redirect := DontCare -// io.out.bits.redirectValid := false.B -// -// when(io.out.fire()){ -// XSDebug("LSU fire: pc %x addr %x mmio %x isStoreIn %x retiringStore %x partialLoad %x dmem %x stqEnqueue %x state %x dmemres %x fwdres %x\n", -// io.in.bits.uop.cf.pc, -// addr, -// io.out.bits.debug.isMMIO, -// isStoreIn, -// retiringStore, -// partialLoad, -// dmem.resp.fire(), -// stqEnqueue, -// state, -// dmem.resp.bits.rdata, -// io.out.bits.data -// ) -// } -// -// // debug -// XSDebug("state: %d (valid, ready): in (%d,%d) out (%d,%d)\n", state, io.in.valid, io.in.ready, io.out.valid, io.out.ready) -// XSDebug("stqinfo: stqValid.asUInt %b stqHead %d stqTail %d stqCommited %d emptySlot %d\n", stqValid.asUInt, stqHead, stqTail, stqCommited, emptySlot) -// XSDebug(retiringStore, "retiringStore now...\n") -// XSInfo(io.dmem.req.fire() && io.dmem.req.bits.cmd =/= SimpleBusCmd.write, "[DMEM LOAD REQ] addr 0x%x wdata 0x%x size %d\n", dmem.req.bits.addr, dmem.req.bits.wdata, dmem.req.bits.size) -// XSInfo(io.dmem.req.fire() && io.dmem.req.bits.cmd === SimpleBusCmd.write, "[DMEM STORE REQ] addr 0x%x wdata 0x%x size %d\n", dmem.req.bits.addr, dmem.req.bits.wdata, dmem.req.bits.size) -// XSInfo(io.dmem.resp.fire(), "[DMEM RESP] data %x\n", rdata) -//} diff --git a/src/main/scala/xiangshan/backend/exu/MulDivExeUnit.scala b/src/main/scala/xiangshan/backend/exu/MulDivExeUnit.scala index f4044438ea3e270a751bcdfe41a81db3e033ea10..89c1fab70c1e2fc1b548240d6198f9be5428ee7d 100644 --- a/src/main/scala/xiangshan/backend/exu/MulDivExeUnit.scala +++ 
b/src/main/scala/xiangshan/backend/exu/MulDivExeUnit.scala @@ -22,8 +22,7 @@ class MulDivFenceExeUnit extends Exu(Exu.mulDivFenceExeUnitCfg){ for(x <- Seq(mul.io, div.io, fence.io)){ x.mcommit <> DontCare - x.exception <> DontCare - x.dmem <> DontCare + x.csrOnly <> DontCare x.in.bits := io.in.bits x.redirect := io.redirect } @@ -73,9 +72,7 @@ class MulDivExeUnit extends Exu(Exu.mulDivExeUnitCfg){ val div = Module(new DivExeUnit) for(x <- Seq(mul.io, div.io)){ - x.mcommit <> DontCare - x.exception <> DontCare - x.dmem <> DontCare + x <> DontCare x.in.bits := io.in.bits x.redirect := io.redirect } @@ -93,6 +90,7 @@ class MulDivExeUnit extends Exu(Exu.mulDivExeUnitCfg){ arb.io.in(1) <> div.io.out io.out <> arb.io.out + io.csrOnly <> DontCare XSDebug(io.in.valid, "In(%d %d) Out(%d %d) Redirect:(%d %d %d) brTag:%x\n", io.in.valid, io.in.ready, diff --git a/src/main/scala/xiangshan/backend/exu/MulExeUnit.scala b/src/main/scala/xiangshan/backend/exu/MulExeUnit.scala index c5a008640b5c3c752495ada8db4f48a47bc849df..b53418c99ae7b3a5be5ba595aefb580a8729c070 100644 --- a/src/main/scala/xiangshan/backend/exu/MulExeUnit.scala +++ b/src/main/scala/xiangshan/backend/exu/MulExeUnit.scala @@ -28,16 +28,17 @@ class MulExeUnit extends Exu(Exu.mulExeUnitCfg){ val isH = MDUOpType.isH(func) val op = MDUOpType.getMulOp(func) - mul.io.redirect := io.redirect - mul.io.in.bits.ctrl.uop := io.in.bits.uop - mul.io.in.bits.ctrl.sign := DontCare //Mul don't use this - mul.io.in.bits.ctrl.isW := isW - mul.io.in.bits.ctrl.isHi := isH - mul.io.in.bits.src1 := LookupTree( + val mulInputCtrl = mul.io.in.bits.ext.get + mul.io.redirectIn := io.redirect + mul.io.in.bits.uop := io.in.bits.uop + mulInputCtrl.sign := DontCare //Mul don't use this + mulInputCtrl.isW := isW + mulInputCtrl.isHi := isH + mul.io.in.bits.src(0) := LookupTree( op, mulInputFuncTable.map(p => (p._1(1,0), p._2._1(src1))) ) - mul.io.in.bits.src2 := LookupTree( + mul.io.in.bits.src(1) := LookupTree( op, mulInputFuncTable.map(p => 
(p._1(1,0), p._2._2(src2))) ) @@ -50,6 +51,7 @@ class MulExeUnit extends Exu(Exu.mulExeUnitCfg){ io.out.bits.data := mul.io.out.bits.data io.out.bits.redirectValid := false.B io.out.bits.redirect <> DontCare + io.csrOnly <> DontCare XSDebug(io.in.valid, "In(%d %d) Out(%d %d) Redirect:(%d %d %d) brTag:%x\n", io.in.valid, io.in.ready, @@ -59,9 +61,7 @@ class MulExeUnit extends Exu(Exu.mulExeUnitCfg){ io.redirect.bits.isFlushPipe, io.redirect.bits.brTag.value ) - XSDebug(io.in.valid, "src1:%x src2:%x pc:%x\n", src1, src2, io.in.bits.uop.cf.pc) - XSDebug(io.out.valid, "Out(%d %d) res:%x pc:%x\n", - io.out.valid, io.out.ready, io.out.bits.data, io.out.bits.uop.cf.pc - ) + XSDebug(io.in.valid, p"src1:${Hexadecimal(src1)} src2:${Hexadecimal(src2)} pc:${Hexadecimal(io.in.bits.uop.cf.pc)} roqIdx:${io.in.bits.uop.roqIdx}\n") + XSDebug(io.out.valid, p"Out(${io.out.valid} ${io.out.ready}) res:${Hexadecimal(io.out.bits.data)} pc:${io.out.bits.uop.cf.pc} roqIdx:${io.out.bits.uop.roqIdx}\n") XSDebug(io.redirect.valid, p"redirect: ${io.redirect.bits.brTag}\n") } diff --git a/src/main/scala/xiangshan/backend/exu/Wbu.scala b/src/main/scala/xiangshan/backend/exu/Wbu.scala index ad2d2428d458fc9405636970dade988ef9f89493..5e2d0ddd590e2352bffb0a552a8956aa78922678 100644 --- a/src/main/scala/xiangshan/backend/exu/Wbu.scala +++ b/src/main/scala/xiangshan/backend/exu/Wbu.scala @@ -85,10 +85,15 @@ class Wbu(exuConfigs: Array[ExuConfig]) extends XSModule{ def splitN[T](in: Seq[T], n: Int): Seq[Option[Seq[T]]] = { require(n > 0) - if(in.size < n) Seq(Some(in)) ++ Seq.fill(n-1)(None) - else { - val m = in.size/n - Some(in.take(m)) +: splitN(in.drop(m), n-1) + if(n == 1){ + return Seq(Some(in)) + } else { + if(in.size < n ){ + Seq(Some(in)) ++ Seq.fill(n-1)(None) + } else { + val m = in.size / n + Some(in.take(m)) +: splitN(in.drop(m), n-1) + } } } diff --git a/src/main/scala/xiangshan/backend/fu/Alu.scala b/src/main/scala/xiangshan/backend/fu/Alu.scala deleted file mode 100644 index 
44d567860f36eea65681be1a76453cbc77e60623..0000000000000000000000000000000000000000 --- a/src/main/scala/xiangshan/backend/fu/Alu.scala +++ /dev/null @@ -1,88 +0,0 @@ -//package xiangshan.backend.fu -// -//import chisel3._ -//import chisel3.util._ -//import xiangshan._ -//import utils._ -//import xiangshan.backend._ -// -//import xiangshan.backend.fu.FunctionUnit._ -// -//class Alu extends FunctionUnit(aluCfg) { -// val io = IO(new ExuIO) -// -// -// override def toString: String = "Alu" -// -// val (iovalid, src1, src2, offset, func, pc, uop) = (io.in.valid, io.in.bits.src1, io.in.bits.src2, -// io.in.bits.uop.ctrl.imm, io.in.bits.uop.ctrl.fuOpType, SignExt(io.in.bits.uop.cf.pc, AddrBits), io.in.bits.uop) -// -// val redirectHit = uop.brTag.needFlush(io.redirect) -// val valid = iovalid && !redirectHit -// -// val isAdderSub = (func =/= ALUOpType.add) && (func =/= ALUOpType.addw) && !ALUOpType.isJump(func) -// val adderRes = (src1 +& (src2 ^ Fill(XLEN, isAdderSub))) + isAdderSub -// val xorRes = src1 ^ src2 -// val sltu = !adderRes(XLEN) -// val slt = xorRes(XLEN-1) ^ sltu -// -// val shsrc1 = LookupTreeDefault(func, src1, List( -// ALUOpType.srlw -> ZeroExt(src1(31,0), 64), -// ALUOpType.sraw -> SignExt(src1(31,0), 64) -// )) -// val shamt = Mux(ALUOpType.isWordOp(func), src2(4, 0), src2(5, 0)) -// val res = LookupTreeDefault(func(3, 0), adderRes, List( -// ALUOpType.sll -> ((shsrc1 << shamt)(XLEN-1, 0)), -// ALUOpType.slt -> ZeroExt(slt, XLEN), -// ALUOpType.sltu -> ZeroExt(sltu, XLEN), -// ALUOpType.xor -> xorRes, -// ALUOpType.srl -> (shsrc1 >> shamt), -// ALUOpType.or -> (src1 | src2), -// ALUOpType.and -> (src1 & src2), -// ALUOpType.sra -> ((shsrc1.asSInt >> shamt).asUInt) -// )) -// val aluRes = Mux(ALUOpType.isWordOp(func), SignExt(res(31,0), 64), res) -// -// val branchOpTable = List( -// ALUOpType.getBranchType(ALUOpType.beq) -> !xorRes.orR, -// ALUOpType.getBranchType(ALUOpType.blt) -> slt, -// ALUOpType.getBranchType(ALUOpType.bltu) -> sltu -// ) -// 
-// val isBru = ALUOpType.isBru(func) -// // val isBranch = io.in.bits.uop.cf.isBr// ALUOpType.isBranch(func) -// val isBranch = ALUOpType.isBranch(func) -// val isJump = ALUOpType.isJump(func) -// val taken = LookupTree(ALUOpType.getBranchType(func), branchOpTable) ^ ALUOpType.isBranchInvert(func) -// val target = Mux(isBranch, pc + offset, adderRes)(VAddrBits-1,0) -// val isRVC = uop.cf.isRVC//(io.in.bits.cf.instr(1,0) =/= "b11".U) -// -// io.in.ready := io.out.ready -// val pcLatchSlot = Mux(isRVC, pc + 2.U, pc + 4.U) -// io.out.bits.redirectValid := io.out.valid && isBru//isBranch -// io.out.bits.redirect.target := Mux(!taken && isBranch, pcLatchSlot, target) -// io.out.bits.redirect.brTag := uop.brTag -// io.out.bits.redirect.isException := DontCare // false.B -// io.out.bits.redirect.roqIdx := uop.roqIdx -// -// io.out.valid := valid -// io.out.bits.uop <> io.in.bits.uop -// io.out.bits.data := Mux(isJump, pcLatchSlot, aluRes) -// -// XSDebug(io.in.valid, -// "In(%d %d) Out(%d %d) Redirect:(%d %d %d) brTag:f:%d v:%d\n", -// io.in.valid, -// io.in.ready, -// io.out.valid, -// io.out.ready, -// io.redirect.valid, -// io.redirect.bits.isException, -// redirectHit, -// io.redirect.bits.brTag.flag, -// io.redirect.bits.brTag.value -// ) -// XSDebug(io.in.valid, "src1:%x src2:%x offset:%x func:%b pc:%x\n", -// src1, src2, offset, func, pc) -// XSDebug(io.out.valid, "res:%x aluRes:%x isRVC:%d isBru:%d isBranch:%d isJump:%d target:%x taken:%d\n", -// io.out.bits.data, aluRes, isRVC, isBru, isBranch, isJump, target, taken) -//} diff --git a/src/main/scala/xiangshan/backend/fu/CSR.scala b/src/main/scala/xiangshan/backend/fu/CSR.scala index ea7d54255079ea39c6d35506978498eb938e7647..a676ab22fa3b92d37db89bd74f14777cff86b1cf 100644 --- a/src/main/scala/xiangshan/backend/fu/CSR.scala +++ b/src/main/scala/xiangshan/backend/fu/CSR.scala @@ -1,11 +1,9 @@ package xiangshan.backend.fu import chisel3._ -import chisel3.ExcitingUtils.ConnectionType +import 
chisel3.ExcitingUtils.{ConnectionType, Debug} import chisel3.util._ -import chisel3.util.experimental.BoringUtils import fpu.Fflags -import noop.MMUIO import utils._ import xiangshan._ import xiangshan.backend._ @@ -53,11 +51,11 @@ trait HasCSRConst { // Supervisor Protection and Translation val Satp = 0x180 - // Machine Information Registers - val Mvendorid = 0xF11 - val Marchid = 0xF12 - val Mimpid = 0xF13 - val Mhartid = 0xF14 + // Machine Information Registers + val Mvendorid = 0xF11 + val Marchid = 0xF12 + val Mimpid = 0xF13 + val Mhartid = 0xF14 // Machine Trap Setup val Mstatus = 0x300 @@ -84,7 +82,7 @@ trait HasCSRConst { val PmpaddrBase = 0x3B0 // Machine Counter/Timers - // Currently, we uses perfcnt csr set instead of standard Machine Counter/Timers + // Currently, we uses perfcnt csr set instead of standard Machine Counter/Timers // 0xB80 - 0x89F are also used as perfcnt csr // Machine Counter Setup (not implemented) @@ -166,35 +164,50 @@ class FpuCsrIO extends XSBundle { val frm = Input(UInt(3.W)) } -class CSRIO extends FunctionUnitIO { + +class PerfCounterIO extends XSBundle { + val value = Input(UInt(XLEN.W)) +} + +class CSRIO extends FunctionUnitIO[UInt, Null](csrCfg, len=64, extIn= FuOpType()) { val cfIn = Input(new CtrlFlow) - val redirect = Output(new Redirect) - val redirectValid = Output(Bool()) + val redirectOut = Output(new Redirect) + val redirectOutValid = Output(Bool()) val fpu_csr = Flipped(new FpuCsrIO) val cfOut = Output(new CtrlFlow) // from rob val exception = Flipped(ValidIO(new MicroOp)) + val isInterrupt = Input(Bool()) // for exception check val instrValid = Input(Bool()) val flushPipe = Output(Bool()) // for differential testing // val intrNO = Output(UInt(XLEN.W)) val wenFix = Output(Bool()) + + override def cloneType: CSRIO.this.type = + new CSRIO().asInstanceOf[this.type] + + val perf = Vec(NumPerfCounters, new PerfCounterIO) + val memExceptionVAddr = Input(UInt(VAddrBits.W)) + val trapTarget = Output(UInt(VAddrBits.W)) + val 
mtip = Input(Bool()) + val msip = Input(Bool()) + val meip = Input(Bool()) + val interrupt = Output(Bool()) + val tlbCsrIO = Output(new TlbCsrBundle) } -class CSR extends FunctionUnit(csrCfg) with HasCSRConst{ +class CSR extends XSModule + with HasCSRConst +{ + val io = IO(new CSRIO) io.cfOut := io.cfIn - val (valid, src1, src2, func) = (io.in.valid, io.in.bits.src1, io.in.bits.src2, io.in.bits.func) - def access(valid: Bool, src1: UInt, src2: UInt, func: UInt): UInt = { - this.valid := valid - this.src1 := src1 - this.src2 := src2 - this.func := func - io.out.bits - } + val (valid, src1, src2, func) = + (io.in.valid, io.in.bits.src(0), io.in.bits.uop.ctrl.imm, io.in.bits.ext.get) // CSR define @@ -257,14 +270,14 @@ class CSR extends FunctionUnit(csrCfg) with HasCSRConst{ val mipFixMask = GenMask(9) | GenMask(5) | GenMask(1) val mip = (mipWire.asUInt | mipReg).asTypeOf(new Interrupt) - def getMisaMxl(mxl: Int): UInt = {mxl.U << (XLEN-2)} - def getMisaExt(ext: Char): UInt = {1.U << (ext.toInt - 'a'.toInt)} + def getMisaMxl(mxl: Int): UInt = {mxl.U << (XLEN-2)}.asUInt() + def getMisaExt(ext: Char): UInt = {1.U << (ext.toInt - 'a'.toInt)}.asUInt() var extList = List('a', 's', 'i', 'u') if(HasMExtension){ extList = extList :+ 'm'} if(HasCExtension){ extList = extList :+ 'c'} if(HasFPU){ extList = extList ++ List('f', 'd')} - val misaInitVal = getMisaMxl(2) | extList.foldLeft(0.U)((sum, i) => sum | getMisaExt(i)) //"h8000000000141105".U - val misa = RegInit(UInt(XLEN.W), misaInitVal) + val misaInitVal = getMisaMxl(2) | extList.foldLeft(0.U)((sum, i) => sum | getMisaExt(i)) //"h8000000000141105".U + val misa = RegInit(UInt(XLEN.W), misaInitVal) // MXL = 2 | 0 | EXT = b 00 0000 0100 0001 0001 0000 0101 // (XLEN-1, XLEN-2) | |(25, 0) ZY XWVU TSRQ PONM LKJI HGFE DCBA @@ -300,12 +313,12 @@ class CSR extends FunctionUnit(csrCfg) with HasCSRConst{ mstatusNew } - val mstatusMask = ~ZeroExt(( + val mstatusMask = (~ZeroExt(( GenMask(XLEN-2, 38) | GenMask(31, 23) | GenMask(10, 9) 
| GenMask(2) | GenMask(37) | // MBE GenMask(36) | // SBE GenMask(6) // UBE - ), 64) + ), 64)).asUInt() val medeleg = RegInit(UInt(XLEN.W), 0.U) val mideleg = RegInit(UInt(XLEN.W), 0.U) @@ -338,7 +351,7 @@ class CSR extends FunctionUnit(csrCfg) with HasCSRConst{ val sipMask = "h222".U & mideleg val satp = RegInit(0.U(XLEN.W)) // val satp = RegInit(UInt(XLEN.W), "h8000000000087fbe".U) // only use for tlb naive debug - val satpMask = "h80000fffffffffff".U // disable asid, mode can only be 8 / 0 + val satpMask = "h80000fffffffffff".U // disable asid, mode can only be 8 / 0 // val satp = RegInit(UInt(XLEN.W), 0.U) val sepc = RegInit(UInt(XLEN.W), 0.U) val scause = RegInit(UInt(XLEN.W), 0.U) @@ -347,11 +360,8 @@ class CSR extends FunctionUnit(csrCfg) with HasCSRConst{ val scounteren = RegInit(UInt(XLEN.W), 0.U) val tlbBundle = Wire(new TlbCsrBundle) - // val sfence = Wire(new SfenceBundle) tlbBundle.satp := satp.asTypeOf(new SatpStruct) - // sfence := 0.U.asTypeOf(new SfenceBundle) - BoringUtils.addSource(tlbBundle, "TLBCSRIO") - // BoringUtils.addSource(sfence, "SfenceBundle") // FIXME: move to MOU + io.tlbCsrIO := tlbBundle // User-Level CSRs val uepc = Reg(UInt(XLEN.W)) @@ -401,11 +411,6 @@ class CSR extends FunctionUnit(csrCfg) with HasCSRConst{ // val setLrAddr = WireInit(UInt(AddrBits.W), DontCare) //TODO : need check // val lr = RegInit(Bool(), false.B) // val lrAddr = RegInit(UInt(AddrBits.W), 0.U) -// BoringUtils.addSink(setLr, "set_lr") -// BoringUtils.addSink(setLrVal, "set_lr_val") -// BoringUtils.addSink(setLrAddr, "set_lr_addr") -// BoringUtils.addSource(lr, "lr") -// BoringUtils.addSource(lrAddr, "lr_addr") // // when(setLr){ // lr := setLrVal @@ -511,15 +516,16 @@ class CSR extends FunctionUnit(csrCfg) with HasCSRConst{ )) // satp wen check - val satpLegalMode = (wdata.asTypeOf(new SatpStruct).mode===0.U) || (wdata.asTypeOf(new SatpStruct).mode===8.U) + val satpLegalMode = (wdata.asTypeOf(new SatpStruct).mode===0.U) || (wdata.asTypeOf(new 
SatpStruct).mode===8.U) // general CSR wen check val wen = valid && func =/= CSROpType.jmp && (addr=/=Satp.U || satpLegalMode) - val permitted = csrAccessPermissionCheck(addr, false.B, priviledgeMode) + val permitted = csrAccessPermissionCheck(addr, false.B, priviledgeMode) // Writeable check is ingored. // Currently, write to illegal csr addr will be ignored MaskedRegMap.generate(mapping, addr, rdata, wen && permitted, wdata) - io.out.bits := rdata + io.out.bits.data := rdata + io.out.bits.uop := io.in.bits.uop // Fix Mip/Sip write val fixMapping = Map( @@ -595,27 +601,7 @@ class CSR extends FunctionUnit(csrCfg) with HasCSRConst{ val hasLoadAddrMisaligned = io.exception.bits.cf.exceptionVec(loadAddrMisaligned) && io.exception.valid // mtval write logic - val lsroqExceptionAddr = WireInit(0.U(VAddrBits.W)) - if(EnableUnifiedLSQ){ - ExcitingUtils.addSource(io.exception.bits.lsroqIdx, "EXECPTION_LSROQIDX") - ExcitingUtils.addSink(lsroqExceptionAddr, "EXECPTION_VADDR") - } else { - val lsIdx = WireInit(0.U.asTypeOf(new LSIdx())) - lsIdx.lqIdx := io.exception.bits.lqIdx - lsIdx.sqIdx := io.exception.bits.sqIdx - ExcitingUtils.addSource(lsIdx, "EXECPTION_LSROQIDX") - val lqExceptionAddr = WireInit(0.U(VAddrBits.W)) - val sqExceptionAddr = WireInit(0.U(VAddrBits.W)) - ExcitingUtils.addSink(lqExceptionAddr, "EXECPTION_LOAD_VADDR") - ExcitingUtils.addSink(sqExceptionAddr, "EXECPTION_STORE_VADDR") - lsroqExceptionAddr := Mux(CommitType.lsInstIsStore(io.exception.bits.ctrl.commitType), sqExceptionAddr, lqExceptionAddr) - } - - val atomExceptionAddr = WireInit(0.U(VAddrBits.W)) - val atomOverrideXtval = WireInit(false.B) - ExcitingUtils.addSink(atomExceptionAddr, "ATOM_EXECPTION_VADDR") - ExcitingUtils.addSink(atomOverrideXtval, "ATOM_OVERRIDE_XTVAL") - val memExceptionAddr = Mux(atomOverrideXtval, atomExceptionAddr, lsroqExceptionAddr) + val memExceptionAddr = SignExt(io.memExceptionVAddr, XLEN) when(hasInstrPageFault || hasLoadPageFault || hasStorePageFault){ val tval = 
Mux( hasInstrPageFault, @@ -624,7 +610,7 @@ class CSR extends FunctionUnit(csrCfg) with HasCSRConst{ SignExt(io.exception.bits.cf.pc + 2.U, XLEN), SignExt(io.exception.bits.cf.pc, XLEN) ), - SignExt(memExceptionAddr, XLEN) + memExceptionAddr ) when(priviledgeMode === ModeM){ mtval := tval @@ -635,7 +621,7 @@ class CSR extends FunctionUnit(csrCfg) with HasCSRConst{ when(hasLoadAddrMisaligned || hasStoreAddrMisaligned) { - mtval := SignExt(memExceptionAddr, XLEN) + mtval := memExceptionAddr } // Exception and Intr @@ -650,20 +636,14 @@ class CSR extends FunctionUnit(csrCfg) with HasCSRConst{ intrVecEnable.zip(ideleg.asBools).map{case(x,y) => x := priviledgedEnableDetect(y)} val intrVec = mie(11,0) & mip.asUInt & intrVecEnable.asUInt val intrBitSet = intrVec.orR() - ExcitingUtils.addSource(intrBitSet, "intrBitSetIDU") + io.interrupt := intrBitSet val intrNO = IntPriority.foldRight(0.U)((i: Int, sum: UInt) => Mux(intrVec(i), i.U, sum)) - val raiseIntr = intrBitSet && io.exception.valid + val raiseIntr = intrBitSet && io.exception.valid && io.isInterrupt XSDebug(raiseIntr, "interrupt: pc=0x%x, %d\n", io.exception.bits.cf.pc, intrNO) - val mtip = WireInit(false.B) - val msip = WireInit(false.B) - val meip = WireInit(false.B) - ExcitingUtils.addSink(mtip, "mtip") - ExcitingUtils.addSink(msip, "msip") - ExcitingUtils.addSink(meip, "meip") - mipWire.t.m := mtip - mipWire.s.m := msip - mipWire.e.m := meip + mipWire.t.m := io.mtip + mipWire.s.m := io.msip + mipWire.e.m := io.meip // exceptions val csrExceptionVec = Wire(Vec(16, Bool())) @@ -692,15 +672,13 @@ class CSR extends FunctionUnit(csrCfg) with HasCSRConst{ val raiseExceptionIntr = io.exception.valid val retTarget = Wire(UInt(VAddrBits.W)) - val trapTarget = Wire(UInt(VAddrBits.W)) - ExcitingUtils.addSource(trapTarget, "trapTarget") val resetSatp = addr === Satp.U && wen // write to satp will cause the pipeline be flushed - io.redirect := DontCare - io.redirectValid := valid && func === CSROpType.jmp && !isEcall - 
io.redirect.target := retTarget + io.redirectOut := DontCare + io.redirectOutValid := valid && func === CSROpType.jmp && !isEcall + io.redirectOut.target := retTarget io.flushPipe := resetSatp - XSDebug(io.redirectValid, "redirect to %x, pc=%x\n", io.redirect.target, io.cfIn.pc) + XSDebug(io.redirectOutValid, "redirect to %x, pc=%x\n", io.redirectOut.target, io.cfIn.pc) XSDebug(raiseExceptionIntr, "int/exc: pc %x int (%d):%x exc: (%d):%x\n",io.exception.bits.cf.pc, intrNO, io.exception.bits.cf.intrVec.asUInt, exceptionNO, raiseExceptionVec.asUInt) XSDebug(raiseExceptionIntr, "pc %x mstatus %x mideleg %x medeleg %x mode %x\n", io.exception.bits.cf.pc, mstatus, mideleg, medeleg, priviledgeMode) @@ -712,7 +690,7 @@ class CSR extends FunctionUnit(csrCfg) with HasCSRConst{ val delegS = (deleg(causeNO(3,0))) && (priviledgeMode < ModeM) val tvalWen = !(hasInstrPageFault || hasLoadPageFault || hasStorePageFault || hasLoadAddrMisaligned || hasStoreAddrMisaligned) || raiseIntr // TODO: need check - trapTarget := Mux(delegS, stvec, mtvec)(VAddrBits-1, 0) + io.trapTarget := Mux(delegS, stvec, mtvec)(VAddrBits-1, 0) retTarget := DontCare // val illegalEret = TODO @@ -784,10 +762,10 @@ class CSR extends FunctionUnit(csrCfg) with HasCSRConst{ io.out.valid := valid - XSDebug(io.redirectValid, "Rediret %x raiseExcepIntr:%d isSret:%d retTarget:%x sepc:%x delegs:%d deleg:%x cfInpc:%x valid:%d instrValid:%x \n", - io.redirect.target, raiseExceptionIntr, isSret, retTarget, sepc, delegS, deleg, io.cfIn.pc, valid, io.instrValid) + XSDebug(io.redirectOutValid, "Rediret %x raiseExcepIntr:%d isSret:%d retTarget:%x sepc:%x delegs:%d deleg:%x cfInpc:%x valid:%d instrValid:%x \n", + io.redirectOut.target, raiseExceptionIntr, isSret, retTarget, sepc, delegS, deleg, io.cfIn.pc, valid, io.instrValid) XSDebug(raiseExceptionIntr && delegS, "Red(%d, %x) raiseExcepIntr:%d isSret:%d retTarget:%x sepc:%x delegs:%d deleg:%x cfInpc:%x valid:%d instrValid:%x \n", - io.redirectValid, io.redirect.target, 
raiseExceptionIntr, isSret, retTarget, sepc, delegS, deleg, io.cfIn.pc, valid, io.instrValid) + io.redirectOutValid, io.redirectOut.target, raiseExceptionIntr, isSret, retTarget, sepc, delegS, deleg, io.cfIn.pc, valid, io.instrValid) XSDebug(raiseExceptionIntr && delegS, "sepc is writen!!! pc:%x\n", io.cfIn.pc) @@ -872,33 +850,24 @@ class CSR extends FunctionUnit(csrCfg) with HasCSRConst{ } } - // for differential testing -// BoringUtils.addSource(RegNext(priviledgeMode), "difftestMode") -// BoringUtils.addSource(RegNext(mstatus), "difftestMstatus") -// BoringUtils.addSource(RegNext(mstatus & sstatusRmask), "difftestSstatus") -// BoringUtils.addSource(RegNext(mepc), "difftestMepc") -// BoringUtils.addSource(RegNext(sepc), "difftestSepc") -// BoringUtils.addSource(RegNext(mcause), "difftestMcause") -// BoringUtils.addSource(RegNext(scause), "difftestScause") - BoringUtils.addSource(priviledgeMode, "difftestMode") - BoringUtils.addSource(mstatus, "difftestMstatus") - BoringUtils.addSource(mstatus & sstatusRmask, "difftestSstatus") - BoringUtils.addSource(mepc, "difftestMepc") - BoringUtils.addSource(sepc, "difftestSepc") - BoringUtils.addSource(mtval, "difftestMtval") - BoringUtils.addSource(stval, "difftestStval") - BoringUtils.addSource(mtvec, "difftestMtvec") - BoringUtils.addSource(stvec, "difftestStvec") - BoringUtils.addSource(mcause, "difftestMcause") - BoringUtils.addSource(scause, "difftestScause") - BoringUtils.addSource(satp, "difftestSatp") - BoringUtils.addSource(mipReg, "difftestMip") - BoringUtils.addSource(mie, "difftestMie") - BoringUtils.addSource(mscratch, "difftestMscratch") - BoringUtils.addSource(sscratch, "difftestSscratch") - BoringUtils.addSource(mideleg, "difftestMideleg") - BoringUtils.addSource(medeleg, "difftestMedeleg") + ExcitingUtils.addSource(priviledgeMode, "difftestMode", Debug) + ExcitingUtils.addSource(mstatus, "difftestMstatus", Debug) + ExcitingUtils.addSource(mstatus & sstatusRmask, "difftestSstatus", Debug) + 
ExcitingUtils.addSource(mepc, "difftestMepc", Debug) + ExcitingUtils.addSource(sepc, "difftestSepc", Debug) + ExcitingUtils.addSource(mtval, "difftestMtval", Debug) + ExcitingUtils.addSource(stval, "difftestStval", Debug) + ExcitingUtils.addSource(mtvec, "difftestMtvec", Debug) + ExcitingUtils.addSource(stvec, "difftestStvec", Debug) + ExcitingUtils.addSource(mcause, "difftestMcause", Debug) + ExcitingUtils.addSource(scause, "difftestScause", Debug) + ExcitingUtils.addSource(satp, "difftestSatp", Debug) + ExcitingUtils.addSource(mipReg, "difftestMip", Debug) + ExcitingUtils.addSource(mie, "difftestMie", Debug) + ExcitingUtils.addSource(mscratch, "difftestMscratch", Debug) + ExcitingUtils.addSource(sscratch, "difftestSscratch", Debug) + ExcitingUtils.addSource(mideleg, "difftestMideleg", Debug) + ExcitingUtils.addSource(medeleg, "difftestMedeleg", Debug) } else { -// BoringUtils.addSource(readWithScala(perfCntList("Minstret")._1), "ilaInstrCnt") } } diff --git a/src/main/scala/xiangshan/backend/fu/Divider.scala b/src/main/scala/xiangshan/backend/fu/Divider.scala index 5a3a1401440ff02e893bc998e5893df0e5f98caf..5d7e8f16ffa0c83fdd820429ef8010c23dcf12fe 100644 --- a/src/main/scala/xiangshan/backend/fu/Divider.scala +++ b/src/main/scala/xiangshan/backend/fu/Divider.scala @@ -8,8 +8,7 @@ import xiangshan.backend._ import xiangshan.backend.fu.FunctionUnit._ -class Divider(len: Int) extends FunctionUnit(divCfg) { - val io = IO(new MulDivIO(len)) +class Divider(len: Int) extends FunctionUnit(divCfg, 64, extIn = new MulDivCtrl) { def abs(a: UInt, sign: Bool): (Bool, UInt) = { val s = a(len - 1) && sign @@ -20,7 +19,7 @@ class Divider(len: Int) extends FunctionUnit(divCfg) { val state = RegInit(s_idle) val newReq = (state === s_idle) && io.in.fire() - val (a, b) = (io.in.bits.src1, io.in.bits.src2) + val (a, b) = (io.in.bits.src(0), io.in.bits.src(1)) val divBy0 = b === 0.U(len.W) val divBy0Reg = RegEnable(divBy0, newReq) @@ -28,13 +27,18 @@ class Divider(len: Int) extends 
FunctionUnit(divCfg) { val hi = shiftReg(len * 2, len) val lo = shiftReg(len - 1, 0) - val (aSign, aVal) = abs(a, io.in.bits.ctrl.sign) - val (bSign, bVal) = abs(b, io.in.bits.ctrl.sign) + val ctrl = io.in.bits.ext.get + val sign = io.in.bits.ext.get.sign + val uop = io.in.bits.uop + + val (aSign, aVal) = abs(a, sign) + val (bSign, bVal) = abs(b, sign) val aSignReg = RegEnable(aSign, newReq) val qSignReg = RegEnable((aSign ^ bSign) && !divBy0, newReq) val bReg = RegEnable(bVal, newReq) val aValx2Reg = RegEnable(Cat(aVal, "b0".U), newReq) - val ctrlReg = RegEnable(io.in.bits.ctrl, newReq) + val ctrlReg = RegEnable(ctrl, newReq) + val uopReg = RegEnable(uop, newReq) val cnt = Counter(len) when (newReq) { @@ -67,7 +71,7 @@ class Divider(len: Int) extends FunctionUnit(divCfg) { } } - when(state=/=s_idle && ctrlReg.uop.roqIdx.needFlush(io.redirect)){ + when(state=/=s_idle && uopReg.roqIdx.needFlush(io.redirectIn)){ state := s_idle } @@ -78,7 +82,7 @@ class Divider(len: Int) extends FunctionUnit(divCfg) { val xlen = io.out.bits.data.getWidth val res = Mux(ctrlReg.isHi, resR, resQ) io.out.bits.data := Mux(ctrlReg.isW, SignExt(res(31,0),xlen), res) - io.out.bits.uop := ctrlReg.uop + io.out.bits.uop := uopReg io.out.valid := state === s_finish io.in.ready := state === s_idle diff --git a/src/main/scala/xiangshan/backend/fu/FDivSqrt.scala b/src/main/scala/xiangshan/backend/fu/FDivSqrt.scala deleted file mode 100644 index 673decc877d39e8974b7910541aa3c4d0a6d6e03..0000000000000000000000000000000000000000 --- a/src/main/scala/xiangshan/backend/fu/FDivSqrt.scala +++ /dev/null @@ -1,14 +0,0 @@ -package xiangshan.backend.fu - -import chisel3._ -import chisel3.util._ -import xiangshan._ -import utils._ -import xiangshan.backend._ - -import xiangshan.backend.fu.FunctionUnit._ - -class FDivSqrt extends FunctionUnit(fDivSqrtCfg){ - val io = IO(new Bundle() {}) - override def toString: String = "FDivSqrt" -} diff --git a/src/main/scala/xiangshan/backend/fu/Fence.scala 
b/src/main/scala/xiangshan/backend/fu/Fence.scala index 8947b54150e2c934675ff663e29429114966b022..4b38b6f86e48d6a6c2e5c3db2a5a56bff4e42200 100644 --- a/src/main/scala/xiangshan/backend/fu/Fence.scala +++ b/src/main/scala/xiangshan/backend/fu/Fence.scala @@ -4,25 +4,29 @@ import chisel3._ import chisel3.util._ import xiangshan._ import utils._ -import chisel3.util.experimental.BoringUtils import xiangshan.backend.FenceOpType +class FenceToSbuffer extends XSBundle { + val flushSb = Output(Bool()) + val sbIsEmpty = Input(Bool()) +} + class FenceExeUnit extends Exu(Exu.fenceExeUnitCfg) { + + val sfence = IO(Output(new SfenceBundle)) + val fencei = IO(Output(Bool())) + val toSbuffer = IO(new FenceToSbuffer) + val (valid, src1, src2, uop, func, lsrc1, lsrc2) = (io.in.valid, io.in.bits.src1, io.in.bits.src2, io.in.bits.uop, io.in.bits.uop.ctrl.fuOpType, io.in.bits.uop.ctrl.lsrc1, io.in.bits.uop.ctrl.lsrc2) val s_sb :: s_tlb :: s_icache :: s_none :: Nil = Enum(4) val state = RegInit(s_sb) - val sfence = WireInit(0.U.asTypeOf(new SfenceBundle)) - val sbuffer = WireInit(false.B) - val fencei = WireInit(false.B) - val sbEmpty = WireInit(false.B) - BoringUtils.addSource(sbuffer, "FenceUnitSbufferFlush") - BoringUtils.addSource(sfence, "SfenceBundle") - BoringUtils.addSource(fencei, "FenceI") - BoringUtils.addSink(sbEmpty, "SBufferEmpty") + val sbuffer = toSbuffer.flushSb + val sbEmpty = toSbuffer.sbIsEmpty + // NOTE: icache & tlb & sbuffer must receive flush signal at any time sbuffer := valid && state === s_sb && !sbEmpty fencei := (state === s_icache && sbEmpty) || (state === s_sb && valid && sbEmpty && func === FenceOpType.fencei) @@ -44,8 +48,9 @@ class FenceExeUnit extends Exu(Exu.fenceExeUnitCfg) { io.out.bits.redirect <> DontCare io.out.bits.redirectValid := false.B io.out.bits.debug <> DontCare + io.csrOnly <> DontCare assert(!(valid || state =/= s_sb) || io.out.ready) // NOTE: fence instr must be the first(only one) instr, so io.out.ready must be true XSDebug(valid || 
state=/=s_sb || io.out.valid, p"In(${io.in.valid} ${io.in.ready}) Out(${io.out.valid} ${io.out.ready}) state:${state} sbuffer(flush:${sbuffer} empty:${sbEmpty}) fencei:${fencei} sfence:${sfence} Inpc:0x${Hexadecimal(io.in.bits.uop.cf.pc)} InroqIdx:${io.in.bits.uop.roqIdx} Outpc:0x${Hexadecimal(io.out.bits.uop.cf.pc)} OutroqIdx:${io.out.bits.uop.roqIdx}\n") -} \ No newline at end of file +} diff --git a/src/main/scala/xiangshan/backend/fu/Fmac.scala b/src/main/scala/xiangshan/backend/fu/Fmac.scala deleted file mode 100644 index 25c864ae99506d57ec65d36437c3f6e3d3ec33d3..0000000000000000000000000000000000000000 --- a/src/main/scala/xiangshan/backend/fu/Fmac.scala +++ /dev/null @@ -1,14 +0,0 @@ -package xiangshan.backend.fu - -import chisel3._ -import chisel3.util._ -import xiangshan._ -import utils._ -import xiangshan.backend._ - -import xiangshan.backend.fu.FunctionUnit._ - -class Fmac extends FunctionUnit(fmacCfg){ - val io = IO(new Bundle() {}) - override def toString: String = "Fmac" -} diff --git a/src/main/scala/xiangshan/backend/fu/Fmisc.scala b/src/main/scala/xiangshan/backend/fu/Fmisc.scala deleted file mode 100644 index 22ec4b9575d11403a1f86f216c93c32db38b3fad..0000000000000000000000000000000000000000 --- a/src/main/scala/xiangshan/backend/fu/Fmisc.scala +++ /dev/null @@ -1,14 +0,0 @@ -package xiangshan.backend.fu - -import chisel3._ -import chisel3.util._ -import xiangshan._ -import utils._ -import xiangshan.backend._ - -import xiangshan.backend.fu.FunctionUnit._ - -class Fmisc extends FunctionUnit(fmiscCfg){ - val io = IO(new Bundle() {}) - override def toString: String = "Fmisc" -} diff --git a/src/main/scala/xiangshan/backend/fu/FunctionUnit.scala b/src/main/scala/xiangshan/backend/fu/FunctionUnit.scala index 899feee3f32137515ea36b5ffdbf31da97ac363d..830b5476b38a7a9edc02bdff825a376a5043dbd0 100644 --- a/src/main/scala/xiangshan/backend/fu/FunctionUnit.scala +++ b/src/main/scala/xiangshan/backend/fu/FunctionUnit.scala @@ -2,17 +2,31 @@ package 
xiangshan.backend.fu import chisel3._ import chisel3.util._ - import xiangshan._ -import utils._ - -import FunctionUnit._ /* XiangShan Function Unit A Exu can have one or more function units */ +trait HasFuLatency { + val latencyVal: Option[Int] +} + +case class CertainLatency(value: Int) extends HasFuLatency{ + override val latencyVal: Option[Int] = Some(value) +} + +case class UncertainLatency() extends HasFuLatency { + override val latencyVal: Option[Int] = None +} + +case class NexusLatency(value: Int) extends HasFuLatency { + override val latencyVal: Option[Int] = Some(value) +} + + + case class FuConfig ( fuType: UInt, @@ -20,20 +34,99 @@ case class FuConfig numFpSrc: Int, writeIntRf: Boolean, writeFpRf: Boolean, - hasRedirect: Boolean -) - -class FunctionUnitIO extends XSBundle { - val in = Flipped(Decoupled(new Bundle { - val src1 = Output(UInt(XLEN.W)) - val src2 = Output(UInt(XLEN.W)) - val src3 = Output(UInt(XLEN.W)) - val func = Output(FuOpType()) + hasRedirect: Boolean, + latency: HasFuLatency = CertainLatency(0) +) { + def srcCnt: Int = math.max(numIntSrc, numFpSrc) +} + +class FunctionUnitIO[TI <: Data, TO <: Data] +( + cfg: FuConfig, + len: Int, + extIn: => TI = null, + extOut: => TO = null +) extends XSBundle +{ + val in = Flipped(DecoupledIO(new Bundle() { + val src = Vec(cfg.srcCnt, UInt(len.W)) + val uop = new MicroOp + val ext = if(extIn == null) None else Some(extIn.cloneType) + + def connectToExuInput(exuIn: ExuInput): Unit = { + val exuSrcIn = Seq(exuIn.src1, exuIn.src2, exuIn.src3) + src.zip(exuSrcIn).foreach{case (x, y) => x := y} + uop := exuIn.uop + } })) - val out = Decoupled(Output(UInt(XLEN.W))) + + val out = DecoupledIO(new Bundle() { + val data = UInt(XLEN.W) + val uop = new MicroOp + val ext = if(extOut == null) None else Some(extOut.cloneType) + }) + + val redirectIn = Flipped(ValidIO(new Redirect)) + + override def cloneType: FunctionUnitIO.this.type = + new FunctionUnitIO(cfg, len, extIn, extOut).asInstanceOf[this.type] } 
-abstract class FunctionUnit(cfg: FuConfig) extends XSModule +abstract class FunctionUnit[TI <: Data, TO <: Data] +( + cfg: FuConfig, + len: Int = 64, + extIn: => TI = null, + extOut: => TO = null, + val latency: Int = 0 +) extends XSModule { + + val io = IO(new FunctionUnitIO[TI, TO](cfg, len, extIn, extOut)) + +} + +trait HasPipelineReg[TI <: Data, TO <: Data] { + this: FunctionUnit[TI, TO] => + + val validVec = io.in.valid +: Array.fill(latency)(RegInit(false.B)) + val rdyVec = Array.fill(latency)(Wire(Bool())) :+ io.out.ready + val uopVec = io.in.bits.uop +: Array.fill(latency)(Reg(new MicroOp)) + + + val flushVec = uopVec.zip(validVec).map(x => x._2 && x._1.roqIdx.needFlush(io.redirectIn)) + + for (i <- 0 until latency) { + rdyVec(i) := !validVec(i + 1) || rdyVec(i + 1) + } + + for (i <- 1 to latency) { + when(flushVec(i - 1) || rdyVec(i) && !validVec(i - 1)) { + validVec(i) := false.B + }.elsewhen(rdyVec(i - 1) && validVec(i - 1) && !flushVec(i - 1)) { + validVec(i) := validVec(i - 1) + uopVec(i) := uopVec(i - 1) + } + } + + io.in.ready := rdyVec(0) + io.out.valid := validVec.last && !flushVec.last + io.out.bits.uop := uopVec.last + + def PipelineReg[TT <: Data](i: Int)(next: TT) = RegEnable( + next, + enable = validVec(i - 1) && rdyVec(i - 1) && !flushVec(i - 1) + ) + + def S1Reg[TT <: Data](next: TT): TT = PipelineReg[TT](1)(next) + + def S2Reg[TT <: Data](next: TT): TT = PipelineReg[TT](2)(next) + + def S3Reg[TT <: Data](next: TT): TT = PipelineReg[TT](3)(next) + + def S4Reg[TT <: Data](next: TT): TT = PipelineReg[TT](4)(next) + + def S5Reg[TT <: Data](next: TT): TT = PipelineReg[TT](5)(next) +} object FunctionUnit { @@ -50,30 +143,45 @@ object FunctionUnit { FuConfig(FuType.alu, 2, 0, writeIntRf = true, writeFpRf = false, hasRedirect = true) val mulCfg = - FuConfig(FuType.mul, 2, 0, writeIntRf = true, writeFpRf = false, hasRedirect = false) + FuConfig(FuType.mul, 2, 0, writeIntRf = true, writeFpRf = false, hasRedirect = false, + UncertainLatency()// 
CertainLatency(3) + ) val divCfg = - FuConfig(FuType.div, 2, 0, writeIntRf = true, writeFpRf = false, hasRedirect = false) + FuConfig(FuType.div, 2, 0, writeIntRf = true, writeFpRf = false, hasRedirect = false, + UncertainLatency() + ) val fenceCfg = FuConfig(FuType.fence, 2, 0, writeIntRf = false, writeFpRf = false, hasRedirect = false/*NOTE: need redirect but when commit*/) val lduCfg = - FuConfig(FuType.ldu, 1, 0, writeIntRf = true, writeFpRf = true, hasRedirect = false) + FuConfig(FuType.ldu, 1, 0, writeIntRf = true, writeFpRf = true, hasRedirect = false, + UncertainLatency() + ) val stuCfg = - FuConfig(FuType.stu, 2, 1, writeIntRf = false, writeFpRf = false, hasRedirect = false) + FuConfig(FuType.stu, 2, 1, writeIntRf = false, writeFpRf = false, hasRedirect = false, + UncertainLatency() + ) - // use ldu's write back port, so set writeIntRf to false - val mouCfg = - FuConfig(FuType.mou, 2, 0, writeIntRf = false, writeFpRf = false, hasRedirect = false) + val mouCfg = + FuConfig(FuType.mou, 2, 0, writeIntRf = false, writeFpRf = false, hasRedirect = false, + UncertainLatency() + ) val fmacCfg = - FuConfig(FuType.fmac, 0, 3, writeIntRf = false, writeFpRf = true, hasRedirect = false) + FuConfig(FuType.fmac, 0, 3, writeIntRf = false, writeFpRf = true, hasRedirect = false, + CertainLatency(5) + ) val fmiscCfg = - FuConfig(FuType.fmisc, 0, 2, writeIntRf = false, writeFpRf = true, hasRedirect = false) + FuConfig(FuType.fmisc, 0, 2, writeIntRf = true, writeFpRf = true, hasRedirect = false, + UncertainLatency() + ) val fDivSqrtCfg = - FuConfig(FuType.fDivSqrt, 0, 2, writeIntRf = false, writeFpRf = true, hasRedirect = false) + FuConfig(FuType.fDivSqrt, 0, 2, writeIntRf = false, writeFpRf = true, hasRedirect = false, + UncertainLatency() + ) } diff --git a/src/main/scala/xiangshan/backend/fu/I2f.scala b/src/main/scala/xiangshan/backend/fu/I2f.scala deleted file mode 100644 index 25d020fc1bb6d2ed03d9734ba00c89d6c6e82eb5..0000000000000000000000000000000000000000 --- 
a/src/main/scala/xiangshan/backend/fu/I2f.scala +++ /dev/null @@ -1,12 +0,0 @@ -package xiangshan.backend.fu - -import chisel3._ -import chisel3.util._ -import xiangshan._ -import utils._ -import xiangshan.backend._ -import xiangshan.backend.fu.FunctionUnit._ - -class I2f extends FunctionUnit(i2fCfg){ - val io = IO(new Bundle() {}) -} diff --git a/src/main/scala/xiangshan/backend/fu/Jump.scala b/src/main/scala/xiangshan/backend/fu/Jump.scala index ff98e319df14e2447042ff8071c0a12d99ec4083..37b71e2f4c97652d82b26fc7f975fd7ba85ec7d8 100644 --- a/src/main/scala/xiangshan/backend/fu/Jump.scala +++ b/src/main/scala/xiangshan/backend/fu/Jump.scala @@ -8,36 +8,49 @@ import xiangshan.backend._ import xiangshan.backend.fu.FunctionUnit._ import xiangshan.backend.decode.isa._ -class Jump extends FunctionUnit(jmpCfg){ - val io = IO(new ExuIO) +class RedirectOut extends XSBundle { + val redirectValid = Bool() + val redirect = new Redirect + val brUpdate = new BranchUpdateInfo +} + +class Jump extends FunctionUnit(jmpCfg, extOut = new RedirectOut) { - val (iovalid, src1, offset, func, pc, uop) = (io.in.valid, io.in.bits.src1, io.in.bits.uop.ctrl.imm, io.in.bits.uop.ctrl.fuOpType, SignExt(io.in.bits.uop.cf.pc, AddrBits), io.in.bits.uop) + val (iovalid, src1, offset, func, pc, uop) = ( + io.in.valid, + io.in.bits.src(0), + io.in.bits.uop.ctrl.imm, + io.in.bits.uop.ctrl.fuOpType, + SignExt(io.in.bits.uop.cf.pc, AddrBits), + io.in.bits.uop + ) - val redirectHit = uop.roqIdx.needFlush(io.redirect) + val redirectHit = uop.roqIdx.needFlush(io.redirectIn) val valid = iovalid && !redirectHit val isRVC = uop.cf.brUpdate.pd.isRVC val pcDelaySlot = Mux(isRVC, pc + 2.U, pc + 4.U) val target = src1 + offset // NOTE: src1 is (pc/rf(rs1)), src2 is (offset) - io.out.bits.redirectValid := valid - io.out.bits.redirect.pc := uop.cf.pc - io.out.bits.redirect.target := target - io.out.bits.redirect.brTag := uop.brTag - io.out.bits.redirect.isException := false.B - io.out.bits.redirect.isFlushPipe := 
false.B - io.out.bits.redirect.isMisPred := DontCare // check this in brq - io.out.bits.redirect.isReplay := false.B - io.out.bits.redirect.roqIdx := uop.roqIdx + val redirectOut = io.out.bits.ext.get.redirect + val brUpdate = io.out.bits.ext.get.brUpdate + + io.out.bits.ext.get.redirectValid := valid + redirectOut.pc := uop.cf.pc + redirectOut.target := target + redirectOut.brTag := uop.brTag + redirectOut.isException := false.B + redirectOut.isFlushPipe := false.B + redirectOut.isMisPred := DontCare // check this in brq + redirectOut.isReplay := false.B + redirectOut.roqIdx := uop.roqIdx - io.out.bits.brUpdate := uop.cf.brUpdate - io.out.bits.brUpdate.pc := uop.cf.pc - io.out.bits.brUpdate.target := target - io.out.bits.brUpdate.brTarget := target // DontCare - // io.out.bits.brUpdate.btbType := LookupTree(func, RV32I_BRUInstr.bruFuncTobtbTypeTable) - io.out.bits.brUpdate.taken := true.B + brUpdate := uop.cf.brUpdate + brUpdate.pc := uop.cf.pc + brUpdate.target := target + brUpdate.brTarget := target // DontCare + brUpdate.taken := true.B // io.out.bits.brUpdate.fetchIdx := uop.cf.brUpdate.fetchOffset >> 1.U //TODO: consider RVC - io.out.bits.brUpdate.brTag := uop.brTag // Output val res = pcDelaySlot @@ -47,20 +60,17 @@ class Jump extends FunctionUnit(jmpCfg){ io.out.bits.uop <> io.in.bits.uop io.out.bits.data := res - io.dmem <> DontCare - io.out.bits.debug <> DontCare - // NOTE: the debug info is for one-cycle exec, if FMV needs multi-cycle, may needs change it XSDebug(io.in.valid, "In(%d %d) Out(%d %d) Redirect:(%d %d %d %d) brTag:%x\n", io.in.valid, io.in.ready, io.out.valid, io.out.ready, - io.redirect.valid, - io.redirect.bits.isException, - io.redirect.bits.isFlushPipe, + io.redirectIn.valid, + io.redirectIn.bits.isException, + io.redirectIn.bits.isFlushPipe, redirectHit, - io.redirect.bits.brTag.value + io.redirectIn.bits.brTag.value ) XSDebug(io.in.valid, "src1:%x offset:%x func:%b type:JUMP pc:%x res:%x\n", src1, offset, func, pc, res) } diff --git 
a/src/main/scala/xiangshan/backend/fu/Multiplier.scala b/src/main/scala/xiangshan/backend/fu/Multiplier.scala index 8f8cdbb01b153a2a0dc4babe145bfaa322032ff0..bcb89cf5f8084c0bbb8e30896c15a8a01d9820a4 100644 --- a/src/main/scala/xiangshan/backend/fu/Multiplier.scala +++ b/src/main/scala/xiangshan/backend/fu/Multiplier.scala @@ -8,7 +8,6 @@ import xiangshan.backend._ import xiangshan.backend.fu.FunctionUnit._ class MulDivCtrl extends Bundle{ - val uop = new MicroOp val sign = Bool() val isW = Bool() val isHi = Bool() // return hi bits of result ? @@ -31,45 +30,12 @@ class MulDivIO(val len: Int) extends XSBundle { abstract class Multiplier ( val len: Int, - val latency: Int = 3 -) extends FunctionUnit(mulCfg) { - val io = IO(new MulDivIO(len)) -} - -trait HasPipelineReg { this: ArrayMultiplier => - - val validVec = io.in.valid +: Array.fill(latency)(RegInit(false.B)) - val rdyVec = Array.fill(latency)(Wire(Bool())) :+ io.out.ready - val ctrlVec = io.in.bits.ctrl +: Array.fill(latency)(Reg(new MulDivCtrl)) - val flushVec = ctrlVec.zip(validVec).map(x => x._2 && x._1.uop.roqIdx.needFlush(io.redirect)) - - for(i <- 0 until latency){ - rdyVec(i) := !validVec(i+1) || rdyVec(i+1) - } - - for(i <- 1 to latency){ - when(flushVec(i-1) || rdyVec(i) && !validVec(i-1)){ - validVec(i) := false.B - }.elsewhen(rdyVec(i-1) && validVec(i-1) && !flushVec(i-1)){ - validVec(i) := validVec(i-1) - ctrlVec(i) := ctrlVec(i-1) - } - } - - io.in.ready := rdyVec(0) - io.out.valid := validVec.last && !flushVec.last - io.out.bits.uop := ctrlVec.last.uop - - def PipelineReg[T<:Data](i: Int)(next: T) = RegEnable( - next, - enable = validVec(i-1) && rdyVec(i-1) && !flushVec(i-1) - ) + latency: Int = 3 +) extends FunctionUnit(cfg = mulCfg, len, extIn = new MulDivCtrl, latency = latency) + with HasPipelineReg[MulDivCtrl, Null] +{ - def S1Reg[T<:Data](next: T):T = PipelineReg[T](1)(next) - def S2Reg[T<:Data](next: T):T = PipelineReg[T](2)(next) - def S3Reg[T<:Data](next: T):T = PipelineReg[T](3)(next) - 
def S4Reg[T<:Data](next: T):T = PipelineReg[T](4)(next) - def S5Reg[T<:Data](next: T):T = PipelineReg[T](5)(next) + val (src1, src2) = (io.in.bits.src(0), io.in.bits.src(1)) } class ArrayMultiplier @@ -77,14 +43,16 @@ class ArrayMultiplier len: Int, latency: Int = 3, realArray: Boolean = false -) extends Multiplier(len, latency) with HasPipelineReg { +) extends Multiplier(len, latency) { - val mulRes = io.in.bits.src1.asSInt() * io.in.bits.src2.asSInt() + val mulRes = src1.asSInt() * src2.asSInt() var dataVec = Seq(mulRes.asUInt()) + var ctrlVec = Seq(io.in.bits.ext.get) for(i <- 1 to latency){ dataVec = dataVec :+ PipelineReg(i)(dataVec(i-1)) + ctrlVec = ctrlVec :+ PipelineReg(i)(ctrlVec(i-1)) } val xlen = io.out.bits.data.getWidth diff --git a/src/main/scala/fpu/Classify.scala b/src/main/scala/xiangshan/backend/fu/fpu/Classify.scala similarity index 98% rename from src/main/scala/fpu/Classify.scala rename to src/main/scala/xiangshan/backend/fu/fpu/Classify.scala index 45353000f4fe66c4a41b2523341b5a3488336448..b27c47e9605e3f78d0b8fd7dd2df7ac23c1143b6 100644 --- a/src/main/scala/fpu/Classify.scala +++ b/src/main/scala/xiangshan/backend/fu/fpu/Classify.scala @@ -1,4 +1,4 @@ -package fpu +package xiangshan.backend.fu.fpu import chisel3._ import chisel3.util._ diff --git a/src/main/scala/fpu/F32toF64.scala b/src/main/scala/xiangshan/backend/fu/fpu/F32toF64.scala similarity index 80% rename from src/main/scala/fpu/F32toF64.scala rename to src/main/scala/xiangshan/backend/fu/fpu/F32toF64.scala index 454dddabdc4830c91104b922669b930bc350ca09..3d1e3c3bea070425ea3782c43efb6096b2004687 100644 --- a/src/main/scala/fpu/F32toF64.scala +++ b/src/main/scala/xiangshan/backend/fu/fpu/F32toF64.scala @@ -1,12 +1,12 @@ -package fpu +package xiangshan.backend.fu.fpu import chisel3._ import chisel3.util._ +import xiangshan.backend.fu.FunctionUnit._ -class F32toF64 extends FPUSubModule with HasPipelineReg { - def latency: Int = 2 +class F32toF64 extends FPUPipelineModule(fmiscCfg, 2) { - 
val a = io.in.bits.a + val a = io.in.bits.src(0) val f32 = Float32(a) val classify = Module(new Classify(Float32.expWidth, Float32.mantWidth)) @@ -56,10 +56,10 @@ class F32toF64 extends FPUSubModule with HasPipelineReg { ) val result = Mux(s1_isNaN, Float64.defaultNaN, commonResult) - io.out.bits.result := S2Reg(result) - io.out.bits.fflags.invalid := S2Reg(s1_isSNaN) - io.out.bits.fflags.overflow := false.B - io.out.bits.fflags.underflow := false.B - io.out.bits.fflags.infinite := false.B - io.out.bits.fflags.inexact := false.B + io.out.bits.data := S2Reg(result) + fflags.invalid := S2Reg(s1_isSNaN) + fflags.overflow := false.B + fflags.underflow := false.B + fflags.infinite := false.B + fflags.inexact := false.B } diff --git a/src/main/scala/fpu/F64toF32.scala b/src/main/scala/xiangshan/backend/fu/fpu/F64toF32.scala similarity index 80% rename from src/main/scala/fpu/F64toF32.scala rename to src/main/scala/xiangshan/backend/fu/fpu/F64toF32.scala index e2eb4b094043d29eb2bc11bd6dec86912a95c410..f6b2acb148f8eeac9dbd788f8e09fe87fe905917 100644 --- a/src/main/scala/fpu/F64toF32.scala +++ b/src/main/scala/xiangshan/backend/fu/fpu/F64toF32.scala @@ -1,15 +1,14 @@ -package fpu +package xiangshan.backend.fu.fpu import chisel3._ import chisel3.util._ -import fpu.util.ShiftRightJam +import xiangshan.backend.fu.FunctionUnit.fmiscCfg +import xiangshan.backend.fu.fpu.util.ShiftRightJam -class F64toF32 extends FPUSubModule with HasPipelineReg { - def latency: Int = 2 +class F64toF32 extends FPUPipelineModule(fmiscCfg, 2) { def SEXP_WIDTH = Float64.expWidth + 2 - val rm = io.in.bits.rm - val a = io.in.bits.a + val a = io.in.bits.src(0) val classify = Module(new Classify(Float64.expWidth, Float64.mantWidth)) classify.io.in := a @@ -67,11 +66,11 @@ class F64toF32 extends FPUSubModule with HasPipelineReg { ) ) - io.out.bits.result := S2Reg(result) - io.out.bits.fflags.invalid := S2Reg(s1_isSNaN) - io.out.bits.fflags.overflow := S2Reg(overflow) - io.out.bits.fflags.underflow := 
S2Reg(underflow) - io.out.bits.fflags.infinite := false.B - io.out.bits.fflags.inexact := S2Reg(inexact) + io.out.bits.data := S2Reg(result) + fflags.invalid := S2Reg(s1_isSNaN) + fflags.overflow := S2Reg(overflow) + fflags.underflow := S2Reg(underflow) + fflags.infinite := false.B + fflags.inexact := S2Reg(inexact) } diff --git a/src/main/scala/fpu/FCMP.scala b/src/main/scala/xiangshan/backend/fu/fpu/FCMP.scala similarity index 71% rename from src/main/scala/fpu/FCMP.scala rename to src/main/scala/xiangshan/backend/fu/fpu/FCMP.scala index d90dd9829b80a1d729927b8d5bdafe5b8dd0d18e..3ea2eb8c5ef0e6c66b795bf938f3bece1c17ba3f 100644 --- a/src/main/scala/fpu/FCMP.scala +++ b/src/main/scala/xiangshan/backend/fu/fpu/FCMP.scala @@ -1,13 +1,12 @@ -package fpu +package xiangshan.backend.fu.fpu import chisel3._ import chisel3.util._ +import xiangshan.backend.fu.FunctionUnit._ -class FCMP extends FPUSubModule with HasPipelineReg{ - def latency = 2 +class FCMP extends FPUPipelineModule(fmiscCfg, 2){ - val isDouble = io.in.bits.isDouble - val src = Seq(io.in.bits.a, io.in.bits.b).map(x => Mux(isDouble, x, extF32ToF64(x))) + val src = io.in.bits.src.map(x => Mux(isDouble, x, extF32ToF64(x))) val sign = src.map(_(63)) val aSign = sign(0) @@ -20,8 +19,8 @@ class FCMP extends FPUSubModule with HasPipelineReg{ val srcIsSNaN = classify.map(_.isSNaN) val isDoubleReg = S1Reg(isDouble) - val opReg = S1Reg(io.in.bits.op) - val srcReg = Seq(io.in.bits.a, io.in.bits.b).map(S1Reg) + val opReg = S1Reg(op) + val srcReg = io.in.bits.src.map(S1Reg) val (aSignReg, bSignReg) = (S1Reg(sign(0)), S1Reg(sign(1))) val hasNaNReg = S1Reg(srcIsNaN(0) || srcIsNaN(1)) @@ -50,10 +49,10 @@ class FCMP extends FPUSubModule with HasPipelineReg{ val min = Mux(bothNaNReg, defaultNaN, Mux(sel_a && !aIsNaNReg, srcReg(0), srcReg(1))) val max = Mux(bothNaNReg, defaultNaN, Mux(!sel_a && !aIsNaNReg, srcReg(0), srcReg(1))) - io.out.bits.fflags.inexact := false.B - io.out.bits.fflags.underflow := false.B - 
io.out.bits.fflags.overflow := false.B - io.out.bits.fflags.infinite := false.B - io.out.bits.fflags.invalid := S2Reg(invalid) - io.out.bits.result := S2Reg(Mux(opReg===0.U, min, Mux(opReg===1.U, max, fcmpResult))) + fflags.inexact := false.B + fflags.underflow := false.B + fflags.overflow := false.B + fflags.infinite := false.B + fflags.invalid := S2Reg(invalid) + io.out.bits.data := S2Reg(Mux(opReg===0.U, min, Mux(opReg===1.U, max, fcmpResult))) } \ No newline at end of file diff --git a/src/main/scala/fpu/FMV.scala b/src/main/scala/xiangshan/backend/fu/fpu/FMV.scala similarity index 71% rename from src/main/scala/fpu/FMV.scala rename to src/main/scala/xiangshan/backend/fu/fpu/FMV.scala index 06e53475f0db285d9f1fe765d370f6b0a2b76578..70bc2f75a3903d608ab3cbb8e6fcc0034e127cb4 100644 --- a/src/main/scala/fpu/FMV.scala +++ b/src/main/scala/xiangshan/backend/fu/fpu/FMV.scala @@ -1,15 +1,12 @@ -package fpu +package xiangshan.backend.fu.fpu import chisel3._ import chisel3.util._ +import xiangshan.backend.fu.FunctionUnit._ -class FMV(XLEN: Int) extends FPUSubModule with HasPipelineReg { +class FMV(XLEN: Int) extends FPUPipelineModule(fmiscCfg, 1) { - def latency = 1 - - val isDouble = io.in.bits.isDouble - val op = io.in.bits.op - val src = Seq(io.in.bits.a, io.in.bits.b).map(x => + val src = io.in.bits.src.map(x => Mux(isDouble || op(2,1)==="b00".U, x, extF32ToF64(x)) ) val aSign = Mux(op(2,1)==="b00".U && !isDouble, src(0)(31), src(0)(63)) @@ -39,12 +36,12 @@ class FMV(XLEN: Int) extends FPUSubModule with HasPipelineReg { val result = Mux(op === "b010".U, classifyResult, Mux(isDouble, - Cat(resSign, io.in.bits.a(62, 0)), - Cat(resSign, io.in.bits.a(30 ,0)) + Cat(resSign, io.in.bits.src(0)(62, 0)), + Cat(resSign, io.in.bits.src(0)(30 ,0)) ) ) val resultReg = S1Reg(result) - io.out.bits.result := resultReg - io.out.bits.fflags := 0.U.asTypeOf(new Fflags) + io.out.bits.data := resultReg + fflags := 0.U.asTypeOf(new Fflags) } diff --git 
a/src/main/scala/xiangshan/backend/fu/fpu/FPUSubModule.scala b/src/main/scala/xiangshan/backend/fu/fpu/FPUSubModule.scala new file mode 100644 index 0000000000000000000000000000000000000000..707a017a8aef885680e7ad7436617eee6c67baa8 --- /dev/null +++ b/src/main/scala/xiangshan/backend/fu/fpu/FPUSubModule.scala @@ -0,0 +1,84 @@ +package xiangshan.backend.fu.fpu + +import chisel3._ +import chisel3.util._ +import xiangshan.backend.fu.{FuConfig, FunctionUnit, HasPipelineReg} + + +class FPUSubModuleInput extends Bundle{ + val op = UInt(3.W) + val isDouble = Bool() + val a, b, c = UInt(64.W) + val rm = UInt(3.W) +} + +class FPUSubModuleOutput extends Bundle{ + val fflags = new Fflags + val result = UInt(64.W) +} + +class FPUSubModuleIO extends Bundle{ + val in = Flipped(DecoupledIO(new FPUSubModuleInput)) + val out = DecoupledIO(new FPUSubModuleOutput) +} + +//trait HasPipelineReg { this: FPUSubModule => +// def latency: Int +// +// val ready = Wire(Bool()) +// val cnt = RegInit(0.U((log2Up(latency)+1).W)) +// +// ready := (cnt < latency.U) || (cnt === latency.U && io.out.ready) +// cnt := cnt + io.in.fire() - io.out.fire() +// +// val valids = io.in.valid +: Array.fill(latency)(RegInit(false.B)) +// for(i <- 1 to latency){ +// when(ready){ valids(i) := valids(i-1) } +// } +// +// def PipelineReg[T<:Data](i: Int)(next: T) = RegEnable(next, enable = valids(i-1) && ready) +// def S1Reg[T<:Data](next: T):T = PipelineReg[T](1)(next) +// def S2Reg[T<:Data](next: T):T = PipelineReg[T](2)(next) +// def S3Reg[T<:Data](next: T):T = PipelineReg[T](3)(next) +// def S4Reg[T<:Data](next: T):T = PipelineReg[T](4)(next) +// def S5Reg[T<:Data](next: T):T = PipelineReg[T](5)(next) +// +// io.in.ready := ready +// io.out.valid := valids.last +//} + +trait HasUIntToSIntHelper { + implicit class UIntToSIntHelper(x: UInt){ + def toSInt: SInt = Cat(0.U(1.W), x).asSInt() + } +} + +//abstract class FPUSubModule extends Module with HasUIntToSIntHelper { +// val io = IO(new FPUSubModuleIO) +//} + 
+class FPUExtraInput extends Bundle { + val op = UInt(3.W) + val isDouble = Bool() + val rm = UInt(3.W) +} + +trait HasFPUSigs { this: FPUSubModule => + val extraIn = io.in.bits.ext.get + val op = extraIn.op + val isDouble = extraIn.isDouble + val rm = extraIn.rm + val fflags = io.out.bits.ext.get +} + +abstract class FPUSubModule(cfg: FuConfig, latency: Int = 0) extends FunctionUnit( + cfg, + latency = latency, + extIn = new FPUExtraInput, + extOut = new Fflags +) with HasUIntToSIntHelper + with HasFPUSigs + +abstract class FPUPipelineModule(cfg: FuConfig, latency: Int) + extends FPUSubModule(cfg, latency) + with HasPipelineReg[FPUExtraInput, Fflags] \ No newline at end of file diff --git a/src/main/scala/fpu/FloatToInt.scala b/src/main/scala/xiangshan/backend/fu/fpu/FloatToInt.scala similarity index 81% rename from src/main/scala/fpu/FloatToInt.scala rename to src/main/scala/xiangshan/backend/fu/fpu/FloatToInt.scala index 2ee0c860e80c4182476ea475b39f39687462b915..2b3e1ec20bbb627c8bcf1972b454aea4a4267005 100644 --- a/src/main/scala/fpu/FloatToInt.scala +++ b/src/main/scala/xiangshan/backend/fu/fpu/FloatToInt.scala @@ -1,27 +1,23 @@ -package fpu +package xiangshan.backend.fu.fpu import chisel3._ import chisel3.util._ -import fpu.util.{ORTree, ShiftRightJam} +import xiangshan.backend.fu.fpu.util.{ORTree, ShiftRightJam} +import xiangshan.backend.fu.FunctionUnit._ //def f2w:UInt = FpuOp("011", "000") //def f2wu:UInt = FpuOp("011", "001") //def f2l:UInt = FpuOp("011", "010") //def f2lu:UInt = FpuOp("011", "011") -class FloatToInt extends FPUSubModule with HasPipelineReg { - - def latency = 2 +class FloatToInt extends FPUPipelineModule(fmiscCfg, 2) { def SEXP_WIDTH = Float64.expWidth + 2 /** Stage 1: Shift Operand */ - val op = io.in.bits.op - val rm = io.in.bits.rm - val isDouble = io.in.bits.isDouble - val a = Mux(isDouble, io.in.bits.a, extF32ToF64(io.in.bits.a)) + val a = Mux(isDouble, io.in.bits.src(0), extF32ToF64(io.in.bits.src(0))) val f64 = Float64(a) val cls = 
Module(new Classify(Float64.expWidth, Float64.mantWidth)) @@ -97,10 +93,10 @@ class FloatToInt extends FPUSubModule with HasPipelineReg { /** Assign Outputs */ - io.out.bits.result := s2_result - io.out.bits.fflags.invalid := s2_invalid - io.out.bits.fflags.overflow := false.B - io.out.bits.fflags.underflow := false.B - io.out.bits.fflags.infinite := false.B - io.out.bits.fflags.inexact := s2_inexact + io.out.bits.data := s2_result + fflags.invalid := s2_invalid + fflags.overflow := false.B + fflags.underflow := false.B + fflags.infinite := false.B + fflags.inexact := s2_inexact } \ No newline at end of file diff --git a/src/main/scala/fpu/IntToFloat.scala b/src/main/scala/xiangshan/backend/fu/fpu/IntToFloat.scala similarity index 79% rename from src/main/scala/fpu/IntToFloat.scala rename to src/main/scala/xiangshan/backend/fu/fpu/IntToFloat.scala index af63994ebd73ad0a553cde408e1581fa3e705ba5..5f03eaf9567142a175f94cd6f928a98c4a488014 100644 --- a/src/main/scala/fpu/IntToFloat.scala +++ b/src/main/scala/xiangshan/backend/fu/fpu/IntToFloat.scala @@ -1,18 +1,15 @@ -package fpu +package xiangshan.backend.fu.fpu import chisel3._ import chisel3.util._ -import fpu.util.ORTree - -class IntToFloat extends FPUSubModule with HasPipelineReg { - def latency = 2 +import xiangshan.backend.fu.fpu.util.ORTree +import xiangshan.backend.fu.FunctionUnit.i2fCfg +class IntToFloat extends FPUPipelineModule(i2fCfg, 2) { /** Stage 1: Count leading zeros and shift */ - val op = io.in.bits.op - val isDouble = io.in.bits.isDouble - val a = io.in.bits.a + val a = io.in.bits.src(0) val aNeg = (~a).asUInt() val aComp = aNeg + 1.U val aSign = Mux(op(0), false.B, Mux(op(1), a(63), a(31))) @@ -32,7 +29,7 @@ class IntToFloat extends FPUSubModule with HasPipelineReg { ) ) val leadingZeroHasError = S1Reg(aSign && (leadingZerosComp=/=leadingZerosNeg)) - val rmReg = S1Reg(io.in.bits.rm) + val rmReg = S1Reg(rm) val opReg = S1Reg(op) val isDoubleReg = S1Reg(isDouble) val aIsZeroReg = S1Reg(a===0.U) @@ 
-70,10 +67,10 @@ class IntToFloat extends FPUSubModule with HasPipelineReg { ) val resD = Cat(aSignReg, expRounded, mantRounded) - io.out.bits.result := S2Reg(Mux(aIsZeroReg, 0.U, Mux(isDoubleReg, resD, resS))) - io.out.bits.fflags.inexact := S2Reg(roudingUnit.io.out.inexact) - io.out.bits.fflags.underflow := false.B - io.out.bits.fflags.overflow := false.B - io.out.bits.fflags.infinite := false.B - io.out.bits.fflags.invalid := false.B + io.out.bits.data := S2Reg(Mux(aIsZeroReg, 0.U, Mux(isDoubleReg, resD, resS))) + fflags.inexact := S2Reg(roudingUnit.io.out.inexact) + fflags.underflow := false.B + fflags.overflow := false.B + fflags.infinite := false.B + fflags.invalid := false.B } diff --git a/src/main/scala/xiangshan/backend/fu/fpu/IntToFloatSingleCycle.scala b/src/main/scala/xiangshan/backend/fu/fpu/IntToFloatSingleCycle.scala new file mode 100644 index 0000000000000000000000000000000000000000..ffb6fb18dd702dce213761ef467e61d139d1d334 --- /dev/null +++ b/src/main/scala/xiangshan/backend/fu/fpu/IntToFloatSingleCycle.scala @@ -0,0 +1,77 @@ +package xiangshan.backend.fu.fpu + +import chisel3._ +import chisel3.util._ +import xiangshan.backend.fu.fpu.util.ORTree +import xiangshan.backend.fu.FunctionUnit.i2fCfg + +class IntToFloatSingleCycle extends FPUSubModule(cfg = i2fCfg) { + + val a = io.in.bits.src(0) + val aNeg = (~a).asUInt() + val aComp = aNeg + 1.U + val aSign = Mux(op(0), false.B, Mux(op(1), a(63), a(31))) + + val leadingZerosComp = PriorityEncoder(Mux(op(1), aComp, aComp(31, 0)).asBools().reverse) + val leadingZerosNeg = PriorityEncoder(Mux(op(1), aNeg, aNeg(31, 0)).asBools().reverse) + val leadingZerosPos = PriorityEncoder(Mux(op(1), a, a(31,0)).asBools().reverse) + + val aVal = Mux(aSign, Mux(op(1), aComp, aComp(31, 0)), Mux(op(1), a, a(31, 0))) + val leadingZeros = Mux(aSign, leadingZerosNeg, leadingZerosPos) + + // exp = xlen - 1 - leadingZeros + bias + val expUnrounded = Mux(isDouble, + (64 - 1 + Float64.expBiasInt).U - leadingZeros, + (64 - 1 + 
Float32.expBiasInt).U - leadingZeros + ) + + val leadingZeroHasError = aSign && (leadingZerosComp=/=leadingZerosNeg) + val rmReg = rm + val opReg = op + val isDoubleReg = isDouble + val aIsZeroReg = a===0.U + val aSignReg = aSign + val aShifted = (aVal << leadingZeros)(63, 0) + + /** Stage 2: Rounding + */ + val aShiftedFix = Mux(leadingZeroHasError, aShifted(63, 1), aShifted(62, 0)) + val mantD = aShiftedFix(62, 62-51) + val mantS = aShiftedFix(62, 62-22) + + val g = Mux(isDoubleReg, aShiftedFix(62-52), aShiftedFix(62-23)) + val r = Mux(isDoubleReg, aShiftedFix(62-53), aShiftedFix(62-24)) + val s = Mux(isDoubleReg, ORTree(aShiftedFix(62-54, 0)), ORTree(aShiftedFix(62-25, 0))) + + val roudingUnit = Module(new RoundingUnit(Float64.mantWidth)) + roudingUnit.io.in.rm := rmReg + roudingUnit.io.in.mant := Mux(isDoubleReg, mantD, mantS) + roudingUnit.io.in.sign := aSignReg + roudingUnit.io.in.guard := g + roudingUnit.io.in.round := r + roudingUnit.io.in.sticky := s + + val mantRounded = roudingUnit.io.out.mantRounded + val expRounded = Mux(isDoubleReg, + expUnrounded + roudingUnit.io.out.mantCout, + expUnrounded + mantRounded(Float32.mantWidth) + ) + leadingZeroHasError + + val resS = Cat( + aSignReg, + expRounded(Float32.expWidth-1, 0), + mantRounded(Float32.mantWidth-1, 0) + ) + val resD = Cat(aSignReg, expRounded, mantRounded) + + io.in.ready := true.B + io.out.valid := io.in.valid + io.out.bits.uop := io.in.bits.uop + io.out.bits.data := Mux(aIsZeroReg, 0.U, Mux(isDoubleReg, resD, resS)) + fflags.inexact := roudingUnit.io.out.inexact + fflags.underflow := false.B + fflags.overflow := false.B + fflags.infinite := false.B + fflags.invalid := false.B + +} diff --git a/src/main/scala/fpu/README.md b/src/main/scala/xiangshan/backend/fu/fpu/README.md similarity index 100% rename from src/main/scala/fpu/README.md rename to src/main/scala/xiangshan/backend/fu/fpu/README.md diff --git a/src/main/scala/fpu/RoundingUnit.scala 
b/src/main/scala/xiangshan/backend/fu/fpu/RoundingUnit.scala similarity index 97% rename from src/main/scala/fpu/RoundingUnit.scala rename to src/main/scala/xiangshan/backend/fu/fpu/RoundingUnit.scala index 1f1534ed931dbc384562c8d1fc72a32394cdb1a6..6e2da25ddb4c1959bbeb4556d62fd769de71584e 100644 --- a/src/main/scala/fpu/RoundingUnit.scala +++ b/src/main/scala/xiangshan/backend/fu/fpu/RoundingUnit.scala @@ -1,9 +1,9 @@ -package fpu +package xiangshan.backend.fu.fpu import chisel3._ import chisel3.util._ -import fpu.RoudingMode._ -import fpu.util.ORTree +import xiangshan.backend.fu.fpu.RoudingMode._ +import xiangshan.backend.fu.fpu.util.ORTree class RoundingUnit(mantWidth: Int) extends Module{ val io = IO(new Bundle() { diff --git a/src/main/scala/fpu/divsqrt/DivSqrt.scala b/src/main/scala/xiangshan/backend/fu/fpu/divsqrt/DivSqrt.scala similarity index 81% rename from src/main/scala/fpu/divsqrt/DivSqrt.scala rename to src/main/scala/xiangshan/backend/fu/fpu/divsqrt/DivSqrt.scala index de5de63f29e0e2958570ec0a61eb5230124da80f..508386d24fc26483b657988c01d2c0aedf2de154 100644 --- a/src/main/scala/fpu/divsqrt/DivSqrt.scala +++ b/src/main/scala/xiangshan/backend/fu/fpu/divsqrt/DivSqrt.scala @@ -1,12 +1,12 @@ -package fpu.divsqrt +package xiangshan.backend.fu.fpu.divsqrt -import fpu._ +import xiangshan.backend.fu.fpu._ import chisel3._ import chisel3.util._ -import fpu.util.{FPUDebug, ORTree, ShiftRightJam} +import xiangshan.backend.fu.fpu.util.{FPUDebug, ORTree, ShiftRightJam} +import xiangshan.backend.fu.FunctionUnit.fDivSqrtCfg - -class DivSqrt extends FPUSubModule { +class DivSqrt extends FPUSubModule(fDivSqrtCfg) { def SEXP_WIDTH: Int = Float64.expWidth + 2 def D_MANT_WIDTH: Int = Float64.mantWidth + 1 @@ -16,16 +16,16 @@ class DivSqrt extends FPUSubModule { val s_idle :: s_norm :: s_start :: s_compute :: s_round:: s_finish :: Nil = Enum(6) val state = RegInit(s_idle) - val rm = io.in.bits.rm + + val uopReg = RegEnable(io.in.bits.uop, io.in.fire()) val rmReg = 
RegEnable(rm, io.in.fire()) - val isDiv = !io.in.bits.op(0) + val isDiv = !op(0) val isDivReg = RegEnable(isDiv, io.in.fire()) - val isDouble = io.in.bits.isDouble val isDoubleReg = RegEnable(isDouble, io.in.fire()) val (a, b) = ( - Mux(isDouble, io.in.bits.a, extF32ToF64(io.in.bits.a)), - Mux(isDouble, io.in.bits.b, extF32ToF64(io.in.bits.b)) + Mux(isDouble, io.in.bits.src(0), extF32ToF64(io.in.bits.src(0))), + Mux(isDouble, io.in.bits.src(1), extF32ToF64(io.in.bits.src(1))) ) @@ -282,7 +282,7 @@ class DivSqrt extends FPUSubModule { io.in.ready := (state === s_idle) && io.out.ready io.out.valid := state === s_finish - io.out.bits.result := Mux(specialCaseHappenReg, + io.out.bits.data := Mux(specialCaseHappenReg, specialResult, Mux(overflowReg, Mux(isDoubleReg, @@ -292,40 +292,41 @@ class DivSqrt extends FPUSubModule { commonResult ) ) - - io.out.bits.fflags.invalid := Mux(isDivReg, divInvalidReg, sqrtInvalidReg) - io.out.bits.fflags.underflow := !specialCaseHappenReg && underflowReg - io.out.bits.fflags.overflow := !specialCaseHappenReg && overflowReg - io.out.bits.fflags.infinite := Mux(isDivReg, divInfReg, false.B) - io.out.bits.fflags.inexact := !specialCaseHappenReg && (inexactReg || overflowReg || underflowReg) - - FPUDebug() { - // printf(p"$cnt in:${Hexadecimal(io.in.bits.src0)} \n") - when(io.in.fire()) { - printf(p"[In.fire] " + - p"a:${Hexadecimal(io.in.bits.a)} aexp:${aExp.asSInt()} amant:${Hexadecimal(aMant)} " + - p"b:${Hexadecimal(io.in.bits.b)} bexp:${bExp.asSInt()} bmant:${Hexadecimal(bMant)}\n") - } -// when(state === s_norm) { -// printf(p"[norm] lz:$aMantLez\n") + io.out.bits.uop := uopReg + + fflags.invalid := Mux(isDivReg, divInvalidReg, sqrtInvalidReg) + fflags.underflow := !specialCaseHappenReg && underflowReg + fflags.overflow := !specialCaseHappenReg && overflowReg + fflags.infinite := Mux(isDivReg, divInfReg, false.B) + fflags.inexact := !specialCaseHappenReg && (inexactReg || overflowReg || underflowReg) + +// FPUDebug() { +// // 
printf(p"$cnt in:${Hexadecimal(io.in.bits.src0)} \n") +// when(io.in.fire()) { +// printf(p"[In.fire] " + +// p"a:${Hexadecimal(io.in.bits.a)} aexp:${aExp.asSInt()} amant:${Hexadecimal(aMant)} " + +// p"b:${Hexadecimal(io.in.bits.b)} bexp:${bExp.asSInt()} bmant:${Hexadecimal(bMant)}\n") // } - when(state === s_compute){ -// when(sqrt.io.out.fire()){ -// printf(p"[compute] ") -// } - } - when(state === s_start) { - printf(p"[start] sign:$resSignReg mant:${Hexadecimal(aMantReg)} exp:${aExpReg.asSInt()}\n") - } - when(state === s_round){ - printf(p"[round] exp before round:${aExpReg} g:$gReg r:$rReg s:$sReg mant:${Hexadecimal(aMantReg)}\n" + - p"[round] mantRounded:${Hexadecimal(mantRounded)}\n") - } - when(io.out.valid) { - printf(p"[Out.valid] " + - p"invalid:$sqrtInvalidReg result:${Hexadecimal(commonResult)}\n" + - p"output:${Hexadecimal(io.out.bits.result)} " + - p"exp:${aExpReg.asSInt()} \n") - } - } +//// when(state === s_norm) { +//// printf(p"[norm] lz:$aMantLez\n") +//// } +// when(state === s_compute){ +//// when(sqrt.io.out.fire()){ +//// printf(p"[compute] ") +//// } +// } +// when(state === s_start) { +// printf(p"[start] sign:$resSignReg mant:${Hexadecimal(aMantReg)} exp:${aExpReg.asSInt()}\n") +// } +// when(state === s_round){ +// printf(p"[round] exp before round:${aExpReg} g:$gReg r:$rReg s:$sReg mant:${Hexadecimal(aMantReg)}\n" + +// p"[round] mantRounded:${Hexadecimal(mantRounded)}\n") +// } +// when(io.out.valid) { +// printf(p"[Out.valid] " + +// p"invalid:$sqrtInvalidReg result:${Hexadecimal(commonResult)}\n" + +// p"output:${Hexadecimal(io.out.bits.result)} " + +// p"exp:${aExpReg.asSInt()} \n") +// } +// } } diff --git a/src/main/scala/fpu/divsqrt/MantDivSqrt.scala b/src/main/scala/xiangshan/backend/fu/fpu/divsqrt/MantDivSqrt.scala similarity index 96% rename from src/main/scala/fpu/divsqrt/MantDivSqrt.scala rename to src/main/scala/xiangshan/backend/fu/fpu/divsqrt/MantDivSqrt.scala index 
500eb5df9365cef6039470c413e844bb949fe56d..589d14a8116f126ae843245a5af736239021d584 100644 --- a/src/main/scala/fpu/divsqrt/MantDivSqrt.scala +++ b/src/main/scala/xiangshan/backend/fu/fpu/divsqrt/MantDivSqrt.scala @@ -1,9 +1,9 @@ -package fpu.divsqrt +package xiangshan.backend.fu.fpu.divsqrt import chisel3._ import chisel3.util._ -import fpu.util._ -import fpu.util.FPUDebug +import xiangshan.backend.fu.fpu.util._ +import xiangshan.backend.fu.fpu.util.FPUDebug class MantDivSqrt(len: Int) extends Module{ val io = IO(new Bundle() { diff --git a/src/main/scala/fpu/divsqrt/OnTheFlyConv.scala b/src/main/scala/xiangshan/backend/fu/fpu/divsqrt/OnTheFlyConv.scala similarity index 95% rename from src/main/scala/fpu/divsqrt/OnTheFlyConv.scala rename to src/main/scala/xiangshan/backend/fu/fpu/divsqrt/OnTheFlyConv.scala index 115a1d22acc76955ae014dc8c2722c3b950c8b0c..c7af9194a0cd8ab208b6913066cffe0c7c5210e2 100644 --- a/src/main/scala/fpu/divsqrt/OnTheFlyConv.scala +++ b/src/main/scala/xiangshan/backend/fu/fpu/divsqrt/OnTheFlyConv.scala @@ -1,10 +1,10 @@ -package fpu.divsqrt +package xiangshan.backend.fu.fpu.divsqrt import chisel3._ import chisel3.util._ import utils._ -import fpu._ -import fpu.util.FPUDebug +import xiangshan.backend.fu.fpu._ +import xiangshan.backend.fu.fpu.util.FPUDebug class OnTheFlyConv(len: Int) extends Module { val io = IO(new Bundle() { diff --git a/src/main/scala/fpu/divsqrt/SrtTable.scala b/src/main/scala/xiangshan/backend/fu/fpu/divsqrt/SrtTable.scala similarity index 92% rename from src/main/scala/fpu/divsqrt/SrtTable.scala rename to src/main/scala/xiangshan/backend/fu/fpu/divsqrt/SrtTable.scala index 66f178dab7a05e3bb8f27eaaf62bf2f5100e6dc5..5c3a9615efbd1a3e41d1c434571baa9e62724cd9 100644 --- a/src/main/scala/fpu/divsqrt/SrtTable.scala +++ b/src/main/scala/xiangshan/backend/fu/fpu/divsqrt/SrtTable.scala @@ -1,10 +1,10 @@ -package fpu.divsqrt +package xiangshan.backend.fu.fpu.divsqrt import chisel3._ import chisel3.util._ import utils._ -import fpu._ 
+import xiangshan.backend.fu.fpu._ class SrtTable extends Module { val io = IO(new Bundle() { diff --git a/src/main/scala/fpu/fma/ArrayMultiplier.scala b/src/main/scala/xiangshan/backend/fu/fpu/fma/ArrayMultiplier.scala similarity index 98% rename from src/main/scala/fpu/fma/ArrayMultiplier.scala rename to src/main/scala/xiangshan/backend/fu/fpu/fma/ArrayMultiplier.scala index 0e1926b586c48522fd526507a2536050e1e6fe95..0f61837c96694ba130432e6120bb4c1164246b8c 100644 --- a/src/main/scala/fpu/fma/ArrayMultiplier.scala +++ b/src/main/scala/xiangshan/backend/fu/fpu/fma/ArrayMultiplier.scala @@ -1,8 +1,8 @@ -package fpu.fma +package xiangshan.backend.fu.fpu.fma import chisel3._ import chisel3.util._ -import fpu.util._ +import xiangshan.backend.fu.fpu.util._ import utils.SignExt class ArrayMultiplier(len: Int, regDepth: Int = 0, realArraryMult: Boolean = false) extends Module { diff --git a/src/main/scala/fpu/fma/FMA.scala b/src/main/scala/xiangshan/backend/fu/fpu/fma/FMA.scala similarity index 87% rename from src/main/scala/fpu/fma/FMA.scala rename to src/main/scala/xiangshan/backend/fu/fpu/fma/FMA.scala index f79486f5c495837f7d56f28c0839c08c160d1f11..996edf14b54f86279f2e0a674996a65bcfa65b08 100644 --- a/src/main/scala/fpu/fma/FMA.scala +++ b/src/main/scala/xiangshan/backend/fu/fpu/fma/FMA.scala @@ -1,13 +1,13 @@ -package fpu.fma +package xiangshan.backend.fu.fpu.fma import chisel3._ import chisel3.util._ -import fpu._ -import fpu.util.{CSA3_2, FPUDebug, ORTree, ShiftLeftJam, ShiftRightJam} +import xiangshan.backend.fu.fpu._ +import xiangshan.backend.fu.fpu.util.{CSA3_2, FPUDebug, ORTree, ShiftLeftJam, ShiftRightJam} +import xiangshan.backend.fu.FunctionUnit.fmacCfg -class FMA extends FPUSubModule with HasPipelineReg { - def latency = 5 +class FMA extends FPUPipelineModule(fmacCfg, 5) { def UseRealArraryMult = false @@ -21,12 +21,9 @@ class FMA extends FPUSubModule with HasPipelineReg { * Stage 1: Decode Operands 
*****************************************************************/ - val rm = io.in.bits.rm - val isDouble = io.in.bits.isDouble - val op = io.in.bits.op - val rs0 = io.in.bits.a - val rs1 = io.in.bits.b - val rs2 = io.in.bits.c + val rs0 = io.in.bits.src(0) + val rs1 = io.in.bits.src(1) + val rs2 = io.in.bits.src(2) val zero = 0.U(Float64.getWidth.W) val one = Mux(isDouble, Cat(0.U(1.W), Float64.expBiasInt.U(Float64.expWidth.W), 0.U(Float64.mantWidth.W)), @@ -151,11 +148,11 @@ class FMA extends FPUSubModule with HasPipelineReg { val s1_discardAMant = S1Reg(aIsZero || expDiff > (ADD_WIDTH+3).S) val s1_invalid = S1Reg(invalid) - FPUDebug(){ - when(valids(1) && ready){ - printf(p"[s1] prodExp+56:${s1_prodExpAdj} aExp:${s1_aExpRaw} diff:${s1_expDiff}\n") - } - } +// FPUDebug(){ +// when(valids(1) && ready){ +// printf(p"[s1] prodExp+56:${s1_prodExpAdj} aExp:${s1_aExpRaw} diff:${s1_expDiff}\n") +// } +// } /****************************************************************** @@ -188,11 +185,11 @@ class FMA extends FPUSubModule with HasPipelineReg { val s2_effSub = S2Reg(effSub) - FPUDebug(){ - when(valids(1) && ready){ - printf(p"[s2] discardAMant:${s1_discardAMant} discardProd:${s1_discardProdMant} \n") - } - } +// FPUDebug(){ +// when(valids(1) && ready){ +// printf(p"[s2] discardAMant:${s1_discardAMant} discardProd:${s1_discardProdMant} \n") +// } +// } /****************************************************************** * Stage 3: A + Prod => adder result @@ -284,14 +281,14 @@ class FMA extends FPUSubModule with HasPipelineReg { val s4_expPostNorm = S4Reg(expPostNorm) val s4_invalid = S4Reg(s3_invalid) - FPUDebug(){ - when(valids(3) && ready){ - printf(p"[s4] expPreNorm:${s3_expPreNorm} normShift:${s3_normShift} expPostNorm:${expPostNorm} " + - p"denormShift:${denormShift}" + - p"" + - p" \n") - } - } +// FPUDebug(){ +// when(valids(3) && ready){ +// printf(p"[s4] expPreNorm:${s3_expPreNorm} normShift:${s3_normShift} expPostNorm:${expPostNorm} " + +// 
p"denormShift:${denormShift}" + +// p"" + +// p" \n") +// } +// } /****************************************************************** * Stage 5: Rounding @@ -342,11 +339,11 @@ class FMA extends FPUSubModule with HasPipelineReg { val s5_inexact = S5Reg(inexact) val s5_ovSetInf = S5Reg(s4_ovSetInf) - FPUDebug(){ - when(valids(4) && ready){ - printf(p"[s5] expPostNorm:${s4_expPostNorm} expRounded:${expRounded}\n") - } - } +// FPUDebug(){ +// when(valids(4) && ready){ +// printf(p"[s5] expPostNorm:${s4_expPostNorm} expRounded:${expRounded}\n") +// } +// } /****************************************************************** * Assign Outputs @@ -375,22 +372,22 @@ class FMA extends FPUSubModule with HasPipelineReg { ) ) - io.out.bits.result := result - io.out.bits.fflags.invalid := s5_invalid - io.out.bits.fflags.inexact := s5_inexact - io.out.bits.fflags.overflow := s5_overflow - io.out.bits.fflags.underflow := s5_underflow - io.out.bits.fflags.infinite := false.B - - FPUDebug(){ - //printf(p"v0:${valids(0)} v1:${valids(1)} v2:${valids(2)} v3:${valids(3)} v4:${valids(4)} v5:${valids(5)}\n") - when(io.in.fire()){ - printf(p"[in] a:${Hexadecimal(a)} b:${Hexadecimal(b)} c:${Hexadecimal(c)}\n") - } - when(io.out.fire()){ - printf(p"[out] res:${Hexadecimal(io.out.bits.result)}\n") - } - } + io.out.bits.data := result + fflags.invalid := s5_invalid + fflags.inexact := s5_inexact + fflags.overflow := s5_overflow + fflags.underflow := s5_underflow + fflags.infinite := false.B + +// FPUDebug(){ +// //printf(p"v0:${valids(0)} v1:${valids(1)} v2:${valids(2)} v3:${valids(3)} v4:${valids(4)} v5:${valids(5)}\n") +// when(io.in.fire()){ +// printf(p"[in] a:${Hexadecimal(a)} b:${Hexadecimal(b)} c:${Hexadecimal(c)}\n") +// } +// when(io.out.fire()){ +// printf(p"[out] res:${Hexadecimal(io.out.bits.result)}\n") +// } +// } } diff --git a/src/main/scala/fpu/fma/LZA.scala b/src/main/scala/xiangshan/backend/fu/fpu/fma/LZA.scala similarity index 98% rename from src/main/scala/fpu/fma/LZA.scala 
rename to src/main/scala/xiangshan/backend/fu/fpu/fma/LZA.scala index bf5056b2bfba52411f0f753c3fa70a55e475d814..fac97bed3726b439cdc47c237c54cf0d7c436f92 100644 --- a/src/main/scala/fpu/fma/LZA.scala +++ b/src/main/scala/xiangshan/backend/fu/fpu/fma/LZA.scala @@ -1,4 +1,4 @@ -package fpu.fma +package xiangshan.backend.fu.fpu.fma import chisel3._ import chisel3.util._ diff --git a/src/main/scala/xiangshan/backend/fu/fpu/package.scala b/src/main/scala/xiangshan/backend/fu/fpu/package.scala new file mode 100644 index 0000000000000000000000000000000000000000..80b436325da2ee4a3d033aa81ac483d9453ba205 --- /dev/null +++ b/src/main/scala/xiangshan/backend/fu/fpu/package.scala @@ -0,0 +1,143 @@ +package xiangshan.backend.fu.fpu + +import chisel3._ +import chisel3.util._ + +object FPUOpType { + def funcWidth = 6 + def FpuOp(fu: String, op: String): UInt = ("b" + fu + op).U(funcWidth.W) + + def FU_FMAC = "000" + def FU_FCMP = "001" + def FU_FMV = "010" + def FU_F2I = "011" + def FU_I2F = "100" + def FU_S2D = "101" + def FU_D2S = "110" + def FU_DIVSQRT = "111" + + // FMA + def fadd:UInt = FpuOp(FU_FMAC, "000") + def fsub:UInt = FpuOp(FU_FMAC, "001") + def fmadd:UInt = FpuOp(FU_FMAC, "100") + def fmsub:UInt = FpuOp(FU_FMAC, "101") + def fnmsub:UInt = FpuOp(FU_FMAC, "110") + def fnmadd:UInt = FpuOp(FU_FMAC, "111") + def fmul:UInt = FpuOp(FU_FMAC, "010") + + // FCMP + def fmin:UInt = FpuOp(FU_FCMP, "000") + def fmax:UInt = FpuOp(FU_FCMP, "001") + def fle:UInt = FpuOp(FU_FCMP, "010") + def flt:UInt = FpuOp(FU_FCMP, "011") + def feq:UInt = FpuOp(FU_FCMP, "100") + + // FMV + def fmv_f2i:UInt= FpuOp(FU_FMV, "000") + def fmv_i2f:UInt= FpuOp(FU_FMV, "001") + def fclass:UInt = FpuOp(FU_FMV, "010") + def fsgnj:UInt = FpuOp(FU_FMV, "110") + def fsgnjn:UInt = FpuOp(FU_FMV, "101") + def fsgnjx:UInt = FpuOp(FU_FMV, "100") + + // FloatToInt + def f2w:UInt = FpuOp(FU_F2I, "000") + def f2wu:UInt = FpuOp(FU_F2I, "001") + def f2l:UInt = FpuOp(FU_F2I, "010") + def f2lu:UInt = FpuOp(FU_F2I, "011") + 
+ // IntToFloat + def w2f:UInt = FpuOp(FU_I2F, "000") + def wu2f:UInt = FpuOp(FU_I2F, "001") + def l2f:UInt = FpuOp(FU_I2F, "010") + def lu2f:UInt = FpuOp(FU_I2F, "011") + + // FloatToFloat + def s2d:UInt = FpuOp(FU_S2D, "000") + def d2s:UInt = FpuOp(FU_D2S, "000") + + // Div/Sqrt + def fdiv:UInt = FpuOp(FU_DIVSQRT, "000") + def fsqrt:UInt = FpuOp(FU_DIVSQRT, "001") +} + +object FPUIOFunc { + def in_raw = 0.U(1.W) + def in_unbox = 1.U(1.W) + + def out_raw = 0.U(2.W) + def out_box = 1.U(2.W) + def out_sext = 2.U(2.W) + def out_zext = 3.U(2.W) + + def apply(inputFunc: UInt, outputFunc:UInt) = Cat(inputFunc, outputFunc) +} + +class Fflags extends Bundle { + val invalid = Bool() // 4 + val infinite = Bool() // 3 + val overflow = Bool() // 2 + val underflow = Bool() // 1 + val inexact = Bool() // 0 +} + +object RoudingMode { + val RNE = "b000".U(3.W) + val RTZ = "b001".U(3.W) + val RDN = "b010".U(3.W) + val RUP = "b011".U(3.W) + val RMM = "b100".U(3.W) +} + +class FloatPoint(val expWidth: Int, val mantWidth:Int) extends Bundle{ + val sign = Bool() + val exp = UInt(expWidth.W) + val mant = UInt(mantWidth.W) + def defaultNaN: UInt = Cat(0.U(1.W), Fill(expWidth+1,1.U(1.W)), Fill(mantWidth-1,0.U(1.W))) + def posInf: UInt = Cat(0.U(1.W), Fill(expWidth, 1.U(1.W)), 0.U(mantWidth.W)) + def negInf: UInt = Cat(1.U(1.W), posInf.tail(1)) + def maxNorm: UInt = Cat(0.U(1.W), Fill(expWidth-1, 1.U(1.W)), 0.U(1.W), Fill(mantWidth, 1.U(1.W))) + def expBias: UInt = Fill(expWidth-1, 1.U(1.W)) + def expBiasInt: Int = (1 << (expWidth-1)) - 1 + def mantExt: UInt = Cat(exp=/=0.U, mant) + def apply(x: UInt): FloatPoint = x.asTypeOf(new FloatPoint(expWidth, mantWidth)) +} + +object Float32 extends FloatPoint(8, 23) +object Float64 extends FloatPoint(11, 52) + + +object expOverflow { + def apply(sexp: SInt, expWidth: Int): Bool = + sexp >= Cat(0.U(1.W), Fill(expWidth, 1.U(1.W))).asSInt() + + def apply(uexp: UInt, expWidth: Int): Bool = + expOverflow(Cat(0.U(1.W), uexp).asSInt(), expWidth) +} + 
+object boxF32ToF64 { + def apply(x: UInt): UInt = Cat(Fill(32, 1.U(1.W)), x(31, 0)) +} + +object unboxF64ToF32 { + def apply(x: UInt): UInt = + Mux(x(63, 32)===Fill(32, 1.U(1.W)), x(31, 0), Float32.defaultNaN) +} + +object extF32ToF64 { + def apply(x: UInt): UInt = { + val f32 = Float32(x) + Cat( + f32.sign, + Mux(f32.exp === 0.U, + 0.U(Float64.expWidth.W), + Mux((~f32.exp).asUInt() === 0.U, + Cat("b111".U(3.W), f32.exp), + Cat("b0111".U(4.W) + f32.exp.head(1), f32.exp.tail(1)) + ) + ), + Cat(f32.mant, 0.U((Float64.mantWidth - Float32.mantWidth).W)) + ) + } +} + diff --git a/src/main/scala/fpu/util/CarrySaveAdder.scala b/src/main/scala/xiangshan/backend/fu/fpu/util/CarrySaveAdder.scala similarity index 97% rename from src/main/scala/fpu/util/CarrySaveAdder.scala rename to src/main/scala/xiangshan/backend/fu/fpu/util/CarrySaveAdder.scala index 5ec4ac2692e25cd716d2e9006bcb053c38c3d048..a6e7020ca82fd6187658147358355b5ac6798b55 100644 --- a/src/main/scala/fpu/util/CarrySaveAdder.scala +++ b/src/main/scala/xiangshan/backend/fu/fpu/util/CarrySaveAdder.scala @@ -1,4 +1,4 @@ -package fpu.util +package xiangshan.backend.fu.fpu.util import chisel3._ import chisel3.util._ diff --git a/src/main/scala/fpu/util/FPUDebug.scala b/src/main/scala/xiangshan/backend/fu/fpu/util/FPUDebug.scala similarity index 83% rename from src/main/scala/fpu/util/FPUDebug.scala rename to src/main/scala/xiangshan/backend/fu/fpu/util/FPUDebug.scala index 6b8dfa5bb38ffe0e9778b38f9914bb233f28277d..a5cc302be383fb7453b3ab3498d95d9c1bbda877 100644 --- a/src/main/scala/fpu/util/FPUDebug.scala +++ b/src/main/scala/xiangshan/backend/fu/fpu/util/FPUDebug.scala @@ -1,4 +1,4 @@ -package fpu.util +package xiangshan.backend.fu.fpu.util import chisel3._ diff --git a/src/main/scala/fpu/util/ORTree.scala b/src/main/scala/xiangshan/backend/fu/fpu/util/ORTree.scala similarity index 87% rename from src/main/scala/fpu/util/ORTree.scala rename to src/main/scala/xiangshan/backend/fu/fpu/util/ORTree.scala index 
2d40b0ba0f574a4450e0e93a7d51bccb70b77179..08c54c26a620810b9d61544e5c124906950c0406 100644 --- a/src/main/scala/fpu/util/ORTree.scala +++ b/src/main/scala/xiangshan/backend/fu/fpu/util/ORTree.scala @@ -1,4 +1,4 @@ -package fpu.util +package xiangshan.backend.fu.fpu.util import chisel3._ diff --git a/src/main/scala/fpu/util/ShiftLeftJam.scala b/src/main/scala/xiangshan/backend/fu/fpu/util/ShiftLeftJam.scala similarity index 91% rename from src/main/scala/fpu/util/ShiftLeftJam.scala rename to src/main/scala/xiangshan/backend/fu/fpu/util/ShiftLeftJam.scala index f0043c246b9d2b2b24f2676dfda5d041ddf784e6..b7aafb7a5f84a46b3eb31ba6f9fe5a158b313ff8 100644 --- a/src/main/scala/fpu/util/ShiftLeftJam.scala +++ b/src/main/scala/xiangshan/backend/fu/fpu/util/ShiftLeftJam.scala @@ -1,4 +1,4 @@ -package fpu.util +package xiangshan.backend.fu.fpu.util import chisel3._ import chisel3.util._ diff --git a/src/main/scala/fpu/util/ShiftRightJam.scala b/src/main/scala/xiangshan/backend/fu/fpu/util/ShiftRightJam.scala similarity index 94% rename from src/main/scala/fpu/util/ShiftRightJam.scala rename to src/main/scala/xiangshan/backend/fu/fpu/util/ShiftRightJam.scala index 610c5e36caa7270edfeb15614878691ee0f623fc..2ed38fb54ddec8adc2a7fc6b4d5729e6d64bd80f 100644 --- a/src/main/scala/fpu/util/ShiftRightJam.scala +++ b/src/main/scala/xiangshan/backend/fu/fpu/util/ShiftRightJam.scala @@ -1,4 +1,4 @@ -package fpu.util +package xiangshan.backend.fu.fpu.util import chisel3._ import chisel3.util._ diff --git a/src/main/scala/xiangshan/backend/issue/IssueQueue.scala b/src/main/scala/xiangshan/backend/issue/IssueQueue.scala index 69a484732636114b5be0b9b195b1a5654390e70a..4d0a444a24c7ab845e2e61fdb41dd604280a30bb 100644 --- a/src/main/scala/xiangshan/backend/issue/IssueQueue.scala +++ b/src/main/scala/xiangshan/backend/issue/IssueQueue.scala @@ -246,7 +246,7 @@ class IssueQueue XSDebug("State Dump: ") for(i <- readyVec.indices.reverse){ - val r = readyVec(idxQueue(i)) + val r = 
(stateQueue(i)=/=s_invalid) && readyVec(idxQueue(i)) XSDebug(false, r, p"r") XSDebug(false, !r, p"-") } @@ -289,7 +289,8 @@ class IssueQueue XSInfo( io.enq.fire(), p"enq fire: pc:${Hexadecimal(io.enq.bits.cf.pc)} roqIdx:${io.enq.bits.roqIdx} " + - p"src1: ${io.enq.bits.psrc1} src2:${io.enq.bits.psrc2} pdst:${io.enq.bits.pdest}\n" + p"src1: ${io.enq.bits.psrc1} state: ${io.enq.bits.src1State} " + + p"src2: ${io.enq.bits.psrc2} state: ${io.enq.bits.src2State} pdst:${io.enq.bits.pdest}\n" ) XSInfo( io.deq.fire(), diff --git a/src/main/scala/xiangshan/backend/issue/ReservationStation.scala b/src/main/scala/xiangshan/backend/issue/ReservationStation.scala index 278b14eed8e5c94027b2899f25cc0fb9c041ef3a..104732a193b08e24ea2f01bc1f071f2498c0fdf7 100644 --- a/src/main/scala/xiangshan/backend/issue/ReservationStation.scala +++ b/src/main/scala/xiangshan/backend/issue/ReservationStation.scala @@ -32,7 +32,6 @@ class ReservationStation val enableBypass: Boolean = false, val fifo: Boolean = false ) extends XSModule with HasIQConst { - val src2Use = true val src3Use = (exuCfg.intSrcCnt > 2) || (exuCfg.fpSrcCnt > 2) val src2Listen = true @@ -62,6 +61,9 @@ class ReservationStation // to Dispatch val numExist = Output(UInt(iqIdxWidth.W)) + + // tlb hit, inst can deq, only used in ld/st reservation stations + val tlbFeedback = Flipped(ValidIO(new TlbFeedback)) // TODO }) val srcAllNum = 3 @@ -297,7 +299,8 @@ class ReservationStation } } } - + + if (bypassCnt > 0) { val bpPdest = io.bypassUops.map(_.bits.pdest) val bpValid = io.bypassUops.map(_.valid) val bpData = io.bypassData.map(_.bits.data) @@ -357,7 +360,8 @@ class ReservationStation enqSelIq, i.U, enqPsrc(i), k.U, bpData(k), io.bypassUops(k).bits.cf.pc, io.bypassUops(k).bits.roqIdx.asUInt) } } - + } + if (enableBypass) { // send out bypass val sel = io.selectedUop @@ -369,6 +373,7 @@ class ReservationStation sel.bits.ctrl.rfWen := issQue(deqSelIq).uop.ctrl.rfWen sel.bits.ctrl.fpWen := issQue(deqSelIq).uop.ctrl.fpWen } + 
XSInfo(io.redirect.valid, "Redirect: valid:%d isExp:%d isFpp:%d brTag:%d redHitVec:%b redIdHitVec:%b enqHit:%d selIsRed:%d\n", io.redirect.valid, io.redirect.bits.isException, io.redirect.bits.isFlushPipe, io.redirect.bits.brTag.value, VecInit(redHitVec).asUInt, VecInit(redIdHitVec).asUInt, enqRedHit, selIsRed) XSInfo(enqFire, s"EnqCtrl(%d %d) enqSelIq:%d Psrc/Rdy(%d:%d %d:%d %d:%d) Dest:%d oldDest:%d pc:%x roqIdx:%x\n", diff --git a/src/main/scala/xiangshan/backend/issue/ReservationStationNew.scala b/src/main/scala/xiangshan/backend/issue/ReservationStationNew.scala new file mode 100644 index 0000000000000000000000000000000000000000..11ae43c7452707b24d2546eb81e3923fb55b8ed9 --- /dev/null +++ b/src/main/scala/xiangshan/backend/issue/ReservationStationNew.scala @@ -0,0 +1,377 @@ +package xiangshan.backend.issue + +import chisel3._ +import chisel3.util._ +import xiangshan._ +import utils._ +import xiangshan.backend.exu.{Exu, ExuConfig} +import java.rmi.registry.Registry +import java.{util => ju} + +class SrcBundle extends XSBundle { + val src = UInt(PhyRegIdxWidth.W) + val state = SrcState() + val srctype = SrcType() + + def hit(uop: MicroOp) : Bool = { + (src === uop.pdest) && (state === SrcState.busy) && + ((srctype === SrcType.reg && uop.ctrl.rfWen && src=/=0.U) || + (srctype === SrcType.fp && uop.ctrl.fpWen)) // TODO: check if zero map to zero when rename + } + + override def toPrintable: Printable = { + p"src:${src} state:${state} type:${srctype}" + } +} + +object SrcBundle { + def apply(src: UInt, state: UInt/*SrcState*/, srctype: UInt/*SrcType*/): SrcBundle = { + val b = Wire(new SrcBundle) + b.src := src + b.state := state + b.srctype := srctype + b + } + + def stateCheck(src: SrcBundle): UInt /*SrcState*/ = { + Mux( (src.srctype=/=SrcType.reg && src.srctype=/=SrcType.fp) || + (src.srctype===SrcType.reg && src.src===0.U), SrcState.rdy, src.state) + } + + def check(src: UInt, state: UInt, srctype: UInt): SrcBundle = { + val b = Wire(new SrcBundle) + b.src := 
src + b.state := stateCheck(SrcBundle(src, state, srctype)) + b.srctype := srctype + b + } +} + +class BypassQueue(number: Int) extends XSModule { + val io = IO(new Bundle { + val in = Flipped(ValidIO(new MicroOp)) + val out = ValidIO(new MicroOp) + val redirect = Flipped(ValidIO(new Redirect)) + }) + if (number < 0) { + io.out.valid := false.B + io.out.bits := DontCare + } else if(number == 0) { + io.in <> io.out + io.out.valid := io.in.valid && !io.out.bits.roqIdx.needFlush(io.redirect) + } else { + val queue = Seq.fill(number)(RegInit(0.U.asTypeOf(new Bundle{ + val valid = Bool() + val bits = new MicroOp + }))) + queue(0).valid := io.in.valid + queue(0).bits := io.in.bits + (0 until (number-1)).map{i => + queue(i+1) := queue(i) + queue(i+1).valid := queue(i).valid && !queue(i).bits.roqIdx.needFlush(io.redirect) + } + io.out.valid := queue(number-1).valid && !queue(number-1).bits.roqIdx.needFlush(io.redirect) + io.out.bits := queue(number-1).bits + for (i <- 0 until number) { + XSDebug(queue(i).valid, p"BPQue(${i.U}): pc:${Hexadecimal(queue(i).bits.cf.pc)} roqIdx:${queue(i).bits.roqIdx} pdest:${queue(i).bits.pdest} rfWen:${queue(i).bits.ctrl.rfWen} fpWen${queue(i).bits.ctrl.fpWen}\n") + } + } +} + +class ReservationStationNew +( + val exuCfg: ExuConfig, + wakeupCnt: Int, + extraListenPortsCnt: Int, + srcNum: Int = 3, + fixedDelay: Int, + feedback: Boolean, + replayDelay: Int = 16 +) extends XSModule { + + + val iqSize = IssQueSize + val iqIdxWidth = log2Up(iqSize) + + val io = IO(new XSBundle { + // flush Issue Queue + val redirect = Flipped(ValidIO(new Redirect)) + + // enq Ctrl sigs at dispatch-2 + val enqCtrl = Flipped(DecoupledIO(new MicroOp)) + // enq Data at next cycle (regfile has 1 cycle latency) + val enqData = Input(new ExuInput) + + // broadcast selected uop to other issue queues + val selectedUop = ValidIO(new MicroOp) + + // send to exu + val deq = DecoupledIO(new ExuInput) + + // recv broadcasted uops form any relative issue queue, + // to simplify 
wake up logic, the uop broadcasted by this queue self + // are also in 'boradcastedUops' + val broadcastedUops = Vec(wakeupCnt, Flipped(ValidIO(new MicroOp))) + + // listen to write back data bus + val writeBackedData = Vec(wakeupCnt, Input(UInt(XLEN.W))) + + // for some function units with uncertain latency, + // we have to wake up relative uops until those function units write back + val extraListenPorts = Vec(extraListenPortsCnt, Flipped(ValidIO(new ExuOutput))) + + // to Dispatch + val numExist = Output(UInt(iqIdxWidth.W)) + + // TODO: support replay for future use if exu is ldu/stu + val tlbFeedback = Flipped(ValidIO(new TlbFeedback)) // TODO: change its name + }) + +// io <> DontCare + + // GOAL: + // 1. divide control part and data part + // 2. store control signal in sending RS and send out when after paticular cycles + // 3. one RS only have one paticular delay + // 4. remove the issue stage + // 5. support replay will cause one or two more latency for state machine change + // so would not support replay in current edition. + + // here is three logial part: + // control part: psrc(5.W)*3 srcState(1.W)*3 fuOpType/Latency(3.W) roqIdx + // data part: data(64.W)*3 + // other part: lsroqIdx and many other signal in uop. 
may set them to control part(close to dispatch) + + // control part: + + val s_idle :: s_valid :: s_wait :: s_replay :: Nil = Enum(4) + + val needFeedback = if (feedback) true.B else false.B + val stateQueue = RegInit(VecInit(Seq.fill(iqSize)(s_idle))) + val validQueue = stateQueue.map(_ === s_valid) + val emptyQueue = stateQueue.map(_ === s_idle) + val srcQueue = Reg(Vec(iqSize, Vec(srcNum, new SrcBundle))) + val cntQueue = Reg(Vec(iqSize, UInt(log2Up(replayDelay).W))) + + // data part: + val data = Reg(Vec(iqSize, Vec(3, UInt(XLEN.W)))) + + // other part: + val uop = Reg(Vec(iqSize, new MicroOp)) + + // rs queue part: + val tailPtr = RegInit(0.U((iqIdxWidth+1).W)) + val idxQueue = RegInit(VecInit((0 until iqSize).map(_.U(iqIdxWidth.W)))) + val readyQueue = VecInit(srcQueue.map(a => ParallelAND(a.map(_.state === SrcState.rdy)).asBool). + zip(validQueue).map{ case (a,b) => a&b }) + + // select ready + // for no replay, select just equal to deq (attached) + // with replay, select is just two stage with deq. 
+ val moveMask = WireInit(0.U(iqSize.W)) + val selectedIdxRegOH = Wire(UInt(iqSize.W)) + val selectMask = WireInit(VecInit( + (0 until iqSize).map(i => + readyQueue(i) && !(selectedIdxRegOH(i) && io.deq.fire()) + // TODO: add redirect here, may cause long latency , change it + ) + )) + val haveBubble = Wire(Bool()) + val (selectedIdxWire, selected) = PriorityEncoderWithFlag(selectMask) + val redSel = uop(idxQueue(selectedIdxWire)).roqIdx.needFlush(io.redirect) + val selValid = !redSel && selected && !haveBubble + val selReg = RegNext(selValid) + val selectedIdxReg = RegNext(selectedIdxWire - moveMask(selectedIdxWire)) + selectedIdxRegOH := UIntToOH(selectedIdxReg) + + // sel bubble + // TODO: + val bubIdxRegOH = Wire(UInt(iqSize.W)) + val bubMask = WireInit(VecInit( + (0 until iqSize).map(i => emptyQueue(i) && !bubIdxRegOH(i)) + )) + val (firstBubble, findBubble) = PriorityEncoderWithFlag(bubMask) + haveBubble := findBubble && (firstBubble < tailPtr) + val bubValid = haveBubble + val bubReg = RegNext(bubValid) + val bubIdxReg = RegNext(firstBubble - moveMask(firstBubble)) + bubIdxRegOH := UIntToOH(bubIdxReg) + + // deq + // TODO: divide needFeedback and not needFeedback + val deqValid = bubReg/*fire an bubble*/ || (selReg && io.deq.ready && !needFeedback/*fire an rdy*/) + val deqIdx = Mux(bubReg, bubIdxReg, selectedIdxReg) // TODO: may have one more cycle delay than fire slot + moveMask := { + (Fill(iqSize, 1.U(1.W)) << deqIdx)(iqSize-1, 0) + } & Fill(iqSize, deqValid) + + for(i <- 0 until iqSize-1){ + when(moveMask(i)){ + idxQueue(i) := idxQueue(i+1) + srcQueue(i).zip(srcQueue(i+1)).map{case (a,b) => a := b} + stateQueue(i) := stateQueue(i+1) + } + } + when(deqValid){ + idxQueue.last := idxQueue(deqIdx) + stateQueue.last := s_idle + } + + when (selReg && io.deq.ready && needFeedback) { + stateQueue(selectedIdxReg) := s_wait + } + + // redirect + val redHitVec = (0 until iqSize).map(i => uop(idxQueue(i)).roqIdx.needFlush(io.redirect)) + val fbMatchVec = (0 until 
iqSize).map(i => + uop(idxQueue(i)).roqIdx.asUInt === io.tlbFeedback.bits.roqIdx.asUInt && io.tlbFeedback.valid && (stateQueue(i) === s_wait || stateQueue(i)===s_valid)) + // TODO: feedback at the same cycle now, may change later + //redHitVec.zip(validQueue).map{ case (r,v) => when (r) { v := false.B } } + for (i <- 0 until iqSize) { + val cnt = cntQueue(idxQueue(i)) + + if (i != 0) { // TODO: combine the two case + val nextIdx = i.U - moveMask(i-1) + when (stateQueue(i)===s_replay) { + when (cnt===0.U) { stateQueue(nextIdx) := s_valid } + .otherwise { cnt := cnt - 1.U } + } + when (fbMatchVec(i)) { + stateQueue(nextIdx) := Mux(io.tlbFeedback.bits.hit, s_idle, s_replay) + cnt := Mux(io.tlbFeedback.bits.hit, cnt, (replayDelay-1).U) + } + when (redHitVec(i)) { stateQueue(nextIdx) := s_idle } + } else { when (!moveMask(i)) { + val nextIdx = i + when (stateQueue(i)===s_replay) { + when (cnt===0.U) { stateQueue(nextIdx) := s_valid } + .otherwise { cnt := cnt - 1.U } + } + when (fbMatchVec(i)) { + stateQueue(nextIdx) := Mux(io.tlbFeedback.bits.hit, s_idle, s_replay) + cnt := Mux(io.tlbFeedback.bits.hit, cnt, (replayDelay-1).U) + } + when (redHitVec(i)) { stateQueue(nextIdx) := s_idle } + }} + } + + // bypass send + // store selected uops and send out one cycle before result back + def bpSelCheck(uop: MicroOp): Bool = { // TODO: wanna a map from FunctionUnit.scala + val fuType = uop.ctrl.fuType + (fuType === FuType.alu) || + (fuType === FuType.jmp) || + (fuType === FuType.i2f) || + (fuType === FuType.csr) || + (fuType === FuType.fence) || + (fuType === FuType.fmac) + } + val bpQueue = Module(new BypassQueue(fixedDelay)) + bpQueue.io.in.valid := selValid // FIXME: error when function is blocked => fu should not be blocked + bpQueue.io.in.bits := uop(idxQueue(selectedIdxWire)) + bpQueue.io.redirect := io.redirect + io.selectedUop.valid := bpQueue.io.out.valid && bpSelCheck(bpQueue.io.out.bits) + io.selectedUop.bits := bpQueue.io.out.bits + if(fixedDelay > 0) { + 
XSDebug(io.selectedUop.valid, p"SelBypass: pc:0x${Hexadecimal(io.selectedUop.bits.cf.pc)} roqIdx:${io.selectedUop.bits.roqIdx} pdest:${io.selectedUop.bits.pdest} rfWen:${io.selectedUop.bits.ctrl.rfWen} fpWen:${io.selectedUop.bits.ctrl.fpWen}\n" ) + } + + // output + io.deq.valid := selReg && !uop(idxQueue(selectedIdxReg)).roqIdx.needFlush(io.redirect)// TODO: read it and add assert for rdyQueue + io.deq.bits.uop := uop(idxQueue(selectedIdxReg)) + io.deq.bits.src1 := data(idxQueue(selectedIdxReg))(0) + if(srcNum > 1) { io.deq.bits.src2 := data(idxQueue(selectedIdxReg))(1) } + if(srcNum > 2) { io.deq.bits.src3 := data(idxQueue(selectedIdxReg))(2) } // TODO: beautify it + + // enq + val tailAfterRealDeq = tailPtr - (io.deq.fire() && !needFeedback|| bubReg) + val isFull = tailAfterRealDeq.head(1).asBool() // tailPtr===qsize.U + tailPtr := tailAfterRealDeq + io.enqCtrl.fire() + + io.enqCtrl.ready := !isFull && !io.redirect.valid // TODO: check this redirect && need more optimization + val enqUop = io.enqCtrl.bits + val srcTypeSeq = Seq(enqUop.ctrl.src1Type, enqUop.ctrl.src2Type, enqUop.ctrl.src3Type) + val srcSeq = Seq(enqUop.psrc1, enqUop.psrc2, enqUop.psrc3) + val srcStateSeq = Seq(enqUop.src1State, enqUop.src2State, enqUop.src3State) + val srcDataSeq = Seq(io.enqData.src1, io.enqData.src2, io.enqData.src3) + + val enqPtr = Mux(tailPtr.head(1).asBool, selectedIdxReg, tailPtr.tail(1)) + val enqIdx_data = idxQueue(enqPtr) + val enqIdx_ctrl = tailAfterRealDeq.tail(1) + val enqIdxNext = RegNext(enqIdx_data) + val enqBpVec = (0 until srcNum).map(i => bypass(SrcBundle(srcSeq(i), srcStateSeq(i), srcTypeSeq(i)), true.B)) + + when (io.enqCtrl.fire()) { + uop(enqIdx_data) := enqUop + stateQueue(enqIdx_ctrl) := s_valid + srcQueue(enqIdx_ctrl).zipWithIndex.map{ case (s,i) => + s := SrcBundle.check(srcSeq(i), Mux(enqBpVec(i)._1, SrcState.rdy, srcStateSeq(i)), srcTypeSeq(i)) } + + XSDebug(p"EnqCtrlFire: roqIdx:${enqUop.roqIdx} pc:0x${Hexadecimal(enqUop.cf.pc)} src1:${srcSeq(0)} 
state:${srcStateSeq(0)} type:${srcTypeSeq(0)} src2:${srcSeq(1)} state:${srcStateSeq(1)} type:${srcTypeSeq(1)} src3:${srcSeq(2)} state:${srcStateSeq(2)} type:${srcTypeSeq(2)} enqBpHit:${enqBpVec(0)._1}${enqBpVec(1)._1}${enqBpVec(2)._1}\n") + } + when (RegNext(io.enqCtrl.fire())) { + for(i <- data(0).indices) { data(enqIdxNext)(i) := Mux(enqBpVec(i)._2, enqBpVec(i)._3, srcDataSeq(i)) } + + XSDebug(p"EnqDataFire: idx:${enqIdxNext} src1:0x${Hexadecimal(srcDataSeq(0))} src2:0x${Hexadecimal(srcDataSeq(1))} src3:0x${Hexadecimal(srcDataSeq(2))} enqBpHit:(${enqBpVec(0)._2}|0x${Hexadecimal(enqBpVec(0)._3)})(${enqBpVec(1)._2}|0x${Hexadecimal(enqBpVec(1)._3)})(${enqBpVec(2)._2}|0x${Hexadecimal(enqBpVec(2)._3)}\n") + } + + // wakeup and bypass + def wakeup(src: SrcBundle, valid: Bool) : (Bool, UInt) = { + val hitVec = io.extraListenPorts.map(port => src.hit(port.bits.uop) && port.valid) + assert(RegNext(PopCount(hitVec)===0.U || PopCount(hitVec)===1.U)) + + val hit = ParallelOR(hitVec) && valid + (hit, ParallelMux(hitVec zip io.extraListenPorts.map(_.bits.data))) + } + + def bypass(src: SrcBundle, valid: Bool) : (Bool, Bool, UInt) = { + val hitVec = io.broadcastedUops.map(port => src.hit(port.bits) && port.valid) + assert(RegNext(PopCount(hitVec)===0.U || PopCount(hitVec)===1.U)) + + val hit = ParallelOR(hitVec) && valid + (hit, RegNext(hit), ParallelMux(hitVec.map(RegNext(_)) zip io.writeBackedData)) + } + + for (i <- 0 until iqSize) { + for (j <- 0 until srcNum) { + val (wuHit, wuData) = wakeup(srcQueue(i)(j), validQueue(i)) + val (bpHit, bpHitReg, bpData) = bypass(srcQueue(i)(j), validQueue(i)) + when (wuHit || bpHit) { srcQueue(i.U - moveMask(i))(j).state := SrcState.rdy } + when (wuHit) { data(idxQueue(i))(j) := wuData } + when (bpHitReg) { data(RegNext(idxQueue(i)))(j) := bpData } + + XSDebug(wuHit, p"WUHit: (${i.U})(${j.U}) Data:0x${Hexadecimal(wuData)} idx:${idxQueue(i)}\n") + XSDebug(bpHit, p"BPHit: (${i.U})(${j.U}) Ctrl idx:${idxQueue(i)}\n") + XSDebug(bpHitReg, 
p"BPHit: (${i.U})(${j.U}) Data:0x${Hexadecimal(bpData)} idx:${idxQueue(i)}\n") + } + } + + // other io + io.numExist := tailPtr + + // assert + assert(tailPtr <= iqSize.U) + + // log + // TODO: add log + val print = io.enqCtrl.valid || io.deq.valid || ParallelOR(validQueue) || tailPtr=/=0.U || true.B + XSDebug(print, p"In(${io.enqCtrl.valid} ${io.enqCtrl.ready}) Out(${io.deq.valid} ${io.deq.ready}) tailPtr:${tailPtr} tailPtr.tail:${tailPtr.tail(1)} tailADeq:${tailAfterRealDeq} isFull:${isFull} validQue:b${Binary(VecInit(validQueue).asUInt)} readyQueue:${Binary(readyQueue.asUInt)} needFeedback:${needFeedback}\n") + XSDebug(io.redirect.valid && print, p"Redirect: roqIdx:${io.redirect.bits.roqIdx} isException:${io.redirect.bits.isException} isMisPred:${io.redirect.bits.isMisPred} isReplay:${io.redirect.bits.isReplay} isFlushPipe:${io.redirect.bits.isFlushPipe} RedHitVec:b${Binary(VecInit(redHitVec).asUInt)}\n") + XSDebug(io.tlbFeedback.valid && print, p"TlbFeedback: roqIdx:${io.tlbFeedback.bits.roqIdx} hit:${io.tlbFeedback.bits.hit} fbMatchVec:${Binary(VecInit(fbMatchVec).asUInt)}\n") + XSDebug(print, p"SelMask:b${Binary(selectMask.asUInt)} MoveMask:b${Binary(moveMask.asUInt)} rdyQue:b${Binary(readyQueue.asUInt)} selIdxWire:${selectedIdxWire} sel:${selected} redSel:${redSel} selValid:${selValid} selIdxReg:${selectedIdxReg} selReg:${selReg} haveBubble:${haveBubble} deqValid:${deqValid} firstBubble:${firstBubble} findBubble:${findBubble} bubReg:${bubReg} bubIdxReg:${bubIdxReg} selRegOH:b${Binary(selectedIdxRegOH)}\n") + XSDebug(io.selectedUop.valid, p"Select: roqIdx:${io.selectedUop.bits.roqIdx} pc:0x${Hexadecimal(io.selectedUop.bits.cf.pc)} fuType:b${Binary(io.selectedUop.bits.ctrl.fuType)} FuOpType:b${Binary(io.selectedUop.bits.ctrl.fuOpType)}}\n") + XSDebug(io.deq.fire, p"Deq: SelIdxReg:${selectedIdxReg} pc:0x${Hexadecimal(io.deq.bits.uop.cf.pc)} Idx:${idxQueue(selectedIdxReg)} roqIdx:${io.deq.bits.uop.roqIdx} src1:0x${Hexadecimal(io.deq.bits.src1)} 
src2:0x${Hexadecimal(io.deq.bits.src2)} src3:0x${Hexadecimal(io.deq.bits.src3)}\n") + val broadcastedUops = io.broadcastedUops + val extraListenPorts = io.extraListenPorts + for (i <- broadcastedUops.indices) { + XSDebug(broadcastedUops(i).valid && print, p"BpUops(${i.U}): pc:0x${Hexadecimal(broadcastedUops(i).bits.cf.pc)} roqIdx:${broadcastedUops(i).bits.roqIdx} idxQueue:${selectedIdxWire} pdest:${broadcastedUops(i).bits.pdest} rfWen:${broadcastedUops(i).bits.ctrl.rfWen} fpWen:${broadcastedUops(i).bits.ctrl.fpWen} data(last):0x${Hexadecimal(io.writeBackedData(i))}\n") + XSDebug(RegNext(broadcastedUops(i).valid && print), p"BpUopData(${i.U}): data(last):0x${Hexadecimal(io.writeBackedData(i))}\n") + } + for (i <- extraListenPorts.indices) { + XSDebug(extraListenPorts(i).valid && print, p"WakeUp(${i.U}): pc:0x${Hexadecimal(extraListenPorts(i).bits.uop.cf.pc)} roqIdx:${extraListenPorts(i).bits.uop.roqIdx} pdest:${extraListenPorts(i).bits.uop.pdest} rfWen:${extraListenPorts(i).bits.uop.ctrl.rfWen} fpWen:${extraListenPorts(i).bits.uop.ctrl.fpWen} data:0x${Hexadecimal(extraListenPorts(i).bits.data)}\n") + } + XSDebug(print, " :IQ|s|r|cnt| src1 |src2 | src3|pdest(rf|fp)| roqIdx|pc\n") + for(i <- 0 until iqSize) { + XSDebug(print, p"${i.U}: ${idxQueue(i)}|${stateQueue(i)}|${readyQueue(i)}| ${cntQueue(idxQueue(i))}|${srcQueue(i)(0)} 0x${Hexadecimal(data(idxQueue(i))(0))}|${srcQueue(i)(1)} 0x${Hexadecimal(data(idxQueue(i))(1))}|${srcQueue(i)(2)} 0x${Hexadecimal(data(idxQueue(i))(2))}|${uop(idxQueue(i)).pdest}(${uop(idxQueue(i)).ctrl.rfWen}|${uop(idxQueue(i)).ctrl.fpWen})|${uop(idxQueue(i)).roqIdx}|${Hexadecimal(uop(idxQueue(i)).cf.pc)}\n") + } +} diff --git a/src/main/scala/xiangshan/backend/regfile/Regfile.scala b/src/main/scala/xiangshan/backend/regfile/Regfile.scala index 7ed3bc9e06ed46d1dda923b6eb3d5f7248e153d2..7b64492dc3c28828bfa45841d08e3eebf480ddd2 100644 --- a/src/main/scala/xiangshan/backend/regfile/Regfile.scala +++ 
b/src/main/scala/xiangshan/backend/regfile/Regfile.scala @@ -2,7 +2,6 @@ package xiangshan.backend.regfile import chisel3._ import chisel3.util._ -import chisel3.util.experimental.BoringUtils import xiangshan._ class RfReadPort extends XSBundle { @@ -20,8 +19,7 @@ class Regfile ( numReadPorts: Int, numWirtePorts: Int, - hasZero: Boolean, - isMemRf: Boolean = false + hasZero: Boolean ) extends XSModule { val io = IO(new Bundle() { val readPorts = Vec(numReadPorts, new RfReadPort) @@ -29,19 +27,6 @@ class Regfile }) val mem = Mem(NRPhyRegs, UInt(XLEN.W)) - - val debugRegSync = WireInit(0.U(XLEN.W)) - val debugCnt = RegInit(0.U((PhyRegIdxWidth+1).W)) - when(!debugCnt.head(1).asBool()){ - debugCnt := debugCnt + 1.U - if(isMemRf){ - BoringUtils.addSink(debugRegSync, "DEBUG_REG_SYNC") - mem(debugCnt) := debugRegSync - } else if (hasZero) { - debugRegSync := mem(debugCnt) - BoringUtils.addSource(debugRegSync, "DEBUG_REG_SYNC") - } - } for(r <- io.readPorts){ val addr_reg = RegNext(r.addr) @@ -54,11 +39,19 @@ class Regfile } } - if(!isMemRf){ - val debugArchRat = WireInit(VecInit(Seq.fill(32)(0.U(PhyRegIdxWidth.W)))) - BoringUtils.addSink(debugArchRat, if(hasZero) "DEBUG_INI_ARCH_RAT" else "DEBUG_FP_ARCH_RAT") - - val debugArchReg = WireInit(VecInit(debugArchRat.zipWithIndex.map(x => if(hasZero && x._2==0) 0.U else mem(x._1)))) - BoringUtils.addSource(debugArchReg, if(hasZero) "DEBUG_INT_ARCH_REG" else "DEBUG_FP_ARCH_REG") - } + val debugArchRat = WireInit(VecInit(Seq.fill(32)(0.U(PhyRegIdxWidth.W)))) + ExcitingUtils.addSink( + debugArchRat, + if(hasZero) "DEBUG_INI_ARCH_RAT" else "DEBUG_FP_ARCH_RAT", + ExcitingUtils.Debug + ) + + val debugArchReg = WireInit(VecInit(debugArchRat.zipWithIndex.map( + x => if(hasZero && x._2==0) 0.U else mem(x._1) + ))) + ExcitingUtils.addSource( + debugArchReg, + if(hasZero) "DEBUG_INT_ARCH_REG" else "DEBUG_FP_ARCH_REG", + ExcitingUtils.Debug + ) } diff --git a/src/main/scala/xiangshan/backend/rename/Rename.scala 
b/src/main/scala/xiangshan/backend/rename/Rename.scala index 9b688a7b8b34f5947a4bd248e8277b735fc8e777..19a89efe0b0eadfed0ecc7be1cf9dba595d73d60 100644 --- a/src/main/scala/xiangshan/backend/rename/Rename.scala +++ b/src/main/scala/xiangshan/backend/rename/Rename.scala @@ -11,9 +11,9 @@ class Rename extends XSModule { val roqCommits = Vec(CommitWidth, Flipped(ValidIO(new RoqCommit))) val wbIntResults = Vec(NRIntWritePorts, Flipped(ValidIO(new ExuOutput))) val wbFpResults = Vec(NRFpWritePorts, Flipped(ValidIO(new ExuOutput))) - val intRfReadAddr = Vec(NRIntReadPorts + NRMemReadPorts, Input(UInt(PhyRegIdxWidth.W))) + val intRfReadAddr = Vec(NRIntReadPorts, Input(UInt(PhyRegIdxWidth.W))) val fpRfReadAddr = Vec(NRFpReadPorts, Input(UInt(PhyRegIdxWidth.W))) - val intPregRdy = Vec(NRIntReadPorts + NRMemReadPorts, Output(Bool())) + val intPregRdy = Vec(NRIntReadPorts, Output(Bool())) val fpPregRdy = Vec(NRFpReadPorts, Output(Bool())) // set preg to busy when replay val replayPregReq = Vec(ReplayWidth, Input(new ReplayPregReq)) @@ -44,7 +44,7 @@ class Rename extends XSModule { val fpRat = Module(new RenameTable(float = true)).io val intRat = Module(new RenameTable(float = false)).io val fpBusyTable = Module(new BusyTable(NRFpReadPorts, NRFpWritePorts)).io - val intBusyTable = Module(new BusyTable(NRIntReadPorts+NRMemReadPorts, NRIntWritePorts)).io + val intBusyTable = Module(new BusyTable(NRIntReadPorts, NRIntWritePorts)).io fpFreeList.redirect := io.redirect intFreeList.redirect := io.redirect diff --git a/src/main/scala/xiangshan/backend/rename/RenameTable.scala b/src/main/scala/xiangshan/backend/rename/RenameTable.scala index 5884aeb50e9303e6ce95a6fc85cf6e19e66e20a2..fbc3f7bc8ae0c1b70bf5d984943a771c92fdb5bf 100644 --- a/src/main/scala/xiangshan/backend/rename/RenameTable.scala +++ b/src/main/scala/xiangshan/backend/rename/RenameTable.scala @@ -2,7 +2,6 @@ package xiangshan.backend.rename import chisel3._ import chisel3.util._ -import chisel3.util.experimental.BoringUtils 
import xiangshan._ class RatReadPort extends XSBundle { @@ -52,5 +51,9 @@ class RenameTable(float: Boolean) extends XSModule { } } - BoringUtils.addSource(arch_table, if(float) "DEBUG_FP_ARCH_RAT" else "DEBUG_INI_ARCH_RAT") + ExcitingUtils.addSource( + arch_table, + if(float) "DEBUG_FP_ARCH_RAT" else "DEBUG_INI_ARCH_RAT", + ExcitingUtils.Debug + ) } \ No newline at end of file diff --git a/src/main/scala/xiangshan/backend/roq/Roq.scala b/src/main/scala/xiangshan/backend/roq/Roq.scala index 009e407621998d246ae83ddd85541bec88eed5d4..4ddecf80dbc717ac31c118e3c8740b5c7d0f0fff 100644 --- a/src/main/scala/xiangshan/backend/roq/Roq.scala +++ b/src/main/scala/xiangshan/backend/roq/Roq.scala @@ -5,9 +5,8 @@ import chisel3._ import chisel3.util._ import xiangshan._ import utils._ -import chisel3.util.experimental.BoringUtils import xiangshan.backend.LSUOpType -import xiangshan.backend.decode.isa.Privileged.WFI +import xiangshan.backend.fu.fpu.Fflags class RoqPtr extends CircularQueuePtr(RoqPtr.RoqSize) with HasCircularQueuePtrHelper { @@ -40,6 +39,11 @@ class Roq extends XSModule with HasCircularQueuePtrHelper { val bcommit = Output(UInt(BrTagWidth.W)) val commitRoqIndex = Output(Valid(new RoqPtr)) val roqDeqPtr = Output(new RoqPtr) + val intrBitSet = Input(Bool()) + val trapTarget = Input(UInt(VAddrBits.W)) + + val fflags = Output(new Fflags) + val dirty_fs = Output(Bool()) }) val numWbPorts = io.exeWbResults.length @@ -49,6 +53,7 @@ class Roq extends XSModule with HasCircularQueuePtrHelper { val flag = RegInit(VecInit(List.fill(RoqSize)(false.B))) val writebacked = Reg(Vec(RoqSize, Bool())) + val exuFflags = Mem(RoqSize, new Fflags) val exuData = Reg(Vec(RoqSize, UInt(XLEN.W)))//for debug val exuDebug = Reg(Vec(RoqSize, new DebugBundle))//for debug @@ -116,6 +121,7 @@ class Roq extends XSModule with HasCircularQueuePtrHelper { microOp(wbIdx).ctrl.flushPipe := io.exeWbResults(i).bits.uop.ctrl.flushPipe microOp(wbIdx).diffTestDebugLrScValid := 
io.exeWbResults(i).bits.uop.diffTestDebugLrScValid exuData(wbIdx) := io.exeWbResults(i).bits.data + exuFflags(wbIdx) := io.exeWbResults(i).bits.fflags exuDebug(wbIdx) := io.exeWbResults(i).bits.debug val debugUop = microOp(wbIdx) @@ -128,24 +134,25 @@ class Roq extends XSModule with HasCircularQueuePtrHelper { } // roq redirect only used for exception - val intrBitSet = WireInit(false.B) - ExcitingUtils.addSink(intrBitSet, "intrBitSetIDU") - val trapTarget = WireInit(0.U(VAddrBits.W)) - ExcitingUtils.addSink(trapTarget, "trapTarget") + // val intrBitSet = WireInit(false.B) + // ExcitingUtils.addSink(intrBitSet, "intrBitSetIDU") + // val trapTarget = WireInit(0.U(VAddrBits.W)) + // ExcitingUtils.addSink(trapTarget, "trapTarget") val deqUop = microOp(deqPtr) val deqPtrWritebacked = writebacked(deqPtr) && valid(deqPtr) - val intrEnable = intrBitSet && !isEmpty && !hasNoSpec && + val intrEnable = io.intrBitSet && !isEmpty && !hasNoSpec && deqUop.ctrl.commitType =/= CommitType.STORE && deqUop.ctrl.commitType =/= CommitType.LOAD// TODO: wanna check why has hasCsr(hasNoSpec) val exceptionEnable = deqPtrWritebacked && Cat(deqUop.cf.exceptionVec).orR() val isFlushPipe = deqPtrWritebacked && deqUop.ctrl.flushPipe io.redirect := DontCare io.redirect.valid := (state === s_idle) && (intrEnable || exceptionEnable || isFlushPipe)// TODO: add fence flush to flush the whole pipe io.redirect.bits.isException := intrEnable || exceptionEnable - io.redirect.bits.isFlushPipe := isFlushPipe - io.redirect.bits.target := Mux(isFlushPipe, deqUop.cf.pc + 4.U, trapTarget) + // reuse isFlushPipe to represent interrupt for CSR + io.redirect.bits.isFlushPipe := isFlushPipe || intrEnable + io.redirect.bits.target := Mux(isFlushPipe, deqUop.cf.pc + 4.U, io.trapTarget) io.exception := deqUop - XSDebug(io.redirect.valid, "generate redirect: pc 0x%x intr %d excp %d flushpp %d target:0x%x Traptarget 0x%x exceptionVec %b\n", io.exception.cf.pc, intrEnable, exceptionEnable, isFlushPipe, 
io.redirect.bits.target, trapTarget, Cat(microOp(deqPtr).cf.exceptionVec)) + XSDebug(io.redirect.valid, "generate redirect: pc 0x%x intr %d excp %d flushpp %d target:0x%x Traptarget 0x%x exceptionVec %b\n", io.exception.cf.pc, intrEnable, exceptionEnable, isFlushPipe, io.redirect.bits.target, io.trapTarget, Cat(microOp(deqPtr).cf.exceptionVec)) // Commit uop to Rename (walk) val shouldWalkVec = Wire(Vec(CommitWidth, Bool())) @@ -168,6 +175,9 @@ class Roq extends XSModule with HasCircularQueuePtrHelper { val storeCommitVec = WireInit(VecInit(Seq.fill(CommitWidth)(false.B))) val cfiCommitVec = WireInit(VecInit(Seq.fill(CommitWidth)(false.B))) + // wiring to csr + val fflags = WireInit(0.U.asTypeOf(new Fflags)) + val dirty_fs = WireInit(false.B) for(i <- 0 until CommitWidth){ io.commits(i) := DontCare switch(state){ @@ -187,15 +197,28 @@ class Roq extends XSModule with HasCircularQueuePtrHelper { cfiCommitVec(i) := io.commits(i).valid && !commitUop.cf.brUpdate.pd.notCFI + val commitFflags = exuFflags(commitIdx) + when(io.commits(i).valid){ + when(commitFflags.asUInt.orR()){ + // update fflags + fflags := exuFflags(commitIdx) + } + when(commitUop.ctrl.fpWen){ + // set fs to dirty + dirty_fs := true.B + } + } + when(io.commits(i).valid){v := false.B} XSInfo(io.commits(i).valid, - "retired pc %x wen %d ldest %d pdest %x old_pdest %x data %x\n", + "retired pc %x wen %d ldest %d pdest %x old_pdest %x data %x fflags: %b\n", commitUop.cf.pc, commitUop.ctrl.rfWen, commitUop.ctrl.ldest, commitUop.pdest, commitUop.old_pdest, - exuData(commitIdx) + exuData(commitIdx), + exuFflags(commitIdx).asUInt ) XSInfo(io.commits(i).valid && exuDebug(commitIdx).isMMIO, "difftest skiped pc0x%x\n", @@ -236,6 +259,9 @@ class Roq extends XSModule with HasCircularQueuePtrHelper { io.commits(i).bits.isWalk := state =/= s_idle } + io.fflags := fflags + io.dirty_fs := dirty_fs + val validCommit = io.commits.map(_.valid) when(state===s_walk) { //exit walk state when all roq entry is commited @@ 
-261,9 +287,6 @@ class Roq extends XSModule with HasCircularQueuePtrHelper { // commit branch to brq io.bcommit := PopCount(cfiCommitVec) - val hasWFI = io.commits.map(c => c.valid && state===s_idle && c.bits.uop.cf.instr===WFI).reduce(_||_) - ExcitingUtils.addSource(hasWFI, "isWFI") - // when redirect, walk back roq entries when(io.brqRedirect.valid){ // TODO: need check if consider exception redirect? state := s_walk @@ -369,19 +392,18 @@ class Roq extends XSModule with HasCircularQueuePtrHelper { val retirePCFix = SignExt(Mux(io.redirect.valid, microOp(deqPtr).cf.pc, microOp(firstValidCommit).cf.pc), XLEN) val retireInstFix = Mux(io.redirect.valid, microOp(deqPtr).cf.instr, microOp(firstValidCommit).cf.instr) if(!env.FPGAPlatform){ - BoringUtils.addSource(RegNext(retireCounterFix), "difftestCommit") - BoringUtils.addSource(RegNext(retirePCFix), "difftestThisPC")//first valid PC - BoringUtils.addSource(RegNext(retireInstFix), "difftestThisINST")//first valid inst - BoringUtils.addSource(RegNext(skip.asUInt), "difftestSkip") - // BoringUtils.addSource(RegNext(false.B), "difftestIsRVC")//FIXIT - BoringUtils.addSource(RegNext(isRVC.asUInt), "difftestIsRVC") - BoringUtils.addSource(RegNext(wen.asUInt), "difftestWen") - BoringUtils.addSource(RegNext(wpc), "difftestWpc") - BoringUtils.addSource(RegNext(wdata), "difftestWdata") - BoringUtils.addSource(RegNext(wdst), "difftestWdst") - BoringUtils.addSource(RegNext(scFailed), "difftestScFailed") - BoringUtils.addSource(RegNext(difftestIntrNO), "difftestIntrNO") - BoringUtils.addSource(RegNext(difftestCause), "difftestCause") + ExcitingUtils.addSource(RegNext(retireCounterFix), "difftestCommit", ExcitingUtils.Debug) + ExcitingUtils.addSource(RegNext(retirePCFix), "difftestThisPC", ExcitingUtils.Debug)//first valid PC + ExcitingUtils.addSource(RegNext(retireInstFix), "difftestThisINST", ExcitingUtils.Debug)//first valid inst + ExcitingUtils.addSource(RegNext(skip.asUInt), "difftestSkip", ExcitingUtils.Debug) + 
ExcitingUtils.addSource(RegNext(isRVC.asUInt), "difftestIsRVC", ExcitingUtils.Debug) + ExcitingUtils.addSource(RegNext(wen.asUInt), "difftestWen", ExcitingUtils.Debug) + ExcitingUtils.addSource(RegNext(wpc), "difftestWpc", ExcitingUtils.Debug) + ExcitingUtils.addSource(RegNext(wdata), "difftestWdata", ExcitingUtils.Debug) + ExcitingUtils.addSource(RegNext(wdst), "difftestWdst", ExcitingUtils.Debug) + ExcitingUtils.addSource(RegNext(scFailed), "difftestScFailed", ExcitingUtils.Debug) + ExcitingUtils.addSource(RegNext(difftestIntrNO), "difftestIntrNO", ExcitingUtils.Debug) + ExcitingUtils.addSource(RegNext(difftestCause), "difftestCause", ExcitingUtils.Debug) val hitTrap = trapVec.reduce(_||_) val trapCode = PriorityMux(wdata.zip(trapVec).map(x => x._2 -> x._1)) diff --git a/src/main/scala/xiangshan/cache/atomics.scala b/src/main/scala/xiangshan/cache/atomics.scala index 4323cc14e19cf2e2f2772c6a83a8ca473b392184..8991f0a0f9ba480d99d2f28e045d261cdf3bec38 100644 --- a/src/main/scala/xiangshan/cache/atomics.scala +++ b/src/main/scala/xiangshan/cache/atomics.scala @@ -2,7 +2,6 @@ package xiangshan.cache import chisel3._ import chisel3.util._ -import chisel3.util.experimental.BoringUtils import utils.{XSDebug} @@ -69,7 +68,7 @@ class AtomicsPipe extends DCacheModule // --------------------------------------- // stage 2 val s2_req = RegNext(s1_req) - val s2_valid = RegNext(s1_valid && !io.lsu.s1_kill, init = false.B) + val s2_valid = RegNext(s1_valid, init = false.B) dump_pipeline_reqs("AtomicsPipe s2", s2_valid, s2_req) @@ -122,8 +121,6 @@ class AtomicsPipe extends DCacheModule val s2_sc_fail = s2_sc && !s2_lrsc_addr_match val s2_sc_resp = Mux(s2_sc_fail, 1.U, 0.U) - // BoringUtils.addSource(RegEnable(lrsc_addr, s2_valid && s2_lr), "difftestLrscAddr") - // we have permission on this block // but we can not finish in this pass // we need to go to miss queue to update meta and set dirty first diff --git a/src/main/scala/xiangshan/cache/atomicsMissQueue.scala 
b/src/main/scala/xiangshan/cache/atomicsMissQueue.scala index 490e27b82a9c933a9f88d265e1dbd7336546db00..54592f58e0c14ba87e4bb009f68e6edb48aa2a26 100644 --- a/src/main/scala/xiangshan/cache/atomicsMissQueue.scala +++ b/src/main/scala/xiangshan/cache/atomicsMissQueue.scala @@ -34,7 +34,6 @@ class AtomicsMissQueue extends DCacheModule io.replay.req.valid := false.B io.replay.req.bits := DontCare io.replay.resp.ready := false.B - io.replay.s1_kill := false.B io.miss_req.valid := false.B io.miss_req.bits := DontCare diff --git a/src/main/scala/xiangshan/cache/dcacheWrapper.scala b/src/main/scala/xiangshan/cache/dcacheWrapper.scala index da27669b678603924018d352c92ee87c79028668..eab412427a01a67557e4b53b53d2ed9fe4258387 100644 --- a/src/main/scala/xiangshan/cache/dcacheWrapper.scala +++ b/src/main/scala/xiangshan/cache/dcacheWrapper.scala @@ -25,7 +25,19 @@ class DCacheMeta extends DCacheBundle { val replay = Bool() // whether it's a replayed request? } -// ordinary load and special memory operations(lr/sc, atomics) +// for load from load unit +// cycle 0: vaddr +// cycle 1: paddr +class DCacheLoadReq extends DCacheBundle +{ + val cmd = UInt(M_SZ.W) + val addr = UInt(VAddrBits.W) + val data = UInt(DataBits.W) + val mask = UInt((DataBits/8).W) + val meta = new DCacheMeta +} + +// special memory operations(lr/sc, atomics) class DCacheWordReq extends DCacheBundle { val cmd = UInt(M_SZ.W) @@ -45,6 +57,16 @@ class DCacheLineReq extends DCacheBundle val meta = new DCacheMeta } +class DCacheLoadResp extends DCacheBundle +{ + val data = UInt(DataBits.W) + val meta = new DCacheMeta + // cache req missed, send it to miss queue + val miss = Bool() + // cache req nacked, replay it later + val nack = Bool() +} + class DCacheWordResp extends DCacheBundle { val data = UInt(DataBits.W) @@ -65,12 +87,19 @@ class DCacheLineResp extends DCacheBundle val nack = Bool() } -class DCacheWordIO extends DCacheBundle +class DCacheLoadIO extends DCacheBundle { - val req = DecoupledIO(new 
DCacheWordReq ) + val req = DecoupledIO(new DCacheWordReq) val resp = Flipped(DecoupledIO(new DCacheWordResp)) // kill previous cycle's req - val s1_kill = Output(Bool()) + val s1_kill = Output(Bool()) + val s1_paddr = Output(UInt(PAddrBits.W)) +} + +class DCacheWordIO extends DCacheBundle +{ + val req = DecoupledIO(new DCacheWordReq) + val resp = Flipped(DecoupledIO(new DCacheWordResp)) } class DCacheLineIO extends DCacheBundle @@ -80,7 +109,7 @@ class DCacheLineIO extends DCacheBundle } class DCacheToLsuIO extends DCacheBundle { - val load = Vec(LoadPipelineWidth, Flipped(new DCacheWordIO)) // for speculative load + val load = Vec(LoadPipelineWidth, Flipped(new DCacheLoadIO)) // for speculative load val lsroq = Flipped(new DCacheLineIO) // lsroq load/store val store = Flipped(new DCacheLineIO) // for sbuffer val atomics = Flipped(new DCacheWordIO) // atomics reqs @@ -229,6 +258,7 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame for (w <- 0 until LoadPipelineWidth) { val load_w_nack = nack_load(io.lsu.load(w).req.bits.addr) ldu(w).io.lsu.req <> io.lsu.load(w).req + ldu(w).io.lsu.s1_paddr <> io.lsu.load(w).s1_paddr ldu(w).io.nack := load_w_nack XSDebug(load_w_nack, s"LoadUnit $w nacked\n") @@ -289,8 +319,6 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame "MMIO requests should not go to cache") assert(!(atomicsReq.fire() && atomicsReq.bits.meta.tlb_miss), "TLB missed requests should not go to cache") - assert(!io.lsu.atomics.s1_kill, "Lsroq should never use s1 kill on atomics") - //---------------------------------------- // miss queue diff --git a/src/main/scala/xiangshan/cache/dtlb.scala b/src/main/scala/xiangshan/cache/dtlb.scala index 1b8543eb04fe8d16b1b81162d5b2a93c25946b93..163467653608409690adcaeaea6bc1fb1b97e969 100644 --- a/src/main/scala/xiangshan/cache/dtlb.scala +++ b/src/main/scala/xiangshan/cache/dtlb.scala @@ -4,11 +4,9 @@ import chisel3._ import chisel3.util._ import xiangshan._ 
import utils._ -import chisel3.util.experimental.BoringUtils import xiangshan.backend.decode.XSTrap import xiangshan.backend.roq.RoqPtr import xiangshan.mem._ -import bus.simplebus._ import xiangshan.backend.fu.HasCSRConst import chisel3.ExcitingUtils._ @@ -176,6 +174,8 @@ class TlbPtwIO extends TlbBundle { class TlbIO(Width: Int) extends TlbBundle { val requestor = Vec(Width, Flipped(new TlbRequestIO)) val ptw = new TlbPtwIO + val sfence = Input(new SfenceBundle) + val csr = Input(new TlbCsrBundle) override def cloneType: this.type = (new TlbIO(Width)).asInstanceOf[this.type] } @@ -188,16 +188,14 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{ val resp = io.requestor.map(_.resp) val ptw = io.ptw - val sfence = WireInit(0.U.asTypeOf(new SfenceBundle)) - val csr = WireInit(0.U.asTypeOf(new TlbCsrBundle)) + val sfence = io.sfence + val csr = io.csr val satp = csr.satp val priv = csr.priv val ifecth = if (isDtlb) false.B else true.B val mode = if (isDtlb) priv.dmode else priv.imode // val vmEnable = satp.mode === 8.U // && (mode < ModeM) // FIXME: fix me when boot xv6/linux... 
val vmEnable = satp.mode === 8.U && (mode < ModeM) - BoringUtils.addSink(sfence, "SfenceBundle") - BoringUtils.addSink(csr, "TLBCSRIO") val reqAddr = req.map(_.bits.vaddr.asTypeOf(vaBundle)) val cmd = req.map(_.bits.cmd) @@ -397,10 +395,21 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{ } object TLB { - def apply(in: Seq[BlockTlbRequestIO], width: Int, isDtlb: Boolean, shouldBlock: Boolean) = { + def apply + ( + in: Seq[BlockTlbRequestIO], + sfence: SfenceBundle, + csr: TlbCsrBundle, + width: Int, + isDtlb: Boolean, + shouldBlock: Boolean + ) = { require(in.length == width) val tlb = Module(new TLB(width, isDtlb)) + + tlb.io.sfence <> sfence + tlb.io.csr <> csr if (!shouldBlock) { // dtlb for (i <- 0 until width) { diff --git a/src/main/scala/xiangshan/cache/icache.scala b/src/main/scala/xiangshan/cache/icache.scala index bbd62f4305a0b97a97b86b91de69e79fba6e5fa8..c4f08fdbed232a12b462e8353c003177f9026c01 100644 --- a/src/main/scala/xiangshan/cache/icache.scala +++ b/src/main/scala/xiangshan/cache/icache.scala @@ -7,7 +7,6 @@ import xiangshan._ import xiangshan.frontend._ import utils._ import chisel3.ExcitingUtils._ -import chisel3.util.experimental.BoringUtils import chipsalliance.rocketchip.config.Parameters import freechips.rocketchip.tilelink.{TLBundleA,TLBundleD,TLBundleE,TLEdgeOut} @@ -91,6 +90,7 @@ class ICacheIO(edge: TLEdgeOut) extends ICacheBundle val resp = DecoupledIO(new ICacheResp) val tlb = new BlockTlbRequestIO val flush = Input(UInt(2.W)) + val fencei = Input(Bool()) } /* ------------------------------------------------------------ @@ -276,9 +276,8 @@ class ICacheImp(outer: ICache) extends ICacheModule(outer) .elsewhen((state=== s_wait_resp) && needFlush){ needFlush := false.B } //cache flush register - val icacheFlush = WireInit(false.B) + val icacheFlush = io.fencei val cacheflushed = RegInit(false.B) - BoringUtils.addSink(icacheFlush, "FenceI") XSDebug("[Fence.i] icacheFlush:%d, 
cacheflushed:%d\n",icacheFlush,cacheflushed) when(icacheFlush && (state =/= s_idle) && (state =/= s_wait_resp)){ cacheflushed := true.B} .elsewhen((state=== s_wait_resp) && cacheflushed) {cacheflushed := false.B } diff --git a/src/main/scala/xiangshan/cache/ldu.scala b/src/main/scala/xiangshan/cache/ldu.scala index a86cfb7c5d6a3e4a4d08c7a14587d9d6b9713c21..fc12b4e0d41cef65dbfc48d16c671da91253671f 100644 --- a/src/main/scala/xiangshan/cache/ldu.scala +++ b/src/main/scala/xiangshan/cache/ldu.scala @@ -8,7 +8,7 @@ import utils.XSDebug class LoadPipe extends DCacheModule { val io = IO(new DCacheBundle{ - val lsu = Flipped(new DCacheWordIO) + val lsu = Flipped(new DCacheLoadIO) val data_read = DecoupledIO(new L1DataReadReq) val data_resp = Input(Vec(nWays, Vec(blockRows, Bits(encRowBits.W)))) val meta_read = DecoupledIO(new L1MetaReadReq) @@ -56,7 +56,8 @@ class LoadPipe extends DCacheModule // stage 1 val s1_req = RegNext(s0_req) val s1_valid = RegNext(s0_valid, init = false.B) - val s1_addr = s1_req.addr + // in stage 1, load unit gets the physical address + val s1_addr = io.lsu.s1_paddr val s1_nack = RegNext(io.nack) dump_pipeline_reqs("LoadPipe s1", s1_valid, s1_req) @@ -76,6 +77,7 @@ class LoadPipe extends DCacheModule dump_pipeline_reqs("LoadPipe s2", s2_valid, s2_req) + val s2_addr = RegNext(s1_addr) val s2_tag_match_way = RegNext(s1_tag_match_way) val s2_tag_match = s2_tag_match_way.orR val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegNext(meta_resp(w).coh))) @@ -96,12 +98,12 @@ class LoadPipe extends DCacheModule val s2_data = Wire(Vec(nWays, UInt(encRowBits.W))) val data_resp = io.data_resp for (w <- 0 until nWays) { - s2_data(w) := data_resp(w)(get_row(s2_req.addr)) + s2_data(w) := data_resp(w)(get_row(s2_addr)) } val s2_data_muxed = Mux1H(s2_tag_match_way, s2_data) // the index of word in a row, in case rowBits != wordBits - val s2_word_idx = if (rowWords == 1) 0.U else s2_req.addr(log2Up(rowWords*wordBytes)-1, log2Up(wordBytes)) + val 
s2_word_idx = if (rowWords == 1) 0.U else s2_addr(log2Up(rowWords*wordBytes)-1, log2Up(wordBytes)) val s2_nack_hit = RegNext(s1_nack) // Can't allocate MSHR for same set currently being written back diff --git a/src/main/scala/xiangshan/cache/ptw.scala b/src/main/scala/xiangshan/cache/ptw.scala index 8a5bb3351dd7f0cec5cb73b66177a5566d94a5af..0f7b624efdc8cd6d496e47b6e3b38cce363cd8e1 100644 --- a/src/main/scala/xiangshan/cache/ptw.scala +++ b/src/main/scala/xiangshan/cache/ptw.scala @@ -5,9 +5,6 @@ import chisel3._ import chisel3.util._ import xiangshan._ import utils._ -import chisel3.util.experimental.BoringUtils -import xiangshan.backend.decode.XSTrap -import xiangshan.mem._ import chisel3.ExcitingUtils._ import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp} import freechips.rocketchip.tilelink.{TLClientNode, TLMasterParameters, TLMasterPortParameters} @@ -108,6 +105,8 @@ class PtwResp extends PtwBundle { class PtwIO extends PtwBundle { val tlb = Vec(PtwWidth, Flipped(new TlbPtwIO)) + val sfence = Input(new SfenceBundle) + val csr = Input(new TlbCsrBundle) } object ValidHold { @@ -159,12 +158,10 @@ class PTWImp(outer: PTW) extends PtwModule(outer){ val validOneCycle = OneCycleValid(arb.io.out.fire()) arb.io.out.ready := !valid// || resp(arbChosen).fire() - val sfence = WireInit(0.U.asTypeOf(new SfenceBundle)) - val csr = WireInit(0.U.asTypeOf(new TlbCsrBundle)) + val sfence = io.sfence + val csr = io.csr val satp = csr.satp val priv = csr.priv - BoringUtils.addSink(sfence, "SfenceBundle") - BoringUtils.addSink(csr, "TLBCSRIO") // two level: l2-tlb-cache && pde/pte-cache // l2-tlb-cache is ram-larger-edition tlb diff --git a/src/main/scala/xiangshan/frontend/BPU.scala b/src/main/scala/xiangshan/frontend/BPU.scala index 4edd1776d4409fa1c94b2856a5f7b505479a2787..d8229deb9bb9767abb850b757e25e1666d8c35a0 100644 --- a/src/main/scala/xiangshan/frontend/BPU.scala +++ b/src/main/scala/xiangshan/frontend/BPU.scala @@ -337,8 +337,16 @@ class BPUStage3 extends 
BPUStage { io.out.bits.brInfo(i).rasTopCtr := ras.io.branchInfo.rasTopCtr io.out.bits.brInfo(i).rasToqAddr := ras.io.branchInfo.rasToqAddr } - takens := VecInit((0 until PredictWidth).map(i => (brTakens(i) || jalrs(i)) && btbHits(i) || jals(i)|| rets(i))) - when(ras.io.is_ret && ras.io.out.valid){targetSrc(retIdx) := ras.io.out.bits.target} + takens := VecInit((0 until PredictWidth).map(i => { + ((brTakens(i) || jalrs(i)) && btbHits(i)) || + jals(i) || + (!ras.io.out.bits.specEmpty && rets(i)) || + (ras.io.out.bits.specEmpty && btbHits(i)) + } + )) + when(ras.io.is_ret && ras.io.out.valid){ + targetSrc(retIdx) := ras.io.out.bits.target + } } diff --git a/src/main/scala/xiangshan/frontend/Bim.scala b/src/main/scala/xiangshan/frontend/Bim.scala index 67a0f24688e24c4865f58ebd9d3679eaddf868d6..20481c6436691a8d37c9783a9bd9b2442d2f041c 100644 --- a/src/main/scala/xiangshan/frontend/Bim.scala +++ b/src/main/scala/xiangshan/frontend/Bim.scala @@ -5,7 +5,6 @@ import chisel3.util._ import xiangshan._ import xiangshan.backend.ALUOpType import utils._ -import chisel3.util.experimental.BoringUtils import xiangshan.backend.decode.XSTrap trait BimParams extends HasXSParameter { diff --git a/src/main/scala/xiangshan/frontend/Btb.scala b/src/main/scala/xiangshan/frontend/Btb.scala index 1aada7f1021e58600d9b39113fdf3cc71c63cb06..9370231a9b645b538b5d524b7ed48e9508489c9a 100644 --- a/src/main/scala/xiangshan/frontend/Btb.scala +++ b/src/main/scala/xiangshan/frontend/Btb.scala @@ -5,7 +5,6 @@ import chisel3.util._ import xiangshan._ import xiangshan.backend.ALUOpType import utils._ -import chisel3.util.experimental.BoringUtils import xiangshan.backend.decode.XSTrap import scala.math.min diff --git a/src/main/scala/xiangshan/frontend/FakeICache.scala b/src/main/scala/xiangshan/frontend/FakeICache.scala index 32dfd65c2bccea5afaa4d8ae9979b5d11a61926f..19c1d73b46a511f236c009f7735c6666dca7b435 100644 --- a/src/main/scala/xiangshan/frontend/FakeICache.scala +++ 
b/src/main/scala/xiangshan/frontend/FakeICache.scala @@ -4,7 +4,7 @@ import chisel3._ import chisel3.util._ import device.RAMHelper import xiangshan._ -import utils.{Debug, GTimer, XSDebug} +import utils.{GTimer, XSDebug} import xiangshan.backend.decode.isa import xiangshan.backend.decode.Decoder diff --git a/src/main/scala/xiangshan/frontend/Frontend.scala b/src/main/scala/xiangshan/frontend/Frontend.scala index f0fbb05223aa4377b9ce2459be26e749a15afe6f..6c41da0279df0dc5c17e1a607adcc292aea18df0 100644 --- a/src/main/scala/xiangshan/frontend/Frontend.scala +++ b/src/main/scala/xiangshan/frontend/Frontend.scala @@ -33,6 +33,8 @@ class Frontend extends XSModule { //itlb to ptw io.ptw <> TLB( in = Seq(io.icacheToTlb), + sfence = io.backend.sfence, + csr = io.backend.tlbCsrIO, width = 1, isDtlb = false, shouldBlock = true diff --git a/src/main/scala/xiangshan/frontend/RAS.scala b/src/main/scala/xiangshan/frontend/RAS.scala index b5912b1970da4702538afa5154d5e4341f5b0cd7..2242a2038d8f7ce5ae3f030e423bd4ba48bf4302 100644 --- a/src/main/scala/xiangshan/frontend/RAS.scala +++ b/src/main/scala/xiangshan/frontend/RAS.scala @@ -11,6 +11,7 @@ class RAS extends BasePredictor class RASResp extends Resp { val target =UInt(VAddrBits.W) + val specEmpty = Bool() } class RASBranchInfo extends Meta @@ -64,6 +65,7 @@ class RAS extends BasePredictor io.branchInfo.rasToqAddr := DontCare io.out.valid := !spec_is_empty && io.is_ret + io.out.bits.specEmpty := spec_is_empty // update spec RAS // speculative update RAS diff --git a/src/main/scala/xiangshan/mem/Memend.scala b/src/main/scala/xiangshan/mem/Memend.scala index 5be41ff7a867afdec8355cc1baf343b03ffae210..86aefd98e168a4252f115d9be4c67567c1c7426c 100644 --- a/src/main/scala/xiangshan/mem/Memend.scala +++ b/src/main/scala/xiangshan/mem/Memend.scala @@ -2,14 +2,11 @@ package xiangshan.mem import chisel3._ import chisel3.util._ -import chisel3.util.experimental.BoringUtils import xiangshan._ import utils._ -import 
chisel3.util.experimental.BoringUtils import xiangshan.backend.roq.RoqPtr - import xiangshan.cache._ -import bus.tilelink.{TLArbiter, TLCached, TLMasterUtilities, TLParameters} +import xiangshan.backend.exu.FenceToSbuffer object genWmask { def apply(addr: UInt, sizeEncode: UInt): UInt = { @@ -36,12 +33,13 @@ object genWdata { class LsPipelineBundle extends XSBundle { val vaddr = UInt(VAddrBits.W) val paddr = UInt(PAddrBits.W) - val func = UInt(6.W) + val func = UInt(6.W) //fixme??? val mask = UInt(8.W) val data = UInt(XLEN.W) val uop = new MicroOp val miss = Bool() + val tlbMiss = Bool() val mmio = Bool() val rollback = Bool() @@ -73,12 +71,16 @@ class MemToBackendIO extends XSBundle { // replay all instructions form dispatch val replayAll = ValidIO(new Redirect) // replay mem instructions form Load Queue/Store Queue - val tlbFeedback = Vec(exuParameters.LduCnt + exuParameters.LduCnt, ValidIO(new TlbFeedback)) + val tlbFeedback = Vec(exuParameters.LduCnt + exuParameters.StuCnt, ValidIO(new TlbFeedback)) val commits = Flipped(Vec(CommitWidth, Valid(new RoqCommit))) val dp1Req = Vec(RenameWidth, Flipped(DecoupledIO(new MicroOp))) val lsIdxs = Output(Vec(RenameWidth, new LSIdx)) val oldestStore = Output(Valid(new RoqPtr)) val roqDeqPtr = Input(new RoqPtr) + val exceptionAddr = new ExceptionAddrIO + val fenceToSbuffer = Flipped(new FenceToSbuffer) + val sfence = Input(new SfenceBundle) + val csr = Input(new TlbCsrBundle) } // Memory pipeline wrapper @@ -87,7 +89,7 @@ class MemToBackendIO extends XSBundle { class Memend extends XSModule { val io = IO(new Bundle{ val backend = new MemToBackendIO - val loadUnitToDcacheVec = Vec(exuParameters.LduCnt, new DCacheWordIO) + val loadUnitToDcacheVec = Vec(exuParameters.LduCnt, new DCacheLoadIO) val loadMiss = new DCacheLineIO val atomics = new DCacheWordIO val sbufferToDcache = new DCacheLineIO @@ -107,6 +109,8 @@ class Memend extends XSModule { // dtlb io.ptw <> dtlb.io.ptw + dtlb.io.sfence <> io.backend.sfence + dtlb.io.csr <> 
io.backend.csr // LoadUnit for (i <- 0 until exuParameters.LduCnt) { @@ -150,6 +154,7 @@ class Memend extends XSModule { lsroq.io.lsIdxs <> io.backend.lsIdxs lsroq.io.brqRedirect := io.backend.redirect lsroq.io.roqDeqPtr := io.backend.roqDeqPtr + io.backend.replayAll <> lsroq.io.rollback lsroq.io.dcache <> io.loadMiss @@ -164,10 +169,9 @@ class Memend extends XSModule { // flush sbuffer val fenceFlush = WireInit(false.B) val atomicsFlush = atomicsUnit.io.flush_sbuffer.valid - BoringUtils.addSink(fenceFlush, "FenceUnitSbufferFlush") - val sbEmpty = WireInit(false.B) - sbEmpty := sbuffer.io.flush.empty - BoringUtils.addSource(sbEmpty, "SBufferEmpty") + fenceFlush := io.backend.fenceToSbuffer.flushSb + val sbEmpty = sbuffer.io.flush.empty + io.backend.fenceToSbuffer.sbIsEmpty := sbEmpty // if both of them tries to flush sbuffer at the same time // something must have gone wrong assert(!(fenceFlush && atomicsFlush)) @@ -225,4 +229,9 @@ class Memend extends XSModule { assert(!loadUnits(0).io.ldout.valid) loadUnits(0).io.ldout.ready := false.B } + + lsroq.io.exceptionAddr.lsIdx := io.backend.exceptionAddr.lsIdx + lsroq.io.exceptionAddr.isStore := io.backend.exceptionAddr.isStore + io.backend.exceptionAddr.vaddr := Mux(atomicsUnit.io.exceptionAddr.valid, atomicsUnit.io.exceptionAddr.bits, lsroq.io.exceptionAddr.vaddr) + } diff --git a/src/main/scala/xiangshan/mem/lsqueue/separated/LSQWrapper.scala b/src/main/scala/xiangshan/mem/lsqueue/separated/LSQWrapper.scala index 07dec80164006f86b9c2e68a8b2fe654dc7cb3be..374c906577eef018ece5ad0c6ff4f20b80281eb8 100644 --- a/src/main/scala/xiangshan/mem/lsqueue/separated/LSQWrapper.scala +++ b/src/main/scala/xiangshan/mem/lsqueue/separated/LSQWrapper.scala @@ -10,6 +10,12 @@ import xiangshan.backend.LSUOpType import xiangshan.mem._ import xiangshan.backend.roq.RoqPtr +class ExceptionAddrIO extends XSBundle { + val lsIdx = Input(new LSIdx) + val isStore = Input(Bool()) + val vaddr = Output(UInt(VAddrBits.W)) +} + // Load / Store Queue 
Wrapper for XiangShan Out of Order LSU // // By using this Wrapper, interface of unified lsroq and ldq / stq are the same @@ -30,6 +36,7 @@ class LsqWrappper extends XSModule with HasDCacheParameters with NeedImpl { val uncache = new DCacheWordIO val roqDeqPtr = Input(new RoqPtr) val oldestStore = Output(Valid(new RoqPtr)) + val exceptionAddr = new ExceptionAddrIO }) if(EnableUnifiedLSQ){ @@ -49,6 +56,7 @@ class LsqWrappper extends XSModule with HasDCacheParameters with NeedImpl { lsroq.io.uncache <> io.uncache lsroq.io.roqDeqPtr <> io.roqDeqPtr lsroq.io.oldestStore <> io.oldestStore + lsroq.io.exceptionAddr <> io.exceptionAddr (0 until RenameWidth).map(i => { io.lsIdxs(i).lsroqIdx := lsroq.io.lsroqIdxs(i) }) @@ -66,7 +74,9 @@ class LsqWrappper extends XSModule with HasDCacheParameters with NeedImpl { loadQueue.io.rollback <> io.rollback loadQueue.io.dcache <> io.dcache loadQueue.io.roqDeqPtr <> io.roqDeqPtr - + loadQueue.io.exceptionAddr.lsIdx := io.exceptionAddr.lsIdx + loadQueue.io.exceptionAddr.isStore := DontCare + // store queue wiring // storeQueue.io <> DontCare storeQueue.io.dp1Req <> io.dp1Req @@ -77,10 +87,14 @@ class LsqWrappper extends XSModule with HasDCacheParameters with NeedImpl { storeQueue.io.commits <> io.commits storeQueue.io.roqDeqPtr <> io.roqDeqPtr storeQueue.io.oldestStore <> io.oldestStore - + storeQueue.io.exceptionAddr.lsIdx := io.exceptionAddr.lsIdx + storeQueue.io.exceptionAddr.isStore := DontCare + loadQueue.io.forward <> io.forward storeQueue.io.forward <> io.forward // overlap forwardMask & forwardData, DO NOT CHANGE SEQUENCE + io.exceptionAddr.vaddr := Mux(io.exceptionAddr.isStore, storeQueue.io.exceptionAddr.vaddr, loadQueue.io.exceptionAddr.vaddr) + // naive uncache arbiter val s_idle :: s_load :: s_store :: Nil = Enum(3) val uncacheState = RegInit(s_idle) @@ -117,7 +131,6 @@ class LsqWrappper extends XSModule with HasDCacheParameters with NeedImpl { }.otherwise{ io.uncache.resp <> storeQueue.io.uncache.resp } - 
io.uncache.s1_kill := false.B assert(!(loadQueue.io.uncache.req.valid && storeQueue.io.uncache.req.valid)) assert(!(loadQueue.io.uncache.resp.valid && storeQueue.io.uncache.resp.valid)) diff --git a/src/main/scala/xiangshan/mem/lsqueue/separated/LoadQueue.scala b/src/main/scala/xiangshan/mem/lsqueue/separated/LoadQueue.scala index 711b9d638b3f0b401981565e145c4faf080992ad..b0c847f1fdb569eacd89351dea814fe6befb166b 100644 --- a/src/main/scala/xiangshan/mem/lsqueue/separated/LoadQueue.scala +++ b/src/main/scala/xiangshan/mem/lsqueue/separated/LoadQueue.scala @@ -9,6 +9,7 @@ import xiangshan.cache.{DCacheWordIO, DCacheLineIO, TlbRequestIO, MemoryOpConsta import xiangshan.backend.LSUOpType import xiangshan.mem._ import xiangshan.backend.roq.RoqPtr +import xiangshan.backend.fu.fpu.boxF32ToF64 class LqPtr extends CircularQueuePtr(LqPtr.LoadQueueSize) { } @@ -31,13 +32,14 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP val brqRedirect = Input(Valid(new Redirect)) val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle))) val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // FIXME: Valid() only - val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback store + val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback load val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO)) val commits = Flipped(Vec(CommitWidth, Valid(new RoqCommit))) val rollback = Output(Valid(new Redirect)) // replay now starts from load instead of store val dcache = new DCacheLineIO val uncache = new DCacheWordIO val roqDeqPtr = Input(new RoqPtr) + val exceptionAddr = new ExceptionAddrIO // val refill = Flipped(Valid(new DCacheLineReq )) }) @@ -119,7 +121,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP io.loadIn(i).bits.uop.cf.exceptionVec.asUInt ) }.otherwise { - XSInfo(io.loadIn(i).valid, "load hit write to cbd idx %d pc 0x%x vaddr %x paddr %x data %x mask %x 
forwardData %x forwardMask: %x mmio %x roll %x exc %x\n", + XSInfo(io.loadIn(i).valid, "load hit write to cbd lqidx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n", io.loadIn(i).bits.uop.lqIdx.asUInt, io.loadIn(i).bits.uop.cf.pc, io.loadIn(i).bits.vaddr, @@ -251,10 +253,13 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP allocated(i) && valid(i) && !writebacked(i) })).asUInt() // use uint instead vec to reduce verilog lines val loadWbSel = Wire(Vec(StorePipelineWidth, UInt(log2Up(LoadQueueSize).W))) + val loadWbSelV= Wire(Vec(StorePipelineWidth, Bool())) val lselvec0 = PriorityEncoderOH(loadWbSelVec) val lselvec1 = PriorityEncoderOH(loadWbSelVec & (~lselvec0).asUInt) loadWbSel(0) := OHToUInt(lselvec0) + loadWbSelV(0):= lselvec0.orR loadWbSel(1) := OHToUInt(lselvec1) + loadWbSelV(1) := lselvec1.orR (0 until StorePipelineWidth).map(i => { // data select val rdata = data(loadWbSel(i)).data @@ -277,7 +282,8 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP LSUOpType.ld -> SignExt(rdataSel(63, 0), XLEN), LSUOpType.lbu -> ZeroExt(rdataSel(7, 0) , XLEN), LSUOpType.lhu -> ZeroExt(rdataSel(15, 0), XLEN), - LSUOpType.lwu -> ZeroExt(rdataSel(31, 0), XLEN) + LSUOpType.lwu -> ZeroExt(rdataSel(31, 0), XLEN), + LSUOpType.flw -> boxF32ToF64(rdataSel(31, 0)) )) io.ldout(i).bits.uop := uop(loadWbSel(i)) io.ldout(i).bits.uop.cf.exceptionVec := data(loadWbSel(i)).exception.asBools @@ -287,10 +293,12 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP io.ldout(i).bits.redirect := DontCare io.ldout(i).bits.brUpdate := DontCare io.ldout(i).bits.debug.isMMIO := data(loadWbSel(i)).mmio - io.ldout(i).valid := loadWbSelVec(loadWbSel(i)) + io.ldout(i).bits.fflags := DontCare + io.ldout(i).valid := loadWbSelVec(loadWbSel(i)) && loadWbSelV(i) when(io.ldout(i).fire()) { writebacked(loadWbSel(i)) := true.B - XSInfo(io.loadIn(i).valid, "load 
miss write to cbd idx %d pc 0x%x paddr %x data %x mmio %x\n", + XSInfo("load miss write to cbd roqidx %d lqidx %d pc 0x%x paddr %x data %x mmio %x\n", + io.ldout(i).bits.uop.roqIdx.asUInt, io.ldout(i).bits.uop.lqIdx.asUInt, io.ldout(i).bits.uop.cf.pc, data(loadWbSel(i)).paddr, @@ -379,6 +387,8 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP val xorMask = lqIdxMask ^ headMask val sameFlag = io.storeIn(i).bits.uop.lqIdx.flag === ringBufferHeadExtended.flag val toEnqPtrMask = Mux(sameFlag, xorMask, ~xorMask) + + // check if load already in lq needs to be rolledback val lqViolationVec = VecInit((0 until LoadQueueSize).map(j => { val addrMatch = allocated(j) && io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === data(j).paddr(PAddrBits - 1, 3) @@ -403,18 +413,19 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP val wbViolationUop = getOldestInTwo(wbViolationVec, io.loadIn.map(_.bits.uop)) XSDebug(wbViolation, p"${Binary(Cat(wbViolationVec))}, $wbViolationUop\n") - // check if rollback is needed for load in l4 - val l4ViolationVec = VecInit((0 until LoadPipelineWidth).map(j => { + // check if rollback is needed for load in l1 + val l1ViolationVec = VecInit((0 until LoadPipelineWidth).map(j => { io.forward(j).valid && // L4 valid\ isAfter(io.forward(j).uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) && io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.forward(j).paddr(PAddrBits - 1, 3) && (io.storeIn(i).bits.mask & io.forward(j).mask).orR })) - val l4Violation = l4ViolationVec.asUInt().orR() - val l4ViolationUop = getOldestInTwo(l4ViolationVec, io.forward.map(_.uop)) + val l1Violation = l1ViolationVec.asUInt().orR() + val l1ViolationUop = getOldestInTwo(l1ViolationVec, io.forward.map(_.uop)) + XSDebug(l1Violation, p"${Binary(Cat(l1ViolationVec))}, $l1ViolationUop\n") - val rollbackValidVec = Seq(lqViolation, wbViolation, l4Violation) - val rollbackUopVec = Seq(lqViolationUop, wbViolationUop, l4ViolationUop) + val 
rollbackValidVec = Seq(lqViolation, wbViolation, l1Violation) + val rollbackUopVec = Seq(lqViolationUop, wbViolationUop, l1ViolationUop) rollback(i).valid := Cat(rollbackValidVec).orR val mask = getAfterMask(rollbackValidVec, rollbackUopVec) val oneAfterZero = mask(1)(0) @@ -428,6 +439,11 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP rollback(i).bits.isException := false.B rollback(i).bits.isFlushPipe := false.B + XSDebug( + l1Violation, + "need rollback (l4 load) pc %x roqidx %d target %x\n", + io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, l1ViolationUop.roqIdx.asUInt + ) XSDebug( lqViolation, "need rollback (ld wb before store) pc %x roqidx %d target %x\n", @@ -438,11 +454,6 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP "need rollback (ld/st wb together) pc %x roqidx %d target %x\n", io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, wbViolationUop.roqIdx.asUInt ) - XSDebug( - l4Violation, - "need rollback (l4 load) pc %x roqidx %d target %x\n", - io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, l4ViolationUop.roqIdx.asUInt - ) }.otherwise { rollback(i).valid := false.B } @@ -487,7 +498,6 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP io.uncache.req.bits.meta.replay := false.B io.uncache.resp.ready := true.B - io.uncache.s1_kill := false.B when(io.uncache.req.fire()){ pending(ringBufferTail) := false.B @@ -514,10 +524,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP } // Read vaddr for mem exception - val mexcLsIdx = WireInit(0.U.asTypeOf(new LSIdx())) - val memExceptionAddr = WireInit(data(mexcLsIdx.lqIdx.value).vaddr) - ExcitingUtils.addSink(mexcLsIdx, "EXECPTION_LSROQIDX") - ExcitingUtils.addSource(memExceptionAddr, "EXECPTION_LOAD_VADDR") + io.exceptionAddr.vaddr := data(io.exceptionAddr.lsIdx.lqIdx.value).vaddr // misprediction recovery / exception 
redirect // invalidate lq term using robIdx @@ -558,7 +565,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP for (i <- 0 until LoadQueueSize) { if (i % 4 == 0) XSDebug("") - XSDebug(false, true.B, "%x ", uop(i).cf.pc) + XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, data(i).paddr) PrintFlag(allocated(i), "a") PrintFlag(allocated(i) && valid(i), "v") PrintFlag(allocated(i) && writebacked(i), "w") diff --git a/src/main/scala/xiangshan/mem/lsqueue/separated/StoreQueue.scala b/src/main/scala/xiangshan/mem/lsqueue/separated/StoreQueue.scala index 99e70138b0b39ab89e0334049b73495f4fed5f84..6da88e5f2db7108d29b97edaa8778bbf7f240f02 100644 --- a/src/main/scala/xiangshan/mem/lsqueue/separated/StoreQueue.scala +++ b/src/main/scala/xiangshan/mem/lsqueue/separated/StoreQueue.scala @@ -36,6 +36,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue val roqDeqPtr = Input(new RoqPtr) // val refill = Flipped(Valid(new DCacheLineReq )) val oldestStore = Output(Valid(new RoqPtr)) + val exceptionAddr = new ExceptionAddrIO }) val uop = Reg(Vec(StoreQueueSize, new MicroOp)) @@ -178,6 +179,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue when(io.stout(i).fire()) { writebacked(storeWbSel(i)) := true.B } + io.stout(i).bits.fflags := DontCare }) // remove retired insts from sq, add retired store to sbuffer @@ -271,6 +273,8 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue io.sbuffer(i).bits.meta.mmio := mmio io.sbuffer(i).bits.meta.mask := data(ptr).mask + XSDebug(io.sbuffer(i).fire(), "[SBUFFER STORE REQ] pa %x data %x\n", data(ptr).paddr, data(ptr).data) + // update sq meta if store inst is send to sbuffer when(storeCommitValid(i) && (mmio || io.sbuffer(i).ready)) { allocated(ptr) := false.B @@ -302,7 +306,6 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue io.uncache.req.bits.meta.replay := false.B 
io.uncache.resp.ready := true.B - io.uncache.s1_kill := false.B when(io.uncache.req.fire()){ pending(ringBufferTail) := false.B @@ -325,10 +328,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue } // Read vaddr for mem exception - val mexcLsIdx = WireInit(0.U.asTypeOf(new LSIdx())) - val memExceptionAddr = WireInit(data(mexcLsIdx.sqIdx.value).vaddr) - ExcitingUtils.addSink(mexcLsIdx, "EXECPTION_LSROQIDX") - ExcitingUtils.addSource(memExceptionAddr, "EXECPTION_STORE_VADDR") + io.exceptionAddr.vaddr := data(io.exceptionAddr.lsIdx.sqIdx.value).vaddr // misprediction recovery / exception redirect // invalidate sq term using robIdx @@ -364,7 +364,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue for (i <- 0 until StoreQueueSize) { if (i % 4 == 0) XSDebug("") - XSDebug(false, true.B, "%x ", uop(i).cf.pc) + XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, data(i).paddr) PrintFlag(allocated(i), "a") PrintFlag(allocated(i) && valid(i), "v") PrintFlag(allocated(i) && writebacked(i), "w") diff --git a/src/main/scala/xiangshan/mem/lsqueue/unified/Lsroq.scala b/src/main/scala/xiangshan/mem/lsqueue/unified/Lsroq.scala index 62a861cb3257a324153b6ecb3aa8e9522ff5b138..cf1e9e7e28f6d745210e84ef541ca4e962309974 100644 --- a/src/main/scala/xiangshan/mem/lsqueue/unified/Lsroq.scala +++ b/src/main/scala/xiangshan/mem/lsqueue/unified/Lsroq.scala @@ -7,6 +7,7 @@ import xiangshan._ import xiangshan.cache._ import xiangshan.cache.{DCacheWordIO, DCacheLineIO, TlbRequestIO, MemoryOpConstants} import xiangshan.backend.LSUOpType +import xiangshan.backend.fu.fpu.boxF32ToF64 import xiangshan.backend.roq.RoqPtr class LsRoqEntry extends XSBundle { @@ -45,6 +46,7 @@ class Lsroq extends XSModule with HasDCacheParameters with HasCircularQueuePtrHe val dcache = new DCacheLineIO val uncache = new DCacheWordIO val roqDeqPtr = Input(new RoqPtr) + val exceptionAddr = new ExceptionAddrIO // val refill = Flipped(Valid(new DCacheLineReq )) 
}) @@ -323,6 +325,7 @@ class Lsroq extends XSModule with HasDCacheParameters with HasCircularQueuePtrHe LSUOpType.lb -> SignExt(rdataSel(7, 0) , XLEN), LSUOpType.lh -> SignExt(rdataSel(15, 0), XLEN), LSUOpType.lw -> SignExt(rdataSel(31, 0), XLEN), + LSUOpType.flw -> boxF32ToF64(rdataSel(31, 0)), LSUOpType.ld -> SignExt(rdataSel(63, 0), XLEN), LSUOpType.lbu -> ZeroExt(rdataSel(7, 0) , XLEN), LSUOpType.lhu -> ZeroExt(rdataSel(15, 0), XLEN), @@ -332,6 +335,7 @@ class Lsroq extends XSModule with HasDCacheParameters with HasCircularQueuePtrHe io.ldout(i).bits.uop.cf.exceptionVec := data(loadWbSel(i)).exception.asBools io.ldout(i).bits.uop.lsroqIdx := loadWbSel(i) io.ldout(i).bits.data := rdataPartialLoad + io.ldout(i).bits.fflags := DontCare io.ldout(i).bits.redirectValid := false.B io.ldout(i).bits.redirect := DontCare io.ldout(i).bits.brUpdate := DontCare @@ -368,6 +372,7 @@ class Lsroq extends XSModule with HasDCacheParameters with HasCircularQueuePtrHe io.stout(i).bits.uop.lsroqIdx := storeWbSel(i) io.stout(i).bits.uop.cf.exceptionVec := data(storeWbSel(i)).exception.asBools io.stout(i).bits.data := data(storeWbSel(i)).data + io.stout(i).bits.fflags := DontCare io.stout(i).bits.redirectValid := false.B io.stout(i).bits.redirect := DontCare io.stout(i).bits.brUpdate := DontCare @@ -585,18 +590,19 @@ class Lsroq extends XSModule with HasDCacheParameters with HasCircularQueuePtrHe val wbViolationUop = getOldestInTwo(wbViolationVec, io.loadIn.map(_.bits.uop)) XSDebug(wbViolation, p"${Binary(Cat(wbViolationVec))}, $wbViolationUop\n") - // check if rollback is needed for load in l4 - val l4ViolationVec = VecInit((0 until LoadPipelineWidth).map(j => { + // check if rollback is needed for load in l1 + val l1ViolationVec = VecInit((0 until LoadPipelineWidth).map(j => { io.forward(j).valid && // L4 valid\ isAfter(io.forward(j).uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) && io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.forward(j).paddr(PAddrBits - 1, 3) && 
(io.storeIn(i).bits.mask & io.forward(j).mask).orR })) - val l4Violation = l4ViolationVec.asUInt().orR() - val l4ViolationUop = getOldestInTwo(l4ViolationVec, io.forward.map(_.uop)) + val l1Violation = l1ViolationVec.asUInt().orR() + val l1ViolationUop = getOldestInTwo(l1ViolationVec, io.forward.map(_.uop)) + XSDebug(l1Violation, p"${Binary(Cat(l1ViolationVec))}, $l1ViolationUop\n") - val rollbackValidVec = Seq(lsroqViolation, wbViolation, l4Violation) - val rollbackUopVec = Seq(lsroqViolationUop, wbViolationUop, l4ViolationUop) + val rollbackValidVec = Seq(lsroqViolation, wbViolation, l1Violation) + val rollbackUopVec = Seq(lsroqViolationUop, wbViolationUop, l1ViolationUop) rollback(i).valid := Cat(rollbackValidVec).orR val mask = getAfterMask(rollbackValidVec, rollbackUopVec) val oneAfterZero = mask(1)(0) @@ -610,6 +616,12 @@ class Lsroq extends XSModule with HasDCacheParameters with HasCircularQueuePtrHe rollback(i).bits.isException := false.B rollback(i).bits.isFlushPipe := false.B + XSDebug( + l1Violation, + "need rollback (l4 load) pc %x roqidx %d target %x\n", + io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, l1ViolationUop.roqIdx.asUInt + ) + XSDebug( lsroqViolation, "need rollback (ld wb before store) pc %x roqidx %d target %x\n", @@ -620,11 +632,6 @@ class Lsroq extends XSModule with HasDCacheParameters with HasCircularQueuePtrHe "need rollback (ld/st wb together) pc %x roqidx %d target %x\n", io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, wbViolationUop.roqIdx.asUInt ) - XSDebug( - l4Violation, - "need rollback (l4 load) pc %x roqidx %d target %x\n", - io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, l4ViolationUop.roqIdx.asUInt - ) }.otherwise { rollback(i).valid := false.B } @@ -669,7 +676,6 @@ class Lsroq extends XSModule with HasDCacheParameters with HasCircularQueuePtrHe io.uncache.req.bits.meta.replay := false.B io.uncache.resp.ready := true.B - io.uncache.s1_kill := false.B 
when(io.uncache.req.fire()){ pending(ringBufferTail) := false.B @@ -696,10 +702,7 @@ class Lsroq extends XSModule with HasDCacheParameters with HasCircularQueuePtrHe } // Read vaddr for mem exception - val mexcLsroqIdx = WireInit(0.U(LsroqIdxWidth.W)) - val memExceptionAddr = WireInit(data(mexcLsroqIdx(InnerLsroqIdxWidth - 1, 0)).vaddr) - ExcitingUtils.addSink(mexcLsroqIdx, "EXECPTION_LSROQIDX") - ExcitingUtils.addSource(memExceptionAddr, "EXECPTION_VADDR") + io.exceptionAddr.vaddr := data(io.exceptionAddr.lsIdx.lsroqIdx(InnerLsroqIdxWidth - 1, 0)).vaddr // misprediction recovery / exception redirect // invalidate lsroq term using robIdx diff --git a/src/main/scala/xiangshan/mem/pipeline/AtomicsUnit.scala b/src/main/scala/xiangshan/mem/pipeline/AtomicsUnit.scala index 964131e69926275622d63d949602002535ca5938..966e170d459551766a19e06ec8b0bb00a501bd29 100644 --- a/src/main/scala/xiangshan/mem/pipeline/AtomicsUnit.scala +++ b/src/main/scala/xiangshan/mem/pipeline/AtomicsUnit.scala @@ -16,6 +16,7 @@ class AtomicsUnit extends XSModule with MemoryOpConstants{ val flush_sbuffer = new SbufferFlushBundle val tlbFeedback = ValidIO(new TlbFeedback) val redirect = Flipped(ValidIO(new Redirect)) + val exceptionAddr = ValidIO(UInt(VAddrBits.W)) }) //------------------------------------------------------- @@ -31,8 +32,8 @@ class AtomicsUnit extends XSModule with MemoryOpConstants{ val resp_data = Reg(UInt()) val is_lrsc_valid = Reg(Bool()) - ExcitingUtils.addSource(in.src1, "ATOM_EXECPTION_VADDR") - ExcitingUtils.addSource(atom_override_xtval, "ATOM_OVERRIDE_XTVAL") + io.exceptionAddr.valid := atom_override_xtval + io.exceptionAddr.bits := in.src1 // assign default value to output signals io.in.ready := false.B @@ -41,7 +42,6 @@ class AtomicsUnit extends XSModule with MemoryOpConstants{ io.dcache.req.valid := false.B io.dcache.req.bits := DontCare - io.dcache.s1_kill := false.B io.dcache.resp.ready := false.B io.dtlb.req.valid := false.B diff --git 
a/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala b/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala index 30bf974215d8c0d52b743149e3429bd59a631794..0920f150dd0493bcc169441e47b7750a9e24b587 100644 --- a/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala +++ b/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala @@ -4,8 +4,10 @@ import chisel3._ import chisel3.util._ import utils._ import xiangshan._ -import xiangshan.cache.{DCacheWordIO, TlbRequestIO, TlbCmd, MemoryOpConstants} +import xiangshan.cache._ +// import xiangshan.cache.{DCacheWordIO, TlbRequestIO, TlbCmd, MemoryOpConstants, TlbReq, DCacheLoadReq, DCacheWordResp} import xiangshan.backend.LSUOpType +import xiangshan.backend.fu.fpu.boxF32ToF64 class LoadToLsroqIO extends XSBundle { val loadIn = ValidIO(new LsPipelineBundle) @@ -13,250 +15,173 @@ class LoadToLsroqIO extends XSBundle { val forward = new LoadForwardQueryIO } -class LoadUnit extends XSModule { +// Load Pipeline Stage 0 +// Generate addr, use addr to query DCache and DTLB +class LoadUnit_S0 extends XSModule { val io = IO(new Bundle() { - val ldin = Flipped(Decoupled(new ExuInput)) - val ldout = Decoupled(new ExuOutput) + val in = Flipped(Decoupled(new ExuInput)) + val out = Decoupled(new LsPipelineBundle) val redirect = Flipped(ValidIO(new Redirect)) + val dtlbReq = Valid(new TlbReq) + val dtlbResp = Flipped(Valid(new TlbResp)) val tlbFeedback = ValidIO(new TlbFeedback) - val dcache = new DCacheWordIO - val dtlb = new TlbRequestIO() - val sbuffer = new LoadForwardQueryIO - val lsroq = new LoadToLsroqIO + val dcacheReq = DecoupledIO(new DCacheLoadReq) }) - - when(io.ldin.valid){ - XSDebug("load enpipe %x iw %x fw %x\n", io.ldin.bits.uop.cf.pc, io.ldin.bits.uop.ctrl.rfWen, io.ldin.bits.uop.ctrl.fpWen) - } - //------------------------------------------------------- - // Load Pipeline - //------------------------------------------------------- - - val l2_out = Wire(Decoupled(new LsPipelineBundle)) - val l4_out = Wire(Decoupled(new 
LsPipelineBundle)) - val l5_in = Wire(Flipped(Decoupled(new LsPipelineBundle))) - - //------------------------------------------------------- - // LD Pipeline Stage 2 - // Generate addr, use addr to query DCache Tag and DTLB - //------------------------------------------------------- - - val l2_dtlb_hit = Wire(new Bool()) - val l2_dtlb_miss = Wire(new Bool()) - val l2_dcache = Wire(new Bool()) - val l2_mmio = Wire(new Bool()) - val isMMIOReq = Wire(new Bool()) - - // send req to dtlb - io.dtlb.req.valid := l2_out.valid - io.dtlb.req.bits.vaddr := l2_out.bits.vaddr - io.dtlb.req.bits.cmd := TlbCmd.read - io.dtlb.req.bits.roqIdx := l2_out.bits.uop.roqIdx - io.dtlb.req.bits.debug.pc := l2_out.bits.uop.cf.pc - io.dtlb.req.bits.debug.lsroqIdx := l2_out.bits.uop.lsroqIdx // FIXME: need update - - l2_dtlb_hit := io.dtlb.resp.valid && !io.dtlb.resp.bits.miss - l2_dtlb_miss := io.dtlb.resp.valid && io.dtlb.resp.bits.miss - isMMIOReq := AddressSpace.isMMIO(io.dtlb.resp.bits.paddr) - l2_dcache := l2_dtlb_hit && !isMMIOReq - l2_mmio := l2_dtlb_hit && isMMIOReq - - // l2_out is used to generate dcache req - l2_out.bits := DontCare - l2_out.bits.vaddr := io.ldin.bits.src1 + io.ldin.bits.uop.ctrl.imm - l2_out.bits.paddr := io.dtlb.resp.bits.paddr - l2_out.bits.mask := genWmask(l2_out.bits.vaddr, io.ldin.bits.uop.ctrl.fuOpType(1,0)) - l2_out.bits.uop := io.ldin.bits.uop - l2_out.bits.miss := false.B - l2_out.bits.mmio := l2_mmio - l2_out.valid := io.ldin.valid && !io.ldin.bits.uop.roqIdx.needFlush(io.redirect) - // when we are sure it's a MMIO req, we do not need to wait for cache ready - l2_out.ready := (l2_dcache && io.dcache.req.ready) || l2_mmio || l2_dtlb_miss - io.ldin.ready := l2_out.ready - - // exception check - val addrAligned = LookupTree(io.ldin.bits.uop.ctrl.fuOpType(1,0), List( - "b00".U -> true.B, //b - "b01".U -> (l2_out.bits.vaddr(0) === 0.U), //h - "b10".U -> (l2_out.bits.vaddr(1,0) === 0.U), //w - "b11".U -> (l2_out.bits.vaddr(2,0) === 0.U) //d + val s0_uop = 
io.in.bits.uop + val s0_vaddr = io.in.bits.src1 + s0_uop.ctrl.imm + val s0_paddr = io.dtlbResp.bits.paddr + val s0_tlb_miss = io.dtlbResp.bits.miss + val s0_mask = genWmask(s0_vaddr, s0_uop.ctrl.fuOpType(1,0)) + + // query DTLB + io.dtlbReq.valid := io.out.valid + io.dtlbReq.bits.vaddr := s0_vaddr + io.dtlbReq.bits.cmd := TlbCmd.read + io.dtlbReq.bits.roqIdx := s0_uop.roqIdx + io.dtlbReq.bits.debug.pc := s0_uop.cf.pc + io.dtlbReq.bits.debug.lsroqIdx := s0_uop.lsroqIdx + + // feedback tlb result to RS + // Note: can be moved to s1 + io.tlbFeedback.valid := io.out.valid + io.tlbFeedback.bits.hit := !s0_tlb_miss + io.tlbFeedback.bits.roqIdx := s0_uop.roqIdx + + // query DCache + io.dcacheReq.valid := io.in.valid && !s0_uop.roqIdx.needFlush(io.redirect) + io.dcacheReq.bits.cmd := MemoryOpConstants.M_XRD + io.dcacheReq.bits.addr := s0_vaddr + io.dcacheReq.bits.mask := s0_mask + io.dcacheReq.bits.data := DontCare + + // TODO: update cache meta + io.dcacheReq.bits.meta.id := DontCare + io.dcacheReq.bits.meta.vaddr := s0_vaddr + io.dcacheReq.bits.meta.paddr := DontCare + io.dcacheReq.bits.meta.uop := s0_uop + io.dcacheReq.bits.meta.mmio := false.B + io.dcacheReq.bits.meta.tlb_miss := false.B + io.dcacheReq.bits.meta.mask := s0_mask + io.dcacheReq.bits.meta.replay := false.B + + val addrAligned = LookupTree(s0_uop.ctrl.fuOpType(1, 0), List( + "b00".U -> true.B, //b + "b01".U -> (s0_vaddr(0) === 0.U), //h + "b10".U -> (s0_vaddr(1, 0) === 0.U), //w + "b11".U -> (s0_vaddr(2, 0) === 0.U) //d )) - l2_out.bits.uop.cf.exceptionVec(loadAddrMisaligned) := !addrAligned - l2_out.bits.uop.cf.exceptionVec(loadPageFault) := io.dtlb.resp.bits.excp.pf.ld - - // send result to dcache - // never send tlb missed or MMIO reqs to dcache - io.dcache.req.valid := l2_dcache - - io.dcache.req.bits.cmd := MemoryOpConstants.M_XRD - // TODO: vaddr - io.dcache.req.bits.addr := io.dtlb.resp.bits.paddr - io.dcache.req.bits.data := DontCare - io.dcache.req.bits.mask := l2_out.bits.mask - - 
io.dcache.req.bits.meta.id := DontCare - io.dcache.req.bits.meta.vaddr := l2_out.bits.vaddr - io.dcache.req.bits.meta.paddr := io.dtlb.resp.bits.paddr - io.dcache.req.bits.meta.uop := l2_out.bits.uop - io.dcache.req.bits.meta.mmio := isMMIOReq - io.dcache.req.bits.meta.tlb_miss := io.dtlb.resp.bits.miss - io.dcache.req.bits.meta.mask := l2_out.bits.mask - io.dcache.req.bits.meta.replay := false.B - - - val l2_tlbFeedback = Wire(new TlbFeedback) - l2_tlbFeedback.hit := !io.dtlb.resp.bits.miss - l2_tlbFeedback.roqIdx := l2_out.bits.uop.roqIdx - - // dump l2 - XSDebug(l2_out.valid, "L2: pc 0x%x addr 0x%x -> 0x%x op %b data 0x%x mask %x dltb_miss %b dcache %b mmio %b\n", - l2_out.bits.uop.cf.pc, l2_out.bits.vaddr, l2_out.bits.paddr, - l2_out.bits.uop.ctrl.fuOpType, l2_out.bits.data, l2_out.bits.mask, - l2_dtlb_miss, l2_dcache, l2_mmio) - - XSDebug(l2_out.fire(), "load req: pc 0x%x addr 0x%x -> 0x%x op %b\n", - l2_out.bits.uop.cf.pc, l2_out.bits.vaddr, l2_out.bits.paddr, l2_out.bits.uop.ctrl.fuOpType) - - XSDebug(io.dcache.req.valid, p"dcache req(${io.dcache.req.valid} ${io.dcache.req.ready}): pc:0x${Hexadecimal(io.dcache.req.bits.meta.uop.cf.pc)} roqIdx:${io.dcache.req.bits.meta.uop.roqIdx} lsroqIdx:${io.dcache.req.bits.meta.uop.lsroqIdx} addr:0x${Hexadecimal(io.dcache.req.bits.addr)} vaddr:0x${Hexadecimal(io.dcache.req.bits.meta.vaddr)} paddr:0x${Hexadecimal(io.dcache.req.bits.meta.paddr)} mmio:${io.dcache.req.bits.meta.mmio} tlb_miss:${io.dcache.req.bits.meta.tlb_miss} mask:${io.dcache.req.bits.meta.mask}\n") - - //------------------------------------------------------- - // LD Pipeline Stage 3 - // Compare tag, use addr to query DCache Data - //------------------------------------------------------- - - val l3_valid = RegNext(l2_out.fire(), false.B) - val l3_dtlb_miss = RegEnable(next = l2_dtlb_miss, enable = l2_out.fire(), init = false.B) - val l3_dcache = RegEnable(next = l2_dcache, enable = l2_out.fire(), init = false.B) - val l3_tlbFeedback = RegEnable(next = 
l2_tlbFeedback, enable = l2_out.fire()) - val l3_bundle = RegEnable(next = l2_out.bits, enable = l2_out.fire()) - val l3_uop = l3_bundle.uop - // dltb miss reqs ends here - val l3_passdown = l3_valid && !l3_dtlb_miss && !l3_uop.roqIdx.needFlush(io.redirect) - - io.tlbFeedback.valid := l3_valid - io.tlbFeedback.bits := l3_tlbFeedback - io.dcache.s1_kill := l3_valid && l3_dcache && l3_uop.roqIdx.needFlush(io.redirect) - - // dump l3 - XSDebug(l3_valid, "l3: pc 0x%x addr 0x%x -> 0x%x op %b data 0x%x mask %x dltb_miss %b dcache %b mmio %b\n", - l3_bundle.uop.cf.pc, l3_bundle.vaddr, l3_bundle.paddr, - l3_bundle.uop.ctrl.fuOpType, l3_bundle.data, l3_bundle.mask, - l3_dtlb_miss, l3_dcache, l3_bundle.mmio) - - XSDebug(io.tlbFeedback.valid, "tlbFeedback: hit %b roqIdx %d\n", - io.tlbFeedback.bits.hit, io.tlbFeedback.bits.roqIdx.asUInt) - - XSDebug(io.dcache.s1_kill, "l3: dcache s1_kill\n") - - // Done in Dcache - - //------------------------------------------------------- - // LD Pipeline Stage 4 - // Dcache return result, do tag ecc check and forward check - //------------------------------------------------------- - - val l4_valid = RegNext(l3_passdown, false.B) - val l4_dcache = RegNext(l3_dcache, false.B) - val l4_bundle = RegNext(l3_bundle) - - val fullForward = Wire(Bool()) - - assert(!(io.dcache.resp.ready && !io.dcache.resp.valid), "DCache response got lost") - io.dcache.resp.ready := l4_valid && l4_dcache - when (io.dcache.resp.fire()) { - l4_out.bits := DontCare - l4_out.bits.data := io.dcache.resp.bits.data - l4_out.bits.paddr := io.dcache.resp.bits.meta.paddr - l4_out.bits.uop := io.dcache.resp.bits.meta.uop - l4_out.bits.mmio := io.dcache.resp.bits.meta.mmio - l4_out.bits.mask := io.dcache.resp.bits.meta.mask - // when we can get the data completely from forward - // we no longer need to access dcache - // treat nack as miss - l4_out.bits.miss := Mux(fullForward, false.B, - io.dcache.resp.bits.miss || io.dcache.resp.bits.nack) - XSDebug(io.dcache.resp.fire(), 
p"DcacheResp(l4): data:0x${Hexadecimal(io.dcache.resp.bits.data)} paddr:0x${Hexadecimal(io.dcache.resp.bits.meta.paddr)} pc:0x${Hexadecimal(io.dcache.resp.bits.meta.uop.cf.pc)} roqIdx:${io.dcache.resp.bits.meta.uop.roqIdx} lsroqIdx:${io.dcache.resp.bits.meta.uop.lsroqIdx} miss:${io.dcache.resp.bits.miss}\n") - } .otherwise { - l4_out.bits := l4_bundle - } - l4_out.valid := l4_valid && !l4_out.bits.uop.roqIdx.needFlush(io.redirect) - - // Store addr forward match - // If match, get data / fmask from store queue / store buffer - - // io.lsroq.forward := DontCare - io.lsroq.forward.paddr := l4_out.bits.paddr - io.lsroq.forward.mask := io.dcache.resp.bits.meta.mask - io.lsroq.forward.lsroqIdx := l4_out.bits.uop.lsroqIdx - io.lsroq.forward.sqIdx := l4_out.bits.uop.sqIdx - io.lsroq.forward.uop := l4_out.bits.uop - io.lsroq.forward.pc := l4_out.bits.uop.cf.pc - io.lsroq.forward.valid := io.dcache.resp.valid //TODO: opt timing - - io.sbuffer.paddr := l4_out.bits.paddr - io.sbuffer.mask := io.dcache.resp.bits.meta.mask - io.sbuffer.lsroqIdx := l4_out.bits.uop.lsroqIdx - io.sbuffer.sqIdx := l4_out.bits.uop.sqIdx - io.sbuffer.uop := DontCare - io.sbuffer.pc := l4_out.bits.uop.cf.pc - io.sbuffer.valid := l4_out.valid - - val forwardVec = WireInit(io.sbuffer.forwardData) - val forwardMask = WireInit(io.sbuffer.forwardMask) + + io.out.valid := io.dcacheReq.fire() // dcache may not accept load request + io.out.bits := DontCare + io.out.bits.vaddr := s0_vaddr + io.out.bits.paddr := s0_paddr + io.out.bits.tlbMiss := io.dtlbResp.bits.miss + io.out.bits.mask := s0_mask + io.out.bits.uop := s0_uop + io.out.bits.uop.cf.exceptionVec(loadAddrMisaligned) := !addrAligned + io.out.bits.uop.cf.exceptionVec(loadPageFault) := io.dtlbResp.bits.excp.pf.ld + + io.in.ready := io.out.fire() + + XSDebug(io.dcacheReq.fire(), "[DCACHE LOAD REQ] pc %x vaddr %x paddr will be %x\n", + s0_uop.cf.pc, s0_vaddr, s0_paddr + ) +} + + +// Load Pipeline Stage 1 +// TLB resp (send paddr to dcache) +class 
LoadUnit_S1 extends XSModule { + val io = IO(new Bundle() { + val in = Flipped(Decoupled(new LsPipelineBundle)) + val out = Decoupled(new LsPipelineBundle) + val redirect = Flipped(ValidIO(new Redirect)) + val s1_paddr = Output(UInt(PAddrBits.W)) + val sbuffer = new LoadForwardQueryIO + val lsroq = new LoadForwardQueryIO + }) + + val s1_uop = io.in.bits.uop + val s1_paddr = io.in.bits.paddr + val s1_tlb_miss = io.in.bits.tlbMiss + val s1_mmio = !s1_tlb_miss && AddressSpace.isMMIO(s1_paddr) + val s1_mask = io.in.bits.mask + + io.out.bits := io.in.bits // forwardXX field will be updated in s1 + io.s1_paddr := s1_paddr + + // load forward query datapath + io.sbuffer.valid := io.in.valid + io.sbuffer.paddr := s1_paddr + io.sbuffer.uop := s1_uop + io.sbuffer.sqIdx := s1_uop.sqIdx + io.sbuffer.lsroqIdx := s1_uop.lsroqIdx + io.sbuffer.mask := s1_mask + io.sbuffer.pc := s1_uop.cf.pc // FIXME: remove it + + io.lsroq.valid := io.in.valid + io.lsroq.paddr := s1_paddr + io.lsroq.uop := s1_uop + io.lsroq.sqIdx := s1_uop.sqIdx + io.lsroq.lsroqIdx := s1_uop.lsroqIdx + io.lsroq.mask := s1_mask + io.lsroq.pc := s1_uop.cf.pc // FIXME: remove it + + io.out.bits.forwardMask := io.sbuffer.forwardMask + io.out.bits.forwardData := io.sbuffer.forwardData // generate XLEN/8 Muxs - (0 until XLEN/8).map(j => { - when(io.lsroq.forward.forwardMask(j)) { - forwardMask(j) := true.B - forwardVec(j) := io.lsroq.forward.forwardData(j) + for (i <- 0 until XLEN / 8) { + when(io.lsroq.forwardMask(i)) { + io.out.bits.forwardMask(i) := true.B + io.out.bits.forwardData(i) := io.lsroq.forwardData(i) } - }) - l4_out.bits.forwardMask := forwardMask - l4_out.bits.forwardData := forwardVec - fullForward := (~l4_out.bits.forwardMask.asUInt & l4_out.bits.mask) === 0.U + } + + XSDebug(io.out.fire(), "[FWD LOAD RESP] pc %x fwd %x(%b) + %x(%b)\n", + s1_uop.cf.pc, + io.lsroq.forwardData.asUInt, io.lsroq.forwardMask.asUInt, + io.sbuffer.forwardData.asUInt, io.sbuffer.forwardMask.asUInt + ) - PipelineConnect(l4_out, 
l5_in, io.ldout.fire() || (l5_in.bits.miss || l5_in.bits.mmio) && l5_in.valid, false.B) + io.out.valid := io.in.valid && !s1_tlb_miss && !s1_uop.roqIdx.needFlush(io.redirect) + io.out.bits.paddr := s1_paddr + io.out.bits.mmio := s1_mmio + io.out.bits.tlbMiss := s1_tlb_miss - XSDebug(l4_valid, "l4: out.valid:%d pc 0x%x addr 0x%x -> 0x%x op %b data 0x%x mask %x forwardData: 0x%x forwardMask: %x dcache %b mmio %b miss:%d\n", - l4_out.valid, l4_out.bits.uop.cf.pc, l4_out.bits.vaddr, l4_out.bits.paddr, - l4_out.bits.uop.ctrl.fuOpType, l4_out.bits.data, l4_out.bits.mask, - l4_out.bits.forwardData.asUInt, l4_out.bits.forwardMask.asUInt, l4_dcache, l4_out.bits.mmio, l4_out.bits.miss) + io.in.ready := io.out.ready || !io.in.valid - XSDebug(l5_in.valid, "L5(%d %d): pc 0x%x addr 0x%x -> 0x%x op %b data 0x%x mask %x forwardData: 0x%x forwardMask: %x\n", - l5_in.valid, l5_in.ready, l5_in.bits.uop.cf.pc, l5_in.bits.vaddr, l5_in.bits.paddr, - l5_in.bits.uop.ctrl.fuOpType , l5_in.bits.data, l5_in.bits.mask, - l5_in.bits.forwardData.asUInt, l5_in.bits.forwardMask.asUInt) +} - XSDebug(l4_valid, "l4: sbuffer forwardData: 0x%x forwardMask: %x\n", - io.sbuffer.forwardData.asUInt, io.sbuffer.forwardMask.asUInt) - XSDebug(l4_valid, "l4: lsroq forwardData: 0x%x forwardMask: %x\n", - io.lsroq.forward.forwardData.asUInt, io.lsroq.forward.forwardMask.asUInt) +// Load Pipeline Stage 2 +// DCache resp +class LoadUnit_S2 extends XSModule { + val io = IO(new Bundle() { + val in = Flipped(Decoupled(new LsPipelineBundle)) + val out = Decoupled(new LsPipelineBundle) + val redirect = Flipped(ValidIO(new Redirect)) + val dcacheResp = Flipped(DecoupledIO(new DCacheWordResp)) + }) + + val s2_uop = io.in.bits.uop + val s2_mask = io.in.bits.mask + val s2_paddr = io.in.bits.paddr + val s2_cache_miss = io.dcacheResp.bits.miss + val s2_cache_nack = io.dcacheResp.bits.nack - XSDebug(io.redirect.valid, - p"Redirect: excp:${io.redirect.bits.isException} flushPipe:${io.redirect.bits.isFlushPipe} 
misp:${io.redirect.bits.isMisPred} " + - p"replay:${io.redirect.bits.isReplay} pc:0x${Hexadecimal(io.redirect.bits.pc)} target:0x${Hexadecimal(io.redirect.bits.target)} " + - p"brTag:${io.redirect.bits.brTag} l2:${io.ldin.bits.uop.roqIdx.needFlush(io.redirect)} l3:${l3_uop.roqIdx.needFlush(io.redirect)} " + - p"l4:${l4_out.bits.uop.roqIdx.needFlush(io.redirect)}\n" - ) - //------------------------------------------------------- - // LD Pipeline Stage 5 - // Do data ecc check, merge result and write back to LS ROQ - // If cache hit, return writeback result to CDB - //------------------------------------------------------- - val loadWriteBack = l5_in.fire() + io.dcacheResp.ready := true.B + assert(!(io.in.valid && !io.dcacheResp.valid), "DCache response got lost") + + val forwardMask = io.in.bits.forwardMask + val forwardData = io.in.bits.forwardData + val fullForward = (~forwardMask.asUInt & s2_mask) === 0.U // data merge - val rdata = VecInit((0 until 8).map(j => { - Mux(l5_in.bits.forwardMask(j), - l5_in.bits.forwardData(j), - l5_in.bits.data(8*(j+1)-1, 8*j) - ) - })).asUInt - val func = l5_in.bits.uop.ctrl.fuOpType - val raddr = l5_in.bits.paddr - val rdataSel = LookupTree(raddr(2, 0), List( + val rdata = VecInit((0 until XLEN / 8).map(j => + Mux(forwardMask(j), forwardData(j), io.dcacheResp.bits.data(8*(j+1)-1, 8*j)))).asUInt + val rdataSel = LookupTree(s2_paddr(2, 0), List( "b000".U -> rdata(63, 0), "b001".U -> rdata(63, 8), "b010".U -> rdata(63, 16), @@ -266,49 +191,107 @@ class LoadUnit extends XSModule { "b110".U -> rdata(63, 48), "b111".U -> rdata(63, 56) )) - val rdataPartialLoad = LookupTree(func, List( + val rdataPartialLoad = LookupTree(s2_uop.ctrl.fuOpType, List( LSUOpType.lb -> SignExt(rdataSel(7, 0) , XLEN), LSUOpType.lh -> SignExt(rdataSel(15, 0), XLEN), LSUOpType.lw -> SignExt(rdataSel(31, 0), XLEN), LSUOpType.ld -> SignExt(rdataSel(63, 0), XLEN), LSUOpType.lbu -> ZeroExt(rdataSel(7, 0) , XLEN), LSUOpType.lhu -> ZeroExt(rdataSel(15, 0), XLEN), - 
LSUOpType.lwu -> ZeroExt(rdataSel(31, 0), XLEN) + LSUOpType.lwu -> ZeroExt(rdataSel(31, 0), XLEN), + LSUOpType.flw -> boxF32ToF64(rdataSel(31, 0)) )) - // ecc check - // TODO + // TODO: ECC check - // if hit, writeback result to CDB - // val ldout = Vec(2, Decoupled(new ExuOutput)) - // when io.loadIn(i).fire() && !io.io.loadIn(i).miss, commit load to cdb - val hitLoadOut = Wire(Decoupled(new ExuOutput)) - hitLoadOut.bits.uop := l5_in.bits.uop - hitLoadOut.bits.data := rdataPartialLoad - hitLoadOut.bits.redirectValid := false.B - hitLoadOut.bits.redirect := DontCare - hitLoadOut.bits.brUpdate := DontCare - hitLoadOut.bits.debug.isMMIO := l5_in.bits.mmio - hitLoadOut.valid := l5_in.valid && !l5_in.bits.mmio && !l5_in.bits.miss // MMIO will be done in lsroq - XSDebug(hitLoadOut.fire(), "load writeback: pc %x data %x (%x + %x(%b))\n", - hitLoadOut.bits.uop.cf.pc, rdataPartialLoad, l5_in.bits.data, - l5_in.bits.forwardData.asUInt, l5_in.bits.forwardMask.asUInt + io.out.valid := io.in.valid // && !s2_uop.needFlush(io.redirect) will cause comb. 
loop + // Inst will be canceled in store queue / lsroq, + // so we do not need to care about flush in load / store unit's out.valid + io.out.bits := io.in.bits + io.out.bits.data := rdataPartialLoad + io.out.bits.miss := (s2_cache_miss || s2_cache_nack) && !fullForward + io.out.bits.mmio := io.in.bits.mmio + + io.in.ready := io.out.ready || !io.in.valid + + XSDebug(io.out.fire(), "[DCACHE LOAD RESP] pc %x rdata %x <- D$ %x + fwd %x(%b)\n", + s2_uop.cf.pc, rdataPartialLoad, io.dcacheResp.bits.data, + io.in.bits.forwardData.asUInt, io.in.bits.forwardMask.asUInt ) +} + + +class LoadUnit extends XSModule { + val io = IO(new Bundle() { + val ldin = Flipped(Decoupled(new ExuInput)) + val ldout = Decoupled(new ExuOutput) + val redirect = Flipped(ValidIO(new Redirect)) + val tlbFeedback = ValidIO(new TlbFeedback) + val dcache = new DCacheLoadIO + val dtlb = new TlbRequestIO() + val sbuffer = new LoadForwardQueryIO + val lsroq = new LoadToLsroqIO + }) + + val load_s0 = Module(new LoadUnit_S0) + val load_s1 = Module(new LoadUnit_S1) + val load_s2 = Module(new LoadUnit_S2) + + load_s0.io.in <> io.ldin + load_s0.io.redirect <> io.redirect + load_s0.io.dtlbReq <> io.dtlb.req + load_s0.io.dtlbResp <> io.dtlb.resp + load_s0.io.dcacheReq <> io.dcache.req + load_s0.io.tlbFeedback <> io.tlbFeedback + + PipelineConnect(load_s0.io.out, load_s1.io.in, load_s1.io.out.fire() || load_s1.io.out.bits.uop.roqIdx.needFlush(io.redirect), false.B) + + io.dcache.s1_paddr := load_s1.io.out.bits.paddr + load_s1.io.redirect <> io.redirect + io.dcache.s1_kill := DontCare // FIXME + io.sbuffer <> load_s1.io.sbuffer + io.lsroq.forward <> load_s1.io.lsroq + + PipelineConnect(load_s1.io.out, load_s2.io.in, load_s2.io.out.fire() || load_s1.io.out.bits.tlbMiss, false.B) + + load_s2.io.redirect <> io.redirect + load_s2.io.dcacheResp <> io.dcache.resp + + XSDebug(load_s0.io.out.valid, + p"S0: pc ${Hexadecimal(load_s0.io.out.bits.uop.cf.pc)}, lId ${Hexadecimal(load_s0.io.out.bits.uop.lqIdx.asUInt)}, " + + 
p"vaddr ${Hexadecimal(load_s0.io.out.bits.vaddr)}, mask ${Hexadecimal(load_s0.io.out.bits.mask)}\n") + XSDebug(load_s1.io.out.valid, + p"S1: pc ${Hexadecimal(load_s1.io.out.bits.uop.cf.pc)}, lId ${Hexadecimal(load_s1.io.out.bits.uop.lqIdx.asUInt)}, tlb_miss ${io.dtlb.resp.bits.miss}, " + + p"paddr ${Hexadecimal(load_s1.io.out.bits.paddr)}, mmio ${load_s1.io.out.bits.mmio}\n") + // writeback to LSROQ // Current dcache use MSHR + io.lsroq.loadIn.valid := load_s2.io.out.valid + io.lsroq.loadIn.bits := load_s2.io.out.bits - io.lsroq.loadIn.bits := l5_in.bits - io.lsroq.loadIn.bits.data := rdataPartialLoad // for debug - io.lsroq.loadIn.valid := loadWriteBack - - // pipeline control - l5_in.ready := io.ldout.ready + val hitLoadOut = Wire(Valid(new ExuOutput)) + hitLoadOut.valid := load_s2.io.out.valid && !load_s2.io.out.bits.miss + hitLoadOut.bits.uop := load_s2.io.out.bits.uop + hitLoadOut.bits.data := load_s2.io.out.bits.data + hitLoadOut.bits.redirectValid := false.B + hitLoadOut.bits.redirect := DontCare + hitLoadOut.bits.brUpdate := DontCare + hitLoadOut.bits.debug.isMMIO := load_s2.io.out.bits.mmio + hitLoadOut.bits.fflags := DontCare - val cdbArb = Module(new Arbiter(new ExuOutput, 2)) - io.ldout <> cdbArb.io.out - hitLoadOut <> cdbArb.io.in(0) - io.lsroq.ldout <> cdbArb.io.in(1) // missLoadOut + // TODO: arbiter + // if hit, writeback result to CDB + // val ldout = Vec(2, Decoupled(new ExuOutput)) + // when io.loadIn(i).fire() && !io.io.loadIn(i).miss, commit load to cdb + // val cdbArb = Module(new Arbiter(new ExuOutput, 2)) + // io.ldout <> cdbArb.io.out + // hitLoadOut <> cdbArb.io.in(0) + // io.lsroq.ldout <> cdbArb.io.in(1) // missLoadOut + load_s2.io.out.ready := true.B + io.lsroq.ldout.ready := !hitLoadOut.valid + io.ldout.bits := Mux(hitLoadOut.valid, hitLoadOut.bits, io.lsroq.ldout.bits) + io.ldout.valid := hitLoadOut.valid || io.lsroq.ldout.valid when(io.ldout.fire()){ XSDebug("ldout %x iw %x fw %x\n", io.ldout.bits.uop.cf.pc, 
io.ldout.bits.uop.ctrl.rfWen, io.ldout.bits.uop.ctrl.fpWen) diff --git a/src/main/scala/xiangshan/mem/sbuffer/NewSbuffer.scala b/src/main/scala/xiangshan/mem/sbuffer/NewSbuffer.scala index 571ac38f8582ac52ad154354eb71207540c2c87d..b3b0143bcf3b7da169a53725bcb219bf5be08acd 100644 --- a/src/main/scala/xiangshan/mem/sbuffer/NewSbuffer.scala +++ b/src/main/scala/xiangshan/mem/sbuffer/NewSbuffer.scala @@ -371,6 +371,9 @@ class NewSbuffer extends XSModule with HasSbufferCst { XSDebug(valid_tag_match, p"valid tag match: forward [$i] <> buf[$valid_forward_idx]\n" ) + XSDebug(inflight_tag_match || valid_tag_match, + p"[$i] forward paddr:${Hexadecimal(forward.paddr)}\n" + ) } } diff --git a/src/test/csrc/emu.cpp b/src/test/csrc/emu.cpp index 733039f645473faa0e18c35fa2d948dd4dd0731d..64609465cbfc190a0326bf29f193b30e27bfdc19 100644 --- a/src/test/csrc/emu.cpp +++ b/src/test/csrc/emu.cpp @@ -110,6 +110,7 @@ Emulator::Emulator(int argc, const char *argv[]): if (args.snapshot_path != NULL) { printf("loading from snapshot `%s`...\n", args.snapshot_path); snapshot_load(args.snapshot_path); + printf("model cycleCnt = %" PRIu64 "\n", dut_ptr->io_trap_cycleCnt); hascommit = 1; } diff --git a/src/test/csrc/main.cpp b/src/test/csrc/main.cpp index 38d0af4dc59b809e40106659e2bb3cb213e51e69..983f83c4519cb6d0708458f9376355f4d70f5008 100644 --- a/src/test/csrc/main.cpp +++ b/src/test/csrc/main.cpp @@ -8,6 +8,8 @@ std::function get_sc_time_stamp = []() -> double { return 0; }; double sc_time_stamp() { return get_sc_time_stamp(); } int main(int argc, const char** argv) { + printf("Emu compiled at %s, %s\n", __DATE__, __TIME__); + setbuf(stderr, mybuf); auto emu = new Emulator(argc, argv); diff --git a/src/test/scala/cache/CacheTest.scala b/src/test/scala/cache/CacheTest.scala index 17dea507eca5c504915b7133a526c15adca6abdf..22c26bf3afc9802448a2e4369c49b1dda19dbadc 100644 --- a/src/test/scala/cache/CacheTest.scala +++ b/src/test/scala/cache/CacheTest.scala @@ -1,7 +1,5 @@ package top -import 
noop._ -import bus.simplebus._ import device._ import utils._ diff --git a/src/test/scala/top/XSSim.scala b/src/test/scala/top/XSSim.scala index d52bc2a8fe9f2fe0a519dafdb5a0acaf1a23690a..f95cae721bfc4983e188bb92fca202142f0d1758 100644 --- a/src/test/scala/top/XSSim.scala +++ b/src/test/scala/top/XSSim.scala @@ -3,7 +3,6 @@ package top import system._ import chisel3._ import chisel3.util._ -import chisel3.util.experimental.BoringUtils import chipsalliance.rocketchip.config import chisel3.stage.ChiselGeneratorAnnotation import device._ @@ -12,8 +11,7 @@ import freechips.rocketchip.diplomacy.{AddressSet, BufferParams, LazyModule, Laz import freechips.rocketchip.tilelink.{TLBuffer, TLCacheCork, TLFragmenter, TLFuzzer, TLToAXI4, TLXbar} import xiangshan._ import utils._ -import firrtl.stage.RunFirrtlTransformAnnotation -import xstransforms.ShowPrintTransform +import ExcitingUtils.Debug class DiffTestIO extends XSBundle { val r = Output(Vec(64, UInt(XLEN.W))) @@ -102,37 +100,37 @@ class XSSimTop()(implicit p: config.Parameters) extends LazyModule { soc.module.io.meip := false.B val difftest = WireInit(0.U.asTypeOf(new DiffTestIO)) - BoringUtils.addSink(difftest.commit, "difftestCommit") - BoringUtils.addSink(difftest.thisPC, "difftestThisPC") - BoringUtils.addSink(difftest.thisINST, "difftestThisINST") - BoringUtils.addSink(difftest.skip, "difftestSkip") - BoringUtils.addSink(difftest.isRVC, "difftestIsRVC") - BoringUtils.addSink(difftest.wen, "difftestWen") - BoringUtils.addSink(difftest.wdata, "difftestWdata") - BoringUtils.addSink(difftest.wdst, "difftestWdst") - BoringUtils.addSink(difftest.wpc, "difftestWpc") - BoringUtils.addSink(difftest.intrNO, "difftestIntrNO") - BoringUtils.addSink(difftest.cause, "difftestCause") - BoringUtils.addSink(difftest.r, "difftestRegs") - BoringUtils.addSink(difftest.priviledgeMode, "difftestMode") - BoringUtils.addSink(difftest.mstatus, "difftestMstatus") - BoringUtils.addSink(difftest.sstatus, "difftestSstatus") - 
BoringUtils.addSink(difftest.mepc, "difftestMepc") - BoringUtils.addSink(difftest.sepc, "difftestSepc") - BoringUtils.addSink(difftest.mtval, "difftestMtval") - BoringUtils.addSink(difftest.stval, "difftestStval") - BoringUtils.addSink(difftest.mtvec, "difftestMtvec") - BoringUtils.addSink(difftest.stvec, "difftestStvec") - BoringUtils.addSink(difftest.mcause, "difftestMcause") - BoringUtils.addSink(difftest.scause, "difftestScause") - BoringUtils.addSink(difftest.satp, "difftestSatp") - BoringUtils.addSink(difftest.mip, "difftestMip") - BoringUtils.addSink(difftest.mie, "difftestMie") - BoringUtils.addSink(difftest.mscratch, "difftestMscratch") - BoringUtils.addSink(difftest.sscratch, "difftestSscratch") - BoringUtils.addSink(difftest.mideleg, "difftestMideleg") - BoringUtils.addSink(difftest.medeleg, "difftestMedeleg") - BoringUtils.addSink(difftest.scFailed, "difftestScFailed") + ExcitingUtils.addSink(difftest.commit, "difftestCommit", Debug) + ExcitingUtils.addSink(difftest.thisPC, "difftestThisPC", Debug) + ExcitingUtils.addSink(difftest.thisINST, "difftestThisINST", Debug) + ExcitingUtils.addSink(difftest.skip, "difftestSkip", Debug) + ExcitingUtils.addSink(difftest.isRVC, "difftestIsRVC", Debug) + ExcitingUtils.addSink(difftest.wen, "difftestWen", Debug) + ExcitingUtils.addSink(difftest.wdata, "difftestWdata", Debug) + ExcitingUtils.addSink(difftest.wdst, "difftestWdst", Debug) + ExcitingUtils.addSink(difftest.wpc, "difftestWpc", Debug) + ExcitingUtils.addSink(difftest.intrNO, "difftestIntrNO", Debug) + ExcitingUtils.addSink(difftest.cause, "difftestCause", Debug) + ExcitingUtils.addSink(difftest.r, "difftestRegs", Debug) + ExcitingUtils.addSink(difftest.priviledgeMode, "difftestMode", Debug) + ExcitingUtils.addSink(difftest.mstatus, "difftestMstatus", Debug) + ExcitingUtils.addSink(difftest.sstatus, "difftestSstatus", Debug) + ExcitingUtils.addSink(difftest.mepc, "difftestMepc", Debug) + ExcitingUtils.addSink(difftest.sepc, "difftestSepc", Debug) + 
ExcitingUtils.addSink(difftest.mtval, "difftestMtval", Debug) + ExcitingUtils.addSink(difftest.stval, "difftestStval", Debug) + ExcitingUtils.addSink(difftest.mtvec, "difftestMtvec", Debug) + ExcitingUtils.addSink(difftest.stvec, "difftestStvec", Debug) + ExcitingUtils.addSink(difftest.mcause, "difftestMcause", Debug) + ExcitingUtils.addSink(difftest.scause, "difftestScause", Debug) + ExcitingUtils.addSink(difftest.satp, "difftestSatp", Debug) + ExcitingUtils.addSink(difftest.mip, "difftestMip", Debug) + ExcitingUtils.addSink(difftest.mie, "difftestMie", Debug) + ExcitingUtils.addSink(difftest.mscratch, "difftestMscratch", Debug) + ExcitingUtils.addSink(difftest.sscratch, "difftestSscratch", Debug) + ExcitingUtils.addSink(difftest.mideleg, "difftestMideleg", Debug) + ExcitingUtils.addSink(difftest.medeleg, "difftestMedeleg", Debug) + ExcitingUtils.addSink(difftest.scFailed, "difftestScFailed", Debug) // BoringUtils.addSink(difftest.lrscAddr, "difftestLrscAddr") io.difftest := difftest diff --git a/src/test/scala/xiangshan/testutils/TestCaseGenerator.scala b/src/test/scala/xiangshan/testutils/TestCaseGenerator.scala index ae2f339db541ee6b8fb052050cd99d598614705b..990fa974370f9810561cf4004fe1e096480ba46f 100644 --- a/src/test/scala/xiangshan/testutils/TestCaseGenerator.scala +++ b/src/test/scala/xiangshan/testutils/TestCaseGenerator.scala @@ -4,7 +4,6 @@ import chisel3._ import chisel3.util._ import chisel3.experimental.BundleLiterals._ import chiseltest._ -import noop.MDUOpType import xiangshan._ import xiangshan.backend._