diff --git a/.gitignore b/.gitignore index fabc511356033b0be00469518c1f94c6994c5983..ca8987abacd50e97e0bb7ceebb1eee73766f3906 100644 --- a/.gitignore +++ b/.gitignore @@ -349,3 +349,5 @@ mill.rdiB stale_outputs_checked *.snapshot +__pycache__ + diff --git a/Makefile b/Makefile index f652d0ecf7c1d43e4cb2ab446a6df360c22511e3..5ebe83966b881630ef7235aa37ac5654540fd9cc 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ help: $(TOP_V): $(SCALA_FILE) mkdir -p $(@D) mill XiangShan.test.runMain $(SIMTOP) -td $(@D) --full-stacktrace --output-file $(@F) --disable-all --fpga-platform --remove-assert --infer-rw --repl-seq-mem -c:$(SIMTOP):-o:$(@D)/$(@F).conf $(SIM_ARGS) - $(MEM_GEN) $(@D)/$(@F).conf >> $@ + $(MEM_GEN) $(@D)/$(@F).conf --tsmc28 --output_file $(@D)/tsmc28_sram.v > $(@D)/tsmc28_sram.v.conf # sed -i -e 's/_\(aw\|ar\|w\|r\|b\)_\(\|bits_\)/_\1/g' $@ @git log -n 1 >> .__head__ @git diff >> .__diff__ diff --git a/scripts/vlsi_mem_gen b/scripts/vlsi_mem_gen index 306f0dece5d5d9afb6fc77f620ca048eced48d14..e22e5f4927dd5225c56300b062e65191e0180e4f 100755 --- a/scripts/vlsi_mem_gen +++ b/scripts/vlsi_mem_gen @@ -1,4 +1,4 @@ -#! /usr/bin/env python +#! /usr/bin/env python3 # See LICENSE.SiFive for license details. # See LICENSE.Berkeley for license details. @@ -7,208 +7,414 @@ import sys import math use_latches = 0 -blackbox = 0 - -def parse_line(line): - name = '' - width = 0 - depth = 0 - ports = '' - mask_gran = 0 - tokens = line.split() - i = 0 - for i in range(0,len(tokens),2): - s = tokens[i] - if s == 'name': - name = tokens[i+1] - elif s == 'width': - width = int(tokens[i+1]) - mask_gran = width # default setting - elif s == 'depth': - depth = int(tokens[i+1]) - elif s == 'ports': - ports = tokens[i+1].split(',') - elif s == 'mask_gran': - mask_gran = int(tokens[i+1]) - else: - sys.exit('%s: unknown argument %s' % (sys.argv[0], a)) - return (name, width, depth, mask_gran, width//mask_gran, ports) - -def gen_mem(name, width, depth, mask_gran, mask_seg, ports): - addr_width = max(math.ceil(math.log(depth)/math.log(2)),1) - port_spec = [] - readports = [] - writeports = [] - latchports = [] - rwports = [] - decl = [] - combinational = [] - sequential = [] - maskedports = {} - for pid in range(len(ports)): - ptype = ports[pid] - if ptype[0:1] == 'm': - ptype = ptype[1:] - maskedports[pid] = pid - - if ptype == 'read': - prefix = 'R%d_' % len(readports) - port_spec.append('input %sclk' % prefix) - port_spec.append('input [%d:0] %saddr' % (addr_width-1, prefix)) - port_spec.append('input %sen' % prefix) - port_spec.append('output [%d:0] %sdata' % (width-1, prefix)) - readports.append(pid) - elif ptype == 'write': - prefix = 'W%d_' % len(writeports) - port_spec.append('input %sclk' % prefix) - port_spec.append('input [%d:0] %saddr' % (addr_width-1, prefix)) - port_spec.append('input %sen' % prefix) - port_spec.append('input [%d:0] %sdata' % (width-1, prefix)) - if pid in maskedports: - port_spec.append('input [%d:0] %smask' % (mask_seg-1, prefix)) - if not use_latches or pid in maskedports: - writeports.append(pid) - else: - latchports.append(pid) - elif ptype == 'rw': - prefix = 'RW%d_' % len(rwports) - port_spec.append('input %sclk' % prefix) - port_spec.append('input [%d:0] %saddr' % (addr_width-1, prefix)) - port_spec.append('input %sen' % prefix) - port_spec.append('input %swmode' % prefix) - if pid in maskedports: - port_spec.append('input [%d:0] %swmask' % (mask_seg-1, prefix)) - port_spec.append('input [%d:0] %swdata' % (width-1, prefix)) - port_spec.append('output [%d:0] %srdata' % (width-1, prefix)) - rwports.append(pid) - else: - sys.exit('%s: unknown port type %s' % (sys.argv[0], ptype)) - - nr = len(readports) - nw = len(writeports) - nrw = len(rwports) - masked = len(maskedports)>0 - tup = (depth, width, nr, nw, nrw, masked) - - def emit_read(idx, rw): - prefix = ('RW%d_' if rw else 'R%d_') % idx - data = ('%srdata' if rw else '%sdata') % prefix - en = ('%sen && !%swmode' % (prefix, prefix)) if rw else ('%sen' % prefix) - decl.append('reg reg_%sren;' % prefix) - decl.append('reg [%d:0] reg_%saddr;' % (addr_width-1, prefix)) - sequential.append('always @(posedge %sclk)' % prefix) - sequential.append(' reg_%sren <= %s;' % (prefix, en)) - sequential.append('always @(posedge %sclk)' % prefix) - sequential.append(' if (%s) reg_%saddr <= %saddr;' % (en, prefix, prefix)) - combinational.append('`ifdef RANDOMIZE_GARBAGE_ASSIGN') - combinational.append('reg [%d:0] %srandom;' % (((width-1)//32+1)*32-1, prefix)) - combinational.append('`ifdef RANDOMIZE_MEM_INIT') - combinational.append(' initial begin') - combinational.append(' #`RANDOMIZE_DELAY begin end') - combinational.append(' %srandom = {%s};' % (prefix, ', '.join(['$random'] * ((width-1)//32+1)))) - combinational.append(' reg_%sren = %srandom[0];' % (prefix, prefix)) - combinational.append(' end') - combinational.append('`endif') - combinational.append('always @(posedge %sclk) %srandom <= {%s};' % (prefix, prefix, ', '.join(['$random'] * ((width-1)//32+1)))) - combinational.append('assign %s = reg_%sren ? ram[reg_%saddr] : %srandom[%d:0];' % (data, prefix, prefix, prefix, width-1)) - combinational.append('`else') - combinational.append('assign %s = ram[reg_%saddr];' % (data, prefix)) - combinational.append('`endif') - - for idx in range(nr): - emit_read(idx, False) - - for idx in range(nrw): - emit_read(idx, True) - - for idx in range(len(latchports)): - prefix = 'W%d_' % idx - decl.append('reg [%d:0] latch_%saddr;' % (addr_width-1, prefix)) - decl.append('reg [%d:0] latch_%sdata;' % (width-1, prefix)) - decl.append('reg latch_%sen;' % (prefix)) - combinational.append('always @(*) begin') - combinational.append(' if (!%sclk && %sen) latch_%saddr <= %saddr;' % (prefix, prefix, prefix, prefix)) - combinational.append(' if (!%sclk && %sen) latch_%sdata <= %sdata;' % (prefix, prefix, prefix, prefix)) - combinational.append(' if (!%sclk) latch_%sen <= %sen;' % (prefix, prefix, prefix)) - combinational.append('end') - combinational.append('always @(*)') - combinational.append(' if (%sclk && latch_%sen)' % (prefix, prefix)) - combinational.append(' ram[latch_%saddr] <= latch_%sdata;' % (prefix, prefix)) - - decl.append('reg [%d:0] ram [%d:0];' % (width-1, depth-1)) - decl.append('`ifdef RANDOMIZE_MEM_INIT') - decl.append(' integer initvar;') - decl.append(' initial begin') - decl.append(' #`RANDOMIZE_DELAY begin end') - decl.append(' for (initvar = 0; initvar < %d; initvar = initvar+1)' % depth) - decl.append(' ram[initvar] = {%d {$random}};' % ((width-1)//32+1)) - for idx in range(nr): - prefix = 'R%d_' % idx - decl.append(' reg_%saddr = {%d {$random}};' % (prefix, ((addr_width-1)//32+1))) - for idx in range(nrw): - prefix = 'RW%d_' % idx - decl.append(' reg_%saddr = {%d {$random}};' % (prefix, ((addr_width-1)//32+1))) - decl.append(' end') - decl.append('`endif') - - decl.append("integer i;") - for idx in range(nw): - prefix = 'W%d_' % idx - pid = writeports[idx] - sequential.append('always @(posedge %sclk)' % prefix) - sequential.append(" if (%sen) begin" % prefix) - for i in range(mask_seg): - mask = ('if (%smask[%d]) ' % (prefix, i)) if pid in maskedports else '' - ram_range = '%d:%d' % ((i+1)*mask_gran-1, i*mask_gran) - sequential.append(" %sram[%saddr][%s] <= %sdata[%s];" % (mask, prefix, ram_range, prefix, ram_range)) - sequential.append(" end") - for idx in range(nrw): - pid = rwports[idx] - prefix = 'RW%d_' % idx - sequential.append('always @(posedge %sclk)' % prefix) - sequential.append(" if (%sen && %swmode) begin" % (prefix, prefix)) - if mask_seg > 0: - sequential.append(" for(i=0;i<%d;i=i+1) begin" % mask_seg) - if pid in maskedports: - sequential.append(" if(%swmask[i]) begin" % prefix) - sequential.append(" ram[%saddr][i*%d +: %d] <= %swdata[i*%d +: %d];" %(prefix, mask_gran, mask_gran, prefix, mask_gran, mask_gran)) - sequential.append(" end") - else: - sequential.append(" ram[%saddr][i*%d +: %d] <= %swdata[i*%d +: %d];" %(prefix, mask_gran, mask_gran, prefix, mask_gran, mask_gran)) - sequential.append(" end") - sequential.append(" end") - body = "\ + +class VerilogModuleGenerator(object): + def __init__(self, name): + self.name = name + self.port_spec = [] + self.decl = [] + self.combinational = [] + self.sequential = [] + + def __format_width(self, width): + return "[{}:0] ".format(width-1) if width > 1 else "" + + def __format_depth(self, depth): + return " [{}:0]".format(depth-1) if depth > 1 else "" + + def add_io(self, io_type, width, name): + width_str = self.__format_width(width) + # print(io_type, width_str, name) + self.port_spec.append(f'{io_type} {width_str}{name}') + + def add_input(self, width, name): + self.add_io("input", width, name) + + def add_output(self, width, name): + self.add_io("output", width, name) + + def add_decl(self, decl_type, width, name, depth=1): + width_str = self.__format_width(width) + depth_str = self.__format_depth(depth) + self.decl.append(f"{decl_type} {width_str}{name}{depth_str};") + + def add_decl_reg(self, width, name, depth=1): + self.add_decl("reg", width, name, depth) + + def add_decl_wire(self, width, name, depth=1): + self.add_decl("wire", width, name, depth) + + def add_decl_line(self, line): + self.decl.append(line) + + def add_sequential(self, line): + self.sequential.append(line) + + def add_combinational(self, line): + self.combinational.append(line) + + def generate(self, blackbox): + body = "\ %s\n\ %s\n\ - %s\n" % ('\n '.join(decl), '\n '.join(sequential), '\n '.join(combinational)) + %s\n" % ('\n '.join(self.decl), '\n '.join(self.sequential), '\n '.join(self.combinational)) - s = "\nmodule %s(\n\ + s = "\nmodule %s(\n\ %s\n\ );\n\ \n\ %s\ \n\ -endmodule" % (name, ',\n '.join(port_spec), body if not blackbox else "") - return s +endmodule" % (self.name, ',\n '.join(self.port_spec), body if not blackbox else blackbox) + return s + + +class Reshaper(object): + def __init__(self, before, after): + # print(before, after) + self.conf = before + self.new_conf = after + assert(self.conf[-1] == ['write', 'read']) + assert(self.new_conf[-1] == ['mwrite', 'read']) + + def generate(self, mem): + (name, width, depth, mask_gran, mask_seg, _) = self.conf + (new_name, new_width, new_depth, new_mask_gran, new_mask_seg, _) = self.new_conf + addr_bits = math.log2(depth) + ways = new_width // width + ways_bits = int(math.log2(ways)) + mem.add_decl_wire(new_width, "data_read") + mem.add_decl_wire(new_width, "data_write") + mem.add_combinational(f"assign data_write = ") + sels = [f"{f'(write_way_index == {w}) ?' if w != ways-1 else ''} ({{{new_width-width}'h0, W0_data}} << {width*w})" for w in range(ways)] + mem.add_combinational(":\n ".join(sels) + ";") + mem.add_decl_wire(ways_bits, "read_way_index") + mem.add_combinational(f"assign read_way_index = R0_addr[{ways_bits-1}:0];") + mem.add_decl_wire(ways_bits, "write_way_index") + mem.add_combinational(f"assign write_way_index = W0_addr[{ways_bits-1}:0];") + mem.add_combinational(f"{new_name} array (") + mem.add_combinational(f" .W0_clk(W0_clk),") + mem.add_combinational(f" .W0_addr(W0_addr[{new_width-1}:{ways_bits}]),") + mem.add_combinational(f" .W0_en(W0_en),") + mem.add_combinational(f" .W0_data(data_write),") + mem.add_combinational(f" .W0_mask({ways}'h1 << write_way_index),") + mem.add_combinational(f" .R0_clk(R0_clk),") + mem.add_combinational(f" .R0_addr(R0_addr[{new_width-1}:{ways_bits}]),") + mem.add_combinational(f" .R0_en(R0_en),") + mem.add_combinational(f" .R0_data(data_read)") + mem.add_combinational(f");") + mem.add_combinational(f"assign R0_data = ") + sels = [f"{f'(read_way_index == {w}) ?' if w != ways-1 else ''} data_read[{width*(w+1)-1}:{width*w}]" for w in range(ways)] + mem.add_combinational(":\n ".join(sels) + ";") + + +class Spliter(object): + def __init__(self, before, after): + # print(before, after) + self.conf = before + self.new_conf = after + assert(self.conf[-1] == ['mrw']) + assert(self.new_conf[-1] == ['rw']) + + def generate(self, mem): + (name, width, depth, mask_gran, mask_seg, _) = self.conf + (new_name, new_width, new_depth, new_mask_gran, new_mask_seg, _) = self.new_conf + assert(depth == new_depth) + ways = width // new_width + for i in range(ways): + data_slice = f"[{new_width*(i+1)-1}:{new_width*i}]" + mem.add_combinational(f"{new_name} array_{i} (") + mem.add_combinational(f" .RW0_clk(RW0_clk),") + mem.add_combinational(f" .RW0_addr(RW0_addr),") + mem.add_combinational(f" .RW0_en(RW0_en),") + mem.add_combinational(f" .RW0_wmode(RW0_wmode && RW0_wmask[{i}]),") + mem.add_combinational(f" .RW0_wdata(RW0_wdata{data_slice}),") + mem.add_combinational(f" .RW0_rdata(RW0_rdata{data_slice})") + mem.add_combinational(f");") + +class SRAM(object): + def __init__(self, line): + self.parse_line(line) + self.prepare_module() + + def parse_line(self, line): + name = '' + width = 0 + depth = 0 + ports = '' + mask_gran = 0 + tokens = line.split() + i = 0 + for i in range(0, len(tokens), 2): + s = tokens[i] + if s == 'name': + name = tokens[i+1] + elif s == 'width': + width = int(tokens[i+1]) + mask_gran = width # default setting + elif s == 'depth': + depth = int(tokens[i+1]) + elif s == 'ports': + ports = tokens[i+1].split(',') + elif s == 'mask_gran': + mask_gran = int(tokens[i+1]) + else: + sys.exit('%s: unknown argument %s' % (sys.argv[0], i)) + self.conf = (name, width, depth, mask_gran, width//mask_gran, ports) + # return (name, width, depth, mask_gran, width//mask_gran, ports) + + def prepare_module(self): + (name, width, depth, mask_gran, mask_seg, ports) = self.conf + addr_width = max(math.ceil(math.log(depth)/math.log(2)),1) + + mem = VerilogModuleGenerator(name) + readports = [] + writeports = [] + latchports = [] + rwports = [] + maskedports = {} + + for pid, ptype in enumerate(ports): + if ptype[0:1] == 'm': + ptype = ptype[1:] + maskedports[pid] = pid + + if ptype == 'read': + prefix = 'R%d_' % len(readports) + mem.add_input(1, prefix + "clk") + mem.add_input(addr_width, prefix + "addr") + mem.add_input(1, prefix + "en") + mem.add_output(width, prefix + "data") + readports.append(pid) + elif ptype == 'write': + prefix = 'W%d_' % len(writeports) + mem.add_input(1, prefix + "clk") + mem.add_input(addr_width, prefix + "addr") + mem.add_input(1, prefix + "en") + mem.add_input(width, prefix + "data") + if pid in maskedports: + mem.add_input(mask_seg, prefix + "mask") + if not use_latches or pid in maskedports: + writeports.append(pid) + else: + latchports.append(pid) + elif ptype == 'rw': + prefix = 'RW%d_' % len(rwports) + mem.add_input(1, prefix + "clk") + mem.add_input(addr_width, prefix + "addr") + mem.add_input(1, prefix + "en") + mem.add_input(1, prefix + "wmode") + if pid in maskedports: + mem.add_input(mask_seg, prefix + "wmask") + mem.add_input(width, prefix + "wdata") + mem.add_output(width, prefix + "rdata") + rwports.append(pid) + else: + sys.exit('%s: unknown port type %s' % (sys.argv[0], ptype)) + self.mem = mem + self.ports_conf = (readports, writeports, latchports, rwports, maskedports) + + def generate(self, blackbox): + (name, width, depth, mask_gran, mask_seg, ports) = self.conf + addr_width = max(math.ceil(math.log(depth)/math.log(2)),1) + mem, (readports, writeports, latchports, rwports, maskedports) = self.mem, self.ports_conf + + nr = len(readports) + nw = len(writeports) + nrw = len(rwports) + + def emit_read(idx, rw): + prefix = ('RW%d_' if rw else 'R%d_') % idx + data = ('%srdata' if rw else '%sdata') % prefix + en = ('%sen && !%swmode' % (prefix, prefix)) if rw else ('%sen' % prefix) + mem.add_decl_reg(1, f"reg_{prefix}ren") + mem.add_decl_reg(addr_width, f"reg_{prefix}addr") + mem.add_sequential(f"always @(posedge {prefix}clk)") + mem.add_sequential(f" reg_{prefix}ren <= {en};") + mem.add_sequential(f"always @(posedge {prefix}clk)") + mem.add_sequential(f" if ({en}) reg_{prefix}addr <= {prefix}addr;") + mem.add_combinational("`ifdef RANDOMIZE_GARBAGE_ASSIGN") + mem.add_combinational(f"reg [{((width-1)//32+1)*32-1}:0] {prefix}random;") + mem.add_combinational(f"`ifdef RANDOMIZE_MEM_INIT") + mem.add_combinational(f" initial begin") + mem.add_combinational(f" #`RANDOMIZE_DELAY begin end") + mem.add_combinational(' %srandom = {%s};' % (prefix, ', '.join(['$random'] * ((width-1)//32+1)))) + mem.add_combinational(' reg_%sren = %srandom[0];' % (prefix, prefix)) + mem.add_combinational(' end') + mem.add_combinational('`endif') + mem.add_combinational('always @(posedge %sclk) %srandom <= {%s};' % (prefix, prefix, ', '.join(['$random'] * ((width-1)//32+1)))) + mem.add_combinational('assign %s = reg_%sren ? ram[reg_%saddr] : %srandom[%d:0];' % (data, prefix, prefix, prefix, width-1)) + mem.add_combinational('`else') + mem.add_combinational('assign %s = ram[reg_%saddr];' % (data, prefix)) + mem.add_combinational('`endif') + + for idx in range(nr): + emit_read(idx, False) + + for idx in range(nrw): + emit_read(idx, True) + + for idx in range(len(latchports)): + prefix = 'W%d_' % idx + mem.add_decl_reg(addr_width, f"latch_{prefix}addr") + mem.add_decl_reg(width, f"latch_{prefix}data") + mem.add_decl_reg(1, f"latch_{prefix}en") + mem.add_combinational('always @(*) begin') + mem.add_combinational(' if (!%sclk && %sen) latch_%saddr <= %saddr;' % (prefix, prefix, prefix, prefix)) + mem.add_combinational(' if (!%sclk && %sen) latch_%sdata <= %sdata;' % (prefix, prefix, prefix, prefix)) + mem.add_combinational(' if (!%sclk) latch_%sen <= %sen;' % (prefix, prefix, prefix)) + mem.add_combinational('end') + mem.add_combinational('always @(*)') + mem.add_combinational(' if (%sclk && latch_%sen)' % (prefix, prefix)) + mem.add_combinational(' ram[latch_%saddr] <= latch_%sdata;' % (prefix, prefix)) + + mem.add_decl_reg(width, "ram", depth) + mem.add_decl_line('`ifdef RANDOMIZE_MEM_INIT') + mem.add_decl_line(' integer initvar;') + mem.add_decl_line(' initial begin') + mem.add_decl_line(' #`RANDOMIZE_DELAY begin end') + mem.add_decl_line(' for (initvar = 0; initvar < %d; initvar = initvar+1)' % depth) + mem.add_decl_line(' ram[initvar] = {%d {$random}};' % ((width-1)//32+1)) + for idx in range(nr): + prefix = 'R%d_' % idx + mem.add_decl_line(' reg_%saddr = {%d {$random}};' % (prefix, ((addr_width-1)//32+1))) + for idx in range(nrw): + prefix = 'RW%d_' % idx + mem.add_decl_line(' reg_%saddr = {%d {$random}};' % (prefix, ((addr_width-1)//32+1))) + mem.add_decl_line(' end') + mem.add_decl_line('`endif') + + mem.add_decl_line("integer i;") + for idx in range(nw): + prefix = 'W%d_' % idx + pid = writeports[idx] + mem.add_sequential('always @(posedge %sclk)' % prefix) + mem.add_sequential(" if (%sen) begin" % prefix) + for i in range(mask_seg): + mask = ('if (%smask[%d]) ' % (prefix, i)) if pid in maskedports else '' + ram_range = '%d:%d' % ((i+1)*mask_gran-1, i*mask_gran) + mem.add_sequential(" %sram[%saddr][%s] <= %sdata[%s];" % (mask, prefix, ram_range, prefix, ram_range)) + mem.add_sequential(" end") + for idx in range(nrw): + pid = rwports[idx] + prefix = 'RW%d_' % idx + mem.add_sequential('always @(posedge %sclk)' % prefix) + mem.add_sequential(" if (%sen && %swmode) begin" % (prefix, prefix)) + if mask_seg > 0: + mem.add_sequential(" for(i=0;i<%d;i=i+1) begin" % mask_seg) + if pid in maskedports: + mem.add_sequential(" if(%swmask[i]) begin" % prefix) + mem.add_sequential(" ram[%saddr][i*%d +: %d] <= %swdata[i*%d +: %d];" %(prefix, mask_gran, mask_gran, prefix, mask_gran, mask_gran)) + mem.add_sequential(" end") + else: + mem.add_sequential(" ram[%saddr][i*%d +: %d] <= %swdata[i*%d +: %d];" %(prefix, mask_gran, mask_gran, prefix, mask_gran, mask_gran)) + mem.add_sequential(" end") + mem.add_sequential(" end") + return mem.generate(blackbox) + + +class SRAM_TSMC28(SRAM): + def __init__(self, line): + super().__init__(line) + self.sub_srams = [] + if self.__check_subsrams(): + print(line.strip()) + + def __check_subsrams(self): + need_split = self.__split() + need_reshape = self.__reshape() + assert(not (need_split and need_reshape)) + return not need_split and not need_reshape + + def __split(self): + (name, width, depth, mask_gran, mask_seg, ports) = self.conf + if ports == ["mrw"]: + new_conf = (name + "_sub", str(depth), str(mask_gran), "rw") + line_field = ("name", "depth", "width", "ports") + new_line = " ".join(map(lambda x: " ".join(x), zip(line_field, new_conf))) + new_sram = SRAM_TSMC28(new_line) + self.sub_srams.append(new_sram) + reshaper = Spliter(self.conf, new_sram.conf) + reshaper.generate(self.mem) + return True + return False + + def __reshape(self): + (name, width, depth, mask_gran, mask_seg, ports) = self.conf + if width == 2 and depth == 256: + new_conf = (name + "_sub", "64", "8", "mwrite,read", "2") + line_field = ("name", "depth", "width", "ports", "mask_gran") + new_line = " ".join(map(lambda x: " ".join(x), zip(line_field, new_conf))) + new_sram = SRAM_TSMC28(new_line) + self.sub_srams.append(new_sram) + reshaper = Reshaper(self.conf, new_sram.conf) + reshaper.generate(self.mem) + return True + return False + + def __get_tsmc_lib(self): + mem, (readports, writeports, latchports, rwports, maskedports) = self.mem, self.ports_conf + blackbox = "// tsmc lib here\n" + (name, width, depth, mask_gran, mask_seg, _) = self.conf + nports = (len(readports), len(writeports), len(rwports)) + addr_width = max(math.ceil(math.log(depth)/math.log(2)),1) + masked = len(maskedports) > 0 + # from tsmc28_sram import gen_tsmc_ram_1pw, gen_tsmc_ram_1pnw, gen_tsmc_ram_2pw, gen_tsmc_ram_2pnw + # if nports == (1, 1, 0): + # if masked: + # blackbox = gen_tsmc_ram_2pw("TS6N28HPCPLVTA64X8M2F", width, mask_gran) + # else: + # blackbox = gen_tsmc_ram_2pnw("TS6N28HPCPLVTA64X14M2F") + # elif nports == (0, 0, 1): + # if masked: + # blackbox = gen_tsmc_ram_1pw('TS1N28HPCPLVTB8192X64M8SW', width, mask_gran, addr_width) + # else: + # blackbox = gen_tsmc_ram_1pnw('TS5N28HPCPLVTA64X144M2F', width, addr_width) + # else: + # blackbox = "// unknown tsmc lib type\n" + return mem.generate(blackbox) + + def generate(self, blackbox, itself_only=False): + if itself_only: + # generate splits or reshapes + if self.sub_srams: + return self.mem.generate("") + # use empty blackbox + elif blackbox: + return super().generate(" ") + # insert tsmc libs + else: + return self.__get_tsmc_lib() + else: + s = self.generate(blackbox, True) + for sram in self.sub_srams: + s += sram.generate(blackbox) + return s + def main(args): f = open(args.output_file, "w") if (args.output_file) else None conf_file = args.conf for line in open(conf_file): - parsed_line = gen_mem(*parse_line(line)) + sram = SRAM(line) + if args.tsmc28: + sram = SRAM_TSMC28(line) + else: + sram = SRAM(line) if f is not None: - f.write(parsed_line) + f.write(sram.generate(args.blackbox)) else: - print(parsed_line) + print(sram.generate(args.blackbox)) + if __name__ == '__main__': import argparse parser = argparse.ArgumentParser(description='Memory generator for Rocket Chip') parser.add_argument('conf', metavar='.conf file') + parser.add_argument('--tsmc28', action='store_true', help='use tsmc28 sram to generate module body') parser.add_argument('--blackbox', '-b', action='store_true', help='set to disable output of module body') #parser.add_argument('--use_latches', '-l', action='store_true', help='set to enable use of latches') parser.add_argument('--output_file', '-o', help='name of output file, default is stdout') args = parser.parse_args() - blackbox = args.blackbox #use_latches = args.use_latches main(args) diff --git a/src/main/scala/system/SoC.scala b/src/main/scala/system/SoC.scala index ba5e6efb7a89603469816d58729324e743c0c0c0..4af15bf7930a18372d0f00cd0b4598d2c2cf7529 100644 --- a/src/main/scala/system/SoC.scala +++ b/src/main/scala/system/SoC.scala @@ -8,7 +8,7 @@ import freechips.rocketchip.diplomacy.{AddressSet, LazyModule, LazyModuleImp} import freechips.rocketchip.tilelink.{BankBinder, TLBuffer, TLBundleParameters, TLCacheCork, TLClientNode, TLFilter, TLFuzzer, TLIdentityNode, TLToAXI4, TLWidthWidget, TLXbar} import utils.{DebugIdentityNode, DataDontCareNode} import utils.XSInfo -import xiangshan.{HasXSParameter, XSCore, HasXSLog} +import xiangshan.{HasXSParameter, XSCore, HasXSLog, DifftestBundle} import sifive.blocks.inclusivecache.{CacheParameters, InclusiveCache, InclusiveCacheMicroParameters} import freechips.rocketchip.diplomacy.{AddressSet, LazyModule, LazyModuleImp} import freechips.rocketchip.devices.tilelink.{DevNullParams, TLError} @@ -162,6 +162,13 @@ class XSSoc()(implicit p: Parameters) extends LazyModule with HasSoCParameter { // val meip = Input(Vec(NumCores, Bool())) val ila = if(env.FPGAPlatform && EnableILA) Some(Output(new ILABundle)) else None }) + val difftestIO0 = IO(new DifftestBundle()) + val difftestIO1 = IO(new DifftestBundle()) + val difftestIO = Seq(difftestIO0, difftestIO1) + + val trapIO0 = IO(new xiangshan.TrapIO()) + val trapIO1 = IO(new xiangshan.TrapIO()) + val trapIO = Seq(trapIO0, trapIO1) plic.module.io.extra.get.intrVec <> RegNext(RegNext(Cat(io.extIntrs))) @@ -172,6 +179,14 @@ class XSSoc()(implicit p: Parameters) extends LazyModule with HasSoCParameter { xs_core(i).module.io.externalInterrupt.meip := plic.module.io.extra.get.meip(i) xs_core(i).module.io.l2ToPrefetcher <> l2cache(i).module.io } + difftestIO0 <> DontCare + difftestIO1 <> DontCare + if (env.DualCoreDifftest) { + difftestIO0 <> xs_core(0).module.difftestIO + difftestIO1 <> xs_core(1).module.difftestIO + trapIO0 <> xs_core(0).module.trapIO + trapIO1 <> xs_core(1).module.trapIO + } // do not let dma AXI signals optimized out chisel3.dontTouch(dma.out.head._1) chisel3.dontTouch(extDev.out.head._1) diff --git a/src/main/scala/top/Parameters.scala b/src/main/scala/top/Parameters.scala index 7ab28a7debb0c01f7ed471fcf39976483e74bf77..ae56c3b3d5fd4b9bfdcc4df1b21c240f7a7be0e0 100644 --- a/src/main/scala/top/Parameters.scala +++ b/src/main/scala/top/Parameters.scala @@ -24,7 +24,7 @@ object Parameters { val simParameters = Parameters(envParameters = EnviromentParameters(FPGAPlatform = false)) // sim only, disable log val debugParameters = Parameters(envParameters = simParameters.envParameters.copy(EnableDebug = true)) // open log - val simDualCoreParameters = Parameters(socParameters = SoCParameters(NumCores = 2), envParameters = EnviromentParameters(FPGAPlatform = false)) + val simDualCoreParameters = Parameters(socParameters = SoCParameters(NumCores = 2), envParameters = EnviromentParameters(FPGAPlatform = true, DualCoreDifftest = true)) val debugDualCoreParameters = Parameters(socParameters = SoCParameters(NumCores = 2), envParameters = simParameters.envParameters.copy(EnableDebug = true)) private var parameters = Parameters() // a default parameter, can be updated before use diff --git a/src/main/scala/utils/BitUtils.scala b/src/main/scala/utils/BitUtils.scala index 9ab3e7a6fb7a29b8b0be87602985138f76afeef0..c2e5baf8f39791ee5898dcd3b8266d2f41d3e8d0 100644 --- a/src/main/scala/utils/BitUtils.scala +++ b/src/main/scala/utils/BitUtils.scala @@ -116,3 +116,16 @@ object GenMask { object UIntToMask { def apply(ptr: UInt, length: Integer) = UIntToOH(ptr)(length - 1, 0) - 1.U } + +object GetEvenBits { + def apply(input: UInt): UInt = { + VecInit((0 until input.getWidth/2).map(i => {input(2*i)})).asUInt + } +} + + +object GetOddBits { + def apply(input: UInt): UInt = { + VecInit((0 until input.getWidth/2).map(i => {input(2*i+1)})).asUInt + } +} \ No newline at end of file diff --git a/src/main/scala/utils/Replacement.scala b/src/main/scala/utils/Replacement.scala index ead2e975bfb44fc82a3fa6db7c1cb29e86f2b88b..c31d94d83cfd7b4a80d14e18c42dadea8b38e585 100644 --- a/src/main/scala/utils/Replacement.scala +++ b/src/main/scala/utils/Replacement.scala @@ -6,22 +6,60 @@ package utils import chisel3._ import chisel3.util._ import chisel3.util.random.LFSR +import freechips.rocketchip.util._ +import freechips.rocketchip.util.property.cover import xiangshan.{HasXSLog, XSCoreParameters} abstract class ReplacementPolicy { + def nBits: Int + def perSet: Boolean def way: UInt def miss: Unit def hit: Unit + def access(touch_way: UInt): Unit + def access(touch_ways: Seq[Valid[UInt]]): Unit + def state_read: UInt + def get_next_state(state: UInt, touch_way: UInt): UInt + def get_next_state(state: UInt, touch_ways: Seq[Valid[UInt]]): UInt = { + touch_ways.foldLeft(state)((prev, touch_way) => Mux(touch_way.valid, get_next_state(prev, touch_way.bits), prev)) + } + def get_replace_way(state: UInt): UInt +} + +object ReplacementPolicy { + //for fully associative mapping + def fromString(s: Option[String],n_ways: Int): ReplacementPolicy = fromString(s.getOrElse("none"),n_ways) + def fromString(s: String, n_ways: Int): ReplacementPolicy = s.toLowerCase match { + case "random" => new RandomReplacement(n_ways) + case "lru" => new TrueLRU(n_ways) + case "plru" => new PseudoLRU(n_ways) + case t => throw new IllegalArgumentException(s"unknown Replacement Policy type $t") + } + //for set associative mapping + def fromString(s: Option[String], n_ways: Int, n_sets: Int): SetAssocReplacementPolicy = fromString(s.getOrElse("none"),n_ways,n_sets ) + def fromString(s: String, n_ways: Int, n_sets: Int): SetAssocReplacementPolicy = s.toLowerCase match { + case "random" => new SetAssocRandom(n_sets, n_ways) + case "setlru" => new SetAssocLRU(n_sets, n_ways, "lru") + case "setplru" => new SetAssocLRU(n_sets, n_ways, "plru") + case t => throw new IllegalArgumentException(s"unknown Replacement Policy type $t") + } } -class RandomReplacement(ways: Int) extends ReplacementPolicy { +class RandomReplacement(n_ways: Int) extends ReplacementPolicy { private val replace = Wire(Bool()) replace := false.B - val lfsr = LFSR(16, replace) + def nBits = 16 + def perSet = false + private val lfsr = LFSR(nBits, replace) + def state_read = WireDefault(lfsr) - def way = Random(ways, lfsr) + def way = Random(n_ways, lfsr) def miss = replace := true.B def hit = {} + def access(touch_way: UInt) = {} + def access(touch_ways: Seq[Valid[UInt]]) = {} + def get_next_state(state: UInt, touch_way: UInt) = 0.U //DontCare + def get_replace_way(state: UInt) = way } abstract class SeqReplacementPolicy { @@ -30,6 +68,14 @@ abstract class SeqReplacementPolicy { def way: UInt } +abstract class SetAssocReplacementPolicy { + def access(set: UInt, touch_way: UInt): Unit + def access(sets: Seq[UInt], touch_ways: Seq[Valid[UInt]]): Unit + def way(set: UInt): UInt + def miss(set: UInt): Unit +} + + class SeqRandom(n_ways: Int) extends SeqReplacementPolicy { val logic = new RandomReplacement(n_ways) def access(set: UInt) = { } @@ -39,7 +85,7 @@ class SeqRandom(n_ways: Int) extends SeqReplacementPolicy { def way = logic.way } -class TrueLRU(n_ways: Int) { +class TrueLRU(n_ways: Int) extends ReplacementPolicy { // True LRU replacement policy, using a triangular matrix to track which sets are more recently used than others. // The matrix is packed into a single UInt (or Bits). Example 4-way (6-bits): // [5] - 3 more recent than 2 @@ -49,6 +95,7 @@ class TrueLRU(n_ways: Int) { // [1] - 2 more recent than 0 // [0] - 1 more recent than 0 def nBits = (n_ways * (n_ways-1)) / 2 + def perSet = true private val state_reg = RegInit(0.U(nBits.W)) def state_read = WireDefault(state_reg) @@ -70,28 +117,23 @@ class TrueLRU(n_ways: Int) { // Compute next value of triangular matrix // set the touched way as more recent than every other way - nextState.zipWithIndex.foreach { case (e, i) => + nextState.zipWithIndex.map { case (e, i) => e := Mux(i.U === touch_way, 0.U(n_ways.W), moreRecentVec(i) | wayDec) } nextState.zipWithIndex.tail.foldLeft((nextState.head.apply(n_ways-1,1),0)) { case ((pe,pi),(ce,ci)) => (Cat(ce.apply(n_ways-1,ci+1), pe), ci) }._1 } - - def get_next_state(state: UInt, touch_ways: Seq[Valid[UInt]]): UInt = { - touch_ways.foldLeft(state)((prev, touch_way) => Mux(touch_way.valid, get_next_state(prev, touch_way.bits), prev)) - } - - def access(touch_way: UInt) { + def access(touch_way: UInt): Unit = { state_reg := get_next_state(state_reg, touch_way) } - def access(touch_ways: Seq[Valid[UInt]]) { - when (ParallelOR(touch_ways.map(_.valid))) { + def access(touch_ways: Seq[Valid[UInt]]): Unit = { + when (touch_ways.map(_.valid).orR) { state_reg := get_next_state(state_reg, touch_ways) } -// for (i <- 1 until touch_ways.size) { -// cover(PopCount(touch_ways.map(_.valid)) === i.U, s"LRU_UpdateCount$i", s"LRU Update $i simultaneous") -// } + for (i <- 1 until touch_ways.size) { + cover(PopCount(touch_ways.map(_.valid)) === i.U, s"LRU_UpdateCount$i", s"LRU Update $i simultaneous") + } } def get_replace_way(state: UInt): UInt = { @@ -108,49 +150,149 @@ class TrueLRU(n_ways: Int) { def way = get_replace_way(state_reg) def miss = access(way) def hit = {} - def flush() = { state_reg := 0.U(nBits.W) } @deprecated("replace 'replace' with 'way' from abstract class ReplacementPolicy","Rocket Chip 2020.05") def replace: UInt = way } -class PseudoLRU(n: Int) -{ - private val state_reg = Reg(UInt((n-1).W)) - def access(way: UInt) { - state_reg := get_next_state(state_reg,way) - } - def access(ways: Seq[ValidIO[UInt]]) { - state_reg := ways.foldLeft(state_reg)((prev, way) => Mux(way.valid, get_next_state(prev, way.bits), prev)) - } - def get_next_state(state: UInt, way: UInt) = { - var next_state = state << 1 - var idx = 1.U(1.W) - for (i <- log2Up(n)-1 to 0 by -1) { - val bit = way(i) -// next_state = next_state.bitSet(idx, !bit) - next_state = Mux(bit, next_state & (~UIntToOH(idx)), next_state | UIntToOH(idx)) - idx = Cat(idx, bit) +class PseudoLRU(n_ways: Int) extends ReplacementPolicy { + // Pseudo-LRU tree algorithm: https://en.wikipedia.org/wiki/Pseudo-LRU#Tree-PLRU + // + // + // - bits storage example for 4-way PLRU binary tree: + // bit[2]: ways 3+2 older than ways 1+0 + // / \ + // bit[1]: way 3 older than way 2 bit[0]: way 1 older than way 0 + // + // + // - bits storage example for 3-way PLRU binary tree: + // bit[1]: way 2 older than ways 1+0 + // \ + // bit[0]: way 1 older than way 0 + // + // + // - bits storage example for 8-way PLRU binary tree: + // bit[6]: ways 7-4 older than ways 3-0 + // / \ + // bit[5]: ways 7+6 > 5+4 bit[2]: ways 3+2 > 1+0 + // / \ / \ + // bit[4]: way 7>6 bit[3]: way 5>4 bit[1]: way 3>2 bit[0]: way 1>0 + + def nBits = n_ways - 1 + def perSet = true + private val state_reg = if (nBits == 0) Reg(UInt(0.W)) else RegInit(0.U(nBits.W)) + def state_read = WireDefault(state_reg) + + def access(touch_way: UInt): Unit = { + state_reg := get_next_state(state_reg, touch_way) + } + def access(touch_ways: Seq[Valid[UInt]]): Unit = { + when (touch_ways.map(_.valid).orR) { + state_reg := get_next_state(state_reg, touch_ways) + } + for (i <- 1 until touch_ways.size) { + cover(PopCount(touch_ways.map(_.valid)) === i.U, s"PLRU_UpdateCount$i", s"PLRU Update $i simultaneous") + } + } + + + /** @param state state_reg bits for this sub-tree + * @param touch_way touched way encoded value bits for this sub-tree + * @param tree_nways number of ways in this sub-tree + */ + def get_next_state(state: UInt, touch_way: UInt, tree_nways: Int): UInt = { + require(state.getWidth == (tree_nways-1), s"wrong state bits width ${state.getWidth} for $tree_nways ways") + require(touch_way.getWidth == (log2Ceil(tree_nways) max 1), s"wrong encoded way width ${touch_way.getWidth} for $tree_nways ways") + + if (tree_nways > 2) { + // we are at a branching node in the tree, so recurse + val right_nways: Int = 1 << (log2Ceil(tree_nways) - 1) // number of ways in the right sub-tree + val left_nways: Int = tree_nways - right_nways // number of ways in the left sub-tree + val set_left_older = !touch_way(log2Ceil(tree_nways)-1) + val left_subtree_state = state.extract(tree_nways-3, right_nways-1) + val right_subtree_state = state(right_nways-2, 0) + + if (left_nways > 1) { + // we are at a branching node in the tree with both left and right sub-trees, so recurse both sub-trees + Cat(set_left_older, + Mux(set_left_older, + left_subtree_state, // if setting left sub-tree as older, do NOT recurse into left sub-tree + get_next_state(left_subtree_state, touch_way.extract(log2Ceil(left_nways)-1,0), left_nways)), // recurse left if newer + Mux(set_left_older, + get_next_state(right_subtree_state, touch_way(log2Ceil(right_nways)-1,0), right_nways), // recurse right if newer + right_subtree_state)) // if setting right sub-tree as older, do NOT recurse into right sub-tree + } else { + // we are at a branching node in the tree with only a right sub-tree, so recurse only right sub-tree + Cat(set_left_older, + Mux(set_left_older, + get_next_state(right_subtree_state, touch_way(log2Ceil(right_nways)-1,0), right_nways), // recurse right if newer + right_subtree_state)) // if setting right sub-tree as older, do NOT recurse into right sub-tree + } + } else if (tree_nways == 2) { + // we are at a leaf node at the end of the tree, so set the single state bit opposite of the lsb of the touched way encoded value + !touch_way(0) + } else { // tree_nways <= 1 + // we are at an empty node in an empty tree for 1 way, so return single zero bit for Chisel (no zero-width wires) + 0.U(1.W) } - next_state(n-1, 1) - } - def replace = get_replace_way(state_reg) - def get_replace_way(state: UInt) = { - val shifted_state = state << 1 - var idx = 1.U(1.W) - for (i <- log2Up(n)-1 to 0 by -1) { - val in_bounds = Cat(idx, (BigInt(1) << i).U)(log2Up(n)-1, 0) < n.U - idx = Cat(idx, in_bounds && shifted_state(idx)) + } + + def get_next_state(state: UInt, touch_way: UInt): UInt = { + val touch_way_sized = if (touch_way.getWidth < log2Ceil(n_ways)) touch_way.padTo (log2Ceil(n_ways)) + else touch_way.extract(log2Ceil(n_ways)-1,0) + get_next_state(state, touch_way_sized, n_ways) + } + + + /** @param state state_reg bits for this sub-tree + * @param tree_nways number of ways in this sub-tree + */ + def get_replace_way(state: UInt, tree_nways: Int): UInt = { + require(state.getWidth == (tree_nways-1), s"wrong state bits width ${state.getWidth} for $tree_nways ways") + + // this algorithm recursively descends the binary tree, filling in the way-to-replace encoded value from msb to lsb + if (tree_nways > 2) { + // we are at a branching node in the tree, so recurse + val right_nways: Int = 1 << (log2Ceil(tree_nways) - 1) // number of ways in the right sub-tree + val left_nways: Int = tree_nways - right_nways // number of ways in the left sub-tree + val left_subtree_older = state(tree_nways-2) + val left_subtree_state = state.extract(tree_nways-3, right_nways-1) + val right_subtree_state = state(right_nways-2, 0) + + if (left_nways > 1) { + // we are at a branching node in the tree with both left and right sub-trees, so recurse both sub-trees + Cat(left_subtree_older, // return the top state bit (current tree node) as msb of the way-to-replace encoded value + Mux(left_subtree_older, // if left sub-tree is older, recurse left, else recurse right + get_replace_way(left_subtree_state, left_nways), // recurse left + get_replace_way(right_subtree_state, right_nways))) // recurse right + } else { + // we are at a branching node in the tree with only a right sub-tree, so recurse only right sub-tree + Cat(left_subtree_older, // return the top state bit (current tree node) as msb of the way-to-replace encoded value + Mux(left_subtree_older, // if left sub-tree is older, return and do not recurse right + 0.U(1.W), + get_replace_way(right_subtree_state, right_nways))) // recurse right + } + } else if (tree_nways == 2) { + // we are at a leaf node at the end of the tree, so just return the single state bit as lsb of the way-to-replace encoded value + state(0) + } else { // tree_nways <= 1 + // we are at an empty node in an unbalanced tree for non-power-of-2 ways, so return single zero bit as lsb of the way-to-replace encoded value + 0.U(1.W) } - idx(log2Up(n)-1,0) } + + def get_replace_way(state: UInt): UInt = get_replace_way(state, n_ways) + + def way = get_replace_way(state_reg) + def miss = access(way) + def hit = {} } class SeqPLRU(n_sets: Int, n_ways: Int) extends SeqReplacementPolicy { - val state = SyncReadMem(n_sets, UInt((n_ways-1).W)) val logic = new PseudoLRU(n_ways) - val current_state = Wire(UInt()) + val state = SyncReadMem(n_sets, UInt(logic.nBits.W)) + val current_state = Wire(UInt(logic.nBits.W)) + val next_state = Wire(UInt(logic.nBits.W)) val plru_way = logic.get_replace_way(current_state) - val next_state = Wire(UInt()) def access(set: UInt) = { current_state := state.read(set) @@ -165,6 +307,45 @@ class SeqPLRU(n_sets: Int, n_ways: Int) extends SeqReplacementPolicy { def way = plru_way } + +class SetAssocLRU(n_sets: Int, n_ways: Int, policy: String) extends SetAssocReplacementPolicy { + val logic = policy.toLowerCase match { + case "plru" => new PseudoLRU(n_ways) + case "lru" => new TrueLRU(n_ways) + case t => throw new IllegalArgumentException(s"unknown Replacement Policy type $t") + } + val state_vec = Reg(Vec(n_sets, UInt(logic.nBits.W))) + + def access(set: UInt, touch_way: UInt) = { + state_vec(set) := logic.get_next_state(state_vec(set), touch_way) + } + + def access(sets: Seq[UInt], touch_ways: Seq[Valid[UInt]]) = { + require(sets.size == touch_ways.size, "internal consistency check: should be same number of simultaneous updates for sets and touch_ways") + for (set <- 0 until n_sets) { + val set_touch_ways = (sets zip touch_ways).map { case (touch_set, touch_way) => + Pipe(touch_way.valid && (touch_set === set.U), touch_way.bits, 0)} + when (set_touch_ways.map(_.valid).orR) { + state_vec(set) := logic.get_next_state(state_vec(set), set_touch_ways) + } + } + } + + def way(set: UInt) = logic.get_replace_way(state_vec(set)) + def miss(set: UInt) = {} +} + +class SetAssocRandom(n_sets : Int, n_ways: Int) extends SetAssocReplacementPolicy { + val random = new RandomReplacement(n_ways) + + def miss(set: UInt) = random.miss + def way(set: UInt) = random.way + + def access(set: UInt, touch_way: UInt) = {} + def access(sets: Seq[UInt], touch_ways: Seq[Valid[UInt]]) = {} + +} + class SbufferLRU(n_ways: Int) { def nBits = n_ways * n_ways diff --git a/src/main/scala/xiangshan/Bundle.scala b/src/main/scala/xiangshan/Bundle.scala index ae8daf676937e72d16bc7c913a098f4cf361e137..9291b12d883688c070b7b2b2953483a38d1ad3c1 100644 --- a/src/main/scala/xiangshan/Bundle.scala +++ b/src/main/scala/xiangshan/Bundle.scala @@ -120,6 +120,12 @@ class BranchPrediction extends XSBundle with HasIFUConst { def hasNotTakenBrs = Mux(taken, ParallelPriorityMux(realTakens, sawNotTakenBr), ParallelORR(brNotTakens)) } +class PredictorAnswer extends XSBundle { + val hit = Bool() + val taken = Bool() + val target = UInt(VAddrBits.W) +} + class BpuMeta extends XSBundle with HasBPUParameter { val ubtbWriteWay = UInt(log2Up(UBtbWays).W) val ubtbHits = Bool() @@ -144,6 +150,12 @@ class BpuMeta extends XSBundle with HasBPUParameter { val predictor = if (BPUDebug) UInt(log2Up(4).W) else UInt(0.W) // Mark which component this prediction comes from {ubtb, btb, tage, loopPredictor} + val ubtbAns = new PredictorAnswer + val btbAns = new PredictorAnswer + val tageAns = new PredictorAnswer + val rasAns = new PredictorAnswer + val loopAns = new PredictorAnswer + // def apply(histPtr: UInt, tageMeta: TageMeta, rasSp: UInt, rasTopCtr: UInt) = { // this.histPtr := histPtr // this.tageMeta := tageMeta @@ -338,8 +350,6 @@ class RoqCommitInfo extends XSBundle { val commitType = CommitType() val pdest = UInt(PhyRegIdxWidth.W) val old_pdest = UInt(PhyRegIdxWidth.W) - val lqIdx = new LqPtr - val sqIdx = new SqPtr // these should be optimized for synthesis verilog val pc = UInt(VAddrBits.W) @@ -401,3 +411,63 @@ class SfenceBundle extends XSBundle { p"valid:0x${Hexadecimal(valid)} rs1:${bits.rs1} rs2:${bits.rs2} addr:${Hexadecimal(bits.addr)}" } } + +class DifftestBundle extends XSBundle { + val fromSbuffer = new Bundle() { + val sbufferResp = Output(Bool()) + val sbufferAddr = Output(UInt(64.W)) + val sbufferData = Output(Vec(64, UInt(8.W))) + val sbufferMask = Output(UInt(64.W)) + } + val fromSQ = new Bundle() { + val storeCommit = Output(UInt(2.W)) + val storeAddr = Output(Vec(2, UInt(64.W))) + val storeData = Output(Vec(2, UInt(64.W))) + val storeMask = Output(Vec(2, UInt(8.W))) + } + val fromXSCore = new Bundle() { + val r = Output(Vec(64, UInt(XLEN.W))) + } + val fromCSR = new Bundle() { + val intrNO = Output(UInt(64.W)) + val cause = Output(UInt(64.W)) + val priviledgeMode = Output(UInt(2.W)) + val mstatus = Output(UInt(64.W)) + val sstatus = Output(UInt(64.W)) + val mepc = Output(UInt(64.W)) + val sepc = Output(UInt(64.W)) + val mtval = Output(UInt(64.W)) + val stval = Output(UInt(64.W)) + val mtvec = Output(UInt(64.W)) + val stvec = Output(UInt(64.W)) + val mcause = Output(UInt(64.W)) + val scause = Output(UInt(64.W)) + val satp = Output(UInt(64.W)) + val mip = Output(UInt(64.W)) + val mie = Output(UInt(64.W)) + val mscratch = Output(UInt(64.W)) + val sscratch = Output(UInt(64.W)) + val mideleg = Output(UInt(64.W)) + val medeleg = Output(UInt(64.W)) + } + val fromRoq = new Bundle() { + val commit = Output(UInt(32.W)) + val thisPC = Output(UInt(XLEN.W)) + val thisINST = Output(UInt(32.W)) + val skip = Output(UInt(32.W)) + val wen = Output(UInt(32.W)) + val wdata = Output(Vec(CommitWidth, UInt(XLEN.W))) // set difftest width to 6 + val wdst = Output(Vec(CommitWidth, UInt(32.W))) // set difftest width to 6 + val wpc = Output(Vec(CommitWidth, UInt(XLEN.W))) // set difftest width to 6 + val isRVC = Output(UInt(32.W)) + val scFailed = Output(Bool()) + } +} + +class TrapIO extends XSBundle { + val valid = Output(Bool()) + val code = Output(UInt(3.W)) + val pc = Output(UInt(VAddrBits.W)) + val cycleCnt = Output(UInt(XLEN.W)) + val instrCnt = Output(UInt(XLEN.W)) +} \ No newline at end of file diff --git a/src/main/scala/xiangshan/XSCore.scala b/src/main/scala/xiangshan/XSCore.scala index e14211f51826b64bedec76af7a73716ef4b205df..f2fdf93b520d187198ded4e80a24b74e24a6bbdd 100644 --- a/src/main/scala/xiangshan/XSCore.scala +++ b/src/main/scala/xiangshan/XSCore.scala @@ -22,6 +22,14 @@ import freechips.rocketchip.tile.HasFPUParameters import sifive.blocks.inclusivecache.PrefetcherIO import utils._ +object hartIdCore extends (() => Int) { + var x = 0 + def apply(): Int = { + x = x + 1 + x-1 + } +} + case class XSCoreParameters ( XLEN: Int = 64, @@ -179,6 +187,7 @@ trait HasXSParameter { val icacheParameters = ICacheParameters( tagECC = Some("parity"), dataECC = Some("parity"), + replacer = Some("setlru"), nMissEntries = 2 ) @@ -288,7 +297,8 @@ case class EnviromentParameters ( FPGAPlatform: Boolean = true, EnableDebug: Boolean = false, - EnablePerfDebug: Boolean = false + EnablePerfDebug: Boolean = false, + DualCoreDifftest: Boolean = false ) // object AddressSpace extends HasXSParameter { @@ -349,6 +359,12 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer) val externalInterrupt = new ExternalInterruptIO val l2ToPrefetcher = Flipped(new PrefetcherIO(PAddrBits)) }) + + val difftestIO = IO(new DifftestBundle()) + difftestIO <> DontCare + + val trapIO = IO(new TrapIO()) + trapIO <> DontCare println(s"FPGAPlatform:${env.FPGAPlatform} EnableDebug:${env.EnableDebug}") AddressSpace.printMemmap() @@ -451,8 +467,7 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer) floatBlock.io.frm <> integerBlock.io.csrio.frm - memBlock.io.lsqio.commits <> ctrlBlock.io.roqio.commits - memBlock.io.lsqio.roqDeqPtr <> ctrlBlock.io.roqio.roqDeqPtr + memBlock.io.lsqio.roq <> ctrlBlock.io.roqio.lsq memBlock.io.lsqio.exceptionAddr.lsIdx.lqIdx := ctrlBlock.io.roqio.exception.bits.lqIdx memBlock.io.lsqio.exceptionAddr.lsIdx.sqIdx := ctrlBlock.io.roqio.exception.bits.sqIdx memBlock.io.lsqio.exceptionAddr.isStore := CommitType.lsInstIsStore(ctrlBlock.io.roqio.exception.bits.ctrl.commitType) @@ -481,4 +496,19 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer) ExcitingUtils.addSource(debugArchReg, "difftestRegs", ExcitingUtils.Debug) } + if (env.DualCoreDifftest) { + val id = hartIdCore() + difftestIO.fromSbuffer <> memBlock.difftestIO.fromSbuffer + difftestIO.fromSQ <> memBlock.difftestIO.fromSQ + difftestIO.fromCSR <> integerBlock.difftestIO.fromCSR + difftestIO.fromRoq <> ctrlBlock.difftestIO.fromRoq + trapIO <> ctrlBlock.trapIO + + val debugIntReg, debugFpReg = WireInit(VecInit(Seq.fill(32)(0.U(XLEN.W)))) + ExcitingUtils.addSink(debugIntReg, s"DEBUG_INT_ARCH_REG$id", ExcitingUtils.Debug) + ExcitingUtils.addSink(debugFpReg, s"DEBUG_FP_ARCH_REG$id", ExcitingUtils.Debug) + val debugArchReg = WireInit(VecInit(debugIntReg ++ debugFpReg)) + difftestIO.fromXSCore.r := debugArchReg + } + } diff --git a/src/main/scala/xiangshan/backend/CtrlBlock.scala b/src/main/scala/xiangshan/backend/CtrlBlock.scala index a3f46459e1e7e0e6977757608e6dfc546cdb8f90..5eebf8c29e5c81e2ef17fc96307132e313fc7837 100644 --- a/src/main/scala/xiangshan/backend/CtrlBlock.scala +++ b/src/main/scala/xiangshan/backend/CtrlBlock.scala @@ -11,7 +11,7 @@ import xiangshan.backend.dispatch.Dispatch import xiangshan.backend.exu._ import xiangshan.backend.exu.Exu.exuConfigs import xiangshan.backend.regfile.RfReadPort -import xiangshan.backend.roq.{Roq, RoqCSRIO, RoqPtr} +import xiangshan.backend.roq.{Roq, RoqCSRIO, RoqLsqIO, RoqPtr} import xiangshan.mem.LsqEnqIO class CtrlToIntBlockIO extends XSBundle { @@ -52,11 +52,29 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper { val exception = ValidIO(new MicroOp) val isInterrupt = Output(Bool()) // to mem block - val commits = new RoqCommitIO - val roqDeqPtr = Output(new RoqPtr) + val lsq = new RoqLsqIO } }) + val difftestIO = IO(new Bundle() { + val fromRoq = new Bundle() { + val commit = Output(UInt(32.W)) + val thisPC = Output(UInt(XLEN.W)) + val thisINST = Output(UInt(32.W)) + val skip = Output(UInt(32.W)) + val wen = Output(UInt(32.W)) + val wdata = Output(Vec(CommitWidth, UInt(XLEN.W))) // set difftest width to 6 + val wdst = Output(Vec(CommitWidth, UInt(32.W))) // set difftest width to 6 + val wpc = Output(Vec(CommitWidth, UInt(XLEN.W))) // set difftest width to 6 + val isRVC = Output(UInt(32.W)) + val scFailed = Output(Bool()) + } + }) + difftestIO <> DontCare + + val trapIO = IO(new TrapIO()) + trapIO <> DontCare + val decode = Module(new DecodeStage) val brq = Module(new Brq) val rename = Module(new Rename) @@ -145,6 +163,11 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper { } roq.io.exeWbResults.last := brq.io.out + if (env.DualCoreDifftest) { + difftestIO.fromRoq <> roq.difftestIO + trapIO <> roq.trapIO + } + io.toIntBlock.redirect.valid := redirectValid io.toIntBlock.redirect.bits := redirect io.toFpBlock.redirect.valid := redirectValid @@ -161,6 +184,5 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper { io.roqio.exception.bits := roq.io.exception io.roqio.isInterrupt := roq.io.redirectOut.bits.interrupt // roq to mem block - io.roqio.roqDeqPtr := roq.io.roqDeqPtr - io.roqio.commits := roq.io.commits + io.roqio.lsq <> roq.io.lsq } diff --git a/src/main/scala/xiangshan/backend/IntegerBlock.scala b/src/main/scala/xiangshan/backend/IntegerBlock.scala index 52504f77498c4c5d1c24051a4921909585ecb5e3..87e8dafc11b64dbf79242ecc66335b62cbd2b054 100644 --- a/src/main/scala/xiangshan/backend/IntegerBlock.scala +++ b/src/main/scala/xiangshan/backend/IntegerBlock.scala @@ -92,6 +92,31 @@ class IntegerBlock val sbuffer = new FenceToSbuffer // to mem } }) + val difftestIO = IO(new Bundle() { + val fromCSR = new Bundle() { + val intrNO = Output(UInt(64.W)) + val cause = Output(UInt(64.W)) + val priviledgeMode = Output(UInt(2.W)) + val mstatus = Output(UInt(64.W)) + val sstatus = Output(UInt(64.W)) + val mepc = Output(UInt(64.W)) + val sepc = Output(UInt(64.W)) + val mtval = Output(UInt(64.W)) + val stval = Output(UInt(64.W)) + val mtvec = Output(UInt(64.W)) + val stvec = Output(UInt(64.W)) + val mcause = Output(UInt(64.W)) + val scause = Output(UInt(64.W)) + val satp = Output(UInt(64.W)) + val mip = Output(UInt(64.W)) + val mie = Output(UInt(64.W)) + val mscratch = Output(UInt(64.W)) + val sscratch = Output(UInt(64.W)) + val mideleg = Output(UInt(64.W)) + val medeleg = Output(UInt(64.W)) + } + }) + difftestIO <> DontCare val redirect = io.fromCtrlBlock.redirect @@ -210,6 +235,9 @@ class IntegerBlock jmpExeUnit.csrio <> io.csrio jmpExeUnit.fenceio <> io.fenceio + if (env.DualCoreDifftest) { + jmpExeUnit.difftestIO.fromCSR <> difftestIO.fromCSR + } // read int rf from ctrl block intRf.io.readPorts.zipWithIndex.map{ case(r, i) => r.addr := io.fromCtrlBlock.readRf(i) } diff --git a/src/main/scala/xiangshan/backend/MemBlock.scala b/src/main/scala/xiangshan/backend/MemBlock.scala index bb4e22eb44354cd32f468114e3393159a4e1ab89..4c430d4f79d21231f432eb69df0910cbda8e78f2 100644 --- a/src/main/scala/xiangshan/backend/MemBlock.scala +++ b/src/main/scala/xiangshan/backend/MemBlock.scala @@ -7,7 +7,7 @@ import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp} import freechips.rocketchip.tile.HasFPUParameters import xiangshan._ import xiangshan.backend.exu.Exu.{loadExuConfigs, storeExuConfigs} -import xiangshan.backend.roq.RoqPtr +import xiangshan.backend.roq.{RoqPtr, RoqLsqIO} import xiangshan.backend.exu._ import xiangshan.cache._ import xiangshan.mem._ @@ -77,12 +77,26 @@ class MemBlockImp val lsqio = new Bundle { val exceptionAddr = new ExceptionAddrIO // to csr - val commits = Flipped(new RoqCommitIO) // to lsq - val roqDeqPtr = Input(new RoqPtr) // to lsq + val roq = Flipped(new RoqLsqIO) // roq to lsq } val toDCachePrefetch = DecoupledIO(new MissReq) }) + val difftestIO = IO(new Bundle() { + val fromSbuffer = new Bundle() { + val sbufferResp = Output(Bool()) + val sbufferAddr = Output(UInt(64.W)) + val sbufferData = Output(Vec(64, UInt(8.W))) + val sbufferMask = Output(UInt(64.W)) + } + val fromSQ = new Bundle() { + val storeCommit = Output(UInt(2.W)) + val storeAddr = Output(Vec(2, UInt(64.W))) + val storeData = Output(Vec(2, UInt(64.W))) + val storeMask = Output(Vec(2, UInt(8.W))) + } + }) + difftestIO <> DontCare val dcache = outer.dcache.module val uncache = outer.uncache.module @@ -191,6 +205,10 @@ class MemBlockImp io.ptw <> dtlb.io.ptw dtlb.io.sfence <> io.sfence dtlb.io.csr <> io.tlbCsr + if (env.DualCoreDifftest) { + difftestIO.fromSbuffer <> sbuffer.difftestIO + difftestIO.fromSQ <> lsq.difftestIO.fromSQ + } // LoadUnit for (i <- 0 until exuParameters.LduCnt) { @@ -208,6 +226,7 @@ class MemBlockImp // passdown to lsq lsq.io.loadIn(i) <> loadUnits(i).io.lsq.loadIn lsq.io.ldout(i) <> loadUnits(i).io.lsq.ldout + lsq.io.loadDataForwarded(i) <> loadUnits(i).io.lsq.loadDataForwarded } // StoreUnit @@ -236,10 +255,9 @@ class MemBlockImp } // Lsq - lsq.io.commits <> io.lsqio.commits + lsq.io.roq <> io.lsqio.roq lsq.io.enq <> io.fromCtrlBlock.enqLsq lsq.io.brqRedirect <> io.fromCtrlBlock.redirect - lsq.io.roqDeqPtr <> io.lsqio.roqDeqPtr io.toCtrlBlock.replay <> lsq.io.rollback lsq.io.dcache <> dcache.io.lsu.lsq lsq.io.uncache <> uncache.io.lsq diff --git a/src/main/scala/xiangshan/backend/brq/Brq.scala b/src/main/scala/xiangshan/backend/brq/Brq.scala index 5ac58413b731167dfb1975d8eef3285a34a67f1e..c4571df26e72d791e61684f34464a075447928f9 100644 --- a/src/main/scala/xiangshan/backend/brq/Brq.scala +++ b/src/main/scala/xiangshan/backend/brq/Brq.scala @@ -116,6 +116,7 @@ class Brq extends XSModule with HasCircularQueuePtrHelper { io.redirectOut.valid := wbValid && wbIsMisPred io.redirectOut.bits := wbEntry.redirect + io.redirectOut.bits.level := RedirectLevel.flushAfter io.redirectOut.bits.brTag := BrqPtr(ptrFlagVec(writebackIdx), writebackIdx) io.out.valid := wbValid || wbIsAuipc @@ -315,21 +316,60 @@ class Brq extends XSModule with HasCircularQueuePtrHelper { val mbpRRight = predRight && isRType val mbpRWrong = predWrong && isRType - val predictor = io.cfiInfo.bits.bpuMeta.predictor + if(!env.FPGAPlatform && env.EnablePerfDebug) { + val predictor = io.cfiInfo.bits.bpuMeta.predictor - val ubtbRight = !io.cfiInfo.bits.isMisPred && !io.cfiInfo.bits.isReplay && predictor === 0.U - val ubtbWrong = io.cfiInfo.bits.isMisPred && !io.cfiInfo.bits.isReplay && predictor === 0.U + val cfiCountValid = io.cfiInfo.valid && !io.cfiInfo.bits.isReplay - val btbRight = !io.cfiInfo.bits.isMisPred && !io.cfiInfo.bits.isReplay && predictor === 1.U - val btbWrong = io.cfiInfo.bits.isMisPred && !io.cfiInfo.bits.isReplay && predictor === 1.U + val ubtbAns = io.cfiInfo.bits.bpuMeta.ubtbAns + val btbAns = io.cfiInfo.bits.bpuMeta.btbAns + val tageAns = io.cfiInfo.bits.bpuMeta.tageAns + val rasAns = io.cfiInfo.bits.bpuMeta.rasAns + val loopAns = io.cfiInfo.bits.bpuMeta.loopAns - val tageRight = !io.cfiInfo.bits.isMisPred && !io.cfiInfo.bits.isReplay && predictor === 2.U - val tageWrong = io.cfiInfo.bits.isMisPred && !io.cfiInfo.bits.isReplay && predictor === 2.U + // Pipeline stage counter + val s1Right = cfiCountValid && !io.cfiInfo.bits.isMisPred && predictor === 0.U + val s1Wrong = cfiCountValid && io.cfiInfo.bits.isMisPred && predictor === 0.U - val loopRight = !io.cfiInfo.bits.isMisPred && !io.cfiInfo.bits.isReplay && predictor === 3.U - val loopWrong = io.cfiInfo.bits.isMisPred && !io.cfiInfo.bits.isReplay && predictor === 3.U + val s2Right = cfiCountValid && !io.cfiInfo.bits.isMisPred && predictor === 1.U + val s2Wrong = cfiCountValid && io.cfiInfo.bits.isMisPred && predictor === 1.U + + val s3Right = cfiCountValid && !io.cfiInfo.bits.isMisPred && predictor === 2.U + val s3Wrong = cfiCountValid && io.cfiInfo.bits.isMisPred && predictor === 2.U + + // Predictor counter + // val ubtbRight = cfiCountValid && ubtbAns.hit && io.cfiInfo.bits.target === ubtbAns.target && io.cfiInfo.bits.taken === ubtbAns.taken + // val ubtbWrong = cfiCountValid && ubtbAns.hit && (io.cfiInfo.bits.target =/= ubtbAns.target || io.cfiInfo.bits.taken =/= ubtbAns.taken) + + val ubtbRight = cfiCountValid && ubtbAns.hit && Mux(ubtbAns.taken, + io.cfiInfo.bits.target === ubtbAns.target && io.cfiInfo.bits.taken === ubtbAns.taken, // taken + io.cfiInfo.bits.taken === ubtbAns.taken) // noTaken + val ubtbWrong = cfiCountValid && ubtbAns.hit && Mux(ubtbAns.taken, + io.cfiInfo.bits.target =/= ubtbAns.target || io.cfiInfo.bits.taken =/= ubtbAns.taken, // taken + io.cfiInfo.bits.taken =/= ubtbAns.taken) // noTaken + + val takenAndRight = ubtbAns.taken && ubtbRight + val takenButWrong = ubtbAns.taken && ubtbWrong + + // val btbRight = cfiCountValid && btbAns.hit && io.cfiInfo.bits.target === btbAns.target && io.cfiInfo.bits.taken === btbAns.taken + // val btbWrong = cfiCountValid && btbAns.hit && (io.cfiInfo.bits.target =/= btbAns.target || io.cfiInfo.bits.taken =/= btbAns.taken) + + val btbRight = cfiCountValid && btbAns.hit && Mux(btbAns.taken, + io.cfiInfo.bits.target === btbAns.target && io.cfiInfo.bits.taken === btbAns.taken, // taken + io.cfiInfo.bits.taken === btbAns.taken) // noTaken + val btbWrong = cfiCountValid && btbAns.hit && Mux(btbAns.taken, + io.cfiInfo.bits.target =/= btbAns.target || io.cfiInfo.bits.taken =/= btbAns.taken, // taken + io.cfiInfo.bits.taken =/= btbAns.taken) // noTaken + + val tageRight = cfiCountValid && io.cfiInfo.bits.pd.brType =/= "b10".U && io.cfiInfo.bits.taken === tageAns.taken // DontCare jal + val tageWrong = cfiCountValid && io.cfiInfo.bits.pd.brType =/= "b10".U && io.cfiInfo.bits.taken =/= tageAns.taken // DontCare jal + + val rasRight = cfiCountValid && io.cfiInfo.bits.pd.isRet && rasAns.hit && io.cfiInfo.bits.target === rasAns.target + val rasWrong = cfiCountValid && io.cfiInfo.bits.pd.isRet && rasAns.hit && io.cfiInfo.bits.target =/= rasAns.target + + val loopRight = cfiCountValid && loopAns.hit && io.cfiInfo.bits.taken === loopAns.taken + val loopWrong = cfiCountValid && loopAns.hit && io.cfiInfo.bits.taken =/= loopAns.taken - if(!env.FPGAPlatform){ ExcitingUtils.addSource(mbpInstr, "perfCntCondBpInstr", Perf) ExcitingUtils.addSource(mbpRight, "perfCntCondBpRight", Perf) ExcitingUtils.addSource(mbpWrong, "perfCntCondBpWrong", Perf) @@ -342,14 +382,26 @@ class Brq extends XSModule with HasCircularQueuePtrHelper { ExcitingUtils.addSource(mbpRRight, "perfCntCondBpRRight", Perf) ExcitingUtils.addSource(mbpRWrong, "perfCntCondBpRWrong", Perf) + ExcitingUtils.addSource(s1Right, "perfCntS1Right", Perf) + ExcitingUtils.addSource(s1Wrong, "perfCntS1Wrong", Perf) + ExcitingUtils.addSource(s2Right, "perfCntS2Right", Perf) + ExcitingUtils.addSource(s2Wrong, "perfCntS2Wrong", Perf) + ExcitingUtils.addSource(s3Right, "perfCntS3Right", Perf) + ExcitingUtils.addSource(s3Wrong, "perfCntS3Wrong", Perf) + ExcitingUtils.addSource(ubtbRight, "perfCntubtbRight", Perf) ExcitingUtils.addSource(ubtbWrong, "perfCntubtbWrong", Perf) ExcitingUtils.addSource(btbRight, "perfCntbtbRight", Perf) ExcitingUtils.addSource(btbWrong, "perfCntbtbWrong", Perf) ExcitingUtils.addSource(tageRight, "perfCnttageRight", Perf) ExcitingUtils.addSource(tageWrong, "perfCnttageWrong", Perf) + ExcitingUtils.addSource(rasRight, "perfCntrasRight", Perf) + ExcitingUtils.addSource(rasWrong, "perfCntrasWrong", Perf) ExcitingUtils.addSource(loopRight, "perfCntloopRight", Perf) ExcitingUtils.addSource(loopWrong, "perfCntloopWrong", Perf) + + ExcitingUtils.addSource(takenAndRight, "perfCntTakenAndRight", Perf) + ExcitingUtils.addSource(takenButWrong, "perfCntTakenButWrong", Perf) } val utilization = Mux(headPtr.flag === tailPtr.flag, tailPtr.value - headPtr.value, BrqSize.U + tailPtr.value - headPtr.value) diff --git a/src/main/scala/xiangshan/backend/dispatch/Dispatch1.scala b/src/main/scala/xiangshan/backend/dispatch/Dispatch1.scala index c62cd8bbdbb9922f0eb40e68844bf4f719ea62f9..9c4b35b3df0264446010d67b60adbd090fd07164 100644 --- a/src/main/scala/xiangshan/backend/dispatch/Dispatch1.scala +++ b/src/main/scala/xiangshan/backend/dispatch/Dispatch1.scala @@ -101,7 +101,8 @@ class Dispatch1 extends XSModule with HasExceptionNO { // update commitType updatedUop(i).ctrl.commitType := updatedCommitType(i) // update roqIdx, lqIdx, sqIdx - updatedUop(i).roqIdx := io.enqRoq.resp(i) + // updatedUop(i).roqIdx := io.enqRoq.resp(i) + XSError(io.fromRename(i).valid && updatedUop(i).roqIdx.asUInt =/= io.enqRoq.resp(i).asUInt, "they should equal") updatedUop(i).lqIdx := io.enqLsq.resp(i).lqIdx updatedUop(i).sqIdx := io.enqLsq.resp(i).sqIdx } diff --git a/src/main/scala/xiangshan/backend/exu/JumpExeUnit.scala b/src/main/scala/xiangshan/backend/exu/JumpExeUnit.scala index cf8e424c833b1d537a4afdbe7856f6d3e86c7b37..34503aa382a0b0f4342317552e0bbd6324f0e5ce 100644 --- a/src/main/scala/xiangshan/backend/exu/JumpExeUnit.scala +++ b/src/main/scala/xiangshan/backend/exu/JumpExeUnit.scala @@ -30,6 +30,31 @@ class JumpExeUnit extends Exu(jumpExeUnitCfg) val fencei = Output(Bool()) val sbuffer = new FenceToSbuffer }) + val difftestIO = IO(new Bundle() { + val fromCSR = new Bundle() { + val intrNO = Output(UInt(64.W)) + val cause = Output(UInt(64.W)) + val priviledgeMode = Output(UInt(2.W)) + val mstatus = Output(UInt(64.W)) + val sstatus = Output(UInt(64.W)) + val mepc = Output(UInt(64.W)) + val sepc = Output(UInt(64.W)) + val mtval = Output(UInt(64.W)) + val stval = Output(UInt(64.W)) + val mtvec = Output(UInt(64.W)) + val stvec = Output(UInt(64.W)) + val mcause = Output(UInt(64.W)) + val scause = Output(UInt(64.W)) + val satp = Output(UInt(64.W)) + val mip = Output(UInt(64.W)) + val mie = Output(UInt(64.W)) + val mscratch = Output(UInt(64.W)) + val sscratch = Output(UInt(64.W)) + val mideleg = Output(UInt(64.W)) + val medeleg = Output(UInt(64.W)) + } + }) + difftestIO <> DontCare val jmp = supportedFunctionUnits.collectFirst{ case j: Jump => j @@ -58,6 +83,10 @@ class JumpExeUnit extends Exu(jumpExeUnitCfg) csr.csrio.externalInterrupt <> csrio.externalInterrupt csr.csrio.tlb <> csrio.tlb + if (env.DualCoreDifftest) { + difftestIO.fromCSR <> csr.difftestIO + } + fenceio.sfence <> fence.sfence fenceio.fencei <> fence.fencei fenceio.sbuffer <> fence.toSbuffer diff --git a/src/main/scala/xiangshan/backend/fu/CSR.scala b/src/main/scala/xiangshan/backend/fu/CSR.scala index f80ac03b1665f05f0eee85c83d7f025d69ecaa16..4949ac8048ebc1b05e3067dde5fc30f2bf42dbdc 100644 --- a/src/main/scala/xiangshan/backend/fu/CSR.scala +++ b/src/main/scala/xiangshan/backend/fu/CSR.scala @@ -145,6 +145,29 @@ class CSR extends FunctionUnit with HasCSRConst // TLB val tlb = Output(new TlbCsrBundle) }) + val difftestIO = IO(new Bundle() { + val intrNO = Output(UInt(64.W)) + val cause = Output(UInt(64.W)) + val priviledgeMode = Output(UInt(2.W)) + val mstatus = Output(UInt(64.W)) + val sstatus = Output(UInt(64.W)) + val mepc = Output(UInt(64.W)) + val sepc = Output(UInt(64.W)) + val mtval = Output(UInt(64.W)) + val stval = Output(UInt(64.W)) + val mtvec = Output(UInt(64.W)) + val stvec = Output(UInt(64.W)) + val mcause = Output(UInt(64.W)) + val scause = Output(UInt(64.W)) + val satp = Output(UInt(64.W)) + val mip = Output(UInt(64.W)) + val mie = Output(UInt(64.W)) + val mscratch = Output(UInt(64.W)) + val sscratch = Output(UInt(64.W)) + val mideleg = Output(UInt(64.W)) + val medeleg = Output(UInt(64.W)) + }) + difftestIO <> DontCare val cfIn = io.in.bits.uop.cf val cfOut = Wire(new CtrlFlow) @@ -812,24 +835,34 @@ class CSR extends FunctionUnit with HasCSRConst "btbWrong" -> (0x1033, "perfCntbtbWrong"), "tageRight" -> (0x1034, "perfCnttageRight"), "tageWrong" -> (0x1035, "perfCnttageWrong"), - "loopRight" -> (0x1036, "perfCntloopRight"), - "loopWrong" -> (0x1037, "perfCntloopWrong") + "rasRight" -> (0x1036, "perfCntrasRight"), + "rasWrong" -> (0x1037, "perfCntrasWrong"), + "loopRight" -> (0x1038, "perfCntloopRight"), + "loopWrong" -> (0x1039, "perfCntloopWrong"), + "s1Right" -> (0x103a, "perfCntS1Right"), + "s1Wrong" -> (0x103b, "perfCntS1Wrong"), + "s2Right" -> (0x103c, "perfCntS2Right"), + "s2Wrong" -> (0x103d, "perfCntS2Wrong"), + "s3Right" -> (0x103e, "perfCntS3Right"), + "s3Wrong" -> (0x103f, "perfCntS3Wrong"), + "takenAndRight" -> (0x1040, "perfCntTakenAndRight"), + "takenButWrong" -> (0x1041, "perfCntTakenButWrong"), // "L2cacheHit" -> (0x1023, "perfCntCondL2cacheHit") ) ++ ( - (0 until dcacheParameters.nMissEntries).map(i => - ("DCacheMissQueuePenalty" + Integer.toString(i, 10), (0x102a + i, "perfCntDCacheMissQueuePenaltyEntry" + Integer.toString(i, 10))) + (0 until dcacheParameters.nMissEntries).map(i => + ("DCacheMissQueuePenalty" + Integer.toString(i, 10), (0x1042 + i, "perfCntDCacheMissQueuePenaltyEntry" + Integer.toString(i, 10))) ).toMap ) ++ ( (0 until icacheParameters.nMissEntries).map(i => - ("ICacheMissQueuePenalty" + Integer.toString(i, 10), (0x102a + dcacheParameters.nMissEntries + i, "perfCntICacheMissQueuePenaltyEntry" + Integer.toString(i, 10))) + ("ICacheMissQueuePenalty" + Integer.toString(i, 10), (0x1042 + dcacheParameters.nMissEntries + i, "perfCntICacheMissQueuePenaltyEntry" + Integer.toString(i, 10))) ).toMap ) ++ ( (0 until l1plusPrefetcherParameters.nEntries).map(i => - ("L1+PrefetchPenalty" + Integer.toString(i, 10), (0x102a + dcacheParameters.nMissEntries + icacheParameters.nMissEntries + i, "perfCntL1plusPrefetchPenaltyEntry" + Integer.toString(i, 10))) + ("L1+PrefetchPenalty" + Integer.toString(i, 10), (0x1042 + dcacheParameters.nMissEntries + icacheParameters.nMissEntries + i, "perfCntL1plusPrefetchPenaltyEntry" + Integer.toString(i, 10))) ).toMap ) ++ ( (0 until l2PrefetcherParameters.nEntries).map(i => - ("L2PrefetchPenalty" + Integer.toString(i, 10), (0x102a + dcacheParameters.nMissEntries + icacheParameters.nMissEntries + l1plusPrefetcherParameters.nEntries + i, "perfCntL2PrefetchPenaltyEntry" + Integer.toString(i, 10))) + ("L2PrefetchPenalty" + Integer.toString(i, 10), (0x1042 + dcacheParameters.nMissEntries + icacheParameters.nMissEntries + l1plusPrefetcherParameters.nEntries + i, "perfCntL2PrefetchPenaltyEntry" + Integer.toString(i, 10))) ).toMap ) @@ -845,13 +878,15 @@ class CSR extends FunctionUnit with HasCSRConst // } // } } - + val xstrap = WireInit(false.B) if (!env.FPGAPlatform && EnableBPU) { ExcitingUtils.addSink(xstrap, "XSTRAP", ConnectionType.Debug) } def readWithScala(addr: Int): UInt = mapping(addr)._1 + val difftestIntrNO = Mux(raiseIntr, causeNO, 0.U) + if (!env.FPGAPlatform) { // display all perfcnt when nooptrap is executed @@ -862,7 +897,6 @@ class CSR extends FunctionUnit with HasCSRConst } } - val difftestIntrNO = Mux(raiseIntr, causeNO, 0.U) ExcitingUtils.addSource(difftestIntrNO, "difftestIntrNOfromCSR") ExcitingUtils.addSource(causeNO, "difftestCausefromCSR") ExcitingUtils.addSource(priviledgeMode, "difftestMode", Debug) @@ -884,4 +918,27 @@ class CSR extends FunctionUnit with HasCSRConst ExcitingUtils.addSource(mideleg, "difftestMideleg", Debug) ExcitingUtils.addSource(medeleg, "difftestMedeleg", Debug) } + + if (env.DualCoreDifftest) { + difftestIO.intrNO := RegNext(difftestIntrNO) + difftestIO.cause := RegNext(causeNO) + difftestIO.priviledgeMode := priviledgeMode + difftestIO.mstatus := mstatus + difftestIO.sstatus := mstatus & sstatusRmask + difftestIO.mepc := mepc + difftestIO.sepc := sepc + difftestIO.mtval:= mtval + difftestIO.stval:= stval + difftestIO.mtvec := mtvec + difftestIO.stvec := stvec + difftestIO.mcause := mcause + difftestIO.scause := scause + difftestIO.satp := satp + difftestIO.mip := mipReg + difftestIO.mie := mie + difftestIO.mscratch := mscratch + difftestIO.sscratch := sscratch + difftestIO.mideleg := mideleg + difftestIO.medeleg := medeleg + } } diff --git a/src/main/scala/xiangshan/backend/regfile/Regfile.scala b/src/main/scala/xiangshan/backend/regfile/Regfile.scala index 86f5e2254ad5bb13e1da98e8baff83b2fd611a41..88e5f8550cb16c6bebfb474ad17b2550b93dfca8 100644 --- a/src/main/scala/xiangshan/backend/regfile/Regfile.scala +++ b/src/main/scala/xiangshan/backend/regfile/Regfile.scala @@ -4,6 +4,22 @@ import chisel3._ import chisel3.util._ import xiangshan._ +object hartIdRFInt extends (() => Int) { + var x = 0 + def apply(): Int = { + x = x + 1 + x-1 + } +} + +object hartIdRFFp extends (() => Int) { + var x = 0 + def apply(): Int = { + x = x + 1 + x-1 + } +} + class RfReadPort(len: Int) extends XSBundle { val addr = Input(UInt(PhyRegIdxWidth.W)) val data = Output(UInt(len.W)) @@ -65,6 +81,29 @@ class Regfile ExcitingUtils.Debug ) } + + if (env.DualCoreDifftest) { + val id = if (hasZero) hartIdRFInt() else hartIdRFFp() + val debugArchRat = WireInit(VecInit(Seq.fill(32)(0.U(PhyRegIdxWidth.W)))) + ExcitingUtils.addSink( + debugArchRat, + if(hasZero) s"DEBUG_INI_ARCH_RAT$id" else s"DEBUG_FP_ARCH_RAT$id", + ExcitingUtils.Debug + ) + + val debugArchReg = WireInit(VecInit(debugArchRat.zipWithIndex.map( + x => if(hasZero){ + if(x._2 == 0) 0.U else mem(x._1) + } else { + ieee(mem(x._1)) + } + ))) + ExcitingUtils.addSource( + debugArchReg, + if(hasZero) s"DEBUG_INT_ARCH_REG$id" else s"DEBUG_FP_ARCH_REG$id", + ExcitingUtils.Debug + ) + } } else { val regfile = Module(new regfile_160x64_10w16r_sim) diff --git a/src/main/scala/xiangshan/backend/rename/Rename.scala b/src/main/scala/xiangshan/backend/rename/Rename.scala index 50d60d1f6b13fd82144a6099e171d8196e54efcd..5f48da087afda7f6d8bc29e96f267ccd622a46a0 100644 --- a/src/main/scala/xiangshan/backend/rename/Rename.scala +++ b/src/main/scala/xiangshan/backend/rename/Rename.scala @@ -4,6 +4,7 @@ import chisel3._ import chisel3.util._ import xiangshan._ import utils._ +import xiangshan.backend.roq.RoqPtr class RenameBypassInfo extends XSBundle { val lsrc1_bypass = MixedVec(List.tabulate(RenameWidth-1)(i => UInt((i+1).W))) @@ -12,7 +13,7 @@ class RenameBypassInfo extends XSBundle { val ldest_bypass = MixedVec(List.tabulate(RenameWidth-1)(i => UInt((i+1).W))) } -class Rename extends XSModule { +class Rename extends XSModule with HasCircularQueuePtrHelper { val io = IO(new Bundle() { val redirect = Flipped(ValidIO(new Redirect)) val roqCommits = Flipped(new RoqCommitIO) @@ -51,6 +52,7 @@ class Rename extends XSModule { freelist.redirect := io.redirect freelist.walk.valid := io.roqCommits.isWalk } + val canOut = io.out(0).ready && fpFreeList.req.canAlloc && intFreeList.req.canAlloc && !io.roqCommits.isWalk def needDestReg[T <: CfCtrl](fp: Boolean, x: T): Bool = { {if(fp) x.ctrl.fpWen else x.ctrl.rfWen && (x.ctrl.ldest =/= 0.U)} @@ -64,6 +66,16 @@ class Rename extends XSModule { fpFreeList.req.doAlloc := intFreeList.req.canAlloc && io.out(0).ready intFreeList.req.doAlloc := fpFreeList.req.canAlloc && io.out(0).ready + // speculatively assign the instruction with an roqIdx + val validCount = PopCount(io.in.map(_.valid)) + val roqIdxHead = RegInit(0.U.asTypeOf(new RoqPtr)) + val lastCycleMisprediction = RegNext(io.redirect.valid && !io.redirect.bits.isUnconditional() && !io.redirect.bits.flushItself()) + val roqIdxHeadNext = Mux(io.redirect.valid, + Mux(io.redirect.bits.isUnconditional(), 0.U.asTypeOf(new RoqPtr), io.redirect.bits.roqIdx), + Mux(lastCycleMisprediction, roqIdxHead + 1.U, Mux(canOut, roqIdxHead + validCount, roqIdxHead)) + ) + roqIdxHead := roqIdxHeadNext + /** * Rename: allocate free physical register and update rename table */ @@ -85,7 +97,6 @@ class Rename extends XSModule { val needFpDest = Wire(Vec(RenameWidth, Bool())) val needIntDest = Wire(Vec(RenameWidth, Bool())) val hasValid = Cat(io.in.map(_.valid)).orR - val canOut = io.out(0).ready && fpFreeList.req.canAlloc && intFreeList.req.canAlloc && !io.roqCommits.isWalk for (i <- 0 until RenameWidth) { uops(i).cf := io.in(i).bits.cf uops(i).ctrl := io.in(i).bits.ctrl @@ -115,6 +126,8 @@ class Rename extends XSModule { ) ) + uops(i).roqIdx := roqIdxHead + i.U + io.out(i).valid := io.in(i).valid && intFreeList.req.canAlloc && fpFreeList.req.canAlloc && !io.roqCommits.isWalk io.out(i).bits := uops(i) diff --git a/src/main/scala/xiangshan/backend/rename/RenameTable.scala b/src/main/scala/xiangshan/backend/rename/RenameTable.scala index 04b0939c29bd9a9aeb96faff8bea22b5ff9327a2..dade8c9910fd203e79a42e810da1eef4ac0b7874 100644 --- a/src/main/scala/xiangshan/backend/rename/RenameTable.scala +++ b/src/main/scala/xiangshan/backend/rename/RenameTable.scala @@ -15,6 +15,22 @@ class RatWritePort extends XSBundle { val wdata = Input(UInt(PhyRegIdxWidth.W)) } +object hartIdRTInt extends (() => Int) { + var x = 0 + def apply(): Int = { + x = x + 1 + x-1 + } +} + +object hartIdRTFp extends (() => Int) { + var x = 0 + def apply(): Int = { + x = x + 1 + x-1 + } +} + class RenameTable(float: Boolean) extends XSModule { val io = IO(new Bundle() { val redirect = Flipped(ValidIO(new Redirect)) @@ -65,4 +81,13 @@ class RenameTable(float: Boolean) extends XSModule { ExcitingUtils.Debug ) } + + if (env.DualCoreDifftest) { + val id = if (float) hartIdRTFp() else hartIdRTInt() + ExcitingUtils.addSource( + arch_table, + if(float) s"DEBUG_FP_ARCH_RAT$id" else s"DEBUG_INI_ARCH_RAT$id", + ExcitingUtils.Debug + ) + } } diff --git a/src/main/scala/xiangshan/backend/roq/Roq.scala b/src/main/scala/xiangshan/backend/roq/Roq.scala index f758fc3878ae3582aa68b41451f274d75cb4eae0..21f513073bb6b3b63a73392e3ea47739151487f7 100644 --- a/src/main/scala/xiangshan/backend/roq/Roq.scala +++ b/src/main/scala/xiangshan/backend/roq/Roq.scala @@ -43,6 +43,14 @@ class RoqCSRIO extends XSBundle { } } +class RoqLsqIO extends XSBundle { + val lcommit = Output(UInt(3.W)) + val scommit = Output(UInt(3.W)) + val pendingld = Output(Bool()) + val pendingst = Output(Bool()) + val commit = Output(Bool()) +} + class RoqEnqIO extends XSBundle { val canAccept = Output(Bool()) val isEmpty = Output(Bool()) @@ -57,7 +65,6 @@ class RoqDispatchData extends RoqCommitInfo { } class RoqWbData extends XSBundle { - val fflags = UInt(5.W) val flushPipe = Bool() } @@ -204,11 +211,29 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper { // exu + brq val exeWbResults = Vec(numWbPorts, Flipped(ValidIO(new ExuOutput))) val commits = new RoqCommitIO + val lsq = new RoqLsqIO val bcommit = Output(UInt(BrTagWidth.W)) val roqDeqPtr = Output(new RoqPtr) val csr = new RoqCSRIO }) + val difftestIO = IO(new Bundle() { + val commit = Output(UInt(32.W)) + val thisPC = Output(UInt(XLEN.W)) + val thisINST = Output(UInt(32.W)) + val skip = Output(UInt(32.W)) + val wen = Output(UInt(32.W)) + val wdata = Output(Vec(CommitWidth, UInt(XLEN.W))) // set difftest width to 6 + val wdst = Output(Vec(CommitWidth, UInt(32.W))) // set difftest width to 6 + val wpc = Output(Vec(CommitWidth, UInt(XLEN.W))) // set difftest width to 6 + val isRVC = Output(UInt(32.W)) + val scFailed = Output(Bool()) + }) + difftestIO <> DontCare + + val trapIO = IO(new TrapIO()) + trapIO <> DontCare + // instvalid field // val valid = RegInit(VecInit(List.fill(RoqSize)(false.B))) val valid = Mem(RoqSize, Bool()) @@ -264,6 +289,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper { val writebackDataRead = writebackData.io.rdata val exceptionDataRead = Wire(Vec(CommitWidth, ExceptionVec())) + val fflagsDataRead = Wire(Vec(CommitWidth, UInt(5.W))) io.roqDeqPtr := deqPtr @@ -353,8 +379,6 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper { io.exception := debug_deqUop io.exception.ctrl.commitType := deqDispatchData.commitType - io.exception.lqIdx := deqDispatchData.lqIdx - io.exception.sqIdx := deqDispatchData.sqIdx io.exception.cf.pc := deqDispatchData.pc io.exception.cf.exceptionVec := deqExceptionVec io.exception.cf.crossPageIPFFix := deqDispatchData.crossPageIPFFix @@ -391,7 +415,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper { }).unzip val fflags = Wire(Valid(UInt(5.W))) fflags.valid := Mux(io.commits.isWalk, false.B, Cat(wflags).orR()) - fflags.bits := wflags.zip(writebackDataRead.map(_.fflags)).map({ + fflags.bits := wflags.zip(fflagsDataRead).map({ case (w, f) => Mux(w, f, 0.U) }).reduce(_|_) val dirty_fs = Mux(io.commits.isWalk, false.B, Cat(fpWen).orR()) @@ -425,7 +449,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper { io.commits.info(i).pdest, io.commits.info(i).old_pdest, debug_exuData(deqPtrVec(i).value), - writebackDataRead(i).fflags.asUInt + fflagsDataRead(i) ) XSInfo(state === s_walk && io.commits.valid(i), "walked pc %x wen %d ldst %d data %x\n", debug_microOp(walkPtrVec(i).value).cf.pc, @@ -442,12 +466,22 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper { io.commits.info.map(info => dontTouch(info.pc)) } + // sync fflags/dirty_fs to csr io.csr.fflags := fflags io.csr.dirty_fs := dirty_fs + // commit branch to brq val cfiCommitVec = VecInit(io.commits.valid.zip(io.commits.info.map(_.commitType)).map{case(v, t) => v && CommitType.isBranch(t)}) io.bcommit := Mux(io.commits.isWalk, 0.U, PopCount(cfiCommitVec)) + // commit load/store to lsq + val ldCommitVec = VecInit((0 until CommitWidth).map(i => io.commits.valid(i) && io.commits.info(i).commitType === CommitType.LOAD)) + val stCommitVec = VecInit((0 until CommitWidth).map(i => io.commits.valid(i) && io.commits.info(i).commitType === CommitType.STORE)) + io.lsq.lcommit := Mux(io.commits.isWalk, 0.U, PopCount(ldCommitVec)) + io.lsq.scommit := Mux(io.commits.isWalk, 0.U, PopCount(stCommitVec)) + io.lsq.pendingld := !io.commits.isWalk && io.commits.info(0).commitType === CommitType.LOAD && valid(deqPtr.value) + io.lsq.pendingst := !io.commits.isWalk && io.commits.info(0).commitType === CommitType.STORE && valid(deqPtr.value) + io.lsq.commit := !io.commits.isWalk && io.commits.valid(0) /** * state changes @@ -617,8 +651,6 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper { wdata.commitType := req.ctrl.commitType wdata.pdest := req.pdest wdata.old_pdest := req.old_pdest - wdata.lqIdx := req.lqIdx - wdata.sqIdx := req.sqIdx wdata.pc := req.cf.pc wdata.crossPageIPFFix := req.cf.crossPageIPFFix // wdata.exceptionVec := req.cf.exceptionVec @@ -628,13 +660,13 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper { writebackData.io.wen := io.exeWbResults.map(_.valid) writebackData.io.waddr := io.exeWbResults.map(_.bits.uop.roqIdx.value) writebackData.io.wdata.zip(io.exeWbResults.map(_.bits)).map{ case (wdata, wb) => - wdata.fflags := wb.fflags wdata.flushPipe := wb.uop.ctrl.flushPipe } writebackData.io.raddr := commitReadAddr_next for (i <- 0 until 16) { val exceptionData = Module(new SyncDataModuleTemplate(Bool(), RoqSize, CommitWidth, RenameWidth + writebackCount(i))) + exceptionData.suggestName("exceptionData") var wPortIdx = 0 for (j <- 0 until RenameWidth) { exceptionData.io.wen (wPortIdx) := canEnqueue(j) @@ -675,6 +707,30 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper { exceptionDataRead.zip(exceptionData.io.rdata).map{ case (d, r) => d(i) := r } } + val fflagsDataModule = Module(new SyncDataModuleTemplate(UInt(5.W), RoqSize, CommitWidth, 7)) + var wPortIdx = 0 + // 4 FMACs + for (i <- 0 until 4) { + fflagsDataModule.io.wen (wPortIdx) := io.exeWbResults(8+i).valid + fflagsDataModule.io.waddr(wPortIdx) := io.exeWbResults(8+i).bits.uop.roqIdx.value + fflagsDataModule.io.wdata(wPortIdx) := io.exeWbResults(8+i).bits.fflags + wPortIdx = wPortIdx + 1 + } + // 2 FMISCs (the first one includes I2F from JumpUnit) + for (i <- 0 until 2) { + fflagsDataModule.io.wen (wPortIdx) := io.exeWbResults(14+i).valid + fflagsDataModule.io.waddr(wPortIdx) := io.exeWbResults(14+i).bits.uop.roqIdx.value + fflagsDataModule.io.wdata(wPortIdx) := io.exeWbResults(14+i).bits.fflags + wPortIdx = wPortIdx + 1 + } + // 1 FMISC (Int Wb) + fflagsDataModule.io.wen (wPortIdx) := io.exeWbResults(7).valid + fflagsDataModule.io.waddr(wPortIdx) := io.exeWbResults(7).bits.uop.roqIdx.value + fflagsDataModule.io.wdata(wPortIdx) := io.exeWbResults(7).bits.fflags + fflagsDataModule.io.raddr := VecInit(deqPtrVec_next.map(_.value)) + fflagsDataRead := fflagsDataModule.io.rdata + + /** * debug info */ @@ -718,55 +774,59 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper { instrCnt := instrCnt + retireCounter io.csr.perfinfo.retiredInstr := RegNext(retireCounter) - if(!env.FPGAPlatform) { - - //difftest signals - val firstValidCommit = (deqPtr + PriorityMux(io.commits.valid, VecInit(List.tabulate(CommitWidth)(_.U)))).value - - val skip = Wire(Vec(CommitWidth, Bool())) - val wen = Wire(Vec(CommitWidth, Bool())) - val wdata = Wire(Vec(CommitWidth, UInt(XLEN.W))) - val wdst = Wire(Vec(CommitWidth, UInt(32.W))) - val diffTestDebugLrScValid = Wire(Vec(CommitWidth, Bool())) - val wpc = Wire(Vec(CommitWidth, UInt(XLEN.W))) - val trapVec = Wire(Vec(CommitWidth, Bool())) - val isRVC = Wire(Vec(CommitWidth, Bool())) - for(i <- 0 until CommitWidth){ - // io.commits(i).valid - val idx = deqPtrVec(i).value - val uop = debug_microOp(idx) - val DifftestSkipSC = false - if(!DifftestSkipSC){ - skip(i) := (debug_exuDebug(idx).isMMIO || debug_exuDebug(idx).isPerfCnt) && io.commits.valid(i) - }else{ - skip(i) := ( - debug_exuDebug(idx).isMMIO || - debug_exuDebug(idx).isPerfCnt || - uop.ctrl.fuType === FuType.mou && uop.ctrl.fuOpType === LSUOpType.sc_d || - uop.ctrl.fuType === FuType.mou && uop.ctrl.fuOpType === LSUOpType.sc_w - ) && io.commits.valid(i) - } - wen(i) := io.commits.valid(i) && uop.ctrl.rfWen && uop.ctrl.ldest =/= 0.U - wdata(i) := debug_exuData(idx) - wdst(i) := uop.ctrl.ldest - diffTestDebugLrScValid(i) := uop.diffTestDebugLrScValid - wpc(i) := SignExt(uop.cf.pc, XLEN) - trapVec(i) := io.commits.valid(i) && (state===s_idle) && uop.ctrl.isXSTrap - isRVC(i) := uop.cf.brUpdate.pd.isRVC + //difftest signals + val firstValidCommit = (deqPtr + PriorityMux(io.commits.valid, VecInit(List.tabulate(CommitWidth)(_.U)))).value + + val skip = Wire(Vec(CommitWidth, Bool())) + val wen = Wire(Vec(CommitWidth, Bool())) + val wdata = Wire(Vec(CommitWidth, UInt(XLEN.W))) + val wdst = Wire(Vec(CommitWidth, UInt(32.W))) + val diffTestDebugLrScValid = Wire(Vec(CommitWidth, Bool())) + val wpc = Wire(Vec(CommitWidth, UInt(XLEN.W))) + val trapVec = Wire(Vec(CommitWidth, Bool())) + val isRVC = Wire(Vec(CommitWidth, Bool())) + for(i <- 0 until CommitWidth) { + // io.commits(i).valid + val idx = deqPtrVec(i).value + val uop = debug_microOp(idx) + val DifftestSkipSC = false + if(!DifftestSkipSC){ + skip(i) := (debug_exuDebug(idx).isMMIO || debug_exuDebug(idx).isPerfCnt) && io.commits.valid(i) + }else{ + skip(i) := ( + debug_exuDebug(idx).isMMIO || + debug_exuDebug(idx).isPerfCnt || + uop.ctrl.fuType === FuType.mou && uop.ctrl.fuOpType === LSUOpType.sc_d || + uop.ctrl.fuType === FuType.mou && uop.ctrl.fuOpType === LSUOpType.sc_w + ) && io.commits.valid(i) } + wen(i) := io.commits.valid(i) && uop.ctrl.rfWen && uop.ctrl.ldest =/= 0.U + wdata(i) := debug_exuData(idx) + wdst(i) := uop.ctrl.ldest + diffTestDebugLrScValid(i) := uop.diffTestDebugLrScValid + wpc(i) := SignExt(uop.cf.pc, XLEN) + trapVec(i) := io.commits.valid(i) && (state===s_idle) && uop.ctrl.isXSTrap + isRVC(i) := uop.cf.brUpdate.pd.isRVC + } + val retireCounterFix = Mux(io.redirectOut.valid, 1.U, retireCounter) + val retirePCFix = SignExt(Mux(io.redirectOut.valid, debug_deqUop.cf.pc, debug_microOp(firstValidCommit).cf.pc), XLEN) + val retireInstFix = Mux(io.redirectOut.valid, debug_deqUop.cf.instr, debug_microOp(firstValidCommit).cf.instr) - val scFailed = !diffTestDebugLrScValid(0) && - debug_deqUop.ctrl.fuType === FuType.mou && - (debug_deqUop.ctrl.fuOpType === LSUOpType.sc_d || debug_deqUop.ctrl.fuOpType === LSUOpType.sc_w) + val scFailed = !diffTestDebugLrScValid(0) && + debug_deqUop.ctrl.fuType === FuType.mou && + (debug_deqUop.ctrl.fuOpType === LSUOpType.sc_d || debug_deqUop.ctrl.fuOpType === LSUOpType.sc_w) + + val hitTrap = trapVec.reduce(_||_) + val trapCode = PriorityMux(wdata.zip(trapVec).map(x => x._2 -> x._1)) + val trapPC = SignExt(PriorityMux(wpc.zip(trapVec).map(x => x._2 ->x._1)), XLEN) + + if (!env.FPGAPlatform) { val difftestIntrNO = WireInit(0.U(XLEN.W)) val difftestCause = WireInit(0.U(XLEN.W)) ExcitingUtils.addSink(difftestIntrNO, "difftestIntrNOfromCSR") ExcitingUtils.addSink(difftestCause, "difftestCausefromCSR") XSDebug(difftestIntrNO =/= 0.U, "difftest intrNO set %x\n", difftestIntrNO) - val retireCounterFix = Mux(io.redirectOut.valid, 1.U, retireCounter) - val retirePCFix = SignExt(Mux(io.redirectOut.valid, debug_deqUop.cf.pc, debug_microOp(firstValidCommit).cf.pc), XLEN) - val retireInstFix = Mux(io.redirectOut.valid, debug_deqUop.cf.instr, debug_microOp(firstValidCommit).cf.instr) ExcitingUtils.addSource(RegNext(retireCounterFix), "difftestCommit", ExcitingUtils.Debug) ExcitingUtils.addSource(RegNext(retirePCFix), "difftestThisPC", ExcitingUtils.Debug)//first valid PC @@ -781,10 +841,6 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper { ExcitingUtils.addSource(RegNext(difftestIntrNO), "difftestIntrNO", ExcitingUtils.Debug) ExcitingUtils.addSource(RegNext(difftestCause), "difftestCause", ExcitingUtils.Debug) - val hitTrap = trapVec.reduce(_||_) - val trapCode = PriorityMux(wdata.zip(trapVec).map(x => x._2 -> x._1)) - val trapPC = SignExt(PriorityMux(wpc.zip(trapVec).map(x => x._2 ->x._1)), XLEN) - ExcitingUtils.addSource(RegNext(hitTrap), "trapValid") ExcitingUtils.addSource(RegNext(trapCode), "trapCode") ExcitingUtils.addSource(RegNext(trapPC), "trapPC") @@ -795,4 +851,23 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper { ExcitingUtils.addSource(hitTrap, "XSTRAP", ConnectionType.Debug) } } + + if (env.DualCoreDifftest) { + difftestIO.commit := RegNext(retireCounterFix) + difftestIO.thisPC := RegNext(retirePCFix) + difftestIO.thisINST := RegNext(retireInstFix) + difftestIO.skip := RegNext(skip.asUInt) + difftestIO.wen := RegNext(wen.asUInt) + difftestIO.wdata := RegNext(wdata) + difftestIO.wdst := RegNext(wdst) + difftestIO.wpc := RegNext(wpc) + difftestIO.isRVC := RegNext(isRVC.asUInt) + difftestIO.scFailed := RegNext(scFailed) + + trapIO.valid := RegNext(hitTrap) + trapIO.code := RegNext(trapCode) + trapIO.pc := RegNext(trapPC) + trapIO.cycleCnt := RegNext(GTimer()) + trapIO.instrCnt := RegNext(instrCnt) + } } diff --git a/src/main/scala/xiangshan/cache/icache.scala b/src/main/scala/xiangshan/cache/icache.scala index 6a5124f9dc24036cf216a61dd6b814f9a73fe54f..4410ee197c4a5186de744b118d25ed01f9e4d946 100644 --- a/src/main/scala/xiangshan/cache/icache.scala +++ b/src/main/scala/xiangshan/cache/icache.scala @@ -16,6 +16,7 @@ case class ICacheParameters( nTLBEntries: Int = 32, tagECC: Option[String] = None, dataECC: Option[String] = None, + replacer: Option[String] = Some("random"), nSDQ: Int = 17, nRPQ: Int = 16, nMissEntries: Int = 1, @@ -25,7 +26,7 @@ case class ICacheParameters( def tagCode: Code = Code.fromString(tagECC) def dataCode: Code = Code.fromString(dataECC) - def replacement = new RandomReplacement(nWays) + def replacement = ReplacementPolicy.fromString(replacer,nWays,nSets) } trait HasICacheParameters extends HasL1CacheParameters with HasIFUConst with HasInstrMMIOConst { @@ -33,7 +34,7 @@ trait HasICacheParameters extends HasL1CacheParameters with HasIFUConst with Has val groupAlign = log2Up(cacheParams.blockBytes) val packetInstNum = packetBytes/instBytes val packetInstNumBit = log2Up(packetInstNum) - val ptrHighBit = log2Up(groupBytes) - 1 + val ptrHighBit = log2Up(groupBytes) - 1 val ptrLowBit = log2Up(packetBytes) val encUnitBits = 8 val bankRows = 2 @@ -51,7 +52,7 @@ trait HasICacheParameters extends HasL1CacheParameters with HasIFUConst with Has // def encMetaBits = cacheParams.tagCode.width(tagBits) def metaEntryBits = encMetaBits - def encDataBits = cacheParams.dataCode.width(encUnitBits) + def encDataBits = cacheParams.dataCode.width(encUnitBits) def dataEntryBits = encDataBits * bankUnitNum // def encDataBits // def encCacheline @@ -75,7 +76,6 @@ abstract class ICacheBundle extends XSBundle abstract class ICacheModule extends XSModule with HasICacheParameters - with ICacheBase with HasFrontEndExceptionNo abstract class ICacheArray extends XSModule @@ -122,39 +122,6 @@ class ICacheIO extends ICacheBundle val pd_out = Output(new PreDecodeResp) } -/* ------------------------------------------------------------ - * The 3-stage pipeline register - * ------------------------------------------------------------ - */ -trait ICacheBase extends HasICacheParameters -{ - //---------------------------- - // Stage 1 - //---------------------------- - // val s1_valid = WireInit(false.B) - val s1_req_pc = Wire(UInt(VAddrBits.W)) - val s1_req_mask = Wire(UInt(PredictWidth.W)) - val s1_fire = WireInit(false.B) - - //---------------------------- - // Stage 2 - //---------------------------- - val s2_valid = RegInit(false.B) - val s2_req_pc = RegEnable(next = s1_req_pc,init = 0.U, enable = s1_fire) - val s2_req_mask = RegEnable(next = s1_req_mask,init = 0.U, enable = s1_fire) - val s2_ready = WireInit(false.B) - val s2_fire = WireInit(false.B) - - //---------------------------- - // Stage 3 - //---------------------------- - val s3_valid = RegInit(false.B) - val s3_req_pc = RegEnable(next = s2_req_pc,init = 0.U, enable = s2_fire) - val s3_req_mask = RegEnable(next = s2_req_mask,init = 0.U, enable = s2_fire) - val s3_ready = WireInit(false.B) - -} - class ICacheMetaWriteBundle extends ICacheBundle { val virIdx = UInt(idxBits.W) @@ -255,15 +222,15 @@ class ICacheDataArray extends ICachArray } } val rdatas_decoded = rdatas.map{wdata => wdata.map{ bdata => bdata.map{ unit => cacheParams.dataCode.decode(unit)}}} - val rdata_corrected = VecInit((0 until nWays).map{ w => - VecInit((0 until nBanks).map{ b => + val rdata_corrected = VecInit((0 until nWays).map{ w => + VecInit((0 until nBanks).map{ b => VecInit((0 until bankUnitNum).map{ i => rdatas_decoded(w)(b)(i).corrected }) }) }) - (0 until nWays).map{ w => + (0 until nWays).map{ w => (0 until blockRows).map{ r => io.readResp(w)(r) := Cat( (0 until bankUnitNum/2).map{ i => @@ -292,7 +259,7 @@ class ICacheDataArray extends ICachArray for(w <- 0 until nWays){ for(b <- 0 until nBanks){ - dataArray(w)(b).io.w.req.valid := io.write.valid && w.U === write_way + dataArray(w)(b).io.w.req.valid := io.write.valid && w.U === write_way dataArray(w)(b).io.w.req.bits.setIdx := write.virIdx dataArray(w)(b).io.w.req.bits.data := write_bank_data(b) } @@ -308,67 +275,23 @@ class ICacheDataArray extends ICachArray */ class ICache extends ICacheModule { - // cut a cacheline into a fetch packet - def cutHelper(sourceVec: Vec[UInt], pc: UInt, mask: UInt): UInt = { - val sourceVec_inst = Wire(Vec(blockRows*rowBytes/instBytes,UInt(insLen.W))) - (0 until blockRows).foreach{ i => - (0 until rowBytes/instBytes).foreach{ j => - sourceVec_inst(i*rowBytes/instBytes + j) := sourceVec(i)(j*insLen+insLen-1, j*insLen) - } - } - val cutPacket = WireInit(VecInit(Seq.fill(PredictWidth){0.U(insLen.W)})) - val start = Cat(pc(ptrHighBit,ptrLowBit),0.U(packetInstNumBit.W)) - (0 until PredictWidth ).foreach{ i => - cutPacket(i) := Mux(mask(i).asBool,sourceVec_inst(start + i.U),0.U) - } - cutPacket.asUInt - } - - def cutHelperMMIO(sourceVec: Vec[UInt], pc: UInt, mask: UInt) = { - val sourceVec_inst = Wire(Vec(mmioBeats * mmioBusBytes/instBytes,UInt(insLen.W))) - (0 until mmioBeats).foreach{ i => - (0 until mmioBusBytes/instBytes).foreach{ j => - sourceVec_inst(i*mmioBusBytes/instBytes + j) := sourceVec(i)(j*insLen+insLen-1, j*insLen) - } - } - val cutPacket = WireInit(VecInit(Seq.fill(PredictWidth){0.U(insLen.W)})) - val insLenLog = log2Ceil(insLen) - val start = (pc >> insLenLog.U)(log2Ceil(mmioBeats * mmioBusBytes/instBytes) -1, 0) - val outMask = mask >> start - (0 until PredictWidth ).foreach{ i => - cutPacket(i) := Mux(outMask(i).asBool,sourceVec_inst(start + i.U),0.U) - } - (cutPacket.asUInt, outMask.asUInt) - } - - // generate the one hot code according to a UInt between 0-8 - def PriorityMask(sourceVec: UInt) : UInt = { - val oneHot = Mux(sourceVec >= 8.U, "b1000".U, - Mux(sourceVec >= 4.U, "b0100".U, - Mux(sourceVec >= 2.U, "b0010".U, "b0001".U))) - oneHot - } - val io = IO(new ICacheIO) - val s2_flush = io.flush(0) - val s3_flush = io.flush(1) + val (s2_flush,s3_flush) = (io.flush(0), io.flush(1)) //---------------------------- // Memory Part //---------------------------- val metaArray = Module(new ICacheMetaArray) val dataArray = Module(new ICacheDataArray) - // 256-bit valid val validArray = RegInit(0.U((nSets * nWays).W)) //---------------------------- // Stage 1 //---------------------------- - s1_fire := io.req.valid - s1_req_pc := io.req.bits.addr - s1_req_mask := io.req.bits.mask - s2_ready := WireInit(false.B) - // s1_fire := s1_valid && (s2_ready || s2_flush) + val req_in = io.req.bits + val req_valid = io.req.valid + + val (s1_fire, s1_req_pc, s1_req_mask) = {(req_valid, req_in.addr, req_in.mask)} // SRAM(Meta and Data) read request val s1_idx = get_idx(s1_req_pc) @@ -378,41 +301,51 @@ class ICache extends ICacheModule dataArray.io.read.valid := s1_fire dataArray.io.read.bits :=s1_idx - XSDebug("[Stage 1] r : f (%d %d) request pc: 0x%x mask: %b\n",s2_ready,s1_fire,s1_req_pc,s1_req_mask) - XSDebug("[Stage 1] index: %d\n",s1_idx) + // XSDebug("[Stage 1] r : f (%d %d) request pc: 0x%x mask: %b\n",s2_ready,s1_fire,s1_req_pc,s1_req_mask) + // XSDebug("[Stage 1] index: %d\n",s1_idx) //---------------------------- - // Stage 2 + // Stage 2 //---------------------------- - val s2_idx = get_idx(s2_req_pc) - val s2_tlb_resp = WireInit(io.tlb.resp.bits) - val s2_tag = get_tag(s2_tlb_resp.paddr) val s2_hit = WireInit(false.B) - val s2_allValid = s2_valid && io.tlb.resp.valid val s2_mmio = WireInit(false.B) + val s3_ready = WireInit(false.B) + val s2_tlb_resp = WireInit(io.tlb.resp.bits) + val s2_valid = RegInit(false.B) + val s2_req_pc = RegEnable(next = s1_req_pc,init = 0.U, enable = s1_fire) + val s2_req_mask = RegEnable(next = s1_req_mask,init = 0.U, enable = s1_fire) + + val (s2_idx, s2_tag) = { (get_idx(s2_req_pc), get_tag(s2_tlb_resp.paddr)) } + val (s2_ready, s2_allValid) = {((s3_ready || !s2_valid), (s2_valid && io.tlb.resp.valid)) } + val s2_fire = s2_allValid && s3_ready - s2_fire := s2_allValid && s3_ready - s2_ready := s3_ready || !s2_valid when(s1_fire) { s2_valid := true.B } .elsewhen(s2_flush) { s2_valid := false.B } .elsewhen(s2_fire) { s2_valid := false.B } // SRAM(Meta and Data) read reseponse // TODO :Parity wrong excetion - val metas = metaArray.io.readResp - - val datas =RegEnable(next=dataArray.io.readResp, enable=s2_fire) - + val (metas, datas) = {(metaArray.io.readResp , RegEnable(next=dataArray.io.readResp, enable=s2_fire))} val validMeta = Cat((0 until nWays).map{w => validArray(Cat(s2_idx, w.U(log2Ceil(nWays).W)))}.reverse).asUInt // hit check and generate victim cacheline mask + def PriorityMask(sourceVec: UInt) : UInt = { + val oneHot = Mux(sourceVec >= 8.U, "b1000".U, + Mux(sourceVec >= 4.U, "b0100".U, + Mux(sourceVec >= 2.U, "b0010".U, "b0001".U))) + oneHot + } val hitVec = VecInit((0 until nWays).map{w => metas(w)=== s2_tag && validMeta(w) === 1.U}) - val victimWayMask = (1.U << LFSR64()(log2Up(nWays)-1,0)) val invalidVec = ~validMeta val hasInvalidWay = invalidVec.orR val refillInvalidWaymask = PriorityMask(invalidVec) + val replacer = cacheParams.replacement + val victimWayMask = UIntToOH(replacer.way(s2_idx)) + + when(s2_hit) {replacer.access(s2_idx, OHToUInt(hitVec))} + //deal with icache exception val icacheExceptionVec = Wire(Vec(8,Bool())) @@ -422,47 +355,62 @@ class ICache extends ICacheModule icacheExceptionVec(pageFault) := s2_tlb_resp.excp.pf.instr && s2_allValid s2_mmio := s2_valid && io.tlb.resp.valid && s2_tlb_resp.mmio && !hasIcacheException - s2_hit := s2_valid && ParallelOR(hitVec) + s2_hit := s2_valid && ParallelOR(hitVec) val waymask = Mux(hasIcacheException,1.U(nWays.W),Mux(s2_hit, hitVec.asUInt, Mux(hasInvalidWay, refillInvalidWaymask, victimWayMask))) assert(!(s2_hit && s2_mmio),"MMIO address should not hit in icache") - XSDebug("[Stage 2] v : r : f (%d %d %d) pc: 0x%x mask: %b mmio:%d \n",s2_valid,s3_ready,s2_fire,s2_req_pc,s2_req_mask,s2_mmio) - XSDebug("[Stage 2] exception: af:%d pf:%d \n",icacheExceptionVec(accessFault),icacheExceptionVec(pageFault)) - XSDebug(p"[Stage 2] tlb req: v ${io.tlb.req.valid} r ${io.tlb.req.ready} ${io.tlb.req.bits}\n") - XSDebug(p"[Stage 2] tlb resp: v ${io.tlb.resp.valid} r ${io.tlb.resp.ready} ${s2_tlb_resp}\n") - XSDebug("[Stage 2] tag: %x hit:%d mmio:%d\n",s2_tag,s2_hit,s2_mmio) - XSDebug("[Stage 2] validMeta: %b victimWayMaks:%b invalidVec:%b hitVec:%b waymask:%b \n",validMeta,victimWayMask,invalidVec.asUInt,hitVec.asUInt,waymask.asUInt) - - //---------------------------- // Stage 3 //---------------------------- + val s3_valid = RegInit(false.B) + val s3_miss = WireInit(false.B) + val s3_req_pc = RegEnable(next = s2_req_pc,init = 0.U, enable = s2_fire) + val s3_req_mask = RegEnable(next = s2_req_mask,init = 0.U, enable = s2_fire) val s3_tlb_resp = RegEnable(next = s2_tlb_resp, init = 0.U.asTypeOf(new TlbResp), enable = s2_fire) - val s3_data = datas val s3_tag = RegEnable(s2_tag, s2_fire) val s3_hit = RegEnable(next=s2_hit,init=false.B,enable=s2_fire) val s3_mmio = RegEnable(next=s2_mmio,init=false.B,enable=s2_fire) val s3_wayMask = RegEnable(next=waymask,init=0.U,enable=s2_fire) - val s3_idx = get_idx(s3_req_pc) val s3_exception_vec = RegEnable(next= icacheExceptionVec,init=0.U.asTypeOf(Vec(8,Bool())), enable=s2_fire) - val s3_has_exception = s3_exception_vec.asUInt.orR - val s3_miss = s3_valid && !s3_hit && !s3_mmio && !s3_has_exception + val s3_has_exception = RegEnable(next= hasIcacheException,init=false.B,enable=s2_fire) + val s3_idx = get_idx(s3_req_pc) + val s3_data = datas + + when(s3_flush) { s3_valid := false.B } .elsewhen(s2_fire && !s2_flush) { s3_valid := true.B } .elsewhen(io.resp.fire()) { s3_valid := false.B } - // icache hit - // data Parity encoding - // simply cut the hit cacheline + + /* icache hit + * simply cut the cacheline into a fetchpacket according to the req_pc + * use hitVec to do data way choosing + */ + def cutHelper(sourceVec: Vec[UInt], pc: UInt, mask: UInt): UInt = { + val sourceVec_inst = Wire(Vec(blockRows*rowBytes/instBytes,UInt(insLen.W))) + (0 until blockRows).foreach{ i => + (0 until rowBytes/instBytes).foreach{ j => + sourceVec_inst(i*rowBytes/instBytes + j) := sourceVec(i)(j*insLen+insLen-1, j*insLen) + } + } + val cutPacket = WireInit(VecInit(Seq.fill(PredictWidth){0.U(insLen.W)})) + val start = Cat(pc(ptrHighBit,ptrLowBit),0.U(packetInstNumBit.W)) + (0 until PredictWidth ).foreach{ i => + cutPacket(i) := Mux(mask(i).asBool,sourceVec_inst(start + i.U),0.U) + } + cutPacket.asUInt + } val dataHitWay = Mux1H(s3_wayMask,s3_data) val outPacket = Wire(UInt((FetchWidth * 32).W)) outPacket := cutHelper(dataHitWay,s3_req_pc.asUInt,s3_req_mask.asUInt) - - //ICache MissQueue + /* icache miss + * send a miss req to ICache Miss Queue, excluding exception/flush/blocking + * block the pipeline until refill finishes + */ val icacheMissQueue = Module(new IcacheMissQueue) val blocking = RegInit(false.B) val isICacheResp = icacheMissQueue.io.resp.valid && icacheMissQueue.io.resp.bits.clientID === cacheID.U(2.W) @@ -474,17 +422,19 @@ class ICache extends ICacheModule when(icacheMissQueue.io.req.fire() || io.mmio_acquire.fire()){blocking := true.B} .elsewhen(blocking && ((icacheMissQueue.io.resp.fire() && isICacheResp) || io.mmio_grant.fire() || s3_flush) ){blocking := false.B} - XSDebug(blocking && s3_flush,"check for icache non-blocking") - //cache flush register + /* icache flush + * backend send fence.i signal to flush all the cacheline in icache for consistency + * set a flag to inform the refill meta that should not write in validArray + */ val icacheFlush = io.fencei val cacheflushed = RegInit(false.B) - XSDebug("[Fence.i] icacheFlush:%d, cacheflushed:%d\n",icacheFlush,cacheflushed) when(icacheFlush && blocking && !isICacheResp){ cacheflushed := true.B} .elsewhen(isICacheResp && cacheflushed) {cacheflushed := false.B } - //TODO: Prefetcher - //refill write + XSDebug(blocking && s3_flush,"WARNING:icache non-blocking happens") + + //refill meta write val metaWriteReq = icacheMissQueue.io.meta_write.bits icacheMissQueue.io.meta_write.ready := true.B metaArray.io.write.valid := icacheMissQueue.io.meta_write.valid @@ -498,7 +448,7 @@ class ICache extends ICacheModule validArray := validArray.bitSet(validPtr, true.B) } - //data + //refill data write icacheMissQueue.io.refill.ready := true.B val refillReq = icacheMissQueue.io.refill.bits dataArray.io.write.valid := icacheMissQueue.io.refill.valid @@ -506,25 +456,55 @@ class ICache extends ICacheModule idx=refillReq.refill_idx, waymask=refillReq.refill_waymask) - //icache flush: only flush valid Array register + s3_ready := ((io.resp.ready && s3_hit || !s3_valid) && !blocking) || (blocking && ((icacheMissQueue.io.resp.fire()) || io.mmio_grant.fire())) + + when(icacheFlush){ validArray := 0.U } + XSDebug(icacheFlush,"WARNING:icache flush happens") + + /* refill output + * cut the refill data cacheline into a fetch packet for responsing to predecoder + */ val refillDataVec = icacheMissQueue.io.resp.bits.data.asTypeOf(Vec(blockRows,UInt(wordBits.W))) val refillDataOut = cutHelper(refillDataVec, s3_req_pc,s3_req_mask ) + // deal with same cacheline miss in s3 and s2 val is_same_cacheline = s3_miss && s2_valid && (groupAligned(s2_req_pc) ===groupAligned(s3_req_pc)) val useRefillReg = RegNext(is_same_cacheline && icacheMissQueue.io.resp.fire()) val refillDataVecReg = RegEnable(next=refillDataVec, enable= (is_same_cacheline && icacheMissQueue.io.resp.fire())) - //FIXME!! + s3_miss := s3_valid && !s3_hit && !s3_mmio && !s3_has_exception && !useRefillReg + + + + + /* mmio response output + * cut the mmio response data cacheline into a fetch packet for responsing to predecoder + * TODO: no need to wait for a whole fetch packet(once per beat)? + */ + def cutHelperMMIO(sourceVec: Vec[UInt], pc: UInt, mask: UInt) = { + val sourceVec_inst = Wire(Vec(mmioBeats * mmioBusBytes/instBytes,UInt(insLen.W))) + (0 until mmioBeats).foreach{ i => + (0 until mmioBusBytes/instBytes).foreach{ j => + sourceVec_inst(i*mmioBusBytes/instBytes + j) := sourceVec(i)(j*insLen+insLen-1, j*insLen) + } + } + val cutPacket = WireInit(VecInit(Seq.fill(PredictWidth){0.U(insLen.W)})) + val insLenLog = log2Ceil(insLen) + val start = (pc >> insLenLog.U)(log2Ceil(mmioBeats * mmioBusBytes/instBytes) -1, 0) + val outMask = mask >> start + (0 until PredictWidth ).foreach{ i => + cutPacket(i) := Mux(outMask(i).asBool,sourceVec_inst(start + i.U),0.U) + } + (cutPacket.asUInt, outMask.asUInt) + } val mmioDataVec = io.mmio_grant.bits.data.asTypeOf(Vec(mmioBeats,UInt(mmioBusWidth.W))) - val mmio_packet = cutHelperMMIO(mmioDataVec, s3_req_pc, mmioMask)._1 - val mmio_mask = cutHelperMMIO(mmioDataVec, s3_req_pc, mmioMask)._2 + val (mmio_packet,mmio_mask) = cutHelperMMIO(mmioDataVec, s3_req_pc, mmioMask) XSDebug("mmio data %x\n", mmio_packet) - s3_ready := ((io.resp.ready && s3_hit || !s3_valid) && !blocking) || (blocking && ((icacheMissQueue.io.resp.fire()) || io.mmio_grant.fire())) val pds = Seq.fill(nWays)(Module(new PreDecode)) @@ -542,26 +522,11 @@ class ICache extends ICacheModule pds(i).io.prev <> io.prev pds(i).io.prev_pc := io.prev_pc } - - - // if a fetch packet triggers page fault, at least send a valid instruction + + io.pd_out := Mux1H(s3_wayMask, pds.map(_.io.out)) val s3_noHit = s3_wayMask === 0.U - //TODO: coherence - XSDebug("[Stage 3] valid:%d miss:%d pc: 0x%x mmio :%d mask: %b ipf:%d\n",s3_valid, s3_miss,s3_req_pc,s3_req_mask,s3_tlb_resp.excp.pf.instr, s3_mmio) - XSDebug("[Stage 3] hit:%d miss:%d waymask:%x blocking:%d\n",s3_hit,s3_miss,s3_wayMask.asUInt,blocking) - XSDebug("[Stage 3] tag: %x idx: %d\n",s3_tag,get_idx(s3_req_pc)) - XSDebug(p"[Stage 3] tlb resp: ${s3_tlb_resp}\n") - XSDebug("[mem_acquire] valid:%d ready:%d\n",io.mem_acquire.valid,io.mem_acquire.ready) - XSDebug("[mem_grant] valid:%d ready:%d data:%x id:%d \n",io.mem_grant.valid,io.mem_grant.ready,io.mem_grant.bits.data,io.mem_grant.bits.id) - XSDebug("[Stage 3] ---------Hit Way--------- \n") - for(i <- 0 until blockRows){ - XSDebug("[Stage 3] %x\n",dataHitWay(i)) - } - XSDebug("[Stage 3] outPacket :%x\n",outPacket) - XSDebug("[Stage 3] refillDataOut :%x\n",refillDataOut) - XSDebug("[Stage 3] refillDataOutVec :%x startPtr:%d\n",refillDataVec.asUInt, s3_req_pc(5,1).asUInt) //---------------------------- // Out Put @@ -571,9 +536,9 @@ class ICache extends ICacheModule //icache response: to pre-decoder io.resp.valid := s3_valid && (s3_hit || s3_has_exception || icacheMissQueue.io.resp.valid || io.mmio_grant.valid) - io.resp.bits.data := Mux(s3_mmio,mmio_packet,Mux((s3_valid && s3_hit),outPacket,refillDataOut)) io.resp.bits.mask := Mux(s3_mmio,mmio_mask,s3_req_mask) io.resp.bits.pc := s3_req_pc + io.resp.bits.data := DontCare io.resp.bits.ipf := s3_tlb_resp.excp.pf.instr io.resp.bits.acf := s3_exception_vec(accessFault) io.resp.bits.mmio := s3_mmio @@ -589,7 +554,7 @@ class ICache extends ICacheModule //To L1 plus io.mem_acquire <> icacheMissQueue.io.mem_acquire icacheMissQueue.io.mem_grant <> io.mem_grant - + // to train l1plus prefetcher io.prefetchTrainReq.valid := s3_valid && icacheMissQueue.io.req.fire() io.prefetchTrainReq.bits := DontCare @@ -608,6 +573,61 @@ class ICache extends ICacheModule XSDebug("[flush] flush_0:%d flush_1:%d\n",s2_flush,s3_flush) + def dump_s1_info() = { + XSDebug("[Stage 1] r : f (%d %d) request pc: 0x%x mask: %b\n",s2_ready,s1_fire,s1_req_pc,s1_req_mask) + XSDebug("[Stage 1] virtula index: %x\n",s1_idx) + } + + def dump_s2_info() = { + XSDebug("[Stage 2] v : r : f (%d %d %d) pc: 0x%x mask: %b mmio:%d \n",s2_valid,s3_ready,s2_fire,s2_req_pc,s2_req_mask,s2_mmio) + XSDebug("[Stage 2] exception: af:%d pf:%d \n",icacheExceptionVec(accessFault),icacheExceptionVec(pageFault)) + XSDebug(p"[Stage 2] tlb req: v ${io.tlb.req.valid} r ${io.tlb.req.ready} ${io.tlb.req.bits}\n") + XSDebug(p"[Stage 2] tlb resp: v ${io.tlb.resp.valid} r ${io.tlb.resp.ready} ${s2_tlb_resp}\n") + XSDebug("[Stage 2] tag: %x idx:%x hit:%d mmio:%d\n",s2_tag,s2_idx,s2_hit,s2_mmio) + XSDebug("[Stage 2] validMeta: %b victimWayMaks:%b invalidVec:%b hitVec:%b waymask:%b \n",validMeta,victimWayMask,invalidVec.asUInt,hitVec.asUInt,waymask.asUInt) + } + + def dump_s3_info() = { + XSDebug("[Stage 3] valid:%d miss:%d pc: 0x%x mmio :%d mask: %b ipf:%d\n",s3_valid, s3_miss,s3_req_pc,s3_req_mask,s3_tlb_resp.excp.pf.instr, s3_mmio) + XSDebug("[Stage 3] hit:%d miss:%d waymask:%x blocking:%d\n",s3_hit,s3_miss,s3_wayMask.asUInt,blocking) + XSDebug("[Stage 3] tag: %x idx: %d\n",s3_tag,get_idx(s3_req_pc)) + XSDebug(p"[Stage 3] tlb resp: ${s3_tlb_resp}\n") + XSDebug(s3_hit && io.resp.fire(),"[Stage 3] ---------Hit Way--------- \n") + for(i <- 0 until blockRows){ + XSDebug(s3_hit && io.resp.fire(),"[Stage 3] (%d) %x\n",i.U,dataHitWay(i)) + } + XSDebug("[Stage 3] outPacket :%x\n",outPacket) + XSDebug("[Stage 3] startPtr:%d refillDataOut :%x\n",Cat(s3_req_pc(ptrHighBit,ptrLowBit),0.U(packetInstNumBit.W)),refillDataVec.asUInt) + XSDebug(icacheMissQueue.io.resp.fire(),"[Stage 3] ---------refill cacheline--------- \n") + for(i <- 0 until blockRows){ + XSDebug(icacheMissQueue.io.resp.fire(),"[Stage 3] (%d) %x\n",i.U,refillDataVec(i)) + } + XSDebug(is_same_cacheline,"WARNING: same cacheline happen!") + } + def dump_mem_info() = { + val toMem = io.mem_acquire + val fromMem = io.mem_grant + XSDebug(toMem.fire(),"[mem_acquire] valid:%d ready:%d\n",toMem.valid,toMem.ready) + XSDebug(fromMem.fire(),"[mem_grant] valid:%d ready:%d data:%x id:%d \n",fromMem.valid,fromMem.ready,fromMem.bits.data,fromMem.bits.id) + } + + def dump_mmio_info() = { + val toMMIO = io.mmio_acquire + val fromMMMIO = io.mmio_grant + XSDebug(toMMIO.fire(),"[mmio_acquire] valid:%d ready:%d\n",toMMIO.valid,toMMIO.ready) + XSDebug(fromMMMIO.fire(),"[mmio_grant] valid:%d ready:%d data:%x id:%d \n",fromMMMIO.valid,fromMMMIO.ready,fromMMMIO.bits.data,fromMMMIO.bits.id) + } + + def dump_pipe_info(){ + dump_s1_info() + dump_s2_info() + dump_s3_info() + dump_mem_info() + dump_mmio_info() + } + + dump_pipe_info() + //Performance Counter if (!env.FPGAPlatform ) { ExcitingUtils.addSource( s3_valid && !blocking, "perfCntIcacheReqCnt", Perf) diff --git a/src/main/scala/xiangshan/cache/prefetch/BestOffsetPrefetch.scala b/src/main/scala/xiangshan/cache/prefetch/BestOffsetPrefetch.scala index 7d0e83da500cd2755d230e6f96f200f2c21f05a4..e4d5b0ed272d18f8005bf77517d62cf43e21f00a 100644 --- a/src/main/scala/xiangshan/cache/prefetch/BestOffsetPrefetch.scala +++ b/src/main/scala/xiangshan/cache/prefetch/BestOffsetPrefetch.scala @@ -167,7 +167,7 @@ class RecentRequestTable(p: BOPParameters) extends PrefetchModule { rrTable.io.r.req.bits.setIdx := idx(rAddr) rData := rrTable.io.r.resp.data(0) - val rwConflict = io.w.fire() && io.r.req.fire() && idx(wAddr) === idx(rAddr) + val rwConflict = io.w.fire() && io.r.req.fire()// && idx(wAddr) === idx(rAddr) // when (rwConflict) { // rrTable.io.r.req.valid := false.B // } @@ -295,7 +295,7 @@ class OffsetScoreTable(p: BOPParameters) extends PrefetchModule { XSDebug(io.req.fire(), p"receive req from L1. io.req.bits=0x${Hexadecimal(io.req.bits)}\n") } -class BestOffsetPrefetchEntry(p: BOPParameters) extends PrefetchModule { +class BestOffsetPrefetchEntry(p: BOPParameters) extends PrefetchModule with HasTlbConst { val io = IO(new Bundle { val id = Input(UInt(p.totalWidth.W)) val prefetchOffset = Input(UInt(p.offsetWidth.W)) @@ -305,19 +305,27 @@ class BestOffsetPrefetchEntry(p: BOPParameters) extends PrefetchModule { }) def blockBytes = p.blockBytes - def getBlockAddr(addr: UInt) = Cat(addr(PAddrBits - 1, log2Up(blockBytes)), 0.U(log2Up(blockBytes).W)) + def getBlock(addr: UInt) = addr(PAddrBits - 1, log2Up(blockBytes)) + def getBlockAddr(addr: UInt) = Cat(getBlock(addr), 0.U(log2Up(blockBytes).W)) + def getPageNum(addr: UInt) = addr(PAddrBits - 1, offLen) val s_idle :: s_req :: s_resp :: s_write_recent_req :: s_finish :: Nil = Enum(5) val state = RegInit(s_idle) val req = RegInit(0.U.asTypeOf(new PrefetchReq)) val baseAddr = RegInit(0.U(PAddrBits.W)) + val baseBlock = getBlock(io.pft.train.bits.addr) + val nextBlock = baseBlock + io.prefetchOffset + val nextAddr = Cat(nextBlock, 0.U(log2Up(blockBytes).W)) + val crossPage = getPageNum(nextAddr) =/= getPageNum(io.pft.train.bits.addr) when (state === s_idle) { when (io.pft.train.valid) { - state := s_req - req.addr := getBlockAddr(io.pft.train.bits.addr) + (io.prefetchOffset << log2Up(blockBytes)) + // state := s_req + state := Mux(crossPage, s_idle, s_req) + req.addr := nextAddr req.write := io.pft.train.bits.write baseAddr := getBlockAddr(io.pft.train.bits.addr) + XSDebug(crossPage, p"prefetch addr 0x${nextAddr} cross page, ignore this!\n") } } @@ -357,7 +365,7 @@ class BestOffsetPrefetchEntry(p: BOPParameters) extends PrefetchModule { io.writeRRTable.valid := state === s_write_recent_req io.writeRRTable.bits := baseAddr // write this into recent request table - XSDebug(p"bopEntry ${io.id}: state=${state} prefetchOffset=${io.prefetchOffset} inflight=${io.inflight.valid} 0x${Hexadecimal(io.inflight.bits)} writeRRTable: ${io.writeRRTable.valid} 0x${Hexadecimal(io.writeRRTable.bits)} baseAddr=0x${Hexadecimal(baseAddr)} req: ${req}\n") + XSDebug(p"bopEntry ${io.id}: state=${state} prefetchOffset=${io.prefetchOffset} inflight=${io.inflight.valid} 0x${Hexadecimal(io.inflight.bits)} writeRRTable: ${io.writeRRTable.valid} 0x${Hexadecimal(io.writeRRTable.bits)} baseAddr=0x${Hexadecimal(baseAddr)} nextAddr=0x${Hexadecimal(nextAddr)} crossPage=${crossPage} req: ${req}\n") XSDebug(p"bopEntry ${io.id}: io.pft: ${io.pft}\n") } diff --git a/src/main/scala/xiangshan/cache/prefetch/StreamPrefetch.scala b/src/main/scala/xiangshan/cache/prefetch/StreamPrefetch.scala index 5daceb104836fe8b464a0977f385314ec63b2469..25ee3b0684cca494d33452e16cbe0e483554d062 100644 --- a/src/main/scala/xiangshan/cache/prefetch/StreamPrefetch.scala +++ b/src/main/scala/xiangshan/cache/prefetch/StreamPrefetch.scala @@ -85,7 +85,7 @@ class StreamBufferAlloc(p: StreamPrefetchParameters) extends StreamPrefetchReq(p } -class StreamBuffer(p: StreamPrefetchParameters) extends PrefetchModule { +class StreamBuffer(p: StreamPrefetchParameters) extends PrefetchModule with HasTlbConst { val io = IO(new Bundle { val streamBufId = Input(UInt(log2Up(streamCnt).W)) val addrs = Vec(p.streamSize, ValidIO(UInt(PAddrBits.W))) @@ -102,6 +102,7 @@ class StreamBuffer(p: StreamPrefetchParameters) extends PrefetchModule { def blockBytes = p.blockBytes // def getBlockAddr(addr: UInt) = addr & ~((blockBytes - 1).U(addr.getWidth.W)) def getBlockAddr(addr: UInt) = Cat(addr(PAddrBits - 1, log2Up(p.blockBytes)), 0.U(log2Up(p.blockBytes).W)) + def getPageNum(addr: UInt) = addr(PAddrBits - 1, offLen) val baseReq = RegInit(0.U.asTypeOf(Valid(new PrefetchReq))) val nextReq = RegInit(0.U.asTypeOf(new PrefetchReq)) @@ -163,11 +164,17 @@ class StreamBuffer(p: StreamPrefetchParameters) extends PrefetchModule { } // enqueue + val nextAddrCrossPage = getPageNum(baseReq.bits.addr) =/= getPageNum(nextReq.addr) when (!full && baseReq.valid && !needRealloc) { - state(tail) := s_req - tail := tail + 1.U - buf(tail) := nextReq - nextReq.addr := nextReq.addr + blockBytes.U + when (!nextAddrCrossPage) { + state(tail) := s_req + tail := tail + 1.U + buf(tail) := nextReq + nextReq.addr := nextReq.addr + blockBytes.U + XSDebug(p"enqueue 0x${nextReq.addr}\n") + }.otherwise { + XSDebug(p"addr 0x${nextReq.addr} could not enqueue for crossing pages\n") + } } val reqs = Wire(Vec(streamSize, Decoupled(new StreamPrefetchReq(p)))) @@ -259,7 +266,7 @@ class StreamBuffer(p: StreamPrefetchParameters) extends PrefetchModule { p"deqLater: ${deqLater(i)} deqValid: ${deqValid(i)}\n") } XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} head: ${head} tail: ${tail} full: ${full} empty: ${empty} nextHead: ${nextHead} blockBytes: ${blockBytes.U}\n") - XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} baseReq: v=${baseReq.valid} ${baseReq.bits} nextReq: ${nextReq}\n") + XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} baseReq: v=${baseReq.valid} ${baseReq.bits} nextReq: ${nextReq} crossPage: ${nextAddrCrossPage}\n") XSDebug(needRealloc, s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} needRealloc: ${needRealloc} reallocReq: ${reallocReq}\n") XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} prefetchPrior: ") (0 until streamSize).foreach(i => XSDebug(false, true.B, p"${prefetchPrior(i)} ")) @@ -312,38 +319,40 @@ class StreamPrefetch(p: StreamPrefetchParameters) extends PrefetchModule { // 1. streamBufs hit while l1i miss val hit = WireInit(false.B) + val hitVec = WireInit(VecInit(Seq.fill(streamCnt * streamSize)(false.B))) for (i <- 0 until streamCnt) { for (j <- 0 until streamSize) { when (io.train.valid && addrValids(i)(j) && getBlockAddr(io.train.bits.addr) === streamBufs(i).io.addrs(j).bits) { - hit := true.B + // hit := true.B + hitVec(i*streamSize+j) := true.B streamBufs(i).io.update.valid := true.B streamBufs(i).io.update.bits.hitIdx := j.U ages(i) := maxAge } } } + hit := ParallelOR(hitVec) // 2. streamBufs miss + val allocIdx = Wire(UInt(log2Up(streamCnt).W)) + val ageCmp = Seq.fill(streamCnt)(Wire(new CompareBundle(ageWidth))) + (0 until streamCnt).foreach(i => ageCmp(i).bits := ages(i)) + (0 until streamCnt).foreach(i => ageCmp(i).idx := i.U) + when ((~bufValids.asUInt).orR) { + allocIdx := PriorityMux(~bufValids.asUInt, VecInit(List.tabulate(streamCnt)(_.U))) + }.otherwise { + allocIdx := ParallelMin(ageCmp).idx + } when (!hit && io.train.valid) { (0 until streamCnt).foreach(i => ages(i) := Mux(ages(i) =/= 0.U, ages(i) - 1.U, 0.U)) // realloc an invalid or the eldest stream buffer with new one - val idx = Wire(UInt(log2Up(streamCnt).W)) - when ((~bufValids.asUInt).orR) { - idx := PriorityMux(~bufValids.asUInt, VecInit(List.tabulate(streamCnt)(_.U))) - }.otherwise { - val ageCmp = Seq.fill(streamCnt)(Wire(new CompareBundle(ageWidth))) - (0 until streamCnt).foreach(i => ageCmp(i).bits := ages(i)) - (0 until streamCnt).foreach(i => ageCmp(i).idx := i.U) - idx := ParallelMin(ageCmp).idx - } - for (i <- 0 until streamCnt) { - streamBufs(i).io.alloc.valid := idx === i.U + streamBufs(i).io.alloc.valid := allocIdx === i.U streamBufs(i).io.alloc.bits := DontCare streamBufs(i).io.alloc.bits.addr := io.train.bits.addr streamBufs(i).io.alloc.bits.write := io.train.bits.write - when (idx === i.U) { ages(i) := maxAge } + when (allocIdx === i.U) { ages(i) := maxAge } } } diff --git a/src/main/scala/xiangshan/frontend/Bim.scala b/src/main/scala/xiangshan/frontend/Bim.scala index f15bfff44354bcfa65ee851d5882e1c38a5e34b6..be5fb78bfa74bd88610b682aa00b4a8d6db5a7bd 100644 --- a/src/main/scala/xiangshan/frontend/Bim.scala +++ b/src/main/scala/xiangshan/frontend/Bim.scala @@ -104,6 +104,14 @@ class BIM extends BasePredictor with BimParams { bim(b).io.w.req.bits.data := Mux(doing_reset, 2.U(2.W), newCtr) } + if (!env.FPGAPlatform && env.EnablePerfDebug) { + val bimResp = Wire(Vec(PredictWidth, Bool())) + for(i <- 0 until PredictWidth) { + bimResp(i) := io.resp.ctrs(i)(1) + } + ExcitingUtils.addSource(bimResp, "bimResp") + } + if (BPUDebug && debug) { XSDebug(doing_reset, "Reseting...\n") XSDebug("[update] v=%d pc=%x pnpc=%x tgt=%x", io.update.valid, u.pc, u.pnpc, u.target) diff --git a/src/main/scala/xiangshan/frontend/Btb.scala b/src/main/scala/xiangshan/frontend/Btb.scala index 66a6eb35a057756dec535ea3a4a75222d9392829..01cb5e40cac7843053e20a7114e2f3b3aab7e63b 100644 --- a/src/main/scala/xiangshan/frontend/Btb.scala +++ b/src/main/scala/xiangshan/frontend/Btb.scala @@ -206,6 +206,18 @@ class BTB extends BasePredictor with BTBParams{ edata.io.w.req.bits.setIdx := updateRow edata.io.w.req.bits.data := u.target + if (!env.FPGAPlatform && env.EnablePerfDebug) { + val btbAns = Wire(Vec(PredictWidth, new PredictorAnswer)) + + btbAns.zipWithIndex.foreach{ case(x,i) => + x.hit := io.resp.hits(i) + x.taken := DontCare + x.target := io.resp.targets(i) + } + + ExcitingUtils.addSource(btbAns, "btbAns") + } + if (BPUDebug && debug) { val debug_verbose = true diff --git a/src/main/scala/xiangshan/frontend/IFU.scala b/src/main/scala/xiangshan/frontend/IFU.scala index 8645354a1916512c39b2990951149470d0ce38a3..3377c2e9900c5689ae742ee2e617af8a4b9dce5c 100644 --- a/src/main/scala/xiangshan/frontend/IFU.scala +++ b/src/main/scala/xiangshan/frontend/IFU.scala @@ -500,34 +500,45 @@ class IFU extends XSModule with HasIFUConst io.fetchPacket.bits := fetchPacketWire io.fetchPacket.valid := fetchPacketValid -// if(IFUDebug) { + if(!env.FPGAPlatform && env.EnablePerfDebug) { val predictor_s3 = RegEnable(Mux(if3_redirect, 1.U(log2Up(4).W), 0.U(log2Up(4).W)), if3_fire) - val predictor_s4 = Mux(if4_redirect, 2.U, predictor_s3) + val predictor_s4 = Mux(if4_redirect, 2.U(log2Up(4).W), predictor_s3) val predictor = predictor_s4 - fetchPacketWire.bpuMeta.map(_.predictor := predictor) - // } - - // val predRight = cfiUpdate.valid && !cfiUpdate.bits.isMisPred && !cfiUpdate.bits.isReplay - // val predWrong = cfiUpdate.valid && cfiUpdate.bits.isMisPred && !cfiUpdate.bits.isReplay - - // val ubtbRight = predRight && cfiUpdate.bits.bpuMeta.predictor === 0.U - // val ubtbWrong = predWrong && cfiUpdate.bits.bpuMeta.predictor === 0.U - // val btbRight = predRight && cfiUpdate.bits.bpuMeta.predictor === 1.U - // val btbWrong = predWrong && cfiUpdate.bits.bpuMeta.predictor === 1.U - // val tageRight = predRight && cfiUpdate.bits.bpuMeta.predictor === 2.U - // val tageWrong = predWrong && cfiUpdate.bits.bpuMeta.predictor === 2.U - // val loopRight = predRight && cfiUpdate.bits.bpuMeta.predictor === 3.U - // val loopWrong = predWrong && cfiUpdate.bits.bpuMeta.predictor === 3.U - - // ExcitingUtils.addSource(ubtbRight, "perfCntubtbRight", Perf) - // ExcitingUtils.addSource(ubtbWrong, "perfCntubtbWrong", Perf) - // ExcitingUtils.addSource(btbRight, "perfCntbtbRight", Perf) - // ExcitingUtils.addSource(btbWrong, "perfCntbtbWrong", Perf) - // ExcitingUtils.addSource(tageRight, "perfCnttageRight", Perf) - // ExcitingUtils.addSource(tageWrong, "perfCnttageWrong", Perf) - // ExcitingUtils.addSource(loopRight, "perfCntloopRight", Perf) - // ExcitingUtils.addSource(loopWrong, "perfCntloopWrong", Perf) + // io.pc.valid && read_hit_vec.asUInt ubtb hit + val ubtbAns = WireInit(VecInit(Seq.fill(PredictWidth) {0.U.asTypeOf(new PredictorAnswer)} )) + val btbAns = WireInit(VecInit(Seq.fill(PredictWidth) {0.U.asTypeOf(new PredictorAnswer)} )) + val bimResp = WireInit(VecInit(Seq.fill(PredictWidth) {false.B} )) + val tageAns = WireInit(VecInit(Seq.fill(PredictWidth) {0.U.asTypeOf(new PredictorAnswer)} )) + val rasAns = WireInit(0.U.asTypeOf(new PredictorAnswer)) + val loopAns = WireInit(VecInit(Seq.fill(PredictWidth) {0.U.asTypeOf(new PredictorAnswer)} )) + + ExcitingUtils.addSink(ubtbAns, "ubtbAns") + ExcitingUtils.addSink(btbAns, "btbAns") + ExcitingUtils.addSink(bimResp, "bimResp") + ExcitingUtils.addSink(tageAns, "tageAns") + ExcitingUtils.addSink(rasAns, "rasAns") + ExcitingUtils.addSink(loopAns, "loopAns") + + val ubtbAns_s3 = RegEnable(ubtbAns, if2_fire) + val ubtbAns_s4 = RegEnable(ubtbAns_s3, if3_fire) + + val btbAns_s3 = RegEnable(btbAns, if2_fire) + val btbAns_s4 = RegEnable(btbAns_s3, if3_fire) + val bimResp_s3 = RegEnable(bimResp, if2_fire) + val bimResp_s4 = RegEnable(bimResp_s3, if3_fire) + + fetchPacketWire.bpuMeta.zipWithIndex.foreach{ case(x,i) => + x.predictor := predictor + + x.ubtbAns := ubtbAns_s4(i) + x.btbAns := btbAns_s4(i) + x.btbAns.taken := bimResp_s4(i) + x.tageAns := tageAns(i) + x.rasAns := rasAns // Is this right? + x.loopAns := loopAns(i) + } + } // debug info if (IFUDebug) { @@ -540,7 +551,6 @@ class IFU extends XSModule with HasIFUConst XSDebug("[IF2] v=%d r=%d fire=%d redirect=%d flush=%d pc=%x snpc=%x\n", if2_valid, if2_ready, if2_fire, if2_redirect, if2_flush, if2_pc, if2_snpc) XSDebug("[IF3] v=%d r=%d fire=%d redirect=%d flush=%d pc=%x crossPageIPF=%d sawNTBrs=%d\n", if3_valid, if3_ready, if3_fire, if3_redirect, if3_flush, if3_pc, crossPageIPF, if3_bp.hasNotTakenBrs) XSDebug("[IF4] v=%d r=%d fire=%d redirect=%d flush=%d pc=%x crossPageIPF=%d sawNTBrs=%d\n", if4_valid, if4_ready, if4_fire, if4_redirect, if4_flush, if4_pc, if4_crossPageIPF, if4_bp.hasNotTakenBrs) - XSDebug("[predictor] predictor_s3=%d, predictor_s4=%d, predictor=%d\n", predictor_s3, predictor_s4, predictor) XSDebug("[IF1][icacheReq] v=%d r=%d addr=%x\n", icache.io.req.valid, icache.io.req.ready, icache.io.req.bits.addr) XSDebug("[IF1][ghr] hist=%b\n", if1_gh.asUInt) XSDebug("[IF1][ghr] extHist=%b\n\n", if1_gh.asUInt) diff --git a/src/main/scala/xiangshan/frontend/LoopPredictor.scala b/src/main/scala/xiangshan/frontend/LoopPredictor.scala index 1600d2628a546ba41d82f4346a3a8018fe2f7968..342defe9637971695f4d68e3aa6c2676eb1f30ac 100644 --- a/src/main/scala/xiangshan/frontend/LoopPredictor.scala +++ b/src/main/scala/xiangshan/frontend/LoopPredictor.scala @@ -403,8 +403,18 @@ class LoopPredictor extends BasePredictor with LTBParams { io.meta.specCnts(i) := ltbResps(i).meta } - if (!env.FPGAPlatform) { + if (!env.FPGAPlatform && env.EnablePerfDebug) { ExcitingUtils.addSource(io.resp.exit.reduce(_||_), "perfCntLoopExit", Perf) + + val loopAns = Wire(Vec(PredictWidth, new PredictorAnswer)) + + loopAns.zipWithIndex.foreach{ case(x,i) => + x.hit := io.resp.exit(i) + x.taken := false.B + x.target := DontCare + } + + ExcitingUtils.addSource(loopAns, "loopAns") } if (BPUDebug && debug) { diff --git a/src/main/scala/xiangshan/frontend/RAS.scala b/src/main/scala/xiangshan/frontend/RAS.scala index df6c2a8a71204482ed3698279c3144dc035e3588..082f0234680a4707588dfe2cb7fb6a9ca08e54b9 100644 --- a/src/main/scala/xiangshan/frontend/RAS.scala +++ b/src/main/scala/xiangshan/frontend/RAS.scala @@ -227,6 +227,15 @@ class RAS extends BasePredictor io.meta.rasTopCtr := DontCare io.meta.rasToqAddr := DontCare + if (!env.FPGAPlatform && env.EnablePerfDebug) { + val rasAns = Wire(new PredictorAnswer) + rasAns.hit := io.out.valid + rasAns.taken := DontCare + rasAns.target := io.out.bits.target + + ExcitingUtils.addSource(rasAns, "rasAns") + } + if (BPUDebug && debug) { val spec_debug = spec.debugIO val commit_debug = commit.debugIO diff --git a/src/main/scala/xiangshan/frontend/Tage.scala b/src/main/scala/xiangshan/frontend/Tage.scala index 743e957b76aa6a790ea54524eec734cee36f02f8..4bb01f932fd1622880001db6221ef66e766741d6 100644 --- a/src/main/scala/xiangshan/frontend/Tage.scala +++ b/src/main/scala/xiangshan/frontend/Tage.scala @@ -633,7 +633,17 @@ class Tage extends BaseTage { scTables(i).io.update.fetchIdx := u.bpuMeta.fetchIdx } + if (!env.FPGAPlatform && env.EnablePerfDebug) { + val tageAns = Wire(Vec(PredictWidth, new PredictorAnswer)) + tageAns.zipWithIndex.foreach{ case(x,i) => + x.hit := io.resp.hits(i) + x.taken := io.resp.takens(i) + x.target := DontCare + } + + ExcitingUtils.addSource(tageAns, "tageAns") + } if (BPUDebug && debug) { val m = updateMeta diff --git a/src/main/scala/xiangshan/frontend/uBTB.scala b/src/main/scala/xiangshan/frontend/uBTB.scala index 86ff187fc1159081b469959e515ea723691355ba..7a59e48dfacabcb54d0f47467f5497a72a4b2f29 100644 --- a/src/main/scala/xiangshan/frontend/uBTB.scala +++ b/src/main/scala/xiangshan/frontend/uBTB.scala @@ -5,6 +5,7 @@ import chisel3.util._ import utils._ import xiangshan._ import chisel3.experimental.chiselName +import chisel3.ExcitingUtils._ import scala.math.min @@ -258,6 +259,19 @@ class MicroBTB extends BasePredictor metas(b).wdata := Mux(do_reset, 0.U.asTypeOf(new MicroBTBMeta), update_write_meta) } + if (!env.FPGAPlatform && env.EnablePerfDebug) { + val ubtbAns = Wire(Vec(PredictWidth, new PredictorAnswer)) + // ubtbAns.hit := io.pc.valid && read_hit_vec.asUInt.orR + + ubtbAns.zipWithIndex.foreach{ case(x,i) => + x.hit := io.out.hits(i) + x.taken := io.out.takens(i) + x.target := io.out.targets(i) + } + + ExcitingUtils.addSource(ubtbAns, "ubtbAns") + } + if (BPUDebug && debug) { XSDebug(read_valid,"uBTB read req: pc:0x%x, tag:%x \n",io.pc.bits,read_req_tag) XSDebug(read_valid,"uBTB read resp: read_hit_vec:%b, \n",read_hit_vec.asUInt) diff --git a/src/main/scala/xiangshan/mem/lsqueue/LSQWrapper.scala b/src/main/scala/xiangshan/mem/lsqueue/LSQWrapper.scala index b65155d674d53411e678e4a83a1d5fcd43f030e6..1635c759ccacaf92310a601d6b39f8818eec4ab0 100644 --- a/src/main/scala/xiangshan/mem/lsqueue/LSQWrapper.scala +++ b/src/main/scala/xiangshan/mem/lsqueue/LSQWrapper.scala @@ -8,7 +8,7 @@ import xiangshan.cache._ import xiangshan.cache.{DCacheWordIO, DCacheLineIO, TlbRequestIO, MemoryOpConstants} import xiangshan.backend.LSUOpType import xiangshan.mem._ -import xiangshan.backend.roq.RoqPtr +import xiangshan.backend.roq.RoqLsqIO class ExceptionAddrIO extends XSBundle { val lsIdx = Input(new LSIdx) @@ -41,18 +41,27 @@ class LsqWrappper extends XSModule with HasDCacheParameters { val brqRedirect = Input(Valid(new Redirect)) val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle))) val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) + val loadDataForwarded = Vec(LoadPipelineWidth, Input(Bool())) val sbuffer = Vec(StorePipelineWidth, Decoupled(new DCacheWordReq)) val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback int load val mmioStout = DecoupledIO(new ExuOutput) // writeback uncached store val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO)) - val commits = Flipped(new RoqCommitIO) + val roq = Flipped(new RoqLsqIO) val rollback = Output(Valid(new Redirect)) val dcache = Flipped(ValidIO(new Refill)) val uncache = new DCacheWordIO - val roqDeqPtr = Input(new RoqPtr) val exceptionAddr = new ExceptionAddrIO val sqempty = Output(Bool()) }) + val difftestIO = IO(new Bundle() { + val fromSQ = new Bundle() { + val storeCommit = Output(UInt(2.W)) + val storeAddr = Output(Vec(2, UInt(64.W))) + val storeData = Output(Vec(2, UInt(64.W))) + val storeMask = Output(Vec(2, UInt(8.W))) + } + }) + difftestIO <> DontCare val loadQueue = Module(new LoadQueue) val storeQueue = Module(new StoreQueue) @@ -82,11 +91,11 @@ class LsqWrappper extends XSModule with HasDCacheParameters { loadQueue.io.brqRedirect <> io.brqRedirect loadQueue.io.loadIn <> io.loadIn loadQueue.io.storeIn <> io.storeIn + loadQueue.io.loadDataForwarded <> io.loadDataForwarded loadQueue.io.ldout <> io.ldout - loadQueue.io.commits <> io.commits + loadQueue.io.roq <> io.roq loadQueue.io.rollback <> io.rollback loadQueue.io.dcache <> io.dcache - loadQueue.io.roqDeqPtr <> io.roqDeqPtr loadQueue.io.exceptionAddr.lsIdx := io.exceptionAddr.lsIdx loadQueue.io.exceptionAddr.isStore := DontCare @@ -96,8 +105,7 @@ class LsqWrappper extends XSModule with HasDCacheParameters { storeQueue.io.storeIn <> io.storeIn storeQueue.io.sbuffer <> io.sbuffer storeQueue.io.mmioStout <> io.mmioStout - storeQueue.io.commits <> io.commits - storeQueue.io.roqDeqPtr <> io.roqDeqPtr + storeQueue.io.roq <> io.roq storeQueue.io.exceptionAddr.lsIdx := io.exceptionAddr.lsIdx storeQueue.io.exceptionAddr.isStore := DontCare @@ -106,26 +114,30 @@ class LsqWrappper extends XSModule with HasDCacheParameters { storeQueue.io.sqempty <> io.sqempty + if (env.DualCoreDifftest) { + difftestIO.fromSQ <> storeQueue.difftestIO + } + io.exceptionAddr.vaddr := Mux(io.exceptionAddr.isStore, storeQueue.io.exceptionAddr.vaddr, loadQueue.io.exceptionAddr.vaddr) // naive uncache arbiter val s_idle :: s_load :: s_store :: Nil = Enum(3) - val uncacheState = RegInit(s_idle) + val pendingstate = RegInit(s_idle) - switch(uncacheState){ + switch(pendingstate){ is(s_idle){ when(io.uncache.req.fire()){ - uncacheState := Mux(loadQueue.io.uncache.req.valid, s_load, s_store) + pendingstate := Mux(loadQueue.io.uncache.req.valid, s_load, s_store) } } is(s_load){ when(io.uncache.resp.fire()){ - uncacheState := s_idle + pendingstate := s_idle } } is(s_store){ when(io.uncache.resp.fire()){ - uncacheState := s_idle + pendingstate := s_idle } } } @@ -139,7 +151,7 @@ class LsqWrappper extends XSModule with HasDCacheParameters { }.otherwise{ io.uncache.req <> storeQueue.io.uncache.req } - when(uncacheState === s_load){ + when(pendingstate === s_load){ io.uncache.resp <> loadQueue.io.uncache.resp }.otherwise{ io.uncache.resp <> storeQueue.io.uncache.resp @@ -147,6 +159,6 @@ class LsqWrappper extends XSModule with HasDCacheParameters { assert(!(loadQueue.io.uncache.req.valid && storeQueue.io.uncache.req.valid)) assert(!(loadQueue.io.uncache.resp.valid && storeQueue.io.uncache.resp.valid)) - assert(!((loadQueue.io.uncache.resp.valid || storeQueue.io.uncache.resp.valid) && uncacheState === s_idle)) + assert(!((loadQueue.io.uncache.resp.valid || storeQueue.io.uncache.resp.valid) && pendingstate === s_idle)) } diff --git a/src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala b/src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala index 5329953eccc697d601a4fb6c38ae9b1c5826f46b..131f020efff7b94f3e2e40131bd82d1161400c93 100644 --- a/src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala +++ b/src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala @@ -9,7 +9,7 @@ import xiangshan.cache._ import xiangshan.cache.{DCacheLineIO, DCacheWordIO, MemoryOpConstants, TlbRequestIO} import xiangshan.backend.LSUOpType import xiangshan.mem._ -import xiangshan.backend.roq.RoqPtr +import xiangshan.backend.roq.RoqLsqIO import xiangshan.backend.fu.HasExceptionNO @@ -66,13 +66,13 @@ class LoadQueue extends XSModule val brqRedirect = Input(Valid(new Redirect)) val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle))) val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) + val loadDataForwarded = Vec(LoadPipelineWidth, Input(Bool())) val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback int load val load_s1 = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO)) - val commits = Flipped(new RoqCommitIO) + val roq = Flipped(new RoqLsqIO) val rollback = Output(Valid(new Redirect)) // replay now starts from load instead of store val dcache = Flipped(ValidIO(new Refill)) val uncache = new DCacheWordIO - val roqDeqPtr = Input(new RoqPtr) val exceptionAddr = new ExceptionAddrIO }) @@ -85,7 +85,6 @@ class LoadQueue extends XSModule val allocated = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // lq entry has been allocated val datavalid = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // data is valid val writebacked = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // inst has been writebacked to CDB - val commited = Reg(Vec(LoadQueueSize, Bool())) // inst has been writebacked to CDB val miss = Reg(Vec(LoadQueueSize, Bool())) // load inst missed, waiting for miss queue to accept miss request // val listening = Reg(Vec(LoadQueueSize, Bool())) // waiting for refill result val pending = Reg(Vec(LoadQueueSize, Bool())) // mmio pending: inst is an mmio inst, it will not be executed until it reachs the end of roq @@ -95,22 +94,16 @@ class LoadQueue extends XSModule val enqPtrExt = RegInit(VecInit((0 until RenameWidth).map(_.U.asTypeOf(new LqPtr)))) val deqPtrExt = RegInit(0.U.asTypeOf(new LqPtr)) val deqPtrExtNext = Wire(new LqPtr) - val validCounter = RegInit(0.U(log2Ceil(LoadQueueSize + 1).W)) val allowEnqueue = RegInit(true.B) val enqPtr = enqPtrExt(0).value val deqPtr = deqPtrExt.value - val sameFlag = enqPtrExt(0).flag === deqPtrExt.flag - val isEmpty = enqPtr === deqPtr && sameFlag - val isFull = enqPtr === deqPtr && !sameFlag - val allowIn = !isFull - - val loadCommit = (0 until CommitWidth).map(i => io.commits.valid(i) && !io.commits.isWalk && io.commits.info(i).commitType === CommitType.LOAD) - val mcommitIdx = (0 until CommitWidth).map(i => io.commits.info(i).lqIdx.value) val deqMask = UIntToMask(deqPtr, LoadQueueSize) val enqMask = UIntToMask(enqPtr, LoadQueueSize) + val commitCount = RegNext(io.roq.lcommit) + /** * Enqueue at dispatch * @@ -127,7 +120,6 @@ class LoadQueue extends XSModule allocated(index) := true.B datavalid(index) := false.B writebacked(index) := false.B - commited(index) := false.B miss(index) := false.B // listening(index) := false.B pending(index) := false.B @@ -177,13 +169,13 @@ class LoadQueue extends XSModule io.loadIn(i).bits.mmio )} val loadWbIndex = io.loadIn(i).bits.uop.lqIdx.value - datavalid(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio + datavalid(loadWbIndex) := (!io.loadIn(i).bits.miss || io.loadDataForwarded(i)) && !io.loadIn(i).bits.mmio writebacked(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio val loadWbData = Wire(new LQDataEntry) loadWbData.paddr := io.loadIn(i).bits.paddr loadWbData.mask := io.loadIn(i).bits.mask - loadWbData.data := io.loadIn(i).bits.data // fwd data + loadWbData.data := io.loadIn(i).bits.forwardData.asUInt // fwd data loadWbData.fwdMask := io.loadIn(i).bits.forwardMask dataModule.io.wbWrite(i, loadWbIndex, loadWbData) dataModule.io.wb.wen(i) := true.B @@ -195,7 +187,7 @@ class LoadQueue extends XSModule debug_mmio(loadWbIndex) := io.loadIn(i).bits.mmio val dcacheMissed = io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio - miss(loadWbIndex) := dcacheMissed + miss(loadWbIndex) := dcacheMissed && !io.loadDataForwarded(i) pending(loadWbIndex) := io.loadIn(i).bits.mmio uop(loadWbIndex).debugInfo.issueTime := io.loadIn(i).bits.uop.debugInfo.issueTime } @@ -324,9 +316,8 @@ class LoadQueue extends XSModule * When load commited, mark it as !allocated and move deqPtrExt forward. */ (0 until CommitWidth).map(i => { - when(loadCommit(i)) { - allocated(mcommitIdx(i)) := false.B - XSDebug("load commit %d: idx %d %x\n", i.U, mcommitIdx(i), uop(mcommitIdx(i)).cf.pc) + when(commitCount > i.U){ + allocated(deqPtr+i.U) := false.B } }) @@ -501,11 +492,39 @@ class LoadQueue extends XSModule /** * Memory mapped IO / other uncached operations * + * States: + * (1) writeback from store units: mark as pending + * (2) when they reach ROB's head, they can be sent to uncache channel + * (3) response from uncache channel: mark as datavalid + * (4) writeback to ROB (and other units): mark as writebacked + * (5) ROB commits the instruction: same as normal instructions */ - io.uncache.req.valid := pending(deqPtr) && allocated(deqPtr) && - io.commits.info(0).commitType === CommitType.LOAD && - io.roqDeqPtr === uop(deqPtr).roqIdx && - !io.commits.isWalk + //(2) when they reach ROB's head, they can be sent to uncache channel + val s_idle :: s_req :: s_resp :: s_wait :: Nil = Enum(4) + val uncacheState = RegInit(s_idle) + switch(uncacheState) { + is(s_idle) { + when(io.roq.pendingld && pending(deqPtr) && allocated(deqPtr)) { + uncacheState := s_req + } + } + is(s_req) { + when(io.uncache.req.fire()) { + uncacheState := s_resp + } + } + is(s_resp) { + when(io.uncache.resp.fire()) { + uncacheState := s_wait + } + } + is(s_wait) { + when(io.roq.commit) { + uncacheState := s_idle // ready for next mmio + } + } + } + io.uncache.req.valid := uncacheState === s_req dataModule.io.uncache.raddr := deqPtrExtNext.value @@ -537,6 +556,7 @@ class LoadQueue extends XSModule ) } + // (3) response from uncache channel: mark as datavalid dataModule.io.uncache.wen := false.B when(io.uncache.resp.fire()){ datavalid(deqPtr) := true.B @@ -547,14 +567,14 @@ class LoadQueue extends XSModule } // Read vaddr for mem exception - vaddrModule.io.raddr(0) := io.exceptionAddr.lsIdx.lqIdx.value + vaddrModule.io.raddr(0) := deqPtr + commitCount io.exceptionAddr.vaddr := vaddrModule.io.rdata(0) // misprediction recovery / exception redirect // invalidate lq term using robIdx val needCancel = Wire(Vec(LoadQueueSize, Bool())) for (i <- 0 until LoadQueueSize) { - needCancel(i) := uop(i).roqIdx.needFlush(io.brqRedirect) && allocated(i) && !commited(i) + needCancel(i) := uop(i).roqIdx.needFlush(io.brqRedirect) && allocated(i) when (needCancel(i)) { allocated(i) := false.B } @@ -573,24 +593,13 @@ class LoadQueue extends XSModule enqPtrExt := VecInit(enqPtrExt.map(_ + enqNumber)) } - val commitCount = PopCount(loadCommit) deqPtrExtNext := deqPtrExt + commitCount deqPtrExt := deqPtrExtNext val lastLastCycleRedirect = RegNext(lastCycleRedirect.valid) - val trueValidCounter = distanceBetween(enqPtrExt(0), deqPtrExt) - validCounter := Mux(lastLastCycleRedirect, - trueValidCounter, - validCounter + enqNumber - commitCount - ) - - allowEnqueue := Mux(io.brqRedirect.valid, - false.B, - Mux(lastLastCycleRedirect, - trueValidCounter <= (LoadQueueSize - RenameWidth).U, - validCounter + enqNumber <= (LoadQueueSize - RenameWidth).U - ) - ) + val validCount = distanceBetween(enqPtrExt(0), deqPtrExt) + + allowEnqueue := validCount + enqNumber <= (LoadQueueSize - RenameWidth).U // debug info XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtrExt.flag, deqPtr) @@ -609,7 +618,6 @@ class LoadQueue extends XSModule PrintFlag(allocated(i), "a") PrintFlag(allocated(i) && datavalid(i), "v") PrintFlag(allocated(i) && writebacked(i), "w") - PrintFlag(allocated(i) && commited(i), "c") PrintFlag(allocated(i) && miss(i), "m") // PrintFlag(allocated(i) && listening(i), "l") PrintFlag(allocated(i) && pending(i), "p") diff --git a/src/main/scala/xiangshan/mem/lsqueue/LoadQueueData.scala b/src/main/scala/xiangshan/mem/lsqueue/LoadQueueData.scala index a054fc2d4ef79c68d8554fe05bf884052dcf4d3b..6f8a8ae293b1e8a854263d6847635c843ec26a78 100644 --- a/src/main/scala/xiangshan/mem/lsqueue/LoadQueueData.scala +++ b/src/main/scala/xiangshan/mem/lsqueue/LoadQueueData.scala @@ -106,6 +106,51 @@ class MaskModule(numEntries: Int, numRead: Int, numWrite: Int) extends XSModule } } +class Data8Module(numEntries: Int, numRead: Int, numWrite: Int) extends XSModule with HasDCacheParameters { + val io = IO(new Bundle { + // read + val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W))) + val rdata = Output(Vec(numRead, UInt(8.W))) + // address indexed write + val wen = Input(Vec(numWrite, Bool())) + val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W))) + val wdata = Input(Vec(numWrite, UInt(8.W))) + // masked write + val mwmask = Input(Vec(blockWords, Vec(numEntries, Bool()))) + val mwdata = Input(Vec(blockWords, UInt(8.W))) + }) + + val data = Reg(Vec(numEntries, UInt(8.W))) + + // read ports + for (i <- 0 until numRead) { + io.rdata(i) := data(RegNext(io.raddr(i))) + } + + // below is the write ports (with priorities) + for (i <- 0 until numWrite) { + when (io.wen(i)) { + data(io.waddr(i)) := io.wdata(i) + } + } + + // masked write + for (i <- 0 until blockWords) { + for (j <- 0 until numEntries) { + when (io.mwmask(i)(j)) { + data(j) := io.mwdata(i) + } + } + } + + // DataModuleTemplate should not be used when there're any write conflicts + for (i <- 0 until numWrite) { + for (j <- i+1 until numWrite) { + assert(!(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j))) + } + } +} + class CoredataModule(numEntries: Int, numRead: Int, numWrite: Int) extends XSModule with HasDCacheParameters { val io = IO(new Bundle { // data io @@ -131,20 +176,28 @@ class CoredataModule(numEntries: Int, numRead: Int, numWrite: Int) extends XSMod val paddrWen = Input(Vec(numWrite, Bool())) }) - val data = Reg(Vec(numEntries, UInt(XLEN.W))) + val data8 = Seq.fill(8)(Module(new Data8Module(numEntries, numRead, numWrite))) val fwdMask = Reg(Vec(numEntries, UInt(8.W))) val wordIndex = Reg(Vec(numEntries, UInt((blockOffBits - wordOffBits).W))) // read ports for (i <- 0 until numRead) { - io.rdata(i) := data(RegNext(io.raddr(i))) + for (j <- 0 until 8) { + data8(j).io.raddr(i) := io.raddr(i) + } + io.rdata(i) := VecInit((0 until 8).map(j => data8(j).io.rdata(i))).asUInt } // below is the write ports (with priorities) for (i <- 0 until numWrite) { - when (io.wen(i)) { - data(io.waddr(i)) := io.wdata(i) + // write to data8 + for (j <- 0 until 8) { + data8(j).io.waddr(i) := io.waddr(i) + data8(j).io.wdata(i) := io.wdata(i)(8*(j+1)-1, 8*j) + data8(j).io.wen(i) := io.wen(i) } + + // write ctrl info when (io.fwdMaskWen(i)) { fwdMask(io.waddr(i)) := io.fwdMaskWdata(i) } @@ -153,25 +206,25 @@ class CoredataModule(numEntries: Int, numRead: Int, numWrite: Int) extends XSMod } } + // write refilled data to data8 - // masked write - // refill missed load - def mergeRefillData(refill: UInt, fwd: UInt, fwdMask: UInt): UInt = { - val res = Wire(Vec(8, UInt(8.W))) - (0 until 8).foreach(i => { - res(i) := Mux(fwdMask(i), fwd(8 * (i + 1) - 1, 8 * i), refill(8 * (i + 1) - 1, 8 * i)) - }) - res.asUInt - } - + // select refill data // split dcache result into words val words = VecInit((0 until blockWords) map { i => io.refillData(DataBits * (i + 1) - 1, DataBits * i)}) + // select refill data according to wordIndex (paddr) + for (i <- 0 until 8) { + for (j <- 0 until blockWords) { + data8(i).io.mwdata(j) := words(j)(8*(i+1)-1, 8*i) + } + } - // refill data according to matchMask, refillMask and refill.vald - for (j <- 0 until numEntries) { - when (io.mwmask(j)) { - val refillData = words(wordIndex(j)) // TODO - data(j) := mergeRefillData(refillData, data(j), fwdMask(j)) + // gen refill wmask + for (j <- 0 until blockWords) { + for (k <- 0 until numEntries) { + val wordMatch = wordIndex(k) === j.U + for (i <- 0 until 8) { + data8(i).io.mwmask(j)(k) := wordMatch && io.mwmask(k) && !fwdMask(k)(i) + } } } diff --git a/src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala b/src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala index da58ce688794e14823808d31255ca9461374bb6b..758eab10838fd512fefccfc7c8c9f191e6418c39 100644 --- a/src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala +++ b/src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala @@ -7,7 +7,7 @@ import xiangshan._ import xiangshan.cache._ import xiangshan.cache.{DCacheWordIO, DCacheLineIO, TlbRequestIO, MemoryOpConstants} import xiangshan.backend.LSUOpType -import xiangshan.backend.roq.RoqPtr +import xiangshan.backend.roq.RoqLsqIO class SqPtr extends CircularQueuePtr(SqPtr.StoreQueueSize) { } @@ -38,19 +38,28 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue val sbuffer = Vec(StorePipelineWidth, Decoupled(new DCacheWordReq)) val mmioStout = DecoupledIO(new ExuOutput) // writeback uncached store val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO)) - val commits = Flipped(new RoqCommitIO) + val roq = Flipped(new RoqLsqIO) val uncache = new DCacheWordIO - val roqDeqPtr = Input(new RoqPtr) // val refill = Flipped(Valid(new DCacheLineReq )) val exceptionAddr = new ExceptionAddrIO val sqempty = Output(Bool()) }) + val difftestIO = IO(new Bundle() { + val storeCommit = Output(UInt(2.W)) + val storeAddr = Output(Vec(2, UInt(64.W))) + val storeData = Output(Vec(2, UInt(64.W))) + val storeMask = Output(Vec(2, UInt(8.W))) + }) + difftestIO <> DontCare + // data modules val uop = Reg(Vec(StoreQueueSize, new MicroOp)) // val data = Reg(Vec(StoreQueueSize, new LsqEntry)) val dataModule = Module(new StoreQueueData(StoreQueueSize, numRead = StorePipelineWidth, numWrite = StorePipelineWidth, numForward = StorePipelineWidth)) dataModule.io := DontCare + val paddrModule = Module(new SQPaddrModule(StoreQueueSize, numRead = StorePipelineWidth, numWrite = StorePipelineWidth, numForward = StorePipelineWidth)) + paddrModule.io := DontCare val vaddrModule = Module(new AsyncDataModuleTemplate(UInt(VAddrBits.W), StoreQueueSize, numRead = 1, numWrite = StorePipelineWidth)) vaddrModule.io := DontCare @@ -66,14 +75,18 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue require(StoreQueueSize > RenameWidth) val enqPtrExt = RegInit(VecInit((0 until RenameWidth).map(_.U.asTypeOf(new SqPtr)))) val deqPtrExt = RegInit(VecInit((0 until StorePipelineWidth).map(_.U.asTypeOf(new SqPtr)))) + val cmtPtrExt = RegInit(VecInit((0 until CommitWidth).map(_.U.asTypeOf(new SqPtr)))) val validCounter = RegInit(0.U(log2Ceil(LoadQueueSize + 1).W)) val allowEnqueue = RegInit(true.B) val enqPtr = enqPtrExt(0).value val deqPtr = deqPtrExt(0).value + val cmtPtr = cmtPtrExt(0).value + + val deqMask = UIntToMask(deqPtr, StoreQueueSize) + val enqMask = UIntToMask(enqPtr, StoreQueueSize) - val tailMask = UIntToMask(deqPtr, StoreQueueSize) - val headMask = UIntToMask(enqPtr, StoreQueueSize) + val commitCount = RegNext(io.roq.scommit) // Read dataModule // deqPtrExtNext and deqPtrExtNext+1 entry will be read from dataModule @@ -86,11 +99,11 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue deqPtrExt ) )) - val dataModuleRead = dataModule.io.rdata for (i <- 0 until StorePipelineWidth) { dataModule.io.raddr(i) := deqPtrExtNext(i).value + paddrModule.io.raddr(i) := deqPtrExtNext(i).value } - vaddrModule.io.raddr(0) := io.exceptionAddr.lsIdx.sqIdx.value + vaddrModule.io.raddr(0) := cmtPtr + commitCount /** * Enqueue at dispatch @@ -129,6 +142,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue */ for (i <- 0 until StorePipelineWidth) { dataModule.io.wen(i) := false.B + paddrModule.io.wen(i) := false.B vaddrModule.io.wen(i) := false.B when (io.storeIn(i).fire()) { val stWbIndex = io.storeIn(i).bits.uop.sqIdx.value @@ -138,13 +152,17 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue val storeWbData = Wire(new SQDataEntry) storeWbData := DontCare - storeWbData.paddr := io.storeIn(i).bits.paddr storeWbData.mask := io.storeIn(i).bits.mask storeWbData.data := io.storeIn(i).bits.data + dataModule.io.waddr(i) := stWbIndex dataModule.io.wdata(i) := storeWbData dataModule.io.wen(i) := true.B + paddrModule.io.waddr(i) := stWbIndex + paddrModule.io.wdata(i) := io.storeIn(i).bits.paddr + paddrModule.io.wen(i) := true.B + vaddrModule.io.waddr(i) := stWbIndex vaddrModule.io.wdata(i) := io.storeIn(i).bits.vaddr vaddrModule.io.wen(i) := true.B @@ -185,7 +203,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue for (j <- 0 until StoreQueueSize) { storeWritebackedVec(j) := datavalid(j) && allocated(j) // all datavalid terms need to be checked } - val needForward1 = Mux(differentFlag, ~tailMask, tailMask ^ forwardMask) & storeWritebackedVec.asUInt + val needForward1 = Mux(differentFlag, ~deqMask, deqMask ^ forwardMask) & storeWritebackedVec.asUInt val needForward2 = Mux(differentFlag, forwardMask, 0.U(StoreQueueSize.W)) & storeWritebackedVec.asUInt XSDebug(p"$i f1 ${Binary(needForward1)} f2 ${Binary(needForward2)} " + @@ -193,15 +211,13 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue ) // do real fwd query - dataModule.io.forwardQuery( - numForward = i, - paddr = io.forward(i).paddr, - needForward1 = needForward1, - needForward2 = needForward2 - ) + dataModule.io.needForward(i)(0) := needForward1 & paddrModule.io.forwardMmask(i).asUInt + dataModule.io.needForward(i)(1) := needForward2 & paddrModule.io.forwardMmask(i).asUInt + + paddrModule.io.forwardMdata(i) := io.forward(i).paddr - io.forward(i).forwardMask := dataModule.io.forward(i).forwardMask - io.forward(i).forwardData := dataModule.io.forward(i).forwardData + io.forward(i).forwardMask := dataModule.io.forwardMask(i) + io.forward(i).forwardData := dataModule.io.forwardData(i) } /** @@ -215,19 +231,40 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue * (5) ROB commits the instruction: same as normal instructions */ //(2) when they reach ROB's head, they can be sent to uncache channel - io.uncache.req.valid := pending(deqPtr) && allocated(deqPtr) && - io.commits.info(0).commitType === CommitType.STORE && - io.roqDeqPtr === uop(deqPtr).roqIdx && - !io.commits.isWalk + val s_idle :: s_req :: s_resp :: s_wait :: Nil = Enum(4) + val uncacheState = RegInit(s_idle) + switch(uncacheState) { + is(s_idle) { + when(io.roq.pendingst && pending(deqPtr) && allocated(deqPtr)) { + uncacheState := s_req + } + } + is(s_req) { + when(io.uncache.req.fire()) { + uncacheState := s_resp + } + } + is(s_resp) { + when(io.uncache.resp.fire()) { + uncacheState := s_wait + } + } + is(s_wait) { + when(io.roq.commit) { + uncacheState := s_idle // ready for next mmio + } + } + } + io.uncache.req.valid := uncacheState === s_req io.uncache.req.bits.cmd := MemoryOpConstants.M_XWR - io.uncache.req.bits.addr := dataModule.io.rdata(0).paddr // data(deqPtr) -> rdata(0) + io.uncache.req.bits.addr := paddrModule.io.rdata(0) // data(deqPtr) -> rdata(0) io.uncache.req.bits.data := dataModule.io.rdata(0).data io.uncache.req.bits.mask := dataModule.io.rdata(0).mask io.uncache.req.bits.meta.id := DontCare io.uncache.req.bits.meta.vaddr := DontCare - io.uncache.req.bits.meta.paddr := dataModule.io.rdata(0).paddr + io.uncache.req.bits.meta.paddr := paddrModule.io.rdata(0) io.uncache.req.bits.meta.uop := uop(deqPtr) io.uncache.req.bits.meta.mmio := true.B io.uncache.req.bits.meta.tlb_miss := false.B @@ -256,7 +293,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue io.mmioStout.valid := allocated(deqPtr) && datavalid(deqPtr) && !writebacked(deqPtr) io.mmioStout.bits.uop := uop(deqPtr) io.mmioStout.bits.uop.sqIdx := deqPtrExt(0) - io.mmioStout.bits.data := dataModuleRead(0).data // dataModuleRead.read(deqPtr) + io.mmioStout.bits.data := dataModule.io.rdata(0).data // dataModule.io.rdata.read(deqPtr) io.mmioStout.bits.redirectValid := false.B io.mmioStout.bits.redirect := DontCare io.mmioStout.bits.brUpdate := DontCare @@ -275,12 +312,11 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue * (2) They will not be cancelled and can be sent to lower level. */ for (i <- 0 until CommitWidth) { - val storeCommit = !io.commits.isWalk && io.commits.valid(i) && io.commits.info(i).commitType === CommitType.STORE - when (storeCommit) { - commited(io.commits.info(i).sqIdx.value) := true.B - XSDebug("store commit %d: idx %d\n", i.U, io.commits.info(i).sqIdx.value) + when (commitCount > i.U) { + commited(cmtPtrExt(i).value) := true.B } } + cmtPtrExt := cmtPtrExt.map(_ + commitCount) // Commited stores will not be cancelled and can be sent to lower level. // remove retired insts from sq, add retired store to sbuffer @@ -291,9 +327,9 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue // if sbuffer.fire(), read next io.sbuffer(i).valid := allocated(ptr) && commited(ptr) && !mmio(ptr) io.sbuffer(i).bits.cmd := MemoryOpConstants.M_XWR - io.sbuffer(i).bits.addr := dataModuleRead(i).paddr - io.sbuffer(i).bits.data := dataModuleRead(i).data - io.sbuffer(i).bits.mask := dataModuleRead(i).mask + io.sbuffer(i).bits.addr := paddrModule.io.rdata(i) + io.sbuffer(i).bits.data := dataModule.io.rdata(i).data + io.sbuffer(i).bits.mask := dataModule.io.rdata(i).mask io.sbuffer(i).bits.meta := DontCare io.sbuffer(i).bits.meta.tlb_miss := false.B io.sbuffer(i).bits.meta.uop := DontCare @@ -309,17 +345,23 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue assert(io.sbuffer(0).fire()) } - if (!env.FPGAPlatform) { - val storeCommit = PopCount(io.sbuffer.map(_.fire())) - val waddr = VecInit(io.sbuffer.map(req => SignExt(req.bits.addr, 64))) - val wdata = VecInit(io.sbuffer.map(req => req.bits.data & MaskExpand(req.bits.mask))) - val wmask = VecInit(io.sbuffer.map(_.bits.mask)) + val storeCommit = PopCount(io.sbuffer.map(_.fire())) + val waddr = VecInit(io.sbuffer.map(req => SignExt(req.bits.addr, 64))) + val wdata = VecInit(io.sbuffer.map(req => req.bits.data & MaskExpand(req.bits.mask))) + val wmask = VecInit(io.sbuffer.map(_.bits.mask)) + if (!env.FPGAPlatform) { ExcitingUtils.addSource(RegNext(storeCommit), "difftestStoreCommit", ExcitingUtils.Debug) ExcitingUtils.addSource(RegNext(waddr), "difftestStoreAddr", ExcitingUtils.Debug) ExcitingUtils.addSource(RegNext(wdata), "difftestStoreData", ExcitingUtils.Debug) ExcitingUtils.addSource(RegNext(wmask), "difftestStoreMask", ExcitingUtils.Debug) } + if (env.DualCoreDifftest) { + difftestIO.storeCommit := RegNext(storeCommit) + difftestIO.storeAddr := RegNext(waddr) + difftestIO.storeData := RegNext(wdata) + difftestIO.storeMask := RegNext(wmask) + } // Read vaddr for mem exception io.exceptionAddr.vaddr := vaddrModule.io.rdata(0) @@ -352,19 +394,9 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue val lastLastCycleRedirect = RegNext(lastCycleRedirect) val dequeueCount = Mux(io.sbuffer(1).fire(), 2.U, Mux(io.sbuffer(0).fire() || io.mmioStout.fire(), 1.U, 0.U)) - val trueValidCounter = distanceBetween(enqPtrExt(0), deqPtrExt(0)) - validCounter := Mux(lastLastCycleRedirect, - trueValidCounter - dequeueCount, - validCounter + enqNumber - dequeueCount - ) - - allowEnqueue := Mux(io.brqRedirect.valid, - false.B, - Mux(lastLastCycleRedirect, - trueValidCounter <= (StoreQueueSize - RenameWidth).U, - validCounter + enqNumber <= (StoreQueueSize - RenameWidth).U - ) - ) + val validCount = distanceBetween(enqPtrExt(0), deqPtrExt(0)) + + allowEnqueue := validCount + enqNumber <= (StoreQueueSize - RenameWidth).U // io.sqempty will be used by sbuffer // We delay it for 1 cycle for better timing @@ -385,7 +417,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue for (i <- 0 until StoreQueueSize) { if (i % 4 == 0) XSDebug("") - XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, dataModule.io.debug(i).paddr) + XSDebug(false, true.B, "%x ", uop(i).cf.pc) PrintFlag(allocated(i), "a") PrintFlag(allocated(i) && datavalid(i), "v") PrintFlag(allocated(i) && writebacked(i), "w") diff --git a/src/main/scala/xiangshan/mem/lsqueue/StoreQueueData.scala b/src/main/scala/xiangshan/mem/lsqueue/StoreQueueData.scala index 32b86eb40411efb4cf438fabfce474e08100ed64..8c014c60fd6733f0fb8338c4ee9af8a3ee5413f5 100644 --- a/src/main/scala/xiangshan/mem/lsqueue/StoreQueueData.scala +++ b/src/main/scala/xiangshan/mem/lsqueue/StoreQueueData.scala @@ -11,12 +11,52 @@ import xiangshan.mem._ import xiangshan.backend.roq.RoqPtr +// Data module define +// These data modules are like SyncDataModuleTemplate, but support cam-like ops +class SQPaddrModule(numEntries: Int, numRead: Int, numWrite: Int, numForward: Int) extends XSModule with HasDCacheParameters { + val io = IO(new Bundle { + val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W))) + val rdata = Output(Vec(numRead, UInt((PAddrBits).W))) + val wen = Input(Vec(numWrite, Bool())) + val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W))) + val wdata = Input(Vec(numWrite, UInt((PAddrBits).W))) + val forwardMdata = Input(Vec(numForward, UInt((PAddrBits).W))) + val forwardMmask = Output(Vec(numForward, Vec(numEntries, Bool()))) + }) + + val data = Reg(Vec(numEntries, UInt((PAddrBits).W))) + + // read ports + for (i <- 0 until numRead) { + io.rdata(i) := data(RegNext(io.raddr(i))) + } + + // below is the write ports (with priorities) + for (i <- 0 until numWrite) { + when (io.wen(i)) { + data(io.waddr(i)) := io.wdata(i) + } + } + + // content addressed match + for (i <- 0 until numForward) { + for (j <- 0 until numEntries) { + io.forwardMmask(i)(j) := io.forwardMdata(i)(PAddrBits-1, 3) === data(j)(PAddrBits-1, 3) + } + } + + // DataModuleTemplate should not be used when there're any write conflicts + for (i <- 0 until numWrite) { + for (j <- i+1 until numWrite) { + assert(!(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j))) + } + } +} + class SQDataEntry extends XSBundle { -// val vaddr = UInt(VAddrBits.W) // TODO: need opt - val paddr = UInt(PAddrBits.W) + // val paddr = UInt(PAddrBits.W) val mask = UInt(8.W) val data = UInt(XLEN.W) -// val exception = UInt(16.W) // TODO: opt size } class StoreQueueData(size: Int, numRead: Int, numWrite: Int, numForward: Int) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper { @@ -29,13 +69,8 @@ class StoreQueueData(size: Int, numRead: Int, numWrite: Int, numForward: Int) ex val debug = Vec(size, Output(new SQDataEntry)) val needForward = Input(Vec(numForward, Vec(2, UInt(size.W)))) - val forward = Vec(numForward, Flipped(new LoadForwardQueryIO)) - - def forwardQuery(numForward: Int, paddr: UInt, needForward1: Data, needForward2: Data): Unit = { - this.needForward(numForward)(0) := needForward1 - this.needForward(numForward)(1) := needForward2 - this.forward(numForward).paddr := paddr - } + val forwardMask = Vec(numForward, Output(Vec(8, Bool()))) + val forwardData = Vec(numForward, Output(Vec(8, UInt(8.W)))) }) io := DontCare @@ -72,32 +107,7 @@ class StoreQueueData(size: Int, numRead: Int, numWrite: Int, numForward: Int) ex // entry with larger index should have higher priority since it's data is younger (0 until numForward).map(i => { - val forwardMask1 = WireInit(VecInit(Seq.fill(8)(false.B))) - val forwardData1 = WireInit(VecInit(Seq.fill(8)(0.U(8.W)))) - val forwardMask2 = WireInit(VecInit(Seq.fill(8)(false.B))) - val forwardData2 = WireInit(VecInit(Seq.fill(8)(0.U(8.W)))) - - for (j <- 0 until size) { - val needCheck = io.forward(i).paddr(PAddrBits - 1, 3) === data(j).paddr(PAddrBits - 1, 3) - (0 until XLEN / 8).foreach(k => { - when (needCheck && data(j).mask(k)) { - when (io.needForward(i)(0)(j)) { - forwardMask1(k) := true.B - forwardData1(k) := data(j).data(8 * (k + 1) - 1, 8 * k) - } - when (io.needForward(i)(1)(j)) { - forwardMask2(k) := true.B - forwardData2(k) := data(j).data(8 * (k + 1) - 1, 8 * k) - } - XSDebug(io.needForward(i)(0)(j) || io.needForward(i)(1)(j), - p"forwarding $k-th byte ${Hexadecimal(data(j).data(8 * (k + 1) - 1, 8 * k))} " + - p"from ptr $j\n") - } - }) - } - // parallel fwd logic - val paddrMatch = Wire(Vec(size, Bool())) val matchResultVec = Wire(Vec(size * 2, new FwdEntry)) def parallelFwd(xs: Seq[Data]): Data = { @@ -113,13 +123,14 @@ class StoreQueueData(size: Int, numRead: Int, numWrite: Int, numForward: Int) ex }) } - for (j <- 0 until size) { - paddrMatch(j) := io.forward(i).paddr(PAddrBits - 1, 3) === data(j).paddr(PAddrBits - 1, 3) - } + // paddrMatch is now included in io.needForward + // for (j <- 0 until size) { + // paddrMatch(j) := io.forward(i).paddr(PAddrBits - 1, 3) === data(j).paddr(PAddrBits - 1, 3) + // } for (j <- 0 until size) { - val needCheck0 = RegNext(paddrMatch(j) && io.needForward(i)(0)(j)) - val needCheck1 = RegNext(paddrMatch(j) && io.needForward(i)(1)(j)) + val needCheck0 = RegNext(io.needForward(i)(0)(j)) + val needCheck1 = RegNext(io.needForward(i)(1)(j)) (0 until XLEN / 8).foreach(k => { matchResultVec(j).mask(k) := needCheck0 && data(j).mask(k) matchResultVec(j).data(k) := data(j).data(8 * (k + 1) - 1, 8 * k) @@ -130,8 +141,8 @@ class StoreQueueData(size: Int, numRead: Int, numWrite: Int, numForward: Int) ex val parallelFwdResult = parallelFwd(matchResultVec).asTypeOf(new FwdEntry) - io.forward(i).forwardMask := parallelFwdResult.mask - io.forward(i).forwardData := parallelFwdResult.data + io.forwardMask(i) := parallelFwdResult.mask + io.forwardData(i) := parallelFwdResult.data }) diff --git a/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala b/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala index ffe7c0732e0d5acda6516cf6e423f65c42d01fd5..c859d0dc8588858c21a4b4ad7f342fe451044753 100644 --- a/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala +++ b/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala @@ -12,6 +12,7 @@ import xiangshan.backend.LSUOpType class LoadToLsqIO extends XSBundle { val loadIn = ValidIO(new LsPipelineBundle) val ldout = Flipped(DecoupledIO(new ExuOutput)) + val loadDataForwarded = Output(Bool()) val forward = new LoadForwardQueryIO } @@ -26,8 +27,18 @@ class LoadUnit_S0 extends XSModule { }) val s0_uop = io.in.bits.uop - val s0_vaddr = io.in.bits.src1 + SignExt(ImmUnion.I.toImm32(s0_uop.ctrl.imm), XLEN) - val s0_mask = genWmask(s0_vaddr, s0_uop.ctrl.fuOpType(1,0)) + val s0_vaddr_old = io.in.bits.src1 + SignExt(ImmUnion.I.toImm32(s0_uop.ctrl.imm), XLEN) + val imm12 = WireInit(s0_uop.ctrl.imm(11,0)) + val s0_vaddr_lo = io.in.bits.src1(11,0) + Cat(0.U(1.W), imm12) + val s0_vaddr_hi = Mux(imm12(11), + Mux((s0_vaddr_lo(12)), io.in.bits.src1(VAddrBits-1, 12), io.in.bits.src1(VAddrBits-1, 12)+SignExt(1.U, VAddrBits-12)), + Mux((s0_vaddr_lo(12)), io.in.bits.src1(VAddrBits-1, 12)+1.U, io.in.bits.src1(VAddrBits-1, 12)) + ) + val s0_vaddr = Cat(s0_vaddr_hi, s0_vaddr_lo(11,0)) + when(io.in.fire() && s0_vaddr(VAddrBits-1,0) =/= (io.in.bits.src1 + SignExt(ImmUnion.I.toImm32(s0_uop.ctrl.imm), XLEN))(VAddrBits-1,0)){ + printf("s0_vaddr %x s0_vaddr_old %x\n", s0_vaddr, s0_vaddr_old(VAddrBits-1,0)) + } + val s0_mask = genWmask(s0_vaddr_lo, s0_uop.ctrl.fuOpType(1,0)) // query DTLB io.dtlbReq.valid := io.in.valid @@ -141,6 +152,7 @@ class LoadUnit_S2 extends XSModule with HasLoadHelper { val dcacheResp = Flipped(DecoupledIO(new DCacheWordResp)) val lsq = new LoadForwardQueryIO val sbuffer = new LoadForwardQueryIO + val dataForwarded = Output(Bool()) }) val s2_uop = io.in.bits.uop @@ -194,10 +206,17 @@ class LoadUnit_S2 extends XSModule with HasLoadHelper { io.out.bits := io.in.bits io.out.bits.data := rdataPartialLoad // when exception occurs, set it to not miss and let it write back to roq (via int port) - io.out.bits.miss := s2_cache_miss && !fullForward && !s2_exception + io.out.bits.miss := s2_cache_miss && !s2_exception io.out.bits.uop.ctrl.fpWen := io.in.bits.uop.ctrl.fpWen && !s2_exception io.out.bits.mmio := s2_mmio + // For timing reasons, we can not let + // io.out.bits.miss := s2_cache_miss && !s2_exception && !fullForward + // We use io.dataForwarded instead. It means forward logic have prepared all data needed, + // and dcache query is no longer needed. + // Such inst will be writebacked from load queue. + io.dataForwarded := s2_cache_miss && fullForward && !s2_exception + io.in.ready := io.out.ready || !io.in.valid // merge forward result @@ -259,6 +278,7 @@ class LoadUnit extends XSModule with HasLoadHelper { load_s2.io.lsq.forwardMask <> io.lsq.forward.forwardMask load_s2.io.sbuffer.forwardData <> io.sbuffer.forwardData load_s2.io.sbuffer.forwardMask <> io.sbuffer.forwardMask + load_s2.io.dataForwarded <> io.lsq.loadDataForwarded XSDebug(load_s0.io.out.valid, p"S0: pc ${Hexadecimal(load_s0.io.out.bits.uop.cf.pc)}, lId ${Hexadecimal(load_s0.io.out.bits.uop.lqIdx.asUInt)}, " + diff --git a/src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala b/src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala index c4868d2783c8fb500ef91192b87ab2d655e5a942..1d4f8b8368fab7a8dd97c4020953f6da753029c2 100644 --- a/src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala +++ b/src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala @@ -17,7 +17,17 @@ class StoreUnit_S0 extends XSModule { }) // send req to dtlb - val saddr = io.in.bits.src1 + SignExt(ImmUnion.S.toImm32(io.in.bits.uop.ctrl.imm), XLEN) + val saddr_old = io.in.bits.src1 + SignExt(ImmUnion.S.toImm32(io.in.bits.uop.ctrl.imm), XLEN) + val imm12 = WireInit(io.in.bits.uop.ctrl.imm(11,0)) + val saddr_lo = io.in.bits.src1(11,0) + Cat(0.U(1.W), imm12) + val saddr_hi = Mux(imm12(11), + Mux((saddr_lo(12)), io.in.bits.src1(VAddrBits-1, 12), io.in.bits.src1(VAddrBits-1, 12)+SignExt(1.U, VAddrBits-12)), + Mux((saddr_lo(12)), io.in.bits.src1(VAddrBits-1, 12)+1.U, io.in.bits.src1(VAddrBits-1, 12)) + ) + val saddr = Cat(saddr_hi, saddr_lo(11,0)) + when(io.in.fire() && saddr(VAddrBits-1,0) =/= (io.in.bits.src1 + SignExt(ImmUnion.S.toImm32(io.in.bits.uop.ctrl.imm), XLEN))(VAddrBits-1,0)){ + printf("saddr %x saddr_old %x\n", saddr, saddr_old(VAddrBits-1,0)) + } io.dtlbReq.bits.vaddr := saddr io.dtlbReq.valid := io.in.valid diff --git a/src/main/scala/xiangshan/mem/sbuffer/NewSbuffer.scala b/src/main/scala/xiangshan/mem/sbuffer/NewSbuffer.scala index 37b9d9a45749bf6df15412020c17af3723d06e45..3381774aa10bda5bdb49c4ea01d76c6d2924d0a5 100644 --- a/src/main/scala/xiangshan/mem/sbuffer/NewSbuffer.scala +++ b/src/main/scala/xiangshan/mem/sbuffer/NewSbuffer.scala @@ -8,10 +8,16 @@ import xiangshan.cache._ trait HasSbufferCst extends HasXSParameter { - def s_invalid = 0.U(2.W) - def s_valid = 1.U(2.W) - def s_prepare = 2.U(2.W) - def s_inflight = 3.U(2.W) + // use 1h to speedup selection + def s_invalid = (1<<0).U(4.W) + def s_valid = (1<<1).U(4.W) + def s_prepare = (1<<2).U(4.W) + def s_inflight = (1<<3).U(4.W) + + def isInvalid(i: UInt): Bool = i(0).asBool + def isValid(i: UInt): Bool = i(1).asBool + def isPrepare(i: UInt): Bool = i(2).asBool + def isInflight(i: UInt): Bool = i(3).asBool val evictCycle = 1 << 20 require(isPow2(evictCycle)) @@ -114,6 +120,13 @@ class NewSbuffer extends XSModule with HasSbufferCst { val empty = Output(Bool()) } // sbuffer flush }) + val difftestIO = IO(new Bundle() { + val sbufferResp = Output(Bool()) + val sbufferAddr = Output(UInt(64.W)) + val sbufferData = Output(Vec(64, UInt(8.W))) + val sbufferMask = Output(UInt(64.W)) + }) + difftestIO <> DontCare val buffer = Mem(StoreBufferSize, new SbufferLine) val stateVec = RegInit(VecInit(Seq.fill(StoreBufferSize)(s_invalid))) @@ -150,7 +163,7 @@ class NewSbuffer extends XSModule with HasSbufferCst { // sbuffer entry count val invalidCount = RegInit(StoreBufferSize.U((log2Up(StoreBufferSize) + 1).W)) val validCount = RegInit(0.U((log2Up(StoreBufferSize) + 1).W)) - val full = invalidCount === 0.U + val full = invalidCount === 0.U // full = TODO: validCount(log2Up(StoreBufferSize)) val bufferRead = VecInit((0 until StoreBufferSize).map(i => buffer(i))) val stateRead = VecInit((0 until StoreBufferSize).map(i => stateVec(i))) @@ -172,7 +185,7 @@ class NewSbuffer extends XSModule with HasSbufferCst { val lru = Module(new ChooseReplace(StoreBufferSize)) val evictionIdx = lru.io.way - lru.io.mask := stateRead.map(_ === s_valid) + lru.io.mask := stateRead.map(isValid(_)) val tags = io.in.map(in => getTag(in.bits.addr)) val sameTag = tags(0) === tags(1) @@ -188,21 +201,36 @@ class NewSbuffer extends XSModule with HasSbufferCst { for(i <- 0 until StorePipelineWidth){ mergeMask(i) := widthMap(j => - Mux(tags(i) === tagRead(j) && stateRead(j) === s_valid, true.B, false.B)) + Mux(tags(i) === tagRead(j) && isValid(stateRead(j)), true.B, false.B)) } // insert confition // firstInsert: the first invalid entry // if first entry canMerge or second entry has the same tag with the first entry , secondInsert equal the first invalid entry, otherwise, the second invalid entry - val invalidMask = stateRead.map(s => s === s_invalid) - val firstInsertMask = PriorityEncoderOH(invalidMask) - val secondInsertMask = Wire(Vec(StoreBufferSize, Bool())) - for (i <- 0 until StoreBufferSize){ - secondInsertMask(i) := Mux(canMerge(0) || sameTag, firstInsertMask(i), invalidMask(i) - firstInsertMask(i)) + val invalidMask = stateRead.map(s => isInvalid(s)) + val evenInvalidMask = GetEvenBits(VecInit(invalidMask).asUInt) + val oddInvalidMask = GetOddBits(VecInit(invalidMask).asUInt) + + val (evenRawInsertIdx, evenCanInsert) = PriorityEncoderWithFlag(evenInvalidMask) + val (oddRawInsertIdx, oddCanInsert) = PriorityEncoderWithFlag(oddInvalidMask) + val evenInsertIdx = Cat(evenRawInsertIdx, 0.U(1.W)) + val oddInsertIdx = Cat(oddRawInsertIdx, 1.U(1.W)) + + val enbufferSelReg = RegInit(false.B) + when(io.in(0).valid) { + enbufferSelReg := ~enbufferSelReg } - val (firstInsertIdx, firstCanInsert) = PriorityEncoderWithFlag(invalidMask) - val (secondInsertIdx, secondCanInsert) = PriorityEncoderWithFlag(secondInsertMask) + val firstInsertIdx = Mux(enbufferSelReg, evenInsertIdx, oddInsertIdx) + val secondInsertIdx = Mux(sameTag, + firstInsertIdx, + Mux(~enbufferSelReg, evenInsertIdx, oddInsertIdx) + ) + val firstCanInsert = Mux(enbufferSelReg, evenCanInsert, oddCanInsert) + val secondCanInsert = Mux(sameTag, + firstCanInsert, + Mux(~enbufferSelReg, evenCanInsert, oddCanInsert) + ) io.in(0).ready := firstCanInsert || canMerge(0) io.in(1).ready := (secondCanInsert || canMerge(1)) && !sameWord && io.in(0).ready @@ -244,7 +272,7 @@ class NewSbuffer extends XSModule with HasSbufferCst { when(canMerge(0)){ mergeWordReq(io.in(0).bits, mergeIdx(0), firstWord) XSDebug(p"merge req 0 to line [${mergeIdx(0)}]\n") - }.elsewhen(firstCanInsert){ + }.otherwise{ wordReqToBufLine(io.in(0).bits, tags(0), firstInsertIdx, firstWord, true.B) XSDebug(p"insert req 0 to line[$firstInsertIdx]\n") } @@ -255,7 +283,7 @@ class NewSbuffer extends XSModule with HasSbufferCst { when(canMerge(1)){ mergeWordReq(io.in(1).bits, mergeIdx(1), secondWord) XSDebug(p"merge req 1 to line [${mergeIdx(1)}]\n") - }.elsewhen(secondCanInsert){ + }.otherwise{ wordReqToBufLine(io.in(1).bits, tags(1), secondInsertIdx, secondWord, !sameTag) XSDebug(p"insert req 1 to line[$secondInsertIdx]\n") } @@ -288,7 +316,7 @@ class NewSbuffer extends XSModule with HasSbufferCst { // ---------------------- Send Dcache Req --------------------- val do_eviction = Wire(Bool()) - val empty = Cat(stateVec.map(s => s===s_invalid)).andR() && !Cat(io.in.map(_.valid)).orR() + val empty = Cat(stateVec.map(s => isInvalid(s))).andR() && !Cat(io.in.map(_.valid)).orR() do_eviction := validCount >= 12.U @@ -321,7 +349,7 @@ class NewSbuffer extends XSModule with HasSbufferCst { val tag = tagRead(idx) !Cat(widthMap(i => { // stateVec(idx) itself must not be s_inflight* - (stateRead(i) === s_inflight || stateRead(i) === s_prepare) && + (isInflight(stateRead(i)) || isPrepare(stateRead(i))) && tag === tagRead(i) })).orR() } @@ -340,28 +368,34 @@ class NewSbuffer extends XSModule with HasSbufferCst { // evictionEntry.bits := evictionIdx val prepareValid = ((do_eviction && sbuffer_state === x_replace) || (sbuffer_state === x_drain_sbuffer)) && - stateVec(evictionIdx)===s_valid && + isValid(stateVec(evictionIdx)) && noSameBlockInflight(evictionIdx) when(prepareValid){ stateVec(evictionIdx) := s_prepare } - val prepareMask = stateVec.map(s => s === s_prepare) + val prepareMask = stateVec.map(s => isPrepare(s)) val (prepareIdx, prepareEn) = PriorityEncoderWithFlag(prepareMask) - - io.dcache.req.valid := prepareEn - - io.dcache.req.bits.addr := getAddr(tagRead(prepareIdx)) - io.dcache.req.bits.data := bufferRead(prepareIdx).data - io.dcache.req.bits.mask := bufferRead(prepareIdx).mask - io.dcache.req.bits.cmd := MemoryOpConstants.M_XWR - io.dcache.req.bits.meta := DontCare - io.dcache.req.bits.meta.id := prepareIdx + val dcacheReqValid = RegInit(false.B) + val dcacheCandidate = Reg(new DCacheLineReq) when(io.dcache.req.fire()){ + dcacheReqValid := false.B + } + when(prepareEn && (!dcacheReqValid || io.dcache.req.fire())) { + dcacheCandidate.addr := getAddr(tagRead(prepareIdx)) + dcacheCandidate.data := bufferRead(prepareIdx).data + dcacheCandidate.mask := bufferRead(prepareIdx).mask + dcacheCandidate.cmd := MemoryOpConstants.M_XWR + dcacheCandidate.meta := DontCare + dcacheCandidate.meta.id := prepareIdx stateVec(prepareIdx) := s_inflight + dcacheReqValid := true.B } + + io.dcache.req.valid := dcacheReqValid + io.dcache.req.bits := dcacheCandidate // evictionEntry.ready := io.dcache.req.ready XSDebug(io.dcache.req.fire(), @@ -376,6 +410,13 @@ class NewSbuffer extends XSModule with HasSbufferCst { XSDebug(p"recv cache resp: id=[$respId]\n") } + if (env.DualCoreDifftest) { + difftestIO.sbufferResp := WireInit(io.dcache.resp.fire()) + difftestIO.sbufferAddr := WireInit(getAddr(tagRead(respId))) + difftestIO.sbufferData := WireInit(bufferRead(respId).data.asTypeOf(Vec(CacheLineBytes, UInt(8.W)))) + difftestIO.sbufferMask := WireInit(bufferRead(respId).mask) + } + val needSpace = (io.in(0).fire && !canMerge(0)) +& (io.in(1).fire && !canMerge(1) && !sameTag) invalidCount := invalidCount - needSpace + io.dcache.resp.fire() validCount := validCount + needSpace - prepareValid @@ -388,7 +429,7 @@ class NewSbuffer extends XSModule with HasSbufferCst { // every cycle cohCount+=1 // if cohCount(countBits-1)==1,evict for(i <- 0 until StoreBufferSize){ - when(stateVec(i) === s_valid){ + when(isValid(stateVec(i))){ when(cohCount(i)(countBits-1)){ assert(stateVec(i) === s_valid) stateUpdate(i) := s_prepare @@ -401,9 +442,9 @@ class NewSbuffer extends XSModule with HasSbufferCst { for ((forward, i) <- io.forward.zipWithIndex) { val tag_matches = widthMap(i => tagRead(i) === getTag(forward.paddr)) - val valid_tag_matches = widthMap(i => tag_matches(i) && stateVec(i) === s_valid) + val valid_tag_matches = widthMap(i => tag_matches(i) && isValid(stateVec(i))) val inflight_tag_matches = widthMap(i => - tag_matches(i) && (stateVec(i) === s_inflight || stateVec(i) === s_prepare) + tag_matches(i) && (isInflight(stateVec(i)) || isPrepare(stateVec(i))) ) val line_offset_mask = UIntToOH(getWordOffset(forward.paddr)) diff --git a/src/test/csrc/common.cpp b/src/test/csrc/common.cpp index d27766547e37efb70cc56fceee3ddd7aa7c02f6d..54e8c3c30fe348da1133dd8f9d9b8b5ca7262edd 100644 --- a/src/test/csrc/common.cpp +++ b/src/test/csrc/common.cpp @@ -21,5 +21,7 @@ extern "C" void xs_assert(long long line) { } void sig_handler(int signo) { + if (signal_num != 0) + exit(0); signal_num = signo; } diff --git a/src/test/csrc/compress.cpp b/src/test/csrc/compress.cpp index afa708f01b0558719716f4ef92e7caaf16de546a..2947cf5f26665b6605e4ce6441bb31684f4b6bb6 100644 --- a/src/test/csrc/compress.cpp +++ b/src/test/csrc/compress.cpp @@ -16,7 +16,7 @@ int isGzFile(const char *filename) { long snapshot_compressToFile(uint8_t *ptr, const char *filename, long buf_size) { gzFile compressed_mem = gzopen(filename, "wb"); - if(compressed_mem == NULL) { + if (compressed_mem == NULL) { printf("Can't open compressed binary file '%s'", filename); return -1; } @@ -44,7 +44,7 @@ long snapshot_compressToFile(uint8_t *ptr, const char *filename, long buf_size) delete [] temp_page; - if(gzclose(compressed_mem)) { + if (gzclose(compressed_mem)) { printf("Error closing '%s'\n", filename); return -1; } @@ -55,7 +55,7 @@ long readFromGz(void* ptr, const char *file_name, long buf_size, uint8_t load_ty assert(buf_size > 0); gzFile compressed_mem = gzopen(file_name, "rb"); - if(compressed_mem == NULL) { + if (compressed_mem == NULL) { printf("Can't open compressed binary file '%s'", file_name); return -1; } diff --git a/src/test/csrc/difftest.cpp b/src/test/csrc/difftest.cpp index e87c461ef5c9bac4a02d26a0686b12e254c3b0ce..c94353b6c3164728904e3ff0097efdda094de99c 100644 --- a/src/test/csrc/difftest.cpp +++ b/src/test/csrc/difftest.cpp @@ -12,19 +12,19 @@ #define DEBUG_RETIRE_TRACE_SIZE 16 #define DEBUG_WB_TRACE_SIZE 16 -void (*ref_difftest_memcpy_from_dut)(paddr_t dest, void *src, size_t n) = NULL; -void (*ref_difftest_memcpy_from_ref)(void *dest, paddr_t src, size_t n) = NULL; -void (*ref_difftest_getregs)(void *c) = NULL; -void (*ref_difftest_setregs)(const void *c) = NULL; -void (*ref_difftest_get_mastatus)(void *s) = NULL; -void (*ref_difftest_set_mastatus)(const void *s) = NULL; -void (*ref_difftest_get_csr)(void *c) = NULL; -void (*ref_difftest_set_csr)(const void *c) = NULL; -vaddr_t (*ref_disambiguate_exec)(void *disambiguate_para) = NULL; -int (*ref_difftest_store_commit)(uint64_t *saddr, uint64_t *sdata, uint8_t *smask) = NULL; -static void (*ref_difftest_exec)(uint64_t n) = NULL; -static void (*ref_difftest_raise_intr)(uint64_t NO) = NULL; -static void (*ref_isa_reg_display)(void) = NULL; +void (*ref_difftest_memcpy_from_dut)(paddr_t dest, void *src, size_t n, int coreid) = NULL; +void (*ref_difftest_memcpy_from_ref)(void *dest, paddr_t src, size_t n, int coreid) = NULL; +void (*ref_difftest_getregs)(void *c, int coreid) = NULL; +void (*ref_difftest_setregs)(const void *c, int coreid) = NULL; +void (*ref_difftest_get_mastatus)(void *s, int coreid) = NULL; +void (*ref_difftest_set_mastatus)(const void *s, int coreid) = NULL; +void (*ref_difftest_get_csr)(void *c, int coreid) = NULL; +void (*ref_difftest_set_csr)(const void *c, int coreid) = NULL; +vaddr_t (*ref_disambiguate_exec)(void *disambiguate_para, int coreid) = NULL; +int (*ref_difftest_store_commit)(uint64_t *saddr, uint64_t *sdata, uint8_t *smask, int coreid) = NULL; +static void (*ref_difftest_exec)(uint64_t n, int coreid) = NULL; +static void (*ref_difftest_raise_intr)(uint64_t NO, int coreid) = NULL; +static void (*ref_isa_reg_display)(int coreid) = NULL; static bool is_skip_ref; static bool is_skip_dut; @@ -41,7 +41,7 @@ void difftest_skip_ref() { void difftest_skip_dut() { if (is_skip_dut) return; - ref_difftest_exec(1); + ref_difftest_exec(1, 0); is_skip_dut = true; } @@ -51,49 +51,49 @@ void init_difftest() { puts("Using " REF_SO " for difftest"); assert(handle); - ref_difftest_memcpy_from_dut = (void (*)(paddr_t, void *, size_t))dlsym(handle, "difftest_memcpy_from_dut"); + ref_difftest_memcpy_from_dut = (void (*)(paddr_t, void *, size_t, int))dlsym(handle, "difftest_memcpy_from_dut"); assert(ref_difftest_memcpy_from_dut); - ref_difftest_memcpy_from_ref = (void (*)(void *, paddr_t, size_t))dlsym(handle, "difftest_memcpy_from_ref"); + ref_difftest_memcpy_from_ref = (void (*)(void *, paddr_t, size_t, int))dlsym(handle, "difftest_memcpy_from_ref"); assert(ref_difftest_memcpy_from_ref); - ref_difftest_getregs = (void (*)(void *))dlsym(handle, "difftest_getregs"); + ref_difftest_getregs = (void (*)(void *, int))dlsym(handle, "difftest_getregs"); assert(ref_difftest_getregs); - ref_difftest_setregs = (void (*)(const void *))dlsym(handle, "difftest_setregs"); + ref_difftest_setregs = (void (*)(const void *, int))dlsym(handle, "difftest_setregs"); assert(ref_difftest_setregs); - ref_difftest_get_mastatus = (void (*)(void *))dlsym(handle, "difftest_get_mastatus"); + ref_difftest_get_mastatus = (void (*)(void *, int))dlsym(handle, "difftest_get_mastatus"); assert(ref_difftest_get_mastatus); - ref_difftest_set_mastatus = (void (*)(const void *))dlsym(handle, "difftest_set_mastatus"); + ref_difftest_set_mastatus = (void (*)(const void *, int))dlsym(handle, "difftest_set_mastatus"); assert(ref_difftest_set_mastatus); - ref_difftest_get_csr = (void (*)(void *))dlsym(handle, "difftest_get_csr"); + ref_difftest_get_csr = (void (*)(void *, int))dlsym(handle, "difftest_get_csr"); assert(ref_difftest_get_csr); - ref_difftest_set_csr = (void (*)(const void *))dlsym(handle, "difftest_set_csr"); + ref_difftest_set_csr = (void (*)(const void *, int))dlsym(handle, "difftest_set_csr"); assert(ref_difftest_set_csr); - ref_disambiguate_exec = (vaddr_t (*)(void *))dlsym(handle, "disambiguate_exec"); + ref_disambiguate_exec = (vaddr_t (*)(void *, int))dlsym(handle, "disambiguate_exec"); assert(ref_disambiguate_exec); - ref_difftest_store_commit = (int (*)(uint64_t*, uint64_t*, uint8_t*))dlsym(handle, "difftest_store_commit"); + ref_difftest_store_commit = (int (*)(uint64_t*, uint64_t*, uint8_t*, int))dlsym(handle, "difftest_store_commit"); assert(ref_difftest_store_commit); - ref_difftest_exec = (void (*)(uint64_t))dlsym(handle, "difftest_exec"); + ref_difftest_exec = (void (*)(uint64_t, int))dlsym(handle, "difftest_exec"); assert(ref_difftest_exec); - ref_difftest_raise_intr = (void (*)(uint64_t))dlsym(handle, "difftest_raise_intr"); + ref_difftest_raise_intr = (void (*)(uint64_t, int))dlsym(handle, "difftest_raise_intr"); assert(ref_difftest_raise_intr); - ref_isa_reg_display = (void (*)(void))dlsym(handle, "isa_reg_display"); + ref_isa_reg_display = (void (*)(int))dlsym(handle, "isa_reg_display"); assert(ref_isa_reg_display); - void (*ref_difftest_init)(void) = (void (*)(void))dlsym(handle, "difftest_init"); + void (*ref_difftest_init)(int) = (void (*)(int))dlsym(handle, "difftest_init"); assert(ref_difftest_init); - ref_difftest_init(); + ref_difftest_init(0); } static const char *reg_name[DIFFTEST_NR_REG] = { @@ -140,7 +140,7 @@ void difftest_display(uint8_t mode) { j, pc_wb_queue[j], wen_wb_queue[j]!=0, wdst_wb_queue[j], wdata_wb_queue[j], (j==((wb_pointer-1)%DEBUG_WB_TRACE_SIZE))?"<--":""); } printf("\n============== Reg Diff ==============\n"); - ref_isa_reg_display(); + ref_isa_reg_display(0); printf("priviledgeMode: %d\n", mode); } @@ -171,12 +171,12 @@ int difftest_step(DiffState *s) { struct SyncState sync; sync.lrscValid = 0; sync.lrscAddr = 0; - ref_difftest_set_mastatus((uint64_t*)&sync); // sync lr/sc microarchitectural regs + ref_difftest_set_mastatus((uint64_t*)&sync, 0); // sync lr/sc microarchitectural regs } // single step difftest if (s->intrNO) { - ref_difftest_raise_intr(s->intrNO); + ref_difftest_raise_intr(s->intrNO, 0); // ref_difftest_exec(1);//TODO } else { @@ -191,14 +191,14 @@ int difftest_step(DiffState *s) { // MMIO accessing should not be a branch or jump, just +2/+4 to get the next pc // printf("SKIP %d\n", i); // to skip the checking of an instruction, just copy the reg state to reference design - ref_difftest_getregs(&ref_r); + ref_difftest_getregs(&ref_r, 0); ref_r[DIFFTEST_THIS_PC] += selectBit(s->isRVC, i) ? 2 : 4; if(selectBit(s->wen, i)){ if(s->wdst[i] != 0){ ref_r[s->wdst[i]] = s->wdata[i]; } } - ref_difftest_setregs(ref_r); + ref_difftest_setregs(ref_r, 0); }else{ // single step exec // IPF, LPF, SPF @@ -208,14 +208,14 @@ int difftest_step(DiffState *s) { ds.exceptionNo = s->cause; ds.mtval = s->reg_scala[DIFFTEST_MTVAL]; ds.stval = s->reg_scala[DIFFTEST_STVAL]; - ref_disambiguate_exec(&ds); + ref_disambiguate_exec(&ds, 0); }else{ - ref_difftest_exec(1); + ref_difftest_exec(1, 0); } } } } - ref_difftest_getregs(&ref_r); + ref_difftest_getregs(&ref_r, 0); uint64_t next_pc = ref_r[DIFFTEST_THIS_PC]; pc_retire_pointer = (pc_retire_pointer+1) % DEBUG_RETIRE_TRACE_SIZE; @@ -255,5 +255,5 @@ int difftest_step(DiffState *s) { } int difftest_store_step(uint64_t *saddr, uint64_t *sdata, uint8_t *smask) { - return ref_difftest_store_commit(saddr, sdata, smask); + return ref_difftest_store_commit(saddr, sdata, smask, 0); } diff --git a/src/test/csrc/difftest.h b/src/test/csrc/difftest.h index df35b5f0f278c81c1567cd6d56da560c075d28c1..4538b5621ced311e07c222fd86b6137c1bcbac50 100644 --- a/src/test/csrc/difftest.h +++ b/src/test/csrc/difftest.h @@ -82,16 +82,16 @@ struct DisambiguationState { uint64_t stval; }; -extern void (*ref_difftest_memcpy_from_dut)(paddr_t dest, void *src, size_t n); -extern void (*ref_difftest_memcpy_from_ref)(void *dest, paddr_t src, size_t n); -extern void (*ref_difftest_getregs)(void *c); -extern void (*ref_difftest_setregs)(const void *c); -extern void (*ref_difftest_get_mastatus)(void *s); -extern void (*ref_difftest_set_mastatus)(const void *s); -extern void (*ref_difftest_get_csr)(void *c); -extern void (*ref_difftest_set_csr)(const void *c); -extern vaddr_t (*ref_disambiguate_exec)(void *disambiguate_para); -extern int (*ref_difftest_store_commit)(uint64_t *saddr, uint64_t *sdata, uint8_t *smask); +extern void (*ref_difftest_memcpy_from_dut)(paddr_t dest, void *src, size_t n, int coreid); +extern void (*ref_difftest_memcpy_from_ref)(void *dest, paddr_t src, size_t n, int coreid); +extern void (*ref_difftest_getregs)(void *c, int coreid); +extern void (*ref_difftest_setregs)(const void *c, int coreid); +extern void (*ref_difftest_get_mastatus)(void *s, int coreid); +extern void (*ref_difftest_set_mastatus)(const void *s, int coreid); +extern void (*ref_difftest_get_csr)(void *c, int coreid); +extern void (*ref_difftest_set_csr)(const void *c, int coreid); +extern vaddr_t (*ref_disambiguate_exec)(void *disambiguate_para, int coreid); +extern int (*ref_difftest_store_commit)(uint64_t *saddr, uint64_t *sdata, uint8_t *smask, int coreid); void init_difftest(); int difftest_step(DiffState *s); diff --git a/src/test/csrc/emu.cpp b/src/test/csrc/emu.cpp index 24975c4a250a31ad26991087cdfff829fd40f549..2ecd4968332049c9bc78088211dec5b384b06050 100644 --- a/src/test/csrc/emu.cpp +++ b/src/test/csrc/emu.cpp @@ -276,19 +276,26 @@ uint64_t Emulator::execute(uint64_t max_cycle, uint64_t max_instr) { extern uint32_t uptime(void); uint32_t lasttime_poll = 0; uint32_t lasttime_snapshot = 0; - uint64_t lastcommit = max_cycle; - uint64_t instr_left_last_cycle = max_instr; + uint64_t lastcommit[NumCore]; + uint64_t instr_left_last_cycle[NumCore]; const int stuck_limit = 2000; + uint64_t core_max_instr[NumCore]; + + uint32_t wdst[NumCore][DIFFTEST_WIDTH]; + uint64_t wdata[NumCore][DIFFTEST_WIDTH]; + uint64_t wpc[NumCore][DIFFTEST_WIDTH]; + uint64_t reg[NumCore][DIFFTEST_NR_REG]; + DiffState diff[NumCore]; + for (int i = 0; i < NumCore; i++) { + diff[i].reg_scala = reg[i]; + diff[i].wpc = wpc[i]; + diff[i].wdata = wdata[i]; + diff[i].wdst = wdst[i]; + lastcommit[i] = max_cycle; + instr_left_last_cycle[i] = max_cycle; + core_max_instr[i] = max_instr; + } - uint32_t wdst[DIFFTEST_WIDTH]; - uint64_t wdata[DIFFTEST_WIDTH]; - uint64_t wpc[DIFFTEST_WIDTH]; - uint64_t reg[DIFFTEST_NR_REG]; - DiffState diff; - diff.reg_scala = reg; - diff.wpc = wpc; - diff.wdata = wdata; - diff.wdst = wdst; #if VM_COVERAGE == 1 // we dump coverage into files at the end @@ -298,8 +305,10 @@ uint64_t Emulator::execute(uint64_t max_cycle, uint64_t max_instr) { #endif while (!Verilated::gotFinish() && trapCode == STATE_RUNNING) { - if (!(max_cycle > 0 && max_instr > 0 && instr_left_last_cycle >= max_instr /* handle overflow */)) { - trapCode = STATE_LIMIT_EXCEEDED; + if (!(max_cycle > 0 && + core_max_instr[0] > 0 && + instr_left_last_cycle[0] >= core_max_instr[0])) { + trapCode = STATE_LIMIT_EXCEEDED; /* handle overflow */ break; } if (assert_count > 0) { @@ -319,7 +328,7 @@ uint64_t Emulator::execute(uint64_t max_cycle, uint64_t max_instr) { if (dut_ptr->io_trap_valid) trapCode = dut_ptr->io_trap_code; if (trapCode != STATE_RUNNING) break; - if (lastcommit - max_cycle > stuck_limit && hascommit) { + if (lastcommit[0] - max_cycle > stuck_limit && hascommit) { eprintf("No instruction commits for %d cycles, maybe get stuck\n" "(please also check whether a fence.i instruction requires more than %d cycles to flush the icache)\n", stuck_limit, stuck_limit); @@ -329,57 +338,66 @@ uint64_t Emulator::execute(uint64_t max_cycle, uint64_t max_instr) { if (!hascommit && dut_ptr->io_difftest_commit && dut_ptr->io_difftest_thisPC == 0x80000000u) { hascommit = 1; - read_emu_regs(reg); + read_emu_regs(reg[0]); void* get_img_start(); long get_img_size(); - ref_difftest_memcpy_from_dut(0x80000000, get_img_start(), get_img_size()); - ref_difftest_setregs(reg); + ref_difftest_memcpy_from_dut(0x80000000, get_img_start(), get_img_size(), 0); + ref_difftest_setregs(reg[0], 0); printf("The first instruction has commited. Difftest enabled. \n"); } // difftest - if (dut_ptr->io_difftest_commit && hascommit) { - read_emu_regs(reg); - read_wb_info(wpc, wdata, wdst); - - diff.commit = dut_ptr->io_difftest_commit; - diff.this_inst = dut_ptr->io_difftest_thisINST; - diff.skip = dut_ptr->io_difftest_skip; - diff.isRVC = dut_ptr->io_difftest_isRVC; - diff.wen = dut_ptr->io_difftest_wen; - diff.intrNO = dut_ptr->io_difftest_intrNO; - diff.cause = dut_ptr->io_difftest_cause; - diff.priviledgeMode = dut_ptr->io_difftest_priviledgeMode; - - diff.sync.scFailed = dut_ptr->io_difftest_scFailed; - - if (difftest_step(&diff)) { - trapCode = STATE_ABORT; - } - lastcommit = max_cycle; - // update instr_cnt - instr_left_last_cycle = max_instr; - max_instr -= diff.commit; - } + for (int i = 0; i < NumCore; i++) { + if (dut_ptr->io_difftest_commit && hascommit) { + read_emu_regs(reg[i]); + read_wb_info(wpc[i], wdata[i], wdst[i]); + + diff[i].commit = dut_ptr->io_difftest_commit; + diff[i].this_inst = dut_ptr->io_difftest_thisINST; + diff[i].skip = dut_ptr->io_difftest_skip; + diff[i].isRVC = dut_ptr->io_difftest_isRVC; + diff[i].wen = dut_ptr->io_difftest_wen; + diff[i].intrNO = dut_ptr->io_difftest_intrNO; + diff[i].cause = dut_ptr->io_difftest_cause; + diff[i].priviledgeMode = dut_ptr->io_difftest_priviledgeMode; + + diff[i].sync.scFailed = dut_ptr->io_difftest_scFailed; + + if (i == 0) { + if (difftest_step(&diff[i])) { + trapCode = STATE_ABORT; + } + } + lastcommit[i] = max_cycle; - if (dut_ptr->io_difftest_storeCommit) { - read_store_info(diff.store_addr, diff.store_data, diff.store_mask); - - for (int i = 0; i < dut_ptr->io_difftest_storeCommit; i++) { - auto addr = diff.store_addr[i]; - auto data = diff.store_data[i]; - auto mask = diff.store_mask[i]; - if (difftest_store_step(&addr, &data, &mask)) { - difftest_display(dut_ptr->io_difftest_priviledgeMode); - printf("Mismatch for store commits: \n"); - printf("REF commits addr 0x%lx, data 0x%lx, mask 0x%x\n", addr, data, mask); - printf("DUT commits addr 0x%lx, data 0x%lx, mask 0x%x\n", - diff.store_addr[i], diff.store_data[i], diff.store_mask[i]); - trapCode = STATE_ABORT; - break; + // update instr_cnt + instr_left_last_cycle[i] = core_max_instr[i]; + core_max_instr[i] -= diff[i].commit; + } + +#ifdef DIFFTEST_STORE_COMMIT + for (int core = 0; core < NumCore; core++) { + if (dut_ptr->io_difftest_storeCommit) { + read_store_info(diff[core].store_addr, diff[core].store_data, diff[core].store_mask); + + for (int i = 0; i < dut_ptr->io_difftest_storeCommit; i++) { + auto addr = diff[core].store_addr[i]; + auto data = diff[core].store_data[i]; + auto mask = diff[core].store_mask[i]; + if (difftest_store_step(&addr, &data, &mask)) { + difftest_display(dut_ptr->io_difftest_priviledgeMode); + printf("Mismatch for store commits: \n"); + printf("REF commits addr 0x%lx, data 0x%lx, mask 0x%x\n", addr, data, mask); + printf("DUT commits addr 0x%lx, data 0x%lx, mask 0x%x\n", + diff[core].store_addr[i], diff[core].store_data[i], diff[core].store_mask[i]); + trapCode = STATE_ABORT; + break; + } + } } } +#endif } uint32_t t = uptime(); @@ -504,23 +522,23 @@ void Emulator::snapshot_save(const char *filename) { stream.unbuf_write(get_ram_start(), size); uint64_t ref_r[DIFFTEST_NR_REG]; - ref_difftest_getregs(&ref_r); + ref_difftest_getregs(&ref_r, 0); stream.unbuf_write(ref_r, sizeof(ref_r)); uint64_t nemu_this_pc = get_nemu_this_pc(); stream.unbuf_write(&nemu_this_pc, sizeof(nemu_this_pc)); char *buf = (char *)mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); - ref_difftest_memcpy_from_ref(buf, 0x80000000, size); + ref_difftest_memcpy_from_ref(buf, 0x80000000, size, 0); stream.unbuf_write(buf, size); munmap(buf, size); struct SyncState sync_mastate; - ref_difftest_get_mastatus(&sync_mastate); + ref_difftest_get_mastatus(&sync_mastate, 0); stream.unbuf_write(&sync_mastate, sizeof(struct SyncState)); uint64_t csr_buf[4096]; - ref_difftest_get_csr(csr_buf); + ref_difftest_get_csr(csr_buf, 0); stream.unbuf_write(&csr_buf, sizeof(csr_buf)); long sdcard_offset; @@ -553,7 +571,7 @@ void Emulator::snapshot_load(const char *filename) { char *buf = (char *)mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); stream.read(buf, size); - ref_difftest_memcpy_from_dut(0x80000000, buf, size); + ref_difftest_memcpy_from_dut(0x80000000, buf, size, 0); munmap(buf, size); struct SyncState sync_mastate; diff --git a/src/test/csrc/emu.h b/src/test/csrc/emu.h index 8f2fed1084c82e821bc309814319a2c1a9ffe885..49751a2c123748a5b441f1ba2baa644488831aa2 100644 --- a/src/test/csrc/emu.h +++ b/src/test/csrc/emu.h @@ -7,6 +7,8 @@ #include // Trace file format header #define SNAPSHOT_INTERVAL 60 // unit: second +#define DIFFTEST_STORE_COMMIT +#define NumCore 1 struct EmuArgs { uint32_t seed; diff --git a/src/test/scala/top/XSSim.scala b/src/test/scala/top/XSSim.scala index 9f8ab38a56d3233d628228dfd5b0378871540f24..8aa31732085f85214b3b7ac0255eae51c32af657 100644 --- a/src/test/scala/top/XSSim.scala +++ b/src/test/scala/top/XSSim.scala @@ -53,6 +53,11 @@ class DiffTestIO extends XSBundle { val storeAddr = Output(Vec(2, UInt(64.W))) val storeData = Output(Vec(2, UInt(64.W))) val storeMask = Output(Vec(2, UInt(8.W))) + + val sbufferResp = Output(Bool()) + val sbufferAddr = Output(UInt(64.W)) + val sbufferData = Output(Vec(64, UInt(8.W))) + val sbufferMask = Output(UInt(64.W)) } class LogCtrlIO extends Bundle { @@ -60,14 +65,6 @@ class LogCtrlIO extends Bundle { val log_level = Input(UInt(64.W)) // a cpp uint } -class TrapIO extends XSBundle { - val valid = Output(Bool()) - val code = Output(UInt(3.W)) - val pc = Output(UInt(VAddrBits.W)) - val cycleCnt = Output(UInt(XLEN.W)) - val instrCnt = Output(UInt(XLEN.W)) -} - class XSSimSoC(axiSim: Boolean)(implicit p: config.Parameters) extends LazyModule with HasXSParameter { // address space[0G - 1024G) val fullRange = AddressSet(0x0L, 0xffffffffffL) @@ -112,11 +109,14 @@ class XSSimSoC(axiSim: Boolean)(implicit p: config.Parameters) extends LazyModul lazy val module = new LazyModuleImp(this) { val io = IO(new Bundle { - val difftest = new DiffTestIO + val difftest = new DiffTestIO + val difftest2 = new DiffTestIO val logCtrl = new LogCtrlIO val trap = new TrapIO + val trap2 = new TrapIO val uart = new UARTIO }) + io.difftest2 <> DontCare dontTouch(io.difftest) dontTouch(io.logCtrl) @@ -129,58 +129,112 @@ class XSSimSoC(axiSim: Boolean)(implicit p: config.Parameters) extends LazyModul soc.module.io.extIntrs(i) := false.B } - val difftest = WireInit(0.U.asTypeOf(new DiffTestIO)) + val difftest = Seq(WireInit(0.U.asTypeOf(new DiffTestIO)), WireInit(0.U.asTypeOf(new DiffTestIO))) + val trap = Seq(WireInit(0.U.asTypeOf(new TrapIO)), WireInit(0.U.asTypeOf(new TrapIO))) + if (!env.FPGAPlatform) { - ExcitingUtils.addSink(difftest.commit, "difftestCommit", Debug) - ExcitingUtils.addSink(difftest.thisPC, "difftestThisPC", Debug) - ExcitingUtils.addSink(difftest.thisINST, "difftestThisINST", Debug) - ExcitingUtils.addSink(difftest.skip, "difftestSkip", Debug) - ExcitingUtils.addSink(difftest.isRVC, "difftestIsRVC", Debug) - ExcitingUtils.addSink(difftest.wen, "difftestWen", Debug) - ExcitingUtils.addSink(difftest.wdata, "difftestWdata", Debug) - ExcitingUtils.addSink(difftest.wdst, "difftestWdst", Debug) - ExcitingUtils.addSink(difftest.wpc, "difftestWpc", Debug) - ExcitingUtils.addSink(difftest.intrNO, "difftestIntrNO", Debug) - ExcitingUtils.addSink(difftest.cause, "difftestCause", Debug) - ExcitingUtils.addSink(difftest.r, "difftestRegs", Debug) - ExcitingUtils.addSink(difftest.priviledgeMode, "difftestMode", Debug) - ExcitingUtils.addSink(difftest.mstatus, "difftestMstatus", Debug) - ExcitingUtils.addSink(difftest.sstatus, "difftestSstatus", Debug) - ExcitingUtils.addSink(difftest.mepc, "difftestMepc", Debug) - ExcitingUtils.addSink(difftest.sepc, "difftestSepc", Debug) - ExcitingUtils.addSink(difftest.mtval, "difftestMtval", Debug) - ExcitingUtils.addSink(difftest.stval, "difftestStval", Debug) - ExcitingUtils.addSink(difftest.mtvec, "difftestMtvec", Debug) - ExcitingUtils.addSink(difftest.stvec, "difftestStvec", Debug) - ExcitingUtils.addSink(difftest.mcause, "difftestMcause", Debug) - ExcitingUtils.addSink(difftest.scause, "difftestScause", Debug) - ExcitingUtils.addSink(difftest.satp, "difftestSatp", Debug) - ExcitingUtils.addSink(difftest.mip, "difftestMip", Debug) - ExcitingUtils.addSink(difftest.mie, "difftestMie", Debug) - ExcitingUtils.addSink(difftest.mscratch, "difftestMscratch", Debug) - ExcitingUtils.addSink(difftest.sscratch, "difftestSscratch", Debug) - ExcitingUtils.addSink(difftest.mideleg, "difftestMideleg", Debug) - ExcitingUtils.addSink(difftest.medeleg, "difftestMedeleg", Debug) - ExcitingUtils.addSink(difftest.scFailed, "difftestScFailed", Debug) - ExcitingUtils.addSink(difftest.storeCommit, "difftestStoreCommit", Debug) - ExcitingUtils.addSink(difftest.storeAddr, "difftestStoreAddr", Debug) - ExcitingUtils.addSink(difftest.storeData, "difftestStoreData", Debug) - ExcitingUtils.addSink(difftest.storeMask, "difftestStoreMask", Debug) + ExcitingUtils.addSink(difftest(0).commit, "difftestCommit", Debug) + ExcitingUtils.addSink(difftest(0).thisPC, "difftestThisPC", Debug) + ExcitingUtils.addSink(difftest(0).thisINST, "difftestThisINST", Debug) + ExcitingUtils.addSink(difftest(0).skip, "difftestSkip", Debug) + ExcitingUtils.addSink(difftest(0).isRVC, "difftestIsRVC", Debug) + ExcitingUtils.addSink(difftest(0).wen, "difftestWen", Debug) + ExcitingUtils.addSink(difftest(0).wdata, "difftestWdata", Debug) + ExcitingUtils.addSink(difftest(0).wdst, "difftestWdst", Debug) + ExcitingUtils.addSink(difftest(0).wpc, "difftestWpc", Debug) + ExcitingUtils.addSink(difftest(0).intrNO, "difftestIntrNO", Debug) + ExcitingUtils.addSink(difftest(0).cause, "difftestCause", Debug) + ExcitingUtils.addSink(difftest(0).r, "difftestRegs", Debug) + ExcitingUtils.addSink(difftest(0).priviledgeMode, "difftestMode", Debug) + ExcitingUtils.addSink(difftest(0).mstatus, "difftestMstatus", Debug) + ExcitingUtils.addSink(difftest(0).sstatus, "difftestSstatus", Debug) + ExcitingUtils.addSink(difftest(0).mepc, "difftestMepc", Debug) + ExcitingUtils.addSink(difftest(0).sepc, "difftestSepc", Debug) + ExcitingUtils.addSink(difftest(0).mtval, "difftestMtval", Debug) + ExcitingUtils.addSink(difftest(0).stval, "difftestStval", Debug) + ExcitingUtils.addSink(difftest(0).mtvec, "difftestMtvec", Debug) + ExcitingUtils.addSink(difftest(0).stvec, "difftestStvec", Debug) + ExcitingUtils.addSink(difftest(0).mcause, "difftestMcause", Debug) + ExcitingUtils.addSink(difftest(0).scause, "difftestScause", Debug) + ExcitingUtils.addSink(difftest(0).satp, "difftestSatp", Debug) + ExcitingUtils.addSink(difftest(0).mip, "difftestMip", Debug) + ExcitingUtils.addSink(difftest(0).mie, "difftestMie", Debug) + ExcitingUtils.addSink(difftest(0).mscratch, "difftestMscratch", Debug) + ExcitingUtils.addSink(difftest(0).sscratch, "difftestSscratch", Debug) + ExcitingUtils.addSink(difftest(0).mideleg, "difftestMideleg", Debug) + ExcitingUtils.addSink(difftest(0).medeleg, "difftestMedeleg", Debug) + ExcitingUtils.addSink(difftest(0).scFailed, "difftestScFailed", Debug) + ExcitingUtils.addSink(difftest(0).storeCommit, "difftestStoreCommit", Debug) + ExcitingUtils.addSink(difftest(0).storeAddr, "difftestStoreAddr", Debug) + ExcitingUtils.addSink(difftest(0).storeData, "difftestStoreData", Debug) + ExcitingUtils.addSink(difftest(0).storeMask, "difftestStoreMask", Debug) } - // BoringUtils.addSink(difftest.lrscAddr, "difftestLrscAddr") - io.difftest := difftest + if (env.DualCoreDifftest) { + for (i <- 0 until NumCores) { + difftest(i).commit := soc.module.difftestIO(i).fromRoq.commit + difftest(i).thisPC := soc.module.difftestIO(i).fromRoq.thisPC + difftest(i).thisINST := soc.module.difftestIO(i).fromRoq.thisINST + difftest(i).skip := soc.module.difftestIO(i).fromRoq.skip + difftest(i).isRVC := soc.module.difftestIO(i).fromRoq.isRVC + difftest(i).wen := soc.module.difftestIO(i).fromRoq.wen + difftest(i).wdata := soc.module.difftestIO(i).fromRoq.wdata + difftest(i).wdst := soc.module.difftestIO(i).fromRoq.wdst + difftest(i).wpc := soc.module.difftestIO(i).fromRoq.wpc + difftest(i).scFailed := soc.module.difftestIO(i).fromRoq.scFailed + + difftest(i).r := soc.module.difftestIO(i).fromXSCore.r + + difftest(i).intrNO := soc.module.difftestIO(i).fromCSR.intrNO + difftest(i).cause := soc.module.difftestIO(i).fromCSR.cause + difftest(i).priviledgeMode := soc.module.difftestIO(i).fromCSR.priviledgeMode + difftest(i).mstatus := soc.module.difftestIO(i).fromCSR.mstatus + difftest(i).sstatus := soc.module.difftestIO(i).fromCSR.sstatus + difftest(i).mepc := soc.module.difftestIO(i).fromCSR.mepc + difftest(i).sepc := soc.module.difftestIO(i).fromCSR.sepc + difftest(i).mtval := soc.module.difftestIO(i).fromCSR.mtval + difftest(i).stval := soc.module.difftestIO(i).fromCSR.stval + difftest(i).mtvec := soc.module.difftestIO(i).fromCSR.mtvec + difftest(i).stvec := soc.module.difftestIO(i).fromCSR.stvec + difftest(i).mcause := soc.module.difftestIO(i).fromCSR.mcause + difftest(i).scause := soc.module.difftestIO(i).fromCSR.scause + difftest(i).satp := soc.module.difftestIO(i).fromCSR.satp + difftest(i).mip := soc.module.difftestIO(i).fromCSR.mip + difftest(i).mie := soc.module.difftestIO(i).fromCSR.mie + difftest(i).mscratch := soc.module.difftestIO(i).fromCSR.mscratch + difftest(i).sscratch := soc.module.difftestIO(i).fromCSR.sscratch + difftest(i).mideleg := soc.module.difftestIO(i).fromCSR.mideleg + difftest(i).medeleg := soc.module.difftestIO(i).fromCSR.medeleg + + difftest(i).storeCommit := soc.module.difftestIO(i).fromSQ.storeCommit + difftest(i).storeAddr := soc.module.difftestIO(i).fromSQ.storeAddr + difftest(i).storeData := soc.module.difftestIO(i).fromSQ.storeData + difftest(i).storeMask := soc.module.difftestIO(i).fromSQ.storeMask - val trap = WireInit(0.U.asTypeOf(new TrapIO)) + difftest(i).sbufferResp := soc.module.difftestIO(i).fromSbuffer.sbufferResp + difftest(i).sbufferAddr := soc.module.difftestIO(i).fromSbuffer.sbufferAddr + difftest(i).sbufferData := soc.module.difftestIO(i).fromSbuffer.sbufferData + difftest(i).sbufferMask := soc.module.difftestIO(i).fromSbuffer.sbufferMask + + trap(i) <> soc.module.trapIO(i) + } + } + if (!env.FPGAPlatform) { - ExcitingUtils.addSink(trap.valid, "trapValid") - ExcitingUtils.addSink(trap.code, "trapCode") - ExcitingUtils.addSink(trap.pc, "trapPC") - ExcitingUtils.addSink(trap.cycleCnt, "trapCycleCnt") - ExcitingUtils.addSink(trap.instrCnt, "trapInstrCnt") + ExcitingUtils.addSink(trap(0).valid, "trapValid") + ExcitingUtils.addSink(trap(0).code, "trapCode") + ExcitingUtils.addSink(trap(0).pc, "trapPC") + ExcitingUtils.addSink(trap(0).cycleCnt, "trapCycleCnt") + ExcitingUtils.addSink(trap(0).instrCnt, "trapInstrCnt") } - io.trap := trap + io.difftest := difftest(0) + io.trap := trap(0) + + if (env.DualCoreDifftest) { + io.difftest2 := difftest(1) + io.trap2 := trap(1) + } if (env.EnableDebug) { val timer = GTimer() @@ -213,17 +267,24 @@ class XSSimTop(axiSim: Boolean)(implicit p: config.Parameters) extends LazyModul lazy val module = new LazyModuleImp(this) { val io = IO(new Bundle { - val difftest = new DiffTestIO + val difftest = new DiffTestIO + val difftest2 = new DiffTestIO val logCtrl = new LogCtrlIO val trap = new TrapIO + val trap2 = new TrapIO val uart = new UARTIO val memAXI = if (axiSim) chiselTypeOf(axiSimRam.module.io) else Input(Bool()) }) + io.difftest2 <> DontCare - io.difftest <> dut.module.io.difftest + io.difftest <> dut.module.io.difftest io.logCtrl <> dut.module.io.logCtrl io.trap <> dut.module.io.trap io.uart <> dut.module.io.uart + if (env.DualCoreDifftest) { + io.difftest2 <> dut.module.io.difftest2 + io.trap2 <> dut.module.io.trap2 + } if (axiSim) { io.memAXI <> axiSimRam.module.io } diff --git a/src/test/scala/xiangshan/testutils/AddSinks.scala b/src/test/scala/xiangshan/testutils/AddSinks.scala index 2b6ca42336105ea411bdde380ee8e07a7db9e25c..2e21ff57202ff0d8dfd4eac0ae94fd29aaf76fd5 100644 --- a/src/test/scala/xiangshan/testutils/AddSinks.scala +++ b/src/test/scala/xiangshan/testutils/AddSinks.scala @@ -31,15 +31,25 @@ object AddSinks { "perfCntCondMbpIWrong", "perfCntCondMbpRRight", "perfCntCondMbpRWrong", + "perfCntS1Right", + "perfCntS1Wrong", + "perfCntS2Right", + "perfCntS2Wrong", + "perfCntS3Right", + "perfCntS3Wrong", "perfCntubtbRight", "perfCntubtbWrong", "perfCntbtbRight", "perfCntbtbWrong", "perfCnttageRight", "perfCnttageWrong", + "perfCntrasRight", + "perfCntrasWrong", "perfCntloopRight", "perfCntloopWrong", "perfCntLoopExit", + "perfCntTakenAndRight", + "perfCntTakenButWrong", // "CntFetchFromICache", // "CntFetchFromLoopBuffer", // "CntExitLoop1",