Commit c047dc75 authored by: Z ZhangZifei

Merge branch 'master' into rs-dream

......@@ -349,3 +349,5 @@ mill.rdiB
stale_outputs_checked
*.snapshot
__pycache__
......@@ -28,7 +28,7 @@ help:
$(TOP_V): $(SCALA_FILE)
mkdir -p $(@D)
mill XiangShan.test.runMain $(SIMTOP) -td $(@D) --full-stacktrace --output-file $(@F) --disable-all --fpga-platform --remove-assert --infer-rw --repl-seq-mem -c:$(SIMTOP):-o:$(@D)/$(@F).conf $(SIM_ARGS)
$(MEM_GEN) $(@D)/$(@F).conf >> $@
$(MEM_GEN) $(@D)/$(@F).conf --tsmc28 --output_file $(@D)/tsmc28_sram.v > $(@D)/tsmc28_sram.v.conf
# sed -i -e 's/_\(aw\|ar\|w\|r\|b\)_\(\|bits_\)/_\1/g' $@
@git log -n 1 >> .__head__
@git diff >> .__diff__
......
#! /usr/bin/env python
#! /usr/bin/env python3
# See LICENSE.SiFive for license details.
# See LICENSE.Berkeley for license details.
......@@ -7,208 +7,414 @@ import sys
import math
use_latches = 0
blackbox = 0
def parse_line(line):
name = ''
width = 0
depth = 0
ports = ''
mask_gran = 0
tokens = line.split()
i = 0
for i in range(0,len(tokens),2):
s = tokens[i]
if s == 'name':
name = tokens[i+1]
elif s == 'width':
width = int(tokens[i+1])
mask_gran = width # default setting
elif s == 'depth':
depth = int(tokens[i+1])
elif s == 'ports':
ports = tokens[i+1].split(',')
elif s == 'mask_gran':
mask_gran = int(tokens[i+1])
else:
sys.exit('%s: unknown argument %s' % (sys.argv[0], a))
return (name, width, depth, mask_gran, width//mask_gran, ports)
def gen_mem(name, width, depth, mask_gran, mask_seg, ports):
addr_width = max(math.ceil(math.log(depth)/math.log(2)),1)
port_spec = []
readports = []
writeports = []
latchports = []
rwports = []
decl = []
combinational = []
sequential = []
maskedports = {}
for pid in range(len(ports)):
ptype = ports[pid]
if ptype[0:1] == 'm':
ptype = ptype[1:]
maskedports[pid] = pid
if ptype == 'read':
prefix = 'R%d_' % len(readports)
port_spec.append('input %sclk' % prefix)
port_spec.append('input [%d:0] %saddr' % (addr_width-1, prefix))
port_spec.append('input %sen' % prefix)
port_spec.append('output [%d:0] %sdata' % (width-1, prefix))
readports.append(pid)
elif ptype == 'write':
prefix = 'W%d_' % len(writeports)
port_spec.append('input %sclk' % prefix)
port_spec.append('input [%d:0] %saddr' % (addr_width-1, prefix))
port_spec.append('input %sen' % prefix)
port_spec.append('input [%d:0] %sdata' % (width-1, prefix))
if pid in maskedports:
port_spec.append('input [%d:0] %smask' % (mask_seg-1, prefix))
if not use_latches or pid in maskedports:
writeports.append(pid)
else:
latchports.append(pid)
elif ptype == 'rw':
prefix = 'RW%d_' % len(rwports)
port_spec.append('input %sclk' % prefix)
port_spec.append('input [%d:0] %saddr' % (addr_width-1, prefix))
port_spec.append('input %sen' % prefix)
port_spec.append('input %swmode' % prefix)
if pid in maskedports:
port_spec.append('input [%d:0] %swmask' % (mask_seg-1, prefix))
port_spec.append('input [%d:0] %swdata' % (width-1, prefix))
port_spec.append('output [%d:0] %srdata' % (width-1, prefix))
rwports.append(pid)
else:
sys.exit('%s: unknown port type %s' % (sys.argv[0], ptype))
nr = len(readports)
nw = len(writeports)
nrw = len(rwports)
masked = len(maskedports)>0
tup = (depth, width, nr, nw, nrw, masked)
def emit_read(idx, rw):
prefix = ('RW%d_' if rw else 'R%d_') % idx
data = ('%srdata' if rw else '%sdata') % prefix
en = ('%sen && !%swmode' % (prefix, prefix)) if rw else ('%sen' % prefix)
decl.append('reg reg_%sren;' % prefix)
decl.append('reg [%d:0] reg_%saddr;' % (addr_width-1, prefix))
sequential.append('always @(posedge %sclk)' % prefix)
sequential.append(' reg_%sren <= %s;' % (prefix, en))
sequential.append('always @(posedge %sclk)' % prefix)
sequential.append(' if (%s) reg_%saddr <= %saddr;' % (en, prefix, prefix))
combinational.append('`ifdef RANDOMIZE_GARBAGE_ASSIGN')
combinational.append('reg [%d:0] %srandom;' % (((width-1)//32+1)*32-1, prefix))
combinational.append('`ifdef RANDOMIZE_MEM_INIT')
combinational.append(' initial begin')
combinational.append(' #`RANDOMIZE_DELAY begin end')
combinational.append(' %srandom = {%s};' % (prefix, ', '.join(['$random'] * ((width-1)//32+1))))
combinational.append(' reg_%sren = %srandom[0];' % (prefix, prefix))
combinational.append(' end')
combinational.append('`endif')
combinational.append('always @(posedge %sclk) %srandom <= {%s};' % (prefix, prefix, ', '.join(['$random'] * ((width-1)//32+1))))
combinational.append('assign %s = reg_%sren ? ram[reg_%saddr] : %srandom[%d:0];' % (data, prefix, prefix, prefix, width-1))
combinational.append('`else')
combinational.append('assign %s = ram[reg_%saddr];' % (data, prefix))
combinational.append('`endif')
for idx in range(nr):
emit_read(idx, False)
for idx in range(nrw):
emit_read(idx, True)
for idx in range(len(latchports)):
prefix = 'W%d_' % idx
decl.append('reg [%d:0] latch_%saddr;' % (addr_width-1, prefix))
decl.append('reg [%d:0] latch_%sdata;' % (width-1, prefix))
decl.append('reg latch_%sen;' % (prefix))
combinational.append('always @(*) begin')
combinational.append(' if (!%sclk && %sen) latch_%saddr <= %saddr;' % (prefix, prefix, prefix, prefix))
combinational.append(' if (!%sclk && %sen) latch_%sdata <= %sdata;' % (prefix, prefix, prefix, prefix))
combinational.append(' if (!%sclk) latch_%sen <= %sen;' % (prefix, prefix, prefix))
combinational.append('end')
combinational.append('always @(*)')
combinational.append(' if (%sclk && latch_%sen)' % (prefix, prefix))
combinational.append(' ram[latch_%saddr] <= latch_%sdata;' % (prefix, prefix))
decl.append('reg [%d:0] ram [%d:0];' % (width-1, depth-1))
decl.append('`ifdef RANDOMIZE_MEM_INIT')
decl.append(' integer initvar;')
decl.append(' initial begin')
decl.append(' #`RANDOMIZE_DELAY begin end')
decl.append(' for (initvar = 0; initvar < %d; initvar = initvar+1)' % depth)
decl.append(' ram[initvar] = {%d {$random}};' % ((width-1)//32+1))
for idx in range(nr):
prefix = 'R%d_' % idx
decl.append(' reg_%saddr = {%d {$random}};' % (prefix, ((addr_width-1)//32+1)))
for idx in range(nrw):
prefix = 'RW%d_' % idx
decl.append(' reg_%saddr = {%d {$random}};' % (prefix, ((addr_width-1)//32+1)))
decl.append(' end')
decl.append('`endif')
decl.append("integer i;")
for idx in range(nw):
prefix = 'W%d_' % idx
pid = writeports[idx]
sequential.append('always @(posedge %sclk)' % prefix)
sequential.append(" if (%sen) begin" % prefix)
for i in range(mask_seg):
mask = ('if (%smask[%d]) ' % (prefix, i)) if pid in maskedports else ''
ram_range = '%d:%d' % ((i+1)*mask_gran-1, i*mask_gran)
sequential.append(" %sram[%saddr][%s] <= %sdata[%s];" % (mask, prefix, ram_range, prefix, ram_range))
sequential.append(" end")
for idx in range(nrw):
pid = rwports[idx]
prefix = 'RW%d_' % idx
sequential.append('always @(posedge %sclk)' % prefix)
sequential.append(" if (%sen && %swmode) begin" % (prefix, prefix))
if mask_seg > 0:
sequential.append(" for(i=0;i<%d;i=i+1) begin" % mask_seg)
if pid in maskedports:
sequential.append(" if(%swmask[i]) begin" % prefix)
sequential.append(" ram[%saddr][i*%d +: %d] <= %swdata[i*%d +: %d];" %(prefix, mask_gran, mask_gran, prefix, mask_gran, mask_gran))
sequential.append(" end")
else:
sequential.append(" ram[%saddr][i*%d +: %d] <= %swdata[i*%d +: %d];" %(prefix, mask_gran, mask_gran, prefix, mask_gran, mask_gran))
sequential.append(" end")
sequential.append(" end")
body = "\
class VerilogModuleGenerator(object):
    """Accumulates ports, declarations and always-blocks, then renders a Verilog module.

    The original text interleaved the old and new versions of generate() from a
    diff; this is the de-interleaved new version (instance fields, and a string
    blackbox body substituted when `blackbox` is truthy).
    """

    def __init__(self, name):
        self.name = name
        self.port_spec = []       # port declarations, e.g. "input [7:0] R0_addr"
        self.decl = []            # reg/wire/integer declarations and init blocks
        self.combinational = []   # assign statements and combinational always-blocks
        self.sequential = []      # clocked always-blocks

    def __format_width(self, width):
        # "[w-1:0] " for multi-bit signals, empty string for 1-bit signals
        return "[{}:0] ".format(width - 1) if width > 1 else ""

    def __format_depth(self, depth):
        # trailing " [d-1:0]" for memory arrays, empty string for scalars
        return " [{}:0]".format(depth - 1) if depth > 1 else ""

    def add_io(self, io_type, width, name):
        width_str = self.__format_width(width)
        self.port_spec.append(f'{io_type} {width_str}{name}')

    def add_input(self, width, name):
        self.add_io("input", width, name)

    def add_output(self, width, name):
        self.add_io("output", width, name)

    def add_decl(self, decl_type, width, name, depth=1):
        width_str = self.__format_width(width)
        depth_str = self.__format_depth(depth)
        self.decl.append(f"{decl_type} {width_str}{name}{depth_str};")

    def add_decl_reg(self, width, name, depth=1):
        self.add_decl("reg", width, name, depth)

    def add_decl_wire(self, width, name, depth=1):
        self.add_decl("wire", width, name, depth)

    def add_decl_line(self, line):
        self.decl.append(line)

    def add_sequential(self, line):
        self.sequential.append(line)

    def add_combinational(self, line):
        self.combinational.append(line)

    def generate(self, blackbox):
        """Render the full module text.

        If `blackbox` is falsy the behavioral body (decls + sequential +
        combinational) is emitted; if it is a truthy string, that string
        replaces the body (used for empty blackboxes and foundry macros).
        """
        body = "  %s\n  %s\n  %s\n" % ('\n  '.join(self.decl),
                                       '\n  '.join(self.sequential),
                                       '\n  '.join(self.combinational))
        s = "\nmodule %s(\n  %s\n);\n\n%s\nendmodule" % (
            self.name,
            ',\n  '.join(self.port_spec),
            body if not blackbox else blackbox)
        return s
class Reshaper(object):
    """Wrap a narrow write/read SRAM around a wider masked array.

    Takes the original conf (ports ['write', 'read']) and a new conf
    (ports ['mwrite', 'read']) whose word holds several original words
    ("ways"); emits the wiring that steers reads/writes to the right way.
    """
    def __init__(self, before, after):
        # print(before, after)
        # before/after are (name, width, depth, mask_gran, mask_seg, ports) tuples
        self.conf = before
        self.new_conf = after
        assert(self.conf[-1] == ['write', 'read'])
        assert(self.new_conf[-1] == ['mwrite', 'read'])
    def generate(self, mem):
        """Emit the reshaping wiring into `mem` (a VerilogModuleGenerator)."""
        (name, width, depth, mask_gran, mask_seg, _) = self.conf
        (new_name, new_width, new_depth, new_mask_gran, new_mask_seg, _) = self.new_conf
        # NOTE(review): addr_bits is computed but never used below — verify intent
        addr_bits = math.log2(depth)
        ways = new_width // width  # how many original words fit in one new word
        ways_bits = int(math.log2(ways))
        mem.add_decl_wire(new_width, "data_read")
        mem.add_decl_wire(new_width, "data_write")
        # write data: place W0_data into the selected way's lane, zero elsewhere
        mem.add_combinational(f"assign data_write = ")
        sels = [f"{f'(write_way_index == {w}) ?' if w != ways-1 else ''} ({{{new_width-width}'h0, W0_data}} << {width*w})" for w in range(ways)]
        mem.add_combinational(":\n ".join(sels) + ";")
        # low address bits select the way; upper bits index the wider array
        mem.add_decl_wire(ways_bits, "read_way_index")
        mem.add_combinational(f"assign read_way_index = R0_addr[{ways_bits-1}:0];")
        mem.add_decl_wire(ways_bits, "write_way_index")
        mem.add_combinational(f"assign write_way_index = W0_addr[{ways_bits-1}:0];")
        mem.add_combinational(f"{new_name} array (")
        mem.add_combinational(f" .W0_clk(W0_clk),")
        # NOTE(review): the slice MSB below is new_width-1, which looks like it
        # should be the address width of the outer port — TODO confirm
        mem.add_combinational(f" .W0_addr(W0_addr[{new_width-1}:{ways_bits}]),")
        mem.add_combinational(f" .W0_en(W0_en),")
        mem.add_combinational(f" .W0_data(data_write),")
        mem.add_combinational(f" .W0_mask({ways}'h1 << write_way_index),")
        mem.add_combinational(f" .R0_clk(R0_clk),")
        mem.add_combinational(f" .R0_addr(R0_addr[{new_width-1}:{ways_bits}]),")
        mem.add_combinational(f" .R0_en(R0_en),")
        mem.add_combinational(f" .R0_data(data_read)")
        mem.add_combinational(f");")
        # read data: mux the selected way's lane back out
        mem.add_combinational(f"assign R0_data = ")
        sels = [f"{f'(read_way_index == {w}) ?' if w != ways-1 else ''} data_read[{width*(w+1)-1}:{width*w}]" for w in range(ways)]
        mem.add_combinational(":\n ".join(sels) + ";")
class Spliter(object):
    """Split a masked rw SRAM into several unmasked rw sub-arrays, one per mask segment.

    (Name kept as 'Spliter' [sic] — renaming would break callers.)
    """
    def __init__(self, before, after):
        # print(before, after)
        # before/after are (name, width, depth, mask_gran, mask_seg, ports) tuples
        self.conf = before
        self.new_conf = after
        assert(self.conf[-1] == ['mrw'])
        assert(self.new_conf[-1] == ['rw'])
    def generate(self, mem):
        """Emit one sub-array instantiation per way into `mem` (a VerilogModuleGenerator)."""
        (name, width, depth, mask_gran, mask_seg, _) = self.conf
        (new_name, new_width, new_depth, new_mask_gran, new_mask_seg, _) = self.new_conf
        assert(depth == new_depth)
        ways = width // new_width  # each sub-array holds one mask segment's slice
        for i in range(ways):
            data_slice = f"[{new_width*(i+1)-1}:{new_width*i}]"
            mem.add_combinational(f"{new_name} array_{i} (")
            mem.add_combinational(f" .RW0_clk(RW0_clk),")
            mem.add_combinational(f" .RW0_addr(RW0_addr),")
            mem.add_combinational(f" .RW0_en(RW0_en),")
            # per-way write enable comes from the original port's mask bit
            mem.add_combinational(f" .RW0_wmode(RW0_wmode && RW0_wmask[{i}]),")
            mem.add_combinational(f" .RW0_wdata(RW0_wdata{data_slice}),")
            mem.add_combinational(f" .RW0_rdata(RW0_rdata{data_slice})")
            mem.add_combinational(f");")
class SRAM(object):
    """Behavioral SRAM generator built from one .conf line.

    Parses the line, declares module ports, and can render a full behavioral
    Verilog model (or a blackbox shell) via generate().
    """
    def __init__(self, line):
        self.parse_line(line)
        self.prepare_module()

    def parse_line(self, line):
        """Parse 'name N width W depth D ports P[,P] [mask_gran G]' into self.conf.

        self.conf = (name, width, depth, mask_gran, mask_seg, ports) where
        mask_seg = width // mask_gran.
        """
        name = ''
        width = 0
        depth = 0
        ports = ''
        mask_gran = 0
        tokens = line.split()
        i = 0
        # tokens come in key/value pairs
        for i in range(0, len(tokens), 2):
            s = tokens[i]
            if s == 'name':
                name = tokens[i+1]
            elif s == 'width':
                width = int(tokens[i+1])
                mask_gran = width # default setting
            elif s == 'depth':
                depth = int(tokens[i+1])
            elif s == 'ports':
                ports = tokens[i+1].split(',')
            elif s == 'mask_gran':
                mask_gran = int(tokens[i+1])
            else:
                # NOTE(review): this prints the token index, not the offending
                # token text — probably meant tokens[i]
                sys.exit('%s: unknown argument %s' % (sys.argv[0], i))
        self.conf = (name, width, depth, mask_gran, width//mask_gran, ports)
        # return (name, width, depth, mask_gran, width//mask_gran, ports)

    def prepare_module(self):
        """Declare module I/O for every port; fills self.mem and self.ports_conf."""
        (name, width, depth, mask_gran, mask_seg, ports) = self.conf
        addr_width = max(math.ceil(math.log(depth)/math.log(2)),1)
        mem = VerilogModuleGenerator(name)
        readports = []
        writeports = []
        latchports = []
        rwports = []
        maskedports = {}
        for pid, ptype in enumerate(ports):
            # a leading 'm' marks a masked port (one enable per mask_gran bits)
            if ptype[0:1] == 'm':
                ptype = ptype[1:]
                maskedports[pid] = pid
            if ptype == 'read':
                prefix = 'R%d_' % len(readports)
                mem.add_input(1, prefix + "clk")
                mem.add_input(addr_width, prefix + "addr")
                mem.add_input(1, prefix + "en")
                mem.add_output(width, prefix + "data")
                readports.append(pid)
            elif ptype == 'write':
                prefix = 'W%d_' % len(writeports)
                mem.add_input(1, prefix + "clk")
                mem.add_input(addr_width, prefix + "addr")
                mem.add_input(1, prefix + "en")
                mem.add_input(width, prefix + "data")
                if pid in maskedports:
                    mem.add_input(mask_seg, prefix + "mask")
                # latch-based writes only when enabled globally and port is unmasked
                if not use_latches or pid in maskedports:
                    writeports.append(pid)
                else:
                    latchports.append(pid)
            elif ptype == 'rw':
                prefix = 'RW%d_' % len(rwports)
                mem.add_input(1, prefix + "clk")
                mem.add_input(addr_width, prefix + "addr")
                mem.add_input(1, prefix + "en")
                mem.add_input(1, prefix + "wmode")
                if pid in maskedports:
                    mem.add_input(mask_seg, prefix + "wmask")
                mem.add_input(width, prefix + "wdata")
                mem.add_output(width, prefix + "rdata")
                rwports.append(pid)
            else:
                sys.exit('%s: unknown port type %s' % (sys.argv[0], ptype))
        self.mem = mem
        self.ports_conf = (readports, writeports, latchports, rwports, maskedports)

    def generate(self, blackbox):
        """Emit the behavioral body into self.mem and render the module.

        When `blackbox` is truthy the body is suppressed/replaced by
        VerilogModuleGenerator.generate.
        """
        (name, width, depth, mask_gran, mask_seg, ports) = self.conf
        addr_width = max(math.ceil(math.log(depth)/math.log(2)),1)
        mem, (readports, writeports, latchports, rwports, maskedports) = self.mem, self.ports_conf
        nr = len(readports)
        nw = len(writeports)
        nrw = len(rwports)
        def emit_read(idx, rw):
            # registered-address read path shared by R and RW ports
            prefix = ('RW%d_' if rw else 'R%d_') % idx
            data = ('%srdata' if rw else '%sdata') % prefix
            en = ('%sen && !%swmode' % (prefix, prefix)) if rw else ('%sen' % prefix)
            mem.add_decl_reg(1, f"reg_{prefix}ren")
            mem.add_decl_reg(addr_width, f"reg_{prefix}addr")
            mem.add_sequential(f"always @(posedge {prefix}clk)")
            mem.add_sequential(f" reg_{prefix}ren <= {en};")
            mem.add_sequential(f"always @(posedge {prefix}clk)")
            mem.add_sequential(f" if ({en}) reg_{prefix}addr <= {prefix}addr;")
            # when enabled, a disabled read returns random data to catch X-propagation bugs
            mem.add_combinational("`ifdef RANDOMIZE_GARBAGE_ASSIGN")
            mem.add_combinational(f"reg [{((width-1)//32+1)*32-1}:0] {prefix}random;")
            mem.add_combinational(f"`ifdef RANDOMIZE_MEM_INIT")
            mem.add_combinational(f" initial begin")
            mem.add_combinational(f" #`RANDOMIZE_DELAY begin end")
            mem.add_combinational(' %srandom = {%s};' % (prefix, ', '.join(['$random'] * ((width-1)//32+1))))
            mem.add_combinational(' reg_%sren = %srandom[0];' % (prefix, prefix))
            mem.add_combinational(' end')
            mem.add_combinational('`endif')
            mem.add_combinational('always @(posedge %sclk) %srandom <= {%s};' % (prefix, prefix, ', '.join(['$random'] * ((width-1)//32+1))))
            mem.add_combinational('assign %s = reg_%sren ? ram[reg_%saddr] : %srandom[%d:0];' % (data, prefix, prefix, prefix, width-1))
            mem.add_combinational('`else')
            mem.add_combinational('assign %s = ram[reg_%saddr];' % (data, prefix))
            mem.add_combinational('`endif')
        for idx in range(nr):
            emit_read(idx, False)
        for idx in range(nrw):
            emit_read(idx, True)
        # latch-based write ports (only populated when use_latches is set)
        for idx in range(len(latchports)):
            prefix = 'W%d_' % idx
            mem.add_decl_reg(addr_width, f"latch_{prefix}addr")
            mem.add_decl_reg(width, f"latch_{prefix}data")
            mem.add_decl_reg(1, f"latch_{prefix}en")
            mem.add_combinational('always @(*) begin')
            mem.add_combinational(' if (!%sclk && %sen) latch_%saddr <= %saddr;' % (prefix, prefix, prefix, prefix))
            mem.add_combinational(' if (!%sclk && %sen) latch_%sdata <= %sdata;' % (prefix, prefix, prefix, prefix))
            mem.add_combinational(' if (!%sclk) latch_%sen <= %sen;' % (prefix, prefix, prefix))
            mem.add_combinational('end')
            mem.add_combinational('always @(*)')
            mem.add_combinational(' if (%sclk && latch_%sen)' % (prefix, prefix))
            mem.add_combinational(' ram[latch_%saddr] <= latch_%sdata;' % (prefix, prefix))
        # the storage array itself, with optional randomized initialization
        mem.add_decl_reg(width, "ram", depth)
        mem.add_decl_line('`ifdef RANDOMIZE_MEM_INIT')
        mem.add_decl_line(' integer initvar;')
        mem.add_decl_line(' initial begin')
        mem.add_decl_line(' #`RANDOMIZE_DELAY begin end')
        mem.add_decl_line(' for (initvar = 0; initvar < %d; initvar = initvar+1)' % depth)
        mem.add_decl_line(' ram[initvar] = {%d {$random}};' % ((width-1)//32+1))
        for idx in range(nr):
            prefix = 'R%d_' % idx
            mem.add_decl_line(' reg_%saddr = {%d {$random}};' % (prefix, ((addr_width-1)//32+1)))
        for idx in range(nrw):
            prefix = 'RW%d_' % idx
            mem.add_decl_line(' reg_%saddr = {%d {$random}};' % (prefix, ((addr_width-1)//32+1)))
        mem.add_decl_line(' end')
        mem.add_decl_line('`endif')
        mem.add_decl_line("integer i;")
        # flop-based write ports, one segment-conditional assignment per mask bit
        for idx in range(nw):
            prefix = 'W%d_' % idx
            pid = writeports[idx]
            mem.add_sequential('always @(posedge %sclk)' % prefix)
            mem.add_sequential(" if (%sen) begin" % prefix)
            for i in range(mask_seg):
                mask = ('if (%smask[%d]) ' % (prefix, i)) if pid in maskedports else ''
                ram_range = '%d:%d' % ((i+1)*mask_gran-1, i*mask_gran)
                mem.add_sequential(" %sram[%saddr][%s] <= %sdata[%s];" % (mask, prefix, ram_range, prefix, ram_range))
            mem.add_sequential(" end")
        # rw ports: write side uses a runtime for-loop over mask segments
        for idx in range(nrw):
            pid = rwports[idx]
            prefix = 'RW%d_' % idx
            mem.add_sequential('always @(posedge %sclk)' % prefix)
            mem.add_sequential(" if (%sen && %swmode) begin" % (prefix, prefix))
            if mask_seg > 0:
                mem.add_sequential(" for(i=0;i<%d;i=i+1) begin" % mask_seg)
                if pid in maskedports:
                    mem.add_sequential(" if(%swmask[i]) begin" % prefix)
                    mem.add_sequential(" ram[%saddr][i*%d +: %d] <= %swdata[i*%d +: %d];" %(prefix, mask_gran, mask_gran, prefix, mask_gran, mask_gran))
                    mem.add_sequential(" end")
                else:
                    mem.add_sequential(" ram[%saddr][i*%d +: %d] <= %swdata[i*%d +: %d];" %(prefix, mask_gran, mask_gran, prefix, mask_gran, mask_gran))
                mem.add_sequential(" end")
            mem.add_sequential(" end")
        return mem.generate(blackbox)
class SRAM_TSMC28(SRAM):
    """SRAM variant that maps onto TSMC28 macros, splitting/reshaping as needed.

    Unsupported shapes are rewritten into sub-SRAMs (see Spliter/Reshaper);
    SRAMs that map directly get a foundry-macro body (currently stubbed out).
    """
    def __init__(self, line):
        super().__init__(line)
        self.sub_srams = []
        # prints the conf line of every SRAM that maps directly (no sub-SRAMs)
        if self.__check_subsrams():
            print(line.strip())
    def __check_subsrams(self):
        """Try both rewrites; return True iff neither applied (direct mapping)."""
        need_split = self.__split()
        need_reshape = self.__reshape()
        # a single SRAM must not need both rewrites at once
        assert(not (need_split and need_reshape))
        return not need_split and not need_reshape
    def __split(self):
        """Rewrite a masked rw SRAM into unmasked rw sub-arrays; True if applied."""
        (name, width, depth, mask_gran, mask_seg, ports) = self.conf
        if ports == ["mrw"]:
            # sub-array width is one mask granule; recursion terminates because
            # the sub-SRAM's ports are plain 'rw'
            new_conf = (name + "_sub", str(depth), str(mask_gran), "rw")
            line_field = ("name", "depth", "width", "ports")
            new_line = " ".join(map(lambda x: " ".join(x), zip(line_field, new_conf)))
            new_sram = SRAM_TSMC28(new_line)
            self.sub_srams.append(new_sram)
            reshaper = Spliter(self.conf, new_sram.conf)
            reshaper.generate(self.mem)
            return True
        return False
    def __reshape(self):
        """Rewrite a 2x256 SRAM into a 64-deep, 8-wide masked array; True if applied."""
        (name, width, depth, mask_gran, mask_seg, ports) = self.conf
        if width == 2 and depth == 256:
            new_conf = (name + "_sub", "64", "8", "mwrite,read", "2")
            line_field = ("name", "depth", "width", "ports", "mask_gran")
            new_line = " ".join(map(lambda x: " ".join(x), zip(line_field, new_conf)))
            new_sram = SRAM_TSMC28(new_line)
            self.sub_srams.append(new_sram)
            reshaper = Reshaper(self.conf, new_sram.conf)
            reshaper.generate(self.mem)
            return True
        return False
    def __get_tsmc_lib(self):
        """Render the module with a TSMC macro body (macro selection currently disabled)."""
        mem, (readports, writeports, latchports, rwports, maskedports) = self.mem, self.ports_conf
        blackbox = "// tsmc lib here\n"
        (name, width, depth, mask_gran, mask_seg, _) = self.conf
        nports = (len(readports), len(writeports), len(rwports))
        addr_width = max(math.ceil(math.log(depth)/math.log(2)),1)
        masked = len(maskedports) > 0
        # from tsmc28_sram import gen_tsmc_ram_1pw, gen_tsmc_ram_1pnw, gen_tsmc_ram_2pw, gen_tsmc_ram_2pnw
        # if nports == (1, 1, 0):
        # if masked:
        # blackbox = gen_tsmc_ram_2pw("TS6N28HPCPLVTA64X8M2F", width, mask_gran)
        # else:
        # blackbox = gen_tsmc_ram_2pnw("TS6N28HPCPLVTA64X14M2F")
        # elif nports == (0, 0, 1):
        # if masked:
        # blackbox = gen_tsmc_ram_1pw('TS1N28HPCPLVTB8192X64M8SW', width, mask_gran, addr_width)
        # else:
        # blackbox = gen_tsmc_ram_1pnw('TS5N28HPCPLVTA64X144M2F', width, addr_width)
        # else:
        # blackbox = "// unknown tsmc lib type\n"
        return mem.generate(blackbox)
    def generate(self, blackbox, itself_only=False):
        """Render this SRAM; unless itself_only, also append all sub-SRAM modules."""
        if itself_only:
            # generate splits or reshapes
            if self.sub_srams:
                # self.mem already holds the Spliter/Reshaper wiring; "" keeps the body
                return self.mem.generate("")
            # use empty blackbox
            elif blackbox:
                # " " is truthy, so the behavioral body is replaced by a blank string
                return super().generate(" ")
            # insert tsmc libs
            else:
                return self.__get_tsmc_lib()
        else:
            s = self.generate(blackbox, True)
            for sram in self.sub_srams:
                s += sram.generate(blackbox)
            return s
def main(args):
    """Generate Verilog for every SRAM line in args.conf.

    Writes to args.output_file when given, otherwise prints to stdout.
    The original text interleaved the pre-refactor pipeline (gen_mem/parse_line,
    which double-emitted each module) with the new class-based one, and left a
    dead `sram = SRAM(line)` assignment and an unclosed output file; this is the
    de-interleaved new pipeline with the file handle properly closed.
    """
    out = open(args.output_file, "w") if args.output_file else None
    try:
        for line in open(args.conf):
            # --tsmc28 selects the macro-aware generator; plain behavioral otherwise
            sram = SRAM_TSMC28(line) if args.tsmc28 else SRAM(line)
            text = sram.generate(args.blackbox)
            if out is not None:
                out.write(text)
            else:
                print(text)
    finally:
        if out is not None:
            out.close()
# Script entry point: parse command-line options and run the generator.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Memory generator for Rocket Chip')
    parser.add_argument('conf', metavar='.conf file')
    parser.add_argument('--tsmc28', action='store_true', help='use tsmc28 sram to generate module body')
    parser.add_argument('--blackbox', '-b', action='store_true', help='set to disable output of module body')
    #parser.add_argument('--use_latches', '-l', action='store_true', help='set to enable use of latches')
    parser.add_argument('--output_file', '-o', help='name of output file, default is stdout')
    args = parser.parse_args()
    # update the module-level blackbox flag declared near the top of the file
    blackbox = args.blackbox
    #use_latches = args.use_latches
    main(args)
......@@ -8,7 +8,7 @@ import freechips.rocketchip.diplomacy.{AddressSet, LazyModule, LazyModuleImp}
import freechips.rocketchip.tilelink.{BankBinder, TLBuffer, TLBundleParameters, TLCacheCork, TLClientNode, TLFilter, TLFuzzer, TLIdentityNode, TLToAXI4, TLWidthWidget, TLXbar}
import utils.{DebugIdentityNode, DataDontCareNode}
import utils.XSInfo
import xiangshan.{HasXSParameter, XSCore, HasXSLog}
import xiangshan.{HasXSParameter, XSCore, HasXSLog, DifftestBundle}
import sifive.blocks.inclusivecache.{CacheParameters, InclusiveCache, InclusiveCacheMicroParameters}
import freechips.rocketchip.diplomacy.{AddressSet, LazyModule, LazyModuleImp}
import freechips.rocketchip.devices.tilelink.{DevNullParams, TLError}
......@@ -162,6 +162,13 @@ class XSSoc()(implicit p: Parameters) extends LazyModule with HasSoCParameter {
// val meip = Input(Vec(NumCores, Bool()))
val ila = if(env.FPGAPlatform && EnableILA) Some(Output(new ILABundle)) else None
})
val difftestIO0 = IO(new DifftestBundle())
val difftestIO1 = IO(new DifftestBundle())
val difftestIO = Seq(difftestIO0, difftestIO1)
val trapIO0 = IO(new xiangshan.TrapIO())
val trapIO1 = IO(new xiangshan.TrapIO())
val trapIO = Seq(trapIO0, trapIO1)
plic.module.io.extra.get.intrVec <> RegNext(RegNext(Cat(io.extIntrs)))
......@@ -172,6 +179,14 @@ class XSSoc()(implicit p: Parameters) extends LazyModule with HasSoCParameter {
xs_core(i).module.io.externalInterrupt.meip := plic.module.io.extra.get.meip(i)
xs_core(i).module.io.l2ToPrefetcher <> l2cache(i).module.io
}
difftestIO0 <> DontCare
difftestIO1 <> DontCare
if (env.DualCoreDifftest) {
difftestIO0 <> xs_core(0).module.difftestIO
difftestIO1 <> xs_core(1).module.difftestIO
trapIO0 <> xs_core(0).module.trapIO
trapIO1 <> xs_core(1).module.trapIO
}
// do not let dma AXI signals optimized out
chisel3.dontTouch(dma.out.head._1)
chisel3.dontTouch(extDev.out.head._1)
......
......@@ -24,7 +24,7 @@ object Parameters {
val simParameters = Parameters(envParameters = EnviromentParameters(FPGAPlatform = false)) // sim only, disable log
val debugParameters = Parameters(envParameters = simParameters.envParameters.copy(EnableDebug = true)) // open log
val simDualCoreParameters = Parameters(socParameters = SoCParameters(NumCores = 2), envParameters = EnviromentParameters(FPGAPlatform = false))
val simDualCoreParameters = Parameters(socParameters = SoCParameters(NumCores = 2), envParameters = EnviromentParameters(FPGAPlatform = true, DualCoreDifftest = true))
val debugDualCoreParameters = Parameters(socParameters = SoCParameters(NumCores = 2), envParameters = simParameters.envParameters.copy(EnableDebug = true))
private var parameters = Parameters() // a default parameter, can be updated before use
......
......@@ -116,3 +116,16 @@ object GenMask {
/** Convert a pointer to a mask of all entries strictly below it:
  * UIntToOH(ptr) - 1 sets every bit position lower than ptr, truncated to `length` bits.
  */
object UIntToMask {
  def apply(ptr: UInt, length: Integer) = UIntToOH(ptr)(length - 1, 0) - 1.U
}
/** Extract the even-indexed bits (0, 2, 4, ...) of `input` as a packed UInt
  * of half the width. Assumes the input width is even — TODO confirm callers.
  */
object GetEvenBits {
  def apply(input: UInt): UInt = {
    VecInit((0 until input.getWidth/2).map(i => {input(2*i)})).asUInt
  }
}
/** Extract the odd-indexed bits (1, 3, 5, ...) of `input` as a packed UInt
  * of half the width. Counterpart of GetEvenBits.
  */
object GetOddBits {
  def apply(input: UInt): UInt = {
    VecInit((0 until input.getWidth/2).map(i => {input(2*i+1)})).asUInt
  }
}
\ No newline at end of file
......@@ -6,22 +6,60 @@ package utils
import chisel3._
import chisel3.util._
import chisel3.util.random.LFSR
import freechips.rocketchip.util._
import freechips.rocketchip.util.property.cover
import xiangshan.{HasXSLog, XSCoreParameters}
/** Common interface for cache replacement policies (random/LRU/PLRU).
  *
  * Implementations either keep internal state (per-set or global, see perSet)
  * or operate purely on externally supplied `state` via get_next_state /
  * get_replace_way.
  */
abstract class ReplacementPolicy {
  def nBits: Int                 // number of state bits this policy needs
  def perSet: Boolean            // true if state is tracked per cache set
  def way: UInt                  // the way this policy would currently replace
  def miss: Unit                 // notify the policy of a miss
  def hit: Unit                  // notify the policy of a hit
  def access(touch_way: UInt): Unit
  def access(touch_ways: Seq[Valid[UInt]]): Unit
  def state_read: UInt
  def get_next_state(state: UInt, touch_way: UInt): UInt
  // Fold several (possibly invalid) touches into the state, left to right.
  def get_next_state(state: UInt, touch_ways: Seq[Valid[UInt]]): UInt = {
    touch_ways.foldLeft(state)((prev, touch_way) => Mux(touch_way.valid, get_next_state(prev, touch_way.bits), prev))
  }
  def get_replace_way(state: UInt): UInt
}
/** Factory for replacement policies, selected by name string. */
object ReplacementPolicy {
  //for fully associative mapping
  def fromString(s: Option[String],n_ways: Int): ReplacementPolicy = fromString(s.getOrElse("none"),n_ways)
  def fromString(s: String, n_ways: Int): ReplacementPolicy = s.toLowerCase match {
    case "random" => new RandomReplacement(n_ways)
    case "lru" => new TrueLRU(n_ways)
    case "plru" => new PseudoLRU(n_ways)
    case t => throw new IllegalArgumentException(s"unknown Replacement Policy type $t")
  }
  //for set associative mapping
  def fromString(s: Option[String], n_ways: Int, n_sets: Int): SetAssocReplacementPolicy = fromString(s.getOrElse("none"),n_ways,n_sets )
  def fromString(s: String, n_ways: Int, n_sets: Int): SetAssocReplacementPolicy = s.toLowerCase match {
    case "random" => new SetAssocRandom(n_sets, n_ways)
    case "setlru" => new SetAssocLRU(n_sets, n_ways, "lru")
    case "setplru" => new SetAssocLRU(n_sets, n_ways, "plru")
    case t => throw new IllegalArgumentException(s"unknown Replacement Policy type $t")
  }
}
class RandomReplacement(ways: Int) extends ReplacementPolicy {
class RandomReplacement(n_ways: Int) extends ReplacementPolicy {
private val replace = Wire(Bool())
replace := false.B
val lfsr = LFSR(16, replace)
def nBits = 16
def perSet = false
private val lfsr = LFSR(nBits, replace)
def state_read = WireDefault(lfsr)
def way = Random(ways, lfsr)
def way = Random(n_ways, lfsr)
def miss = replace := true.B
def hit = {}
def access(touch_way: UInt) = {}
def access(touch_ways: Seq[Valid[UInt]]) = {}
def get_next_state(state: UInt, touch_way: UInt) = 0.U //DontCare
def get_replace_way(state: UInt) = way
}
abstract class SeqReplacementPolicy {
......@@ -30,6 +68,14 @@ abstract class SeqReplacementPolicy {
def way: UInt
}
/** Replacement-policy interface for set-associative caches: every operation
  * is additionally indexed by the cache set it applies to.
  */
abstract class SetAssocReplacementPolicy {
  def access(set: UInt, touch_way: UInt): Unit
  def access(sets: Seq[UInt], touch_ways: Seq[Valid[UInt]]): Unit
  def way(set: UInt): UInt
  def miss(set: UInt): Unit
}
class SeqRandom(n_ways: Int) extends SeqReplacementPolicy {
val logic = new RandomReplacement(n_ways)
def access(set: UInt) = { }
......@@ -39,7 +85,7 @@ class SeqRandom(n_ways: Int) extends SeqReplacementPolicy {
def way = logic.way
}
class TrueLRU(n_ways: Int) {
class TrueLRU(n_ways: Int) extends ReplacementPolicy {
// True LRU replacement policy, using a triangular matrix to track which sets are more recently used than others.
// The matrix is packed into a single UInt (or Bits). Example 4-way (6-bits):
// [5] - 3 more recent than 2
......@@ -49,6 +95,7 @@ class TrueLRU(n_ways: Int) {
// [1] - 2 more recent than 0
// [0] - 1 more recent than 0
def nBits = (n_ways * (n_ways-1)) / 2
def perSet = true
private val state_reg = RegInit(0.U(nBits.W))
def state_read = WireDefault(state_reg)
......@@ -70,28 +117,23 @@ class TrueLRU(n_ways: Int) {
// Compute next value of triangular matrix
// set the touched way as more recent than every other way
nextState.zipWithIndex.foreach { case (e, i) =>
nextState.zipWithIndex.map { case (e, i) =>
e := Mux(i.U === touch_way, 0.U(n_ways.W), moreRecentVec(i) | wayDec)
}
nextState.zipWithIndex.tail.foldLeft((nextState.head.apply(n_ways-1,1),0)) { case ((pe,pi),(ce,ci)) => (Cat(ce.apply(n_ways-1,ci+1), pe), ci) }._1
}
def get_next_state(state: UInt, touch_ways: Seq[Valid[UInt]]): UInt = {
touch_ways.foldLeft(state)((prev, touch_way) => Mux(touch_way.valid, get_next_state(prev, touch_way.bits), prev))
}
def access(touch_way: UInt) {
def access(touch_way: UInt): Unit = {
state_reg := get_next_state(state_reg, touch_way)
}
def access(touch_ways: Seq[Valid[UInt]]) {
when (ParallelOR(touch_ways.map(_.valid))) {
def access(touch_ways: Seq[Valid[UInt]]): Unit = {
when (touch_ways.map(_.valid).orR) {
state_reg := get_next_state(state_reg, touch_ways)
}
// for (i <- 1 until touch_ways.size) {
// cover(PopCount(touch_ways.map(_.valid)) === i.U, s"LRU_UpdateCount$i", s"LRU Update $i simultaneous")
// }
for (i <- 1 until touch_ways.size) {
cover(PopCount(touch_ways.map(_.valid)) === i.U, s"LRU_UpdateCount$i", s"LRU Update $i simultaneous")
}
}
def get_replace_way(state: UInt): UInt = {
......@@ -108,49 +150,149 @@ class TrueLRU(n_ways: Int) {
def way = get_replace_way(state_reg)
def miss = access(way)
def hit = {}
def flush() = { state_reg := 0.U(nBits.W) }
@deprecated("replace 'replace' with 'way' from abstract class ReplacementPolicy","Rocket Chip 2020.05")
def replace: UInt = way
}
class PseudoLRU(n: Int)
{
private val state_reg = Reg(UInt((n-1).W))
def access(way: UInt) {
state_reg := get_next_state(state_reg,way)
}
def access(ways: Seq[ValidIO[UInt]]) {
state_reg := ways.foldLeft(state_reg)((prev, way) => Mux(way.valid, get_next_state(prev, way.bits), prev))
}
def get_next_state(state: UInt, way: UInt) = {
var next_state = state << 1
var idx = 1.U(1.W)
for (i <- log2Up(n)-1 to 0 by -1) {
val bit = way(i)
// next_state = next_state.bitSet(idx, !bit)
next_state = Mux(bit, next_state & (~UIntToOH(idx)), next_state | UIntToOH(idx))
idx = Cat(idx, bit)
class PseudoLRU(n_ways: Int) extends ReplacementPolicy {
// Pseudo-LRU tree algorithm: https://en.wikipedia.org/wiki/Pseudo-LRU#Tree-PLRU
//
//
// - bits storage example for 4-way PLRU binary tree:
// bit[2]: ways 3+2 older than ways 1+0
// / \
// bit[1]: way 3 older than way 2 bit[0]: way 1 older than way 0
//
//
// - bits storage example for 3-way PLRU binary tree:
// bit[1]: way 2 older than ways 1+0
// \
// bit[0]: way 1 older than way 0
//
//
// - bits storage example for 8-way PLRU binary tree:
// bit[6]: ways 7-4 older than ways 3-0
// / \
// bit[5]: ways 7+6 > 5+4 bit[2]: ways 3+2 > 1+0
// / \ / \
// bit[4]: way 7>6 bit[3]: way 5>4 bit[1]: way 3>2 bit[0]: way 1>0
def nBits = n_ways - 1
def perSet = true
private val state_reg = if (nBits == 0) Reg(UInt(0.W)) else RegInit(0.U(nBits.W))
def state_read = WireDefault(state_reg)
def access(touch_way: UInt): Unit = {
state_reg := get_next_state(state_reg, touch_way)
}
def access(touch_ways: Seq[Valid[UInt]]): Unit = {
when (touch_ways.map(_.valid).orR) {
state_reg := get_next_state(state_reg, touch_ways)
}
for (i <- 1 until touch_ways.size) {
cover(PopCount(touch_ways.map(_.valid)) === i.U, s"PLRU_UpdateCount$i", s"PLRU Update $i simultaneous")
}
}
/** @param state state_reg bits for this sub-tree
* @param touch_way touched way encoded value bits for this sub-tree
* @param tree_nways number of ways in this sub-tree
*/
def get_next_state(state: UInt, touch_way: UInt, tree_nways: Int): UInt = {
require(state.getWidth == (tree_nways-1), s"wrong state bits width ${state.getWidth} for $tree_nways ways")
require(touch_way.getWidth == (log2Ceil(tree_nways) max 1), s"wrong encoded way width ${touch_way.getWidth} for $tree_nways ways")
if (tree_nways > 2) {
// we are at a branching node in the tree, so recurse
val right_nways: Int = 1 << (log2Ceil(tree_nways) - 1) // number of ways in the right sub-tree
val left_nways: Int = tree_nways - right_nways // number of ways in the left sub-tree
val set_left_older = !touch_way(log2Ceil(tree_nways)-1)
val left_subtree_state = state.extract(tree_nways-3, right_nways-1)
val right_subtree_state = state(right_nways-2, 0)
if (left_nways > 1) {
// we are at a branching node in the tree with both left and right sub-trees, so recurse both sub-trees
Cat(set_left_older,
Mux(set_left_older,
left_subtree_state, // if setting left sub-tree as older, do NOT recurse into left sub-tree
get_next_state(left_subtree_state, touch_way.extract(log2Ceil(left_nways)-1,0), left_nways)), // recurse left if newer
Mux(set_left_older,
get_next_state(right_subtree_state, touch_way(log2Ceil(right_nways)-1,0), right_nways), // recurse right if newer
right_subtree_state)) // if setting right sub-tree as older, do NOT recurse into right sub-tree
} else {
// we are at a branching node in the tree with only a right sub-tree, so recurse only right sub-tree
Cat(set_left_older,
Mux(set_left_older,
get_next_state(right_subtree_state, touch_way(log2Ceil(right_nways)-1,0), right_nways), // recurse right if newer
right_subtree_state)) // if setting right sub-tree as older, do NOT recurse into right sub-tree
}
} else if (tree_nways == 2) {
// we are at a leaf node at the end of the tree, so set the single state bit opposite of the lsb of the touched way encoded value
!touch_way(0)
} else { // tree_nways <= 1
// we are at an empty node in an empty tree for 1 way, so return single zero bit for Chisel (no zero-width wires)
0.U(1.W)
}
next_state(n-1, 1)
}
def replace = get_replace_way(state_reg)
def get_replace_way(state: UInt) = {
val shifted_state = state << 1
var idx = 1.U(1.W)
for (i <- log2Up(n)-1 to 0 by -1) {
val in_bounds = Cat(idx, (BigInt(1) << i).U)(log2Up(n)-1, 0) < n.U
idx = Cat(idx, in_bounds && shifted_state(idx))
}
def get_next_state(state: UInt, touch_way: UInt): UInt = {
val touch_way_sized = if (touch_way.getWidth < log2Ceil(n_ways)) touch_way.padTo (log2Ceil(n_ways))
else touch_way.extract(log2Ceil(n_ways)-1,0)
get_next_state(state, touch_way_sized, n_ways)
}
/** @param state state_reg bits for this sub-tree
* @param tree_nways number of ways in this sub-tree
*/
def get_replace_way(state: UInt, tree_nways: Int): UInt = {
require(state.getWidth == (tree_nways-1), s"wrong state bits width ${state.getWidth} for $tree_nways ways")
// this algorithm recursively descends the binary tree, filling in the way-to-replace encoded value from msb to lsb
if (tree_nways > 2) {
// we are at a branching node in the tree, so recurse
val right_nways: Int = 1 << (log2Ceil(tree_nways) - 1) // number of ways in the right sub-tree
val left_nways: Int = tree_nways - right_nways // number of ways in the left sub-tree
val left_subtree_older = state(tree_nways-2)
val left_subtree_state = state.extract(tree_nways-3, right_nways-1)
val right_subtree_state = state(right_nways-2, 0)
if (left_nways > 1) {
// we are at a branching node in the tree with both left and right sub-trees, so recurse both sub-trees
Cat(left_subtree_older, // return the top state bit (current tree node) as msb of the way-to-replace encoded value
Mux(left_subtree_older, // if left sub-tree is older, recurse left, else recurse right
get_replace_way(left_subtree_state, left_nways), // recurse left
get_replace_way(right_subtree_state, right_nways))) // recurse right
} else {
// we are at a branching node in the tree with only a right sub-tree, so recurse only right sub-tree
Cat(left_subtree_older, // return the top state bit (current tree node) as msb of the way-to-replace encoded value
Mux(left_subtree_older, // if left sub-tree is older, return and do not recurse right
0.U(1.W),
get_replace_way(right_subtree_state, right_nways))) // recurse right
}
} else if (tree_nways == 2) {
// we are at a leaf node at the end of the tree, so just return the single state bit as lsb of the way-to-replace encoded value
state(0)
} else { // tree_nways <= 1
// we are at an empty node in an unbalanced tree for non-power-of-2 ways, so return single zero bit as lsb of the way-to-replace encoded value
0.U(1.W)
}
idx(log2Up(n)-1,0)
}
def get_replace_way(state: UInt): UInt = get_replace_way(state, n_ways)
def way = get_replace_way(state_reg)
def miss = access(way)
def hit = {}
}
class SeqPLRU(n_sets: Int, n_ways: Int) extends SeqReplacementPolicy {
val state = SyncReadMem(n_sets, UInt((n_ways-1).W))
val logic = new PseudoLRU(n_ways)
val current_state = Wire(UInt())
val state = SyncReadMem(n_sets, UInt(logic.nBits.W))
val current_state = Wire(UInt(logic.nBits.W))
val next_state = Wire(UInt(logic.nBits.W))
val plru_way = logic.get_replace_way(current_state)
val next_state = Wire(UInt())
def access(set: UInt) = {
current_state := state.read(set)
......@@ -165,6 +307,45 @@ class SeqPLRU(n_sets: Int, n_ways: Int) extends SeqReplacementPolicy {
def way = plru_way
}
class SetAssocLRU(n_sets: Int, n_ways: Int, policy: String) extends SetAssocReplacementPolicy {
val logic = policy.toLowerCase match {
case "plru" => new PseudoLRU(n_ways)
case "lru" => new TrueLRU(n_ways)
case t => throw new IllegalArgumentException(s"unknown Replacement Policy type $t")
}
val state_vec = Reg(Vec(n_sets, UInt(logic.nBits.W)))
def access(set: UInt, touch_way: UInt) = {
state_vec(set) := logic.get_next_state(state_vec(set), touch_way)
}
def access(sets: Seq[UInt], touch_ways: Seq[Valid[UInt]]) = {
require(sets.size == touch_ways.size, "internal consistency check: should be same number of simultaneous updates for sets and touch_ways")
for (set <- 0 until n_sets) {
val set_touch_ways = (sets zip touch_ways).map { case (touch_set, touch_way) =>
Pipe(touch_way.valid && (touch_set === set.U), touch_way.bits, 0)}
when (set_touch_ways.map(_.valid).orR) {
state_vec(set) := logic.get_next_state(state_vec(set), set_touch_ways)
}
}
}
def way(set: UInt) = logic.get_replace_way(state_vec(set))
def miss(set: UInt) = {}
}
class SetAssocRandom(n_sets : Int, n_ways: Int) extends SetAssocReplacementPolicy {
val random = new RandomReplacement(n_ways)
def miss(set: UInt) = random.miss
def way(set: UInt) = random.way
def access(set: UInt, touch_way: UInt) = {}
def access(sets: Seq[UInt], touch_ways: Seq[Valid[UInt]]) = {}
}
class SbufferLRU(n_ways: Int) {
def nBits = n_ways * n_ways
......
......@@ -120,6 +120,12 @@ class BranchPrediction extends XSBundle with HasIFUConst {
def hasNotTakenBrs = Mux(taken, ParallelPriorityMux(realTakens, sawNotTakenBr), ParallelORR(brNotTakens))
}
class PredictorAnswer extends XSBundle {
val hit = Bool()
val taken = Bool()
val target = UInt(VAddrBits.W)
}
class BpuMeta extends XSBundle with HasBPUParameter {
val ubtbWriteWay = UInt(log2Up(UBtbWays).W)
val ubtbHits = Bool()
......@@ -144,6 +150,12 @@ class BpuMeta extends XSBundle with HasBPUParameter {
val predictor = if (BPUDebug) UInt(log2Up(4).W) else UInt(0.W) // Mark which component this prediction comes from {ubtb, btb, tage, loopPredictor}
val ubtbAns = new PredictorAnswer
val btbAns = new PredictorAnswer
val tageAns = new PredictorAnswer
val rasAns = new PredictorAnswer
val loopAns = new PredictorAnswer
// def apply(histPtr: UInt, tageMeta: TageMeta, rasSp: UInt, rasTopCtr: UInt) = {
// this.histPtr := histPtr
// this.tageMeta := tageMeta
......@@ -338,8 +350,6 @@ class RoqCommitInfo extends XSBundle {
val commitType = CommitType()
val pdest = UInt(PhyRegIdxWidth.W)
val old_pdest = UInt(PhyRegIdxWidth.W)
val lqIdx = new LqPtr
val sqIdx = new SqPtr
// these should be optimized for synthesis verilog
val pc = UInt(VAddrBits.W)
......@@ -401,3 +411,63 @@ class SfenceBundle extends XSBundle {
p"valid:0x${Hexadecimal(valid)} rs1:${bits.rs1} rs2:${bits.rs2} addr:${Hexadecimal(bits.addr)}"
}
}
class DifftestBundle extends XSBundle {
val fromSbuffer = new Bundle() {
val sbufferResp = Output(Bool())
val sbufferAddr = Output(UInt(64.W))
val sbufferData = Output(Vec(64, UInt(8.W)))
val sbufferMask = Output(UInt(64.W))
}
val fromSQ = new Bundle() {
val storeCommit = Output(UInt(2.W))
val storeAddr = Output(Vec(2, UInt(64.W)))
val storeData = Output(Vec(2, UInt(64.W)))
val storeMask = Output(Vec(2, UInt(8.W)))
}
val fromXSCore = new Bundle() {
val r = Output(Vec(64, UInt(XLEN.W)))
}
val fromCSR = new Bundle() {
val intrNO = Output(UInt(64.W))
val cause = Output(UInt(64.W))
val priviledgeMode = Output(UInt(2.W))
val mstatus = Output(UInt(64.W))
val sstatus = Output(UInt(64.W))
val mepc = Output(UInt(64.W))
val sepc = Output(UInt(64.W))
val mtval = Output(UInt(64.W))
val stval = Output(UInt(64.W))
val mtvec = Output(UInt(64.W))
val stvec = Output(UInt(64.W))
val mcause = Output(UInt(64.W))
val scause = Output(UInt(64.W))
val satp = Output(UInt(64.W))
val mip = Output(UInt(64.W))
val mie = Output(UInt(64.W))
val mscratch = Output(UInt(64.W))
val sscratch = Output(UInt(64.W))
val mideleg = Output(UInt(64.W))
val medeleg = Output(UInt(64.W))
}
val fromRoq = new Bundle() {
val commit = Output(UInt(32.W))
val thisPC = Output(UInt(XLEN.W))
val thisINST = Output(UInt(32.W))
val skip = Output(UInt(32.W))
val wen = Output(UInt(32.W))
val wdata = Output(Vec(CommitWidth, UInt(XLEN.W))) // set difftest width to 6
val wdst = Output(Vec(CommitWidth, UInt(32.W))) // set difftest width to 6
val wpc = Output(Vec(CommitWidth, UInt(XLEN.W))) // set difftest width to 6
val isRVC = Output(UInt(32.W))
val scFailed = Output(Bool())
}
}
class TrapIO extends XSBundle {
val valid = Output(Bool())
val code = Output(UInt(3.W))
val pc = Output(UInt(VAddrBits.W))
val cycleCnt = Output(UInt(XLEN.W))
val instrCnt = Output(UInt(XLEN.W))
}
\ No newline at end of file
......@@ -22,6 +22,14 @@ import freechips.rocketchip.tile.HasFPUParameters
import sifive.blocks.inclusivecache.PrefetcherIO
import utils._
object hartIdCore extends (() => Int) {
var x = 0
def apply(): Int = {
x = x + 1
x-1
}
}
case class XSCoreParameters
(
XLEN: Int = 64,
......@@ -179,6 +187,7 @@ trait HasXSParameter {
val icacheParameters = ICacheParameters(
tagECC = Some("parity"),
dataECC = Some("parity"),
replacer = Some("setlru"),
nMissEntries = 2
)
......@@ -288,7 +297,8 @@ case class EnviromentParameters
(
FPGAPlatform: Boolean = true,
EnableDebug: Boolean = false,
EnablePerfDebug: Boolean = false
EnablePerfDebug: Boolean = false,
DualCoreDifftest: Boolean = false
)
// object AddressSpace extends HasXSParameter {
......@@ -349,6 +359,12 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
val externalInterrupt = new ExternalInterruptIO
val l2ToPrefetcher = Flipped(new PrefetcherIO(PAddrBits))
})
val difftestIO = IO(new DifftestBundle())
difftestIO <> DontCare
val trapIO = IO(new TrapIO())
trapIO <> DontCare
println(s"FPGAPlatform:${env.FPGAPlatform} EnableDebug:${env.EnableDebug}")
AddressSpace.printMemmap()
......@@ -451,8 +467,7 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
floatBlock.io.frm <> integerBlock.io.csrio.frm
memBlock.io.lsqio.commits <> ctrlBlock.io.roqio.commits
memBlock.io.lsqio.roqDeqPtr <> ctrlBlock.io.roqio.roqDeqPtr
memBlock.io.lsqio.roq <> ctrlBlock.io.roqio.lsq
memBlock.io.lsqio.exceptionAddr.lsIdx.lqIdx := ctrlBlock.io.roqio.exception.bits.lqIdx
memBlock.io.lsqio.exceptionAddr.lsIdx.sqIdx := ctrlBlock.io.roqio.exception.bits.sqIdx
memBlock.io.lsqio.exceptionAddr.isStore := CommitType.lsInstIsStore(ctrlBlock.io.roqio.exception.bits.ctrl.commitType)
......@@ -481,4 +496,19 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
ExcitingUtils.addSource(debugArchReg, "difftestRegs", ExcitingUtils.Debug)
}
if (env.DualCoreDifftest) {
val id = hartIdCore()
difftestIO.fromSbuffer <> memBlock.difftestIO.fromSbuffer
difftestIO.fromSQ <> memBlock.difftestIO.fromSQ
difftestIO.fromCSR <> integerBlock.difftestIO.fromCSR
difftestIO.fromRoq <> ctrlBlock.difftestIO.fromRoq
trapIO <> ctrlBlock.trapIO
val debugIntReg, debugFpReg = WireInit(VecInit(Seq.fill(32)(0.U(XLEN.W))))
ExcitingUtils.addSink(debugIntReg, s"DEBUG_INT_ARCH_REG$id", ExcitingUtils.Debug)
ExcitingUtils.addSink(debugFpReg, s"DEBUG_FP_ARCH_REG$id", ExcitingUtils.Debug)
val debugArchReg = WireInit(VecInit(debugIntReg ++ debugFpReg))
difftestIO.fromXSCore.r := debugArchReg
}
}
......@@ -11,7 +11,7 @@ import xiangshan.backend.dispatch.Dispatch
import xiangshan.backend.exu._
import xiangshan.backend.exu.Exu.exuConfigs
import xiangshan.backend.regfile.RfReadPort
import xiangshan.backend.roq.{Roq, RoqCSRIO, RoqPtr}
import xiangshan.backend.roq.{Roq, RoqCSRIO, RoqLsqIO, RoqPtr}
import xiangshan.mem.LsqEnqIO
class CtrlToIntBlockIO extends XSBundle {
......@@ -52,11 +52,29 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper {
val exception = ValidIO(new MicroOp)
val isInterrupt = Output(Bool())
// to mem block
val commits = new RoqCommitIO
val roqDeqPtr = Output(new RoqPtr)
val lsq = new RoqLsqIO
}
})
val difftestIO = IO(new Bundle() {
val fromRoq = new Bundle() {
val commit = Output(UInt(32.W))
val thisPC = Output(UInt(XLEN.W))
val thisINST = Output(UInt(32.W))
val skip = Output(UInt(32.W))
val wen = Output(UInt(32.W))
val wdata = Output(Vec(CommitWidth, UInt(XLEN.W))) // set difftest width to 6
val wdst = Output(Vec(CommitWidth, UInt(32.W))) // set difftest width to 6
val wpc = Output(Vec(CommitWidth, UInt(XLEN.W))) // set difftest width to 6
val isRVC = Output(UInt(32.W))
val scFailed = Output(Bool())
}
})
difftestIO <> DontCare
val trapIO = IO(new TrapIO())
trapIO <> DontCare
val decode = Module(new DecodeStage)
val brq = Module(new Brq)
val rename = Module(new Rename)
......@@ -145,6 +163,11 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper {
}
roq.io.exeWbResults.last := brq.io.out
if (env.DualCoreDifftest) {
difftestIO.fromRoq <> roq.difftestIO
trapIO <> roq.trapIO
}
io.toIntBlock.redirect.valid := redirectValid
io.toIntBlock.redirect.bits := redirect
io.toFpBlock.redirect.valid := redirectValid
......@@ -161,6 +184,5 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper {
io.roqio.exception.bits := roq.io.exception
io.roqio.isInterrupt := roq.io.redirectOut.bits.interrupt
// roq to mem block
io.roqio.roqDeqPtr := roq.io.roqDeqPtr
io.roqio.commits := roq.io.commits
io.roqio.lsq <> roq.io.lsq
}
......@@ -92,6 +92,31 @@ class IntegerBlock
val sbuffer = new FenceToSbuffer // to mem
}
})
val difftestIO = IO(new Bundle() {
val fromCSR = new Bundle() {
val intrNO = Output(UInt(64.W))
val cause = Output(UInt(64.W))
val priviledgeMode = Output(UInt(2.W))
val mstatus = Output(UInt(64.W))
val sstatus = Output(UInt(64.W))
val mepc = Output(UInt(64.W))
val sepc = Output(UInt(64.W))
val mtval = Output(UInt(64.W))
val stval = Output(UInt(64.W))
val mtvec = Output(UInt(64.W))
val stvec = Output(UInt(64.W))
val mcause = Output(UInt(64.W))
val scause = Output(UInt(64.W))
val satp = Output(UInt(64.W))
val mip = Output(UInt(64.W))
val mie = Output(UInt(64.W))
val mscratch = Output(UInt(64.W))
val sscratch = Output(UInt(64.W))
val mideleg = Output(UInt(64.W))
val medeleg = Output(UInt(64.W))
}
})
difftestIO <> DontCare
val redirect = io.fromCtrlBlock.redirect
......@@ -210,6 +235,9 @@ class IntegerBlock
jmpExeUnit.csrio <> io.csrio
jmpExeUnit.fenceio <> io.fenceio
if (env.DualCoreDifftest) {
jmpExeUnit.difftestIO.fromCSR <> difftestIO.fromCSR
}
// read int rf from ctrl block
intRf.io.readPorts.zipWithIndex.map{ case(r, i) => r.addr := io.fromCtrlBlock.readRf(i) }
......
......@@ -7,7 +7,7 @@ import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
import freechips.rocketchip.tile.HasFPUParameters
import xiangshan._
import xiangshan.backend.exu.Exu.{loadExuConfigs, storeExuConfigs}
import xiangshan.backend.roq.RoqPtr
import xiangshan.backend.roq.{RoqPtr, RoqLsqIO}
import xiangshan.backend.exu._
import xiangshan.cache._
import xiangshan.mem._
......@@ -77,12 +77,26 @@ class MemBlockImp
val lsqio = new Bundle {
val exceptionAddr = new ExceptionAddrIO // to csr
val commits = Flipped(new RoqCommitIO) // to lsq
val roqDeqPtr = Input(new RoqPtr) // to lsq
val roq = Flipped(new RoqLsqIO) // roq to lsq
}
val toDCachePrefetch = DecoupledIO(new MissReq)
})
val difftestIO = IO(new Bundle() {
val fromSbuffer = new Bundle() {
val sbufferResp = Output(Bool())
val sbufferAddr = Output(UInt(64.W))
val sbufferData = Output(Vec(64, UInt(8.W)))
val sbufferMask = Output(UInt(64.W))
}
val fromSQ = new Bundle() {
val storeCommit = Output(UInt(2.W))
val storeAddr = Output(Vec(2, UInt(64.W)))
val storeData = Output(Vec(2, UInt(64.W)))
val storeMask = Output(Vec(2, UInt(8.W)))
}
})
difftestIO <> DontCare
val dcache = outer.dcache.module
val uncache = outer.uncache.module
......@@ -191,6 +205,10 @@ class MemBlockImp
io.ptw <> dtlb.io.ptw
dtlb.io.sfence <> io.sfence
dtlb.io.csr <> io.tlbCsr
if (env.DualCoreDifftest) {
difftestIO.fromSbuffer <> sbuffer.difftestIO
difftestIO.fromSQ <> lsq.difftestIO.fromSQ
}
// LoadUnit
for (i <- 0 until exuParameters.LduCnt) {
......@@ -208,6 +226,7 @@ class MemBlockImp
// passdown to lsq
lsq.io.loadIn(i) <> loadUnits(i).io.lsq.loadIn
lsq.io.ldout(i) <> loadUnits(i).io.lsq.ldout
lsq.io.loadDataForwarded(i) <> loadUnits(i).io.lsq.loadDataForwarded
}
// StoreUnit
......@@ -236,10 +255,9 @@ class MemBlockImp
}
// Lsq
lsq.io.commits <> io.lsqio.commits
lsq.io.roq <> io.lsqio.roq
lsq.io.enq <> io.fromCtrlBlock.enqLsq
lsq.io.brqRedirect <> io.fromCtrlBlock.redirect
lsq.io.roqDeqPtr <> io.lsqio.roqDeqPtr
io.toCtrlBlock.replay <> lsq.io.rollback
lsq.io.dcache <> dcache.io.lsu.lsq
lsq.io.uncache <> uncache.io.lsq
......
......@@ -116,6 +116,7 @@ class Brq extends XSModule with HasCircularQueuePtrHelper {
io.redirectOut.valid := wbValid && wbIsMisPred
io.redirectOut.bits := wbEntry.redirect
io.redirectOut.bits.level := RedirectLevel.flushAfter
io.redirectOut.bits.brTag := BrqPtr(ptrFlagVec(writebackIdx), writebackIdx)
io.out.valid := wbValid || wbIsAuipc
......@@ -315,21 +316,60 @@ class Brq extends XSModule with HasCircularQueuePtrHelper {
val mbpRRight = predRight && isRType
val mbpRWrong = predWrong && isRType
val predictor = io.cfiInfo.bits.bpuMeta.predictor
if(!env.FPGAPlatform && env.EnablePerfDebug) {
val predictor = io.cfiInfo.bits.bpuMeta.predictor
val ubtbRight = !io.cfiInfo.bits.isMisPred && !io.cfiInfo.bits.isReplay && predictor === 0.U
val ubtbWrong = io.cfiInfo.bits.isMisPred && !io.cfiInfo.bits.isReplay && predictor === 0.U
val cfiCountValid = io.cfiInfo.valid && !io.cfiInfo.bits.isReplay
val btbRight = !io.cfiInfo.bits.isMisPred && !io.cfiInfo.bits.isReplay && predictor === 1.U
val btbWrong = io.cfiInfo.bits.isMisPred && !io.cfiInfo.bits.isReplay && predictor === 1.U
val ubtbAns = io.cfiInfo.bits.bpuMeta.ubtbAns
val btbAns = io.cfiInfo.bits.bpuMeta.btbAns
val tageAns = io.cfiInfo.bits.bpuMeta.tageAns
val rasAns = io.cfiInfo.bits.bpuMeta.rasAns
val loopAns = io.cfiInfo.bits.bpuMeta.loopAns
val tageRight = !io.cfiInfo.bits.isMisPred && !io.cfiInfo.bits.isReplay && predictor === 2.U
val tageWrong = io.cfiInfo.bits.isMisPred && !io.cfiInfo.bits.isReplay && predictor === 2.U
// Pipeline stage counter
val s1Right = cfiCountValid && !io.cfiInfo.bits.isMisPred && predictor === 0.U
val s1Wrong = cfiCountValid && io.cfiInfo.bits.isMisPred && predictor === 0.U
val loopRight = !io.cfiInfo.bits.isMisPred && !io.cfiInfo.bits.isReplay && predictor === 3.U
val loopWrong = io.cfiInfo.bits.isMisPred && !io.cfiInfo.bits.isReplay && predictor === 3.U
val s2Right = cfiCountValid && !io.cfiInfo.bits.isMisPred && predictor === 1.U
val s2Wrong = cfiCountValid && io.cfiInfo.bits.isMisPred && predictor === 1.U
val s3Right = cfiCountValid && !io.cfiInfo.bits.isMisPred && predictor === 2.U
val s3Wrong = cfiCountValid && io.cfiInfo.bits.isMisPred && predictor === 2.U
// Predictor counter
// val ubtbRight = cfiCountValid && ubtbAns.hit && io.cfiInfo.bits.target === ubtbAns.target && io.cfiInfo.bits.taken === ubtbAns.taken
// val ubtbWrong = cfiCountValid && ubtbAns.hit && (io.cfiInfo.bits.target =/= ubtbAns.target || io.cfiInfo.bits.taken =/= ubtbAns.taken)
val ubtbRight = cfiCountValid && ubtbAns.hit && Mux(ubtbAns.taken,
io.cfiInfo.bits.target === ubtbAns.target && io.cfiInfo.bits.taken === ubtbAns.taken, // taken
io.cfiInfo.bits.taken === ubtbAns.taken) // noTaken
val ubtbWrong = cfiCountValid && ubtbAns.hit && Mux(ubtbAns.taken,
io.cfiInfo.bits.target =/= ubtbAns.target || io.cfiInfo.bits.taken =/= ubtbAns.taken, // taken
io.cfiInfo.bits.taken =/= ubtbAns.taken) // noTaken
val takenAndRight = ubtbAns.taken && ubtbRight
val takenButWrong = ubtbAns.taken && ubtbWrong
// val btbRight = cfiCountValid && btbAns.hit && io.cfiInfo.bits.target === btbAns.target && io.cfiInfo.bits.taken === btbAns.taken
// val btbWrong = cfiCountValid && btbAns.hit && (io.cfiInfo.bits.target =/= btbAns.target || io.cfiInfo.bits.taken =/= btbAns.taken)
val btbRight = cfiCountValid && btbAns.hit && Mux(btbAns.taken,
io.cfiInfo.bits.target === btbAns.target && io.cfiInfo.bits.taken === btbAns.taken, // taken
io.cfiInfo.bits.taken === btbAns.taken) // noTaken
val btbWrong = cfiCountValid && btbAns.hit && Mux(btbAns.taken,
io.cfiInfo.bits.target =/= btbAns.target || io.cfiInfo.bits.taken =/= btbAns.taken, // taken
io.cfiInfo.bits.taken =/= btbAns.taken) // noTaken
val tageRight = cfiCountValid && io.cfiInfo.bits.pd.brType =/= "b10".U && io.cfiInfo.bits.taken === tageAns.taken // DontCare jal
val tageWrong = cfiCountValid && io.cfiInfo.bits.pd.brType =/= "b10".U && io.cfiInfo.bits.taken =/= tageAns.taken // DontCare jal
val rasRight = cfiCountValid && io.cfiInfo.bits.pd.isRet && rasAns.hit && io.cfiInfo.bits.target === rasAns.target
val rasWrong = cfiCountValid && io.cfiInfo.bits.pd.isRet && rasAns.hit && io.cfiInfo.bits.target =/= rasAns.target
val loopRight = cfiCountValid && loopAns.hit && io.cfiInfo.bits.taken === loopAns.taken
val loopWrong = cfiCountValid && loopAns.hit && io.cfiInfo.bits.taken =/= loopAns.taken
if(!env.FPGAPlatform){
ExcitingUtils.addSource(mbpInstr, "perfCntCondBpInstr", Perf)
ExcitingUtils.addSource(mbpRight, "perfCntCondBpRight", Perf)
ExcitingUtils.addSource(mbpWrong, "perfCntCondBpWrong", Perf)
......@@ -342,14 +382,26 @@ class Brq extends XSModule with HasCircularQueuePtrHelper {
ExcitingUtils.addSource(mbpRRight, "perfCntCondBpRRight", Perf)
ExcitingUtils.addSource(mbpRWrong, "perfCntCondBpRWrong", Perf)
ExcitingUtils.addSource(s1Right, "perfCntS1Right", Perf)
ExcitingUtils.addSource(s1Wrong, "perfCntS1Wrong", Perf)
ExcitingUtils.addSource(s2Right, "perfCntS2Right", Perf)
ExcitingUtils.addSource(s2Wrong, "perfCntS2Wrong", Perf)
ExcitingUtils.addSource(s3Right, "perfCntS3Right", Perf)
ExcitingUtils.addSource(s3Wrong, "perfCntS3Wrong", Perf)
ExcitingUtils.addSource(ubtbRight, "perfCntubtbRight", Perf)
ExcitingUtils.addSource(ubtbWrong, "perfCntubtbWrong", Perf)
ExcitingUtils.addSource(btbRight, "perfCntbtbRight", Perf)
ExcitingUtils.addSource(btbWrong, "perfCntbtbWrong", Perf)
ExcitingUtils.addSource(tageRight, "perfCnttageRight", Perf)
ExcitingUtils.addSource(tageWrong, "perfCnttageWrong", Perf)
ExcitingUtils.addSource(rasRight, "perfCntrasRight", Perf)
ExcitingUtils.addSource(rasWrong, "perfCntrasWrong", Perf)
ExcitingUtils.addSource(loopRight, "perfCntloopRight", Perf)
ExcitingUtils.addSource(loopWrong, "perfCntloopWrong", Perf)
ExcitingUtils.addSource(takenAndRight, "perfCntTakenAndRight", Perf)
ExcitingUtils.addSource(takenButWrong, "perfCntTakenButWrong", Perf)
}
val utilization = Mux(headPtr.flag === tailPtr.flag, tailPtr.value - headPtr.value, BrqSize.U + tailPtr.value - headPtr.value)
......
......@@ -101,7 +101,8 @@ class Dispatch1 extends XSModule with HasExceptionNO {
// update commitType
updatedUop(i).ctrl.commitType := updatedCommitType(i)
// update roqIdx, lqIdx, sqIdx
updatedUop(i).roqIdx := io.enqRoq.resp(i)
// updatedUop(i).roqIdx := io.enqRoq.resp(i)
XSError(io.fromRename(i).valid && updatedUop(i).roqIdx.asUInt =/= io.enqRoq.resp(i).asUInt, "they should equal")
updatedUop(i).lqIdx := io.enqLsq.resp(i).lqIdx
updatedUop(i).sqIdx := io.enqLsq.resp(i).sqIdx
}
......
......@@ -30,6 +30,31 @@ class JumpExeUnit extends Exu(jumpExeUnitCfg)
val fencei = Output(Bool())
val sbuffer = new FenceToSbuffer
})
val difftestIO = IO(new Bundle() {
val fromCSR = new Bundle() {
val intrNO = Output(UInt(64.W))
val cause = Output(UInt(64.W))
val priviledgeMode = Output(UInt(2.W))
val mstatus = Output(UInt(64.W))
val sstatus = Output(UInt(64.W))
val mepc = Output(UInt(64.W))
val sepc = Output(UInt(64.W))
val mtval = Output(UInt(64.W))
val stval = Output(UInt(64.W))
val mtvec = Output(UInt(64.W))
val stvec = Output(UInt(64.W))
val mcause = Output(UInt(64.W))
val scause = Output(UInt(64.W))
val satp = Output(UInt(64.W))
val mip = Output(UInt(64.W))
val mie = Output(UInt(64.W))
val mscratch = Output(UInt(64.W))
val sscratch = Output(UInt(64.W))
val mideleg = Output(UInt(64.W))
val medeleg = Output(UInt(64.W))
}
})
difftestIO <> DontCare
val jmp = supportedFunctionUnits.collectFirst{
case j: Jump => j
......@@ -58,6 +83,10 @@ class JumpExeUnit extends Exu(jumpExeUnitCfg)
csr.csrio.externalInterrupt <> csrio.externalInterrupt
csr.csrio.tlb <> csrio.tlb
if (env.DualCoreDifftest) {
difftestIO.fromCSR <> csr.difftestIO
}
fenceio.sfence <> fence.sfence
fenceio.fencei <> fence.fencei
fenceio.sbuffer <> fence.toSbuffer
......
......@@ -145,6 +145,29 @@ class CSR extends FunctionUnit with HasCSRConst
// TLB
val tlb = Output(new TlbCsrBundle)
})
val difftestIO = IO(new Bundle() {
val intrNO = Output(UInt(64.W))
val cause = Output(UInt(64.W))
val priviledgeMode = Output(UInt(2.W))
val mstatus = Output(UInt(64.W))
val sstatus = Output(UInt(64.W))
val mepc = Output(UInt(64.W))
val sepc = Output(UInt(64.W))
val mtval = Output(UInt(64.W))
val stval = Output(UInt(64.W))
val mtvec = Output(UInt(64.W))
val stvec = Output(UInt(64.W))
val mcause = Output(UInt(64.W))
val scause = Output(UInt(64.W))
val satp = Output(UInt(64.W))
val mip = Output(UInt(64.W))
val mie = Output(UInt(64.W))
val mscratch = Output(UInt(64.W))
val sscratch = Output(UInt(64.W))
val mideleg = Output(UInt(64.W))
val medeleg = Output(UInt(64.W))
})
difftestIO <> DontCare
val cfIn = io.in.bits.uop.cf
val cfOut = Wire(new CtrlFlow)
......@@ -812,24 +835,34 @@ class CSR extends FunctionUnit with HasCSRConst
"btbWrong" -> (0x1033, "perfCntbtbWrong"),
"tageRight" -> (0x1034, "perfCnttageRight"),
"tageWrong" -> (0x1035, "perfCnttageWrong"),
"loopRight" -> (0x1036, "perfCntloopRight"),
"loopWrong" -> (0x1037, "perfCntloopWrong")
"rasRight" -> (0x1036, "perfCntrasRight"),
"rasWrong" -> (0x1037, "perfCntrasWrong"),
"loopRight" -> (0x1038, "perfCntloopRight"),
"loopWrong" -> (0x1039, "perfCntloopWrong"),
"s1Right" -> (0x103a, "perfCntS1Right"),
"s1Wrong" -> (0x103b, "perfCntS1Wrong"),
"s2Right" -> (0x103c, "perfCntS2Right"),
"s2Wrong" -> (0x103d, "perfCntS2Wrong"),
"s3Right" -> (0x103e, "perfCntS3Right"),
"s3Wrong" -> (0x103f, "perfCntS3Wrong"),
"takenAndRight" -> (0x1040, "perfCntTakenAndRight"),
"takenButWrong" -> (0x1041, "perfCntTakenButWrong"),
// "L2cacheHit" -> (0x1023, "perfCntCondL2cacheHit")
) ++ (
(0 until dcacheParameters.nMissEntries).map(i =>
("DCacheMissQueuePenalty" + Integer.toString(i, 10), (0x102a + i, "perfCntDCacheMissQueuePenaltyEntry" + Integer.toString(i, 10)))
(0 until dcacheParameters.nMissEntries).map(i =>
("DCacheMissQueuePenalty" + Integer.toString(i, 10), (0x1042 + i, "perfCntDCacheMissQueuePenaltyEntry" + Integer.toString(i, 10)))
).toMap
) ++ (
(0 until icacheParameters.nMissEntries).map(i =>
("ICacheMissQueuePenalty" + Integer.toString(i, 10), (0x102a + dcacheParameters.nMissEntries + i, "perfCntICacheMissQueuePenaltyEntry" + Integer.toString(i, 10)))
("ICacheMissQueuePenalty" + Integer.toString(i, 10), (0x1042 + dcacheParameters.nMissEntries + i, "perfCntICacheMissQueuePenaltyEntry" + Integer.toString(i, 10)))
).toMap
) ++ (
(0 until l1plusPrefetcherParameters.nEntries).map(i =>
("L1+PrefetchPenalty" + Integer.toString(i, 10), (0x102a + dcacheParameters.nMissEntries + icacheParameters.nMissEntries + i, "perfCntL1plusPrefetchPenaltyEntry" + Integer.toString(i, 10)))
("L1+PrefetchPenalty" + Integer.toString(i, 10), (0x1042 + dcacheParameters.nMissEntries + icacheParameters.nMissEntries + i, "perfCntL1plusPrefetchPenaltyEntry" + Integer.toString(i, 10)))
).toMap
) ++ (
(0 until l2PrefetcherParameters.nEntries).map(i =>
("L2PrefetchPenalty" + Integer.toString(i, 10), (0x102a + dcacheParameters.nMissEntries + icacheParameters.nMissEntries + l1plusPrefetcherParameters.nEntries + i, "perfCntL2PrefetchPenaltyEntry" + Integer.toString(i, 10)))
("L2PrefetchPenalty" + Integer.toString(i, 10), (0x1042 + dcacheParameters.nMissEntries + icacheParameters.nMissEntries + l1plusPrefetcherParameters.nEntries + i, "perfCntL2PrefetchPenaltyEntry" + Integer.toString(i, 10)))
).toMap
)
......@@ -845,13 +878,15 @@ class CSR extends FunctionUnit with HasCSRConst
// }
// }
}
val xstrap = WireInit(false.B)
if (!env.FPGAPlatform && EnableBPU) {
ExcitingUtils.addSink(xstrap, "XSTRAP", ConnectionType.Debug)
}
def readWithScala(addr: Int): UInt = mapping(addr)._1
val difftestIntrNO = Mux(raiseIntr, causeNO, 0.U)
if (!env.FPGAPlatform) {
// display all perfcnt when nooptrap is executed
......@@ -862,7 +897,6 @@ class CSR extends FunctionUnit with HasCSRConst
}
}
val difftestIntrNO = Mux(raiseIntr, causeNO, 0.U)
ExcitingUtils.addSource(difftestIntrNO, "difftestIntrNOfromCSR")
ExcitingUtils.addSource(causeNO, "difftestCausefromCSR")
ExcitingUtils.addSource(priviledgeMode, "difftestMode", Debug)
......@@ -884,4 +918,27 @@ class CSR extends FunctionUnit with HasCSRConst
ExcitingUtils.addSource(mideleg, "difftestMideleg", Debug)
ExcitingUtils.addSource(medeleg, "difftestMedeleg", Debug)
}
if (env.DualCoreDifftest) {
difftestIO.intrNO := RegNext(difftestIntrNO)
difftestIO.cause := RegNext(causeNO)
difftestIO.priviledgeMode := priviledgeMode
difftestIO.mstatus := mstatus
difftestIO.sstatus := mstatus & sstatusRmask
difftestIO.mepc := mepc
difftestIO.sepc := sepc
difftestIO.mtval:= mtval
difftestIO.stval:= stval
difftestIO.mtvec := mtvec
difftestIO.stvec := stvec
difftestIO.mcause := mcause
difftestIO.scause := scause
difftestIO.satp := satp
difftestIO.mip := mipReg
difftestIO.mie := mie
difftestIO.mscratch := mscratch
difftestIO.sscratch := sscratch
difftestIO.mideleg := mideleg
difftestIO.medeleg := medeleg
}
}
......@@ -4,6 +4,22 @@ import chisel3._
import chisel3.util._
import xiangshan._
object hartIdRFInt extends (() => Int) {
var x = 0
def apply(): Int = {
x = x + 1
x-1
}
}
object hartIdRFFp extends (() => Int) {
var x = 0
def apply(): Int = {
x = x + 1
x-1
}
}
class RfReadPort(len: Int) extends XSBundle {
val addr = Input(UInt(PhyRegIdxWidth.W))
val data = Output(UInt(len.W))
......@@ -65,6 +81,29 @@ class Regfile
ExcitingUtils.Debug
)
}
if (env.DualCoreDifftest) {
val id = if (hasZero) hartIdRFInt() else hartIdRFFp()
val debugArchRat = WireInit(VecInit(Seq.fill(32)(0.U(PhyRegIdxWidth.W))))
ExcitingUtils.addSink(
debugArchRat,
if(hasZero) s"DEBUG_INI_ARCH_RAT$id" else s"DEBUG_FP_ARCH_RAT$id",
ExcitingUtils.Debug
)
val debugArchReg = WireInit(VecInit(debugArchRat.zipWithIndex.map(
x => if(hasZero){
if(x._2 == 0) 0.U else mem(x._1)
} else {
ieee(mem(x._1))
}
)))
ExcitingUtils.addSource(
debugArchReg,
if(hasZero) s"DEBUG_INT_ARCH_REG$id" else s"DEBUG_FP_ARCH_REG$id",
ExcitingUtils.Debug
)
}
} else {
val regfile = Module(new regfile_160x64_10w16r_sim)
......
......@@ -4,6 +4,7 @@ import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import xiangshan.backend.roq.RoqPtr
class RenameBypassInfo extends XSBundle {
val lsrc1_bypass = MixedVec(List.tabulate(RenameWidth-1)(i => UInt((i+1).W)))
......@@ -12,7 +13,7 @@ class RenameBypassInfo extends XSBundle {
val ldest_bypass = MixedVec(List.tabulate(RenameWidth-1)(i => UInt((i+1).W)))
}
class Rename extends XSModule {
class Rename extends XSModule with HasCircularQueuePtrHelper {
val io = IO(new Bundle() {
val redirect = Flipped(ValidIO(new Redirect))
val roqCommits = Flipped(new RoqCommitIO)
......@@ -51,6 +52,7 @@ class Rename extends XSModule {
freelist.redirect := io.redirect
freelist.walk.valid := io.roqCommits.isWalk
}
val canOut = io.out(0).ready && fpFreeList.req.canAlloc && intFreeList.req.canAlloc && !io.roqCommits.isWalk
def needDestReg[T <: CfCtrl](fp: Boolean, x: T): Bool = {
{if(fp) x.ctrl.fpWen else x.ctrl.rfWen && (x.ctrl.ldest =/= 0.U)}
......@@ -64,6 +66,16 @@ class Rename extends XSModule {
fpFreeList.req.doAlloc := intFreeList.req.canAlloc && io.out(0).ready
intFreeList.req.doAlloc := fpFreeList.req.canAlloc && io.out(0).ready
// speculatively assign the instruction with an roqIdx
val validCount = PopCount(io.in.map(_.valid))
val roqIdxHead = RegInit(0.U.asTypeOf(new RoqPtr))
val lastCycleMisprediction = RegNext(io.redirect.valid && !io.redirect.bits.isUnconditional() && !io.redirect.bits.flushItself())
val roqIdxHeadNext = Mux(io.redirect.valid,
Mux(io.redirect.bits.isUnconditional(), 0.U.asTypeOf(new RoqPtr), io.redirect.bits.roqIdx),
Mux(lastCycleMisprediction, roqIdxHead + 1.U, Mux(canOut, roqIdxHead + validCount, roqIdxHead))
)
roqIdxHead := roqIdxHeadNext
/**
* Rename: allocate free physical register and update rename table
*/
......@@ -85,7 +97,6 @@ class Rename extends XSModule {
val needFpDest = Wire(Vec(RenameWidth, Bool()))
val needIntDest = Wire(Vec(RenameWidth, Bool()))
val hasValid = Cat(io.in.map(_.valid)).orR
val canOut = io.out(0).ready && fpFreeList.req.canAlloc && intFreeList.req.canAlloc && !io.roqCommits.isWalk
for (i <- 0 until RenameWidth) {
uops(i).cf := io.in(i).bits.cf
uops(i).ctrl := io.in(i).bits.ctrl
......@@ -115,6 +126,8 @@ class Rename extends XSModule {
)
)
uops(i).roqIdx := roqIdxHead + i.U
io.out(i).valid := io.in(i).valid && intFreeList.req.canAlloc && fpFreeList.req.canAlloc && !io.roqCommits.isWalk
io.out(i).bits := uops(i)
......
......@@ -15,6 +15,22 @@ class RatWritePort extends XSBundle {
val wdata = Input(UInt(PhyRegIdxWidth.W))
}
object hartIdRTInt extends (() => Int) {
  // Hands out consecutive ids (0, 1, 2, ...) — one per call — so each
  // integer rename table instance gets a unique difftest signal suffix.
  var x = 0
  def apply(): Int = {
    val allocated = x
    x += 1
    allocated
  }
}
object hartIdRTFp extends (() => Int) {
  // Hands out consecutive ids (0, 1, 2, ...) — one per call — so each
  // floating-point rename table instance gets a unique difftest signal suffix.
  var x = 0
  def apply(): Int = {
    val allocated = x
    x += 1
    allocated
  }
}
class RenameTable(float: Boolean) extends XSModule {
val io = IO(new Bundle() {
val redirect = Flipped(ValidIO(new Redirect))
......@@ -65,4 +81,13 @@ class RenameTable(float: Boolean) extends XSModule {
ExcitingUtils.Debug
)
}
if (env.DualCoreDifftest) {
val id = if (float) hartIdRTFp() else hartIdRTInt()
ExcitingUtils.addSource(
arch_table,
if(float) s"DEBUG_FP_ARCH_RAT$id" else s"DEBUG_INI_ARCH_RAT$id",
ExcitingUtils.Debug
)
}
}
......@@ -43,6 +43,14 @@ class RoqCSRIO extends XSBundle {
}
}
class RoqLsqIO extends XSBundle {
  // ROB -> LSQ interface: per-cycle commit counts and head-of-queue status.
  val lcommit = Output(UInt(3.W))  // number of loads committed this cycle (0 while walking)
  val scommit = Output(UInt(3.W))  // number of stores committed this cycle (0 while walking)
  val pendingld = Output(Bool())   // valid, uncommitted load sits at the ROB head
  val pendingst = Output(Bool())   // valid, uncommitted store sits at the ROB head
  val commit = Output(Bool())      // head instruction commits this cycle (not in walk state)
}
class RoqEnqIO extends XSBundle {
val canAccept = Output(Bool())
val isEmpty = Output(Bool())
......@@ -57,7 +65,6 @@ class RoqDispatchData extends RoqCommitInfo {
}
class RoqWbData extends XSBundle {
  // Per-entry payload captured at writeback time and read back at commit.
  val fflags = UInt(5.W)   // floating-point accrued exception flags from the writeback result
  val flushPipe = Bool()   // pipeline-flush request carried over from uop.ctrl.flushPipe
}
......@@ -204,11 +211,29 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
// exu + brq
val exeWbResults = Vec(numWbPorts, Flipped(ValidIO(new ExuOutput)))
val commits = new RoqCommitIO
val lsq = new RoqLsqIO
val bcommit = Output(UInt(BrTagWidth.W))
val roqDeqPtr = Output(new RoqPtr)
val csr = new RoqCSRIO
})
val difftestIO = IO(new Bundle() {
val commit = Output(UInt(32.W))
val thisPC = Output(UInt(XLEN.W))
val thisINST = Output(UInt(32.W))
val skip = Output(UInt(32.W))
val wen = Output(UInt(32.W))
val wdata = Output(Vec(CommitWidth, UInt(XLEN.W))) // set difftest width to 6
val wdst = Output(Vec(CommitWidth, UInt(32.W))) // set difftest width to 6
val wpc = Output(Vec(CommitWidth, UInt(XLEN.W))) // set difftest width to 6
val isRVC = Output(UInt(32.W))
val scFailed = Output(Bool())
})
difftestIO <> DontCare
val trapIO = IO(new TrapIO())
trapIO <> DontCare
// instvalid field
// val valid = RegInit(VecInit(List.fill(RoqSize)(false.B)))
val valid = Mem(RoqSize, Bool())
......@@ -264,6 +289,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
val writebackDataRead = writebackData.io.rdata
val exceptionDataRead = Wire(Vec(CommitWidth, ExceptionVec()))
val fflagsDataRead = Wire(Vec(CommitWidth, UInt(5.W)))
io.roqDeqPtr := deqPtr
......@@ -353,8 +379,6 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
io.exception := debug_deqUop
io.exception.ctrl.commitType := deqDispatchData.commitType
io.exception.lqIdx := deqDispatchData.lqIdx
io.exception.sqIdx := deqDispatchData.sqIdx
io.exception.cf.pc := deqDispatchData.pc
io.exception.cf.exceptionVec := deqExceptionVec
io.exception.cf.crossPageIPFFix := deqDispatchData.crossPageIPFFix
......@@ -391,7 +415,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
}).unzip
val fflags = Wire(Valid(UInt(5.W)))
fflags.valid := Mux(io.commits.isWalk, false.B, Cat(wflags).orR())
fflags.bits := wflags.zip(writebackDataRead.map(_.fflags)).map({
fflags.bits := wflags.zip(fflagsDataRead).map({
case (w, f) => Mux(w, f, 0.U)
}).reduce(_|_)
val dirty_fs = Mux(io.commits.isWalk, false.B, Cat(fpWen).orR())
......@@ -425,7 +449,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
io.commits.info(i).pdest,
io.commits.info(i).old_pdest,
debug_exuData(deqPtrVec(i).value),
writebackDataRead(i).fflags.asUInt
fflagsDataRead(i)
)
XSInfo(state === s_walk && io.commits.valid(i), "walked pc %x wen %d ldst %d data %x\n",
debug_microOp(walkPtrVec(i).value).cf.pc,
......@@ -442,12 +466,22 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
io.commits.info.map(info => dontTouch(info.pc))
}
// sync fflags/dirty_fs to csr
io.csr.fflags := fflags
io.csr.dirty_fs := dirty_fs
// commit branch to brq
val cfiCommitVec = VecInit(io.commits.valid.zip(io.commits.info.map(_.commitType)).map{case(v, t) => v && CommitType.isBranch(t)})
io.bcommit := Mux(io.commits.isWalk, 0.U, PopCount(cfiCommitVec))
// commit load/store to lsq
val ldCommitVec = VecInit((0 until CommitWidth).map(i => io.commits.valid(i) && io.commits.info(i).commitType === CommitType.LOAD))
val stCommitVec = VecInit((0 until CommitWidth).map(i => io.commits.valid(i) && io.commits.info(i).commitType === CommitType.STORE))
io.lsq.lcommit := Mux(io.commits.isWalk, 0.U, PopCount(ldCommitVec))
io.lsq.scommit := Mux(io.commits.isWalk, 0.U, PopCount(stCommitVec))
io.lsq.pendingld := !io.commits.isWalk && io.commits.info(0).commitType === CommitType.LOAD && valid(deqPtr.value)
io.lsq.pendingst := !io.commits.isWalk && io.commits.info(0).commitType === CommitType.STORE && valid(deqPtr.value)
io.lsq.commit := !io.commits.isWalk && io.commits.valid(0)
/**
* state changes
......@@ -617,8 +651,6 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
wdata.commitType := req.ctrl.commitType
wdata.pdest := req.pdest
wdata.old_pdest := req.old_pdest
wdata.lqIdx := req.lqIdx
wdata.sqIdx := req.sqIdx
wdata.pc := req.cf.pc
wdata.crossPageIPFFix := req.cf.crossPageIPFFix
// wdata.exceptionVec := req.cf.exceptionVec
......@@ -628,13 +660,13 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
writebackData.io.wen := io.exeWbResults.map(_.valid)
writebackData.io.waddr := io.exeWbResults.map(_.bits.uop.roqIdx.value)
writebackData.io.wdata.zip(io.exeWbResults.map(_.bits)).map{ case (wdata, wb) =>
wdata.fflags := wb.fflags
wdata.flushPipe := wb.uop.ctrl.flushPipe
}
writebackData.io.raddr := commitReadAddr_next
for (i <- 0 until 16) {
val exceptionData = Module(new SyncDataModuleTemplate(Bool(), RoqSize, CommitWidth, RenameWidth + writebackCount(i)))
exceptionData.suggestName("exceptionData")
var wPortIdx = 0
for (j <- 0 until RenameWidth) {
exceptionData.io.wen (wPortIdx) := canEnqueue(j)
......@@ -675,6 +707,30 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
exceptionDataRead.zip(exceptionData.io.rdata).map{ case (d, r) => d(i) := r }
}
val fflagsDataModule = Module(new SyncDataModuleTemplate(UInt(5.W), RoqSize, CommitWidth, 7))
var wPortIdx = 0
// 4 FMACs
for (i <- 0 until 4) {
fflagsDataModule.io.wen (wPortIdx) := io.exeWbResults(8+i).valid
fflagsDataModule.io.waddr(wPortIdx) := io.exeWbResults(8+i).bits.uop.roqIdx.value
fflagsDataModule.io.wdata(wPortIdx) := io.exeWbResults(8+i).bits.fflags
wPortIdx = wPortIdx + 1
}
// 2 FMISCs (the first one includes I2F from JumpUnit)
for (i <- 0 until 2) {
fflagsDataModule.io.wen (wPortIdx) := io.exeWbResults(14+i).valid
fflagsDataModule.io.waddr(wPortIdx) := io.exeWbResults(14+i).bits.uop.roqIdx.value
fflagsDataModule.io.wdata(wPortIdx) := io.exeWbResults(14+i).bits.fflags
wPortIdx = wPortIdx + 1
}
// 1 FMISC (Int Wb)
fflagsDataModule.io.wen (wPortIdx) := io.exeWbResults(7).valid
fflagsDataModule.io.waddr(wPortIdx) := io.exeWbResults(7).bits.uop.roqIdx.value
fflagsDataModule.io.wdata(wPortIdx) := io.exeWbResults(7).bits.fflags
fflagsDataModule.io.raddr := VecInit(deqPtrVec_next.map(_.value))
fflagsDataRead := fflagsDataModule.io.rdata
/**
* debug info
*/
......@@ -718,55 +774,59 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
instrCnt := instrCnt + retireCounter
io.csr.perfinfo.retiredInstr := RegNext(retireCounter)
if(!env.FPGAPlatform) {
//difftest signals
val firstValidCommit = (deqPtr + PriorityMux(io.commits.valid, VecInit(List.tabulate(CommitWidth)(_.U)))).value
val skip = Wire(Vec(CommitWidth, Bool()))
val wen = Wire(Vec(CommitWidth, Bool()))
val wdata = Wire(Vec(CommitWidth, UInt(XLEN.W)))
val wdst = Wire(Vec(CommitWidth, UInt(32.W)))
val diffTestDebugLrScValid = Wire(Vec(CommitWidth, Bool()))
val wpc = Wire(Vec(CommitWidth, UInt(XLEN.W)))
val trapVec = Wire(Vec(CommitWidth, Bool()))
val isRVC = Wire(Vec(CommitWidth, Bool()))
for(i <- 0 until CommitWidth){
// io.commits(i).valid
val idx = deqPtrVec(i).value
val uop = debug_microOp(idx)
val DifftestSkipSC = false
if(!DifftestSkipSC){
skip(i) := (debug_exuDebug(idx).isMMIO || debug_exuDebug(idx).isPerfCnt) && io.commits.valid(i)
}else{
skip(i) := (
debug_exuDebug(idx).isMMIO ||
debug_exuDebug(idx).isPerfCnt ||
uop.ctrl.fuType === FuType.mou && uop.ctrl.fuOpType === LSUOpType.sc_d ||
uop.ctrl.fuType === FuType.mou && uop.ctrl.fuOpType === LSUOpType.sc_w
) && io.commits.valid(i)
}
wen(i) := io.commits.valid(i) && uop.ctrl.rfWen && uop.ctrl.ldest =/= 0.U
wdata(i) := debug_exuData(idx)
wdst(i) := uop.ctrl.ldest
diffTestDebugLrScValid(i) := uop.diffTestDebugLrScValid
wpc(i) := SignExt(uop.cf.pc, XLEN)
trapVec(i) := io.commits.valid(i) && (state===s_idle) && uop.ctrl.isXSTrap
isRVC(i) := uop.cf.brUpdate.pd.isRVC
//difftest signals
val firstValidCommit = (deqPtr + PriorityMux(io.commits.valid, VecInit(List.tabulate(CommitWidth)(_.U)))).value
val skip = Wire(Vec(CommitWidth, Bool()))
val wen = Wire(Vec(CommitWidth, Bool()))
val wdata = Wire(Vec(CommitWidth, UInt(XLEN.W)))
val wdst = Wire(Vec(CommitWidth, UInt(32.W)))
val diffTestDebugLrScValid = Wire(Vec(CommitWidth, Bool()))
val wpc = Wire(Vec(CommitWidth, UInt(XLEN.W)))
val trapVec = Wire(Vec(CommitWidth, Bool()))
val isRVC = Wire(Vec(CommitWidth, Bool()))
for(i <- 0 until CommitWidth) {
// io.commits(i).valid
val idx = deqPtrVec(i).value
val uop = debug_microOp(idx)
val DifftestSkipSC = false
if(!DifftestSkipSC){
skip(i) := (debug_exuDebug(idx).isMMIO || debug_exuDebug(idx).isPerfCnt) && io.commits.valid(i)
}else{
skip(i) := (
debug_exuDebug(idx).isMMIO ||
debug_exuDebug(idx).isPerfCnt ||
uop.ctrl.fuType === FuType.mou && uop.ctrl.fuOpType === LSUOpType.sc_d ||
uop.ctrl.fuType === FuType.mou && uop.ctrl.fuOpType === LSUOpType.sc_w
) && io.commits.valid(i)
}
wen(i) := io.commits.valid(i) && uop.ctrl.rfWen && uop.ctrl.ldest =/= 0.U
wdata(i) := debug_exuData(idx)
wdst(i) := uop.ctrl.ldest
diffTestDebugLrScValid(i) := uop.diffTestDebugLrScValid
wpc(i) := SignExt(uop.cf.pc, XLEN)
trapVec(i) := io.commits.valid(i) && (state===s_idle) && uop.ctrl.isXSTrap
isRVC(i) := uop.cf.brUpdate.pd.isRVC
}
val retireCounterFix = Mux(io.redirectOut.valid, 1.U, retireCounter)
val retirePCFix = SignExt(Mux(io.redirectOut.valid, debug_deqUop.cf.pc, debug_microOp(firstValidCommit).cf.pc), XLEN)
val retireInstFix = Mux(io.redirectOut.valid, debug_deqUop.cf.instr, debug_microOp(firstValidCommit).cf.instr)
val scFailed = !diffTestDebugLrScValid(0) &&
debug_deqUop.ctrl.fuType === FuType.mou &&
(debug_deqUop.ctrl.fuOpType === LSUOpType.sc_d || debug_deqUop.ctrl.fuOpType === LSUOpType.sc_w)
val scFailed = !diffTestDebugLrScValid(0) &&
debug_deqUop.ctrl.fuType === FuType.mou &&
(debug_deqUop.ctrl.fuOpType === LSUOpType.sc_d || debug_deqUop.ctrl.fuOpType === LSUOpType.sc_w)
val hitTrap = trapVec.reduce(_||_)
val trapCode = PriorityMux(wdata.zip(trapVec).map(x => x._2 -> x._1))
val trapPC = SignExt(PriorityMux(wpc.zip(trapVec).map(x => x._2 ->x._1)), XLEN)
if (!env.FPGAPlatform) {
val difftestIntrNO = WireInit(0.U(XLEN.W))
val difftestCause = WireInit(0.U(XLEN.W))
ExcitingUtils.addSink(difftestIntrNO, "difftestIntrNOfromCSR")
ExcitingUtils.addSink(difftestCause, "difftestCausefromCSR")
XSDebug(difftestIntrNO =/= 0.U, "difftest intrNO set %x\n", difftestIntrNO)
val retireCounterFix = Mux(io.redirectOut.valid, 1.U, retireCounter)
val retirePCFix = SignExt(Mux(io.redirectOut.valid, debug_deqUop.cf.pc, debug_microOp(firstValidCommit).cf.pc), XLEN)
val retireInstFix = Mux(io.redirectOut.valid, debug_deqUop.cf.instr, debug_microOp(firstValidCommit).cf.instr)
ExcitingUtils.addSource(RegNext(retireCounterFix), "difftestCommit", ExcitingUtils.Debug)
ExcitingUtils.addSource(RegNext(retirePCFix), "difftestThisPC", ExcitingUtils.Debug)//first valid PC
......@@ -781,10 +841,6 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
ExcitingUtils.addSource(RegNext(difftestIntrNO), "difftestIntrNO", ExcitingUtils.Debug)
ExcitingUtils.addSource(RegNext(difftestCause), "difftestCause", ExcitingUtils.Debug)
val hitTrap = trapVec.reduce(_||_)
val trapCode = PriorityMux(wdata.zip(trapVec).map(x => x._2 -> x._1))
val trapPC = SignExt(PriorityMux(wpc.zip(trapVec).map(x => x._2 ->x._1)), XLEN)
ExcitingUtils.addSource(RegNext(hitTrap), "trapValid")
ExcitingUtils.addSource(RegNext(trapCode), "trapCode")
ExcitingUtils.addSource(RegNext(trapPC), "trapPC")
......@@ -795,4 +851,23 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
ExcitingUtils.addSource(hitTrap, "XSTRAP", ConnectionType.Debug)
}
}
if (env.DualCoreDifftest) {
difftestIO.commit := RegNext(retireCounterFix)
difftestIO.thisPC := RegNext(retirePCFix)
difftestIO.thisINST := RegNext(retireInstFix)
difftestIO.skip := RegNext(skip.asUInt)
difftestIO.wen := RegNext(wen.asUInt)
difftestIO.wdata := RegNext(wdata)
difftestIO.wdst := RegNext(wdst)
difftestIO.wpc := RegNext(wpc)
difftestIO.isRVC := RegNext(isRVC.asUInt)
difftestIO.scFailed := RegNext(scFailed)
trapIO.valid := RegNext(hitTrap)
trapIO.code := RegNext(trapCode)
trapIO.pc := RegNext(trapPC)
trapIO.cycleCnt := RegNext(GTimer())
trapIO.instrCnt := RegNext(instrCnt)
}
}
......@@ -16,6 +16,7 @@ case class ICacheParameters(
nTLBEntries: Int = 32,
tagECC: Option[String] = None,
dataECC: Option[String] = None,
replacer: Option[String] = Some("random"),
nSDQ: Int = 17,
nRPQ: Int = 16,
nMissEntries: Int = 1,
......@@ -25,7 +26,7 @@ case class ICacheParameters(
def tagCode: Code = Code.fromString(tagECC)
def dataCode: Code = Code.fromString(dataECC)
def replacement = new RandomReplacement(nWays)
def replacement = ReplacementPolicy.fromString(replacer,nWays,nSets)
}
trait HasICacheParameters extends HasL1CacheParameters with HasIFUConst with HasInstrMMIOConst {
......@@ -33,7 +34,7 @@ trait HasICacheParameters extends HasL1CacheParameters with HasIFUConst with Has
val groupAlign = log2Up(cacheParams.blockBytes)
val packetInstNum = packetBytes/instBytes
val packetInstNumBit = log2Up(packetInstNum)
val ptrHighBit = log2Up(groupBytes) - 1
val ptrHighBit = log2Up(groupBytes) - 1
val ptrLowBit = log2Up(packetBytes)
val encUnitBits = 8
val bankRows = 2
......@@ -51,7 +52,7 @@ trait HasICacheParameters extends HasL1CacheParameters with HasIFUConst with Has
//
def encMetaBits = cacheParams.tagCode.width(tagBits)
def metaEntryBits = encMetaBits
def encDataBits = cacheParams.dataCode.width(encUnitBits)
def encDataBits = cacheParams.dataCode.width(encUnitBits)
def dataEntryBits = encDataBits * bankUnitNum
// def encDataBits
// def encCacheline
......@@ -75,7 +76,6 @@ abstract class ICacheBundle extends XSBundle
abstract class ICacheModule extends XSModule
with HasICacheParameters
with ICacheBase
with HasFrontEndExceptionNo
abstract class ICacheArray extends XSModule
......@@ -122,39 +122,6 @@ class ICacheIO extends ICacheBundle
val pd_out = Output(new PreDecodeResp)
}
/* ------------------------------------------------------------
* The 3-stage pipeline register
* ------------------------------------------------------------
*/
trait ICacheBase extends HasICacheParameters
{
//----------------------------
// Stage 1
//----------------------------
// val s1_valid = WireInit(false.B)
val s1_req_pc = Wire(UInt(VAddrBits.W))
val s1_req_mask = Wire(UInt(PredictWidth.W))
val s1_fire = WireInit(false.B)
//----------------------------
// Stage 2
//----------------------------
val s2_valid = RegInit(false.B)
val s2_req_pc = RegEnable(next = s1_req_pc,init = 0.U, enable = s1_fire)
val s2_req_mask = RegEnable(next = s1_req_mask,init = 0.U, enable = s1_fire)
val s2_ready = WireInit(false.B)
val s2_fire = WireInit(false.B)
//----------------------------
// Stage 3
//----------------------------
val s3_valid = RegInit(false.B)
val s3_req_pc = RegEnable(next = s2_req_pc,init = 0.U, enable = s2_fire)
val s3_req_mask = RegEnable(next = s2_req_mask,init = 0.U, enable = s2_fire)
val s3_ready = WireInit(false.B)
}
class ICacheMetaWriteBundle extends ICacheBundle
{
val virIdx = UInt(idxBits.W)
......@@ -255,15 +222,15 @@ class ICacheDataArray extends ICachArray
}
}
val rdatas_decoded = rdatas.map{wdata => wdata.map{ bdata => bdata.map{ unit => cacheParams.dataCode.decode(unit)}}}
val rdata_corrected = VecInit((0 until nWays).map{ w =>
VecInit((0 until nBanks).map{ b =>
val rdata_corrected = VecInit((0 until nWays).map{ w =>
VecInit((0 until nBanks).map{ b =>
VecInit((0 until bankUnitNum).map{ i =>
rdatas_decoded(w)(b)(i).corrected
})
})
})
(0 until nWays).map{ w =>
(0 until nWays).map{ w =>
(0 until blockRows).map{ r =>
io.readResp(w)(r) := Cat(
(0 until bankUnitNum/2).map{ i =>
......@@ -292,7 +259,7 @@ class ICacheDataArray extends ICachArray
for(w <- 0 until nWays){
for(b <- 0 until nBanks){
dataArray(w)(b).io.w.req.valid := io.write.valid && w.U === write_way
dataArray(w)(b).io.w.req.valid := io.write.valid && w.U === write_way
dataArray(w)(b).io.w.req.bits.setIdx := write.virIdx
dataArray(w)(b).io.w.req.bits.data := write_bank_data(b)
}
......@@ -308,67 +275,23 @@ class ICacheDataArray extends ICachArray
*/
class ICache extends ICacheModule
{
// cut a cacheline into a fetch packet
def cutHelper(sourceVec: Vec[UInt], pc: UInt, mask: UInt): UInt = {
val sourceVec_inst = Wire(Vec(blockRows*rowBytes/instBytes,UInt(insLen.W)))
(0 until blockRows).foreach{ i =>
(0 until rowBytes/instBytes).foreach{ j =>
sourceVec_inst(i*rowBytes/instBytes + j) := sourceVec(i)(j*insLen+insLen-1, j*insLen)
}
}
val cutPacket = WireInit(VecInit(Seq.fill(PredictWidth){0.U(insLen.W)}))
val start = Cat(pc(ptrHighBit,ptrLowBit),0.U(packetInstNumBit.W))
(0 until PredictWidth ).foreach{ i =>
cutPacket(i) := Mux(mask(i).asBool,sourceVec_inst(start + i.U),0.U)
}
cutPacket.asUInt
}
  // MMIO variant of cutHelper: cut mmioBeats bus beats into a fetch packet.
  // Returns (packet bits, mask shifted down to the packet's start slot).
  def cutHelperMMIO(sourceVec: Vec[UInt], pc: UInt, mask: UInt) = {
    // Flatten the beat vector into one vector of insLen-bit instruction units.
    val sourceVec_inst = Wire(Vec(mmioBeats * mmioBusBytes/instBytes,UInt(insLen.W)))
    (0 until mmioBeats).foreach{ i =>
      (0 until mmioBusBytes/instBytes).foreach{ j =>
        sourceVec_inst(i*mmioBusBytes/instBytes + j) := sourceVec(i)(j*insLen+insLen-1, j*insLen)
      }
    }
    val cutPacket = WireInit(VecInit(Seq.fill(PredictWidth){0.U(insLen.W)}))
    val insLenLog = log2Ceil(insLen)
    // Start slot: pc divided by the instruction-unit size, wrapped to the beat range.
    val start = (pc >> insLenLog.U)(log2Ceil(mmioBeats * mmioBusBytes/instBytes) -1, 0)
    // Shift the mask so bit 0 lines up with the packet's first slot.
    val outMask = mask >> start
    (0 until PredictWidth ).foreach{ i =>
      cutPacket(i) := Mux(outMask(i).asBool,sourceVec_inst(start + i.U),0.U)
    }
    (cutPacket.asUInt, outMask.asUInt)
  }
// generate the one hot code according to a UInt between 0-8
  // Map a UInt to a 4-bit one-hot by thresholds (>=8 -> b1000, >=4 -> b0100,
  // >=2 -> b0010, else b0001). For a 4-bit bitmask input this selects the
  // highest set bit, i.e. a priority encoder over the low four bits.
  def PriorityMask(sourceVec: UInt) : UInt = {
    val oneHot = Mux(sourceVec >= 8.U, "b1000".U,
      Mux(sourceVec >= 4.U, "b0100".U,
        Mux(sourceVec >= 2.U, "b0010".U, "b0001".U)))
    oneHot
  }
val io = IO(new ICacheIO)
val s2_flush = io.flush(0)
val s3_flush = io.flush(1)
val (s2_flush,s3_flush) = (io.flush(0), io.flush(1))
//----------------------------
// Memory Part
//----------------------------
val metaArray = Module(new ICacheMetaArray)
val dataArray = Module(new ICacheDataArray)
// 256-bit valid
val validArray = RegInit(0.U((nSets * nWays).W))
//----------------------------
// Stage 1
//----------------------------
s1_fire := io.req.valid
s1_req_pc := io.req.bits.addr
s1_req_mask := io.req.bits.mask
s2_ready := WireInit(false.B)
// s1_fire := s1_valid && (s2_ready || s2_flush)
val req_in = io.req.bits
val req_valid = io.req.valid
val (s1_fire, s1_req_pc, s1_req_mask) = {(req_valid, req_in.addr, req_in.mask)}
// SRAM(Meta and Data) read request
val s1_idx = get_idx(s1_req_pc)
......@@ -378,41 +301,51 @@ class ICache extends ICacheModule
dataArray.io.read.valid := s1_fire
dataArray.io.read.bits :=s1_idx
XSDebug("[Stage 1] r : f (%d %d) request pc: 0x%x mask: %b\n",s2_ready,s1_fire,s1_req_pc,s1_req_mask)
XSDebug("[Stage 1] index: %d\n",s1_idx)
// XSDebug("[Stage 1] r : f (%d %d) request pc: 0x%x mask: %b\n",s2_ready,s1_fire,s1_req_pc,s1_req_mask)
// XSDebug("[Stage 1] index: %d\n",s1_idx)
//----------------------------
// Stage 2
// Stage 2
//----------------------------
val s2_idx = get_idx(s2_req_pc)
val s2_tlb_resp = WireInit(io.tlb.resp.bits)
val s2_tag = get_tag(s2_tlb_resp.paddr)
val s2_hit = WireInit(false.B)
val s2_allValid = s2_valid && io.tlb.resp.valid
val s2_mmio = WireInit(false.B)
val s3_ready = WireInit(false.B)
val s2_tlb_resp = WireInit(io.tlb.resp.bits)
val s2_valid = RegInit(false.B)
val s2_req_pc = RegEnable(next = s1_req_pc,init = 0.U, enable = s1_fire)
val s2_req_mask = RegEnable(next = s1_req_mask,init = 0.U, enable = s1_fire)
val (s2_idx, s2_tag) = { (get_idx(s2_req_pc), get_tag(s2_tlb_resp.paddr)) }
val (s2_ready, s2_allValid) = {((s3_ready || !s2_valid), (s2_valid && io.tlb.resp.valid)) }
val s2_fire = s2_allValid && s3_ready
s2_fire := s2_allValid && s3_ready
s2_ready := s3_ready || !s2_valid
when(s1_fire) { s2_valid := true.B }
.elsewhen(s2_flush) { s2_valid := false.B }
.elsewhen(s2_fire) { s2_valid := false.B }
// SRAM(Meta and Data) read reseponse
// TODO :Parity wrong excetion
val metas = metaArray.io.readResp
val datas =RegEnable(next=dataArray.io.readResp, enable=s2_fire)
val (metas, datas) = {(metaArray.io.readResp , RegEnable(next=dataArray.io.readResp, enable=s2_fire))}
val validMeta = Cat((0 until nWays).map{w => validArray(Cat(s2_idx, w.U(log2Ceil(nWays).W)))}.reverse).asUInt
// hit check and generate victim cacheline mask
  // Map a UInt to a 4-bit one-hot by thresholds (>=8 -> b1000, >=4 -> b0100,
  // >=2 -> b0010, else b0001). With invalidVec as input this picks the
  // highest invalid way as a one-hot refill mask.
  def PriorityMask(sourceVec: UInt) : UInt = {
    val oneHot = Mux(sourceVec >= 8.U, "b1000".U,
      Mux(sourceVec >= 4.U, "b0100".U,
        Mux(sourceVec >= 2.U, "b0010".U, "b0001".U)))
    oneHot
  }
val hitVec = VecInit((0 until nWays).map{w => metas(w)=== s2_tag && validMeta(w) === 1.U})
val victimWayMask = (1.U << LFSR64()(log2Up(nWays)-1,0))
val invalidVec = ~validMeta
val hasInvalidWay = invalidVec.orR
val refillInvalidWaymask = PriorityMask(invalidVec)
val replacer = cacheParams.replacement
val victimWayMask = UIntToOH(replacer.way(s2_idx))
when(s2_hit) {replacer.access(s2_idx, OHToUInt(hitVec))}
//deal with icache exception
val icacheExceptionVec = Wire(Vec(8,Bool()))
......@@ -422,47 +355,62 @@ class ICache extends ICacheModule
icacheExceptionVec(pageFault) := s2_tlb_resp.excp.pf.instr && s2_allValid
s2_mmio := s2_valid && io.tlb.resp.valid && s2_tlb_resp.mmio && !hasIcacheException
s2_hit := s2_valid && ParallelOR(hitVec)
s2_hit := s2_valid && ParallelOR(hitVec)
val waymask = Mux(hasIcacheException,1.U(nWays.W),Mux(s2_hit, hitVec.asUInt, Mux(hasInvalidWay, refillInvalidWaymask, victimWayMask)))
assert(!(s2_hit && s2_mmio),"MMIO address should not hit in icache")
XSDebug("[Stage 2] v : r : f (%d %d %d) pc: 0x%x mask: %b mmio:%d \n",s2_valid,s3_ready,s2_fire,s2_req_pc,s2_req_mask,s2_mmio)
XSDebug("[Stage 2] exception: af:%d pf:%d \n",icacheExceptionVec(accessFault),icacheExceptionVec(pageFault))
XSDebug(p"[Stage 2] tlb req: v ${io.tlb.req.valid} r ${io.tlb.req.ready} ${io.tlb.req.bits}\n")
XSDebug(p"[Stage 2] tlb resp: v ${io.tlb.resp.valid} r ${io.tlb.resp.ready} ${s2_tlb_resp}\n")
XSDebug("[Stage 2] tag: %x hit:%d mmio:%d\n",s2_tag,s2_hit,s2_mmio)
XSDebug("[Stage 2] validMeta: %b victimWayMaks:%b invalidVec:%b hitVec:%b waymask:%b \n",validMeta,victimWayMask,invalidVec.asUInt,hitVec.asUInt,waymask.asUInt)
//----------------------------
// Stage 3
//----------------------------
val s3_valid = RegInit(false.B)
val s3_miss = WireInit(false.B)
val s3_req_pc = RegEnable(next = s2_req_pc,init = 0.U, enable = s2_fire)
val s3_req_mask = RegEnable(next = s2_req_mask,init = 0.U, enable = s2_fire)
val s3_tlb_resp = RegEnable(next = s2_tlb_resp, init = 0.U.asTypeOf(new TlbResp), enable = s2_fire)
val s3_data = datas
val s3_tag = RegEnable(s2_tag, s2_fire)
val s3_hit = RegEnable(next=s2_hit,init=false.B,enable=s2_fire)
val s3_mmio = RegEnable(next=s2_mmio,init=false.B,enable=s2_fire)
val s3_wayMask = RegEnable(next=waymask,init=0.U,enable=s2_fire)
val s3_idx = get_idx(s3_req_pc)
val s3_exception_vec = RegEnable(next= icacheExceptionVec,init=0.U.asTypeOf(Vec(8,Bool())), enable=s2_fire)
val s3_has_exception = s3_exception_vec.asUInt.orR
val s3_miss = s3_valid && !s3_hit && !s3_mmio && !s3_has_exception
val s3_has_exception = RegEnable(next= hasIcacheException,init=false.B,enable=s2_fire)
val s3_idx = get_idx(s3_req_pc)
val s3_data = datas
when(s3_flush) { s3_valid := false.B }
.elsewhen(s2_fire && !s2_flush) { s3_valid := true.B }
.elsewhen(io.resp.fire()) { s3_valid := false.B }
// icache hit
// data Parity encoding
// simply cut the hit cacheline
/* icache hit
* simply cut the cacheline into a fetchpacket according to the req_pc
* use hitVec to do data way choosing
*/
  // Cut a cacheline into a PredictWidth-wide fetch packet, starting at the
  // packet-aligned slot selected by pc; mask zeroes out slots outside the
  // requested fetch window.
  def cutHelper(sourceVec: Vec[UInt], pc: UInt, mask: UInt): UInt = {
    // Flatten the blockRows rows into one vector of insLen-bit instruction units.
    val sourceVec_inst = Wire(Vec(blockRows*rowBytes/instBytes,UInt(insLen.W)))
    (0 until blockRows).foreach{ i =>
      (0 until rowBytes/instBytes).foreach{ j =>
        sourceVec_inst(i*rowBytes/instBytes + j) := sourceVec(i)(j*insLen+insLen-1, j*insLen)
      }
    }
    val cutPacket = WireInit(VecInit(Seq.fill(PredictWidth){0.U(insLen.W)}))
    // Start index: pc(ptrHighBit, ptrLowBit) padded to a packet boundary.
    val start = Cat(pc(ptrHighBit,ptrLowBit),0.U(packetInstNumBit.W))
    (0 until PredictWidth ).foreach{ i =>
      // Masked-off slots are forced to zero.
      cutPacket(i) := Mux(mask(i).asBool,sourceVec_inst(start + i.U),0.U)
    }
    cutPacket.asUInt
  }
val dataHitWay = Mux1H(s3_wayMask,s3_data)
val outPacket = Wire(UInt((FetchWidth * 32).W))
outPacket := cutHelper(dataHitWay,s3_req_pc.asUInt,s3_req_mask.asUInt)
//ICache MissQueue
/* icache miss
* send a miss req to ICache Miss Queue, excluding exception/flush/blocking
* block the pipeline until refill finishes
*/
val icacheMissQueue = Module(new IcacheMissQueue)
val blocking = RegInit(false.B)
val isICacheResp = icacheMissQueue.io.resp.valid && icacheMissQueue.io.resp.bits.clientID === cacheID.U(2.W)
......@@ -474,17 +422,19 @@ class ICache extends ICacheModule
when(icacheMissQueue.io.req.fire() || io.mmio_acquire.fire()){blocking := true.B}
.elsewhen(blocking && ((icacheMissQueue.io.resp.fire() && isICacheResp) || io.mmio_grant.fire() || s3_flush) ){blocking := false.B}
XSDebug(blocking && s3_flush,"check for icache non-blocking")
//cache flush register
/* icache flush
* backend send fence.i signal to flush all the cacheline in icache for consistency
* set a flag to inform the refill meta that should not write in validArray
*/
val icacheFlush = io.fencei
val cacheflushed = RegInit(false.B)
XSDebug("[Fence.i] icacheFlush:%d, cacheflushed:%d\n",icacheFlush,cacheflushed)
when(icacheFlush && blocking && !isICacheResp){ cacheflushed := true.B}
.elsewhen(isICacheResp && cacheflushed) {cacheflushed := false.B }
//TODO: Prefetcher
//refill write
XSDebug(blocking && s3_flush,"WARNING:icache non-blocking happens")
//refill meta write
val metaWriteReq = icacheMissQueue.io.meta_write.bits
icacheMissQueue.io.meta_write.ready := true.B
metaArray.io.write.valid := icacheMissQueue.io.meta_write.valid
......@@ -498,7 +448,7 @@ class ICache extends ICacheModule
validArray := validArray.bitSet(validPtr, true.B)
}
//data
//refill data write
icacheMissQueue.io.refill.ready := true.B
val refillReq = icacheMissQueue.io.refill.bits
dataArray.io.write.valid := icacheMissQueue.io.refill.valid
......@@ -506,25 +456,55 @@ class ICache extends ICacheModule
idx=refillReq.refill_idx,
waymask=refillReq.refill_waymask)
//icache flush: only flush valid Array register
s3_ready := ((io.resp.ready && s3_hit || !s3_valid) && !blocking) || (blocking && ((icacheMissQueue.io.resp.fire()) || io.mmio_grant.fire()))
when(icacheFlush){ validArray := 0.U }
XSDebug(icacheFlush,"WARNING:icache flush happens")
/* refill output
* cut the refill data cacheline into a fetch packet for responsing to predecoder
*/
val refillDataVec = icacheMissQueue.io.resp.bits.data.asTypeOf(Vec(blockRows,UInt(wordBits.W)))
val refillDataOut = cutHelper(refillDataVec, s3_req_pc,s3_req_mask )
// deal with same cacheline miss in s3 and s2
val is_same_cacheline = s3_miss && s2_valid && (groupAligned(s2_req_pc) ===groupAligned(s3_req_pc))
val useRefillReg = RegNext(is_same_cacheline && icacheMissQueue.io.resp.fire())
val refillDataVecReg = RegEnable(next=refillDataVec, enable= (is_same_cacheline && icacheMissQueue.io.resp.fire()))
//FIXME!!
s3_miss := s3_valid && !s3_hit && !s3_mmio && !s3_has_exception && !useRefillReg
/* mmio response output
* cut the mmio response data cacheline into a fetch packet for responsing to predecoder
* TODO: no need to wait for a whole fetch packet(once per beat)?
*/
  // MMIO variant of cutHelper: cut mmioBeats bus beats into a fetch packet.
  // Returns (packet bits, mask shifted down to the packet's start slot).
  def cutHelperMMIO(sourceVec: Vec[UInt], pc: UInt, mask: UInt) = {
    // Flatten the beat vector into one vector of insLen-bit instruction units.
    val sourceVec_inst = Wire(Vec(mmioBeats * mmioBusBytes/instBytes,UInt(insLen.W)))
    (0 until mmioBeats).foreach{ i =>
      (0 until mmioBusBytes/instBytes).foreach{ j =>
        sourceVec_inst(i*mmioBusBytes/instBytes + j) := sourceVec(i)(j*insLen+insLen-1, j*insLen)
      }
    }
    val cutPacket = WireInit(VecInit(Seq.fill(PredictWidth){0.U(insLen.W)}))
    val insLenLog = log2Ceil(insLen)
    // Start slot: pc divided by the instruction-unit size, wrapped to the beat range.
    val start = (pc >> insLenLog.U)(log2Ceil(mmioBeats * mmioBusBytes/instBytes) -1, 0)
    // Shift the mask so bit 0 lines up with the packet's first slot.
    val outMask = mask >> start
    (0 until PredictWidth ).foreach{ i =>
      cutPacket(i) := Mux(outMask(i).asBool,sourceVec_inst(start + i.U),0.U)
    }
    (cutPacket.asUInt, outMask.asUInt)
  }
val mmioDataVec = io.mmio_grant.bits.data.asTypeOf(Vec(mmioBeats,UInt(mmioBusWidth.W)))
val mmio_packet = cutHelperMMIO(mmioDataVec, s3_req_pc, mmioMask)._1
val mmio_mask = cutHelperMMIO(mmioDataVec, s3_req_pc, mmioMask)._2
val (mmio_packet,mmio_mask) = cutHelperMMIO(mmioDataVec, s3_req_pc, mmioMask)
XSDebug("mmio data %x\n", mmio_packet)
s3_ready := ((io.resp.ready && s3_hit || !s3_valid) && !blocking) || (blocking && ((icacheMissQueue.io.resp.fire()) || io.mmio_grant.fire()))
val pds = Seq.fill(nWays)(Module(new PreDecode))
......@@ -542,26 +522,11 @@ class ICache extends ICacheModule
pds(i).io.prev <> io.prev
pds(i).io.prev_pc := io.prev_pc
}
// if a fetch packet triggers page fault, at least send a valid instruction
io.pd_out := Mux1H(s3_wayMask, pds.map(_.io.out))
val s3_noHit = s3_wayMask === 0.U
//TODO: coherence
XSDebug("[Stage 3] valid:%d miss:%d pc: 0x%x mmio :%d mask: %b ipf:%d\n",s3_valid, s3_miss,s3_req_pc,s3_req_mask,s3_tlb_resp.excp.pf.instr, s3_mmio)
XSDebug("[Stage 3] hit:%d miss:%d waymask:%x blocking:%d\n",s3_hit,s3_miss,s3_wayMask.asUInt,blocking)
XSDebug("[Stage 3] tag: %x idx: %d\n",s3_tag,get_idx(s3_req_pc))
XSDebug(p"[Stage 3] tlb resp: ${s3_tlb_resp}\n")
XSDebug("[mem_acquire] valid:%d ready:%d\n",io.mem_acquire.valid,io.mem_acquire.ready)
XSDebug("[mem_grant] valid:%d ready:%d data:%x id:%d \n",io.mem_grant.valid,io.mem_grant.ready,io.mem_grant.bits.data,io.mem_grant.bits.id)
XSDebug("[Stage 3] ---------Hit Way--------- \n")
for(i <- 0 until blockRows){
XSDebug("[Stage 3] %x\n",dataHitWay(i))
}
XSDebug("[Stage 3] outPacket :%x\n",outPacket)
XSDebug("[Stage 3] refillDataOut :%x\n",refillDataOut)
XSDebug("[Stage 3] refillDataOutVec :%x startPtr:%d\n",refillDataVec.asUInt, s3_req_pc(5,1).asUInt)
//----------------------------
// Out Put
......@@ -571,9 +536,9 @@ class ICache extends ICacheModule
//icache response: to pre-decoder
io.resp.valid := s3_valid && (s3_hit || s3_has_exception || icacheMissQueue.io.resp.valid || io.mmio_grant.valid)
io.resp.bits.data := Mux(s3_mmio,mmio_packet,Mux((s3_valid && s3_hit),outPacket,refillDataOut))
io.resp.bits.mask := Mux(s3_mmio,mmio_mask,s3_req_mask)
io.resp.bits.pc := s3_req_pc
io.resp.bits.data := DontCare
io.resp.bits.ipf := s3_tlb_resp.excp.pf.instr
io.resp.bits.acf := s3_exception_vec(accessFault)
io.resp.bits.mmio := s3_mmio
......@@ -589,7 +554,7 @@ class ICache extends ICacheModule
//To L1 plus
io.mem_acquire <> icacheMissQueue.io.mem_acquire
icacheMissQueue.io.mem_grant <> io.mem_grant
// to train l1plus prefetcher
io.prefetchTrainReq.valid := s3_valid && icacheMissQueue.io.req.fire()
io.prefetchTrainReq.bits := DontCare
......@@ -608,6 +573,61 @@ class ICache extends ICacheModule
XSDebug("[flush] flush_0:%d flush_1:%d\n",s2_flush,s3_flush)
def dump_s1_info() = {
XSDebug("[Stage 1] r : f (%d %d) request pc: 0x%x mask: %b\n",s2_ready,s1_fire,s1_req_pc,s1_req_mask)
XSDebug("[Stage 1] virtula index: %x\n",s1_idx)
}
def dump_s2_info() = {
XSDebug("[Stage 2] v : r : f (%d %d %d) pc: 0x%x mask: %b mmio:%d \n",s2_valid,s3_ready,s2_fire,s2_req_pc,s2_req_mask,s2_mmio)
XSDebug("[Stage 2] exception: af:%d pf:%d \n",icacheExceptionVec(accessFault),icacheExceptionVec(pageFault))
XSDebug(p"[Stage 2] tlb req: v ${io.tlb.req.valid} r ${io.tlb.req.ready} ${io.tlb.req.bits}\n")
XSDebug(p"[Stage 2] tlb resp: v ${io.tlb.resp.valid} r ${io.tlb.resp.ready} ${s2_tlb_resp}\n")
XSDebug("[Stage 2] tag: %x idx:%x hit:%d mmio:%d\n",s2_tag,s2_idx,s2_hit,s2_mmio)
XSDebug("[Stage 2] validMeta: %b victimWayMaks:%b invalidVec:%b hitVec:%b waymask:%b \n",validMeta,victimWayMask,invalidVec.asUInt,hitVec.asUInt,waymask.asUInt)
}
def dump_s3_info() = {
XSDebug("[Stage 3] valid:%d miss:%d pc: 0x%x mmio :%d mask: %b ipf:%d\n",s3_valid, s3_miss,s3_req_pc,s3_req_mask,s3_tlb_resp.excp.pf.instr, s3_mmio)
XSDebug("[Stage 3] hit:%d miss:%d waymask:%x blocking:%d\n",s3_hit,s3_miss,s3_wayMask.asUInt,blocking)
XSDebug("[Stage 3] tag: %x idx: %d\n",s3_tag,get_idx(s3_req_pc))
XSDebug(p"[Stage 3] tlb resp: ${s3_tlb_resp}\n")
XSDebug(s3_hit && io.resp.fire(),"[Stage 3] ---------Hit Way--------- \n")
for(i <- 0 until blockRows){
XSDebug(s3_hit && io.resp.fire(),"[Stage 3] (%d) %x\n",i.U,dataHitWay(i))
}
XSDebug("[Stage 3] outPacket :%x\n",outPacket)
XSDebug("[Stage 3] startPtr:%d refillDataOut :%x\n",Cat(s3_req_pc(ptrHighBit,ptrLowBit),0.U(packetInstNumBit.W)),refillDataVec.asUInt)
XSDebug(icacheMissQueue.io.resp.fire(),"[Stage 3] ---------refill cacheline--------- \n")
for(i <- 0 until blockRows){
XSDebug(icacheMissQueue.io.resp.fire(),"[Stage 3] (%d) %x\n",i.U,refillDataVec(i))
}
XSDebug(is_same_cacheline,"WARNING: same cacheline happen!")
}
def dump_mem_info() = {
val toMem = io.mem_acquire
val fromMem = io.mem_grant
XSDebug(toMem.fire(),"[mem_acquire] valid:%d ready:%d\n",toMem.valid,toMem.ready)
XSDebug(fromMem.fire(),"[mem_grant] valid:%d ready:%d data:%x id:%d \n",fromMem.valid,fromMem.ready,fromMem.bits.data,fromMem.bits.id)
}
def dump_mmio_info() = {
val toMMIO = io.mmio_acquire
val fromMMMIO = io.mmio_grant
XSDebug(toMMIO.fire(),"[mmio_acquire] valid:%d ready:%d\n",toMMIO.valid,toMMIO.ready)
XSDebug(fromMMMIO.fire(),"[mmio_grant] valid:%d ready:%d data:%x id:%d \n",fromMMMIO.valid,fromMMMIO.ready,fromMMMIO.bits.data,fromMMMIO.bits.id)
}
def dump_pipe_info(){
dump_s1_info()
dump_s2_info()
dump_s3_info()
dump_mem_info()
dump_mmio_info()
}
dump_pipe_info()
//Performance Counter
if (!env.FPGAPlatform ) {
ExcitingUtils.addSource( s3_valid && !blocking, "perfCntIcacheReqCnt", Perf)
......
......@@ -167,7 +167,7 @@ class RecentRequestTable(p: BOPParameters) extends PrefetchModule {
rrTable.io.r.req.bits.setIdx := idx(rAddr)
rData := rrTable.io.r.resp.data(0)
val rwConflict = io.w.fire() && io.r.req.fire() && idx(wAddr) === idx(rAddr)
val rwConflict = io.w.fire() && io.r.req.fire()// && idx(wAddr) === idx(rAddr)
// when (rwConflict) {
// rrTable.io.r.req.valid := false.B
// }
......@@ -295,7 +295,7 @@ class OffsetScoreTable(p: BOPParameters) extends PrefetchModule {
XSDebug(io.req.fire(), p"receive req from L1. io.req.bits=0x${Hexadecimal(io.req.bits)}\n")
}
class BestOffsetPrefetchEntry(p: BOPParameters) extends PrefetchModule {
class BestOffsetPrefetchEntry(p: BOPParameters) extends PrefetchModule with HasTlbConst {
val io = IO(new Bundle {
val id = Input(UInt(p.totalWidth.W))
val prefetchOffset = Input(UInt(p.offsetWidth.W))
......@@ -305,19 +305,27 @@ class BestOffsetPrefetchEntry(p: BOPParameters) extends PrefetchModule {
})
def blockBytes = p.blockBytes
def getBlockAddr(addr: UInt) = Cat(addr(PAddrBits - 1, log2Up(blockBytes)), 0.U(log2Up(blockBytes).W))
def getBlock(addr: UInt) = addr(PAddrBits - 1, log2Up(blockBytes))
def getBlockAddr(addr: UInt) = Cat(getBlock(addr), 0.U(log2Up(blockBytes).W))
def getPageNum(addr: UInt) = addr(PAddrBits - 1, offLen)
val s_idle :: s_req :: s_resp :: s_write_recent_req :: s_finish :: Nil = Enum(5)
val state = RegInit(s_idle)
val req = RegInit(0.U.asTypeOf(new PrefetchReq))
val baseAddr = RegInit(0.U(PAddrBits.W))
val baseBlock = getBlock(io.pft.train.bits.addr)
val nextBlock = baseBlock + io.prefetchOffset
val nextAddr = Cat(nextBlock, 0.U(log2Up(blockBytes).W))
val crossPage = getPageNum(nextAddr) =/= getPageNum(io.pft.train.bits.addr)
when (state === s_idle) {
when (io.pft.train.valid) {
state := s_req
req.addr := getBlockAddr(io.pft.train.bits.addr) + (io.prefetchOffset << log2Up(blockBytes))
// state := s_req
state := Mux(crossPage, s_idle, s_req)
req.addr := nextAddr
req.write := io.pft.train.bits.write
baseAddr := getBlockAddr(io.pft.train.bits.addr)
XSDebug(crossPage, p"prefetch addr 0x${nextAddr} cross page, ignore this!\n")
}
}
......@@ -357,7 +365,7 @@ class BestOffsetPrefetchEntry(p: BOPParameters) extends PrefetchModule {
io.writeRRTable.valid := state === s_write_recent_req
io.writeRRTable.bits := baseAddr // write this into recent request table
XSDebug(p"bopEntry ${io.id}: state=${state} prefetchOffset=${io.prefetchOffset} inflight=${io.inflight.valid} 0x${Hexadecimal(io.inflight.bits)} writeRRTable: ${io.writeRRTable.valid} 0x${Hexadecimal(io.writeRRTable.bits)} baseAddr=0x${Hexadecimal(baseAddr)} req: ${req}\n")
XSDebug(p"bopEntry ${io.id}: state=${state} prefetchOffset=${io.prefetchOffset} inflight=${io.inflight.valid} 0x${Hexadecimal(io.inflight.bits)} writeRRTable: ${io.writeRRTable.valid} 0x${Hexadecimal(io.writeRRTable.bits)} baseAddr=0x${Hexadecimal(baseAddr)} nextAddr=0x${Hexadecimal(nextAddr)} crossPage=${crossPage} req: ${req}\n")
XSDebug(p"bopEntry ${io.id}: io.pft: ${io.pft}\n")
}
......
......@@ -85,7 +85,7 @@ class StreamBufferAlloc(p: StreamPrefetchParameters) extends StreamPrefetchReq(p
}
class StreamBuffer(p: StreamPrefetchParameters) extends PrefetchModule {
class StreamBuffer(p: StreamPrefetchParameters) extends PrefetchModule with HasTlbConst {
val io = IO(new Bundle {
val streamBufId = Input(UInt(log2Up(streamCnt).W))
val addrs = Vec(p.streamSize, ValidIO(UInt(PAddrBits.W)))
......@@ -102,6 +102,7 @@ class StreamBuffer(p: StreamPrefetchParameters) extends PrefetchModule {
def blockBytes = p.blockBytes
// def getBlockAddr(addr: UInt) = addr & ~((blockBytes - 1).U(addr.getWidth.W))
def getBlockAddr(addr: UInt) = Cat(addr(PAddrBits - 1, log2Up(p.blockBytes)), 0.U(log2Up(p.blockBytes).W))
def getPageNum(addr: UInt) = addr(PAddrBits - 1, offLen)
val baseReq = RegInit(0.U.asTypeOf(Valid(new PrefetchReq)))
val nextReq = RegInit(0.U.asTypeOf(new PrefetchReq))
......@@ -163,11 +164,17 @@ class StreamBuffer(p: StreamPrefetchParameters) extends PrefetchModule {
}
// enqueue
val nextAddrCrossPage = getPageNum(baseReq.bits.addr) =/= getPageNum(nextReq.addr)
when (!full && baseReq.valid && !needRealloc) {
state(tail) := s_req
tail := tail + 1.U
buf(tail) := nextReq
nextReq.addr := nextReq.addr + blockBytes.U
when (!nextAddrCrossPage) {
state(tail) := s_req
tail := tail + 1.U
buf(tail) := nextReq
nextReq.addr := nextReq.addr + blockBytes.U
XSDebug(p"enqueue 0x${nextReq.addr}\n")
}.otherwise {
XSDebug(p"addr 0x${nextReq.addr} could not enqueue for crossing pages\n")
}
}
val reqs = Wire(Vec(streamSize, Decoupled(new StreamPrefetchReq(p))))
......@@ -259,7 +266,7 @@ class StreamBuffer(p: StreamPrefetchParameters) extends PrefetchModule {
p"deqLater: ${deqLater(i)} deqValid: ${deqValid(i)}\n")
}
XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} head: ${head} tail: ${tail} full: ${full} empty: ${empty} nextHead: ${nextHead} blockBytes: ${blockBytes.U}\n")
XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} baseReq: v=${baseReq.valid} ${baseReq.bits} nextReq: ${nextReq}\n")
XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} baseReq: v=${baseReq.valid} ${baseReq.bits} nextReq: ${nextReq} crossPage: ${nextAddrCrossPage}\n")
XSDebug(needRealloc, s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} needRealloc: ${needRealloc} reallocReq: ${reallocReq}\n")
XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} prefetchPrior: ")
(0 until streamSize).foreach(i => XSDebug(false, true.B, p"${prefetchPrior(i)} "))
......@@ -312,38 +319,40 @@ class StreamPrefetch(p: StreamPrefetchParameters) extends PrefetchModule {
// 1. streamBufs hit while l1i miss
val hit = WireInit(false.B)
val hitVec = WireInit(VecInit(Seq.fill(streamCnt * streamSize)(false.B)))
for (i <- 0 until streamCnt) {
for (j <- 0 until streamSize) {
when (io.train.valid && addrValids(i)(j) && getBlockAddr(io.train.bits.addr) === streamBufs(i).io.addrs(j).bits) {
hit := true.B
// hit := true.B
hitVec(i*streamSize+j) := true.B
streamBufs(i).io.update.valid := true.B
streamBufs(i).io.update.bits.hitIdx := j.U
ages(i) := maxAge
}
}
}
hit := ParallelOR(hitVec)
// 2. streamBufs miss
val allocIdx = Wire(UInt(log2Up(streamCnt).W))
val ageCmp = Seq.fill(streamCnt)(Wire(new CompareBundle(ageWidth)))
(0 until streamCnt).foreach(i => ageCmp(i).bits := ages(i))
(0 until streamCnt).foreach(i => ageCmp(i).idx := i.U)
when ((~bufValids.asUInt).orR) {
allocIdx := PriorityMux(~bufValids.asUInt, VecInit(List.tabulate(streamCnt)(_.U)))
}.otherwise {
allocIdx := ParallelMin(ageCmp).idx
}
when (!hit && io.train.valid) {
(0 until streamCnt).foreach(i => ages(i) := Mux(ages(i) =/= 0.U, ages(i) - 1.U, 0.U))
// realloc an invalid or the eldest stream buffer with new one
val idx = Wire(UInt(log2Up(streamCnt).W))
when ((~bufValids.asUInt).orR) {
idx := PriorityMux(~bufValids.asUInt, VecInit(List.tabulate(streamCnt)(_.U)))
}.otherwise {
val ageCmp = Seq.fill(streamCnt)(Wire(new CompareBundle(ageWidth)))
(0 until streamCnt).foreach(i => ageCmp(i).bits := ages(i))
(0 until streamCnt).foreach(i => ageCmp(i).idx := i.U)
idx := ParallelMin(ageCmp).idx
}
for (i <- 0 until streamCnt) {
streamBufs(i).io.alloc.valid := idx === i.U
streamBufs(i).io.alloc.valid := allocIdx === i.U
streamBufs(i).io.alloc.bits := DontCare
streamBufs(i).io.alloc.bits.addr := io.train.bits.addr
streamBufs(i).io.alloc.bits.write := io.train.bits.write
when (idx === i.U) { ages(i) := maxAge }
when (allocIdx === i.U) { ages(i) := maxAge }
}
}
......
......@@ -104,6 +104,14 @@ class BIM extends BasePredictor with BimParams {
bim(b).io.w.req.bits.data := Mux(doing_reset, 2.U(2.W), newCtr)
}
if (!env.FPGAPlatform && env.EnablePerfDebug) {
val bimResp = Wire(Vec(PredictWidth, Bool()))
for(i <- 0 until PredictWidth) {
bimResp(i) := io.resp.ctrs(i)(1)
}
ExcitingUtils.addSource(bimResp, "bimResp")
}
if (BPUDebug && debug) {
XSDebug(doing_reset, "Reseting...\n")
XSDebug("[update] v=%d pc=%x pnpc=%x tgt=%x", io.update.valid, u.pc, u.pnpc, u.target)
......
......@@ -206,6 +206,18 @@ class BTB extends BasePredictor with BTBParams{
edata.io.w.req.bits.setIdx := updateRow
edata.io.w.req.bits.data := u.target
if (!env.FPGAPlatform && env.EnablePerfDebug) {
val btbAns = Wire(Vec(PredictWidth, new PredictorAnswer))
btbAns.zipWithIndex.foreach{ case(x,i) =>
x.hit := io.resp.hits(i)
x.taken := DontCare
x.target := io.resp.targets(i)
}
ExcitingUtils.addSource(btbAns, "btbAns")
}
if (BPUDebug && debug) {
val debug_verbose = true
......
......@@ -500,34 +500,45 @@ class IFU extends XSModule with HasIFUConst
io.fetchPacket.bits := fetchPacketWire
io.fetchPacket.valid := fetchPacketValid
// if(IFUDebug) {
if(!env.FPGAPlatform && env.EnablePerfDebug) {
val predictor_s3 = RegEnable(Mux(if3_redirect, 1.U(log2Up(4).W), 0.U(log2Up(4).W)), if3_fire)
val predictor_s4 = Mux(if4_redirect, 2.U, predictor_s3)
val predictor_s4 = Mux(if4_redirect, 2.U(log2Up(4).W), predictor_s3)
val predictor = predictor_s4
fetchPacketWire.bpuMeta.map(_.predictor := predictor)
// }
// val predRight = cfiUpdate.valid && !cfiUpdate.bits.isMisPred && !cfiUpdate.bits.isReplay
// val predWrong = cfiUpdate.valid && cfiUpdate.bits.isMisPred && !cfiUpdate.bits.isReplay
// val ubtbRight = predRight && cfiUpdate.bits.bpuMeta.predictor === 0.U
// val ubtbWrong = predWrong && cfiUpdate.bits.bpuMeta.predictor === 0.U
// val btbRight = predRight && cfiUpdate.bits.bpuMeta.predictor === 1.U
// val btbWrong = predWrong && cfiUpdate.bits.bpuMeta.predictor === 1.U
// val tageRight = predRight && cfiUpdate.bits.bpuMeta.predictor === 2.U
// val tageWrong = predWrong && cfiUpdate.bits.bpuMeta.predictor === 2.U
// val loopRight = predRight && cfiUpdate.bits.bpuMeta.predictor === 3.U
// val loopWrong = predWrong && cfiUpdate.bits.bpuMeta.predictor === 3.U
// ExcitingUtils.addSource(ubtbRight, "perfCntubtbRight", Perf)
// ExcitingUtils.addSource(ubtbWrong, "perfCntubtbWrong", Perf)
// ExcitingUtils.addSource(btbRight, "perfCntbtbRight", Perf)
// ExcitingUtils.addSource(btbWrong, "perfCntbtbWrong", Perf)
// ExcitingUtils.addSource(tageRight, "perfCnttageRight", Perf)
// ExcitingUtils.addSource(tageWrong, "perfCnttageWrong", Perf)
// ExcitingUtils.addSource(loopRight, "perfCntloopRight", Perf)
// ExcitingUtils.addSource(loopWrong, "perfCntloopWrong", Perf)
// io.pc.valid && read_hit_vec.asUInt ubtb hit
val ubtbAns = WireInit(VecInit(Seq.fill(PredictWidth) {0.U.asTypeOf(new PredictorAnswer)} ))
val btbAns = WireInit(VecInit(Seq.fill(PredictWidth) {0.U.asTypeOf(new PredictorAnswer)} ))
val bimResp = WireInit(VecInit(Seq.fill(PredictWidth) {false.B} ))
val tageAns = WireInit(VecInit(Seq.fill(PredictWidth) {0.U.asTypeOf(new PredictorAnswer)} ))
val rasAns = WireInit(0.U.asTypeOf(new PredictorAnswer))
val loopAns = WireInit(VecInit(Seq.fill(PredictWidth) {0.U.asTypeOf(new PredictorAnswer)} ))
ExcitingUtils.addSink(ubtbAns, "ubtbAns")
ExcitingUtils.addSink(btbAns, "btbAns")
ExcitingUtils.addSink(bimResp, "bimResp")
ExcitingUtils.addSink(tageAns, "tageAns")
ExcitingUtils.addSink(rasAns, "rasAns")
ExcitingUtils.addSink(loopAns, "loopAns")
val ubtbAns_s3 = RegEnable(ubtbAns, if2_fire)
val ubtbAns_s4 = RegEnable(ubtbAns_s3, if3_fire)
val btbAns_s3 = RegEnable(btbAns, if2_fire)
val btbAns_s4 = RegEnable(btbAns_s3, if3_fire)
val bimResp_s3 = RegEnable(bimResp, if2_fire)
val bimResp_s4 = RegEnable(bimResp_s3, if3_fire)
fetchPacketWire.bpuMeta.zipWithIndex.foreach{ case(x,i) =>
x.predictor := predictor
x.ubtbAns := ubtbAns_s4(i)
x.btbAns := btbAns_s4(i)
x.btbAns.taken := bimResp_s4(i)
x.tageAns := tageAns(i)
x.rasAns := rasAns // Is this right?
x.loopAns := loopAns(i)
}
}
// debug info
if (IFUDebug) {
......@@ -540,7 +551,6 @@ class IFU extends XSModule with HasIFUConst
XSDebug("[IF2] v=%d r=%d fire=%d redirect=%d flush=%d pc=%x snpc=%x\n", if2_valid, if2_ready, if2_fire, if2_redirect, if2_flush, if2_pc, if2_snpc)
XSDebug("[IF3] v=%d r=%d fire=%d redirect=%d flush=%d pc=%x crossPageIPF=%d sawNTBrs=%d\n", if3_valid, if3_ready, if3_fire, if3_redirect, if3_flush, if3_pc, crossPageIPF, if3_bp.hasNotTakenBrs)
XSDebug("[IF4] v=%d r=%d fire=%d redirect=%d flush=%d pc=%x crossPageIPF=%d sawNTBrs=%d\n", if4_valid, if4_ready, if4_fire, if4_redirect, if4_flush, if4_pc, if4_crossPageIPF, if4_bp.hasNotTakenBrs)
XSDebug("[predictor] predictor_s3=%d, predictor_s4=%d, predictor=%d\n", predictor_s3, predictor_s4, predictor)
XSDebug("[IF1][icacheReq] v=%d r=%d addr=%x\n", icache.io.req.valid, icache.io.req.ready, icache.io.req.bits.addr)
XSDebug("[IF1][ghr] hist=%b\n", if1_gh.asUInt)
XSDebug("[IF1][ghr] extHist=%b\n\n", if1_gh.asUInt)
......
......@@ -403,8 +403,18 @@ class LoopPredictor extends BasePredictor with LTBParams {
io.meta.specCnts(i) := ltbResps(i).meta
}
if (!env.FPGAPlatform) {
if (!env.FPGAPlatform && env.EnablePerfDebug) {
ExcitingUtils.addSource(io.resp.exit.reduce(_||_), "perfCntLoopExit", Perf)
val loopAns = Wire(Vec(PredictWidth, new PredictorAnswer))
loopAns.zipWithIndex.foreach{ case(x,i) =>
x.hit := io.resp.exit(i)
x.taken := false.B
x.target := DontCare
}
ExcitingUtils.addSource(loopAns, "loopAns")
}
if (BPUDebug && debug) {
......
......@@ -227,6 +227,15 @@ class RAS extends BasePredictor
io.meta.rasTopCtr := DontCare
io.meta.rasToqAddr := DontCare
if (!env.FPGAPlatform && env.EnablePerfDebug) {
val rasAns = Wire(new PredictorAnswer)
rasAns.hit := io.out.valid
rasAns.taken := DontCare
rasAns.target := io.out.bits.target
ExcitingUtils.addSource(rasAns, "rasAns")
}
if (BPUDebug && debug) {
val spec_debug = spec.debugIO
val commit_debug = commit.debugIO
......
......@@ -633,7 +633,17 @@ class Tage extends BaseTage {
scTables(i).io.update.fetchIdx := u.bpuMeta.fetchIdx
}
if (!env.FPGAPlatform && env.EnablePerfDebug) {
val tageAns = Wire(Vec(PredictWidth, new PredictorAnswer))
tageAns.zipWithIndex.foreach{ case(x,i) =>
x.hit := io.resp.hits(i)
x.taken := io.resp.takens(i)
x.target := DontCare
}
ExcitingUtils.addSource(tageAns, "tageAns")
}
if (BPUDebug && debug) {
val m = updateMeta
......
......@@ -5,6 +5,7 @@ import chisel3.util._
import utils._
import xiangshan._
import chisel3.experimental.chiselName
import chisel3.ExcitingUtils._
import scala.math.min
......@@ -258,6 +259,19 @@ class MicroBTB extends BasePredictor
metas(b).wdata := Mux(do_reset, 0.U.asTypeOf(new MicroBTBMeta), update_write_meta)
}
if (!env.FPGAPlatform && env.EnablePerfDebug) {
val ubtbAns = Wire(Vec(PredictWidth, new PredictorAnswer))
// ubtbAns.hit := io.pc.valid && read_hit_vec.asUInt.orR
ubtbAns.zipWithIndex.foreach{ case(x,i) =>
x.hit := io.out.hits(i)
x.taken := io.out.takens(i)
x.target := io.out.targets(i)
}
ExcitingUtils.addSource(ubtbAns, "ubtbAns")
}
if (BPUDebug && debug) {
XSDebug(read_valid,"uBTB read req: pc:0x%x, tag:%x \n",io.pc.bits,read_req_tag)
XSDebug(read_valid,"uBTB read resp: read_hit_vec:%b, \n",read_hit_vec.asUInt)
......
......@@ -8,7 +8,7 @@ import xiangshan.cache._
import xiangshan.cache.{DCacheWordIO, DCacheLineIO, TlbRequestIO, MemoryOpConstants}
import xiangshan.backend.LSUOpType
import xiangshan.mem._
import xiangshan.backend.roq.RoqPtr
import xiangshan.backend.roq.RoqLsqIO
class ExceptionAddrIO extends XSBundle {
val lsIdx = Input(new LSIdx)
......@@ -41,18 +41,27 @@ class LsqWrappper extends XSModule with HasDCacheParameters {
val brqRedirect = Input(Valid(new Redirect))
val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle)))
val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))
val loadDataForwarded = Vec(LoadPipelineWidth, Input(Bool()))
val sbuffer = Vec(StorePipelineWidth, Decoupled(new DCacheWordReq))
val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback int load
val mmioStout = DecoupledIO(new ExuOutput) // writeback uncached store
val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
val commits = Flipped(new RoqCommitIO)
val roq = Flipped(new RoqLsqIO)
val rollback = Output(Valid(new Redirect))
val dcache = Flipped(ValidIO(new Refill))
val uncache = new DCacheWordIO
val roqDeqPtr = Input(new RoqPtr)
val exceptionAddr = new ExceptionAddrIO
val sqempty = Output(Bool())
})
val difftestIO = IO(new Bundle() {
val fromSQ = new Bundle() {
val storeCommit = Output(UInt(2.W))
val storeAddr = Output(Vec(2, UInt(64.W)))
val storeData = Output(Vec(2, UInt(64.W)))
val storeMask = Output(Vec(2, UInt(8.W)))
}
})
difftestIO <> DontCare
val loadQueue = Module(new LoadQueue)
val storeQueue = Module(new StoreQueue)
......@@ -82,11 +91,11 @@ class LsqWrappper extends XSModule with HasDCacheParameters {
loadQueue.io.brqRedirect <> io.brqRedirect
loadQueue.io.loadIn <> io.loadIn
loadQueue.io.storeIn <> io.storeIn
loadQueue.io.loadDataForwarded <> io.loadDataForwarded
loadQueue.io.ldout <> io.ldout
loadQueue.io.commits <> io.commits
loadQueue.io.roq <> io.roq
loadQueue.io.rollback <> io.rollback
loadQueue.io.dcache <> io.dcache
loadQueue.io.roqDeqPtr <> io.roqDeqPtr
loadQueue.io.exceptionAddr.lsIdx := io.exceptionAddr.lsIdx
loadQueue.io.exceptionAddr.isStore := DontCare
......@@ -96,8 +105,7 @@ class LsqWrappper extends XSModule with HasDCacheParameters {
storeQueue.io.storeIn <> io.storeIn
storeQueue.io.sbuffer <> io.sbuffer
storeQueue.io.mmioStout <> io.mmioStout
storeQueue.io.commits <> io.commits
storeQueue.io.roqDeqPtr <> io.roqDeqPtr
storeQueue.io.roq <> io.roq
storeQueue.io.exceptionAddr.lsIdx := io.exceptionAddr.lsIdx
storeQueue.io.exceptionAddr.isStore := DontCare
......@@ -106,26 +114,30 @@ class LsqWrappper extends XSModule with HasDCacheParameters {
storeQueue.io.sqempty <> io.sqempty
if (env.DualCoreDifftest) {
difftestIO.fromSQ <> storeQueue.difftestIO
}
io.exceptionAddr.vaddr := Mux(io.exceptionAddr.isStore, storeQueue.io.exceptionAddr.vaddr, loadQueue.io.exceptionAddr.vaddr)
// naive uncache arbiter
val s_idle :: s_load :: s_store :: Nil = Enum(3)
val uncacheState = RegInit(s_idle)
val pendingstate = RegInit(s_idle)
switch(uncacheState){
switch(pendingstate){
is(s_idle){
when(io.uncache.req.fire()){
uncacheState := Mux(loadQueue.io.uncache.req.valid, s_load, s_store)
pendingstate := Mux(loadQueue.io.uncache.req.valid, s_load, s_store)
}
}
is(s_load){
when(io.uncache.resp.fire()){
uncacheState := s_idle
pendingstate := s_idle
}
}
is(s_store){
when(io.uncache.resp.fire()){
uncacheState := s_idle
pendingstate := s_idle
}
}
}
......@@ -139,7 +151,7 @@ class LsqWrappper extends XSModule with HasDCacheParameters {
}.otherwise{
io.uncache.req <> storeQueue.io.uncache.req
}
when(uncacheState === s_load){
when(pendingstate === s_load){
io.uncache.resp <> loadQueue.io.uncache.resp
}.otherwise{
io.uncache.resp <> storeQueue.io.uncache.resp
......@@ -147,6 +159,6 @@ class LsqWrappper extends XSModule with HasDCacheParameters {
assert(!(loadQueue.io.uncache.req.valid && storeQueue.io.uncache.req.valid))
assert(!(loadQueue.io.uncache.resp.valid && storeQueue.io.uncache.resp.valid))
assert(!((loadQueue.io.uncache.resp.valid || storeQueue.io.uncache.resp.valid) && uncacheState === s_idle))
assert(!((loadQueue.io.uncache.resp.valid || storeQueue.io.uncache.resp.valid) && pendingstate === s_idle))
}
......@@ -9,7 +9,7 @@ import xiangshan.cache._
import xiangshan.cache.{DCacheLineIO, DCacheWordIO, MemoryOpConstants, TlbRequestIO}
import xiangshan.backend.LSUOpType
import xiangshan.mem._
import xiangshan.backend.roq.RoqPtr
import xiangshan.backend.roq.RoqLsqIO
import xiangshan.backend.fu.HasExceptionNO
......@@ -66,13 +66,13 @@ class LoadQueue extends XSModule
val brqRedirect = Input(Valid(new Redirect))
val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle)))
val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))
val loadDataForwarded = Vec(LoadPipelineWidth, Input(Bool()))
val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback int load
val load_s1 = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
val commits = Flipped(new RoqCommitIO)
val roq = Flipped(new RoqLsqIO)
val rollback = Output(Valid(new Redirect)) // replay now starts from load instead of store
val dcache = Flipped(ValidIO(new Refill))
val uncache = new DCacheWordIO
val roqDeqPtr = Input(new RoqPtr)
val exceptionAddr = new ExceptionAddrIO
})
......@@ -85,7 +85,6 @@ class LoadQueue extends XSModule
val allocated = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // lq entry has been allocated
val datavalid = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // data is valid
val writebacked = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // inst has been writebacked to CDB
val commited = Reg(Vec(LoadQueueSize, Bool())) // inst has been writebacked to CDB
val miss = Reg(Vec(LoadQueueSize, Bool())) // load inst missed, waiting for miss queue to accept miss request
// val listening = Reg(Vec(LoadQueueSize, Bool())) // waiting for refill result
val pending = Reg(Vec(LoadQueueSize, Bool())) // mmio pending: inst is an mmio inst, it will not be executed until it reachs the end of roq
......@@ -95,22 +94,16 @@ class LoadQueue extends XSModule
val enqPtrExt = RegInit(VecInit((0 until RenameWidth).map(_.U.asTypeOf(new LqPtr))))
val deqPtrExt = RegInit(0.U.asTypeOf(new LqPtr))
val deqPtrExtNext = Wire(new LqPtr)
val validCounter = RegInit(0.U(log2Ceil(LoadQueueSize + 1).W))
val allowEnqueue = RegInit(true.B)
val enqPtr = enqPtrExt(0).value
val deqPtr = deqPtrExt.value
val sameFlag = enqPtrExt(0).flag === deqPtrExt.flag
val isEmpty = enqPtr === deqPtr && sameFlag
val isFull = enqPtr === deqPtr && !sameFlag
val allowIn = !isFull
val loadCommit = (0 until CommitWidth).map(i => io.commits.valid(i) && !io.commits.isWalk && io.commits.info(i).commitType === CommitType.LOAD)
val mcommitIdx = (0 until CommitWidth).map(i => io.commits.info(i).lqIdx.value)
val deqMask = UIntToMask(deqPtr, LoadQueueSize)
val enqMask = UIntToMask(enqPtr, LoadQueueSize)
val commitCount = RegNext(io.roq.lcommit)
/**
* Enqueue at dispatch
*
......@@ -127,7 +120,6 @@ class LoadQueue extends XSModule
allocated(index) := true.B
datavalid(index) := false.B
writebacked(index) := false.B
commited(index) := false.B
miss(index) := false.B
// listening(index) := false.B
pending(index) := false.B
......@@ -177,13 +169,13 @@ class LoadQueue extends XSModule
io.loadIn(i).bits.mmio
)}
val loadWbIndex = io.loadIn(i).bits.uop.lqIdx.value
datavalid(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
datavalid(loadWbIndex) := (!io.loadIn(i).bits.miss || io.loadDataForwarded(i)) && !io.loadIn(i).bits.mmio
writebacked(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
val loadWbData = Wire(new LQDataEntry)
loadWbData.paddr := io.loadIn(i).bits.paddr
loadWbData.mask := io.loadIn(i).bits.mask
loadWbData.data := io.loadIn(i).bits.data // fwd data
loadWbData.data := io.loadIn(i).bits.forwardData.asUInt // fwd data
loadWbData.fwdMask := io.loadIn(i).bits.forwardMask
dataModule.io.wbWrite(i, loadWbIndex, loadWbData)
dataModule.io.wb.wen(i) := true.B
......@@ -195,7 +187,7 @@ class LoadQueue extends XSModule
debug_mmio(loadWbIndex) := io.loadIn(i).bits.mmio
val dcacheMissed = io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
miss(loadWbIndex) := dcacheMissed
miss(loadWbIndex) := dcacheMissed && !io.loadDataForwarded(i)
pending(loadWbIndex) := io.loadIn(i).bits.mmio
uop(loadWbIndex).debugInfo.issueTime := io.loadIn(i).bits.uop.debugInfo.issueTime
}
......@@ -324,9 +316,8 @@ class LoadQueue extends XSModule
* When load commited, mark it as !allocated and move deqPtrExt forward.
*/
(0 until CommitWidth).map(i => {
when(loadCommit(i)) {
allocated(mcommitIdx(i)) := false.B
XSDebug("load commit %d: idx %d %x\n", i.U, mcommitIdx(i), uop(mcommitIdx(i)).cf.pc)
when(commitCount > i.U){
allocated(deqPtr+i.U) := false.B
}
})
......@@ -501,11 +492,39 @@ class LoadQueue extends XSModule
/**
* Memory mapped IO / other uncached operations
*
* States:
* (1) writeback from store units: mark as pending
* (2) when they reach ROB's head, they can be sent to uncache channel
* (3) response from uncache channel: mark as datavalid
* (4) writeback to ROB (and other units): mark as writebacked
* (5) ROB commits the instruction: same as normal instructions
*/
io.uncache.req.valid := pending(deqPtr) && allocated(deqPtr) &&
io.commits.info(0).commitType === CommitType.LOAD &&
io.roqDeqPtr === uop(deqPtr).roqIdx &&
!io.commits.isWalk
//(2) when they reach ROB's head, they can be sent to uncache channel
val s_idle :: s_req :: s_resp :: s_wait :: Nil = Enum(4)
val uncacheState = RegInit(s_idle)
switch(uncacheState) {
is(s_idle) {
when(io.roq.pendingld && pending(deqPtr) && allocated(deqPtr)) {
uncacheState := s_req
}
}
is(s_req) {
when(io.uncache.req.fire()) {
uncacheState := s_resp
}
}
is(s_resp) {
when(io.uncache.resp.fire()) {
uncacheState := s_wait
}
}
is(s_wait) {
when(io.roq.commit) {
uncacheState := s_idle // ready for next mmio
}
}
}
io.uncache.req.valid := uncacheState === s_req
dataModule.io.uncache.raddr := deqPtrExtNext.value
......@@ -537,6 +556,7 @@ class LoadQueue extends XSModule
)
}
// (3) response from uncache channel: mark as datavalid
dataModule.io.uncache.wen := false.B
when(io.uncache.resp.fire()){
datavalid(deqPtr) := true.B
......@@ -547,14 +567,14 @@ class LoadQueue extends XSModule
}
// Read vaddr for mem exception
vaddrModule.io.raddr(0) := io.exceptionAddr.lsIdx.lqIdx.value
vaddrModule.io.raddr(0) := deqPtr + commitCount
io.exceptionAddr.vaddr := vaddrModule.io.rdata(0)
// misprediction recovery / exception redirect
// invalidate lq term using robIdx
val needCancel = Wire(Vec(LoadQueueSize, Bool()))
for (i <- 0 until LoadQueueSize) {
needCancel(i) := uop(i).roqIdx.needFlush(io.brqRedirect) && allocated(i) && !commited(i)
needCancel(i) := uop(i).roqIdx.needFlush(io.brqRedirect) && allocated(i)
when (needCancel(i)) {
allocated(i) := false.B
}
......@@ -573,24 +593,13 @@ class LoadQueue extends XSModule
enqPtrExt := VecInit(enqPtrExt.map(_ + enqNumber))
}
val commitCount = PopCount(loadCommit)
deqPtrExtNext := deqPtrExt + commitCount
deqPtrExt := deqPtrExtNext
val lastLastCycleRedirect = RegNext(lastCycleRedirect.valid)
val trueValidCounter = distanceBetween(enqPtrExt(0), deqPtrExt)
validCounter := Mux(lastLastCycleRedirect,
trueValidCounter,
validCounter + enqNumber - commitCount
)
allowEnqueue := Mux(io.brqRedirect.valid,
false.B,
Mux(lastLastCycleRedirect,
trueValidCounter <= (LoadQueueSize - RenameWidth).U,
validCounter + enqNumber <= (LoadQueueSize - RenameWidth).U
)
)
val validCount = distanceBetween(enqPtrExt(0), deqPtrExt)
allowEnqueue := validCount + enqNumber <= (LoadQueueSize - RenameWidth).U
// debug info
XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtrExt.flag, deqPtr)
......@@ -609,7 +618,6 @@ class LoadQueue extends XSModule
PrintFlag(allocated(i), "a")
PrintFlag(allocated(i) && datavalid(i), "v")
PrintFlag(allocated(i) && writebacked(i), "w")
PrintFlag(allocated(i) && commited(i), "c")
PrintFlag(allocated(i) && miss(i), "m")
// PrintFlag(allocated(i) && listening(i), "l")
PrintFlag(allocated(i) && pending(i), "p")
......
......@@ -106,6 +106,51 @@ class MaskModule(numEntries: Int, numRead: Int, numWrite: Int) extends XSModule
}
}
// One byte lane of the load-queue data array.
// Holds `numEntries` 8-bit registers with:
//   * numRead  read ports  (read address is registered, so data is valid one cycle later)
//   * numWrite address-indexed write ports
//   * a masked-write plane (blockWords x numEntries) used to refill bytes from a dcache line
// NOTE: in Chisel, the last connection in source order wins ("last-connect"
// semantics), so the masked refill writes below take priority over the
// address-indexed port writes when both target the same entry in one cycle.
class Data8Module(numEntries: Int, numRead: Int, numWrite: Int) extends XSModule with HasDCacheParameters {
val io = IO(new Bundle {
// read
val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
val rdata = Output(Vec(numRead, UInt(8.W)))
// address indexed write
val wen = Input(Vec(numWrite, Bool()))
val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
val wdata = Input(Vec(numWrite, UInt(8.W)))
// masked write
// mwmask(i)(j): refill word i drives entry j this cycle
val mwmask = Input(Vec(blockWords, Vec(numEntries, Bool())))
// mwdata(i): the byte of refill word i belonging to this lane
val mwdata = Input(Vec(blockWords, UInt(8.W)))
})
val data = Reg(Vec(numEntries, UInt(8.W)))
// read ports
// Address is registered here; callers must account for the 1-cycle read latency.
for (i <- 0 until numRead) {
io.rdata(i) := data(RegNext(io.raddr(i)))
}
// below is the write ports (with priorities)
// Higher port index wins on a same-address conflict (last-connect), but the
// assertion below forbids such conflicts anyway.
for (i <- 0 until numWrite) {
when (io.wen(i)) {
data(io.waddr(i)) := io.wdata(i)
}
}
// masked write
// Placed after the port writes on purpose: refill data overrides a
// same-cycle port write to the same entry.
for (i <- 0 until blockWords) {
for (j <- 0 until numEntries) {
when (io.mwmask(i)(j)) {
data(j) := io.mwdata(i)
}
}
}
// DataModuleTemplate should not be used when there're any write conflicts
// Runtime check: no two write ports may enable a write to the same entry.
for (i <- 0 until numWrite) {
for (j <- i+1 until numWrite) {
assert(!(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j)))
}
}
}
class CoredataModule(numEntries: Int, numRead: Int, numWrite: Int) extends XSModule with HasDCacheParameters {
val io = IO(new Bundle {
// data io
......@@ -131,20 +176,28 @@ class CoredataModule(numEntries: Int, numRead: Int, numWrite: Int) extends XSMod
val paddrWen = Input(Vec(numWrite, Bool()))
})
val data = Reg(Vec(numEntries, UInt(XLEN.W)))
val data8 = Seq.fill(8)(Module(new Data8Module(numEntries, numRead, numWrite)))
val fwdMask = Reg(Vec(numEntries, UInt(8.W)))
val wordIndex = Reg(Vec(numEntries, UInt((blockOffBits - wordOffBits).W)))
// read ports
for (i <- 0 until numRead) {
io.rdata(i) := data(RegNext(io.raddr(i)))
for (j <- 0 until 8) {
data8(j).io.raddr(i) := io.raddr(i)
}
io.rdata(i) := VecInit((0 until 8).map(j => data8(j).io.rdata(i))).asUInt
}
// below is the write ports (with priorities)
for (i <- 0 until numWrite) {
when (io.wen(i)) {
data(io.waddr(i)) := io.wdata(i)
// write to data8
for (j <- 0 until 8) {
data8(j).io.waddr(i) := io.waddr(i)
data8(j).io.wdata(i) := io.wdata(i)(8*(j+1)-1, 8*j)
data8(j).io.wen(i) := io.wen(i)
}
// write ctrl info
when (io.fwdMaskWen(i)) {
fwdMask(io.waddr(i)) := io.fwdMaskWdata(i)
}
......@@ -153,25 +206,25 @@ class CoredataModule(numEntries: Int, numRead: Int, numWrite: Int) extends XSMod
}
}
// write refilled data to data8
// masked write
// refill missed load
def mergeRefillData(refill: UInt, fwd: UInt, fwdMask: UInt): UInt = {
val res = Wire(Vec(8, UInt(8.W)))
(0 until 8).foreach(i => {
res(i) := Mux(fwdMask(i), fwd(8 * (i + 1) - 1, 8 * i), refill(8 * (i + 1) - 1, 8 * i))
})
res.asUInt
}
// select refill data
// split dcache result into words
val words = VecInit((0 until blockWords) map { i => io.refillData(DataBits * (i + 1) - 1, DataBits * i)})
// select refill data according to wordIndex (paddr)
for (i <- 0 until 8) {
for (j <- 0 until blockWords) {
data8(i).io.mwdata(j) := words(j)(8*(i+1)-1, 8*i)
}
}
// refill data according to matchMask, refillMask and refill.valid
for (j <- 0 until numEntries) {
when (io.mwmask(j)) {
val refillData = words(wordIndex(j)) // TODO
data(j) := mergeRefillData(refillData, data(j), fwdMask(j))
// gen refill wmask
for (j <- 0 until blockWords) {
for (k <- 0 until numEntries) {
val wordMatch = wordIndex(k) === j.U
for (i <- 0 until 8) {
data8(i).io.mwmask(j)(k) := wordMatch && io.mwmask(k) && !fwdMask(k)(i)
}
}
}
......
......@@ -7,7 +7,7 @@ import xiangshan._
import xiangshan.cache._
import xiangshan.cache.{DCacheWordIO, DCacheLineIO, TlbRequestIO, MemoryOpConstants}
import xiangshan.backend.LSUOpType
import xiangshan.backend.roq.RoqPtr
import xiangshan.backend.roq.RoqLsqIO
class SqPtr extends CircularQueuePtr(SqPtr.StoreQueueSize) { }
......@@ -38,19 +38,28 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
val sbuffer = Vec(StorePipelineWidth, Decoupled(new DCacheWordReq))
val mmioStout = DecoupledIO(new ExuOutput) // writeback uncached store
val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
val commits = Flipped(new RoqCommitIO)
val roq = Flipped(new RoqLsqIO)
val uncache = new DCacheWordIO
val roqDeqPtr = Input(new RoqPtr)
// val refill = Flipped(Valid(new DCacheLineReq ))
val exceptionAddr = new ExceptionAddrIO
val sqempty = Output(Bool())
})
val difftestIO = IO(new Bundle() {
val storeCommit = Output(UInt(2.W))
val storeAddr = Output(Vec(2, UInt(64.W)))
val storeData = Output(Vec(2, UInt(64.W)))
val storeMask = Output(Vec(2, UInt(8.W)))
})
difftestIO <> DontCare
// data modules
val uop = Reg(Vec(StoreQueueSize, new MicroOp))
// val data = Reg(Vec(StoreQueueSize, new LsqEntry))
val dataModule = Module(new StoreQueueData(StoreQueueSize, numRead = StorePipelineWidth, numWrite = StorePipelineWidth, numForward = StorePipelineWidth))
dataModule.io := DontCare
val paddrModule = Module(new SQPaddrModule(StoreQueueSize, numRead = StorePipelineWidth, numWrite = StorePipelineWidth, numForward = StorePipelineWidth))
paddrModule.io := DontCare
val vaddrModule = Module(new AsyncDataModuleTemplate(UInt(VAddrBits.W), StoreQueueSize, numRead = 1, numWrite = StorePipelineWidth))
vaddrModule.io := DontCare
......@@ -66,14 +75,18 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
require(StoreQueueSize > RenameWidth)
val enqPtrExt = RegInit(VecInit((0 until RenameWidth).map(_.U.asTypeOf(new SqPtr))))
val deqPtrExt = RegInit(VecInit((0 until StorePipelineWidth).map(_.U.asTypeOf(new SqPtr))))
val cmtPtrExt = RegInit(VecInit((0 until CommitWidth).map(_.U.asTypeOf(new SqPtr))))
val validCounter = RegInit(0.U(log2Ceil(LoadQueueSize + 1).W))
val allowEnqueue = RegInit(true.B)
val enqPtr = enqPtrExt(0).value
val deqPtr = deqPtrExt(0).value
val cmtPtr = cmtPtrExt(0).value
val deqMask = UIntToMask(deqPtr, StoreQueueSize)
val enqMask = UIntToMask(enqPtr, StoreQueueSize)
val tailMask = UIntToMask(deqPtr, StoreQueueSize)
val headMask = UIntToMask(enqPtr, StoreQueueSize)
val commitCount = RegNext(io.roq.scommit)
// Read dataModule
// deqPtrExtNext and deqPtrExtNext+1 entry will be read from dataModule
......@@ -86,11 +99,11 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
deqPtrExt
)
))
val dataModuleRead = dataModule.io.rdata
for (i <- 0 until StorePipelineWidth) {
dataModule.io.raddr(i) := deqPtrExtNext(i).value
paddrModule.io.raddr(i) := deqPtrExtNext(i).value
}
vaddrModule.io.raddr(0) := io.exceptionAddr.lsIdx.sqIdx.value
vaddrModule.io.raddr(0) := cmtPtr + commitCount
/**
* Enqueue at dispatch
......@@ -129,6 +142,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
*/
for (i <- 0 until StorePipelineWidth) {
dataModule.io.wen(i) := false.B
paddrModule.io.wen(i) := false.B
vaddrModule.io.wen(i) := false.B
when (io.storeIn(i).fire()) {
val stWbIndex = io.storeIn(i).bits.uop.sqIdx.value
......@@ -138,13 +152,17 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
val storeWbData = Wire(new SQDataEntry)
storeWbData := DontCare
storeWbData.paddr := io.storeIn(i).bits.paddr
storeWbData.mask := io.storeIn(i).bits.mask
storeWbData.data := io.storeIn(i).bits.data
dataModule.io.waddr(i) := stWbIndex
dataModule.io.wdata(i) := storeWbData
dataModule.io.wen(i) := true.B
paddrModule.io.waddr(i) := stWbIndex
paddrModule.io.wdata(i) := io.storeIn(i).bits.paddr
paddrModule.io.wen(i) := true.B
vaddrModule.io.waddr(i) := stWbIndex
vaddrModule.io.wdata(i) := io.storeIn(i).bits.vaddr
vaddrModule.io.wen(i) := true.B
......@@ -185,7 +203,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
for (j <- 0 until StoreQueueSize) {
storeWritebackedVec(j) := datavalid(j) && allocated(j) // all datavalid terms need to be checked
}
val needForward1 = Mux(differentFlag, ~tailMask, tailMask ^ forwardMask) & storeWritebackedVec.asUInt
val needForward1 = Mux(differentFlag, ~deqMask, deqMask ^ forwardMask) & storeWritebackedVec.asUInt
val needForward2 = Mux(differentFlag, forwardMask, 0.U(StoreQueueSize.W)) & storeWritebackedVec.asUInt
XSDebug(p"$i f1 ${Binary(needForward1)} f2 ${Binary(needForward2)} " +
......@@ -193,15 +211,13 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
)
// do real fwd query
dataModule.io.forwardQuery(
numForward = i,
paddr = io.forward(i).paddr,
needForward1 = needForward1,
needForward2 = needForward2
)
dataModule.io.needForward(i)(0) := needForward1 & paddrModule.io.forwardMmask(i).asUInt
dataModule.io.needForward(i)(1) := needForward2 & paddrModule.io.forwardMmask(i).asUInt
paddrModule.io.forwardMdata(i) := io.forward(i).paddr
io.forward(i).forwardMask := dataModule.io.forward(i).forwardMask
io.forward(i).forwardData := dataModule.io.forward(i).forwardData
io.forward(i).forwardMask := dataModule.io.forwardMask(i)
io.forward(i).forwardData := dataModule.io.forwardData(i)
}
/**
......@@ -215,19 +231,40 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
* (5) ROB commits the instruction: same as normal instructions
*/
//(2) when they reach ROB's head, they can be sent to uncache channel
io.uncache.req.valid := pending(deqPtr) && allocated(deqPtr) &&
io.commits.info(0).commitType === CommitType.STORE &&
io.roqDeqPtr === uop(deqPtr).roqIdx &&
!io.commits.isWalk
val s_idle :: s_req :: s_resp :: s_wait :: Nil = Enum(4)
val uncacheState = RegInit(s_idle)
switch(uncacheState) {
is(s_idle) {
when(io.roq.pendingst && pending(deqPtr) && allocated(deqPtr)) {
uncacheState := s_req
}
}
is(s_req) {
when(io.uncache.req.fire()) {
uncacheState := s_resp
}
}
is(s_resp) {
when(io.uncache.resp.fire()) {
uncacheState := s_wait
}
}
is(s_wait) {
when(io.roq.commit) {
uncacheState := s_idle // ready for next mmio
}
}
}
io.uncache.req.valid := uncacheState === s_req
io.uncache.req.bits.cmd := MemoryOpConstants.M_XWR
io.uncache.req.bits.addr := dataModule.io.rdata(0).paddr // data(deqPtr) -> rdata(0)
io.uncache.req.bits.addr := paddrModule.io.rdata(0) // data(deqPtr) -> rdata(0)
io.uncache.req.bits.data := dataModule.io.rdata(0).data
io.uncache.req.bits.mask := dataModule.io.rdata(0).mask
io.uncache.req.bits.meta.id := DontCare
io.uncache.req.bits.meta.vaddr := DontCare
io.uncache.req.bits.meta.paddr := dataModule.io.rdata(0).paddr
io.uncache.req.bits.meta.paddr := paddrModule.io.rdata(0)
io.uncache.req.bits.meta.uop := uop(deqPtr)
io.uncache.req.bits.meta.mmio := true.B
io.uncache.req.bits.meta.tlb_miss := false.B
......@@ -256,7 +293,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
io.mmioStout.valid := allocated(deqPtr) && datavalid(deqPtr) && !writebacked(deqPtr)
io.mmioStout.bits.uop := uop(deqPtr)
io.mmioStout.bits.uop.sqIdx := deqPtrExt(0)
io.mmioStout.bits.data := dataModuleRead(0).data // dataModuleRead.read(deqPtr)
io.mmioStout.bits.data := dataModule.io.rdata(0).data // dataModule.io.rdata.read(deqPtr)
io.mmioStout.bits.redirectValid := false.B
io.mmioStout.bits.redirect := DontCare
io.mmioStout.bits.brUpdate := DontCare
......@@ -275,12 +312,11 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
* (2) They will not be cancelled and can be sent to lower level.
*/
for (i <- 0 until CommitWidth) {
val storeCommit = !io.commits.isWalk && io.commits.valid(i) && io.commits.info(i).commitType === CommitType.STORE
when (storeCommit) {
commited(io.commits.info(i).sqIdx.value) := true.B
XSDebug("store commit %d: idx %d\n", i.U, io.commits.info(i).sqIdx.value)
when (commitCount > i.U) {
commited(cmtPtrExt(i).value) := true.B
}
}
cmtPtrExt := cmtPtrExt.map(_ + commitCount)
// Commited stores will not be cancelled and can be sent to lower level.
// remove retired insts from sq, add retired store to sbuffer
......@@ -291,9 +327,9 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
// if sbuffer.fire(), read next
io.sbuffer(i).valid := allocated(ptr) && commited(ptr) && !mmio(ptr)
io.sbuffer(i).bits.cmd := MemoryOpConstants.M_XWR
io.sbuffer(i).bits.addr := dataModuleRead(i).paddr
io.sbuffer(i).bits.data := dataModuleRead(i).data
io.sbuffer(i).bits.mask := dataModuleRead(i).mask
io.sbuffer(i).bits.addr := paddrModule.io.rdata(i)
io.sbuffer(i).bits.data := dataModule.io.rdata(i).data
io.sbuffer(i).bits.mask := dataModule.io.rdata(i).mask
io.sbuffer(i).bits.meta := DontCare
io.sbuffer(i).bits.meta.tlb_miss := false.B
io.sbuffer(i).bits.meta.uop := DontCare
......@@ -309,17 +345,23 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
assert(io.sbuffer(0).fire())
}
if (!env.FPGAPlatform) {
val storeCommit = PopCount(io.sbuffer.map(_.fire()))
val waddr = VecInit(io.sbuffer.map(req => SignExt(req.bits.addr, 64)))
val wdata = VecInit(io.sbuffer.map(req => req.bits.data & MaskExpand(req.bits.mask)))
val wmask = VecInit(io.sbuffer.map(_.bits.mask))
val storeCommit = PopCount(io.sbuffer.map(_.fire()))
val waddr = VecInit(io.sbuffer.map(req => SignExt(req.bits.addr, 64)))
val wdata = VecInit(io.sbuffer.map(req => req.bits.data & MaskExpand(req.bits.mask)))
val wmask = VecInit(io.sbuffer.map(_.bits.mask))
if (!env.FPGAPlatform) {
ExcitingUtils.addSource(RegNext(storeCommit), "difftestStoreCommit", ExcitingUtils.Debug)
ExcitingUtils.addSource(RegNext(waddr), "difftestStoreAddr", ExcitingUtils.Debug)
ExcitingUtils.addSource(RegNext(wdata), "difftestStoreData", ExcitingUtils.Debug)
ExcitingUtils.addSource(RegNext(wmask), "difftestStoreMask", ExcitingUtils.Debug)
}
if (env.DualCoreDifftest) {
difftestIO.storeCommit := RegNext(storeCommit)
difftestIO.storeAddr := RegNext(waddr)
difftestIO.storeData := RegNext(wdata)
difftestIO.storeMask := RegNext(wmask)
}
// Read vaddr for mem exception
io.exceptionAddr.vaddr := vaddrModule.io.rdata(0)
......@@ -352,19 +394,9 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
val lastLastCycleRedirect = RegNext(lastCycleRedirect)
val dequeueCount = Mux(io.sbuffer(1).fire(), 2.U, Mux(io.sbuffer(0).fire() || io.mmioStout.fire(), 1.U, 0.U))
val trueValidCounter = distanceBetween(enqPtrExt(0), deqPtrExt(0))
validCounter := Mux(lastLastCycleRedirect,
trueValidCounter - dequeueCount,
validCounter + enqNumber - dequeueCount
)
allowEnqueue := Mux(io.brqRedirect.valid,
false.B,
Mux(lastLastCycleRedirect,
trueValidCounter <= (StoreQueueSize - RenameWidth).U,
validCounter + enqNumber <= (StoreQueueSize - RenameWidth).U
)
)
val validCount = distanceBetween(enqPtrExt(0), deqPtrExt(0))
allowEnqueue := validCount + enqNumber <= (StoreQueueSize - RenameWidth).U
// io.sqempty will be used by sbuffer
// We delay it for 1 cycle for better timing
......@@ -385,7 +417,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
for (i <- 0 until StoreQueueSize) {
if (i % 4 == 0) XSDebug("")
XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, dataModule.io.debug(i).paddr)
XSDebug(false, true.B, "%x ", uop(i).cf.pc)
PrintFlag(allocated(i), "a")
PrintFlag(allocated(i) && datavalid(i), "v")
PrintFlag(allocated(i) && writebacked(i), "w")
......
......@@ -11,12 +11,52 @@ import xiangshan.mem._
import xiangshan.backend.roq.RoqPtr
// Data module define
// These data modules are like SyncDataModuleTemplate, but support cam-like ops
// Physical-address array for the store queue with CAM-style forward matching.
// Like SyncDataModuleTemplate (registered read address, 1-cycle read latency)
// plus `numForward` content-addressed compare ports used by load forwarding.
class SQPaddrModule(numEntries: Int, numRead: Int, numWrite: Int, numForward: Int) extends XSModule with HasDCacheParameters {
val io = IO(new Bundle {
val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
val rdata = Output(Vec(numRead, UInt((PAddrBits).W)))
val wen = Input(Vec(numWrite, Bool()))
val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
val wdata = Input(Vec(numWrite, UInt((PAddrBits).W)))
// forwardMdata(i): load paddr to match; forwardMmask(i)(j): entry j matches it
val forwardMdata = Input(Vec(numForward, UInt((PAddrBits).W)))
val forwardMmask = Output(Vec(numForward, Vec(numEntries, Bool())))
})
val data = Reg(Vec(numEntries, UInt((PAddrBits).W)))
// read ports
// Read address is registered; data is valid one cycle after raddr is presented.
for (i <- 0 until numRead) {
io.rdata(i) := data(RegNext(io.raddr(i)))
}
// below is the write ports (with priorities)
// Higher port index wins on a same-address conflict (Chisel last-connect),
// though the assertion below forbids conflicts.
for (i <- 0 until numWrite) {
when (io.wen(i)) {
data(io.waddr(i)) := io.wdata(i)
}
}
// content addressed match
// Compare at 8-byte granularity: the low 3 bits (byte offset within a
// doubleword) are ignored, so any store to the same doubleword matches.
for (i <- 0 until numForward) {
for (j <- 0 until numEntries) {
io.forwardMmask(i)(j) := io.forwardMdata(i)(PAddrBits-1, 3) === data(j)(PAddrBits-1, 3)
}
}
// DataModuleTemplate should not be used when there're any write conflicts
// Runtime check: no two write ports may enable a write to the same entry.
for (i <- 0 until numWrite) {
for (j <- i+1 until numWrite) {
assert(!(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j)))
}
}
}
// Per-entry payload of the store queue data module: the store data plus its
// byte mask. The physical address was split out into SQPaddrModule (hence the
// commented-out paddr field below); vaddr lives in a separate vaddrModule.
class SQDataEntry extends XSBundle {
// val vaddr = UInt(VAddrBits.W) // TODO: need opt
// val paddr = UInt(PAddrBits.W)
// Byte-enable mask for the 8 bytes of `data`.
val mask = UInt(8.W)
val data = UInt(XLEN.W)
// val exception = UInt(16.W) // TODO: opt size
}
class StoreQueueData(size: Int, numRead: Int, numWrite: Int, numForward: Int) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
......@@ -29,13 +69,8 @@ class StoreQueueData(size: Int, numRead: Int, numWrite: Int, numForward: Int) ex
val debug = Vec(size, Output(new SQDataEntry))
val needForward = Input(Vec(numForward, Vec(2, UInt(size.W))))
val forward = Vec(numForward, Flipped(new LoadForwardQueryIO))
def forwardQuery(numForward: Int, paddr: UInt, needForward1: Data, needForward2: Data): Unit = {
this.needForward(numForward)(0) := needForward1
this.needForward(numForward)(1) := needForward2
this.forward(numForward).paddr := paddr
}
val forwardMask = Vec(numForward, Output(Vec(8, Bool())))
val forwardData = Vec(numForward, Output(Vec(8, UInt(8.W))))
})
io := DontCare
......@@ -72,32 +107,7 @@ class StoreQueueData(size: Int, numRead: Int, numWrite: Int, numForward: Int) ex
// entry with larger index should have higher priority since its data is younger
(0 until numForward).map(i => {
val forwardMask1 = WireInit(VecInit(Seq.fill(8)(false.B)))
val forwardData1 = WireInit(VecInit(Seq.fill(8)(0.U(8.W))))
val forwardMask2 = WireInit(VecInit(Seq.fill(8)(false.B)))
val forwardData2 = WireInit(VecInit(Seq.fill(8)(0.U(8.W))))
for (j <- 0 until size) {
val needCheck = io.forward(i).paddr(PAddrBits - 1, 3) === data(j).paddr(PAddrBits - 1, 3)
(0 until XLEN / 8).foreach(k => {
when (needCheck && data(j).mask(k)) {
when (io.needForward(i)(0)(j)) {
forwardMask1(k) := true.B
forwardData1(k) := data(j).data(8 * (k + 1) - 1, 8 * k)
}
when (io.needForward(i)(1)(j)) {
forwardMask2(k) := true.B
forwardData2(k) := data(j).data(8 * (k + 1) - 1, 8 * k)
}
XSDebug(io.needForward(i)(0)(j) || io.needForward(i)(1)(j),
p"forwarding $k-th byte ${Hexadecimal(data(j).data(8 * (k + 1) - 1, 8 * k))} " +
p"from ptr $j\n")
}
})
}
// parallel fwd logic
val paddrMatch = Wire(Vec(size, Bool()))
val matchResultVec = Wire(Vec(size * 2, new FwdEntry))
def parallelFwd(xs: Seq[Data]): Data = {
......@@ -113,13 +123,14 @@ class StoreQueueData(size: Int, numRead: Int, numWrite: Int, numForward: Int) ex
})
}
for (j <- 0 until size) {
paddrMatch(j) := io.forward(i).paddr(PAddrBits - 1, 3) === data(j).paddr(PAddrBits - 1, 3)
}
// paddrMatch is now included in io.needForward
// for (j <- 0 until size) {
// paddrMatch(j) := io.forward(i).paddr(PAddrBits - 1, 3) === data(j).paddr(PAddrBits - 1, 3)
// }
for (j <- 0 until size) {
val needCheck0 = RegNext(paddrMatch(j) && io.needForward(i)(0)(j))
val needCheck1 = RegNext(paddrMatch(j) && io.needForward(i)(1)(j))
val needCheck0 = RegNext(io.needForward(i)(0)(j))
val needCheck1 = RegNext(io.needForward(i)(1)(j))
(0 until XLEN / 8).foreach(k => {
matchResultVec(j).mask(k) := needCheck0 && data(j).mask(k)
matchResultVec(j).data(k) := data(j).data(8 * (k + 1) - 1, 8 * k)
......@@ -130,8 +141,8 @@ class StoreQueueData(size: Int, numRead: Int, numWrite: Int, numForward: Int) ex
val parallelFwdResult = parallelFwd(matchResultVec).asTypeOf(new FwdEntry)
io.forward(i).forwardMask := parallelFwdResult.mask
io.forward(i).forwardData := parallelFwdResult.data
io.forwardMask(i) := parallelFwdResult.mask
io.forwardData(i) := parallelFwdResult.data
})
......
......@@ -12,6 +12,7 @@ import xiangshan.backend.LSUOpType
class LoadToLsqIO extends XSBundle {
val loadIn = ValidIO(new LsPipelineBundle)
val ldout = Flipped(DecoupledIO(new ExuOutput))
val loadDataForwarded = Output(Bool())
val forward = new LoadForwardQueryIO
}
......@@ -26,8 +27,18 @@ class LoadUnit_S0 extends XSModule {
})
val s0_uop = io.in.bits.uop
val s0_vaddr = io.in.bits.src1 + SignExt(ImmUnion.I.toImm32(s0_uop.ctrl.imm), XLEN)
val s0_mask = genWmask(s0_vaddr, s0_uop.ctrl.fuOpType(1,0))
val s0_vaddr_old = io.in.bits.src1 + SignExt(ImmUnion.I.toImm32(s0_uop.ctrl.imm), XLEN)
val imm12 = WireInit(s0_uop.ctrl.imm(11,0))
val s0_vaddr_lo = io.in.bits.src1(11,0) + Cat(0.U(1.W), imm12)
val s0_vaddr_hi = Mux(imm12(11),
Mux((s0_vaddr_lo(12)), io.in.bits.src1(VAddrBits-1, 12), io.in.bits.src1(VAddrBits-1, 12)+SignExt(1.U, VAddrBits-12)),
Mux((s0_vaddr_lo(12)), io.in.bits.src1(VAddrBits-1, 12)+1.U, io.in.bits.src1(VAddrBits-1, 12))
)
val s0_vaddr = Cat(s0_vaddr_hi, s0_vaddr_lo(11,0))
when(io.in.fire() && s0_vaddr(VAddrBits-1,0) =/= (io.in.bits.src1 + SignExt(ImmUnion.I.toImm32(s0_uop.ctrl.imm), XLEN))(VAddrBits-1,0)){
printf("s0_vaddr %x s0_vaddr_old %x\n", s0_vaddr, s0_vaddr_old(VAddrBits-1,0))
}
val s0_mask = genWmask(s0_vaddr_lo, s0_uop.ctrl.fuOpType(1,0))
// query DTLB
io.dtlbReq.valid := io.in.valid
......@@ -141,6 +152,7 @@ class LoadUnit_S2 extends XSModule with HasLoadHelper {
val dcacheResp = Flipped(DecoupledIO(new DCacheWordResp))
val lsq = new LoadForwardQueryIO
val sbuffer = new LoadForwardQueryIO
val dataForwarded = Output(Bool())
})
val s2_uop = io.in.bits.uop
......@@ -194,10 +206,17 @@ class LoadUnit_S2 extends XSModule with HasLoadHelper {
io.out.bits := io.in.bits
io.out.bits.data := rdataPartialLoad
// when exception occurs, set it to not miss and let it write back to roq (via int port)
io.out.bits.miss := s2_cache_miss && !fullForward && !s2_exception
io.out.bits.miss := s2_cache_miss && !s2_exception
io.out.bits.uop.ctrl.fpWen := io.in.bits.uop.ctrl.fpWen && !s2_exception
io.out.bits.mmio := s2_mmio
// For timing reasons, we can not let
// io.out.bits.miss := s2_cache_miss && !s2_exception && !fullForward
// We use io.dataForwarded instead. It means forward logic have prepared all data needed,
// and dcache query is no longer needed.
// Such inst will be writebacked from load queue.
io.dataForwarded := s2_cache_miss && fullForward && !s2_exception
io.in.ready := io.out.ready || !io.in.valid
// merge forward result
......@@ -259,6 +278,7 @@ class LoadUnit extends XSModule with HasLoadHelper {
load_s2.io.lsq.forwardMask <> io.lsq.forward.forwardMask
load_s2.io.sbuffer.forwardData <> io.sbuffer.forwardData
load_s2.io.sbuffer.forwardMask <> io.sbuffer.forwardMask
load_s2.io.dataForwarded <> io.lsq.loadDataForwarded
XSDebug(load_s0.io.out.valid,
p"S0: pc ${Hexadecimal(load_s0.io.out.bits.uop.cf.pc)}, lId ${Hexadecimal(load_s0.io.out.bits.uop.lqIdx.asUInt)}, " +
......
......@@ -17,7 +17,17 @@ class StoreUnit_S0 extends XSModule {
})
// send req to dtlb
val saddr = io.in.bits.src1 + SignExt(ImmUnion.S.toImm32(io.in.bits.uop.ctrl.imm), XLEN)
val saddr_old = io.in.bits.src1 + SignExt(ImmUnion.S.toImm32(io.in.bits.uop.ctrl.imm), XLEN)
val imm12 = WireInit(io.in.bits.uop.ctrl.imm(11,0))
val saddr_lo = io.in.bits.src1(11,0) + Cat(0.U(1.W), imm12)
val saddr_hi = Mux(imm12(11),
Mux((saddr_lo(12)), io.in.bits.src1(VAddrBits-1, 12), io.in.bits.src1(VAddrBits-1, 12)+SignExt(1.U, VAddrBits-12)),
Mux((saddr_lo(12)), io.in.bits.src1(VAddrBits-1, 12)+1.U, io.in.bits.src1(VAddrBits-1, 12))
)
val saddr = Cat(saddr_hi, saddr_lo(11,0))
when(io.in.fire() && saddr(VAddrBits-1,0) =/= (io.in.bits.src1 + SignExt(ImmUnion.S.toImm32(io.in.bits.uop.ctrl.imm), XLEN))(VAddrBits-1,0)){
printf("saddr %x saddr_old %x\n", saddr, saddr_old(VAddrBits-1,0))
}
io.dtlbReq.bits.vaddr := saddr
io.dtlbReq.valid := io.in.valid
......
......@@ -8,10 +8,16 @@ import xiangshan.cache._
trait HasSbufferCst extends HasXSParameter {
def s_invalid = 0.U(2.W)
def s_valid = 1.U(2.W)
def s_prepare = 2.U(2.W)
def s_inflight = 3.U(2.W)
// use 1h to speedup selection
def s_invalid = (1<<0).U(4.W)
def s_valid = (1<<1).U(4.W)
def s_prepare = (1<<2).U(4.W)
def s_inflight = (1<<3).U(4.W)
def isInvalid(i: UInt): Bool = i(0).asBool
def isValid(i: UInt): Bool = i(1).asBool
def isPrepare(i: UInt): Bool = i(2).asBool
def isInflight(i: UInt): Bool = i(3).asBool
val evictCycle = 1 << 20
require(isPow2(evictCycle))
......@@ -114,6 +120,13 @@ class NewSbuffer extends XSModule with HasSbufferCst {
val empty = Output(Bool())
} // sbuffer flush
})
val difftestIO = IO(new Bundle() {
val sbufferResp = Output(Bool())
val sbufferAddr = Output(UInt(64.W))
val sbufferData = Output(Vec(64, UInt(8.W)))
val sbufferMask = Output(UInt(64.W))
})
difftestIO <> DontCare
val buffer = Mem(StoreBufferSize, new SbufferLine)
val stateVec = RegInit(VecInit(Seq.fill(StoreBufferSize)(s_invalid)))
......@@ -150,7 +163,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
// sbuffer entry count
val invalidCount = RegInit(StoreBufferSize.U((log2Up(StoreBufferSize) + 1).W))
val validCount = RegInit(0.U((log2Up(StoreBufferSize) + 1).W))
val full = invalidCount === 0.U
val full = invalidCount === 0.U // full = TODO: validCount(log2Up(StoreBufferSize))
val bufferRead = VecInit((0 until StoreBufferSize).map(i => buffer(i)))
val stateRead = VecInit((0 until StoreBufferSize).map(i => stateVec(i)))
......@@ -172,7 +185,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
val lru = Module(new ChooseReplace(StoreBufferSize))
val evictionIdx = lru.io.way
lru.io.mask := stateRead.map(_ === s_valid)
lru.io.mask := stateRead.map(isValid(_))
val tags = io.in.map(in => getTag(in.bits.addr))
val sameTag = tags(0) === tags(1)
......@@ -188,21 +201,36 @@ class NewSbuffer extends XSModule with HasSbufferCst {
for(i <- 0 until StorePipelineWidth){
mergeMask(i) := widthMap(j =>
Mux(tags(i) === tagRead(j) && stateRead(j) === s_valid, true.B, false.B))
Mux(tags(i) === tagRead(j) && isValid(stateRead(j)), true.B, false.B))
}
// insert condition
// firstInsert: the first invalid entry
// if the first request can merge, or the second request has the same tag as the first, secondInsert equals the first invalid entry; otherwise it is the second invalid entry
val invalidMask = stateRead.map(s => s === s_invalid)
val firstInsertMask = PriorityEncoderOH(invalidMask)
val secondInsertMask = Wire(Vec(StoreBufferSize, Bool()))
for (i <- 0 until StoreBufferSize){
secondInsertMask(i) := Mux(canMerge(0) || sameTag, firstInsertMask(i), invalidMask(i) - firstInsertMask(i))
val invalidMask = stateRead.map(s => isInvalid(s))
val evenInvalidMask = GetEvenBits(VecInit(invalidMask).asUInt)
val oddInvalidMask = GetOddBits(VecInit(invalidMask).asUInt)
val (evenRawInsertIdx, evenCanInsert) = PriorityEncoderWithFlag(evenInvalidMask)
val (oddRawInsertIdx, oddCanInsert) = PriorityEncoderWithFlag(oddInvalidMask)
val evenInsertIdx = Cat(evenRawInsertIdx, 0.U(1.W))
val oddInsertIdx = Cat(oddRawInsertIdx, 1.U(1.W))
val enbufferSelReg = RegInit(false.B)
when(io.in(0).valid) {
enbufferSelReg := ~enbufferSelReg
}
val (firstInsertIdx, firstCanInsert) = PriorityEncoderWithFlag(invalidMask)
val (secondInsertIdx, secondCanInsert) = PriorityEncoderWithFlag(secondInsertMask)
val firstInsertIdx = Mux(enbufferSelReg, evenInsertIdx, oddInsertIdx)
val secondInsertIdx = Mux(sameTag,
firstInsertIdx,
Mux(~enbufferSelReg, evenInsertIdx, oddInsertIdx)
)
val firstCanInsert = Mux(enbufferSelReg, evenCanInsert, oddCanInsert)
val secondCanInsert = Mux(sameTag,
firstCanInsert,
Mux(~enbufferSelReg, evenCanInsert, oddCanInsert)
)
io.in(0).ready := firstCanInsert || canMerge(0)
io.in(1).ready := (secondCanInsert || canMerge(1)) && !sameWord && io.in(0).ready
......@@ -244,7 +272,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
when(canMerge(0)){
mergeWordReq(io.in(0).bits, mergeIdx(0), firstWord)
XSDebug(p"merge req 0 to line [${mergeIdx(0)}]\n")
}.elsewhen(firstCanInsert){
}.otherwise{
wordReqToBufLine(io.in(0).bits, tags(0), firstInsertIdx, firstWord, true.B)
XSDebug(p"insert req 0 to line[$firstInsertIdx]\n")
}
......@@ -255,7 +283,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
when(canMerge(1)){
mergeWordReq(io.in(1).bits, mergeIdx(1), secondWord)
XSDebug(p"merge req 1 to line [${mergeIdx(1)}]\n")
}.elsewhen(secondCanInsert){
}.otherwise{
wordReqToBufLine(io.in(1).bits, tags(1), secondInsertIdx, secondWord, !sameTag)
XSDebug(p"insert req 1 to line[$secondInsertIdx]\n")
}
......@@ -288,7 +316,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
// ---------------------- Send Dcache Req ---------------------
val do_eviction = Wire(Bool())
val empty = Cat(stateVec.map(s => s===s_invalid)).andR() && !Cat(io.in.map(_.valid)).orR()
val empty = Cat(stateVec.map(s => isInvalid(s))).andR() && !Cat(io.in.map(_.valid)).orR()
do_eviction := validCount >= 12.U
......@@ -321,7 +349,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
val tag = tagRead(idx)
!Cat(widthMap(i => {
// stateVec(idx) itself must not be s_inflight*
(stateRead(i) === s_inflight || stateRead(i) === s_prepare) &&
(isInflight(stateRead(i)) || isPrepare(stateRead(i))) &&
tag === tagRead(i)
})).orR()
}
......@@ -340,28 +368,34 @@ class NewSbuffer extends XSModule with HasSbufferCst {
// evictionEntry.bits := evictionIdx
val prepareValid = ((do_eviction && sbuffer_state === x_replace) || (sbuffer_state === x_drain_sbuffer)) &&
stateVec(evictionIdx)===s_valid &&
isValid(stateVec(evictionIdx)) &&
noSameBlockInflight(evictionIdx)
when(prepareValid){
stateVec(evictionIdx) := s_prepare
}
val prepareMask = stateVec.map(s => s === s_prepare)
val prepareMask = stateVec.map(s => isPrepare(s))
val (prepareIdx, prepareEn) = PriorityEncoderWithFlag(prepareMask)
io.dcache.req.valid := prepareEn
io.dcache.req.bits.addr := getAddr(tagRead(prepareIdx))
io.dcache.req.bits.data := bufferRead(prepareIdx).data
io.dcache.req.bits.mask := bufferRead(prepareIdx).mask
io.dcache.req.bits.cmd := MemoryOpConstants.M_XWR
io.dcache.req.bits.meta := DontCare
io.dcache.req.bits.meta.id := prepareIdx
val dcacheReqValid = RegInit(false.B)
val dcacheCandidate = Reg(new DCacheLineReq)
when(io.dcache.req.fire()){
dcacheReqValid := false.B
}
when(prepareEn && (!dcacheReqValid || io.dcache.req.fire())) {
dcacheCandidate.addr := getAddr(tagRead(prepareIdx))
dcacheCandidate.data := bufferRead(prepareIdx).data
dcacheCandidate.mask := bufferRead(prepareIdx).mask
dcacheCandidate.cmd := MemoryOpConstants.M_XWR
dcacheCandidate.meta := DontCare
dcacheCandidate.meta.id := prepareIdx
stateVec(prepareIdx) := s_inflight
dcacheReqValid := true.B
}
io.dcache.req.valid := dcacheReqValid
io.dcache.req.bits := dcacheCandidate
// evictionEntry.ready := io.dcache.req.ready
XSDebug(io.dcache.req.fire(),
......@@ -376,6 +410,13 @@ class NewSbuffer extends XSModule with HasSbufferCst {
XSDebug(p"recv cache resp: id=[$respId]\n")
}
if (env.DualCoreDifftest) {
difftestIO.sbufferResp := WireInit(io.dcache.resp.fire())
difftestIO.sbufferAddr := WireInit(getAddr(tagRead(respId)))
difftestIO.sbufferData := WireInit(bufferRead(respId).data.asTypeOf(Vec(CacheLineBytes, UInt(8.W))))
difftestIO.sbufferMask := WireInit(bufferRead(respId).mask)
}
val needSpace = (io.in(0).fire && !canMerge(0)) +& (io.in(1).fire && !canMerge(1) && !sameTag)
invalidCount := invalidCount - needSpace + io.dcache.resp.fire()
validCount := validCount + needSpace - prepareValid
......@@ -388,7 +429,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
// every cycle cohCount+=1
// if cohCount(countBits-1)==1,evict
for(i <- 0 until StoreBufferSize){
when(stateVec(i) === s_valid){
when(isValid(stateVec(i))){
when(cohCount(i)(countBits-1)){
assert(stateVec(i) === s_valid)
stateUpdate(i) := s_prepare
......@@ -401,9 +442,9 @@ class NewSbuffer extends XSModule with HasSbufferCst {
for ((forward, i) <- io.forward.zipWithIndex) {
val tag_matches = widthMap(i => tagRead(i) === getTag(forward.paddr))
val valid_tag_matches = widthMap(i => tag_matches(i) && stateVec(i) === s_valid)
val valid_tag_matches = widthMap(i => tag_matches(i) && isValid(stateVec(i)))
val inflight_tag_matches = widthMap(i =>
tag_matches(i) && (stateVec(i) === s_inflight || stateVec(i) === s_prepare)
tag_matches(i) && (isInflight(stateVec(i)) || isPrepare(stateVec(i)))
)
val line_offset_mask = UIntToOH(getWordOffset(forward.paddr))
......
......@@ -21,5 +21,7 @@ extern "C" void xs_assert(long long line) {
}
void sig_handler(int signo) {
if (signal_num != 0)
exit(0);
signal_num = signo;
}
......@@ -16,7 +16,7 @@ int isGzFile(const char *filename) {
long snapshot_compressToFile(uint8_t *ptr, const char *filename, long buf_size) {
gzFile compressed_mem = gzopen(filename, "wb");
if(compressed_mem == NULL) {
if (compressed_mem == NULL) {
printf("Can't open compressed binary file '%s'", filename);
return -1;
}
......@@ -44,7 +44,7 @@ long snapshot_compressToFile(uint8_t *ptr, const char *filename, long buf_size)
delete [] temp_page;
if(gzclose(compressed_mem)) {
if (gzclose(compressed_mem)) {
printf("Error closing '%s'\n", filename);
return -1;
}
......@@ -55,7 +55,7 @@ long readFromGz(void* ptr, const char *file_name, long buf_size, uint8_t load_ty
assert(buf_size > 0);
gzFile compressed_mem = gzopen(file_name, "rb");
if(compressed_mem == NULL) {
if (compressed_mem == NULL) {
printf("Can't open compressed binary file '%s'", file_name);
return -1;
}
......
......@@ -12,19 +12,19 @@
#define DEBUG_RETIRE_TRACE_SIZE 16
#define DEBUG_WB_TRACE_SIZE 16
void (*ref_difftest_memcpy_from_dut)(paddr_t dest, void *src, size_t n) = NULL;
void (*ref_difftest_memcpy_from_ref)(void *dest, paddr_t src, size_t n) = NULL;
void (*ref_difftest_getregs)(void *c) = NULL;
void (*ref_difftest_setregs)(const void *c) = NULL;
void (*ref_difftest_get_mastatus)(void *s) = NULL;
void (*ref_difftest_set_mastatus)(const void *s) = NULL;
void (*ref_difftest_get_csr)(void *c) = NULL;
void (*ref_difftest_set_csr)(const void *c) = NULL;
vaddr_t (*ref_disambiguate_exec)(void *disambiguate_para) = NULL;
int (*ref_difftest_store_commit)(uint64_t *saddr, uint64_t *sdata, uint8_t *smask) = NULL;
static void (*ref_difftest_exec)(uint64_t n) = NULL;
static void (*ref_difftest_raise_intr)(uint64_t NO) = NULL;
static void (*ref_isa_reg_display)(void) = NULL;
void (*ref_difftest_memcpy_from_dut)(paddr_t dest, void *src, size_t n, int coreid) = NULL;
void (*ref_difftest_memcpy_from_ref)(void *dest, paddr_t src, size_t n, int coreid) = NULL;
void (*ref_difftest_getregs)(void *c, int coreid) = NULL;
void (*ref_difftest_setregs)(const void *c, int coreid) = NULL;
void (*ref_difftest_get_mastatus)(void *s, int coreid) = NULL;
void (*ref_difftest_set_mastatus)(const void *s, int coreid) = NULL;
void (*ref_difftest_get_csr)(void *c, int coreid) = NULL;
void (*ref_difftest_set_csr)(const void *c, int coreid) = NULL;
vaddr_t (*ref_disambiguate_exec)(void *disambiguate_para, int coreid) = NULL;
int (*ref_difftest_store_commit)(uint64_t *saddr, uint64_t *sdata, uint8_t *smask, int coreid) = NULL;
static void (*ref_difftest_exec)(uint64_t n, int coreid) = NULL;
static void (*ref_difftest_raise_intr)(uint64_t NO, int coreid) = NULL;
static void (*ref_isa_reg_display)(int coreid) = NULL;
static bool is_skip_ref;
static bool is_skip_dut;
......@@ -41,7 +41,7 @@ void difftest_skip_ref() {
void difftest_skip_dut() {
if (is_skip_dut) return;
ref_difftest_exec(1);
ref_difftest_exec(1, 0);
is_skip_dut = true;
}
......@@ -51,49 +51,49 @@ void init_difftest() {
puts("Using " REF_SO " for difftest");
assert(handle);
ref_difftest_memcpy_from_dut = (void (*)(paddr_t, void *, size_t))dlsym(handle, "difftest_memcpy_from_dut");
ref_difftest_memcpy_from_dut = (void (*)(paddr_t, void *, size_t, int))dlsym(handle, "difftest_memcpy_from_dut");
assert(ref_difftest_memcpy_from_dut);
ref_difftest_memcpy_from_ref = (void (*)(void *, paddr_t, size_t))dlsym(handle, "difftest_memcpy_from_ref");
ref_difftest_memcpy_from_ref = (void (*)(void *, paddr_t, size_t, int))dlsym(handle, "difftest_memcpy_from_ref");
assert(ref_difftest_memcpy_from_ref);
ref_difftest_getregs = (void (*)(void *))dlsym(handle, "difftest_getregs");
ref_difftest_getregs = (void (*)(void *, int))dlsym(handle, "difftest_getregs");
assert(ref_difftest_getregs);
ref_difftest_setregs = (void (*)(const void *))dlsym(handle, "difftest_setregs");
ref_difftest_setregs = (void (*)(const void *, int))dlsym(handle, "difftest_setregs");
assert(ref_difftest_setregs);
ref_difftest_get_mastatus = (void (*)(void *))dlsym(handle, "difftest_get_mastatus");
ref_difftest_get_mastatus = (void (*)(void *, int))dlsym(handle, "difftest_get_mastatus");
assert(ref_difftest_get_mastatus);
ref_difftest_set_mastatus = (void (*)(const void *))dlsym(handle, "difftest_set_mastatus");
ref_difftest_set_mastatus = (void (*)(const void *, int))dlsym(handle, "difftest_set_mastatus");
assert(ref_difftest_set_mastatus);
ref_difftest_get_csr = (void (*)(void *))dlsym(handle, "difftest_get_csr");
ref_difftest_get_csr = (void (*)(void *, int))dlsym(handle, "difftest_get_csr");
assert(ref_difftest_get_csr);
ref_difftest_set_csr = (void (*)(const void *))dlsym(handle, "difftest_set_csr");
ref_difftest_set_csr = (void (*)(const void *, int))dlsym(handle, "difftest_set_csr");
assert(ref_difftest_set_csr);
ref_disambiguate_exec = (vaddr_t (*)(void *))dlsym(handle, "disambiguate_exec");
ref_disambiguate_exec = (vaddr_t (*)(void *, int))dlsym(handle, "disambiguate_exec");
assert(ref_disambiguate_exec);
ref_difftest_store_commit = (int (*)(uint64_t*, uint64_t*, uint8_t*))dlsym(handle, "difftest_store_commit");
ref_difftest_store_commit = (int (*)(uint64_t*, uint64_t*, uint8_t*, int))dlsym(handle, "difftest_store_commit");
assert(ref_difftest_store_commit);
ref_difftest_exec = (void (*)(uint64_t))dlsym(handle, "difftest_exec");
ref_difftest_exec = (void (*)(uint64_t, int))dlsym(handle, "difftest_exec");
assert(ref_difftest_exec);
ref_difftest_raise_intr = (void (*)(uint64_t))dlsym(handle, "difftest_raise_intr");
ref_difftest_raise_intr = (void (*)(uint64_t, int))dlsym(handle, "difftest_raise_intr");
assert(ref_difftest_raise_intr);
ref_isa_reg_display = (void (*)(void))dlsym(handle, "isa_reg_display");
ref_isa_reg_display = (void (*)(int))dlsym(handle, "isa_reg_display");
assert(ref_isa_reg_display);
void (*ref_difftest_init)(void) = (void (*)(void))dlsym(handle, "difftest_init");
void (*ref_difftest_init)(int) = (void (*)(int))dlsym(handle, "difftest_init");
assert(ref_difftest_init);
ref_difftest_init();
ref_difftest_init(0);
}
static const char *reg_name[DIFFTEST_NR_REG] = {
......@@ -140,7 +140,7 @@ void difftest_display(uint8_t mode) {
j, pc_wb_queue[j], wen_wb_queue[j]!=0, wdst_wb_queue[j], wdata_wb_queue[j], (j==((wb_pointer-1)%DEBUG_WB_TRACE_SIZE))?"<--":"");
}
printf("\n============== Reg Diff ==============\n");
ref_isa_reg_display();
ref_isa_reg_display(0);
printf("priviledgeMode: %d\n", mode);
}
......@@ -171,12 +171,12 @@ int difftest_step(DiffState *s) {
struct SyncState sync;
sync.lrscValid = 0;
sync.lrscAddr = 0;
ref_difftest_set_mastatus((uint64_t*)&sync); // sync lr/sc microarchitectural regs
ref_difftest_set_mastatus((uint64_t*)&sync, 0); // sync lr/sc microarchitectural regs
}
// single step difftest
if (s->intrNO) {
ref_difftest_raise_intr(s->intrNO);
ref_difftest_raise_intr(s->intrNO, 0);
// ref_difftest_exec(1);//TODO
}
else {
......@@ -191,14 +191,14 @@ int difftest_step(DiffState *s) {
// MMIO accessing should not be a branch or jump, just +2/+4 to get the next pc
// printf("SKIP %d\n", i);
// to skip the checking of an instruction, just copy the reg state to reference design
ref_difftest_getregs(&ref_r);
ref_difftest_getregs(&ref_r, 0);
ref_r[DIFFTEST_THIS_PC] += selectBit(s->isRVC, i) ? 2 : 4;
if(selectBit(s->wen, i)){
if(s->wdst[i] != 0){
ref_r[s->wdst[i]] = s->wdata[i];
}
}
ref_difftest_setregs(ref_r);
ref_difftest_setregs(ref_r, 0);
}else{
// single step exec
// IPF, LPF, SPF
......@@ -208,14 +208,14 @@ int difftest_step(DiffState *s) {
ds.exceptionNo = s->cause;
ds.mtval = s->reg_scala[DIFFTEST_MTVAL];
ds.stval = s->reg_scala[DIFFTEST_STVAL];
ref_disambiguate_exec(&ds);
ref_disambiguate_exec(&ds, 0);
}else{
ref_difftest_exec(1);
ref_difftest_exec(1, 0);
}
}
}
}
ref_difftest_getregs(&ref_r);
ref_difftest_getregs(&ref_r, 0);
uint64_t next_pc = ref_r[DIFFTEST_THIS_PC];
pc_retire_pointer = (pc_retire_pointer+1) % DEBUG_RETIRE_TRACE_SIZE;
......@@ -255,5 +255,5 @@ int difftest_step(DiffState *s) {
}
int difftest_store_step(uint64_t *saddr, uint64_t *sdata, uint8_t *smask) {
return ref_difftest_store_commit(saddr, sdata, smask);
return ref_difftest_store_commit(saddr, sdata, smask, 0);
}
......@@ -82,16 +82,16 @@ struct DisambiguationState {
uint64_t stval;
};
extern void (*ref_difftest_memcpy_from_dut)(paddr_t dest, void *src, size_t n);
extern void (*ref_difftest_memcpy_from_ref)(void *dest, paddr_t src, size_t n);
extern void (*ref_difftest_getregs)(void *c);
extern void (*ref_difftest_setregs)(const void *c);
extern void (*ref_difftest_get_mastatus)(void *s);
extern void (*ref_difftest_set_mastatus)(const void *s);
extern void (*ref_difftest_get_csr)(void *c);
extern void (*ref_difftest_set_csr)(const void *c);
extern vaddr_t (*ref_disambiguate_exec)(void *disambiguate_para);
extern int (*ref_difftest_store_commit)(uint64_t *saddr, uint64_t *sdata, uint8_t *smask);
extern void (*ref_difftest_memcpy_from_dut)(paddr_t dest, void *src, size_t n, int coreid);
extern void (*ref_difftest_memcpy_from_ref)(void *dest, paddr_t src, size_t n, int coreid);
extern void (*ref_difftest_getregs)(void *c, int coreid);
extern void (*ref_difftest_setregs)(const void *c, int coreid);
extern void (*ref_difftest_get_mastatus)(void *s, int coreid);
extern void (*ref_difftest_set_mastatus)(const void *s, int coreid);
extern void (*ref_difftest_get_csr)(void *c, int coreid);
extern void (*ref_difftest_set_csr)(const void *c, int coreid);
extern vaddr_t (*ref_disambiguate_exec)(void *disambiguate_para, int coreid);
extern int (*ref_difftest_store_commit)(uint64_t *saddr, uint64_t *sdata, uint8_t *smask, int coreid);
void init_difftest();
int difftest_step(DiffState *s);
......
......@@ -276,19 +276,26 @@ uint64_t Emulator::execute(uint64_t max_cycle, uint64_t max_instr) {
extern uint32_t uptime(void);
uint32_t lasttime_poll = 0;
uint32_t lasttime_snapshot = 0;
uint64_t lastcommit = max_cycle;
uint64_t instr_left_last_cycle = max_instr;
uint64_t lastcommit[NumCore];
uint64_t instr_left_last_cycle[NumCore];
const int stuck_limit = 2000;
uint64_t core_max_instr[NumCore];
uint32_t wdst[NumCore][DIFFTEST_WIDTH];
uint64_t wdata[NumCore][DIFFTEST_WIDTH];
uint64_t wpc[NumCore][DIFFTEST_WIDTH];
uint64_t reg[NumCore][DIFFTEST_NR_REG];
DiffState diff[NumCore];
for (int i = 0; i < NumCore; i++) {
diff[i].reg_scala = reg[i];
diff[i].wpc = wpc[i];
diff[i].wdata = wdata[i];
diff[i].wdst = wdst[i];
lastcommit[i] = max_cycle;
instr_left_last_cycle[i] = max_cycle;
core_max_instr[i] = max_instr;
}
uint32_t wdst[DIFFTEST_WIDTH];
uint64_t wdata[DIFFTEST_WIDTH];
uint64_t wpc[DIFFTEST_WIDTH];
uint64_t reg[DIFFTEST_NR_REG];
DiffState diff;
diff.reg_scala = reg;
diff.wpc = wpc;
diff.wdata = wdata;
diff.wdst = wdst;
#if VM_COVERAGE == 1
// we dump coverage into files at the end
......@@ -298,8 +305,10 @@ uint64_t Emulator::execute(uint64_t max_cycle, uint64_t max_instr) {
#endif
while (!Verilated::gotFinish() && trapCode == STATE_RUNNING) {
if (!(max_cycle > 0 && max_instr > 0 && instr_left_last_cycle >= max_instr /* handle overflow */)) {
trapCode = STATE_LIMIT_EXCEEDED;
if (!(max_cycle > 0 &&
core_max_instr[0] > 0 &&
instr_left_last_cycle[0] >= core_max_instr[0])) {
trapCode = STATE_LIMIT_EXCEEDED; /* handle overflow */
break;
}
if (assert_count > 0) {
......@@ -319,7 +328,7 @@ uint64_t Emulator::execute(uint64_t max_cycle, uint64_t max_instr) {
if (dut_ptr->io_trap_valid) trapCode = dut_ptr->io_trap_code;
if (trapCode != STATE_RUNNING) break;
if (lastcommit - max_cycle > stuck_limit && hascommit) {
if (lastcommit[0] - max_cycle > stuck_limit && hascommit) {
eprintf("No instruction commits for %d cycles, maybe get stuck\n"
"(please also check whether a fence.i instruction requires more than %d cycles to flush the icache)\n",
stuck_limit, stuck_limit);
......@@ -329,57 +338,66 @@ uint64_t Emulator::execute(uint64_t max_cycle, uint64_t max_instr) {
if (!hascommit && dut_ptr->io_difftest_commit && dut_ptr->io_difftest_thisPC == 0x80000000u) {
hascommit = 1;
read_emu_regs(reg);
read_emu_regs(reg[0]);
void* get_img_start();
long get_img_size();
ref_difftest_memcpy_from_dut(0x80000000, get_img_start(), get_img_size());
ref_difftest_setregs(reg);
ref_difftest_memcpy_from_dut(0x80000000, get_img_start(), get_img_size(), 0);
ref_difftest_setregs(reg[0], 0);
printf("The first instruction has commited. Difftest enabled. \n");
}
// difftest
if (dut_ptr->io_difftest_commit && hascommit) {
read_emu_regs(reg);
read_wb_info(wpc, wdata, wdst);
diff.commit = dut_ptr->io_difftest_commit;
diff.this_inst = dut_ptr->io_difftest_thisINST;
diff.skip = dut_ptr->io_difftest_skip;
diff.isRVC = dut_ptr->io_difftest_isRVC;
diff.wen = dut_ptr->io_difftest_wen;
diff.intrNO = dut_ptr->io_difftest_intrNO;
diff.cause = dut_ptr->io_difftest_cause;
diff.priviledgeMode = dut_ptr->io_difftest_priviledgeMode;
diff.sync.scFailed = dut_ptr->io_difftest_scFailed;
if (difftest_step(&diff)) {
trapCode = STATE_ABORT;
}
lastcommit = max_cycle;
// update instr_cnt
instr_left_last_cycle = max_instr;
max_instr -= diff.commit;
}
for (int i = 0; i < NumCore; i++) {
if (dut_ptr->io_difftest_commit && hascommit) {
read_emu_regs(reg[i]);
read_wb_info(wpc[i], wdata[i], wdst[i]);
diff[i].commit = dut_ptr->io_difftest_commit;
diff[i].this_inst = dut_ptr->io_difftest_thisINST;
diff[i].skip = dut_ptr->io_difftest_skip;
diff[i].isRVC = dut_ptr->io_difftest_isRVC;
diff[i].wen = dut_ptr->io_difftest_wen;
diff[i].intrNO = dut_ptr->io_difftest_intrNO;
diff[i].cause = dut_ptr->io_difftest_cause;
diff[i].priviledgeMode = dut_ptr->io_difftest_priviledgeMode;
diff[i].sync.scFailed = dut_ptr->io_difftest_scFailed;
if (i == 0) {
if (difftest_step(&diff[i])) {
trapCode = STATE_ABORT;
}
}
lastcommit[i] = max_cycle;
if (dut_ptr->io_difftest_storeCommit) {
read_store_info(diff.store_addr, diff.store_data, diff.store_mask);
for (int i = 0; i < dut_ptr->io_difftest_storeCommit; i++) {
auto addr = diff.store_addr[i];
auto data = diff.store_data[i];
auto mask = diff.store_mask[i];
if (difftest_store_step(&addr, &data, &mask)) {
difftest_display(dut_ptr->io_difftest_priviledgeMode);
printf("Mismatch for store commits: \n");
printf("REF commits addr 0x%lx, data 0x%lx, mask 0x%x\n", addr, data, mask);
printf("DUT commits addr 0x%lx, data 0x%lx, mask 0x%x\n",
diff.store_addr[i], diff.store_data[i], diff.store_mask[i]);
trapCode = STATE_ABORT;
break;
// update instr_cnt
instr_left_last_cycle[i] = core_max_instr[i];
core_max_instr[i] -= diff[i].commit;
}
#ifdef DIFFTEST_STORE_COMMIT
for (int core = 0; core < NumCore; core++) {
if (dut_ptr->io_difftest_storeCommit) {
read_store_info(diff[core].store_addr, diff[core].store_data, diff[core].store_mask);
for (int i = 0; i < dut_ptr->io_difftest_storeCommit; i++) {
auto addr = diff[core].store_addr[i];
auto data = diff[core].store_data[i];
auto mask = diff[core].store_mask[i];
if (difftest_store_step(&addr, &data, &mask)) {
difftest_display(dut_ptr->io_difftest_priviledgeMode);
printf("Mismatch for store commits: \n");
printf("REF commits addr 0x%lx, data 0x%lx, mask 0x%x\n", addr, data, mask);
printf("DUT commits addr 0x%lx, data 0x%lx, mask 0x%x\n",
diff[core].store_addr[i], diff[core].store_data[i], diff[core].store_mask[i]);
trapCode = STATE_ABORT;
break;
}
}
}
}
#endif
}
uint32_t t = uptime();
......@@ -504,23 +522,23 @@ void Emulator::snapshot_save(const char *filename) {
stream.unbuf_write(get_ram_start(), size);
uint64_t ref_r[DIFFTEST_NR_REG];
ref_difftest_getregs(&ref_r);
ref_difftest_getregs(&ref_r, 0);
stream.unbuf_write(ref_r, sizeof(ref_r));
uint64_t nemu_this_pc = get_nemu_this_pc();
stream.unbuf_write(&nemu_this_pc, sizeof(nemu_this_pc));
char *buf = (char *)mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
ref_difftest_memcpy_from_ref(buf, 0x80000000, size);
ref_difftest_memcpy_from_ref(buf, 0x80000000, size, 0);
stream.unbuf_write(buf, size);
munmap(buf, size);
struct SyncState sync_mastate;
ref_difftest_get_mastatus(&sync_mastate);
ref_difftest_get_mastatus(&sync_mastate, 0);
stream.unbuf_write(&sync_mastate, sizeof(struct SyncState));
uint64_t csr_buf[4096];
ref_difftest_get_csr(csr_buf);
ref_difftest_get_csr(csr_buf, 0);
stream.unbuf_write(&csr_buf, sizeof(csr_buf));
long sdcard_offset;
......@@ -553,7 +571,7 @@ void Emulator::snapshot_load(const char *filename) {
char *buf = (char *)mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
stream.read(buf, size);
ref_difftest_memcpy_from_dut(0x80000000, buf, size);
ref_difftest_memcpy_from_dut(0x80000000, buf, size, 0);
munmap(buf, size);
struct SyncState sync_mastate;
......
......@@ -7,6 +7,8 @@
#include <verilated_vcd_c.h> // Trace file format header
#define SNAPSHOT_INTERVAL 60 // unit: second
#define DIFFTEST_STORE_COMMIT
#define NumCore 1
struct EmuArgs {
uint32_t seed;
......
......@@ -53,6 +53,11 @@ class DiffTestIO extends XSBundle {
val storeAddr = Output(Vec(2, UInt(64.W)))
val storeData = Output(Vec(2, UInt(64.W)))
val storeMask = Output(Vec(2, UInt(8.W)))
val sbufferResp = Output(Bool())
val sbufferAddr = Output(UInt(64.W))
val sbufferData = Output(Vec(64, UInt(8.W)))
val sbufferMask = Output(UInt(64.W))
}
class LogCtrlIO extends Bundle {
......@@ -60,14 +65,6 @@ class LogCtrlIO extends Bundle {
val log_level = Input(UInt(64.W)) // a cpp uint
}
class TrapIO extends XSBundle {
val valid = Output(Bool())
val code = Output(UInt(3.W))
val pc = Output(UInt(VAddrBits.W))
val cycleCnt = Output(UInt(XLEN.W))
val instrCnt = Output(UInt(XLEN.W))
}
class XSSimSoC(axiSim: Boolean)(implicit p: config.Parameters) extends LazyModule with HasXSParameter {
// address space[0G - 1024G)
val fullRange = AddressSet(0x0L, 0xffffffffffL)
......@@ -112,11 +109,14 @@ class XSSimSoC(axiSim: Boolean)(implicit p: config.Parameters) extends LazyModul
lazy val module = new LazyModuleImp(this) {
val io = IO(new Bundle {
val difftest = new DiffTestIO
val difftest = new DiffTestIO
val difftest2 = new DiffTestIO
val logCtrl = new LogCtrlIO
val trap = new TrapIO
val trap2 = new TrapIO
val uart = new UARTIO
})
io.difftest2 <> DontCare
dontTouch(io.difftest)
dontTouch(io.logCtrl)
......@@ -129,58 +129,112 @@ class XSSimSoC(axiSim: Boolean)(implicit p: config.Parameters) extends LazyModul
soc.module.io.extIntrs(i) := false.B
}
val difftest = WireInit(0.U.asTypeOf(new DiffTestIO))
val difftest = Seq(WireInit(0.U.asTypeOf(new DiffTestIO)), WireInit(0.U.asTypeOf(new DiffTestIO)))
val trap = Seq(WireInit(0.U.asTypeOf(new TrapIO)), WireInit(0.U.asTypeOf(new TrapIO)))
if (!env.FPGAPlatform) {
ExcitingUtils.addSink(difftest.commit, "difftestCommit", Debug)
ExcitingUtils.addSink(difftest.thisPC, "difftestThisPC", Debug)
ExcitingUtils.addSink(difftest.thisINST, "difftestThisINST", Debug)
ExcitingUtils.addSink(difftest.skip, "difftestSkip", Debug)
ExcitingUtils.addSink(difftest.isRVC, "difftestIsRVC", Debug)
ExcitingUtils.addSink(difftest.wen, "difftestWen", Debug)
ExcitingUtils.addSink(difftest.wdata, "difftestWdata", Debug)
ExcitingUtils.addSink(difftest.wdst, "difftestWdst", Debug)
ExcitingUtils.addSink(difftest.wpc, "difftestWpc", Debug)
ExcitingUtils.addSink(difftest.intrNO, "difftestIntrNO", Debug)
ExcitingUtils.addSink(difftest.cause, "difftestCause", Debug)
ExcitingUtils.addSink(difftest.r, "difftestRegs", Debug)
ExcitingUtils.addSink(difftest.priviledgeMode, "difftestMode", Debug)
ExcitingUtils.addSink(difftest.mstatus, "difftestMstatus", Debug)
ExcitingUtils.addSink(difftest.sstatus, "difftestSstatus", Debug)
ExcitingUtils.addSink(difftest.mepc, "difftestMepc", Debug)
ExcitingUtils.addSink(difftest.sepc, "difftestSepc", Debug)
ExcitingUtils.addSink(difftest.mtval, "difftestMtval", Debug)
ExcitingUtils.addSink(difftest.stval, "difftestStval", Debug)
ExcitingUtils.addSink(difftest.mtvec, "difftestMtvec", Debug)
ExcitingUtils.addSink(difftest.stvec, "difftestStvec", Debug)
ExcitingUtils.addSink(difftest.mcause, "difftestMcause", Debug)
ExcitingUtils.addSink(difftest.scause, "difftestScause", Debug)
ExcitingUtils.addSink(difftest.satp, "difftestSatp", Debug)
ExcitingUtils.addSink(difftest.mip, "difftestMip", Debug)
ExcitingUtils.addSink(difftest.mie, "difftestMie", Debug)
ExcitingUtils.addSink(difftest.mscratch, "difftestMscratch", Debug)
ExcitingUtils.addSink(difftest.sscratch, "difftestSscratch", Debug)
ExcitingUtils.addSink(difftest.mideleg, "difftestMideleg", Debug)
ExcitingUtils.addSink(difftest.medeleg, "difftestMedeleg", Debug)
ExcitingUtils.addSink(difftest.scFailed, "difftestScFailed", Debug)
ExcitingUtils.addSink(difftest.storeCommit, "difftestStoreCommit", Debug)
ExcitingUtils.addSink(difftest.storeAddr, "difftestStoreAddr", Debug)
ExcitingUtils.addSink(difftest.storeData, "difftestStoreData", Debug)
ExcitingUtils.addSink(difftest.storeMask, "difftestStoreMask", Debug)
ExcitingUtils.addSink(difftest(0).commit, "difftestCommit", Debug)
ExcitingUtils.addSink(difftest(0).thisPC, "difftestThisPC", Debug)
ExcitingUtils.addSink(difftest(0).thisINST, "difftestThisINST", Debug)
ExcitingUtils.addSink(difftest(0).skip, "difftestSkip", Debug)
ExcitingUtils.addSink(difftest(0).isRVC, "difftestIsRVC", Debug)
ExcitingUtils.addSink(difftest(0).wen, "difftestWen", Debug)
ExcitingUtils.addSink(difftest(0).wdata, "difftestWdata", Debug)
ExcitingUtils.addSink(difftest(0).wdst, "difftestWdst", Debug)
ExcitingUtils.addSink(difftest(0).wpc, "difftestWpc", Debug)
ExcitingUtils.addSink(difftest(0).intrNO, "difftestIntrNO", Debug)
ExcitingUtils.addSink(difftest(0).cause, "difftestCause", Debug)
ExcitingUtils.addSink(difftest(0).r, "difftestRegs", Debug)
ExcitingUtils.addSink(difftest(0).priviledgeMode, "difftestMode", Debug)
ExcitingUtils.addSink(difftest(0).mstatus, "difftestMstatus", Debug)
ExcitingUtils.addSink(difftest(0).sstatus, "difftestSstatus", Debug)
ExcitingUtils.addSink(difftest(0).mepc, "difftestMepc", Debug)
ExcitingUtils.addSink(difftest(0).sepc, "difftestSepc", Debug)
ExcitingUtils.addSink(difftest(0).mtval, "difftestMtval", Debug)
ExcitingUtils.addSink(difftest(0).stval, "difftestStval", Debug)
ExcitingUtils.addSink(difftest(0).mtvec, "difftestMtvec", Debug)
ExcitingUtils.addSink(difftest(0).stvec, "difftestStvec", Debug)
ExcitingUtils.addSink(difftest(0).mcause, "difftestMcause", Debug)
ExcitingUtils.addSink(difftest(0).scause, "difftestScause", Debug)
ExcitingUtils.addSink(difftest(0).satp, "difftestSatp", Debug)
ExcitingUtils.addSink(difftest(0).mip, "difftestMip", Debug)
ExcitingUtils.addSink(difftest(0).mie, "difftestMie", Debug)
ExcitingUtils.addSink(difftest(0).mscratch, "difftestMscratch", Debug)
ExcitingUtils.addSink(difftest(0).sscratch, "difftestSscratch", Debug)
ExcitingUtils.addSink(difftest(0).mideleg, "difftestMideleg", Debug)
ExcitingUtils.addSink(difftest(0).medeleg, "difftestMedeleg", Debug)
ExcitingUtils.addSink(difftest(0).scFailed, "difftestScFailed", Debug)
ExcitingUtils.addSink(difftest(0).storeCommit, "difftestStoreCommit", Debug)
ExcitingUtils.addSink(difftest(0).storeAddr, "difftestStoreAddr", Debug)
ExcitingUtils.addSink(difftest(0).storeData, "difftestStoreData", Debug)
ExcitingUtils.addSink(difftest(0).storeMask, "difftestStoreMask", Debug)
}
// BoringUtils.addSink(difftest.lrscAddr, "difftestLrscAddr")
io.difftest := difftest
if (env.DualCoreDifftest) {
for (i <- 0 until NumCores) {
difftest(i).commit := soc.module.difftestIO(i).fromRoq.commit
difftest(i).thisPC := soc.module.difftestIO(i).fromRoq.thisPC
difftest(i).thisINST := soc.module.difftestIO(i).fromRoq.thisINST
difftest(i).skip := soc.module.difftestIO(i).fromRoq.skip
difftest(i).isRVC := soc.module.difftestIO(i).fromRoq.isRVC
difftest(i).wen := soc.module.difftestIO(i).fromRoq.wen
difftest(i).wdata := soc.module.difftestIO(i).fromRoq.wdata
difftest(i).wdst := soc.module.difftestIO(i).fromRoq.wdst
difftest(i).wpc := soc.module.difftestIO(i).fromRoq.wpc
difftest(i).scFailed := soc.module.difftestIO(i).fromRoq.scFailed
difftest(i).r := soc.module.difftestIO(i).fromXSCore.r
difftest(i).intrNO := soc.module.difftestIO(i).fromCSR.intrNO
difftest(i).cause := soc.module.difftestIO(i).fromCSR.cause
difftest(i).priviledgeMode := soc.module.difftestIO(i).fromCSR.priviledgeMode
difftest(i).mstatus := soc.module.difftestIO(i).fromCSR.mstatus
difftest(i).sstatus := soc.module.difftestIO(i).fromCSR.sstatus
difftest(i).mepc := soc.module.difftestIO(i).fromCSR.mepc
difftest(i).sepc := soc.module.difftestIO(i).fromCSR.sepc
difftest(i).mtval := soc.module.difftestIO(i).fromCSR.mtval
difftest(i).stval := soc.module.difftestIO(i).fromCSR.stval
difftest(i).mtvec := soc.module.difftestIO(i).fromCSR.mtvec
difftest(i).stvec := soc.module.difftestIO(i).fromCSR.stvec
difftest(i).mcause := soc.module.difftestIO(i).fromCSR.mcause
difftest(i).scause := soc.module.difftestIO(i).fromCSR.scause
difftest(i).satp := soc.module.difftestIO(i).fromCSR.satp
difftest(i).mip := soc.module.difftestIO(i).fromCSR.mip
difftest(i).mie := soc.module.difftestIO(i).fromCSR.mie
difftest(i).mscratch := soc.module.difftestIO(i).fromCSR.mscratch
difftest(i).sscratch := soc.module.difftestIO(i).fromCSR.sscratch
difftest(i).mideleg := soc.module.difftestIO(i).fromCSR.mideleg
difftest(i).medeleg := soc.module.difftestIO(i).fromCSR.medeleg
difftest(i).storeCommit := soc.module.difftestIO(i).fromSQ.storeCommit
difftest(i).storeAddr := soc.module.difftestIO(i).fromSQ.storeAddr
difftest(i).storeData := soc.module.difftestIO(i).fromSQ.storeData
difftest(i).storeMask := soc.module.difftestIO(i).fromSQ.storeMask
val trap = WireInit(0.U.asTypeOf(new TrapIO))
difftest(i).sbufferResp := soc.module.difftestIO(i).fromSbuffer.sbufferResp
difftest(i).sbufferAddr := soc.module.difftestIO(i).fromSbuffer.sbufferAddr
difftest(i).sbufferData := soc.module.difftestIO(i).fromSbuffer.sbufferData
difftest(i).sbufferMask := soc.module.difftestIO(i).fromSbuffer.sbufferMask
trap(i) <> soc.module.trapIO(i)
}
}
if (!env.FPGAPlatform) {
ExcitingUtils.addSink(trap.valid, "trapValid")
ExcitingUtils.addSink(trap.code, "trapCode")
ExcitingUtils.addSink(trap.pc, "trapPC")
ExcitingUtils.addSink(trap.cycleCnt, "trapCycleCnt")
ExcitingUtils.addSink(trap.instrCnt, "trapInstrCnt")
ExcitingUtils.addSink(trap(0).valid, "trapValid")
ExcitingUtils.addSink(trap(0).code, "trapCode")
ExcitingUtils.addSink(trap(0).pc, "trapPC")
ExcitingUtils.addSink(trap(0).cycleCnt, "trapCycleCnt")
ExcitingUtils.addSink(trap(0).instrCnt, "trapInstrCnt")
}
io.trap := trap
io.difftest := difftest(0)
io.trap := trap(0)
if (env.DualCoreDifftest) {
io.difftest2 := difftest(1)
io.trap2 := trap(1)
}
if (env.EnableDebug) {
val timer = GTimer()
......@@ -213,17 +267,24 @@ class XSSimTop(axiSim: Boolean)(implicit p: config.Parameters) extends LazyModul
lazy val module = new LazyModuleImp(this) {
val io = IO(new Bundle {
val difftest = new DiffTestIO
val difftest = new DiffTestIO
val difftest2 = new DiffTestIO
val logCtrl = new LogCtrlIO
val trap = new TrapIO
val trap2 = new TrapIO
val uart = new UARTIO
val memAXI = if (axiSim) chiselTypeOf(axiSimRam.module.io) else Input(Bool())
})
io.difftest2 <> DontCare
io.difftest <> dut.module.io.difftest
io.difftest <> dut.module.io.difftest
io.logCtrl <> dut.module.io.logCtrl
io.trap <> dut.module.io.trap
io.uart <> dut.module.io.uart
if (env.DualCoreDifftest) {
io.difftest2 <> dut.module.io.difftest2
io.trap2 <> dut.module.io.trap2
}
if (axiSim) {
io.memAXI <> axiSimRam.module.io
}
......
......@@ -31,15 +31,25 @@ object AddSinks {
"perfCntCondMbpIWrong",
"perfCntCondMbpRRight",
"perfCntCondMbpRWrong",
"perfCntS1Right",
"perfCntS1Wrong",
"perfCntS2Right",
"perfCntS2Wrong",
"perfCntS3Right",
"perfCntS3Wrong",
"perfCntubtbRight",
"perfCntubtbWrong",
"perfCntbtbRight",
"perfCntbtbWrong",
"perfCnttageRight",
"perfCnttageWrong",
"perfCntrasRight",
"perfCntrasWrong",
"perfCntloopRight",
"perfCntloopWrong",
"perfCntLoopExit",
"perfCntTakenAndRight",
"perfCntTakenButWrong",
// "CntFetchFromICache",
// "CntFetchFromLoopBuffer",
// "CntExitLoop1",
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册