IFU.scala 31.8 KB
Newer Older
L
Lingrui98 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

J
JinYue 已提交
17 18 19 20 21
package xiangshan.frontend

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
J
Jay 已提交
22
import freechips.rocketchip.rocket.RVCDecoder
J
JinYue 已提交
23
import xiangshan._
J
JinYue 已提交
24
import xiangshan.cache.mmu._
25
import xiangshan.frontend.icache._
J
JinYue 已提交
26
import utils._
L
Lemover 已提交
27
import xiangshan.backend.fu.{PMPReqBundle, PMPRespBundle}
J
JinYue 已提交
28

29 30
trait HasInstrMMIOConst extends HasXSParameter with HasIFUConst{
  def mmioBusWidth = 64
31
  def mmioBusBytes = mmioBusWidth / 8
32
  def maxInstrLen = 32
33 34
}

J
Jay 已提交
35
trait HasIFUConst extends HasXSParameter{
36 37 38
  def addrAlign(addr: UInt, bytes: Int, highest: Int): UInt = Cat(addr(highest-1, log2Ceil(bytes)), 0.U(log2Ceil(bytes).W))
  def fetchQueueSize = 2

J
Jay 已提交
39 40 41
  def getBasicBlockIdx( pc: UInt, start:  UInt ): UInt = {
    val byteOffset = pc - start
    (byteOffset - instBytes.U)(log2Ceil(PredictWidth),instOffsetBits)
42
  }
43
}
44

45 46 47 48
class IfuToFtqIO(implicit p:Parameters) extends XSBundle {
  val pdWb = Valid(new PredecodeWritebackBundle)
}

J
JinYue 已提交
49
class FtqInterface(implicit p: Parameters) extends XSBundle {
Z
zoujr 已提交
50
  val fromFtq = Flipped(new FtqToIfuIO)
Y
Yinan Xu 已提交
51
  val toFtq   = new IfuToFtqIO
52 53
}

54 55 56 57
class UncacheInterface(implicit p: Parameters) extends XSBundle {
  val fromUncache = Flipped(DecoupledIO(new InsUncacheResp))
  val toUncache   = DecoupledIO( new InsUncacheReq )
}
58
class NewIFUIO(implicit p: Parameters) extends XSBundle {
Y
Yinan Xu 已提交
59
  val ftqInter        = new FtqInterface
60 61 62
  val icacheInter     = Vec(2, Flipped(new ICacheMainPipeBundle))
  val icacheStop      = Output(Bool())
  val icachePerfInfo  = Input(new ICachePerfInfo)
63
  val toIbuffer       = Decoupled(new FetchToIBuffer)
64
  val uncacheInter   =  new UncacheInterface
65 66
  val frontendTrigger = Flipped(new FrontendTdataDistributeIO)
  val csrTriggerEnable = Input(Vec(4, Bool()))
67
  val rob_commits = Flipped(Vec(CommitWidth, Valid(new RobCommitInfo)))
68 69
  val iTLBInter       = new BlockTlbRequestIO
  val pmp             =   new IPrefetchPMPBundle
J
JinYue 已提交
70 71
}

72 73 74 75 76 77 78 79
// record the situation in which fallThruAddr falls into
// the middle of an RVI inst
class LastHalfInfo(implicit p: Parameters) extends XSBundle {
  val valid = Bool()
  val middlePC = UInt(VAddrBits.W)
  def matchThisBlock(startAddr: UInt) = valid && middlePC === startAddr
}

80
class IfuToPreDecode(implicit p: Parameters) extends XSBundle {
J
Jay 已提交
81 82 83 84 85 86 87 88
  val data                =  if(HasCExtension) Vec(PredictWidth + 1, UInt(16.W)) else Vec(PredictWidth, UInt(32.W))
  val frontendTrigger     = new FrontendTdataDistributeIO
  val csrTriggerEnable    = Vec(4, Bool())
  val pc                  = Vec(PredictWidth, UInt(VAddrBits.W))
}


class IfuToPredChecker(implicit p: Parameters) extends XSBundle {
89
  val ftqOffset     = Valid(UInt(log2Ceil(PredictWidth).W))
J
Jay 已提交
90
  val jumpOffset    = Vec(PredictWidth, UInt(XLEN.W))
J
jinyue110 已提交
91
  val target        = UInt(VAddrBits.W)
J
Jay 已提交
92 93 94 95
  val instrRange    = Vec(PredictWidth, Bool())
  val instrValid    = Vec(PredictWidth, Bool())
  val pds           = Vec(PredictWidth, new PreDecodeInfo)
  val pc            = Vec(PredictWidth, UInt(VAddrBits.W))
96 97
}

J
Jay 已提交
98 99 100 101 102 103
class NewIFU(implicit p: Parameters) extends XSModule
  with HasICacheParameters
  with HasIFUConst
  with HasPdConst
  with HasCircularQueuePtrHelper
  with HasPerfEvents
J
JinYue 已提交
104
{
105
  println(s"icache ways: ${nWays} sets:${nSets}")
J
JinYue 已提交
106
  val io = IO(new NewIFUIO)
107
  val (toFtq, fromFtq)    = (io.ftqInter.toFtq, io.ftqInter.fromFtq)
108
  val (toICache, fromICache) = (VecInit(io.icacheInter.map(_.req)), VecInit(io.icacheInter.map(_.resp)))
109
  val (toUncache, fromUncache) = (io.uncacheInter.toUncache , io.uncacheInter.fromUncache)
Y
Yinan Xu 已提交
110

111
  def isCrossLineReq(start: UInt, end: UInt): Bool = start(blockOffBits) ^ end(blockOffBits)
112

J
JinYue 已提交
113
  def isLastInCacheline(addr: UInt): Bool = addr(blockOffBits - 1, 1) === 0.U
114

115 116 117 118
  class TlbExept(implicit p: Parameters) extends XSBundle{
    val pageFault = Bool()
    val accessFault = Bool()
    val mmio = Bool()
119
  }
J
JinYue 已提交
120

J
Jay 已提交
121 122 123 124 125
  val preDecoder      = Module(new PreDecode)
  val predChecker     = Module(new PredChecker)
  val frontendTrigger = Module(new FrontendTrigger)
  val (preDecoderIn, preDecoderOut)   = (preDecoder.io.in, preDecoder.io.out)
  val (checkerIn, checkerOut)         = (predChecker.io.in, predChecker.io.out)
126

127 128
  io.iTLBInter.resp.ready := true.B 

129 130 131 132 133 134
  /**
    ******************************************************************************
    * IFU Stage 0
    * - send cacheline fetch request to ICacheMainPipe
    ******************************************************************************
    */
Y
Yinan Xu 已提交
135

J
JinYue 已提交
136
  val f0_valid                             = fromFtq.req.valid
Z
zoujr 已提交
137
  val f0_ftq_req                           = fromFtq.req.bits
J
JinYue 已提交
138
  val f0_doubleLine                        = fromFtq.req.bits.crossCacheline
J
JinYue 已提交
139
  val f0_vSetIdx                           = VecInit(get_idx((f0_ftq_req.startAddr)), get_idx(f0_ftq_req.nextlineStart))
Z
zoujr 已提交
140
  val f0_fire                              = fromFtq.req.fire()
J
JinYue 已提交
141

J
JinYue 已提交
142
  val f0_flush, f1_flush, f2_flush, f3_flush = WireInit(false.B)
143
  val from_bpu_f0_flush, from_bpu_f1_flush, from_bpu_f2_flush, from_bpu_f3_flush = WireInit(false.B)
Y
Yinan Xu 已提交
144

L
Lingrui98 已提交
145 146
  from_bpu_f0_flush := fromFtq.flushFromBpu.shouldFlushByStage2(f0_ftq_req.ftqIdx)/*  ||
                       fromFtq.flushFromBpu.shouldFlushByStage3(f0_ftq_req.ftqIdx) */
147

J
Jay 已提交
148 149 150 151 152 153
  val wb_redirect , mmio_redirect,  backend_redirect= WireInit(false.B)
  val f3_wb_not_flush = WireInit(false.B)

  backend_redirect := fromFtq.redirect.valid
  f3_flush := backend_redirect || (wb_redirect && !f3_wb_not_flush)
  f2_flush := backend_redirect || mmio_redirect || wb_redirect
154 155
  f1_flush := f2_flush || from_bpu_f1_flush
  f0_flush := f1_flush || from_bpu_f0_flush
J
JinYue 已提交
156

J
JinYue 已提交
157 158
  val f1_ready, f2_ready, f3_ready         = WireInit(false.B)

159
  fromFtq.req.ready := toICache(0).ready && toICache(1).ready && f2_ready && GTimer() > 500.U
J
jinyue110 已提交
160

J
Jay 已提交
161
  toICache(0).valid       := fromFtq.req.valid && !f0_flush
162
  toICache(0).bits.vaddr  := fromFtq.req.bits.startAddr
J
Jay 已提交
163
  toICache(1).valid       := fromFtq.req.valid && f0_doubleLine && !f0_flush
J
JinYue 已提交
164
  toICache(1).bits.vaddr  := fromFtq.req.bits.nextlineStart//fromFtq.req.bits.startAddr + (PredictWidth * 2).U //TODO: timing critical
J
JinYue 已提交
165

166
  /** <PERF> f0 fetch bubble */
J
jinyue110 已提交
167

168 169 170 171 172 173 174 175 176 177 178 179
  XSPerfAccumulate("fetch_bubble_ftq_not_valid",   !f0_valid )
  XSPerfAccumulate("fetch_bubble_pipe_stall",    f0_valid && toICache(0).ready && toICache(1).ready && !f1_ready )
  XSPerfAccumulate("fetch_bubble_sram_0_busy",   f0_valid && !toICache(0).ready  )
  XSPerfAccumulate("fetch_bubble_sram_1_busy",   f0_valid && !toICache(1).ready  )


  /**
    ******************************************************************************
    * IFU Stage 1
    * - calculate pc/half_pc/cut_ptr for every instruction
    ******************************************************************************
    */
J
jinyue110 已提交
180

J
JinYue 已提交
181
  val f1_valid      = RegInit(false.B)
J
jinyue110 已提交
182
  val f1_ftq_req    = RegEnable(next = f0_ftq_req,    enable=f0_fire)
J
JinYue 已提交
183
  // val f1_situation  = RegEnable(next = f0_situation,  enable=f0_fire)
J
jinyue110 已提交
184 185
  val f1_doubleLine = RegEnable(next = f0_doubleLine, enable=f0_fire)
  val f1_vSetIdx    = RegEnable(next = f0_vSetIdx,    enable=f0_fire)
186
  val f1_fire       = f1_valid && f1_ready
J
JinYue 已提交
187

188
  f1_ready := f2_ready || !f1_valid
Z
zoujr 已提交
189

L
Lingrui98 已提交
190 191
  // from_bpu_f1_flush := fromFtq.flushFromBpu.shouldFlushByStage3(f1_ftq_req.ftqIdx)
  from_bpu_f1_flush := false.B
192

L
Lingrui98 已提交
193 194 195
  when(f1_flush)                  {f1_valid  := false.B}
  .elsewhen(f0_fire && !f0_flush) {f1_valid  := true.B}
  .elsewhen(f1_fire)              {f1_valid  := false.B}
J
JinYue 已提交
196

J
Jay 已提交
197 198 199 200
  val f1_pc                 = VecInit((0 until PredictWidth).map(i => f1_ftq_req.startAddr + (i * 2).U))
  val f1_half_snpc          = VecInit((0 until PredictWidth).map(i => f1_ftq_req.startAddr + ((i+2) * 2).U))
  val f1_cut_ptr            = if(HasCExtension)  VecInit((0 until PredictWidth + 1).map(i =>  Cat(0.U(1.W), f1_ftq_req.startAddr(blockOffBits-1, 1)) + i.U ))
                                  else           VecInit((0 until PredictWidth).map(i =>     Cat(0.U(1.W), f1_ftq_req.startAddr(blockOffBits-1, 2)) + i.U ))
J
JinYue 已提交
201

202 203 204 205 206 207 208 209 210 211 212
  /**
    ******************************************************************************
    * IFU Stage 2
    * - icache response data (latched for pipeline stop)
    * - generate exceprion bits for every instruciton (page fault/access fault/mmio)
    * - generate predicted instruction range (1 means this instruciton is in this fetch packet)
    * - cut data from cachlines to packet instruction code
    * - instruction predecode and RVC expand
    ******************************************************************************
    */

213
  val icacheRespAllValid = WireInit(false.B)
214

215 216
  val f2_valid      = RegInit(false.B)
  val f2_ftq_req    = RegEnable(next = f1_ftq_req,    enable=f1_fire)
J
JinYue 已提交
217
  // val f2_situation  = RegEnable(next = f1_situation,  enable=f1_fire)
218 219 220
  val f2_doubleLine = RegEnable(next = f1_doubleLine, enable=f1_fire)
  val f2_vSetIdx    = RegEnable(next = f1_vSetIdx,    enable=f1_fire)
  val f2_fire       = f2_valid && f2_ready
221

222 223
  f2_ready := f3_ready && icacheRespAllValid || !f2_valid
  //TODO: addr compare may be timing critical
J
JinYue 已提交
224
  val f2_icache_all_resp_wire       =  fromICache(0).valid && (fromICache(0).bits.vaddr ===  f2_ftq_req.startAddr) && ((fromICache(1).valid && (fromICache(1).bits.vaddr ===  f2_ftq_req.nextlineStart)) || !f2_doubleLine)
225
  val f2_icache_all_resp_reg        = RegInit(false.B)
J
JinYue 已提交
226

227
  icacheRespAllValid := f2_icache_all_resp_reg || f2_icache_all_resp_wire
228

229
  io.icacheStop := !f3_ready
J
JinYue 已提交
230

231 232 233
  when(f2_flush)                                              {f2_icache_all_resp_reg := false.B}
  .elsewhen(f2_valid && f2_icache_all_resp_wire && !f3_ready) {f2_icache_all_resp_reg := true.B}
  .elsewhen(f2_fire && f2_icache_all_resp_reg)                {f2_icache_all_resp_reg := false.B}
J
JinYue 已提交
234

L
Lingrui98 已提交
235 236
  when(f2_flush)                  {f2_valid := false.B}
  .elsewhen(f1_fire && !f1_flush) {f2_valid := true.B }
J
JinYue 已提交
237 238
  .elsewhen(f2_fire)              {f2_valid := false.B}

239
  val f2_cache_response_data = ResultHoldBypass(valid = f2_icache_all_resp_wire, data = VecInit(fromICache.map(_.bits.readData)))
J
jinyue110 已提交
240

241 242
  val f2_except_pf    = VecInit((0 until PortNumber).map(i => fromICache(i).bits.tlbExcp.pageFault))
  val f2_except_af    = VecInit((0 until PortNumber).map(i => fromICache(i).bits.tlbExcp.accessFault))
R
rvcoresjw 已提交
243 244
  val f2_mmio         = fromICache(0).bits.tlbExcp.mmio && !fromICache(0).bits.tlbExcp.accessFault && 
                                                           !fromICache(0).bits.tlbExcp.pageFault
J
jinyue110 已提交
245

J
Jay 已提交
246 247 248
  val f2_pc               = RegEnable(next = f1_pc, enable = f1_fire)
  val f2_half_snpc        = RegEnable(next = f1_half_snpc, enable = f1_fire)
  val f2_cut_ptr          = RegEnable(next = f1_cut_ptr, enable = f1_fire)
249

250
  val f2_resend_vaddr     = RegEnable(next = f1_ftq_req.startAddr + 2.U, enable = f1_fire)
J
Jay 已提交
251 252 253

  def isNextLine(pc: UInt, startAddr: UInt) = {
    startAddr(blockOffBits) ^ pc(blockOffBits)
J
JinYue 已提交
254 255
  }

J
Jay 已提交
256 257
  def isLastInLine(pc: UInt) = {
    pc(blockOffBits - 1, 0) === "b111110".U
258
  }
Y
Yinan Xu 已提交
259

J
Jay 已提交
260 261
  val f2_foldpc = VecInit(f2_pc.map(i => XORFold(i(VAddrBits-1,1), MemPredPCWidth)))
  val f2_jump_range = Fill(PredictWidth, !f2_ftq_req.ftqOffset.valid) | Fill(PredictWidth, 1.U(1.W)) >> ~f2_ftq_req.ftqOffset.bits
J
JinYue 已提交
262
  val f2_ftr_range  = Fill(PredictWidth, f2_ftq_req.oversize || f2_ftq_req.ftqOffset.valid) | Fill(PredictWidth, 1.U(1.W)) >> ~getBasicBlockIdx(f2_ftq_req.nextStartAddr, f2_ftq_req.startAddr)
J
Jay 已提交
263 264 265
  val f2_instr_range = f2_jump_range & f2_ftr_range
  val f2_pf_vec = VecInit((0 until PredictWidth).map(i => (!isNextLine(f2_pc(i), f2_ftq_req.startAddr) && f2_except_pf(0)   ||  isNextLine(f2_pc(i), f2_ftq_req.startAddr) && f2_doubleLine &&  f2_except_pf(1))))
  val f2_af_vec = VecInit((0 until PredictWidth).map(i => (!isNextLine(f2_pc(i), f2_ftq_req.startAddr) && f2_except_af(0)   ||  isNextLine(f2_pc(i), f2_ftq_req.startAddr) && f2_doubleLine && f2_except_af(1))))
J
JinYue 已提交
266

267 268
  val f2_paddrs       = VecInit((0 until PortNumber).map(i => fromICache(i).bits.paddr))
  val f2_perf_info    = io.icachePerfInfo
J
JinYue 已提交
269

J
Jay 已提交
270
  def cut(cacheline: UInt, cutPtr: Vec[UInt]) : Vec[UInt] ={
271 272 273 274
    if(HasCExtension){
      val result   = Wire(Vec(PredictWidth + 1, UInt(16.W)))
      val dataVec  = cacheline.asTypeOf(Vec(blockBytes * 2/ 2, UInt(16.W)))
      (0 until PredictWidth + 1).foreach( i =>
J
Jay 已提交
275
        result(i) := dataVec(cutPtr(i))
276
      )
Y
Yinan Xu 已提交
277
      result
278 279 280 281
    } else {
      val result   = Wire(Vec(PredictWidth, UInt(32.W)) )
      val dataVec  = cacheline.asTypeOf(Vec(blockBytes * 2/ 4, UInt(32.W)))
      (0 until PredictWidth).foreach( i =>
J
Jay 已提交
282
        result(i) := dataVec(cutPtr(i))
283
      )
Y
Yinan Xu 已提交
284
      result
285
    }
286 287
  }

J
Jay 已提交
288 289
  val f2_datas        = VecInit((0 until PortNumber).map(i => f2_cache_response_data(i)))
  val f2_cut_data = cut( Cat(f2_datas.map(cacheline => cacheline.asUInt ).reverse).asUInt, f2_cut_ptr )
J
JinYue 已提交
290

291
  /** predecode (include RVC expander) */
J
Jay 已提交
292 293 294 295
  preDecoderIn.data := f2_cut_data
  preDecoderIn.frontendTrigger := io.frontendTrigger
  preDecoderIn.csrTriggerEnable := io.csrTriggerEnable
  preDecoderIn.pc  := f2_pc
J
JinYue 已提交
296

J
Jay 已提交
297 298 299 300 301
  val f2_expd_instr   = preDecoderOut.expInstr
  val f2_pd           = preDecoderOut.pd
  val f2_jump_offset  = preDecoderOut.jumpOffset
  val f2_hasHalfValid  =  preDecoderOut.hasHalfValid
  val f2_crossPageFault = VecInit((0 until PredictWidth).map(i => isLastInLine(f2_pc(i)) && !f2_except_pf(0) && f2_doubleLine &&  f2_except_pf(1) && !f2_pd(i).isRVC ))
J
JinYue 已提交
302

303
  val predecodeOutValid = WireInit(false.B)
J
JinYue 已提交
304 305


306 307 308 309 310 311 312 313 314 315 316 317 318 319
  /**
    ******************************************************************************
    * IFU Stage 3
    * - handle MMIO instruciton
    *  -send request to Uncache fetch Unit
    *  -every packet include 1 MMIO instruction
    *  -MMIO instructions will stop fetch pipeline until commiting from RoB
    *  -flush to snpc (send ifu_redirect to Ftq)
    * - Ibuffer enqueue
    * - check predict result in Frontend (jalFault/retFault/notCFIFault/invalidTakenFault/targetFault)
    * - handle last half RVI instruction 
    ******************************************************************************
    */

J
JinYue 已提交
320 321
  val f3_valid          = RegInit(false.B)
  val f3_ftq_req        = RegEnable(next = f2_ftq_req,    enable=f2_fire)
J
JinYue 已提交
322
  // val f3_situation      = RegEnable(next = f2_situation,  enable=f2_fire)
J
JinYue 已提交
323
  val f3_doubleLine     = RegEnable(next = f2_doubleLine, enable=f2_fire)
324 325 326
  val f3_fire           = io.toIbuffer.fire()

  f3_ready := io.toIbuffer.ready || !f3_valid
J
JinYue 已提交
327 328

  val f3_cut_data       = RegEnable(next = f2_cut_data, enable=f2_fire)
329

J
JinYue 已提交
330 331
  val f3_except_pf      = RegEnable(next = f2_except_pf, enable = f2_fire)
  val f3_except_af      = RegEnable(next = f2_except_af, enable = f2_fire)
332
  val f3_mmio           = RegEnable(next = f2_mmio   , enable = f2_fire)
J
JinYue 已提交
333

J
Jay 已提交
334 335 336 337 338 339 340 341 342 343 344
  val f3_expd_instr     = RegEnable(next = f2_expd_instr,  enable = f2_fire)
  val f3_pd             = RegEnable(next = f2_pd,          enable = f2_fire)
  val f3_jump_offset    = RegEnable(next = f2_jump_offset, enable = f2_fire)
  val f3_af_vec         = RegEnable(next = f2_af_vec,      enable = f2_fire)
  val f3_pf_vec         = RegEnable(next = f2_pf_vec ,     enable = f2_fire)
  val f3_pc             = RegEnable(next = f2_pc,          enable = f2_fire)
  val f3_half_snpc        = RegEnable(next = f2_half_snpc, enable = f2_fire)
  val f3_instr_range    = RegEnable(next = f2_instr_range, enable = f2_fire)
  val f3_foldpc         = RegEnable(next = f2_foldpc,      enable = f2_fire)
  val f3_crossPageFault = RegEnable(next = f2_crossPageFault,      enable = f2_fire)
  val f3_hasHalfValid   = RegEnable(next = f2_hasHalfValid,      enable = f2_fire)
J
JinYue 已提交
345 346
  val f3_except         = VecInit((0 until 2).map{i => f3_except_pf(i) || f3_except_af(i)})
  val f3_has_except     = f3_valid && (f3_except_af.reduce(_||_) || f3_except_pf.reduce(_||_))
347
  val f3_pAddrs   = RegEnable(next = f2_paddrs, enable = f2_fire)
348 349
  val f3_resend_vaddr   = RegEnable(next = f2_resend_vaddr,      enable = f2_fire)

350

351 352
  val f3_oversize_target = f3_pc.last + 2.U

J
Jay 已提交
353
  /*** MMIO State Machine***/
354 355 356 357
  val f3_mmio_data    = Reg(Vec(2, UInt(16.W)))
  val mmio_is_RVC     = RegInit(false.B)
  val mmio_resend_addr =RegInit(0.U(PAddrBits.W))
  val mmio_resend_af  = RegInit(false.B)
358

359 360
  val m_idle :: m_sendReq :: m_waitResp :: m_sendTLB :: m_tlbResp :: m_sendPMP :: m_resendReq :: m_waitResendResp :: m_waitCommit :: m_commited :: Nil = Enum(10)
  val mmio_state = RegInit(m_idle)
361

362
  val f3_req_is_mmio     = f3_mmio && f3_valid
J
Jay 已提交
363
  val mmio_commit = VecInit(io.rob_commits.map{commit => commit.valid && commit.bits.ftqIdx === f3_ftq_req.ftqIdx &&  commit.bits.ftqOffset === 0.U}).asUInt.orR
364
  val f3_mmio_req_commit = f3_req_is_mmio && mmio_state === m_commited
365
   
366
  val f3_mmio_to_commit =  f3_req_is_mmio && mmio_state === m_waitCommit
367 368 369
  val f3_mmio_to_commit_next = RegNext(f3_mmio_to_commit)
  val f3_mmio_can_go      = f3_mmio_to_commit && !f3_mmio_to_commit_next

J
Jay 已提交
370 371
  val f3_ftq_flush_self     = fromFtq.redirect.valid && RedirectLevel.flushItself(fromFtq.redirect.bits.level)
  val f3_ftq_flush_by_older = fromFtq.redirect.valid && isBefore(fromFtq.redirect.bits.ftqIdx, f3_ftq_req.ftqIdx)
372

J
Jay 已提交
373
  val f3_need_not_flush = f3_req_is_mmio && fromFtq.redirect.valid && !f3_ftq_flush_self && !f3_ftq_flush_by_older
374 375

  when(f3_flush && !f3_need_not_flush)               {f3_valid := false.B}
J
Jay 已提交
376 377 378
  .elsewhen(f2_fire && !f2_flush )                   {f3_valid := true.B }
  .elsewhen(io.toIbuffer.fire() && !f3_req_is_mmio)          {f3_valid := false.B}
  .elsewhen{f3_req_is_mmio && f3_mmio_req_commit}            {f3_valid := false.B}
379 380 381 382 383 384 385 386 387 388 389

  val f3_mmio_use_seq_pc = RegInit(false.B)

  val (redirect_ftqIdx, redirect_ftqOffset)  = (fromFtq.redirect.bits.ftqIdx,fromFtq.redirect.bits.ftqOffset)
  val redirect_mmio_req = fromFtq.redirect.valid && redirect_ftqIdx === f3_ftq_req.ftqIdx && redirect_ftqOffset === 0.U

  when(RegNext(f2_fire && !f2_flush) && f3_req_is_mmio)        { f3_mmio_use_seq_pc := true.B  }
  .elsewhen(redirect_mmio_req)                                 { f3_mmio_use_seq_pc := false.B }

  f3_ready := Mux(f3_req_is_mmio, io.toIbuffer.ready && f3_mmio_req_commit || !f3_valid , io.toIbuffer.ready || !f3_valid)

390
  // when(fromUncache.fire())    {f3_mmio_data   :=  fromUncache.bits.data}
391 392 393


  switch(mmio_state){
394
    is(m_idle){
395
      when(f3_req_is_mmio){
396
        mmio_state :=  m_sendReq
397 398 399
      }
    }
  
400 401
    is(m_sendReq){
      mmio_state :=  Mux(toUncache.fire(), m_waitResp, m_sendReq )
402 403
    }

404
    is(m_waitResp){
405 406
      when(fromUncache.fire()){
          val isRVC =  fromUncache.bits.data(1,0) =/= 3.U
407 408 409 410 411 412
          val needResend = !isRVC && f3_pAddrs(0)(2,1) === 3.U
          mmio_state :=  Mux(needResend, m_sendTLB , m_waitCommit)

          mmio_is_RVC := isRVC
          f3_mmio_data(0)   :=  fromUncache.bits.data(15,0)
          f3_mmio_data(1)   :=  fromUncache.bits.data(31,16)
413 414 415
      }
    }  

416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432
    is(m_sendTLB){
          mmio_state :=  m_tlbResp
    }

    is(m_tlbResp){
          mmio_state :=  m_sendPMP
          mmio_resend_addr := io.iTLBInter.resp.bits.paddr
    }

    is(m_sendPMP){
          val pmpExcpAF = io.pmp.resp.instr
          mmio_state :=  Mux(pmpExcpAF, m_waitCommit , m_resendReq)
          mmio_resend_af := pmpExcpAF
    }

    is(m_resendReq){
      mmio_state :=  Mux(toUncache.fire(), m_waitResendResp, m_resendReq )
433 434
    }  

435
    is(m_waitResendResp){
436
      when(fromUncache.fire()){
437 438
          mmio_state :=  m_waitCommit
          f3_mmio_data(1)   :=  fromUncache.bits.data(15,0)
439 440 441
      }
    }  

442
    is(m_waitCommit){
J
Jay 已提交
443
      when(mmio_commit){
444
          mmio_state  :=  m_commited
445 446
      }
    }  
J
Jay 已提交
447

448 449 450 451 452
    //normal mmio instruction 
    is(m_commited){
        mmio_state := m_idle
        mmio_is_RVC := false.B 
        mmio_resend_addr := 0.U 
J
Jay 已提交
453
    }
454 455
  }

456
  //exception or flush by older branch prediction
J
Jay 已提交
457
  when(f3_ftq_flush_self || f3_ftq_flush_by_older)  {
458 459 460 461 462
    mmio_state := m_idle 
    mmio_is_RVC := false.B 
    mmio_resend_addr := 0.U 
    mmio_resend_af := false.B
    f3_mmio_data.map(_ := 0.U) 
463 464
  }

465 466
  toUncache.valid     :=  ((mmio_state === m_sendReq) || (mmio_state === m_resendReq)) && f3_req_is_mmio
  toUncache.bits.addr := Mux((mmio_state === m_resendReq), mmio_resend_addr, f3_pAddrs(0))
467 468
  fromUncache.ready   := true.B

469 470 471 472 473 474 475 476 477 478 479 480 481
  io.iTLBInter.req.valid         := (mmio_state === m_sendTLB) && f3_req_is_mmio
  io.iTLBInter.req.bits.size     := 3.U 
  io.iTLBInter.req.bits.vaddr    := f3_resend_vaddr
  io.iTLBInter.req.bits.debug.pc := f3_resend_vaddr

  io.iTLBInter.req.bits.cmd                 := TlbCmd.exec
  io.iTLBInter.req.bits.robIdx              := DontCare
  io.iTLBInter.req.bits.debug.isFirstIssue  := DontCare

  io.pmp.req.valid := (mmio_state === m_sendPMP) && f3_req_is_mmio
  io.pmp.req.bits.addr  := mmio_resend_addr
  io.pmp.req.bits.size  := 3.U
  io.pmp.req.bits.cmd   := TlbCmd.exec
J
JinYue 已提交
482

J
Jay 已提交
483 484 485 486 487 488 489 490 491
  val f3_lastHalf       = RegInit(0.U.asTypeOf(new LastHalfInfo))

  val f3_predecode_range = VecInit(preDecoderOut.pd.map(inst => inst.valid)).asUInt
  val f3_mmio_range      = VecInit((0 until PredictWidth).map(i => if(i ==0) true.B else false.B))
  val f3_instr_valid     = Wire(Vec(PredictWidth, Bool()))

  /*** prediction result check   ***/
  checkerIn.ftqOffset   := f3_ftq_req.ftqOffset
  checkerIn.jumpOffset  := f3_jump_offset
J
JinYue 已提交
492
  checkerIn.target      := f3_ftq_req.nextStartAddr
J
Jay 已提交
493 494 495 496 497
  checkerIn.instrRange  := f3_instr_range.asTypeOf(Vec(PredictWidth, Bool()))
  checkerIn.instrValid  := f3_instr_valid.asTypeOf(Vec(PredictWidth, Bool()))
  checkerIn.pds         := f3_pd
  checkerIn.pc          := f3_pc

498
  /*** handle half RVI in the last 2 Bytes  ***/
J
Jay 已提交
499 500 501 502 503 504 505 506 507 508 509 510

  def hasLastHalf(idx: UInt) = {
    !f3_pd(idx).isRVC && checkerOut.fixedRange(idx) && f3_instr_valid(idx) && !checkerOut.fixedTaken(idx) && !checkerOut.fixedMissPred(idx) && ! f3_req_is_mmio && !f3_ftq_req.oversize
  }

  val f3_last_validIdx             = ~ParallelPriorityEncoder(checkerOut.fixedRange.reverse)

  val f3_hasLastHalf         = hasLastHalf((PredictWidth - 1).U)
  val f3_false_lastHalf      = hasLastHalf(f3_last_validIdx)
  val f3_false_snpc          = f3_half_snpc(f3_last_validIdx)

  val f3_lastHalf_mask    = VecInit((0 until PredictWidth).map( i => if(i ==0) false.B else true.B )).asUInt()
J
JinYue 已提交
511 512 513

  when (f3_flush) {
    f3_lastHalf.valid := false.B
J
Jay 已提交
514 515
  }.elsewhen (f3_fire) {
    f3_lastHalf.valid := f3_hasLastHalf
J
JinYue 已提交
516
    f3_lastHalf.middlePC := f3_ftq_req.nextStartAddr
517 518
  }

J
Jay 已提交
519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554
  f3_instr_valid := Mux(f3_lastHalf.valid,f3_hasHalfValid ,VecInit(f3_pd.map(inst => inst.valid)))

  /*** frontend Trigger  ***/
  frontendTrigger.io.pds  := f3_pd
  frontendTrigger.io.pc   := f3_pc
  frontendTrigger.io.data   := f3_cut_data

  frontendTrigger.io.frontendTrigger  := io.frontendTrigger
  frontendTrigger.io.csrTriggerEnable := io.csrTriggerEnable

  val f3_triggered = frontendTrigger.io.triggered

  /*** send to Ibuffer  ***/

  io.toIbuffer.valid            := f3_valid && (!f3_req_is_mmio || f3_mmio_can_go) && !f3_flush
  io.toIbuffer.bits.instrs      := f3_expd_instr
  io.toIbuffer.bits.valid       := f3_instr_valid.asUInt
  io.toIbuffer.bits.enqEnable   := checkerOut.fixedRange.asUInt & f3_instr_valid.asUInt
  io.toIbuffer.bits.pd          := f3_pd
  io.toIbuffer.bits.ftqPtr      := f3_ftq_req.ftqIdx
  io.toIbuffer.bits.pc          := f3_pc
  io.toIbuffer.bits.ftqOffset.zipWithIndex.map{case(a, i) => a.bits := i.U; a.valid := checkerOut.fixedTaken(i) && !f3_req_is_mmio}
  io.toIbuffer.bits.foldpc      := f3_foldpc
  io.toIbuffer.bits.ipf         := f3_pf_vec
  io.toIbuffer.bits.acf         := f3_af_vec
  io.toIbuffer.bits.crossPageIPFFix := f3_crossPageFault
  io.toIbuffer.bits.triggered   := f3_triggered

  val lastHalfMask = VecInit((0 until PredictWidth).map(i => if(i ==0) false.B else true.B))
  when(f3_lastHalf.valid){
    io.toIbuffer.bits.enqEnable := checkerOut.fixedRange.asUInt & f3_instr_valid.asUInt & lastHalfMask.asUInt
    io.toIbuffer.bits.valid     := f3_lastHalf_mask & f3_instr_valid.asUInt
  }

  /** external predecode for MMIO instruction */
  when(f3_req_is_mmio){
555
    val inst  = Cat(f3_mmio_data(1), f3_mmio_data(0))
J
Jay 已提交
556 557 558 559 560 561 562 563 564 565 566 567 568 569
    val currentIsRVC   = isRVC(inst)

    val brType::isCall::isRet::Nil = brInfo(inst)
    val jalOffset = jal_offset(inst, currentIsRVC)
    val brOffset  = br_offset(inst, currentIsRVC)

    io.toIbuffer.bits.instrs (0) := new RVCDecoder(inst, XLEN).decode.bits

    io.toIbuffer.bits.pd(0).valid   := true.B
    io.toIbuffer.bits.pd(0).isRVC   := currentIsRVC
    io.toIbuffer.bits.pd(0).brType  := brType
    io.toIbuffer.bits.pd(0).isCall  := isCall
    io.toIbuffer.bits.pd(0).isRet   := isRet

570 571
    io.toIbuffer.bits.acf(0) := mmio_resend_af

J
Jay 已提交
572 573
    io.toIbuffer.bits.enqEnable   := f3_mmio_range.asUInt
  }
574

J
JinYue 已提交
575

J
JinYue 已提交
576
  //Write back to Ftq
577 578 579
  val f3_cache_fetch = f3_valid && !(f2_fire && !f2_flush)
  val finishFetchMaskReg = RegNext(f3_cache_fetch)

J
Jay 已提交
580
  val mmioFlushWb = Wire(Valid(new PredecodeWritebackBundle))
581
  val f3_mmio_missOffset = Wire(ValidUndirectioned(UInt(log2Ceil(PredictWidth).W)))
582
  f3_mmio_missOffset.valid := f3_req_is_mmio
583 584
  f3_mmio_missOffset.bits  := 0.U

585
  mmioFlushWb.valid           := (f3_req_is_mmio && mmio_state === m_waitCommit && RegNext(fromUncache.fire())  && f3_mmio_use_seq_pc)
J
Jay 已提交
586 587 588 589 590 591 592
  mmioFlushWb.bits.pc         := f3_pc
  mmioFlushWb.bits.pd         := f3_pd
  mmioFlushWb.bits.pd.zipWithIndex.map{case(instr,i) => instr.valid :=  f3_mmio_range(i)}
  mmioFlushWb.bits.ftqIdx     := f3_ftq_req.ftqIdx
  mmioFlushWb.bits.ftqOffset  := f3_ftq_req.ftqOffset.bits
  mmioFlushWb.bits.misOffset  := f3_mmio_missOffset
  mmioFlushWb.bits.cfiOffset  := DontCare
593
  mmioFlushWb.bits.target     := Mux(mmio_is_RVC, f3_ftq_req.startAddr + 2.U , f3_ftq_req.startAddr + 4.U)
J
Jay 已提交
594 595 596
  mmioFlushWb.bits.jalTarget  := DontCare
  mmioFlushWb.bits.instrRange := f3_mmio_range

597
  mmio_redirect := (f3_req_is_mmio && mmio_state === m_waitCommit && RegNext(fromUncache.fire())  && f3_mmio_use_seq_pc)
J
Jay 已提交
598

599 600 601 602 603 604 605 606 607
  /**
    ******************************************************************************
    * IFU Write Back Stage
    * - write back predecode information to Ftq to update
    * - redirect if found fault prediction 
    * - redirect if has false hit last half (last PC is not start + 32 Bytes, but in the midle of an notCFI RVI instruction)
    ******************************************************************************
    */

J
Jay 已提交
608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624
  val wb_valid          = RegNext(RegNext(f2_fire && !f2_flush) && !f3_req_is_mmio && !f3_flush)
  val wb_ftq_req        = RegNext(f3_ftq_req)

  val wb_check_result   = RegNext(checkerOut)
  val wb_instr_range    = RegNext(io.toIbuffer.bits.enqEnable)
  val wb_pc             = RegNext(f3_pc)
  val wb_pd             = RegNext(f3_pd)
  val wb_instr_valid    = RegNext(f3_instr_valid)

  /* false hit lastHalf */
  val wb_lastIdx        = RegNext(f3_last_validIdx)
  val wb_false_lastHalf = RegNext(f3_false_lastHalf) && wb_lastIdx =/= (PredictWidth - 1).U 
  val wb_false_target   = RegNext(f3_false_snpc)
  
  val wb_half_flush = wb_false_lastHalf
  val wb_half_target = wb_false_target

625 626 627 628 629 630 631 632 633 634 635
  /* false oversize */
  val lastIsRVC = wb_instr_range.asTypeOf(Vec(PredictWidth,Bool())).last  && wb_pd.last.isRVC
  val lastIsRVI = wb_instr_range.asTypeOf(Vec(PredictWidth,Bool()))(PredictWidth - 2) && !wb_pd(PredictWidth - 2).isRVC 
  val lastTaken = wb_check_result.fixedTaken.last
  val wb_false_oversize = wb_valid &&  wb_ftq_req.oversize && (lastIsRVC || lastIsRVI) && !lastTaken
  val wb_oversize_target = RegNext(f3_oversize_target)

  when(wb_valid){
    assert(!wb_false_oversize || !wb_half_flush, "False oversize and false half should be exclusive. ")
  }

J
Jay 已提交
636 637 638 639 640 641 642 643 644
  f3_wb_not_flush := wb_ftq_req.ftqIdx === f3_ftq_req.ftqIdx && f3_valid && wb_valid

  val checkFlushWb = Wire(Valid(new PredecodeWritebackBundle))
  checkFlushWb.valid                  := wb_valid
  checkFlushWb.bits.pc                := wb_pc
  checkFlushWb.bits.pd                := wb_pd
  checkFlushWb.bits.pd.zipWithIndex.map{case(instr,i) => instr.valid := wb_instr_valid(i)}
  checkFlushWb.bits.ftqIdx            := wb_ftq_req.ftqIdx
  checkFlushWb.bits.ftqOffset         := wb_ftq_req.ftqOffset.bits
645
  checkFlushWb.bits.misOffset.valid   := ParallelOR(wb_check_result.fixedMissPred) || wb_half_flush || wb_false_oversize
J
Jay 已提交
646 647 648
  checkFlushWb.bits.misOffset.bits    := Mux(wb_half_flush, (PredictWidth - 1).U, ParallelPriorityEncoder(wb_check_result.fixedMissPred))
  checkFlushWb.bits.cfiOffset.valid   := ParallelOR(wb_check_result.fixedTaken)
  checkFlushWb.bits.cfiOffset.bits    := ParallelPriorityEncoder(wb_check_result.fixedTaken)
649 650
  checkFlushWb.bits.target            := Mux(wb_false_oversize, wb_oversize_target,
                                            Mux(wb_half_flush, wb_half_target, wb_check_result.fixedTarget(ParallelPriorityEncoder(wb_check_result.fixedMissPred))))
651
  checkFlushWb.bits.jalTarget         := wb_check_result.fixedTarget(ParallelPriorityEncoder(VecInit(wb_pd.zip(wb_instr_valid).map{case (pd, v) => v && pd.isJal })))
J
Jay 已提交
652 653 654
  checkFlushWb.bits.instrRange        := wb_instr_range.asTypeOf(Vec(PredictWidth, Bool()))

  toFtq.pdWb := Mux(f3_req_is_mmio, mmioFlushWb,  checkFlushWb)
655

J
Jay 已提交
656
  wb_redirect := checkFlushWb.bits.misOffset.valid && wb_valid
657

658 659 660 661 662 663 664 665

  /** performance counter */
  val f3_perf_info     = RegEnable(next = f2_perf_info, enable = f2_fire)
  val f3_req_0    = io.toIbuffer.fire()
  val f3_req_1    = io.toIbuffer.fire() && f3_doubleLine
  val f3_hit_0    = io.toIbuffer.fire() && f3_perf_info.bank_hit(0)
  val f3_hit_1    = io.toIbuffer.fire() && f3_doubleLine & f3_perf_info.bank_hit(1)
  val f3_hit      = f3_perf_info.hit
666
  val perfEvents = Seq(
J
Jay 已提交
667
    ("frontendFlush                ", wb_redirect                                ),
668
    ("ifu_req                      ", io.toIbuffer.fire()                        ),
669
    ("ifu_miss                     ", io.toIbuffer.fire() && !f3_perf_info.hit   ),
670 671 672 673
    ("ifu_req_cacheline_0          ", f3_req_0                                   ),
    ("ifu_req_cacheline_1          ", f3_req_1                                   ),
    ("ifu_req_cacheline_0_hit      ", f3_hit_1                                   ),
    ("ifu_req_cacheline_1_hit      ", f3_hit_1                                   ),
674 675 676 677 678 679
    ("only_0_hit                   ", f3_perf_info.only_0_hit       && io.toIbuffer.fire() ),
    ("only_0_miss                  ", f3_perf_info.only_0_miss      && io.toIbuffer.fire() ),
    ("hit_0_hit_1                  ", f3_perf_info.hit_0_hit_1      && io.toIbuffer.fire() ),
    ("hit_0_miss_1                 ", f3_perf_info.hit_0_miss_1     && io.toIbuffer.fire() ),
    ("miss_0_hit_1                 ", f3_perf_info.miss_0_hit_1     && io.toIbuffer.fire() ),
    ("miss_0_miss_1                ", f3_perf_info.miss_0_miss_1    && io.toIbuffer.fire() ),
J
JinYue 已提交
680 681
    // ("cross_line_block             ", io.toIbuffer.fire() && f3_situation(0)     ),
    // ("fall_through_is_cacheline_end", io.toIbuffer.fire() && f3_situation(1)     ),
682
  )
683
  generatePerfEvent()
J
JinYue 已提交
684

J
JinYue 已提交
685 686 687 688 689 690
  XSPerfAccumulate("ifu_req",   io.toIbuffer.fire() )
  XSPerfAccumulate("ifu_miss",  io.toIbuffer.fire() && !f3_hit )
  XSPerfAccumulate("ifu_req_cacheline_0", f3_req_0  )
  XSPerfAccumulate("ifu_req_cacheline_1", f3_req_1  )
  XSPerfAccumulate("ifu_req_cacheline_0_hit",   f3_hit_0 )
  XSPerfAccumulate("ifu_req_cacheline_1_hit",   f3_hit_1 )
J
Jay 已提交
691
  XSPerfAccumulate("frontendFlush",  wb_redirect )
692 693 694 695 696 697
  XSPerfAccumulate("only_0_hit",      f3_perf_info.only_0_hit   && io.toIbuffer.fire()  )
  XSPerfAccumulate("only_0_miss",     f3_perf_info.only_0_miss  && io.toIbuffer.fire()  )
  XSPerfAccumulate("hit_0_hit_1",     f3_perf_info.hit_0_hit_1  && io.toIbuffer.fire()  )
  XSPerfAccumulate("hit_0_miss_1",    f3_perf_info.hit_0_miss_1  && io.toIbuffer.fire()  )
  XSPerfAccumulate("miss_0_hit_1",    f3_perf_info.miss_0_hit_1   && io.toIbuffer.fire() )
  XSPerfAccumulate("miss_0_miss_1",   f3_perf_info.miss_0_miss_1 && io.toIbuffer.fire() )
698 699 700
  XSPerfAccumulate("hit_0_except_1",   f3_perf_info.hit_0_except_1 && io.toIbuffer.fire() )
  XSPerfAccumulate("miss_0_except_1",   f3_perf_info.miss_0_except_1 && io.toIbuffer.fire() )
  XSPerfAccumulate("except_0",   f3_perf_info.except_0 && io.toIbuffer.fire() )
701
}