Don't stall the frontend, making it easier to add more features later
This commit is contained in:
committed by
Andrew Waterman
parent
061a0adceb
commit
d24d8ff84b
@ -5,6 +5,7 @@ package rocket
|
||||
|
||||
import Chisel._
|
||||
import Chisel.ImplicitConversions._
|
||||
import chisel3.core.withReset
|
||||
import config._
|
||||
import coreplex._
|
||||
import diplomacy._
|
||||
@ -64,7 +65,9 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
||||
val icache = outer.icache.module
|
||||
|
||||
val tlb = Module(new TLB(log2Ceil(coreInstBytes*fetchWidth), nTLBEntries))
|
||||
val fq = withReset(reset || io.cpu.req.valid) { Module(new Queue(new FrontendResp, 3, flow = true)) }
|
||||
|
||||
val s0_valid = io.cpu.req.valid || fq.io.enq.ready
|
||||
val s1_pc_ = Reg(UInt(width=vaddrBitsExtended))
|
||||
val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBs (this propagates down the pipeline)
|
||||
val s1_speculative = Reg(Bool())
|
||||
@ -84,32 +87,30 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
||||
val ntpc = ~(~s1_pc | (coreInstBytes*fetchWidth-1)) + UInt(coreInstBytes*fetchWidth)
|
||||
val predicted_npc = Wire(init = ntpc)
|
||||
val predicted_taken = Wire(init = Bool(false))
|
||||
val icmiss = s2_valid && !icache.io.resp.valid
|
||||
val npc = Mux(icmiss, s2_pc, predicted_npc)
|
||||
|
||||
val stall = io.cpu.resp.valid && !io.cpu.resp.ready
|
||||
when (!stall) {
|
||||
s1_pc_ := io.cpu.npc
|
||||
// consider RVC fetches across blocks to be non-speculative if the first
|
||||
// part was non-speculative
|
||||
val s0_speculative =
|
||||
if (usingCompressed) s1_speculative || s2_valid && !s2_speculative || predicted_taken
|
||||
else Bool(true)
|
||||
s1_speculative := Mux(icmiss, s2_speculative, s0_speculative)
|
||||
s2_valid := !icmiss
|
||||
when (!icmiss) {
|
||||
s2_pc := s1_pc
|
||||
s2_speculative := s1_speculative
|
||||
s2_cacheable := tlb.io.resp.cacheable
|
||||
s2_maybe_pf := tlb.io.resp.pf.inst
|
||||
s2_maybe_ae := tlb.io.resp.ae.inst
|
||||
s2_tlb_miss := tlb.io.resp.miss
|
||||
}
|
||||
}
|
||||
when (io.cpu.req.valid) {
|
||||
s1_pc_ := io.cpu.npc
|
||||
s1_speculative := io.cpu.req.bits.speculative
|
||||
s2_valid := Bool(false)
|
||||
val s2_replay = Wire(Bool())
|
||||
s2_replay :=
|
||||
(s2_valid && (!icache.io.resp.valid || (fq.io.enq.valid && !fq.io.enq.ready))) ||
|
||||
RegNext(s2_replay && !s0_valid)
|
||||
val npc = Mux(s2_replay, s2_pc, predicted_npc)
|
||||
|
||||
s1_pc_ := io.cpu.npc
|
||||
// consider RVC fetches across blocks to be non-speculative if the first
|
||||
// part was non-speculative
|
||||
val s0_speculative =
|
||||
if (usingCompressed) s1_speculative || s2_valid && !s2_speculative || predicted_taken
|
||||
else Bool(true)
|
||||
s1_speculative := Mux(io.cpu.req.valid, io.cpu.req.bits.speculative, Mux(s2_replay, s2_speculative, s0_speculative))
|
||||
|
||||
s2_valid := false
|
||||
when (!s2_replay && !io.cpu.req.valid) {
|
||||
s2_valid := true
|
||||
s2_pc := s1_pc
|
||||
s2_speculative := s1_speculative
|
||||
s2_cacheable := tlb.io.resp.cacheable
|
||||
s2_maybe_pf := tlb.io.resp.pf.inst
|
||||
s2_maybe_ae := tlb.io.resp.ae.inst
|
||||
s2_tlb_miss := tlb.io.resp.miss
|
||||
}
|
||||
|
||||
if (usingBTB) {
|
||||
@ -119,7 +120,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
||||
btb.io.btb_update := io.cpu.btb_update
|
||||
btb.io.bht_update := io.cpu.bht_update
|
||||
btb.io.ras_update := io.cpu.ras_update
|
||||
when (!stall && !icmiss) {
|
||||
when (!s2_replay) {
|
||||
btb.io.req.valid := true
|
||||
s2_btb_resp_valid := btb.io.resp.valid
|
||||
s2_btb_resp_bits := btb.io.resp.bits
|
||||
@ -131,7 +132,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
||||
}
|
||||
|
||||
io.ptw <> tlb.io.ptw
|
||||
tlb.io.req.valid := !stall && !icmiss
|
||||
tlb.io.req.valid := !s2_replay
|
||||
tlb.io.req.bits.vaddr := s1_pc
|
||||
tlb.io.req.bits.passthrough := Bool(false)
|
||||
tlb.io.req.bits.instruction := Bool(true)
|
||||
@ -139,26 +140,27 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
||||
tlb.io.req.bits.sfence := io.cpu.sfence
|
||||
tlb.io.req.bits.size := log2Ceil(coreInstBytes*fetchWidth)
|
||||
|
||||
icache.io.req.valid := !stall
|
||||
icache.io.req.valid := s0_valid
|
||||
icache.io.req.bits.addr := io.cpu.npc
|
||||
icache.io.invalidate := io.cpu.flush_icache
|
||||
icache.io.s1_paddr := tlb.io.resp.paddr
|
||||
icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || icmiss || s1_speculative && !tlb.io.resp.cacheable || tlb.io.resp.pf.inst || tlb.io.resp.ae.inst
|
||||
icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || s2_replay || s1_speculative && !tlb.io.resp.cacheable || tlb.io.resp.pf.inst || tlb.io.resp.ae.inst
|
||||
icache.io.s2_kill := false
|
||||
icache.io.resp.ready := !stall
|
||||
|
||||
val s2_kill = s2_speculative && !s2_cacheable || s2_xcpt
|
||||
io.cpu.resp.valid := s2_valid && (icache.io.resp.valid || s2_kill)
|
||||
io.cpu.resp.bits.pc := s2_pc
|
||||
fq.io.enq.valid := s2_valid && (icache.io.resp.valid || s2_kill)
|
||||
fq.io.enq.bits.pc := s2_pc
|
||||
io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc)
|
||||
|
||||
io.cpu.resp.bits.data := icache.io.resp.bits
|
||||
io.cpu.resp.bits.mask := UInt((1 << fetchWidth)-1) << s2_pc.extract(log2Ceil(fetchWidth)+log2Ceil(coreInstBytes)-1, log2Ceil(coreInstBytes))
|
||||
io.cpu.resp.bits.pf := s2_pf
|
||||
io.cpu.resp.bits.ae := s2_ae
|
||||
io.cpu.resp.bits.replay := s2_kill && !icache.io.resp.valid && !s2_xcpt
|
||||
io.cpu.resp.bits.btb.valid := s2_btb_resp_valid
|
||||
io.cpu.resp.bits.btb.bits := s2_btb_resp_bits
|
||||
fq.io.enq.bits.data := icache.io.resp.bits
|
||||
fq.io.enq.bits.mask := UInt((1 << fetchWidth)-1) << s2_pc.extract(log2Ceil(fetchWidth)+log2Ceil(coreInstBytes)-1, log2Ceil(coreInstBytes))
|
||||
fq.io.enq.bits.pf := s2_pf
|
||||
fq.io.enq.bits.ae := s2_ae
|
||||
fq.io.enq.bits.replay := s2_kill && !icache.io.resp.valid && !s2_xcpt
|
||||
fq.io.enq.bits.btb.valid := s2_btb_resp_valid
|
||||
fq.io.enq.bits.btb.bits := s2_btb_resp_bits
|
||||
|
||||
io.cpu.resp <> fq.io.deq
|
||||
|
||||
// performance events
|
||||
io.cpu.acquire := edge.done(icache.io.mem(0).a)
|
||||
|
Reference in New Issue
Block a user