Don't speculatively refill I$ in uncacheable regions
This commit is contained in:
parent
f7b392306e
commit
1699622730
@ -129,9 +129,11 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
|||||||
val releaseInFlight = s1_probe || s2_probe || release_state =/= s_ready
|
val releaseInFlight = s1_probe || s2_probe || release_state =/= s_ready
|
||||||
val s2_valid_masked = s2_valid && Reg(next = !s1_nack)
|
val s2_valid_masked = s2_valid && Reg(next = !s1_nack)
|
||||||
val s2_req = Reg(io.cpu.req.bits)
|
val s2_req = Reg(io.cpu.req.bits)
|
||||||
|
val s2_uncached = Reg(Bool())
|
||||||
when (s1_valid_not_nacked || s1_flush_valid) {
|
when (s1_valid_not_nacked || s1_flush_valid) {
|
||||||
s2_req := s1_req
|
s2_req := s1_req
|
||||||
s2_req.addr := s1_paddr
|
s2_req.addr := s1_paddr
|
||||||
|
s2_uncached := !tlb.io.resp.cacheable
|
||||||
}
|
}
|
||||||
val s2_read = isRead(s2_req.cmd)
|
val s2_read = isRead(s2_req.cmd)
|
||||||
val s2_write = isWrite(s2_req.cmd)
|
val s2_write = isWrite(s2_req.cmd)
|
||||||
@ -145,7 +147,6 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
|||||||
val s2_hit = s2_hit_state.isHit(s2_req.cmd)
|
val s2_hit = s2_hit_state.isHit(s2_req.cmd)
|
||||||
val s2_valid_hit = s2_valid_masked && s2_readwrite && s2_hit
|
val s2_valid_hit = s2_valid_masked && s2_readwrite && s2_hit
|
||||||
val s2_valid_miss = s2_valid_masked && s2_readwrite && !s2_hit && !(pstore1_valid || pstore2_valid) && !release_ack_wait
|
val s2_valid_miss = s2_valid_masked && s2_readwrite && !s2_hit && !(pstore1_valid || pstore2_valid) && !release_ack_wait
|
||||||
val s2_uncached = !addrMap.isCacheable(s2_req.addr)
|
|
||||||
val s2_valid_cached_miss = s2_valid_miss && !s2_uncached
|
val s2_valid_cached_miss = s2_valid_miss && !s2_uncached
|
||||||
val s2_victimize = s2_valid_cached_miss || s2_flush_valid
|
val s2_victimize = s2_valid_cached_miss || s2_flush_valid
|
||||||
val s2_valid_uncached = s2_valid_miss && s2_uncached
|
val s2_valid_uncached = s2_valid_miss && s2_uncached
|
||||||
|
@ -7,6 +7,7 @@ import cde.{Parameters, Field}
|
|||||||
|
|
||||||
class FrontendReq(implicit p: Parameters) extends CoreBundle()(p) {
|
class FrontendReq(implicit p: Parameters) extends CoreBundle()(p) {
|
||||||
val pc = UInt(width = vaddrBitsExtended)
|
val pc = UInt(width = vaddrBitsExtended)
|
||||||
|
val speculative = Bool()
|
||||||
}
|
}
|
||||||
|
|
||||||
class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) {
|
class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) {
|
||||||
@ -14,6 +15,7 @@ class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) {
|
|||||||
val data = Vec(fetchWidth, Bits(width = coreInstBits))
|
val data = Vec(fetchWidth, Bits(width = coreInstBits))
|
||||||
val mask = Bits(width = fetchWidth)
|
val mask = Bits(width = fetchWidth)
|
||||||
val xcpt_if = Bool()
|
val xcpt_if = Bool()
|
||||||
|
val replay = Bool()
|
||||||
}
|
}
|
||||||
|
|
||||||
class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) {
|
class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) {
|
||||||
@ -40,13 +42,15 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa
|
|||||||
|
|
||||||
val s1_pc_ = Reg(UInt(width=vaddrBitsExtended))
|
val s1_pc_ = Reg(UInt(width=vaddrBitsExtended))
|
||||||
val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBs (this propagates down the pipeline)
|
val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBs (this propagates down the pipeline)
|
||||||
|
val s1_speculative = Reg(Bool())
|
||||||
val s1_same_block = Reg(Bool())
|
val s1_same_block = Reg(Bool())
|
||||||
val s2_valid = Reg(init=Bool(true))
|
val s2_valid = Reg(init=Bool(true))
|
||||||
val s2_pc = Reg(init=UInt(p(ResetVector)))
|
val s2_pc = Reg(init=UInt(p(ResetVector)))
|
||||||
val s2_btb_resp_valid = Reg(init=Bool(false))
|
val s2_btb_resp_valid = Reg(init=Bool(false))
|
||||||
val s2_btb_resp_bits = Reg(new BTBResp)
|
val s2_btb_resp_bits = Reg(new BTBResp)
|
||||||
val s2_xcpt_if = Reg(init=Bool(false))
|
val s2_xcpt_if = Reg(init=Bool(false))
|
||||||
val s2_resp_valid = Wire(init=Bool(false))
|
val s2_speculative = Reg(init=Bool(false))
|
||||||
|
val s2_resp_valid = Wire(Bool())
|
||||||
val s2_resp_data = Wire(UInt(width = rowBits))
|
val s2_resp_data = Wire(UInt(width = rowBits))
|
||||||
|
|
||||||
val ntpc_0 = ~(~s1_pc | (coreInstBytes*fetchWidth-1)) + UInt(coreInstBytes*fetchWidth)
|
val ntpc_0 = ~(~s1_pc | (coreInstBytes*fetchWidth-1)) + UInt(coreInstBytes*fetchWidth)
|
||||||
@ -62,15 +66,18 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa
|
|||||||
when (!stall) {
|
when (!stall) {
|
||||||
s1_same_block := s0_same_block && !tlb.io.resp.miss
|
s1_same_block := s0_same_block && !tlb.io.resp.miss
|
||||||
s1_pc_ := npc
|
s1_pc_ := npc
|
||||||
|
s1_speculative := Mux(icmiss, s2_speculative, true)
|
||||||
s2_valid := !icmiss
|
s2_valid := !icmiss
|
||||||
when (!icmiss) {
|
when (!icmiss) {
|
||||||
s2_pc := s1_pc
|
s2_pc := s1_pc
|
||||||
|
s2_speculative := s1_speculative && !tlb.io.resp.cacheable
|
||||||
s2_xcpt_if := tlb.io.resp.xcpt_if
|
s2_xcpt_if := tlb.io.resp.xcpt_if
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
when (io.cpu.req.valid) {
|
when (io.cpu.req.valid) {
|
||||||
s1_same_block := Bool(false)
|
s1_same_block := Bool(false)
|
||||||
s1_pc_ := io.cpu.req.bits.pc
|
s1_pc_ := io.cpu.req.bits.pc
|
||||||
|
s1_speculative := io.cpu.req.bits.speculative
|
||||||
s2_valid := Bool(false)
|
s2_valid := Bool(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -105,8 +112,9 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa
|
|||||||
icache.io.invalidate := io.cpu.flush_icache
|
icache.io.invalidate := io.cpu.flush_icache
|
||||||
icache.io.s1_ppn := tlb.io.resp.ppn
|
icache.io.s1_ppn := tlb.io.resp.ppn
|
||||||
icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.cpu.flush_tlb
|
icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.cpu.flush_tlb
|
||||||
|
icache.io.s2_kill := s2_speculative
|
||||||
|
|
||||||
io.cpu.resp.valid := s2_valid && (s2_xcpt_if || s2_resp_valid)
|
io.cpu.resp.valid := s2_valid && (s2_resp_valid || s2_speculative || s2_xcpt_if)
|
||||||
io.cpu.resp.bits.pc := s2_pc
|
io.cpu.resp.bits.pc := s2_pc
|
||||||
io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc)
|
io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc)
|
||||||
|
|
||||||
@ -138,6 +146,7 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa
|
|||||||
val msk_pc = if (fetchWidth == 1) all_ones else all_ones << s2_pc(log2Up(fetchWidth) -1+2,2)
|
val msk_pc = if (fetchWidth == 1) all_ones else all_ones << s2_pc(log2Up(fetchWidth) -1+2,2)
|
||||||
io.cpu.resp.bits.mask := msk_pc
|
io.cpu.resp.bits.mask := msk_pc
|
||||||
io.cpu.resp.bits.xcpt_if := s2_xcpt_if
|
io.cpu.resp.bits.xcpt_if := s2_xcpt_if
|
||||||
|
io.cpu.resp.bits.replay := s2_speculative && !s2_resp_valid && !s2_xcpt_if
|
||||||
|
|
||||||
io.cpu.btb_resp.valid := s2_btb_resp_valid
|
io.cpu.btb_resp.valid := s2_btb_resp_valid
|
||||||
io.cpu.btb_resp.bits := s2_btb_resp_bits
|
io.cpu.btb_resp.bits := s2_btb_resp_bits
|
||||||
|
@ -30,6 +30,7 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara
|
|||||||
val req = Valid(new ICacheReq).flip
|
val req = Valid(new ICacheReq).flip
|
||||||
val s1_ppn = UInt(INPUT, ppnBits) // delayed one cycle w.r.t. req
|
val s1_ppn = UInt(INPUT, ppnBits) // delayed one cycle w.r.t. req
|
||||||
val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req
|
val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req
|
||||||
|
val s2_kill = Bool(INPUT) // delayed two cycles; prevents I$ miss emission
|
||||||
|
|
||||||
val resp = Decoupled(new ICacheResp)
|
val resp = Decoupled(new ICacheResp)
|
||||||
val invalidate = Bool(INPUT)
|
val invalidate = Bool(INPUT)
|
||||||
@ -67,7 +68,7 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara
|
|||||||
val s1_miss = out_valid && !s1_any_tag_hit
|
val s1_miss = out_valid && !s1_any_tag_hit
|
||||||
rdy := state === s_ready && !s1_miss
|
rdy := state === s_ready && !s1_miss
|
||||||
|
|
||||||
when (s1_valid && state === s_ready && s1_miss) {
|
when (s1_miss && state === s_ready) {
|
||||||
refill_addr := s1_paddr
|
refill_addr := s1_paddr
|
||||||
}
|
}
|
||||||
val refill_tag = refill_addr(tagBits+untagBits-1,untagBits)
|
val refill_tag = refill_addr(tagBits+untagBits-1,untagBits)
|
||||||
@ -135,7 +136,7 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara
|
|||||||
io.resp.bits.datablock := Mux1H(s1_tag_hit, s1_dout)
|
io.resp.bits.datablock := Mux1H(s1_tag_hit, s1_dout)
|
||||||
io.resp.valid := s1_hit
|
io.resp.valid := s1_hit
|
||||||
}
|
}
|
||||||
io.mem.acquire.valid := (state === s_request)
|
io.mem.acquire.valid := state === s_request && !io.s2_kill
|
||||||
io.mem.acquire.bits := GetBlock(addr_block = refill_addr >> blockOffBits)
|
io.mem.acquire.bits := GetBlock(addr_block = refill_addr >> blockOffBits)
|
||||||
|
|
||||||
// control state machine
|
// control state machine
|
||||||
@ -146,6 +147,7 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara
|
|||||||
}
|
}
|
||||||
is (s_request) {
|
is (s_request) {
|
||||||
when (io.mem.acquire.ready) { state := s_refill_wait }
|
when (io.mem.acquire.ready) { state := s_refill_wait }
|
||||||
|
when (io.s2_kill) { state := s_ready }
|
||||||
}
|
}
|
||||||
is (s_refill_wait) {
|
is (s_refill_wait) {
|
||||||
when (io.mem.grant.valid) { state := s_refill }
|
when (io.mem.grant.valid) { state := s_refill }
|
||||||
|
@ -157,6 +157,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
|
|||||||
val ex_reg_flush_pipe = Reg(Bool())
|
val ex_reg_flush_pipe = Reg(Bool())
|
||||||
val ex_reg_load_use = Reg(Bool())
|
val ex_reg_load_use = Reg(Bool())
|
||||||
val ex_reg_cause = Reg(UInt())
|
val ex_reg_cause = Reg(UInt())
|
||||||
|
val ex_reg_replay = Reg(Bool())
|
||||||
val ex_reg_pc = Reg(UInt())
|
val ex_reg_pc = Reg(UInt())
|
||||||
val ex_reg_inst = Reg(Bits())
|
val ex_reg_inst = Reg(Bits())
|
||||||
|
|
||||||
@ -295,8 +296,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
|
|||||||
div.io.req.bits.tag := ex_waddr
|
div.io.req.bits.tag := ex_waddr
|
||||||
|
|
||||||
ex_reg_valid := !ctrl_killd
|
ex_reg_valid := !ctrl_killd
|
||||||
|
ex_reg_replay := !take_pc && io.imem.resp.valid && io.imem.resp.bits.replay
|
||||||
ex_reg_xcpt := !ctrl_killd && id_xcpt
|
ex_reg_xcpt := !ctrl_killd && id_xcpt
|
||||||
ex_reg_xcpt_interrupt := csr.io.interrupt && !take_pc && io.imem.resp.valid
|
ex_reg_xcpt_interrupt := !take_pc && io.imem.resp.valid && csr.io.interrupt
|
||||||
when (id_xcpt) { ex_reg_cause := id_cause }
|
when (id_xcpt) { ex_reg_cause := id_cause }
|
||||||
|
|
||||||
when (!ctrl_killd) {
|
when (!ctrl_killd) {
|
||||||
@ -323,18 +325,18 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
when (!ctrl_killd || csr.io.interrupt) {
|
when (!ctrl_killd || csr.io.interrupt || io.imem.resp.bits.replay) {
|
||||||
ex_reg_inst := id_inst
|
ex_reg_inst := id_inst
|
||||||
ex_reg_pc := id_pc
|
ex_reg_pc := id_pc
|
||||||
}
|
}
|
||||||
|
|
||||||
// replay inst in ex stage?
|
// replay inst in ex stage?
|
||||||
val ex_pc_valid = ex_reg_valid || ex_reg_xcpt_interrupt
|
val ex_pc_valid = ex_reg_valid || ex_reg_replay || ex_reg_xcpt_interrupt
|
||||||
val wb_dcache_miss = wb_ctrl.mem && !io.dmem.resp.valid
|
val wb_dcache_miss = wb_ctrl.mem && !io.dmem.resp.valid
|
||||||
val replay_ex_structural = ex_ctrl.mem && !io.dmem.req.ready ||
|
val replay_ex_structural = ex_ctrl.mem && !io.dmem.req.ready ||
|
||||||
ex_ctrl.div && !div.io.req.ready
|
ex_ctrl.div && !div.io.req.ready
|
||||||
val replay_ex_load_use = wb_dcache_miss && ex_reg_load_use
|
val replay_ex_load_use = wb_dcache_miss && ex_reg_load_use
|
||||||
val replay_ex = ex_reg_valid && (replay_ex_structural || replay_ex_load_use)
|
val replay_ex = ex_reg_replay || (ex_reg_valid && (replay_ex_structural || replay_ex_load_use))
|
||||||
val ctrl_killx = take_pc_mem_wb || replay_ex || !ex_reg_valid
|
val ctrl_killx = take_pc_mem_wb || replay_ex || !ex_reg_valid
|
||||||
// detect 2-cycle load-use delay for LB/LH/SC
|
// detect 2-cycle load-use delay for LB/LH/SC
|
||||||
val ex_slow_bypass = ex_ctrl.mem_cmd === M_XSC || Vec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_ctrl.mem_type)
|
val ex_slow_bypass = ex_ctrl.mem_cmd === M_XSC || Vec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_ctrl.mem_type)
|
||||||
@ -536,9 +538,10 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
|
|||||||
id_ctrl.rocc && rocc_blocked || // reduce activity while RoCC is busy
|
id_ctrl.rocc && rocc_blocked || // reduce activity while RoCC is busy
|
||||||
id_do_fence ||
|
id_do_fence ||
|
||||||
csr.io.csr_stall
|
csr.io.csr_stall
|
||||||
ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || csr.io.interrupt
|
ctrl_killd := !io.imem.resp.valid || io.imem.resp.bits.replay || take_pc || ctrl_stalld || csr.io.interrupt
|
||||||
|
|
||||||
io.imem.req.valid := take_pc
|
io.imem.req.valid := take_pc
|
||||||
|
io.imem.req.bits.speculative := !take_pc_wb
|
||||||
io.imem.req.bits.pc :=
|
io.imem.req.bits.pc :=
|
||||||
Mux(wb_xcpt || csr.io.eret, csr.io.evec, // exception or [m|s]ret
|
Mux(wb_xcpt || csr.io.eret, csr.io.evec, // exception or [m|s]ret
|
||||||
Mux(replay_wb, wb_reg_pc, // replay
|
Mux(replay_wb, wb_reg_pc, // replay
|
||||||
|
@ -32,6 +32,7 @@ class TLBResp(implicit p: Parameters) extends CoreBundle()(p) {
|
|||||||
val xcpt_ld = Bool(OUTPUT)
|
val xcpt_ld = Bool(OUTPUT)
|
||||||
val xcpt_st = Bool(OUTPUT)
|
val xcpt_st = Bool(OUTPUT)
|
||||||
val xcpt_if = Bool(OUTPUT)
|
val xcpt_if = Bool(OUTPUT)
|
||||||
|
val cacheable = Bool(OUTPUT)
|
||||||
}
|
}
|
||||||
|
|
||||||
class TLB(implicit val p: Parameters) extends Module with HasTLBParameters {
|
class TLB(implicit val p: Parameters) extends Module with HasTLBParameters {
|
||||||
@ -111,6 +112,7 @@ class TLB(implicit val p: Parameters) extends Module with HasTLBParameters {
|
|||||||
io.resp.xcpt_ld := bad_va || (!tlb_miss && !addr_prot.r) || (tlb_hit && !(r_array & hits).orR)
|
io.resp.xcpt_ld := bad_va || (!tlb_miss && !addr_prot.r) || (tlb_hit && !(r_array & hits).orR)
|
||||||
io.resp.xcpt_st := bad_va || (!tlb_miss && !addr_prot.w) || (tlb_hit && !(w_array & hits).orR)
|
io.resp.xcpt_st := bad_va || (!tlb_miss && !addr_prot.w) || (tlb_hit && !(w_array & hits).orR)
|
||||||
io.resp.xcpt_if := bad_va || (!tlb_miss && !addr_prot.x) || (tlb_hit && !(x_array & hits).orR)
|
io.resp.xcpt_if := bad_va || (!tlb_miss && !addr_prot.x) || (tlb_hit && !(x_array & hits).orR)
|
||||||
|
io.resp.cacheable := addrMap.isCacheable(paddr)
|
||||||
io.resp.miss := tlb_miss
|
io.resp.miss := tlb_miss
|
||||||
io.resp.ppn := Mux(vm_enabled, Mux1H(hitsVec, ppns), io.req.bits.vpn(ppnBits-1,0))
|
io.resp.ppn := Mux(vm_enabled, Mux1H(hitsVec, ppns), io.req.bits.vpn(ppnBits-1,0))
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user