1
0

Don't speculatively refill I$ in uncacheable regions

This commit is contained in:
Andrew Waterman 2016-07-09 01:08:52 -07:00
parent f7b392306e
commit 1699622730
5 changed files with 27 additions and 10 deletions

View File

@ -129,9 +129,11 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
val releaseInFlight = s1_probe || s2_probe || release_state =/= s_ready val releaseInFlight = s1_probe || s2_probe || release_state =/= s_ready
val s2_valid_masked = s2_valid && Reg(next = !s1_nack) val s2_valid_masked = s2_valid && Reg(next = !s1_nack)
val s2_req = Reg(io.cpu.req.bits) val s2_req = Reg(io.cpu.req.bits)
val s2_uncached = Reg(Bool())
when (s1_valid_not_nacked || s1_flush_valid) { when (s1_valid_not_nacked || s1_flush_valid) {
s2_req := s1_req s2_req := s1_req
s2_req.addr := s1_paddr s2_req.addr := s1_paddr
s2_uncached := !tlb.io.resp.cacheable
} }
val s2_read = isRead(s2_req.cmd) val s2_read = isRead(s2_req.cmd)
val s2_write = isWrite(s2_req.cmd) val s2_write = isWrite(s2_req.cmd)
@ -145,7 +147,6 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
val s2_hit = s2_hit_state.isHit(s2_req.cmd) val s2_hit = s2_hit_state.isHit(s2_req.cmd)
val s2_valid_hit = s2_valid_masked && s2_readwrite && s2_hit val s2_valid_hit = s2_valid_masked && s2_readwrite && s2_hit
val s2_valid_miss = s2_valid_masked && s2_readwrite && !s2_hit && !(pstore1_valid || pstore2_valid) && !release_ack_wait val s2_valid_miss = s2_valid_masked && s2_readwrite && !s2_hit && !(pstore1_valid || pstore2_valid) && !release_ack_wait
val s2_uncached = !addrMap.isCacheable(s2_req.addr)
val s2_valid_cached_miss = s2_valid_miss && !s2_uncached val s2_valid_cached_miss = s2_valid_miss && !s2_uncached
val s2_victimize = s2_valid_cached_miss || s2_flush_valid val s2_victimize = s2_valid_cached_miss || s2_flush_valid
val s2_valid_uncached = s2_valid_miss && s2_uncached val s2_valid_uncached = s2_valid_miss && s2_uncached

View File

@ -7,6 +7,7 @@ import cde.{Parameters, Field}
class FrontendReq(implicit p: Parameters) extends CoreBundle()(p) { class FrontendReq(implicit p: Parameters) extends CoreBundle()(p) {
val pc = UInt(width = vaddrBitsExtended) val pc = UInt(width = vaddrBitsExtended)
val speculative = Bool()
} }
class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) { class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) {
@ -14,6 +15,7 @@ class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) {
val data = Vec(fetchWidth, Bits(width = coreInstBits)) val data = Vec(fetchWidth, Bits(width = coreInstBits))
val mask = Bits(width = fetchWidth) val mask = Bits(width = fetchWidth)
val xcpt_if = Bool() val xcpt_if = Bool()
val replay = Bool()
} }
class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) { class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) {
@ -40,13 +42,15 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa
val s1_pc_ = Reg(UInt(width=vaddrBitsExtended)) val s1_pc_ = Reg(UInt(width=vaddrBitsExtended))
val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBS (this propagates down the pipeline) val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBS (this propagates down the pipeline)
val s1_speculative = Reg(Bool())
val s1_same_block = Reg(Bool()) val s1_same_block = Reg(Bool())
val s2_valid = Reg(init=Bool(true)) val s2_valid = Reg(init=Bool(true))
val s2_pc = Reg(init=UInt(p(ResetVector))) val s2_pc = Reg(init=UInt(p(ResetVector)))
val s2_btb_resp_valid = Reg(init=Bool(false)) val s2_btb_resp_valid = Reg(init=Bool(false))
val s2_btb_resp_bits = Reg(new BTBResp) val s2_btb_resp_bits = Reg(new BTBResp)
val s2_xcpt_if = Reg(init=Bool(false)) val s2_xcpt_if = Reg(init=Bool(false))
val s2_resp_valid = Wire(init=Bool(false)) val s2_speculative = Reg(init=Bool(false))
val s2_resp_valid = Wire(Bool())
val s2_resp_data = Wire(UInt(width = rowBits)) val s2_resp_data = Wire(UInt(width = rowBits))
val ntpc_0 = ~(~s1_pc | (coreInstBytes*fetchWidth-1)) + UInt(coreInstBytes*fetchWidth) val ntpc_0 = ~(~s1_pc | (coreInstBytes*fetchWidth-1)) + UInt(coreInstBytes*fetchWidth)
@ -62,15 +66,18 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa
when (!stall) { when (!stall) {
s1_same_block := s0_same_block && !tlb.io.resp.miss s1_same_block := s0_same_block && !tlb.io.resp.miss
s1_pc_ := npc s1_pc_ := npc
s1_speculative := Mux(icmiss, s2_speculative, true)
s2_valid := !icmiss s2_valid := !icmiss
when (!icmiss) { when (!icmiss) {
s2_pc := s1_pc s2_pc := s1_pc
s2_speculative := s1_speculative && !tlb.io.resp.cacheable
s2_xcpt_if := tlb.io.resp.xcpt_if s2_xcpt_if := tlb.io.resp.xcpt_if
} }
} }
when (io.cpu.req.valid) { when (io.cpu.req.valid) {
s1_same_block := Bool(false) s1_same_block := Bool(false)
s1_pc_ := io.cpu.req.bits.pc s1_pc_ := io.cpu.req.bits.pc
s1_speculative := io.cpu.req.bits.speculative
s2_valid := Bool(false) s2_valid := Bool(false)
} }
@ -105,8 +112,9 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa
icache.io.invalidate := io.cpu.flush_icache icache.io.invalidate := io.cpu.flush_icache
icache.io.s1_ppn := tlb.io.resp.ppn icache.io.s1_ppn := tlb.io.resp.ppn
icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.cpu.flush_tlb icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.cpu.flush_tlb
icache.io.s2_kill := s2_speculative
io.cpu.resp.valid := s2_valid && (s2_xcpt_if || s2_resp_valid) io.cpu.resp.valid := s2_valid && (s2_resp_valid || s2_speculative || s2_xcpt_if)
io.cpu.resp.bits.pc := s2_pc io.cpu.resp.bits.pc := s2_pc
io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc)
@ -138,6 +146,7 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa
val msk_pc = if (fetchWidth == 1) all_ones else all_ones << s2_pc(log2Up(fetchWidth) -1+2,2) val msk_pc = if (fetchWidth == 1) all_ones else all_ones << s2_pc(log2Up(fetchWidth) -1+2,2)
io.cpu.resp.bits.mask := msk_pc io.cpu.resp.bits.mask := msk_pc
io.cpu.resp.bits.xcpt_if := s2_xcpt_if io.cpu.resp.bits.xcpt_if := s2_xcpt_if
io.cpu.resp.bits.replay := s2_speculative && !s2_resp_valid && !s2_xcpt_if
io.cpu.btb_resp.valid := s2_btb_resp_valid io.cpu.btb_resp.valid := s2_btb_resp_valid
io.cpu.btb_resp.bits := s2_btb_resp_bits io.cpu.btb_resp.bits := s2_btb_resp_bits

View File

@ -30,6 +30,7 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara
val req = Valid(new ICacheReq).flip val req = Valid(new ICacheReq).flip
val s1_ppn = UInt(INPUT, ppnBits) // delayed one cycle w.r.t. req val s1_ppn = UInt(INPUT, ppnBits) // delayed one cycle w.r.t. req
val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req
val s2_kill = Bool(INPUT) // delayed two cycles; prevents I$ miss emission
val resp = Decoupled(new ICacheResp) val resp = Decoupled(new ICacheResp)
val invalidate = Bool(INPUT) val invalidate = Bool(INPUT)
@ -67,7 +68,7 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara
val s1_miss = out_valid && !s1_any_tag_hit val s1_miss = out_valid && !s1_any_tag_hit
rdy := state === s_ready && !s1_miss rdy := state === s_ready && !s1_miss
when (s1_valid && state === s_ready && s1_miss) { when (s1_miss && state === s_ready) {
refill_addr := s1_paddr refill_addr := s1_paddr
} }
val refill_tag = refill_addr(tagBits+untagBits-1,untagBits) val refill_tag = refill_addr(tagBits+untagBits-1,untagBits)
@ -135,7 +136,7 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara
io.resp.bits.datablock := Mux1H(s1_tag_hit, s1_dout) io.resp.bits.datablock := Mux1H(s1_tag_hit, s1_dout)
io.resp.valid := s1_hit io.resp.valid := s1_hit
} }
io.mem.acquire.valid := (state === s_request) io.mem.acquire.valid := state === s_request && !io.s2_kill
io.mem.acquire.bits := GetBlock(addr_block = refill_addr >> blockOffBits) io.mem.acquire.bits := GetBlock(addr_block = refill_addr >> blockOffBits)
// control state machine // control state machine
@ -146,6 +147,7 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara
} }
is (s_request) { is (s_request) {
when (io.mem.acquire.ready) { state := s_refill_wait } when (io.mem.acquire.ready) { state := s_refill_wait }
when (io.s2_kill) { state := s_ready }
} }
is (s_refill_wait) { is (s_refill_wait) {
when (io.mem.grant.valid) { state := s_refill } when (io.mem.grant.valid) { state := s_refill }

View File

@ -157,6 +157,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
val ex_reg_flush_pipe = Reg(Bool()) val ex_reg_flush_pipe = Reg(Bool())
val ex_reg_load_use = Reg(Bool()) val ex_reg_load_use = Reg(Bool())
val ex_reg_cause = Reg(UInt()) val ex_reg_cause = Reg(UInt())
val ex_reg_replay = Reg(Bool())
val ex_reg_pc = Reg(UInt()) val ex_reg_pc = Reg(UInt())
val ex_reg_inst = Reg(Bits()) val ex_reg_inst = Reg(Bits())
@ -295,8 +296,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
div.io.req.bits.tag := ex_waddr div.io.req.bits.tag := ex_waddr
ex_reg_valid := !ctrl_killd ex_reg_valid := !ctrl_killd
ex_reg_replay := !take_pc && io.imem.resp.valid && io.imem.resp.bits.replay
ex_reg_xcpt := !ctrl_killd && id_xcpt ex_reg_xcpt := !ctrl_killd && id_xcpt
ex_reg_xcpt_interrupt := csr.io.interrupt && !take_pc && io.imem.resp.valid ex_reg_xcpt_interrupt := !take_pc && io.imem.resp.valid && csr.io.interrupt
when (id_xcpt) { ex_reg_cause := id_cause } when (id_xcpt) { ex_reg_cause := id_cause }
when (!ctrl_killd) { when (!ctrl_killd) {
@ -323,18 +325,18 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
} }
} }
} }
when (!ctrl_killd || csr.io.interrupt) { when (!ctrl_killd || csr.io.interrupt || io.imem.resp.bits.replay) {
ex_reg_inst := id_inst ex_reg_inst := id_inst
ex_reg_pc := id_pc ex_reg_pc := id_pc
} }
// replay inst in ex stage? // replay inst in ex stage?
val ex_pc_valid = ex_reg_valid || ex_reg_xcpt_interrupt val ex_pc_valid = ex_reg_valid || ex_reg_replay || ex_reg_xcpt_interrupt
val wb_dcache_miss = wb_ctrl.mem && !io.dmem.resp.valid val wb_dcache_miss = wb_ctrl.mem && !io.dmem.resp.valid
val replay_ex_structural = ex_ctrl.mem && !io.dmem.req.ready || val replay_ex_structural = ex_ctrl.mem && !io.dmem.req.ready ||
ex_ctrl.div && !div.io.req.ready ex_ctrl.div && !div.io.req.ready
val replay_ex_load_use = wb_dcache_miss && ex_reg_load_use val replay_ex_load_use = wb_dcache_miss && ex_reg_load_use
val replay_ex = ex_reg_valid && (replay_ex_structural || replay_ex_load_use) val replay_ex = ex_reg_replay || (ex_reg_valid && (replay_ex_structural || replay_ex_load_use))
val ctrl_killx = take_pc_mem_wb || replay_ex || !ex_reg_valid val ctrl_killx = take_pc_mem_wb || replay_ex || !ex_reg_valid
// detect 2-cycle load-use delay for LB/LH/SC // detect 2-cycle load-use delay for LB/LH/SC
val ex_slow_bypass = ex_ctrl.mem_cmd === M_XSC || Vec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_ctrl.mem_type) val ex_slow_bypass = ex_ctrl.mem_cmd === M_XSC || Vec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_ctrl.mem_type)
@ -536,9 +538,10 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
id_ctrl.rocc && rocc_blocked || // reduce activity while RoCC is busy id_ctrl.rocc && rocc_blocked || // reduce activity while RoCC is busy
id_do_fence || id_do_fence ||
csr.io.csr_stall csr.io.csr_stall
ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || csr.io.interrupt ctrl_killd := !io.imem.resp.valid || io.imem.resp.bits.replay || take_pc || ctrl_stalld || csr.io.interrupt
io.imem.req.valid := take_pc io.imem.req.valid := take_pc
io.imem.req.bits.speculative := !take_pc_wb
io.imem.req.bits.pc := io.imem.req.bits.pc :=
Mux(wb_xcpt || csr.io.eret, csr.io.evec, // exception or [m|s]ret Mux(wb_xcpt || csr.io.eret, csr.io.evec, // exception or [m|s]ret
Mux(replay_wb, wb_reg_pc, // replay Mux(replay_wb, wb_reg_pc, // replay

View File

@ -32,6 +32,7 @@ class TLBResp(implicit p: Parameters) extends CoreBundle()(p) {
val xcpt_ld = Bool(OUTPUT) val xcpt_ld = Bool(OUTPUT)
val xcpt_st = Bool(OUTPUT) val xcpt_st = Bool(OUTPUT)
val xcpt_if = Bool(OUTPUT) val xcpt_if = Bool(OUTPUT)
val cacheable = Bool(OUTPUT)
} }
class TLB(implicit val p: Parameters) extends Module with HasTLBParameters { class TLB(implicit val p: Parameters) extends Module with HasTLBParameters {
@ -111,6 +112,7 @@ class TLB(implicit val p: Parameters) extends Module with HasTLBParameters {
io.resp.xcpt_ld := bad_va || (!tlb_miss && !addr_prot.r) || (tlb_hit && !(r_array & hits).orR) io.resp.xcpt_ld := bad_va || (!tlb_miss && !addr_prot.r) || (tlb_hit && !(r_array & hits).orR)
io.resp.xcpt_st := bad_va || (!tlb_miss && !addr_prot.w) || (tlb_hit && !(w_array & hits).orR) io.resp.xcpt_st := bad_va || (!tlb_miss && !addr_prot.w) || (tlb_hit && !(w_array & hits).orR)
io.resp.xcpt_if := bad_va || (!tlb_miss && !addr_prot.x) || (tlb_hit && !(x_array & hits).orR) io.resp.xcpt_if := bad_va || (!tlb_miss && !addr_prot.x) || (tlb_hit && !(x_array & hits).orR)
io.resp.cacheable := addrMap.isCacheable(paddr)
io.resp.miss := tlb_miss io.resp.miss := tlb_miss
io.resp.ppn := Mux(vm_enabled, Mux1H(hitsVec, ppns), io.req.bits.vpn(ppnBits-1,0)) io.resp.ppn := Mux(vm_enabled, Mux1H(hitsVec, ppns), io.req.bits.vpn(ppnBits-1,0))