Separate I$ and D$ interface signals that span clock cycles
For example, Decoupled[HellaCacheReq].bits.kill doesn't make sense, since it doesn't come in the same cycle as ready/valid.
This commit is contained in:
parent
dc662f28a0
commit
51e0870e23
@ -21,20 +21,26 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module
|
||||
for (i <- 1 until n)
|
||||
io.requestor(i).req.ready := io.requestor(i-1).req.ready && !io.requestor(i-1).req.valid
|
||||
|
||||
io.mem.req.bits := io.requestor(n-1).req.bits
|
||||
io.mem.req.bits.tag := Cat(io.requestor(n-1).req.bits.tag, UInt(n-1, log2Up(n)))
|
||||
for (i <- n-2 to 0 by -1) {
|
||||
for (i <- n-1 to 0 by -1) {
|
||||
val req = io.requestor(i).req
|
||||
when (req.valid) {
|
||||
def connect_s0() = {
|
||||
io.mem.req.bits.cmd := req.bits.cmd
|
||||
io.mem.req.bits.typ := req.bits.typ
|
||||
io.mem.req.bits.addr := req.bits.addr
|
||||
io.mem.req.bits.phys := req.bits.phys
|
||||
io.mem.req.bits.tag := Cat(req.bits.tag, UInt(i, log2Up(n)))
|
||||
}
|
||||
when (r_valid(i)) {
|
||||
io.mem.req.bits.kill := req.bits.kill
|
||||
io.mem.req.bits.data := req.bits.data
|
||||
def connect_s1() = {
|
||||
io.mem.s1_kill := io.requestor(i).s1_kill
|
||||
io.mem.s1_data := io.requestor(i).s1_data
|
||||
}
|
||||
|
||||
if (i == n-1) {
|
||||
connect_s0()
|
||||
connect_s1()
|
||||
} else {
|
||||
when (req.valid) { connect_s0() }
|
||||
when (r_valid(i)) { connect_s1() }
|
||||
}
|
||||
}
|
||||
|
||||
@ -44,10 +50,9 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module
|
||||
resp.valid := io.mem.resp.valid && tag_hit
|
||||
io.requestor(i).xcpt := io.mem.xcpt
|
||||
io.requestor(i).ordered := io.mem.ordered
|
||||
io.requestor(i).s2_nack := io.mem.s2_nack && tag_hit
|
||||
resp.bits := io.mem.resp.bits
|
||||
resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n)
|
||||
resp.bits.nack := io.mem.resp.bits.nack && tag_hit
|
||||
resp.bits.replay := io.mem.resp.bits.replay && tag_hit
|
||||
|
||||
io.requestor(i).replay_next.valid := io.mem.replay_next.valid &&
|
||||
io.mem.replay_next.bits(log2Up(n)-1,0) === UInt(i)
|
||||
|
@ -104,10 +104,8 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa
|
||||
icache.io.req.valid := !stall && !s0_same_block
|
||||
icache.io.req.bits.idx := io.cpu.npc
|
||||
icache.io.invalidate := io.cpu.invalidate
|
||||
icache.io.req.bits.ppn := tlb.io.resp.ppn
|
||||
icache.io.req.bits.kill := io.cpu.req.valid ||
|
||||
tlb.io.resp.miss || tlb.io.resp.xcpt_if ||
|
||||
icmiss || io.ptw.invalidate
|
||||
icache.io.s1_ppn := tlb.io.resp.ppn
|
||||
icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.ptw.invalidate
|
||||
|
||||
io.cpu.resp.valid := s2_valid && (s2_xcpt_if || s2_resp_valid)
|
||||
io.cpu.resp.bits.pc := s2_pc
|
||||
|
@ -16,8 +16,6 @@ trait HasL1CacheParameters extends HasCacheParameters with HasCoreParameters {
|
||||
|
||||
class ICacheReq(implicit p: Parameters) extends CoreBundle()(p) {
|
||||
val idx = UInt(width = pgIdxBits)
|
||||
val ppn = UInt(width = ppnBits) // delayed one cycle
|
||||
val kill = Bool() // delayed one cycle
|
||||
}
|
||||
|
||||
class ICacheResp(implicit p: Parameters) extends CoreBundle()(p) with HasL1CacheParameters {
|
||||
@ -28,6 +26,9 @@ class ICacheResp(implicit p: Parameters) extends CoreBundle()(p) with HasL1Cache
|
||||
class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CacheParameters {
|
||||
val io = new Bundle {
|
||||
val req = Valid(new ICacheReq).flip
|
||||
val s1_ppn = UInt(INPUT, ppnBits) // delayed one cycle w.r.t. req
|
||||
val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req
|
||||
|
||||
val resp = Decoupled(new ICacheResp)
|
||||
val invalidate = Bool(INPUT)
|
||||
val mem = new ClientUncachedTileLinkIO
|
||||
@ -47,18 +48,18 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara
|
||||
|
||||
val s1_valid = Reg(init=Bool(false))
|
||||
val s1_pgoff = Reg(UInt(width = pgIdxBits))
|
||||
val s1_addr = Cat(io.req.bits.ppn, s1_pgoff).toUInt
|
||||
val s1_addr = Cat(io.s1_ppn, s1_pgoff).toUInt
|
||||
val s1_tag = s1_addr(tagBits+untagBits-1,untagBits)
|
||||
|
||||
val s0_valid = io.req.valid || s1_valid && stall
|
||||
val s0_pgoff = Mux(s1_valid && stall, s1_pgoff, io.req.bits.idx)
|
||||
|
||||
s1_valid := io.req.valid && rdy || s1_valid && stall && !io.req.bits.kill
|
||||
s1_valid := io.req.valid && rdy || s1_valid && stall && !io.s1_kill
|
||||
when (io.req.valid && rdy) {
|
||||
s1_pgoff := io.req.bits.idx
|
||||
}
|
||||
|
||||
val out_valid = s1_valid && !io.req.bits.kill && state === s_ready
|
||||
val out_valid = s1_valid && !io.s1_kill && state === s_ready
|
||||
val s1_idx = s1_addr(untagBits-1,blockOffBits)
|
||||
val s1_offset = s1_addr(blockOffBits-1,0)
|
||||
val s1_hit = out_valid && s1_any_tag_hit
|
||||
|
@ -33,6 +33,11 @@ trait HasL1HellaCacheParameters extends HasL1CacheParameters {
|
||||
val nMSHRs = p(NMSHRs)
|
||||
val nIOMSHRs = 1
|
||||
val lrscCycles = p(LRSCCycles)
|
||||
|
||||
require(lrscCycles >= 32) // ISA requires 16-insn LRSC sequences to succeed
|
||||
require(isPow2(nSets))
|
||||
require(rowBits <= outerDataBits)
|
||||
require(untagBits <= pgIdxBits)
|
||||
}
|
||||
|
||||
abstract class L1HellaCacheModule(implicit val p: Parameters) extends Module
|
||||
@ -63,7 +68,6 @@ trait HasMissInfo extends HasL1HellaCacheParameters {
|
||||
|
||||
class HellaCacheReqInternal(implicit p: Parameters) extends L1HellaCacheBundle()(p)
|
||||
with HasCoreMemOp {
|
||||
val kill = Bool()
|
||||
val phys = Bool()
|
||||
}
|
||||
|
||||
@ -72,7 +76,6 @@ class HellaCacheReq(implicit p: Parameters) extends HellaCacheReqInternal()(p) w
|
||||
class HellaCacheResp(implicit p: Parameters) extends L1HellaCacheBundle()(p)
|
||||
with HasCoreMemOp
|
||||
with HasCoreData {
|
||||
val nack = Bool() // comes 2 cycles after req.fire
|
||||
val replay = Bool()
|
||||
val has_data = Bool()
|
||||
val data_word_bypass = Bits(width = coreDataBits)
|
||||
@ -92,6 +95,10 @@ class HellaCacheExceptions extends Bundle {
|
||||
// interface between D$ and processor/DTLB
|
||||
class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) {
|
||||
val req = Decoupled(new HellaCacheReq)
|
||||
val s1_kill = Bool(OUTPUT) // kill previous cycle's req
|
||||
val s1_data = Bits(OUTPUT, coreDataBits) // data for previous cycle's req
|
||||
val s2_nack = Bool(INPUT) // req from two cycles ago is rejected
|
||||
|
||||
val resp = Valid(new HellaCacheResp).flip
|
||||
val replay_next = Valid(Bits(width = coreDCacheReqTagBits)).flip
|
||||
val xcpt = (new HellaCacheExceptions).asInput
|
||||
@ -207,8 +214,7 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
||||
io.resp.bits.has_data := isRead(req.cmd)
|
||||
io.resp.bits.data := loadgen.data | req_cmd_sc
|
||||
io.resp.bits.store_data := req.data
|
||||
io.resp.bits.nack := Bool(false)
|
||||
io.resp.bits.replay := io.resp.valid
|
||||
io.resp.bits.replay := Bool(true)
|
||||
|
||||
when (io.req.fire()) {
|
||||
req := io.req.bits
|
||||
@ -764,11 +770,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
||||
val mem = new ClientTileLinkIO
|
||||
}
|
||||
|
||||
require(lrscCycles >= 32) // ISA requires 16-insn LRSC sequences to succeed
|
||||
require(isPow2(nSets))
|
||||
require(isPow2(nWays)) // TODO: relax this
|
||||
require(rowBits <= outerDataBits)
|
||||
require(untagBits <= pgIdxBits)
|
||||
|
||||
val wb = Module(new WritebackUnit)
|
||||
val prober = Module(new ProbeUnit)
|
||||
@ -777,7 +779,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
||||
io.cpu.req.ready := Bool(true)
|
||||
val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false))
|
||||
val s1_req = Reg(io.cpu.req.bits)
|
||||
val s1_valid_masked = s1_valid && !io.cpu.req.bits.kill
|
||||
val s1_valid_masked = s1_valid && !io.cpu.s1_kill
|
||||
val s1_replay = Reg(init=Bool(false))
|
||||
val s1_clk_en = Reg(Bool())
|
||||
|
||||
@ -826,12 +828,11 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
||||
val s1_addr = Cat(dtlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0))
|
||||
|
||||
when (s1_clk_en) {
|
||||
s2_req.kill := s1_req.kill
|
||||
s2_req.typ := s1_req.typ
|
||||
s2_req.phys := s1_req.phys
|
||||
s2_req.addr := s1_addr
|
||||
when (s1_write) {
|
||||
s2_req.data := Mux(s1_replay, mshrs.io.replay.bits.data, io.cpu.req.bits.data)
|
||||
s2_req.data := Mux(s1_replay, mshrs.io.replay.bits.data, io.cpu.s1_data)
|
||||
}
|
||||
when (s1_recycled) { s2_req.data := s1_req.data }
|
||||
s2_req.tag := s1_req.tag
|
||||
@ -1075,7 +1076,6 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
||||
cache_resp.bits.has_data := isRead(s2_req.cmd)
|
||||
cache_resp.bits.data := loadgen.data | s2_sc_fail
|
||||
cache_resp.bits.store_data := s2_req.data
|
||||
cache_resp.bits.nack := s2_valid && s2_nack
|
||||
cache_resp.bits.replay := s2_replay
|
||||
|
||||
val uncache_resp = Wire(Valid(new HellaCacheResp))
|
||||
@ -1083,6 +1083,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
||||
uncache_resp.valid := mshrs.io.resp.valid
|
||||
mshrs.io.resp.ready := Reg(next= !(s1_valid || s1_replay))
|
||||
|
||||
io.cpu.s2_nack := s2_valid && s2_nack
|
||||
io.cpu.resp := Mux(mshrs.io.resp.ready, uncache_resp, cache_resp)
|
||||
io.cpu.resp.bits.data_word_bypass := loadgen.wordData
|
||||
io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid
|
||||
@ -1111,17 +1112,15 @@ class SimpleHellaCacheIF(implicit p: Parameters) extends Module
|
||||
req_arb.io.in(1).bits := io.requestor.req.bits
|
||||
io.requestor.req.ready := !replaying_cmb && req_arb.io.in(1).ready
|
||||
|
||||
val s2_nack = io.cache.resp.bits.nack
|
||||
val s3_nack = Reg(next=s2_nack)
|
||||
|
||||
val s0_req_fire = io.cache.req.fire()
|
||||
val s1_req_fire = Reg(next=s0_req_fire)
|
||||
val s2_req_fire = Reg(next=s1_req_fire)
|
||||
val s3_nack = Reg(next=io.cache.s2_nack)
|
||||
|
||||
io.cache.req <> req_arb.io.out
|
||||
io.cache.req.bits.kill := s2_nack
|
||||
io.cache.req.bits.phys := Bool(true)
|
||||
io.cache.req.bits.data := RegEnable(req_arb.io.out.bits.data, s0_req_fire)
|
||||
io.cache.s1_kill := io.cache.s2_nack
|
||||
io.cache.s1_data := RegEnable(req_arb.io.out.bits.data, s0_req_fire)
|
||||
|
||||
/* replay queues:
|
||||
replayq1 holds the older request.
|
||||
@ -1147,13 +1146,13 @@ class SimpleHellaCacheIF(implicit p: Parameters) extends Module
|
||||
replayq2.io.enq.bits.data := io.cache.resp.bits.store_data
|
||||
replayq2.io.deq.ready := Bool(false)
|
||||
|
||||
when (s2_nack) {
|
||||
when (io.cache.s2_nack) {
|
||||
replayq1.io.enq.valid := Bool(true)
|
||||
replaying_cmb := Bool(true)
|
||||
}
|
||||
|
||||
// when replaying request got sunk into the d$
|
||||
when (s2_req_fire && Reg(next=Reg(next=replaying_cmb)) && !s2_nack) {
|
||||
when (s2_req_fire && Reg(next=Reg(next=replaying_cmb)) && !io.cache.s2_nack) {
|
||||
// see if there's a stashed request in replayq2
|
||||
when (replayq2.io.deq.valid) {
|
||||
replayq1.io.enq.valid := Bool(true)
|
||||
|
@ -118,8 +118,8 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) {
|
||||
io.mem.req.bits.cmd := Mux(state === s_set_dirty, M_XA_OR, M_XRD)
|
||||
io.mem.req.bits.typ := MT_D
|
||||
io.mem.req.bits.addr := pte_addr
|
||||
io.mem.req.bits.kill := Bool(false)
|
||||
io.mem.req.bits.data := pte_wdata.toBits
|
||||
io.mem.s1_data := pte_wdata.toBits
|
||||
io.mem.s1_kill := Bool(false)
|
||||
|
||||
val r_resp_ppn = io.mem.req.bits.addr >> pgIdxBits
|
||||
val resp_ppn = Vec((0 until pgLevels-1).map(i => Cat(r_resp_ppn >> pgLevelBits*(pgLevels-i-1), r_req.addr(pgLevelBits*(pgLevels-i-1)-1,0))) :+ r_resp_ppn)(count)
|
||||
@ -152,7 +152,7 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) {
|
||||
}
|
||||
}
|
||||
is (s_wait) {
|
||||
when (io.mem.resp.bits.nack) {
|
||||
when (io.mem.s2_nack) {
|
||||
state := s_req
|
||||
}
|
||||
when (io.mem.resp.valid) {
|
||||
@ -172,7 +172,7 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) {
|
||||
}
|
||||
}
|
||||
is (s_wait_dirty) {
|
||||
when (io.mem.resp.bits.nack) {
|
||||
when (io.mem.s2_nack) {
|
||||
state := s_set_dirty
|
||||
}
|
||||
when (io.mem.resp.valid) {
|
||||
|
@ -376,7 +376,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
|
||||
}
|
||||
|
||||
val wb_set_sboard = wb_ctrl.div || wb_dcache_miss || wb_ctrl.rocc
|
||||
val replay_wb_common = io.dmem.resp.bits.nack || wb_reg_replay
|
||||
val replay_wb_common = io.dmem.s2_nack || wb_reg_replay
|
||||
val wb_rocc_val = wb_reg_valid && wb_ctrl.rocc && !replay_wb_common
|
||||
val replay_wb = replay_wb_common || wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready
|
||||
val wb_xcpt = wb_reg_xcpt || csr.io.csr_xcpt
|
||||
@ -388,9 +388,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
|
||||
// writeback arbitration
|
||||
val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool
|
||||
val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool
|
||||
val dmem_resp_waddr = io.dmem.resp.bits.tag.toUInt()(5,1)
|
||||
val dmem_resp_waddr = io.dmem.resp.bits.tag >> 1
|
||||
val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data
|
||||
val dmem_resp_replay = io.dmem.resp.bits.replay && io.dmem.resp.bits.has_data
|
||||
val dmem_resp_replay = dmem_resp_valid && io.dmem.resp.bits.replay
|
||||
|
||||
div.io.resp.ready := !(wb_reg_valid && wb_ctrl.wxd)
|
||||
val ll_wdata = Wire(init = div.io.resp.bits.data)
|
||||
@ -532,14 +532,15 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
|
||||
io.fpu.dmem_resp_tag := dmem_resp_waddr
|
||||
|
||||
io.dmem.req.valid := ex_reg_valid && ex_ctrl.mem
|
||||
io.dmem.req.bits.kill := killm_common || mem_xcpt
|
||||
val ex_dcache_tag = Cat(ex_waddr, ex_ctrl.fp)
|
||||
require(coreDCacheReqTagBits >= ex_dcache_tag.getWidth)
|
||||
io.dmem.req.bits.tag := ex_dcache_tag
|
||||
io.dmem.req.bits.cmd := ex_ctrl.mem_cmd
|
||||
io.dmem.req.bits.typ := ex_ctrl.mem_type
|
||||
io.dmem.req.bits.phys := Bool(false)
|
||||
io.dmem.req.bits.addr := encodeVirtualAddress(ex_rs(0), alu.io.adder_out)
|
||||
io.dmem.req.bits.tag := Cat(ex_waddr, ex_ctrl.fp)
|
||||
io.dmem.req.bits.data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2)
|
||||
require(coreDCacheReqTagBits >= 6)
|
||||
io.dmem.s1_kill := killm_common || mem_xcpt
|
||||
io.dmem.s1_data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2)
|
||||
io.dmem.invalidate_lr := wb_xcpt
|
||||
|
||||
io.rocc.cmd.valid := wb_rocc_val
|
||||
|
Loading…
x
Reference in New Issue
Block a user