Separate I$ and D$ interface signals that span clock cycles

For example, Decoupled[HellaCacheReq].bits.kill doesn't make sense,
since it doesn't come in the same cycle as ready/valid.
Andrew Waterman 2016-04-01 19:30:39 -07:00
parent dc662f28a0
commit 51e0870e23
6 changed files with 52 additions and 48 deletions
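The change is easiest to see from the requester's side: fields that belong to later pipeline stages leave the Decoupled request bundle and become standalone per-stage signals (s1_kill, s1_data, s2_nack on HellaCacheIO; s1_ppn, s1_kill on the ICache). As a rough sketch of how a client now drives the D$ interface (illustrative only, not part of this commit; the module name and literal values below are made up):

// Hypothetical requestor, for illustration: the s0 request goes through the
// Decoupled port, while the s1/s2 signals are driven as separate wires.
class ExampleDCacheClient(implicit p: Parameters) extends CoreModule()(p) {
  val io = new Bundle {
    val dmem = new HellaCacheIO
  }

  // s0: only fields known when ready/valid fire live in req.bits
  io.dmem.req.valid := Bool(true)
  io.dmem.req.bits.cmd := M_XRD
  io.dmem.req.bits.typ := MT_D
  io.dmem.req.bits.addr := UInt(0x100)
  io.dmem.req.bits.tag := UInt(0)
  io.dmem.req.bits.phys := Bool(true)

  // s1: kill and store data arrive one cycle after the request,
  // so they are separate outputs instead of members of req.bits
  io.dmem.s1_kill := Bool(false)
  io.dmem.s1_data := UInt(0)

  // s2: nack for the request issued two cycles earlier; on a nack
  // the client is responsible for replaying the request
  val s2_must_replay = io.dmem.s2_nack
}

SimpleHellaCacheIF in the D$ diff below follows the same pattern, with real replay logic hung off s2_nack.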


@@ -21,20 +21,26 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module
   for (i <- 1 until n)
     io.requestor(i).req.ready := io.requestor(i-1).req.ready && !io.requestor(i-1).req.valid
 
-  io.mem.req.bits := io.requestor(n-1).req.bits
-  io.mem.req.bits.tag := Cat(io.requestor(n-1).req.bits.tag, UInt(n-1, log2Up(n)))
-  for (i <- n-2 to 0 by -1) {
+  for (i <- n-1 to 0 by -1) {
     val req = io.requestor(i).req
-    when (req.valid) {
+    def connect_s0() = {
       io.mem.req.bits.cmd := req.bits.cmd
       io.mem.req.bits.typ := req.bits.typ
       io.mem.req.bits.addr := req.bits.addr
       io.mem.req.bits.phys := req.bits.phys
       io.mem.req.bits.tag := Cat(req.bits.tag, UInt(i, log2Up(n)))
     }
-    when (r_valid(i)) {
-      io.mem.req.bits.kill := req.bits.kill
-      io.mem.req.bits.data := req.bits.data
+    def connect_s1() = {
+      io.mem.s1_kill := io.requestor(i).s1_kill
+      io.mem.s1_data := io.requestor(i).s1_data
+    }
+
+    if (i == n-1) {
+      connect_s0()
+      connect_s1()
+    } else {
+      when (req.valid) { connect_s0() }
+      when (r_valid(i)) { connect_s1() }
     }
   }
@@ -44,10 +50,9 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module
     resp.valid := io.mem.resp.valid && tag_hit
     io.requestor(i).xcpt := io.mem.xcpt
     io.requestor(i).ordered := io.mem.ordered
+    io.requestor(i).s2_nack := io.mem.s2_nack && tag_hit
     resp.bits := io.mem.resp.bits
     resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n)
-    resp.bits.nack := io.mem.resp.bits.nack && tag_hit
-    resp.bits.replay := io.mem.resp.bits.replay && tag_hit
 
     io.requestor(i).replay_next.valid := io.mem.replay_next.valid &&
       io.mem.replay_next.bits(log2Up(n)-1,0) === UInt(i)


@@ -104,10 +104,8 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa
   icache.io.req.valid := !stall && !s0_same_block
   icache.io.req.bits.idx := io.cpu.npc
   icache.io.invalidate := io.cpu.invalidate
-  icache.io.req.bits.ppn := tlb.io.resp.ppn
-  icache.io.req.bits.kill := io.cpu.req.valid ||
-    tlb.io.resp.miss || tlb.io.resp.xcpt_if ||
-    icmiss || io.ptw.invalidate
+  icache.io.s1_ppn := tlb.io.resp.ppn
+  icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.ptw.invalidate
 
   io.cpu.resp.valid := s2_valid && (s2_xcpt_if || s2_resp_valid)
   io.cpu.resp.bits.pc := s2_pc


@@ -16,8 +16,6 @@ trait HasL1CacheParameters extends HasCacheParameters with HasCoreParameters {
 
 class ICacheReq(implicit p: Parameters) extends CoreBundle()(p) {
   val idx = UInt(width = pgIdxBits)
-  val ppn = UInt(width = ppnBits) // delayed one cycle
-  val kill = Bool() // delayed one cycle
 }
 
 class ICacheResp(implicit p: Parameters) extends CoreBundle()(p) with HasL1CacheParameters {
@@ -28,6 +26,9 @@ class ICacheResp(implicit p: Parameters) extends CoreBundle()(p) with HasL1Cache
 class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CacheParameters {
   val io = new Bundle {
     val req = Valid(new ICacheReq).flip
+    val s1_ppn = UInt(INPUT, ppnBits) // delayed one cycle w.r.t. req
+    val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req
     val resp = Decoupled(new ICacheResp)
     val invalidate = Bool(INPUT)
     val mem = new ClientUncachedTileLinkIO
@@ -47,18 +48,18 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara
 
   val s1_valid = Reg(init=Bool(false))
   val s1_pgoff = Reg(UInt(width = pgIdxBits))
-  val s1_addr = Cat(io.req.bits.ppn, s1_pgoff).toUInt
+  val s1_addr = Cat(io.s1_ppn, s1_pgoff).toUInt
   val s1_tag = s1_addr(tagBits+untagBits-1,untagBits)
 
   val s0_valid = io.req.valid || s1_valid && stall
   val s0_pgoff = Mux(s1_valid && stall, s1_pgoff, io.req.bits.idx)
 
-  s1_valid := io.req.valid && rdy || s1_valid && stall && !io.req.bits.kill
+  s1_valid := io.req.valid && rdy || s1_valid && stall && !io.s1_kill
   when (io.req.valid && rdy) {
     s1_pgoff := io.req.bits.idx
   }
 
-  val out_valid = s1_valid && !io.req.bits.kill && state === s_ready
+  val out_valid = s1_valid && !io.s1_kill && state === s_ready
   val s1_idx = s1_addr(untagBits-1,blockOffBits)
   val s1_offset = s1_addr(blockOffBits-1,0)
   val s1_hit = out_valid && s1_any_tag_hit


@@ -33,6 +33,11 @@ trait HasL1HellaCacheParameters extends HasL1CacheParameters {
   val nMSHRs = p(NMSHRs)
   val nIOMSHRs = 1
   val lrscCycles = p(LRSCCycles)
+
+  require(lrscCycles >= 32) // ISA requires 16-insn LRSC sequences to succeed
+  require(isPow2(nSets))
+  require(rowBits <= outerDataBits)
+  require(untagBits <= pgIdxBits)
 }
 
 abstract class L1HellaCacheModule(implicit val p: Parameters) extends Module
@@ -63,7 +68,6 @@ trait HasMissInfo extends HasL1HellaCacheParameters {
 
 class HellaCacheReqInternal(implicit p: Parameters) extends L1HellaCacheBundle()(p)
     with HasCoreMemOp {
-  val kill = Bool()
   val phys = Bool()
 }
 
@@ -72,7 +76,6 @@ class HellaCacheReq(implicit p: Parameters) extends HellaCacheReqInternal()(p) w
 class HellaCacheResp(implicit p: Parameters) extends L1HellaCacheBundle()(p)
     with HasCoreMemOp
     with HasCoreData {
-  val nack = Bool() // comes 2 cycles after req.fire
   val replay = Bool()
   val has_data = Bool()
   val data_word_bypass = Bits(width = coreDataBits)
@@ -92,6 +95,10 @@ class HellaCacheExceptions extends Bundle {
 // interface between D$ and processor/DTLB
 class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) {
   val req = Decoupled(new HellaCacheReq)
+  val s1_kill = Bool(OUTPUT) // kill previous cycle's req
+  val s1_data = Bits(OUTPUT, coreDataBits) // data for previous cycle's req
+  val s2_nack = Bool(INPUT) // req from two cycles ago is rejected
   val resp = Valid(new HellaCacheResp).flip
   val replay_next = Valid(Bits(width = coreDCacheReqTagBits)).flip
   val xcpt = (new HellaCacheExceptions).asInput
@@ -207,8 +214,7 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) {
   io.resp.bits.has_data := isRead(req.cmd)
   io.resp.bits.data := loadgen.data | req_cmd_sc
   io.resp.bits.store_data := req.data
-  io.resp.bits.nack := Bool(false)
-  io.resp.bits.replay := io.resp.valid
+  io.resp.bits.replay := Bool(true)
 
   when (io.req.fire()) {
     req := io.req.bits
@@ -764,11 +770,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
     val mem = new ClientTileLinkIO
   }
 
-  require(lrscCycles >= 32) // ISA requires 16-insn LRSC sequences to succeed
-  require(isPow2(nSets))
   require(isPow2(nWays)) // TODO: relax this
-  require(rowBits <= outerDataBits)
-  require(untagBits <= pgIdxBits)
 
   val wb = Module(new WritebackUnit)
   val prober = Module(new ProbeUnit)
@@ -777,7 +779,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
   io.cpu.req.ready := Bool(true)
   val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false))
   val s1_req = Reg(io.cpu.req.bits)
-  val s1_valid_masked = s1_valid && !io.cpu.req.bits.kill
+  val s1_valid_masked = s1_valid && !io.cpu.s1_kill
   val s1_replay = Reg(init=Bool(false))
   val s1_clk_en = Reg(Bool())
@@ -826,12 +828,11 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
   val s1_addr = Cat(dtlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0))
 
   when (s1_clk_en) {
-    s2_req.kill := s1_req.kill
     s2_req.typ := s1_req.typ
     s2_req.phys := s1_req.phys
     s2_req.addr := s1_addr
     when (s1_write) {
-      s2_req.data := Mux(s1_replay, mshrs.io.replay.bits.data, io.cpu.req.bits.data)
+      s2_req.data := Mux(s1_replay, mshrs.io.replay.bits.data, io.cpu.s1_data)
     }
     when (s1_recycled) { s2_req.data := s1_req.data }
     s2_req.tag := s1_req.tag
@@ -1075,7 +1076,6 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
   cache_resp.bits.has_data := isRead(s2_req.cmd)
   cache_resp.bits.data := loadgen.data | s2_sc_fail
   cache_resp.bits.store_data := s2_req.data
-  cache_resp.bits.nack := s2_valid && s2_nack
   cache_resp.bits.replay := s2_replay
 
   val uncache_resp = Wire(Valid(new HellaCacheResp))
@@ -1083,6 +1083,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
   uncache_resp.valid := mshrs.io.resp.valid
   mshrs.io.resp.ready := Reg(next= !(s1_valid || s1_replay))
 
+  io.cpu.s2_nack := s2_valid && s2_nack
   io.cpu.resp := Mux(mshrs.io.resp.ready, uncache_resp, cache_resp)
   io.cpu.resp.bits.data_word_bypass := loadgen.wordData
   io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid
@@ -1111,17 +1112,15 @@ class SimpleHellaCacheIF(implicit p: Parameters) extends Module
   req_arb.io.in(1).bits := io.requestor.req.bits
   io.requestor.req.ready := !replaying_cmb && req_arb.io.in(1).ready
 
-  val s2_nack = io.cache.resp.bits.nack
-  val s3_nack = Reg(next=s2_nack)
-
   val s0_req_fire = io.cache.req.fire()
   val s1_req_fire = Reg(next=s0_req_fire)
   val s2_req_fire = Reg(next=s1_req_fire)
+  val s3_nack = Reg(next=io.cache.s2_nack)
 
   io.cache.req <> req_arb.io.out
-  io.cache.req.bits.kill := s2_nack
   io.cache.req.bits.phys := Bool(true)
-  io.cache.req.bits.data := RegEnable(req_arb.io.out.bits.data, s0_req_fire)
+  io.cache.s1_kill := io.cache.s2_nack
+  io.cache.s1_data := RegEnable(req_arb.io.out.bits.data, s0_req_fire)
 
   /* replay queues:
      replayq1 holds the older request.
@@ -1147,13 +1146,13 @@ class SimpleHellaCacheIF(implicit p: Parameters) extends Module
   replayq2.io.enq.bits.data := io.cache.resp.bits.store_data
   replayq2.io.deq.ready := Bool(false)
 
-  when (s2_nack) {
+  when (io.cache.s2_nack) {
    replayq1.io.enq.valid := Bool(true)
    replaying_cmb := Bool(true)
   }
 
   // when replaying request got sunk into the d$
-  when (s2_req_fire && Reg(next=Reg(next=replaying_cmb)) && !s2_nack) {
+  when (s2_req_fire && Reg(next=Reg(next=replaying_cmb)) && !io.cache.s2_nack) {
     // see if there's a stashed request in replayq2
     when (replayq2.io.deq.valid) {
       replayq1.io.enq.valid := Bool(true)


@@ -118,8 +118,8 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) {
   io.mem.req.bits.cmd := Mux(state === s_set_dirty, M_XA_OR, M_XRD)
   io.mem.req.bits.typ := MT_D
   io.mem.req.bits.addr := pte_addr
-  io.mem.req.bits.kill := Bool(false)
-  io.mem.req.bits.data := pte_wdata.toBits
+  io.mem.s1_data := pte_wdata.toBits
+  io.mem.s1_kill := Bool(false)
 
   val r_resp_ppn = io.mem.req.bits.addr >> pgIdxBits
   val resp_ppn = Vec((0 until pgLevels-1).map(i => Cat(r_resp_ppn >> pgLevelBits*(pgLevels-i-1), r_req.addr(pgLevelBits*(pgLevels-i-1)-1,0))) :+ r_resp_ppn)(count)
@@ -152,7 +152,7 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) {
       }
     }
     is (s_wait) {
-      when (io.mem.resp.bits.nack) {
+      when (io.mem.s2_nack) {
        state := s_req
      }
      when (io.mem.resp.valid) {
@@ -172,7 +172,7 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) {
       }
     }
     is (s_wait_dirty) {
-      when (io.mem.resp.bits.nack) {
+      when (io.mem.s2_nack) {
        state := s_set_dirty
      }
      when (io.mem.resp.valid) {


@@ -376,7 +376,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
   }
 
   val wb_set_sboard = wb_ctrl.div || wb_dcache_miss || wb_ctrl.rocc
-  val replay_wb_common = io.dmem.resp.bits.nack || wb_reg_replay
+  val replay_wb_common = io.dmem.s2_nack || wb_reg_replay
   val wb_rocc_val = wb_reg_valid && wb_ctrl.rocc && !replay_wb_common
   val replay_wb = replay_wb_common || wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready
   val wb_xcpt = wb_reg_xcpt || csr.io.csr_xcpt
@@ -388,9 +388,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
   // writeback arbitration
   val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool
   val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool
-  val dmem_resp_waddr = io.dmem.resp.bits.tag.toUInt()(5,1)
+  val dmem_resp_waddr = io.dmem.resp.bits.tag >> 1
   val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data
-  val dmem_resp_replay = io.dmem.resp.bits.replay && io.dmem.resp.bits.has_data
+  val dmem_resp_replay = dmem_resp_valid && io.dmem.resp.bits.replay
 
   div.io.resp.ready := !(wb_reg_valid && wb_ctrl.wxd)
   val ll_wdata = Wire(init = div.io.resp.bits.data)
@@ -532,14 +532,15 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
   io.fpu.dmem_resp_tag := dmem_resp_waddr
 
   io.dmem.req.valid := ex_reg_valid && ex_ctrl.mem
-  io.dmem.req.bits.kill := killm_common || mem_xcpt
+  val ex_dcache_tag = Cat(ex_waddr, ex_ctrl.fp)
+  require(coreDCacheReqTagBits >= ex_dcache_tag.getWidth)
+  io.dmem.req.bits.tag := ex_dcache_tag
   io.dmem.req.bits.cmd := ex_ctrl.mem_cmd
   io.dmem.req.bits.typ := ex_ctrl.mem_type
   io.dmem.req.bits.phys := Bool(false)
   io.dmem.req.bits.addr := encodeVirtualAddress(ex_rs(0), alu.io.adder_out)
-  io.dmem.req.bits.tag := Cat(ex_waddr, ex_ctrl.fp)
-  io.dmem.req.bits.data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2)
-  require(coreDCacheReqTagBits >= 6)
+  io.dmem.s1_kill := killm_common || mem_xcpt
+  io.dmem.s1_data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2)
   io.dmem.invalidate_lr := wb_xcpt
 
   io.rocc.cmd.valid := wb_rocc_val