Add uncached support to blocking D$
This commit is contained in:
parent 42f079ce57
commit 3b35c7470e
@ -126,10 +126,13 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
|||||||
val s2_hit_dirty = s2_hit && s2_hit_state.requiresVoluntaryWriteback()
|
val s2_hit_dirty = s2_hit && s2_hit_state.requiresVoluntaryWriteback()
|
||||||
val s2_valid_hit = s2_valid_masked && s2_hit
|
val s2_valid_hit = s2_valid_masked && s2_hit
|
||||||
val s2_valid_miss = s2_valid_masked && !s2_hit && !(pstore1_valid || pstore2_valid)
|
val s2_valid_miss = s2_valid_masked && !s2_hit && !(pstore1_valid || pstore2_valid)
|
||||||
val s2_repl = RegEnable(meta.io.resp(replacer.way), s1_valid_not_nacked)
|
val s2_uncached = !addrMap.isCacheable(s2_req.addr)
|
||||||
val s2_repl_dirty = s2_repl.coh.requiresVoluntaryWriteback()
|
val s2_valid_cached_miss = s2_valid_miss && !s2_uncached
|
||||||
io.cpu.s2_nack := s2_valid && !s2_valid_hit
|
val s2_valid_uncached = s2_valid_miss && s2_uncached
|
||||||
when (io.cpu.s2_nack) { s1_nack := true }
|
val s2_victim_state = RegEnable(meta.io.resp(replacer.way), s1_valid_not_nacked)
|
||||||
|
val s2_victim_dirty = s2_victim_state.coh.requiresVoluntaryWriteback()
|
||||||
|
io.cpu.s2_nack := s2_valid && !s2_valid_hit && !(s2_valid_uncached && io.mem.acquire.ready)
|
||||||
|
when (s2_valid && !s2_valid_hit) { s1_nack := true }
|
||||||
|
|
||||||
// exceptions
|
// exceptions
|
||||||
val misaligned = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).misaligned
|
val misaligned = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).misaligned
|
||||||
@ -174,10 +177,11 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
|||||||
pstore1_valid := {
|
pstore1_valid := {
|
||||||
val s2_store_valid = s2_valid_hit && isWrite(s2_req.cmd) && !s2_sc_fail
|
val s2_store_valid = s2_valid_hit && isWrite(s2_req.cmd) && !s2_sc_fail
|
||||||
val pstore1_held = Reg(Bool())
|
val pstore1_held = Reg(Bool())
|
||||||
|
assert(!s2_store_valid || !pstore1_held)
|
||||||
pstore1_held := (s2_store_valid || pstore1_held) && pstore2_valid && !pstore_drain
|
pstore1_held := (s2_store_valid || pstore1_held) && pstore2_valid && !pstore_drain
|
||||||
s2_store_valid || pstore1_held
|
s2_store_valid || pstore1_held
|
||||||
}
|
}
|
||||||
val advance_pstore1 = pstore1_valid && (!pstore2_valid || pstore_drain)
|
val advance_pstore1 = pstore1_valid && !(pstore2_valid && !pstore_drain)
|
||||||
pstore2_valid := pstore2_valid && !pstore_drain || advance_pstore1
|
pstore2_valid := pstore2_valid && !pstore_drain || advance_pstore1
|
||||||
val pstore2_addr = RegEnable(pstore1_addr, advance_pstore1)
|
val pstore2_addr = RegEnable(pstore1_addr, advance_pstore1)
|
||||||
val pstore2_way = RegEnable(pstore1_way, advance_pstore1)
|
val pstore2_way = RegEnable(pstore1_way, advance_pstore1)
|
||||||
@ -201,24 +205,50 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
|||||||
when (s1_valid && s1_raw_hazard) { s1_nack := true }
|
when (s1_valid && s1_raw_hazard) { s1_nack := true }
|
||||||
|
|
||||||
val s2_new_hit_state = s2_hit_state.onHit(s2_req.cmd)
|
val s2_new_hit_state = s2_hit_state.onHit(s2_req.cmd)
|
||||||
metaWriteArb.io.in(0).valid := (s2_valid_hit && s2_hit_state =/= s2_new_hit_state) || (s2_valid_miss && !s2_repl_dirty)
|
metaWriteArb.io.in(0).valid := (s2_valid_hit && s2_hit_state =/= s2_new_hit_state) || (s2_valid_cached_miss && !s2_victim_dirty)
|
||||||
metaWriteArb.io.in(0).bits.way_en := Mux(s2_hit, s2_hit_way, UIntToOH(replacer.way))
|
metaWriteArb.io.in(0).bits.way_en := Mux(s2_hit, s2_hit_way, UIntToOH(replacer.way))
|
||||||
metaWriteArb.io.in(0).bits.idx := s2_req.addr(idxMSB, idxLSB)
|
metaWriteArb.io.in(0).bits.idx := s2_req.addr(idxMSB, idxLSB)
|
||||||
metaWriteArb.io.in(0).bits.data.coh := Mux(s2_hit, s2_new_hit_state, ClientMetadata.onReset)
|
metaWriteArb.io.in(0).bits.data.coh := Mux(s2_hit, s2_new_hit_state, ClientMetadata.onReset)
|
||||||
metaWriteArb.io.in(0).bits.data.tag := s2_req.addr(paddrBits-1, untagBits)
|
metaWriteArb.io.in(0).bits.data.tag := s2_req.addr(paddrBits-1, untagBits)
|
||||||
|
|
||||||
// acquire
|
// acquire
|
||||||
io.mem.acquire.valid := s2_valid_miss && !s2_repl_dirty && fq.io.enq.ready
|
val cachedGetMessage = s2_hit_state.makeAcquire(
|
||||||
io.mem.acquire.bits := s2_hit_state.makeAcquire(addr_block = s2_req.addr(paddrBits-1, blockOffBits), client_xact_id = UInt(0), op_code = s2_req.cmd)
|
client_xact_id = UInt(0),
|
||||||
|
addr_block = s2_req.addr(paddrBits-1, blockOffBits),
|
||||||
|
op_code = s2_req.cmd)
|
||||||
|
val uncachedGetMessage = Get(
|
||||||
|
client_xact_id = UInt(0),
|
||||||
|
addr_block = s2_req.addr(paddrBits-1, blockOffBits),
|
||||||
|
addr_beat = s2_req.addr(blockOffBits-1, beatOffBits),
|
||||||
|
addr_byte = s2_req.addr(beatOffBits-1, 0),
|
||||||
|
operand_size = s2_req.typ,
|
||||||
|
alloc = Bool(false))
|
||||||
|
val uncachedPutMessage = Put(
|
||||||
|
client_xact_id = UInt(0),
|
||||||
|
addr_block = s2_req.addr(paddrBits-1, blockOffBits),
|
||||||
|
addr_beat = s2_req.addr(blockOffBits-1, beatOffBits),
|
||||||
|
data = Fill(beatWords, pstore1_storegen.data),
|
||||||
|
wmask = pstore1_storegen.mask << (s2_req.addr(beatOffBits-1, wordOffBits) << wordOffBits),
|
||||||
|
alloc = Bool(false))
|
||||||
|
io.mem.acquire.valid := ((s2_valid_cached_miss && !s2_victim_dirty) || s2_valid_uncached) && fq.io.enq.ready
|
||||||
|
io.mem.acquire.bits := cachedGetMessage
|
||||||
|
when (s2_uncached) {
|
||||||
|
assert(!s2_valid_masked || !s2_hit, "cache hit on uncached access")
|
||||||
|
io.mem.acquire.bits := uncachedGetMessage
|
||||||
|
when (isWrite(s2_req.cmd)) {
|
||||||
|
assert(!s2_valid || !isRead(s2_req.cmd), "uncached AMOs are unsupported")
|
||||||
|
io.mem.acquire.bits := uncachedPutMessage
|
||||||
|
}
|
||||||
|
}
|
||||||
when (io.mem.acquire.fire()) { grant_wait := true }
|
when (io.mem.acquire.fire()) { grant_wait := true }
|
||||||
|
|
||||||
// grant
|
// grant
|
||||||
val grantIsRefill = io.mem.grant.bits.hasMultibeatData()
|
val grantIsRefill = io.mem.grant.bits.hasMultibeatData()
|
||||||
val grantHasData = io.mem.grant.bits.hasData()
|
val grantIsVoluntary = io.mem.grant.bits.isVoluntary()
|
||||||
val grantIsUncached = grantHasData && !grantIsRefill
|
val grantIsUncached = !grantIsRefill && !grantIsVoluntary
|
||||||
when (io.mem.grant.valid) {
|
when (io.mem.grant.valid) {
|
||||||
assert(grantIsRefill === io.mem.grant.bits.requiresAck(), "")
|
assert(grant_wait || grantIsVoluntary, "unexpected grant")
|
||||||
assert(!grantIsUncached, "TODO uncached")
|
when (grantIsUncached) { s2_data := io.mem.grant.bits.data }
|
||||||
}
|
}
|
||||||
val (refillCount, refillDone) = Counter(io.mem.grant.fire() && grantIsRefill, refillCycles)
|
val (refillCount, refillDone) = Counter(io.mem.grant.fire() && grantIsRefill, refillCycles)
|
||||||
val grantDone = refillDone || grantIsUncached
|
val grantDone = refillDone || grantIsUncached
|
||||||
@ -227,7 +257,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
|||||||
// data refill
|
// data refill
|
||||||
dataArb.io.in(1).valid := grantIsRefill && io.mem.grant.valid
|
dataArb.io.in(1).valid := grantIsRefill && io.mem.grant.valid
|
||||||
io.mem.grant.ready := true
|
io.mem.grant.ready := true
|
||||||
assert(dataArb.io.in(1).ready || !dataArb.io.in(1).valid, "")
|
assert(dataArb.io.in(1).ready || !dataArb.io.in(1).valid)
|
||||||
dataArb.io.in(1).bits.write := true
|
dataArb.io.in(1).bits.write := true
|
||||||
dataArb.io.in(1).bits.addr := Cat(s2_req.addr(paddrBits-1, blockOffBits), io.mem.grant.bits.addr_beat) << beatOffBits
|
dataArb.io.in(1).bits.addr := Cat(s2_req.addr(paddrBits-1, blockOffBits), io.mem.grant.bits.addr_beat) << beatOffBits
|
||||||
dataArb.io.in(1).bits.way_en := UIntToOH(replacer.way)
|
dataArb.io.in(1).bits.way_en := UIntToOH(replacer.way)
|
||||||
@ -235,12 +265,21 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
|||||||
dataArb.io.in(1).bits.wmask := ~UInt(0, rowBytes)
|
dataArb.io.in(1).bits.wmask := ~UInt(0, rowBytes)
|
||||||
// tag updates on refill
|
// tag updates on refill
|
||||||
metaWriteArb.io.in(1).valid := refillDone
|
metaWriteArb.io.in(1).valid := refillDone
|
||||||
assert(!metaWriteArb.io.in(1).valid || metaWriteArb.io.in(1).ready, "")
|
assert(!metaWriteArb.io.in(1).valid || metaWriteArb.io.in(1).ready)
|
||||||
metaWriteArb.io.in(1).bits.way_en := UIntToOH(replacer.way)
|
metaWriteArb.io.in(1).bits.way_en := UIntToOH(replacer.way)
|
||||||
metaWriteArb.io.in(1).bits.idx := s2_req.addr(idxMSB, idxLSB)
|
metaWriteArb.io.in(1).bits.idx := s2_req.addr(idxMSB, idxLSB)
|
||||||
metaWriteArb.io.in(1).bits.data.coh := s2_hit_state.onGrant(io.mem.grant.bits, s2_req.cmd)
|
metaWriteArb.io.in(1).bits.data.coh := s2_hit_state.onGrant(io.mem.grant.bits, s2_req.cmd)
|
||||||
metaWriteArb.io.in(1).bits.data.tag := s2_req.addr(paddrBits-1, untagBits)
|
metaWriteArb.io.in(1).bits.data.tag := s2_req.addr(paddrBits-1, untagBits)
|
||||||
|
|
||||||
|
// finish
|
||||||
|
fq.io.enq.valid := io.mem.grant.fire() && io.mem.grant.bits.requiresAck() && (!grantIsRefill || refillDone)
|
||||||
|
fq.io.enq.bits := io.mem.grant.bits.makeFinish()
|
||||||
|
io.mem.finish <> fq.io.deq
|
||||||
|
when (fq.io.enq.valid) {
|
||||||
|
assert(fq.io.enq.ready)
|
||||||
|
replacer.miss
|
||||||
|
}
|
||||||
|
|
||||||
// probe
|
// probe
|
||||||
val block_probe = releaseInFlight || lrscValid || (s2_valid_hit && s2_lr)
|
val block_probe = releaseInFlight || lrscValid || (s2_valid_hit && s2_lr)
|
||||||
metaReadArb.io.in(0).valid := io.mem.probe.valid && !block_probe
|
metaReadArb.io.in(0).valid := io.mem.probe.valid && !block_probe
|
||||||
@ -248,29 +287,24 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
|||||||
metaReadArb.io.in(0).bits.idx := io.mem.probe.bits.addr_block
|
metaReadArb.io.in(0).bits.idx := io.mem.probe.bits.addr_block
|
||||||
metaReadArb.io.in(0).bits.way_en := ~UInt(0, nWays)
|
metaReadArb.io.in(0).bits.way_en := ~UInt(0, nWays)
|
||||||
|
|
||||||
// finish
|
|
||||||
fq.io.enq.valid := refillDone
|
|
||||||
fq.io.enq.bits := io.mem.grant.bits.makeFinish()
|
|
||||||
io.mem.finish <> fq.io.deq
|
|
||||||
when (fq.io.enq.valid) {
|
|
||||||
assert(fq.io.enq.ready, "")
|
|
||||||
replacer.miss
|
|
||||||
}
|
|
||||||
|
|
||||||
// release
|
// release
|
||||||
val (writebackCount, writebackDone) = Counter(io.mem.release.fire() && inWriteback, refillCycles)
|
val (writebackCount, writebackDone) = Counter(io.mem.release.fire() && inWriteback, refillCycles)
|
||||||
val releaseDone = writebackDone || (io.mem.release.fire() && !inWriteback)
|
val releaseDone = writebackDone || (io.mem.release.fire() && !inWriteback)
|
||||||
val new_coh = Wire(init = s2_hit_state.onProbe(probe_bits))
|
val releaseWay = Wire(init = s2_hit_way)
|
||||||
val release_way = Wire(init = s2_hit_way)
|
|
||||||
val releaseRejected = io.mem.release.valid && !io.mem.release.ready
|
val releaseRejected = io.mem.release.valid && !io.mem.release.ready
|
||||||
val s1_release_data_valid = Reg(next = dataArb.io.in(2).fire())
|
val s1_release_data_valid = Reg(next = dataArb.io.in(2).fire())
|
||||||
val s2_release_data_valid = Reg(next = s1_release_data_valid && !releaseRejected)
|
val s2_release_data_valid = Reg(next = s1_release_data_valid && !releaseRejected)
|
||||||
val releaseDataBeat = Cat(UInt(0), writebackCount) + Mux(releaseRejected, UInt(0), s1_release_data_valid + Cat(UInt(0), s2_release_data_valid))
|
val releaseDataBeat = Cat(UInt(0), writebackCount) + Mux(releaseRejected, UInt(0), s1_release_data_valid + Cat(UInt(0), s2_release_data_valid))
|
||||||
io.mem.release.valid := s2_release_data_valid
|
io.mem.release.valid := s2_release_data_valid
|
||||||
io.mem.release.bits := ClientMetadata.onReset.makeRelease(probe_bits)
|
io.mem.release.bits := ClientMetadata.onReset.makeRelease(probe_bits)
|
||||||
when (s2_valid_miss && s2_repl_dirty) {
|
val voluntaryReleaseMessage = s2_hit_state.makeVoluntaryWriteback(UInt(0), UInt(0))
|
||||||
|
val voluntaryNewCoh = s2_hit_state.onCacheControl(M_FLUSH)
|
||||||
|
val probeResponseMessage = s2_hit_state.makeRelease(probe_bits)
|
||||||
|
val probeNewCoh = s2_hit_state.onProbe(probe_bits)
|
||||||
|
val newCoh = Wire(init = probeNewCoh)
|
||||||
|
when (s2_valid_cached_miss && s2_victim_dirty) {
|
||||||
release_state := s_voluntary_writeback
|
release_state := s_voluntary_writeback
|
||||||
probe_bits.addr_block := Cat(s2_repl.tag, s2_req.addr(idxMSB, idxLSB))
|
probe_bits.addr_block := Cat(s2_victim_state.tag, s2_req.addr(idxMSB, idxLSB))
|
||||||
}
|
}
|
||||||
when (s2_probe) {
|
when (s2_probe) {
|
||||||
when (s2_hit_dirty) { release_state := s_probe_rep_dirty }
|
when (s2_hit_dirty) { release_state := s_probe_rep_dirty }
|
||||||
@ -285,13 +319,13 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
|||||||
io.mem.release.valid := true
|
io.mem.release.valid := true
|
||||||
}
|
}
|
||||||
when (release_state === s_probe_rep_clean || release_state === s_probe_rep_dirty) {
|
when (release_state === s_probe_rep_clean || release_state === s_probe_rep_dirty) {
|
||||||
io.mem.release.bits := s2_hit_state.makeRelease(probe_bits)
|
io.mem.release.bits := probeResponseMessage
|
||||||
when (releaseDone) { release_state := s_probe_write_meta }
|
when (releaseDone) { release_state := s_probe_write_meta }
|
||||||
}
|
}
|
||||||
when (release_state === s_voluntary_writeback || release_state === s_voluntary_write_meta) {
|
when (release_state === s_voluntary_writeback || release_state === s_voluntary_write_meta) {
|
||||||
io.mem.release.bits := s2_hit_state.makeVoluntaryWriteback(UInt(0), UInt(0))
|
io.mem.release.bits := voluntaryReleaseMessage
|
||||||
new_coh := s2_hit_state.onCacheControl(M_FLUSH)
|
newCoh := voluntaryNewCoh
|
||||||
release_way := UIntToOH(replacer.way)
|
releaseWay := UIntToOH(replacer.way)
|
||||||
when (releaseDone) { release_state := s_voluntary_write_meta }
|
when (releaseDone) { release_state := s_voluntary_write_meta }
|
||||||
}
|
}
|
||||||
when (s2_probe && !io.mem.release.fire()) { s1_nack := true }
|
when (s2_probe && !io.mem.release.fire()) { s1_nack := true }
|
||||||
@ -305,20 +339,28 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
|||||||
dataArb.io.in(2).bits.way_en := ~UInt(0, nWays)
|
dataArb.io.in(2).bits.way_en := ~UInt(0, nWays)
|
||||||
|
|
||||||
metaWriteArb.io.in(2).valid := (release_state === s_voluntary_write_meta || release_state === s_probe_write_meta)
|
metaWriteArb.io.in(2).valid := (release_state === s_voluntary_write_meta || release_state === s_probe_write_meta)
|
||||||
metaWriteArb.io.in(2).bits.way_en := release_way
|
metaWriteArb.io.in(2).bits.way_en := releaseWay
|
||||||
metaWriteArb.io.in(2).bits.idx := io.mem.release.bits.full_addr()(idxMSB, idxLSB)
|
metaWriteArb.io.in(2).bits.idx := io.mem.release.bits.full_addr()(idxMSB, idxLSB)
|
||||||
metaWriteArb.io.in(2).bits.data.coh := new_coh
|
metaWriteArb.io.in(2).bits.data.coh := newCoh
|
||||||
metaWriteArb.io.in(2).bits.data.tag := io.mem.release.bits.full_addr()(paddrBits-1, untagBits)
|
metaWriteArb.io.in(2).bits.data.tag := io.mem.release.bits.full_addr()(paddrBits-1, untagBits)
|
||||||
when (metaWriteArb.io.in(2).fire()) { release_state := s_ready }
|
when (metaWriteArb.io.in(2).fire()) { release_state := s_ready }
|
||||||
|
|
||||||
// response
|
// cached response
|
||||||
io.cpu.replay_next := io.mem.grant.valid && grantIsUncached
|
io.cpu.resp.valid := s2_valid_hit
|
||||||
io.cpu.resp.valid := s2_valid_hit || io.cpu.resp.bits.replay
|
io.cpu.resp.bits := s2_req
|
||||||
io.cpu.resp.bits := s2_req // TODO uncached
|
io.cpu.resp.bits.has_data := isRead(s2_req.cmd)
|
||||||
io.cpu.resp.bits.has_data := isRead(s2_req.cmd) // TODO uncached
|
io.cpu.resp.bits.replay := false
|
||||||
io.cpu.resp.bits.replay := Reg(next = io.cpu.replay_next)
|
|
||||||
io.cpu.ordered := !(s1_valid || s2_valid || grant_wait)
|
io.cpu.ordered := !(s1_valid || s2_valid || grant_wait)
|
||||||
|
|
||||||
|
// uncached response
|
||||||
|
io.cpu.replay_next := io.mem.grant.valid && grantIsUncached
|
||||||
|
val doUncachedResp = Reg(next = io.cpu.replay_next)
|
||||||
|
when (doUncachedResp) {
|
||||||
|
assert(!s2_valid_hit)
|
||||||
|
io.cpu.resp.valid := true
|
||||||
|
io.cpu.resp.bits.replay := true
|
||||||
|
}
|
||||||
|
|
||||||
// load data subword mux/sign extension
|
// load data subword mux/sign extension
|
||||||
val s2_word_idx = s2_req.addr(log2Up(rowWords*coreDataBytes)-1, log2Up(wordBytes))
|
val s2_word_idx = s2_req.addr(log2Up(rowWords*coreDataBytes)-1, log2Up(wordBytes))
|
||||||
val s2_data_word = s2_data >> Cat(s2_word_idx, UInt(0, log2Up(coreDataBits)))
|
val s2_data_word = s2_data >> Cat(s2_word_idx, UInt(0, log2Up(coreDataBits)))
|
||||||
@ -338,6 +380,5 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
|||||||
pstore1_storegen_data := amoalu.io.out
|
pstore1_storegen_data := amoalu.io.out
|
||||||
} else {
|
} else {
|
||||||
assert(!(s1_valid_masked && isRead(s1_req.cmd) && isWrite(s1_req.cmd)), "unsupported D$ operation")
|
assert(!(s1_valid_masked && isRead(s1_req.cmd) && isWrite(s1_req.cmd)), "unsupported D$ operation")
|
||||||
assert(!pstore_drain_structural, "???")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user