Reduce D$ access energy when refill width > access width
This commit is contained in:
parent
25f585f2a9
commit
8aa16a11f3
@ -17,7 +17,8 @@ class DCacheDataReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) {
|
|||||||
val addr = Bits(width = untagBits)
|
val addr = Bits(width = untagBits)
|
||||||
val write = Bool()
|
val write = Bool()
|
||||||
val wdata = Bits(width = rowBits)
|
val wdata = Bits(width = rowBits)
|
||||||
val wmask = Bits(width = rowBytes)
|
val wordMask = UInt(width = rowBytes / wordBytes)
|
||||||
|
val byteMask = UInt(width = wordBytes)
|
||||||
val way_en = Bits(width = nWays)
|
val way_en = Bits(width = nWays)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -27,16 +28,24 @@ class DCacheDataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
|||||||
val resp = Vec(nWays, Bits(OUTPUT, rowBits))
|
val resp = Vec(nWays, Bits(OUTPUT, rowBits))
|
||||||
}
|
}
|
||||||
|
|
||||||
val data_arrays = Seq.fill(nWays) { SeqMem(nSets*refillCycles, Vec(rowBytes, Bits(width=8))) }
|
require(rowBytes % wordBytes == 0)
|
||||||
|
val eccBytes = 1
|
||||||
|
val eccBits = eccBytes * 8
|
||||||
|
val eccMask = if (eccBytes == wordBytes) Seq(true.B) else (0 until wordBytes by eccBytes).map(io.req.bits.byteMask(_))
|
||||||
|
val wMask = if (nWays == 1) eccMask else (0 until nWays).flatMap(i => eccMask.map(_ && io.req.bits.way_en(i)))
|
||||||
val addr = io.req.bits.addr >> rowOffBits
|
val addr = io.req.bits.addr >> rowOffBits
|
||||||
for ((array, w) <- data_arrays zipWithIndex) {
|
val data_arrays = Seq.fill(rowBytes / wordBytes) { SeqMem(nSets * refillCycles, Vec(nWays * (wordBytes / eccBytes), UInt(width = eccBits))) }
|
||||||
val valid = io.req.valid && (Bool(nWays == 1) || io.req.bits.way_en(w))
|
val rdata = for ((array, i) <- data_arrays zipWithIndex) yield {
|
||||||
|
val valid = io.req.valid && (Bool(data_arrays.size == 1) || io.req.bits.wordMask(i))
|
||||||
when (valid && io.req.bits.write) {
|
when (valid && io.req.bits.write) {
|
||||||
val data = Vec.tabulate(rowBytes)(i => io.req.bits.wdata(8*(i+1)-1, 8*i))
|
val word = io.req.bits.wdata((i+1)*wordBits-1, i*wordBits)
|
||||||
array.write(addr, data, io.req.bits.wmask.toBools)
|
val wData = (0 until wordBits/eccBits).map(i => word((i+1)*eccBits-1, i*eccBits))
|
||||||
|
array.write(addr, Vec((0 until nWays).flatMap(i => wData)), wMask)
|
||||||
}
|
}
|
||||||
io.resp(w) := array.read(addr, valid && !io.req.bits.write).asUInt
|
val data = array.read(addr, valid && !io.req.bits.write)
|
||||||
|
data.grouped(wordBytes / eccBytes).map(_.asUInt).toSeq
|
||||||
}
|
}
|
||||||
|
(io.resp zip rdata.transpose).foreach { case (resp, data) => resp := data.asUInt }
|
||||||
}
|
}
|
||||||
|
|
||||||
class DCache(hartid: Int, val scratch: () => Option[AddressSet] = () => None)(implicit p: Parameters) extends HellaCache(hartid)(p) {
|
class DCache(hartid: Int, val scratch: () => Option[AddressSet] = () => None)(implicit p: Parameters) extends HellaCache(hartid)(p) {
|
||||||
@ -109,6 +118,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
|||||||
dataArb.io.in(3).valid := io.cpu.req.valid && isRead(io.cpu.req.bits.cmd)
|
dataArb.io.in(3).valid := io.cpu.req.valid && isRead(io.cpu.req.bits.cmd)
|
||||||
dataArb.io.in(3).bits.write := false
|
dataArb.io.in(3).bits.write := false
|
||||||
dataArb.io.in(3).bits.addr := io.cpu.req.bits.addr
|
dataArb.io.in(3).bits.addr := io.cpu.req.bits.addr
|
||||||
|
dataArb.io.in(3).bits.wordMask := UIntToOH(io.cpu.req.bits.addr.extract(rowOffBits-1,offsetlsb))
|
||||||
dataArb.io.in(3).bits.way_en := ~UInt(0, nWays)
|
dataArb.io.in(3).bits.way_en := ~UInt(0, nWays)
|
||||||
when (!dataArb.io.in(3).ready && isRead(io.cpu.req.bits.cmd)) { io.cpu.req.ready := false }
|
when (!dataArb.io.in(3).ready && isRead(io.cpu.req.bits.cmd)) { io.cpu.req.ready := false }
|
||||||
metaReadArb.io.in(2).valid := io.cpu.req.valid
|
metaReadArb.io.in(2).valid := io.cpu.req.valid
|
||||||
@ -260,8 +270,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
|||||||
dataArb.io.in(0).bits.addr := Mux(pstore2_valid, pstore2_addr, pstore1_addr)
|
dataArb.io.in(0).bits.addr := Mux(pstore2_valid, pstore2_addr, pstore1_addr)
|
||||||
dataArb.io.in(0).bits.way_en := Mux(pstore2_valid, pstore2_way, pstore1_way)
|
dataArb.io.in(0).bits.way_en := Mux(pstore2_valid, pstore2_way, pstore1_way)
|
||||||
dataArb.io.in(0).bits.wdata := Fill(rowWords, Mux(pstore2_valid, pstore2_storegen_data, pstore1_storegen_data))
|
dataArb.io.in(0).bits.wdata := Fill(rowWords, Mux(pstore2_valid, pstore2_storegen_data, pstore1_storegen_data))
|
||||||
val pstore_mask_shift = Mux(pstore2_valid, pstore2_addr, pstore1_addr).extract(rowOffBits-1,offsetlsb) << wordOffBits
|
dataArb.io.in(0).bits.wordMask := UIntToOH(Mux(pstore2_valid, pstore2_addr, pstore1_addr).extract(rowOffBits-1,offsetlsb))
|
||||||
dataArb.io.in(0).bits.wmask := Mux(pstore2_valid, pstore2_storegen_mask, pstore1_mask) << pstore_mask_shift
|
dataArb.io.in(0).bits.byteMask := Mux(pstore2_valid, pstore2_storegen_mask, pstore1_mask)
|
||||||
|
|
||||||
// store->load RAW hazard detection
|
// store->load RAW hazard detection
|
||||||
val s1_idx = s1_req.addr(idxMSB, wordOffBits)
|
val s1_idx = s1_req.addr(idxMSB, wordOffBits)
|
||||||
@ -377,7 +387,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
|||||||
dataArb.io.in(1).bits.addr := s2_req_block_addr | d_address_inc
|
dataArb.io.in(1).bits.addr := s2_req_block_addr | d_address_inc
|
||||||
dataArb.io.in(1).bits.way_en := s2_victim_way
|
dataArb.io.in(1).bits.way_en := s2_victim_way
|
||||||
dataArb.io.in(1).bits.wdata := tl_out.d.bits.data
|
dataArb.io.in(1).bits.wdata := tl_out.d.bits.data
|
||||||
dataArb.io.in(1).bits.wmask := ~UInt(0, rowBytes)
|
dataArb.io.in(1).bits.wordMask := ~UInt(0, rowBytes / wordBytes)
|
||||||
|
dataArb.io.in(1).bits.byteMask := ~UInt(0, wordBytes)
|
||||||
// tag updates on refill
|
// tag updates on refill
|
||||||
metaWriteArb.io.in(1).valid := grantIsCached && d_done
|
metaWriteArb.io.in(1).valid := grantIsCached && d_done
|
||||||
assert(!metaWriteArb.io.in(1).valid || metaWriteArb.io.in(1).ready)
|
assert(!metaWriteArb.io.in(1).valid || metaWriteArb.io.in(1).ready)
|
||||||
@ -482,6 +493,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
|||||||
dataArb.io.in(2).valid := inWriteback && releaseDataBeat < refillCycles
|
dataArb.io.in(2).valid := inWriteback && releaseDataBeat < refillCycles
|
||||||
dataArb.io.in(2).bits.write := false
|
dataArb.io.in(2).bits.write := false
|
||||||
dataArb.io.in(2).bits.addr := tl_out.c.bits.address | (releaseDataBeat(log2Up(refillCycles)-1,0) << rowOffBits)
|
dataArb.io.in(2).bits.addr := tl_out.c.bits.address | (releaseDataBeat(log2Up(refillCycles)-1,0) << rowOffBits)
|
||||||
|
dataArb.io.in(2).bits.wordMask := ~UInt(0, rowBytes / wordBytes)
|
||||||
dataArb.io.in(2).bits.way_en := ~UInt(0, nWays)
|
dataArb.io.in(2).bits.way_en := ~UInt(0, nWays)
|
||||||
|
|
||||||
metaWriteArb.io.in(2).valid := release_state.isOneOf(s_voluntary_write_meta, s_probe_write_meta)
|
metaWriteArb.io.in(2).valid := release_state.isOneOf(s_voluntary_write_meta, s_probe_write_meta)
|
||||||
|
Loading…
Reference in New Issue
Block a user