
Reduce D$ access energy when refill width > access width

Andrew Waterman 2017-06-08 17:13:01 -07:00
parent 25f585f2a9
commit 8aa16a11f3
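
The change replaces the per-way, row-wide data SRAMs with one bank per word of the refill row, so a CPU load or store only enables the bank that holds the word it touches; refills and writebacks still drive every bank. A rough sketch of the bits activated per access, under assumed example parameters (rowBits = 512, wordBits = 64, nWays = 4; these values are illustrative, not taken from the diff):

object DCacheAccessEnergySketch extends App {
  // Assumed example parameters; the real values depend on the cache config.
  val rowBits  = 512  // refill (row) width
  val wordBits = 64   // CPU access width
  val nWays    = 4

  // Before: Seq.fill(nWays) of row-wide SRAMs; a CPU read enables every way,
  // so each of the nWays arrays delivers a full row.
  val bitsBefore = nWays * rowBits

  // After: Seq.fill(rowBytes / wordBytes) banks, each nWays words wide; the
  // one-hot wordMask enables a single bank on a CPU read.
  val bitsAfter = nWays * wordBits

  println(s"SRAM bits activated per CPU read: $bitsBefore -> $bitsAfter " +
    s"(${bitsBefore / bitsAfter}x fewer)")
}

The reduction factor is rowBits / wordBits, i.e. the ratio of refill width to access width named in the commit title.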


@@ -17,7 +17,8 @@ class DCacheDataReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) {
   val addr = Bits(width = untagBits)
   val write = Bool()
   val wdata = Bits(width = rowBits)
-  val wmask = Bits(width = rowBytes)
+  val wordMask = UInt(width = rowBytes / wordBytes)
+  val byteMask = UInt(width = wordBytes)
   val way_en = Bits(width = nWays)
 }
@@ -27,16 +28,24 @@ class DCacheDataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) {
     val resp = Vec(nWays, Bits(OUTPUT, rowBits))
   }
 
-  val data_arrays = Seq.fill(nWays) { SeqMem(nSets*refillCycles, Vec(rowBytes, Bits(width=8))) }
+  require(rowBytes % wordBytes == 0)
+  val eccBytes = 1
+  val eccBits = eccBytes * 8
+  val eccMask = if (eccBytes == wordBytes) Seq(true.B) else (0 until wordBytes by eccBytes).map(io.req.bits.byteMask(_))
+  val wMask = if (nWays == 1) eccMask else (0 until nWays).flatMap(i => eccMask.map(_ && io.req.bits.way_en(i)))
   val addr = io.req.bits.addr >> rowOffBits
-  for ((array, w) <- data_arrays zipWithIndex) {
-    val valid = io.req.valid && (Bool(nWays == 1) || io.req.bits.way_en(w))
+  val data_arrays = Seq.fill(rowBytes / wordBytes) { SeqMem(nSets * refillCycles, Vec(nWays * (wordBytes / eccBytes), UInt(width = eccBits))) }
+  val rdata = for ((array, i) <- data_arrays zipWithIndex) yield {
+    val valid = io.req.valid && (Bool(data_arrays.size == 1) || io.req.bits.wordMask(i))
     when (valid && io.req.bits.write) {
-      val data = Vec.tabulate(rowBytes)(i => io.req.bits.wdata(8*(i+1)-1, 8*i))
-      array.write(addr, data, io.req.bits.wmask.toBools)
+      val word = io.req.bits.wdata((i+1)*wordBits-1, i*wordBits)
+      val wData = (0 until wordBits/eccBits).map(i => word((i+1)*eccBits-1, i*eccBits))
+      array.write(addr, Vec((0 until nWays).flatMap(i => wData)), wMask)
     }
-    io.resp(w) := array.read(addr, valid && !io.req.bits.write).asUInt
+    val data = array.read(addr, valid && !io.req.bits.write)
+    data.grouped(wordBytes / eccBytes).map(_.asUInt).toSeq
   }
+  (io.resp zip rdata.transpose).foreach { case (resp, data) => resp := data.asUInt }
 }
 
 class DCache(hartid: Int, val scratch: () => Option[AddressSet] = () => None)(implicit p: Parameters) extends HellaCache(hartid)(p) {
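
To see what the rewritten DCacheDataArray loop computes, here is a plain-Scala model (names and sizes are illustrative, not from the diff): each bank stores one word of the row for every way, the one-hot wordMask enables a single bank on a CPU read, and rdata.transpose regroups the bank-major read data into the per-way rows that io.resp expects.

object BankedArrayModel extends App {
  val nWays = 4
  val rowWords = 8  // rowBytes / wordBytes

  // banks(word)(way): each bank holds one word of the row for every way.
  val banks = Array.tabulate(rowWords, nWays)((w, way) => s"word$w/way$way")

  // A CPU read with a one-hot wordMask enables exactly one bank.
  val wordIdx = 3
  val enabledBank = banks(wordIdx)   // nWays entries, all from a single bank

  // io.resp is per way, so bank-major data is transposed to way-major,
  // mirroring "(io.resp zip rdata.transpose)" in the hunk above.
  val perWay = banks.transpose
  println(enabledBank.mkString(", "))
  println(perWay(0).mkString(" | ")) // way 0's full row, one word per bank
}
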
@@ -109,6 +118,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   dataArb.io.in(3).valid := io.cpu.req.valid && isRead(io.cpu.req.bits.cmd)
   dataArb.io.in(3).bits.write := false
   dataArb.io.in(3).bits.addr := io.cpu.req.bits.addr
+  dataArb.io.in(3).bits.wordMask := UIntToOH(io.cpu.req.bits.addr.extract(rowOffBits-1,offsetlsb))
   dataArb.io.in(3).bits.way_en := ~UInt(0, nWays)
   when (!dataArb.io.in(3).ready && isRead(io.cpu.req.bits.cmd)) { io.cpu.req.ready := false }
   metaReadArb.io.in(2).valid := io.cpu.req.valid
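
The wordMask added above is a one-hot selection of the word within the row that the CPU request addresses. A small sketch of that address arithmetic, with illustrative sizes (wordBytes = 8, rowBytes = 64; not taken from the diff):

object WordMaskSketch extends App {
  // Illustrative sizes, not from the diff.
  val wordBytes = 8
  val rowBytes = 64
  val offsetlsb = Integer.numberOfTrailingZeros(wordBytes)  // log2(wordBytes)
  val rowOffBits = Integer.numberOfTrailingZeros(rowBytes)  // log2(rowBytes)

  // Mirrors UIntToOH(addr.extract(rowOffBits-1, offsetlsb)): take the word
  // index within the row, then one-hot encode it.
  def wordMask(addr: Long): Int = {
    val wordIdx = ((addr >> offsetlsb) & ((1L << (rowOffBits - offsetlsb)) - 1)).toInt
    1 << wordIdx
  }

  println(wordMask(0x28).toBinaryString)  // byte offset 0x28 -> word 5 -> 100000
}
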
@@ -260,8 +270,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   dataArb.io.in(0).bits.addr := Mux(pstore2_valid, pstore2_addr, pstore1_addr)
   dataArb.io.in(0).bits.way_en := Mux(pstore2_valid, pstore2_way, pstore1_way)
   dataArb.io.in(0).bits.wdata := Fill(rowWords, Mux(pstore2_valid, pstore2_storegen_data, pstore1_storegen_data))
-  val pstore_mask_shift = Mux(pstore2_valid, pstore2_addr, pstore1_addr).extract(rowOffBits-1,offsetlsb) << wordOffBits
-  dataArb.io.in(0).bits.wmask := Mux(pstore2_valid, pstore2_storegen_mask, pstore1_mask) << pstore_mask_shift
+  dataArb.io.in(0).bits.wordMask := UIntToOH(Mux(pstore2_valid, pstore2_addr, pstore1_addr).extract(rowOffBits-1,offsetlsb))
+  dataArb.io.in(0).bits.byteMask := Mux(pstore2_valid, pstore2_storegen_mask, pstore1_mask)
 
   // store->load RAW hazard detection
   val s1_idx = s1_req.addr(idxMSB, wordOffBits)
@@ -377,7 +387,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   dataArb.io.in(1).bits.addr := s2_req_block_addr | d_address_inc
   dataArb.io.in(1).bits.way_en := s2_victim_way
   dataArb.io.in(1).bits.wdata := tl_out.d.bits.data
-  dataArb.io.in(1).bits.wmask := ~UInt(0, rowBytes)
+  dataArb.io.in(1).bits.wordMask := ~UInt(0, rowBytes / wordBytes)
+  dataArb.io.in(1).bits.byteMask := ~UInt(0, wordBytes)
   // tag updates on refill
   metaWriteArb.io.in(1).valid := grantIsCached && d_done
   assert(!metaWriteArb.io.in(1).valid || metaWriteArb.io.in(1).ready)
@@ -482,6 +493,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   dataArb.io.in(2).valid := inWriteback && releaseDataBeat < refillCycles
   dataArb.io.in(2).bits.write := false
   dataArb.io.in(2).bits.addr := tl_out.c.bits.address | (releaseDataBeat(log2Up(refillCycles)-1,0) << rowOffBits)
+  dataArb.io.in(2).bits.wordMask := ~UInt(0, rowBytes / wordBytes)
   dataArb.io.in(2).bits.way_en := ~UInt(0, nWays)
   metaWriteArb.io.in(2).valid := release_state.isOneOf(s_voluntary_write_meta, s_probe_write_meta)