Add support for L1 data scratchpads instead of caches
They occupy the same part of the address space that DRAM would, and are coherent (because they are not cacheable). They are currently limited to single cores without DRAM. We intend to lift both restrictions, probably when we add support for heterogeneous tiles.
This commit is contained in:
@ -54,11 +54,8 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
||||
// tags
|
||||
val replacer = p(Replacer)()
|
||||
def onReset = L1Metadata(UInt(0), ClientMetadata.onReset)
|
||||
val meta = Module(new MetadataArray(onReset _))
|
||||
val metaReadArb = Module(new Arbiter(new MetaReadReq, 3))
|
||||
val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, 3))
|
||||
meta.io.read <> metaReadArb.io.out
|
||||
meta.io.write <> metaWriteArb.io.out
|
||||
|
||||
// data
|
||||
val data = Module(new DCacheDataArray)
|
||||
@ -116,13 +113,28 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
||||
|
||||
val s1_paddr = Cat(tlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0))
|
||||
val s1_tag = Mux(s1_probe, probe_bits.addr_block >> idxBits, s1_paddr(paddrBits-1, untagBits))
|
||||
val s1_hit_way = meta.io.resp.map(r => r.coh.isValid() && r.tag === s1_tag).asUInt
|
||||
val s1_hit_state = ClientMetadata.onReset.fromBits(
|
||||
meta.io.resp.map(r => Mux(r.tag === s1_tag, r.coh.asUInt, UInt(0)))
|
||||
.reduce (_|_))
|
||||
val s1_victim_way = Wire(init = replacer.way)
|
||||
val (s1_hit_way, s1_hit_state, s1_victim_meta) =
|
||||
if (usingDataScratchpad) {
|
||||
require(nWays == 1)
|
||||
metaWriteArb.io.out.ready := true
|
||||
metaReadArb.io.out.ready := !metaWriteArb.io.out.valid
|
||||
val inScratchpad = addrMap(s"io:int:dmem${tileId}").containsAddress(s1_paddr)
|
||||
val hitState = Mux(inScratchpad, ClientMetadata.onReset.onHit(M_XWR), ClientMetadata.onReset)
|
||||
(inScratchpad, hitState, L1Metadata(UInt(0), ClientMetadata.onReset))
|
||||
} else {
|
||||
val meta = Module(new MetadataArray(onReset _))
|
||||
meta.io.read <> metaReadArb.io.out
|
||||
meta.io.write <> metaWriteArb.io.out
|
||||
val s1_meta = meta.io.resp
|
||||
val s1_hit_way = s1_meta.map(r => r.coh.isValid() && r.tag === s1_tag).asUInt
|
||||
val s1_hit_state = ClientMetadata.onReset.fromBits(
|
||||
s1_meta.map(r => Mux(r.tag === s1_tag, r.coh.asUInt, UInt(0)))
|
||||
.reduce (_|_))
|
||||
(s1_hit_way, s1_hit_state, s1_meta(s1_victim_way))
|
||||
}
|
||||
val s1_data_way = Mux(inWriteback, releaseWay, s1_hit_way)
|
||||
val s1_data = Mux1H(s1_data_way, data.io.resp) // retime into s2 if critical
|
||||
val s1_victim_way = Wire(init = replacer.way)
|
||||
|
||||
val s2_valid = Reg(next=s1_valid_masked, init=Bool(false))
|
||||
val s2_probe = Reg(next=s1_probe, init=Bool(false))
|
||||
@ -133,7 +145,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
||||
when (s1_valid_not_nacked || s1_flush_valid) {
|
||||
s2_req := s1_req
|
||||
s2_req.addr := s1_paddr
|
||||
s2_uncached := !tlb.io.resp.cacheable
|
||||
s2_uncached := !tlb.io.resp.cacheable || Bool(usingDataScratchpad)
|
||||
}
|
||||
val s2_read = isRead(s2_req.cmd)
|
||||
val s2_write = isWrite(s2_req.cmd)
|
||||
@ -151,8 +163,8 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
||||
val s2_victimize = s2_valid_cached_miss || s2_flush_valid
|
||||
val s2_valid_uncached = s2_valid_miss && s2_uncached
|
||||
val s2_victim_way = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_way, UIntToOH(RegEnable(s1_victim_way, s1_valid_not_nacked || s1_flush_valid)))
|
||||
val s2_victim_tag = RegEnable(meta.io.resp(s1_victim_way).tag, s1_valid_not_nacked || s1_flush_valid)
|
||||
val s2_victim_state = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_state, RegEnable(meta.io.resp(s1_victim_way).coh, s1_valid_not_nacked || s1_flush_valid))
|
||||
val s2_victim_tag = RegEnable(s1_victim_meta.tag, s1_valid_not_nacked || s1_flush_valid)
|
||||
val s2_victim_state = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_state, RegEnable(s1_victim_meta.coh, s1_valid_not_nacked || s1_flush_valid))
|
||||
val s2_victim_valid = s2_victim_state.isValid()
|
||||
val s2_victim_dirty = s2_victim_state.requiresVoluntaryWriteback()
|
||||
val s2_new_hit_state = s2_hit_state.onHit(s2_req.cmd)
|
||||
@ -261,7 +273,8 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
||||
io.mem.acquire.valid := ((s2_valid_cached_miss && !s2_victim_dirty) || s2_valid_uncached) && fq.io.enq.ready
|
||||
io.mem.acquire.bits := cachedGetMessage
|
||||
when (s2_uncached) {
|
||||
assert(!s2_valid_masked || !s2_hit_state.isValid(), "cache hit on uncached access")
|
||||
if (!usingDataScratchpad)
|
||||
assert(!s2_valid_masked || !s2_hit_state.isValid(), "cache hit on uncached access")
|
||||
io.mem.acquire.bits := uncachedGetMessage
|
||||
when (s2_write) {
|
||||
io.mem.acquire.bits := uncachedPutMessage
|
||||
@ -419,7 +432,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
||||
val flushed = Reg(init=Bool(true))
|
||||
val flushing = Reg(init=Bool(false))
|
||||
val flushCounter = Counter(nSets * nWays)
|
||||
when (io.mem.acquire.fire()) { flushed := false }
|
||||
when (io.mem.acquire.fire() && !s2_uncached) { flushed := false }
|
||||
when (s2_valid_masked && s2_req.cmd === M_FLUSH_ALL) {
|
||||
io.cpu.s2_nack := !flushed
|
||||
when (!flushed) {
|
||||
@ -442,3 +455,61 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Adapter that presents a HellaCache request port (`io.dmem`) as an uncached
  * TileLink slave (`io.tl`): acquires arriving on `io.tl` are converted into
  * cache requests, and the cache responses are returned as grants.
  *
  * Elaboration-time requirements: the TileLink data width must equal
  * `coreDataBits`, and `usingDataScratchpad` must be true.
  *
  * One transaction is tracked at a time:
  *  - `s_ready`:  nothing outstanding; a new acquire may be accepted.
  *  - `s_wait`:   a request has fired on `io.dmem`.
  *  - `s_replay`: the request was nacked (`s2_nack`) and is reissued from the
  *                saved acquire.
  *  - `s_grant`:  a response arrived; the grant is held valid until it fires.
  */
class ScratchpadSlavePort(implicit p: Parameters) extends CoreModule()(p) {
  val io = new Bundle {
    val tl = new ClientUncachedTileLinkIO().flip
    val dmem = new HellaCacheIO
  }

  val s_ready :: s_wait :: s_replay :: s_grant :: Nil = Enum(UInt(), 4)
  val state = Reg(init = s_ready)
  // Later `when` blocks win if several conditions hold in the same cycle, so
  // issuing a new request has the highest priority, then a nack, and so on.
  when (io.dmem.resp.valid) { state := s_grant }
  when (io.tl.grant.fire()) { state := s_ready }
  when (io.dmem.s2_nack) { state := s_replay }
  when (io.dmem.req.fire()) { state := s_wait }

  // Saved copy of the acquire being serviced. Its data field is overwritten
  // with the response data so the grant can be held after resp.valid drops.
  val acq = Reg(io.tl.acquire.bits)
  when (io.dmem.resp.valid) { acq.data := io.dmem.resp.bits.data }
  when (io.tl.acquire.fire()) { acq := io.tl.acquire.bits }

  // Type of the *saved* acquire; whenever a transaction is in flight it must
  // be a built-in Get or Put.
  val isRead = acq.isBuiltInType(Acquire.getType)
  val isWrite = acq.isBuiltInType(Acquire.putType)
  assert(state === s_ready || isRead || isWrite)
  require(coreDataBits == acq.tlDataBits)
  require(usingDataScratchpad)

  /** Builds the cache request corresponding to `acq`.
    *
    * The read/write decision is taken from the `acq` argument itself. The
    * argument shadows the saved-acquire register, and for a newly arriving
    * acquire the register still describes the previous transaction, so the
    * module-level `isRead` must not be used here.
    */
  def formCacheReq(acq: Acquire) = {
    val req = Wire(new HellaCacheReq)
    val reqIsRead = acq.isBuiltInType(Acquire.getType)
    // treat all loads as full words, so bytes appear in correct lane
    req.typ := Mux(reqIsRead, log2Ceil(acq.tlDataBytes), acq.op_size())
    req.cmd := acq.op_code()
    // For loads, clear the low address bits to align to the TileLink data width.
    req.addr := Mux(reqIsRead, ~(~acq.full_addr() | (acq.tlDataBytes-1)), acq.full_addr())
    req.tag := UInt(0)
    req
  }

  // A new acquire can be taken when idle, or in the cycle the grant fires.
  val ready = state === s_ready || io.tl.grant.fire()
  io.dmem.req.valid := (io.tl.acquire.valid && ready) || state === s_replay
  io.tl.acquire.ready := io.dmem.req.ready && ready
  // Replays are rebuilt from the saved acquire; new requests from the incoming one.
  io.dmem.req.bits := formCacheReq(Mux(state === s_replay, acq, io.tl.acquire.bits))
  // Unfortunate: the TL data is already in the correct byte lane, but the D$
  // expects right-justified store data, so that it can steer the bytes.
  io.dmem.s1_data := new LoadGen(acq.op_size(), Bool(false), acq.addr_byte(), acq.data, Bool(false), acq.tlDataBytes).data
  io.dmem.s1_kill := false
  io.dmem.invalidate_lr := false

  // place AMO data in correct word lane
  val minAMOBytes = 4
  // Use the live response data when present, otherwise the copy saved in acq.
  val grantData = Mux(io.dmem.resp.valid, io.dmem.resp.bits.data, acq.data)
  // For operand sizes up to minAMOBytes, replicate the low word across the
  // full data width; larger operands are passed through unchanged.
  val alignedGrantData = Mux(acq.op_size() <= log2Ceil(minAMOBytes), Fill(coreDataBytes/minAMOBytes, grantData(8*minAMOBytes-1, 0)), grantData)

  io.tl.grant.valid := io.dmem.resp.valid || state === s_grant
  io.tl.grant.bits := Grant(
    is_builtin_type = Bool(true),
    g_type = acq.getBuiltInGrantType(),
    client_xact_id = acq.client_xact_id,
    manager_xact_id = UInt(0),
    addr_beat = acq.addr_beat,
    data = alignedGrantData)
}
|
||||
|
@ -30,6 +30,7 @@ case object ResetVector extends Field[BigInt]
|
||||
case object NBreakpoints extends Field[Int]
|
||||
case object NPerfCounters extends Field[Int]
|
||||
case object NPerfEvents extends Field[Int]
|
||||
// Configuration field for the L1 data scratchpad size. usingDataScratchpad is
// derived as p(DataScratchpadSize) > 0, so a value of 0 or less leaves the
// scratchpad disabled. NOTE(review): units are not visible here (presumably
// bytes) -- confirm against the configuration that sets this field.
case object DataScratchpadSize extends Field[Int]
|
||||
|
||||
trait HasCoreParameters extends HasAddrMapParameters {
|
||||
implicit val p: Parameters
|
||||
@ -48,6 +49,7 @@ trait HasCoreParameters extends HasAddrMapParameters {
|
||||
val nBreakpoints = p(NBreakpoints)
|
||||
val nPerfCounters = p(NPerfCounters)
|
||||
val nPerfEvents = p(NPerfEvents)
|
||||
val usingDataScratchpad = p(DataScratchpadSize) > 0
|
||||
|
||||
val retireWidth = p(RetireWidth)
|
||||
val fetchWidth = p(FetchWidth)
|
||||
@ -55,7 +57,7 @@ trait HasCoreParameters extends HasAddrMapParameters {
|
||||
val coreInstBytes = coreInstBits/8
|
||||
val coreDataBits = xLen
|
||||
val coreDataBytes = coreDataBits/8
|
||||
val dcacheArbPorts = 1 + (if (usingVM) 1 else 0) + p(BuildRoCC).size
|
||||
val dcacheArbPorts = 1 + usingVM.toInt + usingDataScratchpad.toInt + p(BuildRoCC).size
|
||||
val coreDCacheReqTagBits = 6
|
||||
val dcacheReqTagBits = coreDCacheReqTagBits + log2Ceil(dcacheArbPorts)
|
||||
|
||||
|
@ -5,6 +5,7 @@ package rocket
|
||||
import Chisel._
|
||||
import uncore.tilelink._
|
||||
import uncore.agents._
|
||||
import uncore.converters._
|
||||
import uncore.devices._
|
||||
import Util._
|
||||
import cde.{Parameters, Field}
|
||||
@ -31,6 +32,7 @@ abstract class Tile(clockSignal: Clock = null, resetSignal: Bool = null)
|
||||
val cached = Vec(nCachedTileLinkPorts, new ClientTileLinkIO)
|
||||
val uncached = Vec(nUncachedTileLinkPorts, new ClientUncachedTileLinkIO)
|
||||
val prci = new PRCITileIO().flip
|
||||
val slave = (p(DataScratchpadSize) > 0).option(new ClientUncachedTileLinkIO().flip)
|
||||
}
|
||||
|
||||
val io = new TileIO
|
||||
@ -121,6 +123,12 @@ class RocketTile(clockSignal: Clock = null, resetSignal: Bool = null)
|
||||
core.io.ptw <> ptw.io.dpath
|
||||
}
|
||||
|
||||
io.slave foreach { case slavePort =>
|
||||
val adapter = Module(new ScratchpadSlavePort()(dcacheParams))
|
||||
adapter.io.tl <> TileLinkFragmenter(slavePort)
|
||||
adapter.io.dmem +=: dcPorts
|
||||
}
|
||||
|
||||
require(dcPorts.size == core.dcacheArbPorts)
|
||||
val dcArb = Module(new HellaCacheArbiter(dcPorts.size)(dcacheParams))
|
||||
dcArb.io.requestor <> dcPorts
|
||||
|
Reference in New Issue
Block a user