1
0

Add support for L1 data scratchpads instead of caches

They fit in the same part of the address space as DRAM would be, and
are coherent (because they are not cacheable).

They are currently limited to single cores without DRAM.  We intend
to lift both restrictions, probably when we add support for
heterogeneous tiles.
This commit is contained in:
Andrew Waterman 2016-09-02 15:59:16 -07:00
parent dc9ae19936
commit 63679bb019
8 changed files with 140 additions and 36 deletions

@ -1 +1 @@
Subproject commit 2a074c828ddd8e6c20fa21d618664d50120f3d7a
Subproject commit 16426b3a68d85ce7dd9655b0ce773431eb69fc74

@ -1 +1 @@
Subproject commit 705db10fd14b313d5cc96f193d1271256bff75da
Subproject commit 61d74b5837d270f116fc21e907ed78f582361688

View File

@ -63,6 +63,7 @@ class BaseCoreplexConfig extends Config (
case BtbKey => BtbParameters()
//L1DataCache
case DCacheKey => DCacheConfig(nMSHRs = site(Knob("L1D_MSHRS")))
case DataScratchpadSize => 0
//L2 Memory System Params
case AmoAluOperandBits => site(XLen)
case NAcquireTransactors => 7
@ -239,6 +240,13 @@ class WithNBanksPerMemChannel(n: Int) extends Config(
case _ => throw new CDEMatchError
})
class WithDataScratchpad(n: Int) extends Config(
(pname,site,here) => pname match {
case DataScratchpadSize => n
case NSets if site(CacheName) == "L1D" => n / site(CacheBlockBytes)
case _ => throw new CDEMatchError
})
class WithL2Cache extends Config(
(pname,site,here) => pname match {
case "L2_CAPACITY_IN_KB" => Knob("L2_CAPACITY_IN_KB")
@ -330,6 +338,7 @@ class WithRV32 extends Config(
"rv32mi-p-csr",
"rv32ui-p-sh",
"rv32ui-p-lh",
"rv32uc-p-rvc",
"rv32mi-p-sbreak",
"rv32ui-p-sll")
case _ => throw new CDEMatchError

View File

@ -66,6 +66,7 @@ class Uncore(implicit val p: Parameters) extends Module
val mem = Vec(nMemChannels, new ClientUncachedTileLinkIO()(outermostParams))
val tiles_cached = Vec(nCachedTilePorts, new ClientTileLinkIO).flip
val tiles_uncached = Vec(nUncachedTilePorts, new ClientUncachedTileLinkIO).flip
val tiles_slave = Vec(nTiles, new ClientUncachedTileLinkIO)
val ext_uncached = Vec(nExtClients, new ClientUncachedTileLinkIO()(innerParams)).flip
val prci = Vec(nTiles, new PRCITileIO).asOutput
val mmio = exportMMIO.option(new ClientUncachedTileLinkIO()(outermostMMIOParams))
@ -92,7 +93,8 @@ class Uncore(implicit val p: Parameters) extends Module
rom.order(ByteOrder.LITTLE_ENDIAN)
// for now, have the reset vector jump straight to memory
val resetToMemDist = p(GlobalAddrMap)("mem").start - p(ResetVector)
val memBase = (if (p(GlobalAddrMap) contains "mem") p(GlobalAddrMap)("mem") else p(GlobalAddrMap)("io:int:dmem0")).start
val resetToMemDist = memBase - p(ResetVector)
require(resetToMemDist == (resetToMemDist.toInt >> 12 << 12))
val configStringAddr = p(ResetVector).toInt + rom.capacity
@ -134,6 +136,10 @@ class Uncore(implicit val p: Parameters) extends Module
io.prci(i).reset := reset
}
val tileSlavePorts = (0 until nTiles) map (i => s"int:dmem$i") filter (ioAddrMap contains _)
for ((t, m) <- io.tiles_slave zip (tileSlavePorts map (mmioNetwork port _)))
t <> ClientUncachedTileLinkEnqueuer(m, 1)
val bootROM = Module(new ROMSlave(makeBootROM()))
bootROM.io <> mmioNetwork.port("int:bootrom")
@ -174,11 +180,9 @@ class DefaultOuterMemorySystem(implicit p: Parameters) extends OuterMemorySystem
// Cached ports are first in client list, making sharerToClientId just an indentity function
// addrToBank is sed to hash physical addresses (of cache blocks) to banks (and thereby memory channels)
def sharerToClientId(sharerId: UInt) = sharerId
def addrToBank(addr: UInt): UInt = {
def addrToBank(addr: UInt): UInt = if (nBanks == 0) UInt(0) else {
val isMemory = p(GlobalAddrMap).isInRegion("mem", addr << log2Up(p(CacheBlockBytes)))
Mux(isMemory,
if (nBanks > 1) addr(lsb + log2Up(nBanks) - 1, lsb) else UInt(0),
UInt(nBanks))
Mux(isMemory, addr.extract(lsb + log2Ceil(nBanks) - 1, lsb), UInt(nBanks))
}
val preBuffering = TileLinkDepths(1,1,2,2,0)
val l1tol2net = Module(new PortedTileLinkCrossbar(addrToBank, sharerToClientId, preBuffering))
@ -274,6 +278,7 @@ class DefaultCoreplex(topParams: Parameters) extends Coreplex()(topParams) {
// Connect the uncore to the tile memory ports, HostIO and MemIO
uncore.io.tiles_cached <> tileList.map(_.io.cached).flatten
uncore.io.tiles_uncached <> tileList.map(_.io.uncached).flatten
(tileList.map(_.io.slave).flatten zip uncore.io.tiles_slave) foreach { case (x, y) => x <> y }
uncore.io.interrupts <> io.interrupts
uncore.io.debug <> io.debug
uncore.io.ext_uncached <> io.ext_clients

View File

@ -54,11 +54,8 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
// tags
val replacer = p(Replacer)()
def onReset = L1Metadata(UInt(0), ClientMetadata.onReset)
val meta = Module(new MetadataArray(onReset _))
val metaReadArb = Module(new Arbiter(new MetaReadReq, 3))
val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, 3))
meta.io.read <> metaReadArb.io.out
meta.io.write <> metaWriteArb.io.out
// data
val data = Module(new DCacheDataArray)
@ -116,13 +113,28 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
val s1_paddr = Cat(tlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0))
val s1_tag = Mux(s1_probe, probe_bits.addr_block >> idxBits, s1_paddr(paddrBits-1, untagBits))
val s1_hit_way = meta.io.resp.map(r => r.coh.isValid() && r.tag === s1_tag).asUInt
val s1_hit_state = ClientMetadata.onReset.fromBits(
meta.io.resp.map(r => Mux(r.tag === s1_tag, r.coh.asUInt, UInt(0)))
.reduce (_|_))
val s1_victim_way = Wire(init = replacer.way)
val (s1_hit_way, s1_hit_state, s1_victim_meta) =
if (usingDataScratchpad) {
require(nWays == 1)
metaWriteArb.io.out.ready := true
metaReadArb.io.out.ready := !metaWriteArb.io.out.valid
val inScratchpad = addrMap(s"io:int:dmem${tileId}").containsAddress(s1_paddr)
val hitState = Mux(inScratchpad, ClientMetadata.onReset.onHit(M_XWR), ClientMetadata.onReset)
(inScratchpad, hitState, L1Metadata(UInt(0), ClientMetadata.onReset))
} else {
val meta = Module(new MetadataArray(onReset _))
meta.io.read <> metaReadArb.io.out
meta.io.write <> metaWriteArb.io.out
val s1_meta = meta.io.resp
val s1_hit_way = s1_meta.map(r => r.coh.isValid() && r.tag === s1_tag).asUInt
val s1_hit_state = ClientMetadata.onReset.fromBits(
s1_meta.map(r => Mux(r.tag === s1_tag, r.coh.asUInt, UInt(0)))
.reduce (_|_))
(s1_hit_way, s1_hit_state, s1_meta(s1_victim_way))
}
val s1_data_way = Mux(inWriteback, releaseWay, s1_hit_way)
val s1_data = Mux1H(s1_data_way, data.io.resp) // retime into s2 if critical
val s1_victim_way = Wire(init = replacer.way)
val s2_valid = Reg(next=s1_valid_masked, init=Bool(false))
val s2_probe = Reg(next=s1_probe, init=Bool(false))
@ -133,7 +145,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
when (s1_valid_not_nacked || s1_flush_valid) {
s2_req := s1_req
s2_req.addr := s1_paddr
s2_uncached := !tlb.io.resp.cacheable
s2_uncached := !tlb.io.resp.cacheable || Bool(usingDataScratchpad)
}
val s2_read = isRead(s2_req.cmd)
val s2_write = isWrite(s2_req.cmd)
@ -151,8 +163,8 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
val s2_victimize = s2_valid_cached_miss || s2_flush_valid
val s2_valid_uncached = s2_valid_miss && s2_uncached
val s2_victim_way = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_way, UIntToOH(RegEnable(s1_victim_way, s1_valid_not_nacked || s1_flush_valid)))
val s2_victim_tag = RegEnable(meta.io.resp(s1_victim_way).tag, s1_valid_not_nacked || s1_flush_valid)
val s2_victim_state = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_state, RegEnable(meta.io.resp(s1_victim_way).coh, s1_valid_not_nacked || s1_flush_valid))
val s2_victim_tag = RegEnable(s1_victim_meta.tag, s1_valid_not_nacked || s1_flush_valid)
val s2_victim_state = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_state, RegEnable(s1_victim_meta.coh, s1_valid_not_nacked || s1_flush_valid))
val s2_victim_valid = s2_victim_state.isValid()
val s2_victim_dirty = s2_victim_state.requiresVoluntaryWriteback()
val s2_new_hit_state = s2_hit_state.onHit(s2_req.cmd)
@ -261,7 +273,8 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
io.mem.acquire.valid := ((s2_valid_cached_miss && !s2_victim_dirty) || s2_valid_uncached) && fq.io.enq.ready
io.mem.acquire.bits := cachedGetMessage
when (s2_uncached) {
assert(!s2_valid_masked || !s2_hit_state.isValid(), "cache hit on uncached access")
if (!usingDataScratchpad)
assert(!s2_valid_masked || !s2_hit_state.isValid(), "cache hit on uncached access")
io.mem.acquire.bits := uncachedGetMessage
when (s2_write) {
io.mem.acquire.bits := uncachedPutMessage
@ -419,7 +432,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
val flushed = Reg(init=Bool(true))
val flushing = Reg(init=Bool(false))
val flushCounter = Counter(nSets * nWays)
when (io.mem.acquire.fire()) { flushed := false }
when (io.mem.acquire.fire() && !s2_uncached) { flushed := false }
when (s2_valid_masked && s2_req.cmd === M_FLUSH_ALL) {
io.cpu.s2_nack := !flushed
when (!flushed) {
@ -442,3 +455,61 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
}
}
}
class ScratchpadSlavePort(implicit p: Parameters) extends CoreModule()(p) {
val io = new Bundle {
val tl = new ClientUncachedTileLinkIO().flip
val dmem = new HellaCacheIO
}
val s_ready :: s_wait :: s_replay :: s_grant :: Nil = Enum(UInt(), 4)
val state = Reg(init = s_ready)
when (io.dmem.resp.valid) { state := s_grant }
when (io.tl.grant.fire()) { state := s_ready }
when (io.dmem.s2_nack) { state := s_replay }
when (io.dmem.req.fire()) { state := s_wait }
val acq = Reg(io.tl.acquire.bits)
when (io.dmem.resp.valid) { acq.data := io.dmem.resp.bits.data }
when (io.tl.acquire.fire()) { acq := io.tl.acquire.bits }
val isRead = acq.isBuiltInType(Acquire.getType)
val isWrite = acq.isBuiltInType(Acquire.putType)
assert(state === s_ready || isRead || isWrite)
require(coreDataBits == acq.tlDataBits)
require(usingDataScratchpad)
def formCacheReq(acq: Acquire) = {
val req = Wire(new HellaCacheReq)
// treat all loads as full words, so bytes appear in correct lane
req.typ := Mux(isRead, log2Ceil(acq.tlDataBytes), acq.op_size())
req.cmd := acq.op_code()
req.addr := Mux(isRead, ~(~acq.full_addr() | (acq.tlDataBytes-1)), acq.full_addr())
req.tag := UInt(0)
req
}
val ready = state === s_ready || io.tl.grant.fire()
io.dmem.req.valid := (io.tl.acquire.valid && ready) || state === s_replay
io.tl.acquire.ready := io.dmem.req.ready && ready
io.dmem.req.bits := formCacheReq(Mux(state === s_replay, acq, io.tl.acquire.bits))
// this blows. the TL data is already in the correct byte lane, but the D$
// expects right-justified store data, so that it can steer the bytes.
io.dmem.s1_data := new LoadGen(acq.op_size(), Bool(false), acq.addr_byte(), acq.data, Bool(false), acq.tlDataBytes).data
io.dmem.s1_kill := false
io.dmem.invalidate_lr := false
// place AMO data in correct word lane
val minAMOBytes = 4
val grantData = Mux(io.dmem.resp.valid, io.dmem.resp.bits.data, acq.data)
val alignedGrantData = Mux(acq.op_size() <= log2Ceil(minAMOBytes), Fill(coreDataBytes/minAMOBytes, grantData(8*minAMOBytes-1, 0)), grantData)
io.tl.grant.valid := io.dmem.resp.valid || state === s_grant
io.tl.grant.bits := Grant(
is_builtin_type = Bool(true),
g_type = acq.getBuiltInGrantType(),
client_xact_id = acq.client_xact_id,
manager_xact_id = UInt(0),
addr_beat = acq.addr_beat,
data = alignedGrantData)
}

View File

@ -30,6 +30,7 @@ case object ResetVector extends Field[BigInt]
case object NBreakpoints extends Field[Int]
case object NPerfCounters extends Field[Int]
case object NPerfEvents extends Field[Int]
case object DataScratchpadSize extends Field[Int]
trait HasCoreParameters extends HasAddrMapParameters {
implicit val p: Parameters
@ -48,6 +49,7 @@ trait HasCoreParameters extends HasAddrMapParameters {
val nBreakpoints = p(NBreakpoints)
val nPerfCounters = p(NPerfCounters)
val nPerfEvents = p(NPerfEvents)
val usingDataScratchpad = p(DataScratchpadSize) > 0
val retireWidth = p(RetireWidth)
val fetchWidth = p(FetchWidth)
@ -55,7 +57,7 @@ trait HasCoreParameters extends HasAddrMapParameters {
val coreInstBytes = coreInstBits/8
val coreDataBits = xLen
val coreDataBytes = coreDataBits/8
val dcacheArbPorts = 1 + (if (usingVM) 1 else 0) + p(BuildRoCC).size
val dcacheArbPorts = 1 + usingVM.toInt + usingDataScratchpad.toInt + p(BuildRoCC).size
val coreDCacheReqTagBits = 6
val dcacheReqTagBits = coreDCacheReqTagBits + log2Ceil(dcacheArbPorts)

View File

@ -5,6 +5,7 @@ package rocket
import Chisel._
import uncore.tilelink._
import uncore.agents._
import uncore.converters._
import uncore.devices._
import Util._
import cde.{Parameters, Field}
@ -31,6 +32,7 @@ abstract class Tile(clockSignal: Clock = null, resetSignal: Bool = null)
val cached = Vec(nCachedTileLinkPorts, new ClientTileLinkIO)
val uncached = Vec(nUncachedTileLinkPorts, new ClientUncachedTileLinkIO)
val prci = new PRCITileIO().flip
val slave = (p(DataScratchpadSize) > 0).option(new ClientUncachedTileLinkIO().flip)
}
val io = new TileIO
@ -121,6 +123,12 @@ class RocketTile(clockSignal: Clock = null, resetSignal: Bool = null)
core.io.ptw <> ptw.io.dpath
}
io.slave foreach { case slavePort =>
val adapter = Module(new ScratchpadSlavePort()(dcacheParams))
adapter.io.tl <> TileLinkFragmenter(slavePort)
adapter.io.dmem +=: dcPorts
}
require(dcPorts.size == core.dcacheArbPorts)
val dcArb = Module(new HellaCacheArbiter(dcPorts.size)(dcacheParams))
dcArb.io.requestor <> dcPorts

View File

@ -5,6 +5,7 @@ package rocketchip
import Chisel._
import junctions._
import rocket._
import rocket.Util._
import uncore.agents._
import uncore.tilelink._
import uncore.devices._
@ -26,6 +27,11 @@ class BasePlatformConfig extends Config (
entries += AddrMapEntry("bootrom", MemSize(4096, MemAttr(AddrMapProt.RX)))
entries += AddrMapEntry("plic", MemRange(0x40000000, 0x4000000, MemAttr(AddrMapProt.RW)))
entries += AddrMapEntry("prci", MemSize(0x4000000, MemAttr(AddrMapProt.RW)))
if (site(DataScratchpadSize) > 0) { // TODO heterogeneous tiles
require(site(NTiles) == 1) // TODO relax this
require(site(NMemoryChannels) == 0) // TODO allow both scratchpad & DRAM
entries += AddrMapEntry("dmem0", MemRange(0x80000000L, site[Int](DataScratchpadSize), MemAttr(AddrMapProt.RWX)))
}
new AddrMap(entries)
}
lazy val externalAddrMap = new AddrMap(
@ -38,13 +44,11 @@ class BasePlatformConfig extends Config (
val intern = AddrMapEntry("int", internalIOAddrMap)
val extern = AddrMapEntry("ext", externalAddrMap)
val ioMap = if (site(ExportMMIOPort)) AddrMap(intern, extern) else AddrMap(intern)
val io = AddrMapEntry("io", AddrMap((intern +: site(ExportMMIOPort).option(extern).toSeq):_*))
val mem = AddrMapEntry("mem", MemRange(memBase, memSize, MemAttr(AddrMapProt.RWX, true)))
val addrMap = AddrMap((io +: (site(NMemoryChannels) > 0).option(mem).toSeq):_*)
val addrMap = AddrMap(
AddrMapEntry("io", ioMap),
AddrMapEntry("mem", MemRange(memBase, memSize, MemAttr(AddrMapProt.RWX, true))))
Dump("MEM_BASE", addrMap("mem").start)
Dump("MEM_BASE", memBase)
addrMap
}
def makeConfigString() = {
@ -62,15 +66,17 @@ class BasePlatformConfig extends Config (
res append "rtc {\n"
res append s" addr 0x${(prciAddr + PRCI.time).toString(16)};\n"
res append "};\n"
res append "ram {\n"
res append " 0 {\n"
res append s" addr 0x${addrMap("mem").start.toString(16)};\n"
res append s" size 0x${addrMap("mem").size.toString(16)};\n"
res append " };\n"
res append "};\n"
res append "core {\n"
for (i <- 0 until site(NTiles)) {
val isa = s"rv${site(XLen)}im${if (site(UseAtomics)) "a" else ""}${if (site(FPUKey).nonEmpty) "fd" else ""}"
if (addrMap contains "mem") {
res append "ram {\n"
res append " 0 {\n"
res append s" addr 0x${addrMap("mem").start.toString(16)};\n"
res append s" size 0x${addrMap("mem").size.toString(16)};\n"
res append " };\n"
res append "};\n"
res append "core {\n"
}
for (i <- 0 until site(NTiles)) { // TODO heterogeneous tiles
val isa = s"rv${site(XLen)}i${site(MulDivKey).map(x=>"m").mkString}${if (site(UseAtomics)) "a" else ""}${if (site(FPUKey).nonEmpty) "fd" else ""}"
res append s" $i {\n"
res append " 0 {\n"
res append s" isa $isa;\n"
@ -204,6 +210,8 @@ class WithTL extends Config(
case NExtMMIOTLChannels => 1
})
class WithScratchpads extends Config(new WithNMemoryChannels(0) ++ new WithDataScratchpad(16384))
class DefaultFPGASmallConfig extends Config(new WithSmallCores ++ new DefaultFPGAConfig)
class DefaultSmallConfig extends Config(new WithSmallCores ++ new BaseConfig)
class DefaultRV32Config extends Config(new WithRV32 ++ new DefaultSmallConfig)
@ -253,6 +261,7 @@ class DualCoreConfig extends Config(
new WithNCores(2) ++ new WithL2Cache ++ new BaseConfig)
class TinyConfig extends Config(
new WithScratchpads ++
new WithRV32 ++ new WithSmallCores ++
new WithStatelessBridge ++ new BaseConfig)