diff --git a/chisel3 b/chisel3
index 2a074c82..16426b3a 160000
--- a/chisel3
+++ b/chisel3
@@ -1 +1 @@
-Subproject commit 2a074c828ddd8e6c20fa21d618664d50120f3d7a
+Subproject commit 16426b3a68d85ce7dd9655b0ce773431eb69fc74
diff --git a/riscv-tools b/riscv-tools
index 705db10f..61d74b58 160000
--- a/riscv-tools
+++ b/riscv-tools
@@ -1 +1 @@
-Subproject commit 705db10fd14b313d5cc96f193d1271256bff75da
+Subproject commit 61d74b5837d270f116fc21e907ed78f582361688
diff --git a/src/main/scala/coreplex/Configs.scala b/src/main/scala/coreplex/Configs.scala
index 0767cafc..7dc26198 100644
--- a/src/main/scala/coreplex/Configs.scala
+++ b/src/main/scala/coreplex/Configs.scala
@@ -63,6 +63,7 @@ class BaseCoreplexConfig extends Config (
       case BtbKey => BtbParameters()
       //L1DataCache
       case DCacheKey => DCacheConfig(nMSHRs = site(Knob("L1D_MSHRS")))
+      case DataScratchpadSize => 0
       //L2 Memory System Params
       case AmoAluOperandBits => site(XLen)
       case NAcquireTransactors => 7
@@ -239,6 +240,13 @@ class WithNBanksPerMemChannel(n: Int) extends Config(
     case _ => throw new CDEMatchError
   })
 
+class WithDataScratchpad(n: Int) extends Config(
+  (pname,site,here) => pname match {
+    case DataScratchpadSize => n
+    case NSets if site(CacheName) == "L1D" => n / site(CacheBlockBytes)
+    case _ => throw new CDEMatchError
+  })
+
 class WithL2Cache extends Config(
   (pname,site,here) => pname match {
     case "L2_CAPACITY_IN_KB" => Knob("L2_CAPACITY_IN_KB")
@@ -330,6 +338,7 @@ class WithRV32 extends Config(
         "rv32mi-p-csr",
         "rv32ui-p-sh",
         "rv32ui-p-lh",
+        "rv32uc-p-rvc",
        "rv32mi-p-sbreak",
        "rv32ui-p-sll")
     case _ => throw new CDEMatchError
diff --git a/src/main/scala/coreplex/Coreplex.scala b/src/main/scala/coreplex/Coreplex.scala
index 7ef9f275..023b471b 100644
--- a/src/main/scala/coreplex/Coreplex.scala
+++ b/src/main/scala/coreplex/Coreplex.scala
@@ -66,6 +66,7 @@ class Uncore(implicit val p: Parameters) extends Module
     val mem = Vec(nMemChannels, new ClientUncachedTileLinkIO()(outermostParams))
     val tiles_cached = Vec(nCachedTilePorts, new ClientTileLinkIO).flip
     val tiles_uncached = Vec(nUncachedTilePorts, new ClientUncachedTileLinkIO).flip
+    val tiles_slave = Vec(nTiles, new ClientUncachedTileLinkIO)
     val ext_uncached = Vec(nExtClients, new ClientUncachedTileLinkIO()(innerParams)).flip
     val prci = Vec(nTiles, new PRCITileIO).asOutput
     val mmio = exportMMIO.option(new ClientUncachedTileLinkIO()(outermostMMIOParams))
@@ -92,7 +93,8 @@ class Uncore(implicit val p: Parameters) extends Module
     rom.order(ByteOrder.LITTLE_ENDIAN)
 
     // for now, have the reset vector jump straight to memory
-    val resetToMemDist = p(GlobalAddrMap)("mem").start - p(ResetVector)
+    val memBase = (if (p(GlobalAddrMap) contains "mem") p(GlobalAddrMap)("mem") else p(GlobalAddrMap)("io:int:dmem0")).start
+    val resetToMemDist = memBase - p(ResetVector)
     require(resetToMemDist == (resetToMemDist.toInt >> 12 << 12))
     val configStringAddr = p(ResetVector).toInt + rom.capacity
 
@@ -134,6 +136,10 @@ class Uncore(implicit val p: Parameters) extends Module
       io.prci(i).reset := reset
     }
 
+    val tileSlavePorts = (0 until nTiles) map (i => s"int:dmem$i") filter (ioAddrMap contains _)
+    for ((t, m) <- io.tiles_slave zip (tileSlavePorts map (mmioNetwork port _)))
+      t <> ClientUncachedTileLinkEnqueuer(m, 1)
+
     val bootROM = Module(new ROMSlave(makeBootROM()))
     bootROM.io <> mmioNetwork.port("int:bootrom")
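The reset-vector change in the Uncore hunk above is what lets a memless configuration boot: if the global address map has no "mem" region, the boot ROM jumps to tile 0's scratchpad (io:int:dmem0) instead. A minimal pure-Scala sketch of that fallback, with a plain Map as a hypothetical stand-in for GlobalAddrMap:

    object ResetVectorSketch extends App {
      val resetVector = BigInt(0x1000)
      // Memless, TinyConfig-style map: no "mem" entry, only tile 0's scratchpad.
      val regionBase = Map("io:int:dmem0" -> BigInt(0x80000000L))

      // Prefer backing memory; otherwise fall back to the data scratchpad.
      val memBase =
        if (regionBase contains "mem") regionBase("mem") else regionBase("io:int:dmem0")

      // Same check as the require above: the jump distance must be 4 KiB-aligned,
      // presumably so the ROM can reach the target with a page-granular offset.
      val resetToMemDist = memBase - resetVector
      require(resetToMemDist == (resetToMemDist.toInt >> 12 << 12))
      println(s"reset jumps 0x${resetToMemDist.toString(16)} to 0x${memBase.toString(16)}")
    }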
@@ -174,11 +180,9 @@ class DefaultOuterMemorySystem(implicit p: Parameters) extends OuterMemorySystem
   // Cached ports are first in client list, making sharerToClientId just an identity function
   // addrToBank is used to hash physical addresses (of cache blocks) to banks (and thereby memory channels)
   def sharerToClientId(sharerId: UInt) = sharerId
-  def addrToBank(addr: UInt): UInt = {
+  def addrToBank(addr: UInt): UInt = if (nBanks == 0) UInt(0) else {
     val isMemory = p(GlobalAddrMap).isInRegion("mem", addr << log2Up(p(CacheBlockBytes)))
-    Mux(isMemory,
-      if (nBanks > 1) addr(lsb + log2Up(nBanks) - 1, lsb) else UInt(0),
-      UInt(nBanks))
+    Mux(isMemory, addr.extract(lsb + log2Ceil(nBanks) - 1, lsb), UInt(nBanks))
   }
   val preBuffering = TileLinkDepths(1,1,2,2,0)
   val l1tol2net = Module(new PortedTileLinkCrossbar(addrToBank, sharerToClientId, preBuffering))
@@ -274,6 +278,7 @@ class DefaultCoreplex(topParams: Parameters) extends Coreplex()(topParams) {
     // Connect the uncore to the tile memory ports, HostIO and MemIO
     uncore.io.tiles_cached <> tileList.map(_.io.cached).flatten
     uncore.io.tiles_uncached <> tileList.map(_.io.uncached).flatten
+    (tileList.map(_.io.slave).flatten zip uncore.io.tiles_slave) foreach { case (x, y) => x <> y }
     uncore.io.interrupts <> io.interrupts
     uncore.io.debug <> io.debug
     uncore.io.ext_uncached <> io.ext_clients
diff --git a/src/main/scala/rocket/dcache.scala b/src/main/scala/rocket/dcache.scala
index e2edb148..68dab522 100644
--- a/src/main/scala/rocket/dcache.scala
+++ b/src/main/scala/rocket/dcache.scala
@@ -54,11 +54,8 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
   // tags
   val replacer = p(Replacer)()
   def onReset = L1Metadata(UInt(0), ClientMetadata.onReset)
-  val meta = Module(new MetadataArray(onReset _))
   val metaReadArb = Module(new Arbiter(new MetaReadReq, 3))
   val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, 3))
-  meta.io.read <> metaReadArb.io.out
-  meta.io.write <> metaWriteArb.io.out
 
   // data
   val data = Module(new DCacheDataArray)
@@ -116,13 +113,28 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
   val s1_paddr = Cat(tlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0))
   val s1_tag = Mux(s1_probe, probe_bits.addr_block >> idxBits, s1_paddr(paddrBits-1, untagBits))
-  val s1_hit_way = meta.io.resp.map(r => r.coh.isValid() && r.tag === s1_tag).asUInt
-  val s1_hit_state = ClientMetadata.onReset.fromBits(
-    meta.io.resp.map(r => Mux(r.tag === s1_tag, r.coh.asUInt, UInt(0)))
-      .reduce (_|_))
+  val s1_victim_way = Wire(init = replacer.way)
+  val (s1_hit_way, s1_hit_state, s1_victim_meta) =
+    if (usingDataScratchpad) {
+      require(nWays == 1)
+      metaWriteArb.io.out.ready := true
+      metaReadArb.io.out.ready := !metaWriteArb.io.out.valid
+      val inScratchpad = addrMap(s"io:int:dmem${tileId}").containsAddress(s1_paddr)
+      val hitState = Mux(inScratchpad, ClientMetadata.onReset.onHit(M_XWR), ClientMetadata.onReset)
+      (inScratchpad, hitState, L1Metadata(UInt(0), ClientMetadata.onReset))
+    } else {
+      val meta = Module(new MetadataArray(onReset _))
+      meta.io.read <> metaReadArb.io.out
+      meta.io.write <> metaWriteArb.io.out
+      val s1_meta = meta.io.resp
+      val s1_hit_way = s1_meta.map(r => r.coh.isValid() && r.tag === s1_tag).asUInt
+      val s1_hit_state = ClientMetadata.onReset.fromBits(
+        s1_meta.map(r => Mux(r.tag === s1_tag, r.coh.asUInt, UInt(0)))
+          .reduce (_|_))
+      (s1_hit_way, s1_hit_state, s1_meta(s1_victim_way))
+    }
   val s1_data_way = Mux(inWriteback, releaseWay, s1_hit_way)
   val s1_data = Mux1H(s1_data_way, data.io.resp) // retime into s2 if critical
-  val s1_victim_way = Wire(init = replacer.way)
 
   val s2_valid = Reg(next=s1_valid_masked, init=Bool(false))
   val s2_probe = Reg(next=s1_probe, init=Bool(false))
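In the dcache.scala hunk above, a scratchpad-backed D$ drops the tag array entirely: nWays is required to be 1, and a reference "hits" exactly when its physical address falls inside the tile's dmem region. A pure-Scala sketch of that hit rule, where MemRegion is a hypothetical stand-in for the AddrMap entry and its containsAddress:

    case class MemRegion(base: BigInt, size: BigInt) {
      def containsAddress(a: BigInt) = a >= base && a < base + size
    }

    object ScratchpadHitSketch extends App {
      val dmem0 = MemRegion(BigInt(0x80000000L), 16384) // 16 KiB scratchpad
      def s1Hit(paddr: BigInt): Boolean = dmem0.containsAddress(paddr)
      println(s1Hit(BigInt(0x80000100L))) // true: scratchpad access always "hits"
      println(s1Hit(BigInt(0x90000000L))) // false: handled as an uncached access
    }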
@@ -133,7 +145,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
   when (s1_valid_not_nacked || s1_flush_valid) {
     s2_req := s1_req
     s2_req.addr := s1_paddr
-    s2_uncached := !tlb.io.resp.cacheable
+    s2_uncached := !tlb.io.resp.cacheable || Bool(usingDataScratchpad)
   }
   val s2_read = isRead(s2_req.cmd)
   val s2_write = isWrite(s2_req.cmd)
@@ -151,8 +163,8 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
   val s2_victimize = s2_valid_cached_miss || s2_flush_valid
   val s2_valid_uncached = s2_valid_miss && s2_uncached
   val s2_victim_way = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_way, UIntToOH(RegEnable(s1_victim_way, s1_valid_not_nacked || s1_flush_valid)))
-  val s2_victim_tag = RegEnable(meta.io.resp(s1_victim_way).tag, s1_valid_not_nacked || s1_flush_valid)
-  val s2_victim_state = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_state, RegEnable(meta.io.resp(s1_victim_way).coh, s1_valid_not_nacked || s1_flush_valid))
+  val s2_victim_tag = RegEnable(s1_victim_meta.tag, s1_valid_not_nacked || s1_flush_valid)
+  val s2_victim_state = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_state, RegEnable(s1_victim_meta.coh, s1_valid_not_nacked || s1_flush_valid))
   val s2_victim_valid = s2_victim_state.isValid()
   val s2_victim_dirty = s2_victim_state.requiresVoluntaryWriteback()
   val s2_new_hit_state = s2_hit_state.onHit(s2_req.cmd)
@@ -261,7 +273,8 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
   io.mem.acquire.valid := ((s2_valid_cached_miss && !s2_victim_dirty) || s2_valid_uncached) && fq.io.enq.ready
   io.mem.acquire.bits := cachedGetMessage
   when (s2_uncached) {
-    assert(!s2_valid_masked || !s2_hit_state.isValid(), "cache hit on uncached access")
+    if (!usingDataScratchpad)
+      assert(!s2_valid_masked || !s2_hit_state.isValid(), "cache hit on uncached access")
     io.mem.acquire.bits := uncachedGetMessage
     when (s2_write) {
       io.mem.acquire.bits := uncachedPutMessage
@@ -419,7 +432,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
   val flushed = Reg(init=Bool(true))
   val flushing = Reg(init=Bool(false))
   val flushCounter = Counter(nSets * nWays)
-  when (io.mem.acquire.fire()) { flushed := false }
+  when (io.mem.acquire.fire() && !s2_uncached) { flushed := false }
   when (s2_valid_masked && s2_req.cmd === M_FLUSH_ALL) {
     io.cpu.s2_nack := !flushed
     when (!flushed) {
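The next hunk adds ScratchpadSlavePort, which converts incoming TileLink acquires into HellaCache requests. Because a later Chisel `when` assignment wins, its four `when` blocks form a prioritized state machine; a stand-alone model of that transition logic (hypothetical `step` helper, state names mirroring the Enum below):

    object ScratchpadFsmSketch extends App {
      sealed trait State
      case object Ready  extends State // accept a new TileLink Acquire
      case object Wait   extends State // request issued, waiting on the D$
      case object Replay extends State // D$ nacked in stage 2; reissue
      case object Grant  extends State // response latched; send the Grant

      // Last `when` wins in Chisel, so req.fire has the highest priority here.
      def step(s: State, reqFire: Boolean, nack: Boolean,
               grantFire: Boolean, respValid: Boolean): State =
        if (reqFire) Wait
        else if (nack) Replay
        else if (grantFire) Ready
        else if (respValid) Grant
        else s

      println(step(Ready, reqFire = true, nack = false,
                   grantFire = false, respValid = false)) // Wait
    }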
@@ -442,3 +455,61 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
     }
   }
 }
+
+class ScratchpadSlavePort(implicit p: Parameters) extends CoreModule()(p) {
+  val io = new Bundle {
+    val tl = new ClientUncachedTileLinkIO().flip
+    val dmem = new HellaCacheIO
+  }
+
+  val s_ready :: s_wait :: s_replay :: s_grant :: Nil = Enum(UInt(), 4)
+  val state = Reg(init = s_ready)
+  when (io.dmem.resp.valid) { state := s_grant }
+  when (io.tl.grant.fire()) { state := s_ready }
+  when (io.dmem.s2_nack) { state := s_replay }
+  when (io.dmem.req.fire()) { state := s_wait }
+
+  val acq = Reg(io.tl.acquire.bits)
+  when (io.dmem.resp.valid) { acq.data := io.dmem.resp.bits.data }
+  when (io.tl.acquire.fire()) { acq := io.tl.acquire.bits }
+
+  val isRead = acq.isBuiltInType(Acquire.getType)
+  val isWrite = acq.isBuiltInType(Acquire.putType)
+  assert(state === s_ready || isRead || isWrite)
+  require(coreDataBits == acq.tlDataBits)
+  require(usingDataScratchpad)
+
+  def formCacheReq(acq: Acquire) = {
+    val req = Wire(new HellaCacheReq)
+    // treat all loads as full words, so bytes appear in correct lane
+    req.typ := Mux(isRead, log2Ceil(acq.tlDataBytes), acq.op_size())
+    req.cmd := acq.op_code()
+    req.addr := Mux(isRead, ~(~acq.full_addr() | (acq.tlDataBytes-1)), acq.full_addr())
+    req.tag := UInt(0)
+    req
+  }
+
+  val ready = state === s_ready || io.tl.grant.fire()
+  io.dmem.req.valid := (io.tl.acquire.valid && ready) || state === s_replay
+  io.tl.acquire.ready := io.dmem.req.ready && ready
+  io.dmem.req.bits := formCacheReq(Mux(state === s_replay, acq, io.tl.acquire.bits))
+  // the TL data is already in the correct byte lane, but the D$
+  // expects right-justified store data, so that it can steer the bytes
+  io.dmem.s1_data := new LoadGen(acq.op_size(), Bool(false), acq.addr_byte(), acq.data, Bool(false), acq.tlDataBytes).data
+  io.dmem.s1_kill := false
+  io.dmem.invalidate_lr := false
+
+  // place AMO data in correct word lane
+  val minAMOBytes = 4
+  val grantData = Mux(io.dmem.resp.valid, io.dmem.resp.bits.data, acq.data)
+  val alignedGrantData = Mux(acq.op_size() <= log2Ceil(minAMOBytes), Fill(coreDataBytes/minAMOBytes, grantData(8*minAMOBytes-1, 0)), grantData)
+
+  io.tl.grant.valid := io.dmem.resp.valid || state === s_grant
+  io.tl.grant.bits := Grant(
+    is_builtin_type = Bool(true),
+    g_type = acq.getBuiltInGrantType(),
+    client_xact_id = acq.client_xact_id,
+    manager_xact_id = UInt(0),
+    addr_beat = acq.addr_beat,
+    data = alignedGrantData)
+}
diff --git a/src/main/scala/rocket/rocket.scala b/src/main/scala/rocket/rocket.scala
index 3c4ed21c..49b32be7 100644
--- a/src/main/scala/rocket/rocket.scala
+++ b/src/main/scala/rocket/rocket.scala
@@ -30,6 +30,7 @@ case object ResetVector extends Field[BigInt]
 case object NBreakpoints extends Field[Int]
 case object NPerfCounters extends Field[Int]
 case object NPerfEvents extends Field[Int]
+case object DataScratchpadSize extends Field[Int]
 
 trait HasCoreParameters extends HasAddrMapParameters {
   implicit val p: Parameters
@@ -48,6 +49,7 @@ trait HasCoreParameters extends HasAddrMapParameters {
   val nBreakpoints = p(NBreakpoints)
   val nPerfCounters = p(NPerfCounters)
   val nPerfEvents = p(NPerfEvents)
+  val usingDataScratchpad = p(DataScratchpadSize) > 0
 
   val retireWidth = p(RetireWidth)
   val fetchWidth = p(FetchWidth)
@@ -55,7 +57,7 @@ trait HasCoreParameters extends HasAddrMapParameters {
   val coreInstBytes = coreInstBits/8
   val coreDataBits = xLen
   val coreDataBytes = coreDataBits/8
-  val dcacheArbPorts = 1 + (if (usingVM) 1 else 0) + p(BuildRoCC).size
+  val dcacheArbPorts = 1 + usingVM.toInt + usingDataScratchpad.toInt + p(BuildRoCC).size
   val coreDCacheReqTagBits = 6
   val dcacheReqTagBits = coreDCacheReqTagBits + log2Ceil(dcacheArbPorts)
diff --git a/src/main/scala/rocket/tile.scala b/src/main/scala/rocket/tile.scala
index bea82e65..df055f90 100644
--- a/src/main/scala/rocket/tile.scala
+++ b/src/main/scala/rocket/tile.scala
@@ -5,6 +5,7 @@ package rocket
 import Chisel._
 import uncore.tilelink._
 import uncore.agents._
+import uncore.converters._
 import uncore.devices._
 import Util._
 import cde.{Parameters, Field}
@@ -31,6 +32,7 @@ abstract class Tile(clockSignal: Clock = null, resetSignal: Bool = null)
     val cached = Vec(nCachedTileLinkPorts, new ClientTileLinkIO)
     val uncached = Vec(nUncachedTileLinkPorts, new ClientUncachedTileLinkIO)
     val prci = new PRCITileIO().flip
+    val slave = (p(DataScratchpadSize) > 0).option(new ClientUncachedTileLinkIO().flip)
   }
   val io = new TileIO
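tile.scala only materializes the slave port when a scratchpad is configured, and the hunk that follows prepends the adapter's cache port to dcPorts with `+=:`, making the scratchpad requestor 0 of the HellaCacheArbiter. A plain-Scala sketch of that wiring idiom, with strings standing in for the HellaCacheIO ports:

    import scala.collection.mutable.ListBuffer

    object SlavePortWiringSketch extends App {
      val dcPorts = ListBuffer("core", "ptw") // existing D$ requestors
      val slave = if (true /* scratchpad configured */) Some("scratchpad") else None

      // Mirrors `io.slave foreach { ... adapter.io.dmem +=: dcPorts }`:
      // `+=:` prepends, so the adapter lands at index 0 of the arbiter.
      slave foreach { adapter => adapter +=: dcPorts }
      println(dcPorts) // ListBuffer(scratchpad, core, ptw)
    }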
@@ -121,6 +123,12 @@ class RocketTile(clockSignal: Clock = null, resetSignal: Bool = null)
     core.io.ptw <> ptw.io.dpath
   }
 
+  io.slave foreach { case slavePort =>
+    val adapter = Module(new ScratchpadSlavePort()(dcacheParams))
+    adapter.io.tl <> TileLinkFragmenter(slavePort)
+    adapter.io.dmem +=: dcPorts
+  }
+
   require(dcPorts.size == core.dcacheArbPorts)
   val dcArb = Module(new HellaCacheArbiter(dcPorts.size)(dcacheParams))
   dcArb.io.requestor <> dcPorts
diff --git a/src/main/scala/rocketchip/Configs.scala b/src/main/scala/rocketchip/Configs.scala
index 72c72631..916188ee 100644
--- a/src/main/scala/rocketchip/Configs.scala
+++ b/src/main/scala/rocketchip/Configs.scala
@@ -5,6 +5,7 @@ package rocketchip
 import Chisel._
 import junctions._
 import rocket._
+import rocket.Util._
 import uncore.agents._
 import uncore.tilelink._
 import uncore.devices._
@@ -26,6 +27,11 @@ class BasePlatformConfig extends Config (
         entries += AddrMapEntry("bootrom", MemSize(4096, MemAttr(AddrMapProt.RX)))
         entries += AddrMapEntry("plic", MemRange(0x40000000, 0x4000000, MemAttr(AddrMapProt.RW)))
         entries += AddrMapEntry("prci", MemSize(0x4000000, MemAttr(AddrMapProt.RW)))
+        if (site(DataScratchpadSize) > 0) { // TODO heterogeneous tiles
+          require(site(NTiles) == 1) // TODO relax this
+          require(site(NMemoryChannels) == 0) // TODO allow both scratchpad & DRAM
+          entries += AddrMapEntry("dmem0", MemRange(0x80000000L, site[Int](DataScratchpadSize), MemAttr(AddrMapProt.RWX)))
+        }
         new AddrMap(entries)
       }
       lazy val externalAddrMap = new AddrMap(
@@ -38,13 +44,11 @@ class BasePlatformConfig extends Config (
         val intern = AddrMapEntry("int", internalIOAddrMap)
         val extern = AddrMapEntry("ext", externalAddrMap)
-        val ioMap = if (site(ExportMMIOPort)) AddrMap(intern, extern) else AddrMap(intern)
+        val io = AddrMapEntry("io", AddrMap((intern +: site(ExportMMIOPort).option(extern).toSeq):_*))
+        val mem = AddrMapEntry("mem", MemRange(memBase, memSize, MemAttr(AddrMapProt.RWX, true)))
+        val addrMap = AddrMap((io +: (site(NMemoryChannels) > 0).option(mem).toSeq):_*)
 
-        val addrMap = AddrMap(
-          AddrMapEntry("io", ioMap),
-          AddrMapEntry("mem", MemRange(memBase, memSize, MemAttr(AddrMapProt.RWX, true))))
-
-        Dump("MEM_BASE", addrMap("mem").start)
+        Dump("MEM_BASE", memBase)
         addrMap
       }
       def makeConfigString() = {
@@ -62,15 +66,17 @@ class BasePlatformConfig extends Config (
         res append "rtc {\n"
         res append s"  addr 0x${(prciAddr + PRCI.time).toString(16)};\n"
         res append "};\n"
-        res append "ram {\n"
-        res append "  0 {\n"
-        res append s"    addr 0x${addrMap("mem").start.toString(16)};\n"
-        res append s"    size 0x${addrMap("mem").size.toString(16)};\n"
-        res append "  };\n"
-        res append "};\n"
-        res append "core {\n"
-        for (i <- 0 until site(NTiles)) {
-          val isa = s"rv${site(XLen)}im${if (site(UseAtomics)) "a" else ""}${if (site(FPUKey).nonEmpty) "fd" else ""}"
+        if (addrMap contains "mem") {
+          res append "ram {\n"
+          res append "  0 {\n"
+          res append s"    addr 0x${addrMap("mem").start.toString(16)};\n"
+          res append s"    size 0x${addrMap("mem").size.toString(16)};\n"
+          res append "  };\n"
+          res append "};\n"
+          res append "core {\n"
+        }
+        for (i <- 0 until site(NTiles)) { // TODO heterogeneous tiles
+          val isa = s"rv${site(XLen)}i${site(MulDivKey).map(x=>"m").mkString}${if (site(UseAtomics)) "a" else ""}${if (site(FPUKey).nonEmpty) "fd" else ""}"
           res append s"  $i {\n"
           res append "    0 {\n"
           res append s"      isa $isa;\n"
@@ -204,6 +210,8 @@ class WithTL extends Config(
     case NExtMMIOTLChannels => 1
   })
 
+class WithScratchpads extends Config(new WithNMemoryChannels(0) ++ new WithDataScratchpad(16384))
+
 class DefaultFPGASmallConfig extends Config(new WithSmallCores ++ new DefaultFPGAConfig)
 class DefaultSmallConfig extends Config(new WithSmallCores ++ new BaseConfig)
 class DefaultRV32Config extends Config(new WithRV32 ++ new DefaultSmallConfig)
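The new `import rocket.Util._` in rocketchip/Configs.scala supplies the Boolean `option` enrichment used above: with zero memory channels, the "mem" entry is simply never added, which is why makeConfigString now guards the ram block. Sketched with a plain helper in place of the implicit:

    object AddrMapSketch extends App {
      // Stand-in for rocket.Util's Boolean.option enrichment (hypothetical helper).
      def option[T](cond: Boolean)(x: => T): Option[T] = if (cond) Some(x) else None

      val nMemoryChannels = 0 // as forced by WithScratchpads
      val entries = "io" +: option(nMemoryChannels > 0)("mem").toSeq
      println(entries)                // List(io): no "mem" region at all
      println(entries contains "mem") // false, so the "ram { ... }" block is skipped
    }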
@@ -253,6 +261,7 @@ class DualCoreConfig extends Config(
   new WithNCores(2) ++ new WithL2Cache ++ new BaseConfig)
 
 class TinyConfig extends Config(
+  new WithScratchpads ++
   new WithRV32 ++ new WithSmallCores ++
   new WithStatelessBridge ++ new BaseConfig)
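TinyConfig now stacks WithScratchpads ahead of the existing fragments, so the scratchpad size and zero-memory-channel settings shadow BaseConfig's defaults. A toy model of that left-biased `++` composition (the real cde.Config API differs; this only illustrates lookup order):

    object ConfigCompositionSketch extends App {
      type Cfg = PartialFunction[String, Any]

      // Roughly `new WithNMemoryChannels(0) ++ new WithDataScratchpad(16384)`:
      val withScratchpads: Cfg = {
        case "NMemoryChannels"    => 0
        case "DataScratchpadSize" => 16384
      }
      val base: Cfg = {
        case "NMemoryChannels"    => 1
        case "DataScratchpadSize" => 0
      }

      // Like Config's `++`, earlier fragments take precedence on lookup.
      val tiny = withScratchpads orElse base
      println(tiny("NMemoryChannels"))    // 0: DRAM ports replaced by the scratchpad
      println(tiny("DataScratchpadSize")) // 16384 (16 KiB)
    }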