diff --git a/regression/Makefile b/regression/Makefile index b1218cfd..c520ebf9 100644 --- a/regression/Makefile +++ b/regression/Makefile @@ -40,14 +40,14 @@ endif ifeq ($(SUITE),RocketSuite) PROJECT=rocketchip -CONFIGS=DefaultConfig DefaultL2Config DefaultBufferlessConfig TinyConfig +CONFIGS=DefaultConfig DefaultBufferlessConfig TinyConfig endif ifeq ($(SUITE),GroundtestSuite) PROJECT=groundtest CONFIGS=MemtestConfig MemtestBufferlessConfig MemtestStatelessConfig FancyMemtestConfig \ BroadcastRegressionTestConfig BufferlessRegressionTestConfig CacheRegressionTestConfig \ - ComparatorConfig ComparatorBufferlessConfig ComparatorL2Config ComparatorStatelessConfig + ComparatorConfig ComparatorBufferlessConfig ComparatorStatelessConfig endif ifeq ($(SUITE),UnittestSuite) diff --git a/riscv-tools b/riscv-tools index 40919ef9..886d8131 160000 --- a/riscv-tools +++ b/riscv-tools @@ -1 +1 @@ -Subproject commit 40919ef94f7e6426785bf534fb018ae8d0d79fb3 +Subproject commit 886d8131dbd23533fb04d2d76a80be21d5f9ee7a diff --git a/src/main/scala/coreplex/BaseCoreplex.scala b/src/main/scala/coreplex/BaseCoreplex.scala index 15c2cc4b..21255209 100644 --- a/src/main/scala/coreplex/BaseCoreplex.scala +++ b/src/main/scala/coreplex/BaseCoreplex.scala @@ -1,7 +1,7 @@ package coreplex import Chisel._ -import cde.{Parameters, Field} +import config._ import junctions._ import diplomacy._ import uncore.tilelink._ @@ -14,34 +14,40 @@ import uncore.converters._ import rocket._ import util._ -/** Number of memory channels */ -case object NMemoryChannels extends Field[Int] -/** Number of banks per memory channel */ -case object NBanksPerMemoryChannel extends Field[Int] -/** Number of tracker per bank */ -case object NTrackersPerBank extends Field[Int] -/** Least significant bit of address used for bank partitioning */ -case object BankIdLSB extends Field[Int] -/** Function for building some kind of coherence manager agent */ -case object BuildL2CoherenceManager extends Field[(Int, Parameters) => 
CoherenceAgent] -/** Function for building some kind of tile connected to a reset signal */ -case object BuildTiles extends Field[Seq[Parameters => LazyTile]] +/** Widths of various points in the SoC */ +case class TLBusConfig(beatBytes: Int) +case object CBusConfig extends Field[TLBusConfig] +case object L1toL2Config extends Field[TLBusConfig] + +/** L2 Broadcast Hub configuration */ +case class BroadcastConfig( + nTrackers: Int = 4, + bufferless: Boolean = false) +case object BroadcastConfig extends Field[BroadcastConfig] + +/** L2 memory subsystem configuration */ +case class BankedL2Config( + nMemoryChannels: Int = 1, + nBanksPerChannel: Int = 1, + coherenceManager: (Int, Parameters) => (TLInwardNode, TLOutwardNode) = { case (lineBytes, p) => + val BroadcastConfig(nTrackers, bufferless) = p(BroadcastConfig) + val bh = LazyModule(new TLBroadcast(lineBytes, nTrackers, bufferless)) + (bh.node, bh.node) + }) { + val nBanks = nMemoryChannels*nBanksPerChannel +} +case object BankedL2Config extends Field[BankedL2Config] + /** The file to read the BootROM contents from */ case object BootROMFile extends Field[String] trait HasCoreplexParameters { implicit val p: Parameters - lazy val nBanksPerMemChannel = p(NBanksPerMemoryChannel) - lazy val lsb = p(BankIdLSB) - lazy val innerParams = p.alterPartial({ case TLId => "L1toL2" }) - lazy val outerMemParams = p.alterPartial({ case TLId => "L2toMC" }) - lazy val outerMMIOParams = p.alterPartial({ case TLId => "L2toMMIO" }) - lazy val globalAddrMap = p(rocketchip.GlobalAddrMap) + lazy val cbusConfig = p(CBusConfig) + lazy val l1tol2Config = p(L1toL2Config) lazy val nTiles = p(uncore.devices.NTiles) - lazy val nSlaves = p(rocketchip.NCoreplexExtClients) - lazy val nMemChannels = p(NMemoryChannels) lazy val hasSupervisor = p(rocket.UseVM) - lazy val nTrackersPerBank = p(NTrackersPerBank) + lazy val l2Config = p(BankedL2Config) } case class CoreplexParameters(implicit val p: Parameters) extends HasCoreplexParameters @@ -56,19 
+62,23 @@ abstract class BareCoreplexModule[+L <: BareCoreplex, +B <: BareCoreplexBundle[L } trait CoreplexNetwork extends HasCoreplexParameters { - this: BareCoreplex => + val module: CoreplexNetworkModule val l1tol2 = LazyModule(new TLXbar) - val l1tol2_beatBytes = p(TLKey("L2toMMIO")).dataBitsPerBeat/8 + val l1tol2_beatBytes = l1tol2Config.beatBytes val l1tol2_lineBytes = p(CacheBlockBytes) val cbus = LazyModule(new TLXbar) - val cbus_beatBytes = p(XLen)/8 + val cbus_beatBytes = cbusConfig.beatBytes val cbus_lineBytes = l1tol2_lineBytes + val intBar = LazyModule(new IntXbar) + val mmio = TLOutputNode() val mmioInt = IntInputNode() + intBar.intnode := mmioInt + cbus.node := TLAtomicAutomata(arithmetic = true)( // disable once TLB uses TL2 metadata TLWidthWidget(l1tol2_beatBytes)( @@ -82,9 +92,7 @@ trait CoreplexNetwork extends HasCoreplexParameters { } trait CoreplexNetworkBundle extends HasCoreplexParameters { - this: { - val outer: CoreplexNetwork - } => + val outer: CoreplexNetwork implicit val p = outer.p val mmio = outer.mmio.bundleOut @@ -92,25 +100,26 @@ trait CoreplexNetworkBundle extends HasCoreplexParameters { } trait CoreplexNetworkModule extends HasCoreplexParameters { - this: BareCoreplexModule[BareCoreplex, BareCoreplexBundle[BareCoreplex]] => + val outer: CoreplexNetwork + val io: CoreplexNetworkBundle + implicit val p = outer.p } -trait BankedL2CoherenceManagers { - this: CoreplexNetwork => - require (isPow2(nBanksPerMemChannel)) +trait BankedL2CoherenceManagers extends CoreplexNetwork { + val module: BankedL2CoherenceManagersModule + + require (isPow2(l2Config.nBanksPerChannel)) require (isPow2(l1tol2_lineBytes)) - def l2ManagerFactory(): (TLInwardNode, TLOutwardNode) - - val mem = Seq.fill(nMemChannels) { + val mem = Seq.fill(l2Config.nMemoryChannels) { val bankBar = LazyModule(new TLXbar) val output = TLOutputNode() output := bankBar.node - val mask = ~BigInt((nBanksPerMemChannel-1) * l1tol2_lineBytes) - for (i <- 0 until nBanksPerMemChannel) { 
- val (in, out) = l2ManagerFactory() + val mask = ~BigInt((l2Config.nBanksPerChannel-1) * l1tol2_lineBytes) + for (i <- 0 until l2Config.nBanksPerChannel) { + val (in, out) = l2Config.coherenceManager(l1tol2_lineBytes, p) in := TLFilter(AddressSet(i * l1tol2_lineBytes, mask))(l1tol2.node) bankBar.node := out } @@ -119,168 +128,28 @@ trait BankedL2CoherenceManagers { } } -trait BankedL2CoherenceManagersBundle { - this: CoreplexNetworkBundle { - val outer: BankedL2CoherenceManagers - } => +trait BankedL2CoherenceManagersBundle extends CoreplexNetworkBundle { + val outer: BankedL2CoherenceManagers - require (nMemChannels <= 1, "Seq in Chisel Bundle needed to support > 1") // !!! + require (l2Config.nMemoryChannels <= 1, "Seq in Chisel Bundle needed to support > 1") // !!! val mem = outer.mem.map(_.bundleOut).toList.headOption // .headOption should be removed !!! } -trait BankedL2CoherenceManagersModule { - this: CoreplexNetworkModule { - val outer: BankedL2CoherenceManagers - val io: BankedL2CoherenceManagersBundle - } => +trait BankedL2CoherenceManagersModule extends CoreplexNetworkModule { + val outer: BankedL2CoherenceManagers + val io: BankedL2CoherenceManagersBundle } -trait CoreplexRISCVPlatform { - this: CoreplexNetwork => - - // Build a set of Tiles - val lazyTiles = p(BuildTiles) map { _(p) } - val legacy = LazyModule(new TLLegacy()(outerMMIOParams)) - val tileIntNodes = lazyTiles.map { _ => IntInternalOutputNode() } // this should be moved into the Tile... 
- - val debug = LazyModule(new TLDebugModule()) - val plic = LazyModule(new TLPLIC(hasSupervisor, maxPriorities = 7)) - val clint = LazyModule(new CoreplexLocalInterrupter) - - // Kill this once we move TL2 into rocket - l1tol2.node := - TLHintHandler()( - legacy.node) - - debug.node := TLFragmenter(cbus_beatBytes, cbus_lineBytes)(cbus.node) - plic.node := TLFragmenter(cbus_beatBytes, cbus_lineBytes)(cbus.node) - clint.node := TLFragmenter(cbus_beatBytes, cbus_lineBytes)(cbus.node) - - plic.intnode := mmioInt - tileIntNodes.foreach { _ := plic.intnode } -} - -trait CoreplexRISCVPlatformBundle { - this: CoreplexNetworkBundle { - val outer: CoreplexRISCVPlatform - } => - - val mem = Vec(nMemChannels, new ClientUncachedTileLinkIO()(outerMemParams)) - val slave = Vec(nSlaves, new ClientUncachedTileLinkIO()(innerParams)).flip - val debug = new DebugBusIO().flip - val rtcTick = Bool(INPUT) - val resetVector = UInt(INPUT, p(XLen)) - val success = Bool(OUTPUT) // used for testing -} - -trait CoreplexRISCVPlatformModule { - this: CoreplexNetworkModule { - val outer: CoreplexNetwork with CoreplexRISCVPlatform - val io: CoreplexRISCVPlatformBundle - } => - - val tiles = outer.lazyTiles.map(_.module) - val uncoreTileIOs = (tiles zipWithIndex) map { case (tile, i) => Wire(tile.io) } - - println("\nGenerated Address Map") - for (entry <- p(rocketchip.GlobalAddrMap).flatten) { - val name = entry.name - val start = entry.region.start - val end = entry.region.start + entry.region.size - 1 - val prot = entry.region.attr.prot - val protStr = (if ((prot & AddrMapProt.R) > 0) "R" else "") + - (if ((prot & AddrMapProt.W) > 0) "W" else "") + - (if ((prot & AddrMapProt.X) > 0) "X" else "") - val cacheable = if (entry.region.attr.cacheable) " [C]" else "" - println(f"\t$name%s $start%x - $end%x, $protStr$cacheable") - } - - // Create and export the ConfigString - val managers = outer.l1tol2.node.edgesIn(0).manager.managers - val configString = rocketchip.GenerateConfigString(p, 
outer.clint, outer.plic, managers) - // Allow something else to have override the config string - if (!ConfigStringOutput.contents.isDefined) { - ConfigStringOutput.contents = Some(configString) - } - println(s"\nGenerated Configuration String\n${ConfigStringOutput.contents.get}") - - val nCachedPorts = tiles.map(tile => tile.io.cached.size).reduce(_ + _) - val nUncachedPorts = tiles.map(tile => tile.io.uncached.size).reduce(_ + _) - val nBanks = nMemChannels * nBanksPerMemChannel - - buildUncore(p.alterPartial({ - case HastiId => "TL" - case TLId => "L1toL2" - case NCachedTileLinkPorts => nCachedPorts - case NUncachedTileLinkPorts => nUncachedPorts - })) - - def buildUncore(implicit p: Parameters) { - // Create a simple L1toL2 NoC between the tiles and the banks of outer memory - // Cached ports are first in client list, making sharerToClientId just an indentity function - // addrToBank is sed to hash physical addresses (of cache blocks) to banks (and thereby memory channels) - def sharerToClientId(sharerId: UInt) = sharerId - def addrToBank(addr: UInt): UInt = if (nBanks == 0) UInt(0) else { - val isMemory = globalAddrMap.isInRegion("mem", addr << log2Up(p(CacheBlockBytes))) - Mux(isMemory, addr.extract(lsb + log2Ceil(nBanks) - 1, lsb), UInt(nBanks)) - } - val l1tol2net = Module(new PortedTileLinkCrossbar(addrToBank, sharerToClientId)) - - // Create point(s) of coherence serialization - val managerEndpoints = List.tabulate(nBanks){id => p(BuildL2CoherenceManager)(id, p)} - managerEndpoints.flatMap(_.incoherent).foreach(_ := Bool(false)) - - val mmioManager = Module(new MMIOTileLinkManager()(p.alterPartial({ - case TLId => "L1toL2" - case InnerTLId => "L1toL2" - case OuterTLId => "L2toMMIO" - }))) - - // Wire the tiles to the TileLink client ports of the L1toL2 network, - // and coherence manager(s) to the other side - l1tol2net.io.clients_cached <> uncoreTileIOs.map(_.cached).flatten - l1tol2net.io.clients_uncached <> uncoreTileIOs.map(_.uncached).flatten ++ 
io.slave - l1tol2net.io.managers <> managerEndpoints.map(_.innerTL) :+ mmioManager.io.inner - outer.legacy.module.io.legacy <> mmioManager.io.outer - - val mem_ic = Module(new TileLinkMemoryInterconnect(nBanksPerMemChannel, nMemChannels)(outerMemParams)) - - val backendBuffering = TileLinkDepths(0,0,0,0,0) - for ((bank, icPort) <- managerEndpoints zip mem_ic.io.in) { - val enqueued = TileLinkEnqueuer(bank.outerTL, backendBuffering) - icPort <> TileLinkIOUnwrapper(enqueued) - } - - io.mem <> mem_ic.io.out - } - - // connect coreplex-internal interrupts to tiles - for ((tile, i) <- (uncoreTileIOs zipWithIndex)) { - tile.hartid := UInt(i) - tile.resetVector := io.resetVector - tile.interrupts := outer.clint.module.io.tiles(i) - tile.interrupts.debug := outer.debug.module.io.debugInterrupts(i) - tile.interrupts.meip := outer.tileIntNodes(i).bundleOut(0)(0) - tile.interrupts.seip.foreach(_ := outer.tileIntNodes(i).bundleOut(0)(1)) - } - - outer.debug.module.io.db <> io.debug - outer.clint.module.io.rtcTick := io.rtcTick - - // Coreplex doesn't know when to stop running - io.success := Bool(false) -} - -class BaseCoreplex(implicit p: Parameters) extends BareCoreplex +abstract class BaseCoreplex(implicit p: Parameters) extends BareCoreplex with CoreplexNetwork - with CoreplexRISCVPlatform { + with BankedL2CoherenceManagers { override lazy val module = new BaseCoreplexModule(this, () => new BaseCoreplexBundle(this)) } class BaseCoreplexBundle[+L <: BaseCoreplex](_outer: L) extends BareCoreplexBundle(_outer) with CoreplexNetworkBundle - with CoreplexRISCVPlatformBundle + with BankedL2CoherenceManagersBundle class BaseCoreplexModule[+L <: BaseCoreplex, +B <: BaseCoreplexBundle[L]](_outer: L, _io: () => B) extends BareCoreplexModule(_outer, _io) with CoreplexNetworkModule - with CoreplexRISCVPlatformModule + with BankedL2CoherenceManagersModule diff --git a/src/main/scala/coreplex/Configs.scala b/src/main/scala/coreplex/Configs.scala index 2dbe8a04..13986aa0 100644 --- 
a/src/main/scala/coreplex/Configs.scala +++ b/src/main/scala/coreplex/Configs.scala @@ -6,6 +6,7 @@ import Chisel._ import junctions._ import diplomacy._ import uncore.tilelink._ +import uncore.tilelink2._ import uncore.coherence._ import uncore.agents._ import uncore.devices._ @@ -13,13 +14,10 @@ import uncore.converters._ import rocket._ import util._ import util.ConfigUtils._ -import rocketchip.{GlobalAddrMap, NCoreplexExtClients} -import cde.{Parameters, Config, Dump, Knob, CDEMatchError} +import config._ class BaseCoreplexConfig extends Config ( - topDefinitions = { (pname,site,here) => - type PF = PartialFunction[Any,Any] - def findBy(sname:Any):Any = here[PF](site[Any](sname))(pname) + { (pname,site,here) => lazy val innerDataBits = site(XLen) lazy val innerDataBeats = (8 * site(CacheBlockBytes)) / innerDataBits pname match { @@ -28,56 +26,33 @@ class BaseCoreplexConfig extends Config ( case PgLevels => if (site(XLen) == 64) 3 /* Sv39 */ else 2 /* Sv32 */ case ASIdBits => 7 //Params used by all caches - case NSets => findBy(CacheName) - case NWays => findBy(CacheName) - case RowBits => findBy(CacheName) - case NTLBEntries => findBy(CacheName) - case CacheIdBits => findBy(CacheName) - case SplitMetadata => findBy(CacheName) - case "L1I" => { - case NSets => Knob("L1I_SETS") //64 - case NWays => Knob("L1I_WAYS") //4 - case RowBits => site(TLKey("L1toL2")).dataBitsPerBeat - case NTLBEntries => 8 - case CacheIdBits => 0 - case SplitMetadata => false - }:PF - case "L1D" => { - case NSets => Knob("L1D_SETS") //64 - case NWays => Knob("L1D_WAYS") //4 - case RowBits => site(TLKey("L1toL2")).dataBitsPerBeat - case NTLBEntries => 8 - case CacheIdBits => 0 - case SplitMetadata => false - }:PF + case CacheName("L1I") => CacheConfig( + nSets = 64, + nWays = 4, + rowBits = site(L1toL2Config).beatBytes*8, + nTLBEntries = 8, + cacheIdBits = 0, + splitMetadata = false) + case CacheName("L1D") => CacheConfig( + nSets = 64, + nWays = 4, + rowBits = 
site(L1toL2Config).beatBytes*8, + nTLBEntries = 8, + cacheIdBits = 0, + splitMetadata = false) case ECCCode => None - case Replacer => () => new RandomReplacement(site(NWays)) + case Replacer => () => new RandomReplacement(site(site(CacheName)).nWays) //L1InstCache case BtbKey => BtbParameters() //L1DataCache - case DCacheKey => DCacheConfig(nMSHRs = site(Knob("L1D_MSHRS"))) + case DCacheKey => DCacheConfig(nMSHRs = 2) case DataScratchpadSize => 0 //L2 Memory System Params case AmoAluOperandBits => site(XLen) case NAcquireTransactors => 7 case L2StoreDataQueueDepth => 1 case L2DirectoryRepresentation => new NullRepresentation(site(NTiles)) - case BuildL2CoherenceManager => (id: Int, p: Parameters) => - Module(new L2BroadcastHub()(p.alterPartial({ - case InnerTLId => "L1toL2" - case OuterTLId => "L2toMC" }))) - case NCachedTileLinkPorts => 1 - case NUncachedTileLinkPorts => 1 //Tile Constants - case BuildTiles => { - List.tabulate(site(NTiles)){ i => (p: Parameters) => - LazyModule(new RocketTile()(p.alterPartial({ - case TileId => i - case TLId => "L1toL2" - case NUncachedTileLinkPorts => 1 + site(RoccNMemChannels) - }))) - } - } case BuildRoCC => Nil case RoccNMemChannels => site(BuildRoCC).map(_.nMemChannels).foldLeft(0)(_ + _) case RoccNPTWPorts => site(BuildRoCC).map(_.nPTWPorts).foldLeft(0)(_ + _) @@ -107,15 +82,17 @@ class BaseCoreplexConfig extends Config ( case LNEndpoints => site(TLKey(site(TLId))).nManagers + site(TLKey(site(TLId))).nClients case LNHeaderBits => log2Ceil(site(TLKey(site(TLId))).nManagers) + log2Up(site(TLKey(site(TLId))).nClients) + case CBusConfig => TLBusConfig(beatBytes = site(XLen)/8) + case L1toL2Config => TLBusConfig(beatBytes = site(XLen)/8) // increase for more PCIe bandwidth case TLKey("L1toL2") => { - val useMEI = site(NTiles) <= 1 && site(NCachedTileLinkPorts) <= 1 + val useMEI = site(NTiles) <= 1 TileLinkParameters( coherencePolicy = ( if (useMEI) new MEICoherence(site(L2DirectoryRepresentation)) else new 
MESICoherence(site(L2DirectoryRepresentation))), - nManagers = site(NBanksPerMemoryChannel)*site(NMemoryChannels) + 1 /* MMIO */, - nCachingClients = site(NCachedTileLinkPorts), - nCachelessClients = site(NCoreplexExtClients) + site(NUncachedTileLinkPorts), + nManagers = site(BankedL2Config).nBanks + 1 /* MMIO */, + nCachingClients = 1, + nCachelessClients = 1, maxClientXacts = max_int( // L1 cache site(DCacheKey).nMSHRs + 1 /* IOMSHR */, @@ -126,149 +103,91 @@ class BaseCoreplexConfig extends Config ( dataBeats = innerDataBeats, dataBits = site(CacheBlockBytes)*8) } - case TLKey("L2toMC") => - TileLinkParameters( - coherencePolicy = new MEICoherence( - new NullRepresentation(site(NBanksPerMemoryChannel))), - nManagers = 1, - nCachingClients = site(NBanksPerMemoryChannel), - nCachelessClients = 0, - maxClientXacts = site(NAcquireTransactors) + 2, - maxClientsPerPort = site(NBanksPerMemoryChannel), - maxManagerXacts = 1, - dataBeats = innerDataBeats, - dataBits = site(CacheBlockBytes)*8) - case TLKey("L2toMMIO") => { - TileLinkParameters( - coherencePolicy = new MICoherence( - new NullRepresentation(site(NBanksPerMemoryChannel))), - nManagers = 1, - nCachingClients = 0, - nCachelessClients = 1, - maxClientXacts = 4, - maxClientsPerPort = 1, - maxManagerXacts = 1, - dataBeats = innerDataBeats, - dataBits = site(CacheBlockBytes) * 8) - } - case BootROMFile => "./bootrom/bootrom.img" case NTiles => 1 - case NBanksPerMemoryChannel => Knob("NBANKS_PER_MEM_CHANNEL") - case NTrackersPerBank => Knob("NTRACKERS_PER_BANK") - case BankIdLSB => 0 - case CacheBlockBytes => Dump("CACHE_BLOCK_BYTES", 64) + case BroadcastConfig => BroadcastConfig() + case BankedL2Config => BankedL2Config() + case CacheBlockBytes => 64 case CacheBlockOffsetBits => log2Up(here(CacheBlockBytes)) case EnableL2Logging => false case _ => throw new CDEMatchError - }}, - knobValues = { - case "NBANKS_PER_MEM_CHANNEL" => 1 - case "NTRACKERS_PER_BANK" => 4 - case "L1D_MSHRS" => 2 - case "L1D_SETS" => 64 - 
case "L1D_WAYS" => 4 - case "L1I_SETS" => 64 - case "L1I_WAYS" => 4 - case _ => throw new CDEMatchError + } } ) class WithNCores(n: Int) extends Config( (pname,site,here) => pname match { case NTiles => n + case _ => throw new CDEMatchError }) class WithNBanksPerMemChannel(n: Int) extends Config( - knobValues = { - case "NBANKS_PER_MEM_CHANNEL" => n + (pname, site, here, up) => pname match { + case BankedL2Config => up(BankedL2Config).copy(nBanksPerChannel = n) case _ => throw new CDEMatchError }) class WithNTrackersPerBank(n: Int) extends Config( - knobValues = { - case "NTRACKERS_PER_BANK" => n + (pname, site, here, up) => pname match { + case BroadcastConfig => up(BroadcastConfig).copy(nTrackers = n) case _ => throw new CDEMatchError }) // This is the number of sets **per way** -class WithL1ICacheSets(sets: Int) extends Config ( - knobValues = { - case "L1I_SETS" => sets +class WithL1ICacheSets(sets: Int) extends Config( + (pname, site, here, up) => pname match { + case CacheName("L1I") => up(CacheName("L1I")).copy(nSets = sets) case _ => throw new CDEMatchError - } -) + }) // This is the number of sets **per way** -class WithL1DCacheSets(sets: Int) extends Config ( - knobValues = { - case "L1D_SETS" => sets +class WithL1DCacheSets(sets: Int) extends Config( + (pname, site, here, up) => pname match { + case CacheName("L1D") => up(CacheName("L1D")).copy(nSets = sets) case _ => throw new CDEMatchError - } -) + }) -class WithL1ICacheWays(ways: Int) extends Config ( - knobValues = { - case "L1I_WAYS" => ways +class WithL1ICacheWays(ways: Int) extends Config( + (pname, site, here, up) => pname match { + case CacheName("L1I") => up(CacheName("L1I")).copy(nWays = ways) case _ => throw new CDEMatchError - } -) + }) -class WithL1DCacheWays(ways: Int) extends Config ( - knobValues = { - case "L1D_WAYS" => ways +class WithL1DCacheWays(ways: Int) extends Config( + (pname, site, here, up) => pname match { + case CacheName("L1D") => up(CacheName("L1D")).copy(nWays = ways) 
case _ => throw new CDEMatchError - } -) + }) -class WithCacheBlockBytes(linesize: Int) extends Config ( - topDefinitions = { (pname,site,here) => pname match { - case CacheBlockBytes => Dump("CACHE_BLOCK_BYTES", linesize) +class WithCacheBlockBytes(linesize: Int) extends Config( + (pname,site,here) => pname match { + case CacheBlockBytes => linesize case _ => throw new CDEMatchError - }} -) + }) class WithDataScratchpad(n: Int) extends Config( - (pname,site,here) => pname match { + (pname,site,here,up) => pname match { case DataScratchpadSize => n - case NSets if site(CacheName) == "L1D" => n / site(CacheBlockBytes) + case CacheName("L1D") => up(CacheName("L1D")).copy(nSets = n / site(CacheBlockBytes)) case _ => throw new CDEMatchError }) +// TODO: re-add L2 class WithL2Cache extends Config( (pname,site,here) => pname match { - case "L2_CAPACITY_IN_KB" => Knob("L2_CAPACITY_IN_KB") - case "L2Bank" => { - case NSets => (((here[Int]("L2_CAPACITY_IN_KB")*1024) / - site(CacheBlockBytes)) / - (site(NBanksPerMemoryChannel)*site(NMemoryChannels))) / - site(NWays) - case NWays => Knob("L2_WAYS") - case RowBits => site(TLKey(site(TLId))).dataBitsPerBeat - case CacheIdBits => log2Ceil(site(NMemoryChannels) * site(NBanksPerMemoryChannel)) - case SplitMetadata => Knob("L2_SPLIT_METADATA") - }: PartialFunction[Any,Any] - case NAcquireTransactors => 2 - case NSecondaryMisses => 4 - case L2DirectoryRepresentation => new FullRepresentation(site(NTiles)) - case BuildL2CoherenceManager => (id: Int, p: Parameters) => - Module(new L2HellaCacheBank()(p.alterPartial({ - case CacheId => id - case CacheName => "L2Bank" - case InnerTLId => "L1toL2" - case OuterTLId => "L2toMC"}))) - case L2Replacer => () => new SeqRandom(site(NWays)) + case CacheName("L2") => CacheConfig( + nSets = 1024, + nWays = 1, + rowBits = site(L1toL2Config).beatBytes*8, + nTLBEntries = 0, + cacheIdBits = 1, + splitMetadata = false) case _ => throw new CDEMatchError - }, - knobValues = { case "L2_WAYS" => 8; case 
"L2_CAPACITY_IN_KB" => 2048; case "L2_SPLIT_METADATA" => false; case _ => throw new CDEMatchError } -) + }) class WithBufferlessBroadcastHub extends Config( - (pname, site, here) => pname match { - case BuildL2CoherenceManager => (id: Int, p: Parameters) => - Module(new BufferlessBroadcastHub()(p.alterPartial({ - case InnerTLId => "L1toL2" - case OuterTLId => "L2toMC" }))) + (pname, site, here, up) => pname match { + case BroadcastConfig => up(BroadcastConfig).copy(bufferless = true) }) /** @@ -283,35 +202,31 @@ class WithBufferlessBroadcastHub extends Config( * system depends on coherence between channels in any way, * DO NOT use this configuration. */ -class WithStatelessBridge extends Config ( - topDefinitions = (pname, site, here) => pname match { - case BuildL2CoherenceManager => (id: Int, p: Parameters) => - Module(new ManagerToClientStatelessBridge()(p.alterPartial({ - case InnerTLId => "L1toL2" - case OuterTLId => "L2toMC" }))) - }, - knobValues = { - case "L1D_MSHRS" => 0 +class WithStatelessBridge extends Config( + (pname,site,here,up) => pname match { +/* !!! 
FIXME + case BankedL2Config => up(BankedL2Config).copy(coherenceManager = { case (_, _) => + val pass = LazyModule(new TLBuffer(0)) + (pass.node, pass.node) + }) +*/ + case DCacheKey => up(DCacheKey).copy(nMSHRs = 0) case _ => throw new CDEMatchError - } -) + }) class WithPLRU extends Config( (pname, site, here) => pname match { - case L2Replacer => () => new SeqPLRU(site(NSets), site(NWays)) case _ => throw new CDEMatchError }) class WithL2Capacity(size_kb: Int) extends Config( - knobValues = { - case "L2_CAPACITY_IN_KB" => size_kb + (pname,site,here) => pname match { case _ => throw new CDEMatchError }) class WithNL2Ways(n: Int) extends Config( - knobValues = { - case "L2_WAYS" => n - case _ => throw new CDEMatchError + (pname,site,here,up) => pname match { + case CacheName("L2") => up(CacheName("L2")).copy(nWays = n) }) class WithRV32 extends Config( @@ -319,35 +234,26 @@ class WithRV32 extends Config( case XLen => 32 case FPUKey => Some(FPUConfig(divSqrt = false)) case _ => throw new CDEMatchError - } -) + }) -class WithBlockingL1 extends Config ( - knobValues = { - case "L1D_MSHRS" => 0 +class WithBlockingL1 extends Config( + (pname,site,here,up) => pname match { + case DCacheKey => up(DCacheKey).copy(nMSHRs = 0) case _ => throw new CDEMatchError - } -) + }) -class WithSmallCores extends Config ( - topDefinitions = { (pname,site,here) => pname match { +class WithSmallCores extends Config( + (pname,site,here,up) => pname match { case MulDivKey => Some(MulDivConfig()) case FPUKey => None case UseVM => false - case NTLBEntries => 4 case BtbKey => BtbParameters(nEntries = 0) case NAcquireTransactors => 2 + case CacheName("L1D") => up(CacheName("L1D")).copy(nSets = 64, nWays = 1, nTLBEntries = 4) + case CacheName("L1I") => up(CacheName("L1I")).copy(nSets = 64, nWays = 1, nTLBEntries = 4) + case DCacheKey => up(DCacheKey).copy(nMSHRs = 0) case _ => throw new CDEMatchError - }}, - knobValues = { - case "L1D_SETS" => 64 - case "L1D_WAYS" => 1 - case "L1I_SETS" => 64 - 
case "L1I_WAYS" => 1 - case "L1D_MSHRS" => 0 - case _ => throw new CDEMatchError - } -) + }) class WithRoccExample extends Config( (pname, site, here) => pname match { @@ -367,29 +273,23 @@ class WithRoccExample extends Config( case _ => throw new CDEMatchError }) -class WithSplitL2Metadata extends Config( - knobValues = { case "L2_SPLIT_METADATA" => true; case _ => throw new CDEMatchError }) - -class WithDefaultBtb extends Config ( - topDefinitions = { (pname,site,here) => pname match { +class WithDefaultBtb extends Config( + (pname,site,here) => pname match { case BtbKey => BtbParameters() case _ => throw new CDEMatchError - }} -) + }) -class WithFastMulDiv extends Config ( - topDefinitions = { (pname,site,here) => pname match { +class WithFastMulDiv extends Config( + (pname,site,here) => pname match { case MulDivKey => Some(MulDivConfig(mulUnroll = 8, mulEarlyOut = (site(XLen) > 32), divEarlyOut = true)) case _ => throw new CDEMatchError - }} -) + }) -class WithoutMulDiv extends Config ( +class WithoutMulDiv extends Config( (pname, site, here) => pname match { case MulDivKey => None case _ => throw new CDEMatchError - } -) + }) class WithoutFPU extends Config( (pname, site, here) => pname match { @@ -401,5 +301,4 @@ class WithFPUWithoutDivSqrt extends Config ( (pname, site, here) => pname match { case FPUKey => Some(FPUConfig(divSqrt = false)) case _ => throw new CDEMatchError - } -) + }) diff --git a/src/main/scala/coreplex/Coreplex.scala b/src/main/scala/coreplex/Coreplex.scala index c9fa0429..fd6fde18 100644 --- a/src/main/scala/coreplex/Coreplex.scala +++ b/src/main/scala/coreplex/Coreplex.scala @@ -1,7 +1,7 @@ package coreplex import Chisel._ -import cde.{Parameters, Field} +import config._ import junctions._ import diplomacy._ import uncore.tilelink._ @@ -10,114 +10,34 @@ import uncore.util._ import util._ import rocket._ -trait BroadcastL2 { - this: CoreplexNetwork => - def l2ManagerFactory() = { - val bh = LazyModule(new TLBroadcast(l1tol2_lineBytes, 
nTrackersPerBank)) - (bh.node, bh.node) - } -} - ///// -trait DirectConnection { - this: CoreplexNetwork with CoreplexRISCVPlatform => - lazyTiles.map(_.slave).flatten.foreach { scratch => scratch := cbus.node } -} - -trait DirectConnectionModule { - this: CoreplexNetworkModule with CoreplexRISCVPlatformModule => - - val tlBuffering = TileLinkDepths(1,1,2,2,0) - val ultBuffering = UncachedTileLinkDepths(1,2) - - (tiles zip uncoreTileIOs) foreach { case (tile, uncore) => - (uncore.cached zip tile.io.cached) foreach { case (u, t) => u <> TileLinkEnqueuer(t, tlBuffering) } - (uncore.uncached zip tile.io.uncached) foreach { case (u, t) => u <> TileLinkEnqueuer(t, ultBuffering) } - - tile.io.interrupts <> uncore.interrupts - - tile.io.hartid := uncore.hartid - tile.io.resetVector := uncore.resetVector - } -} - class DefaultCoreplex(implicit p: Parameters) extends BaseCoreplex - with DirectConnection { + with CoreplexRISCVPlatform + with RocketPlex { override lazy val module = new DefaultCoreplexModule(this, () => new DefaultCoreplexBundle(this)) } class DefaultCoreplexBundle[+L <: DefaultCoreplex](_outer: L) extends BaseCoreplexBundle(_outer) + with CoreplexRISCVPlatformBundle + with RocketPlexBundle class DefaultCoreplexModule[+L <: DefaultCoreplex, +B <: DefaultCoreplexBundle[L]](_outer: L, _io: () => B) extends BaseCoreplexModule(_outer, _io) - with DirectConnectionModule + with CoreplexRISCVPlatformModule + with RocketPlexModule ///// -trait AsyncConnection { - this: CoreplexNetwork with CoreplexRISCVPlatform => - val crossings = lazyTiles.map(_.slave).map(_.map { scratch => - val crossing = LazyModule(new TLAsyncCrossing) - crossing.node := cbus.node - val monitor = (scratch := crossing.node) - (crossing, monitor) - }) -} - -trait AsyncConnectionBundle { - this: CoreplexNetworkBundle with CoreplexRISCVPlatformBundle => - val tcrs = Vec(nTiles, new Bundle { - val clock = Clock(INPUT) - val reset = Bool(INPUT) - }) -} - -trait AsyncConnectionModule { - this: Module 
with CoreplexNetworkModule with CoreplexRISCVPlatformModule { - val outer: AsyncConnection - val io: AsyncConnectionBundle - } => - - (outer.crossings zip io.tcrs) foreach { case (slaves, tcr) => - slaves.foreach { case (crossing, monitor) => - crossing.module.io.in_clock := clock - crossing.module.io.in_reset := reset - crossing.module.io.out_clock := tcr.clock - crossing.module.io.out_reset := tcr.reset - monitor.foreach { m => - m.module.clock := tcr.clock - m.module.reset := tcr.reset - } - } - } - - (tiles, uncoreTileIOs, io.tcrs).zipped foreach { case (tile, uncore, tcr) => - tile.clock := tcr.clock - tile.reset := tcr.reset - - (uncore.cached zip tile.io.cached) foreach { case (u, t) => u <> AsyncTileLinkFrom(tcr.clock, tcr.reset, t) } - (uncore.uncached zip tile.io.uncached) foreach { case (u, t) => u <> AsyncUTileLinkFrom(tcr.clock, tcr.reset, t) } - - val ti = tile.io.interrupts - val ui = uncore.interrupts - ti.debug := LevelSyncTo(tcr.clock, ui.debug) - ti.mtip := LevelSyncTo(tcr.clock, ui.mtip) - ti.msip := LevelSyncTo(tcr.clock, ui.msip) - ti.meip := LevelSyncTo(tcr.clock, ui.meip) - ti.seip.foreach { _ := LevelSyncTo(tcr.clock, ui.seip.get) } - - tile.io.hartid := uncore.hartid - tile.io.resetVector := uncore.resetVector - } -} - class MultiClockCoreplex(implicit p: Parameters) extends BaseCoreplex - with AsyncConnection { + with CoreplexRISCVPlatform + with AsyncRocketPlex { override lazy val module = new MultiClockCoreplexModule(this, () => new MultiClockCoreplexBundle(this)) } class MultiClockCoreplexBundle[+L <: MultiClockCoreplex](_outer: L) extends BaseCoreplexBundle(_outer) - with AsyncConnectionBundle + with CoreplexRISCVPlatformBundle + with AsyncRocketPlexBundle class MultiClockCoreplexModule[+L <: MultiClockCoreplex, +B <: MultiClockCoreplexBundle[L]](_outer: L, _io: () => B) extends BaseCoreplexModule(_outer, _io) - with AsyncConnectionModule + with CoreplexRISCVPlatformModule + with AsyncRocketPlexModule diff --git 
a/src/main/scala/coreplex/RISCVPlatform.scala b/src/main/scala/coreplex/RISCVPlatform.scala new file mode 100644 index 00000000..18fb9368 --- /dev/null +++ b/src/main/scala/coreplex/RISCVPlatform.scala @@ -0,0 +1,72 @@ +package coreplex + +import Chisel._ +import config._ +import junctions._ +import diplomacy._ +import uncore.tilelink._ +import uncore.tilelink2._ +import uncore.coherence._ +import uncore.agents._ +import uncore.devices._ +import uncore.util._ +import uncore.converters._ +import rocket._ +import util._ + +trait CoreplexRISCVPlatform extends CoreplexNetwork { + val module: CoreplexRISCVPlatformModule + + val debug = LazyModule(new TLDebugModule()) + val plic = LazyModule(new TLPLIC(hasSupervisor, maxPriorities = 7)) + val clint = LazyModule(new CoreplexLocalInterrupter) + + debug.node := TLFragmenter(cbus_beatBytes, cbus_lineBytes)(cbus.node) + plic.node := TLFragmenter(cbus_beatBytes, cbus_lineBytes)(cbus.node) + clint.node := TLFragmenter(cbus_beatBytes, cbus_lineBytes)(cbus.node) + + plic.intnode := intBar.intnode +} + +trait CoreplexRISCVPlatformBundle extends CoreplexNetworkBundle { + val outer: CoreplexRISCVPlatform + + val debug = new AsyncDebugBusIO().flip + val rtcToggle = Bool(INPUT) + val resetVector = UInt(INPUT, p(XLen)) +} + +trait CoreplexRISCVPlatformModule extends CoreplexNetworkModule { + val outer: CoreplexRISCVPlatform + val io: CoreplexRISCVPlatformBundle + + // Synchronize the debug bus into the coreplex + outer.debug.module.io.db <> FromAsyncDebugBus(io.debug) + + // Synchronize the rtc into the coreplex + val rtcSync = ShiftRegister(io.rtcToggle, 3) + val rtcLast = Reg(init = Bool(false), next=rtcSync) + outer.clint.module.io.rtcTick := Reg(init = Bool(false), next=(rtcSync & (~rtcLast))) + + println("\nGenerated Address Map") + for (entry <- p(rocketchip.GlobalAddrMap).flatten) { + val name = entry.name + val start = entry.region.start + val end = entry.region.start + entry.region.size - 1 + val prot = entry.region.attr.prot 
+ val protStr = (if ((prot & AddrMapProt.R) > 0) "R" else "") + + (if ((prot & AddrMapProt.W) > 0) "W" else "") + + (if ((prot & AddrMapProt.X) > 0) "X" else "") + val cacheable = if (entry.region.attr.cacheable) " [C]" else "" + println(f"\t$name%s $start%x - $end%x, $protStr$cacheable") + } + + // Create and export the ConfigString + val managers = outer.l1tol2.node.edgesIn(0).manager.managers + val configString = rocketchip.GenerateConfigString(p, outer.clint, outer.plic, managers) + // Allow something else to override the config string + if (!ConfigStringOutput.contents.isDefined) { + ConfigStringOutput.contents = Some(configString) + } + println(s"\nGenerated Configuration String\n${ConfigStringOutput.contents.get}") +} diff --git a/src/main/scala/coreplex/RocketPlex.scala b/src/main/scala/coreplex/RocketPlex.scala new file mode 100644 index 00000000..5c85112e --- /dev/null +++ b/src/main/scala/coreplex/RocketPlex.scala @@ -0,0 +1,107 @@ +package coreplex + +import Chisel._ +import config._ +import diplomacy._ +import uncore.tilelink2._ +import uncore.coherence._ +import rocket._ +import uncore.devices.NTiles + +trait RocketPlex extends CoreplexRISCVPlatform { + val module: RocketPlexModule + + val rocketTiles = List.tabulate(p(NTiles)) { i => LazyModule(new RocketTile(i)) } + val tileIntNodes = rocketTiles.map { _ => IntInternalOutputNode() } + + tileIntNodes.foreach { _ := plic.intnode } + rocketTiles.foreach { r => + r.slaveNode.foreach { _ := cbus.node } + l1tol2.node := r.cachedOut + l1tol2.node := r.uncachedOut + } +} + +trait RocketPlexBundle extends CoreplexRISCVPlatformBundle { + val outer: CoreplexRISCVPlatform +} + +trait RocketPlexModule extends CoreplexRISCVPlatformModule { + val outer: RocketPlex + val io: RocketPlexBundle + + outer.rocketTiles.map(_.module).zipWithIndex.foreach { case (tile, i) => + tile.io.hartid := UInt(i) + tile.io.resetVector := io.resetVector + tile.io.interrupts := outer.clint.module.io.tiles(i) 
tile.io.interrupts.debug := outer.debug.module.io.debugInterrupts(i) + tile.io.interrupts.meip := outer.tileIntNodes(i).bundleOut(0)(0) + tile.io.interrupts.seip.foreach(_ := outer.tileIntNodes(i).bundleOut(0)(1)) + } +} + +class AsyncRocketTile(tileId: Int)(implicit p: Parameters) extends LazyModule { + val rocket = LazyModule(new RocketTile(tileId)) + + val cachedOut = TLAsyncOutputNode() + val uncachedOut = TLAsyncOutputNode() + val slaveNode = rocket.slaveNode.map(_ => TLAsyncInputNode()) + + cachedOut := TLAsyncCrossingSource()(rocket.cachedOut) + uncachedOut := TLAsyncCrossingSource()(rocket.uncachedOut) + (rocket.slaveNode zip slaveNode) foreach { case (r,n) => r := TLAsyncCrossingSink()(n) } + + lazy val module = new LazyModuleImp(this) { + val io = new Bundle { + val cached = cachedOut.bundleOut + val uncached = uncachedOut.bundleOut + val slave = slaveNode.map(_.bundleIn) + val hartid = UInt(INPUT, p(XLen)) + val interrupts = new TileInterrupts().asInput + val resetVector = UInt(INPUT, p(XLen)) + } + rocket.module.io.interrupts := ShiftRegister(io.interrupts, 3) + // signals that do not change: + rocket.module.io.hartid := io.hartid + rocket.module.io.resetVector := io.resetVector + } +} + +trait AsyncRocketPlex extends CoreplexRISCVPlatform { + val module: AsyncRocketPlexModule + + val rocketTiles = List.tabulate(p(NTiles)) { i => LazyModule(new AsyncRocketTile(i)) } + val tileIntNodes = rocketTiles.map { _ => IntInternalOutputNode() } + + tileIntNodes.foreach { _ := plic.intnode } + rocketTiles.foreach { r => + r.slaveNode.foreach { _ := TLAsyncCrossingSource()(cbus.node) } + l1tol2.node := TLAsyncCrossingSink()(r.cachedOut) + l1tol2.node := TLAsyncCrossingSink()(r.uncachedOut) + } +} + +trait AsyncRocketPlexBundle extends CoreplexRISCVPlatformBundle { + val outer: CoreplexRISCVPlatform + + val tcrs = Vec(nTiles, new Bundle { + val clock = Clock(INPUT) + val reset = Bool(INPUT) + }) +} + +trait AsyncRocketPlexModule extends CoreplexRISCVPlatformModule { 
+ val outer: AsyncRocketPlex + val io: AsyncRocketPlexBundle + + outer.rocketTiles.map(_.module).zipWithIndex.foreach { case (tile, i) => + tile.clock := io.tcrs(i).clock + tile.reset := io.tcrs(i).reset + tile.io.hartid := UInt(i) + tile.io.resetVector := io.resetVector + tile.io.interrupts := outer.clint.module.io.tiles(i) + tile.io.interrupts.debug := outer.debug.module.io.debugInterrupts(i) + tile.io.interrupts.meip := outer.tileIntNodes(i).bundleOut(0)(0) + tile.io.interrupts.seip.foreach(_ := outer.tileIntNodes(i).bundleOut(0)(1)) + } +} diff --git a/src/main/scala/groundtest/BusMasterTest.scala b/src/main/scala/groundtest/BusMasterTest.scala index 26bfdae1..8a745fc7 100644 --- a/src/main/scala/groundtest/BusMasterTest.scala +++ b/src/main/scala/groundtest/BusMasterTest.scala @@ -5,8 +5,8 @@ import uncore.tilelink._ import uncore.agents._ import uncore.coherence.{InnerTLId, OuterTLId} import util._ -import junctions.HasAddrMapParameters -import cde.Parameters +import rocketchip._ +import config._ /** * An example bus mastering devices that writes some preset data to memory. @@ -16,11 +16,10 @@ import cde.Parameters * means it has finished. 
*/ class ExampleBusMaster(implicit val p: Parameters) extends Module - with HasAddrMapParameters with HasTileLinkParameters { val mmioParams = p.alterPartial({ case TLId => p(InnerTLId) }) val memParams = p.alterPartial({ case TLId => p(OuterTLId) }) - val memStart = addrMap("mem").start + val memStart = p(ExtMem).base val memStartBlock = memStart >> p(CacheBlockOffsetBits) val io = new Bundle { @@ -69,7 +68,7 @@ class BusMasterTest(implicit p: Parameters) extends GroundTest()(p) s_req_check :: s_resp_check :: s_done :: Nil) = Enum(Bits(), 8) val state = Reg(init = s_idle) - val busMasterBlock = addrMap("io:pbus:busmaster").start >> p(CacheBlockOffsetBits) + val busMasterBlock = p(ExtBus).base >> p(CacheBlockOffsetBits) val start_acq = Put( client_xact_id = UInt(0), addr_block = UInt(busMasterBlock), diff --git a/src/main/scala/groundtest/CacheFillTest.scala b/src/main/scala/groundtest/CacheFillTest.scala index b2298cbc..685bc8bc 100644 --- a/src/main/scala/groundtest/CacheFillTest.scala +++ b/src/main/scala/groundtest/CacheFillTest.scala @@ -5,11 +5,12 @@ import uncore.tilelink._ import uncore.constants._ import uncore.agents._ import util._ -import cde.{Parameters, Field} +import config._ class CacheFillTest(implicit p: Parameters) extends GroundTest()(p) with HasTileLinkParameters { - val capacityKb: Int = p("L2_CAPACITY_IN_KB") + val l2Config = p(CacheName("L2")) + val capacityKb = l2Config.nSets * l2Config.nWays * l2Config.rowBits / (1024*8) val nblocks = capacityKb * 1024 / p(CacheBlockBytes) val s_start :: s_prefetch :: s_retrieve :: s_finished :: Nil = Enum(Bits(), 4) val state = Reg(init = s_start) diff --git a/src/main/scala/groundtest/Comparator.scala b/src/main/scala/groundtest/Comparator.scala index 27f4ce3f..7324c6e6 100644 --- a/src/main/scala/groundtest/Comparator.scala +++ b/src/main/scala/groundtest/Comparator.scala @@ -7,7 +7,7 @@ import junctions._ import rocket._ import util.Timer import scala.util.Random -import cde.{Parameters, Field} +import 
config._ case class ComparatorParameters( targets: Seq[Long], diff --git a/src/main/scala/groundtest/Configs.scala b/src/main/scala/groundtest/Configs.scala index 5503e5a4..b0727f24 100644 --- a/src/main/scala/groundtest/Configs.scala +++ b/src/main/scala/groundtest/Configs.scala @@ -8,7 +8,7 @@ import uncore.coherence._ import uncore.agents._ import uncore.devices.NTiles import junctions._ -import cde.{Parameters, Config, Dump, Knob, CDEMatchError} +import config._ import scala.math.max import coreplex._ import rocketchip._ @@ -38,11 +38,11 @@ class MemtestStatelessConfig extends Config( // Test ALL the things class FancyMemtestConfig extends Config( new WithNGenerators(1, 2) ++ new WithNCores(2) ++ new WithMemtest ++ - new WithNMemoryChannels(2) ++ new WithNBanksPerMemChannel(4) ++ - new WithSplitL2Metadata ++ new WithL2Cache ++ new GroundTestConfig) + new WithNMemoryChannels(1) ++ new WithNBanksPerMemChannel(4) ++ // !!! waiting on Chisel3 support for 2 channels + new WithL2Cache ++ new GroundTestConfig) class CacheFillTestConfig extends Config( - new WithCacheFillTest ++ new WithPLRU ++ new WithL2Cache ++ new GroundTestConfig) + new WithNL2Ways(4) ++ new WithL2Capacity(4) ++ new WithCacheFillTest ++ new WithPLRU ++ new WithL2Cache ++ new GroundTestConfig) class BroadcastRegressionTestConfig extends Config( new WithBroadcastRegressionTest ++ new GroundTestConfig) @@ -73,38 +73,23 @@ class Edge32BitMemtestConfig extends Config( class WithGroundTest extends Config( (pname, site, here) => pname match { case TLKey("L1toL2") => { - val useMEI = site(NTiles) <= 1 && site(NCachedTileLinkPorts) <= 1 + val useMEI = site(NTiles) <= 1 val dataBeats = (8 * site(CacheBlockBytes)) / site(XLen) TileLinkParameters( coherencePolicy = ( if (useMEI) new MEICoherence(site(L2DirectoryRepresentation)) else new MESICoherence(site(L2DirectoryRepresentation))), - nManagers = site(NBanksPerMemoryChannel)*site(NMemoryChannels) + 1, - nCachingClients = site(NCachedTileLinkPorts), - 
nCachelessClients = site(NCoreplexExtClients) + site(NUncachedTileLinkPorts), + nManagers = site(BankedL2Config).nBanks + 1, + nCachingClients = 1, + nCachelessClients = 1, maxClientXacts = ((site(DCacheKey).nMSHRs + 1) +: site(GroundTestKey).map(_.maxXacts)) .reduce(max(_, _)), - maxClientsPerPort = 1, + maxClientsPerPort = site(GroundTestKey).map(_.uncached).sum, maxManagerXacts = site(NAcquireTransactors) + 2, dataBeats = dataBeats, dataBits = site(CacheBlockBytes)*8) } - case BuildTiles => { - (0 until site(NTiles)).map { i => - val tileSettings = site(GroundTestKey)(i) - (p: Parameters) => { - LazyModule(new GroundTestTile()(p.alterPartial({ - case TLId => "L1toL2" - case TileId => i - case NCachedTileLinkPorts => if(tileSettings.cached > 0) 1 else 0 - case NUncachedTileLinkPorts => tileSettings.uncached - }))) - } - } - } - case BuildExampleTop => - (p: Parameters) => LazyModule(new ExampleTopWithTestRAM(new GroundTestCoreplex()(_))(p)) case FPUKey => None case UseAtomics => false case UseCompressed => false @@ -119,15 +104,13 @@ class WithComparator extends Config( case BuildGroundTest => (p: Parameters) => Module(new ComparatorCore()(p)) case ComparatorKey => ComparatorParameters( - targets = Seq("mem", "TL2:testram").map(name => - site(GlobalAddrMap)(name).start.longValue), + targets = Seq(site(ExtMem).base, testRamAddr), width = 8, operations = 1000, atomics = site(UseAtomics), - prefetches = site("COMPARATOR_PREFETCHES")) + prefetches = false) case FPUConfig => None case UseAtomics => false - case "COMPARATOR_PREFETCHES" => false case _ => throw new CDEMatchError }) @@ -138,8 +121,8 @@ class WithAtomics extends Config( }) class WithPrefetches extends Config( - (pname, site, here) => pname match { - case "COMPARATOR_PREFETCHES" => true + (pname, site, here, up) => pname match { + case ComparatorKey => up(ComparatorKey).copy(prefetches = true) case _ => throw new CDEMatchError }) @@ -150,7 +133,7 @@ class WithMemtest extends Config( } case GeneratorKey => 
TrafficGeneratorParameters( maxRequests = 128, - startAddress = site(GlobalAddrMap)("mem").start) + startAddress = BigInt(site(ExtMem).base)) case BuildGroundTest => (p: Parameters) => Module(new GeneratorTest()(p)) case _ => throw new CDEMatchError @@ -172,11 +155,6 @@ class WithCacheFillTest extends Config( case BuildGroundTest => (p: Parameters) => Module(new CacheFillTest()(p)) case _ => throw new CDEMatchError - }, - knobValues = { - case "L2_WAYS" => 4 - case "L2_CAPACITY_IN_KB" => 4 - case _ => throw new CDEMatchError }) class WithBroadcastRegressionTest extends Config( @@ -204,7 +182,7 @@ class WithCacheRegressionTest extends Config( }) class WithTraceGen extends Config( - topDefinitions = (pname, site, here) => pname match { + (pname, site, here, up) => pname match { case GroundTestKey => Seq.fill(site(NTiles)) { GroundTestTileSettings(uncached = 1, cached = 1) } @@ -223,10 +201,6 @@ class WithTraceGen extends Config( }.flatten } case UseAtomics => true - case _ => throw new CDEMatchError - }, - knobValues = { - case "L1D_SETS" => 16 - case "L1D_WAYS" => 1 + case CacheName("L1D") => up(CacheName("L1D")).copy(nSets = 16, nWays = 1) case _ => throw new CDEMatchError }) diff --git a/src/main/scala/groundtest/Coreplex.scala b/src/main/scala/groundtest/Coreplex.scala index 738b02f8..23529037 100644 --- a/src/main/scala/groundtest/Coreplex.scala +++ b/src/main/scala/groundtest/Coreplex.scala @@ -1,17 +1,37 @@ package groundtest import Chisel._ -import cde.{Parameters} +import config._ +import diplomacy._ import coreplex._ +import uncore.devices.NTiles +import uncore.tilelink2._ +import rocket.TileId +import uncore.tilelink.TLId + +class GroundTestCoreplex(implicit p: Parameters) extends BaseCoreplex { + val tiles = List.tabulate(p(NTiles)) { i => + LazyModule(new GroundTestTile()(p.alterPartial({ + case TLId => "L1toL2" + case TileId => i + }))) + } + tiles.foreach { lm => + l1tol2.node := lm.cachedOut + l1tol2.node := lm.uncachedOut + } + + val cbusRAM = 
LazyModule(new TLRAM(AddressSet(testRamAddr, 0xffff), false, cbus_beatBytes)) + cbusRAM.node := TLFragmenter(cbus_beatBytes, cbus_lineBytes)(cbus.node) -class GroundTestCoreplex(implicit p: Parameters) extends BaseCoreplex - with DirectConnection { override lazy val module = new GroundTestCoreplexModule(this, () => new GroundTestCoreplexBundle(this)) } class GroundTestCoreplexBundle[+L <: GroundTestCoreplex](_outer: L) extends BaseCoreplexBundle(_outer) - -class GroundTestCoreplexModule[+L <: GroundTestCoreplex, +B <: GroundTestCoreplexBundle[L]](_outer: L, _io: () => B) extends BaseCoreplexModule(_outer, _io) - with DirectConnectionModule { - io.success := tiles.flatMap(_.io.elements get "success").map(_.asInstanceOf[Bool]).reduce(_&&_) +{ + val success = Bool(OUTPUT) +} + +class GroundTestCoreplexModule[+L <: GroundTestCoreplex, +B <: GroundTestCoreplexBundle[L]](_outer: L, _io: () => B) extends BaseCoreplexModule(_outer, _io) { + io.success := outer.tiles.map(_.module.io.success).reduce(_&&_) } diff --git a/src/main/scala/groundtest/Package.scala b/src/main/scala/groundtest/Package.scala new file mode 100644 index 00000000..e91d9fd1 --- /dev/null +++ b/src/main/scala/groundtest/Package.scala @@ -0,0 +1,3 @@ +package object groundtest { + val testRamAddr = 0x10000 +} diff --git a/src/main/scala/groundtest/Regression.scala b/src/main/scala/groundtest/Regression.scala index 3db5ad74..6652c583 100644 --- a/src/main/scala/groundtest/Regression.scala +++ b/src/main/scala/groundtest/Regression.scala @@ -5,9 +5,9 @@ import uncore.tilelink._ import uncore.constants._ import uncore.agents._ import util._ -import junctions.HasAddrMapParameters import rocket._ -import cde.{Parameters, Field} +import rocketchip._ +import config._ class RegressionIO(implicit val p: Parameters) extends ParameterizedBundle()(p) { val start = Bool(INPUT) @@ -18,8 +18,8 @@ class RegressionIO(implicit val p: Parameters) extends ParameterizedBundle()(p) } abstract class Regression(implicit val p: 
Parameters) - extends Module with HasTileLinkParameters with HasAddrMapParameters { - val memStart = addrMap("mem").start + extends Module with HasTileLinkParameters { + val memStart = p(ExtMem).base val memStartBlock = memStart >> p(CacheBlockOffsetBits) val io = new RegressionIO @@ -71,7 +71,7 @@ class IOGetAfterPutBlockRegression(implicit p: Parameters) extends Regression()( io.mem.grant.ready := Bool(true) io.cache.req.valid := !get_sent && started - io.cache.req.bits.addr := UInt(addrMap("TL2:bootrom").start) + io.cache.req.bits.addr := UInt(testRamAddr) io.cache.req.bits.typ := MT_WU io.cache.req.bits.cmd := M_XRD io.cache.req.bits.tag := UInt(0) @@ -107,8 +107,7 @@ class PutBlockMergeRegression(implicit p: Parameters) disableCache() - val l2params = p.alterPartial({ case CacheName => "L2Bank" }) - val nSets = l2params(NSets) + val nSets = p(CacheName("L2")).nSets val addr_blocks = Vec(Seq(0, 0, nSets).map(num => UInt(num + memStartBlock))) val nSteps = addr_blocks.size val (acq_beat, acq_done) = Counter(io.mem.acquire.fire(), tlDataBeats) @@ -385,38 +384,6 @@ class PrefetchHitRegression(implicit p: Parameters) extends Regression()(p) { io.errored := Bool(false) } -/* This tests the sort of access the pattern that Hwacha uses. - * Instead of using PutBlock/GetBlock, it uses word-sized puts and gets - * to the same block. - * Each request has the same client_xact_id, but there are multiple in flight. - * The responses therefore must come back in the order they are sent. 
*/ -class SequentialSameIdGetRegression(implicit p: Parameters) extends Regression()(p) { - disableCache() - - val sending = Reg(init = Bool(false)) - val finished = Reg(init = Bool(false)) - - val (send_cnt, send_done) = Counter(io.mem.acquire.fire(), tlDataBeats) - val (recv_cnt, recv_done) = Counter(io.mem.grant.fire(), tlDataBeats) - - when (!sending && io.start) { sending := Bool(true) } - when (send_done) { sending := Bool(false) } - when (recv_done) { finished := Bool(true) } - - io.mem.acquire.valid := sending - io.mem.acquire.bits := Get( - client_xact_id = UInt(0), - addr_block = UInt(memStartBlock + 9), - addr_beat = send_cnt) - io.mem.grant.ready := !finished - - io.finished := finished - - val beat_mismatch = io.mem.grant.fire() && io.mem.grant.bits.addr_beat =/= recv_cnt - assert(!beat_mismatch, "SequentialSameIdGetRegression: grant received out of order") - io.errored := beat_mismatch -} - /* Test that a writeback will occur by writing nWays + 1 blocks to the same * set. This assumes that there is only a single cache bank. 
If we want to * test multibank configurations, we'll have to think of some other way to @@ -424,9 +391,8 @@ class SequentialSameIdGetRegression(implicit p: Parameters) extends Regression() class WritebackRegression(implicit p: Parameters) extends Regression()(p) { disableCache() - val l2params = p.alterPartial({ case CacheName => "L2Bank" }) - val nSets = l2params(NSets) - val nWays = l2params(NWays) + val nSets = p(CacheName("L2")).nSets + val nWays = p(CacheName("L2")).nWays val addr_blocks = Vec.tabulate(nWays + 1) { i => UInt(memStartBlock + i * nSets) } val data = Vec.tabulate(nWays + 1) { i => UInt((i + 1) * 1423) } @@ -477,10 +443,9 @@ class WritebackRegression(implicit p: Parameters) extends Regression()(p) { class ReleaseRegression(implicit p: Parameters) extends Regression()(p) { disableMem() - val l1params = p.alterPartial({ case CacheName => "L1D" }) - val nSets = l1params(NSets) - val nWays = l1params(NWays) - val blockOffset = l1params(CacheBlockOffsetBits) + val nSets = p(CacheName("L1D")).nSets + val nWays = p(CacheName("L1D")).nWays + val blockOffset = p(CacheBlockOffsetBits) val startBlock = memStartBlock + 10 val addr_blocks = Vec.tabulate(nWays + 1) { i => UInt(startBlock + i * nSets) } @@ -565,9 +530,8 @@ class PutBeforePutBlockRegression(implicit p: Parameters) extends Regression()(p class MergedGetRegression(implicit p: Parameters) extends Regression()(p) { disableCache() - val l2params = p.alterPartial({ case CacheName => "L2Bank" }) - val nSets = l2params(NSets) - val nWays = l2params(NWays) + val nSets = p(CacheName("L2")).nSets + val nWays = p(CacheName("L2")).nWays val (s_idle :: s_put :: s_get :: s_done :: Nil) = Enum(Bits(), 4) val state = Reg(init = s_idle) @@ -634,7 +598,7 @@ class MergedPutRegression(implicit p: Parameters) extends Regression()(p) val delaying = Reg(init = Bool(false)) val (put_cnt, put_done) = Counter(io.mem.acquire.fire(), tlMaxClientXacts) val (delay_cnt, delay_done) = Counter(delaying, 8) - val put_acked = 
Reg(UInt(width = 3), init = UInt(0)) + val put_acked = Reg(UInt(width = tlMaxClientXacts), init = UInt(0)) io.mem.acquire.valid := sending && !delaying io.mem.acquire.bits := Mux(state === s_put, @@ -736,7 +700,6 @@ object RegressionTests { Module(new RepeatedNoAllocPutRegression), Module(new WriteMaskedPutBlockRegression), Module(new PrefetchHitRegression), - Module(new SequentialSameIdGetRegression), Module(new WritebackRegression), Module(new PutBeforePutBlockRegression), Module(new MixedAllocPutRegression), @@ -760,12 +723,15 @@ class RegressionTest(implicit p: Parameters) extends GroundTest()(p) { val all_done = (regress_idx === UInt(regressions.size)) val start = Reg(init = Bool(true)) + // Some tests randomly backpressure grant; make this safe: + val grant = Queue(io.mem.head.grant, 16) + // default output values io.mem.head.acquire.valid := Bool(false) io.mem.head.acquire.bits := GetBlock( client_xact_id = UInt(0), addr_block = UInt(0)) - io.mem.head.grant.ready := Bool(false) + grant.ready := Bool(false) io.cache.head.req.valid := Bool(false) io.cache.head.req.bits.addr := UInt(0) io.cache.head.req.bits.typ := UInt(log2Ceil(64 / 8)) @@ -779,8 +745,8 @@ class RegressionTest(implicit p: Parameters) extends GroundTest()(p) { val me = regress_idx === UInt(i) regress.io.start := me && start regress.io.mem.acquire.ready := io.mem.head.acquire.ready && me - regress.io.mem.grant.valid := io.mem.head.grant.valid && me - regress.io.mem.grant.bits := io.mem.head.grant.bits + regress.io.mem.grant.valid := grant.valid && me + regress.io.mem.grant.bits := grant.bits regress.io.cache.req.ready := io.cache.head.req.ready && me regress.io.cache.resp.valid := io.cache.head.resp.valid && me regress.io.cache.resp.bits := io.cache.head.resp.bits @@ -788,7 +754,7 @@ class RegressionTest(implicit p: Parameters) extends GroundTest()(p) { when (me) { io.mem.head.acquire.valid := regress.io.mem.acquire.valid io.mem.head.acquire.bits := regress.io.mem.acquire.bits - 
io.mem.head.grant.ready := regress.io.mem.grant.ready + grant.ready := regress.io.mem.grant.ready io.cache.head.req.valid := regress.io.cache.req.valid io.cache.head.req.bits := regress.io.cache.req.bits io.cache.head.invalidate_lr := regress.io.cache.invalidate_lr @@ -815,11 +781,11 @@ class RegressionTest(implicit p: Parameters) extends GroundTest()(p) { io.status.timeout.valid := timeout io.status.timeout.bits := UInt(0) - assert(!(all_done && io.mem.head.grant.valid), + assert(!(all_done && grant.valid), "Getting grant after test completion") when (all_done) { - io.status.error.valid := io.mem.head.grant.valid + io.status.error.valid := grant.valid io.status.error.bits := UInt(regressions.size) } } diff --git a/src/main/scala/groundtest/TestHarness.scala b/src/main/scala/groundtest/TestHarness.scala index dbabf0da..fd265858 100644 --- a/src/main/scala/groundtest/TestHarness.scala +++ b/src/main/scala/groundtest/TestHarness.scala @@ -1,7 +1,25 @@ package groundtest import Chisel._ -import cde.Parameters +import diplomacy._ +import config._ +import rocketchip._ +import util._ -// !!! 
TODO: Replace with a groundtest-specific test harness -class TestHarness(implicit p: Parameters) extends rocketchip.TestHarness(p) +class TestHarness(q: Parameters) extends Module { + val io = new Bundle { + val success = Bool(OUTPUT) + } + implicit val p = q + + val dut = Module(LazyModule(new GroundTestTop(new GroundTestCoreplex()(_))).module) + io.success := dut.io.success + + if (dut.io.mem_axi4.nonEmpty) { + val memSize = p(ExtMem).size + require(memSize % dut.io.mem_axi4.size == 0) + for (axi <- dut.io.mem_axi4) { + Module(LazyModule(new SimAXIMem(memSize / dut.io.mem_axi4.size)).module).io.axi <> axi + } + } +} diff --git a/src/main/scala/groundtest/Tile.scala b/src/main/scala/groundtest/Tile.scala index f0973dc4..d889bfb3 100644 --- a/src/main/scala/groundtest/Tile.scala +++ b/src/main/scala/groundtest/Tile.scala @@ -3,11 +3,14 @@ package groundtest import Chisel._ import rocket._ import uncore.tilelink._ +import uncore.agents.CacheName +import uncore.tilelink2._ +import rocketchip.ExtMem +import diplomacy._ import scala.util.Random import scala.collection.mutable.ListBuffer -import junctions.HasAddrMapParameters import util.ParameterizedBundle -import cde.{Parameters, Field} +import config._ case object BuildGroundTest extends Field[Parameters => GroundTest] @@ -20,13 +23,13 @@ trait HasGroundTestConstants { val errorCodeBits = 4 } -trait HasGroundTestParameters extends HasAddrMapParameters { +trait HasGroundTestParameters { implicit val p: Parameters val tileSettings = p(GroundTestKey)(p(TileId)) val nUncached = tileSettings.uncached val nCached = tileSettings.cached val nPTW = tileSettings.ptw - val memStart = addrMap("mem").start + val memStart = p(ExtMem).base val memStartBlock = memStart >> p(CacheBlockOffsetBits) } @@ -96,20 +99,31 @@ abstract class GroundTest(implicit val p: Parameters) extends Module val io = new GroundTestIO } -class GroundTestTile(implicit val p: Parameters) extends LazyTile { +class GroundTestTile(implicit val p: Parameters) 
extends LazyModule with HasGroundTestParameters { + val dcacheParams = p.alterPartial({ case CacheName => CacheName("L1D") }) val slave = None - lazy val module = new TileImp(this) with HasGroundTestParameters { - val io = new TileIO(bc) { + val dcache = HellaCache(p(DCacheKey))(dcacheParams) + val ucLegacy = LazyModule(new TLLegacy()(p)) + + val cachedOut = TLOutputNode() + val uncachedOut = TLOutputNode() + cachedOut := dcache.node + uncachedOut := TLHintHandler()(ucLegacy.node) + val masterNodes = List(cachedOut, uncachedOut) + + lazy val module = new LazyModuleImp(this) { + val io = new Bundle { + val cached = cachedOut.bundleOut + val uncached = uncachedOut.bundleOut val success = Bool(OUTPUT) } val test = p(BuildGroundTest)(dcacheParams) val ptwPorts = ListBuffer.empty ++= test.io.ptw - val memPorts = ListBuffer.empty ++= test.io.mem + val uncachedArbPorts = ListBuffer.empty ++= test.io.mem if (nCached > 0) { - val dcache_io = HellaCache(p(DCacheKey))(dcacheParams) val dcacheArb = Module(new HellaCacheArbiter(nCached)(dcacheParams)) dcacheArb.io.requestor.zip(test.io.cache).foreach { @@ -118,13 +132,12 @@ class GroundTestTile(implicit val p: Parameters) extends LazyTile { dcacheIF.io.requestor <> cache requestor <> dcacheIF.io.cache } - dcache_io.cpu <> dcacheArb.io.mem - io.cached.head <> dcache_io.mem + dcache.module.io.cpu <> dcacheArb.io.mem // SimpleHellaCacheIF leaves invalidate_lr dangling, so we wire it to false - dcache_io.cpu.invalidate_lr := Bool(false) + dcache.module.io.cpu.invalidate_lr := Bool(false) - ptwPorts += dcache_io.ptw + ptwPorts += dcache.module.io.ptw } if (ptwPorts.size > 0) { @@ -132,9 +145,13 @@ class GroundTestTile(implicit val p: Parameters) extends LazyTile { ptw.io.requestors <> ptwPorts } - require(memPorts.size == io.uncached.size) - if (memPorts.size > 0) { - io.uncached <> memPorts + if (uncachedArbPorts.isEmpty) { + ucLegacy.module.io.legacy.acquire.valid := Bool(false) + ucLegacy.module.io.legacy.grant.ready := 
Bool(true) + } else { + val uncachedArb = Module(new ClientUncachedTileLinkIOArbiter(uncachedArbPorts.size)) + uncachedArb.io.in <> uncachedArbPorts + ucLegacy.module.io.legacy <> uncachedArb.io.out } io.success := test.io.status.finished diff --git a/src/main/scala/groundtest/Top.scala b/src/main/scala/groundtest/Top.scala new file mode 100644 index 00000000..613bb39a --- /dev/null +++ b/src/main/scala/groundtest/Top.scala @@ -0,0 +1,26 @@ +package groundtest + +import Chisel._ +import config._ +import diplomacy._ +import coreplex._ +import rocketchip._ + +class GroundTestTop[+C <: GroundTestCoreplex](_coreplex: Parameters => C)(implicit p: Parameters) extends BaseTop(_coreplex) + with DirectConnection + with PeripheryMasterAXI4Mem + with PeripheryTestRAM { + override lazy val module = new GroundTestTopModule(this, () => new GroundTestTopBundle(this)) +} + +class GroundTestTopBundle[+L <: GroundTestTop[GroundTestCoreplex]](_outer: L) extends BaseTopBundle(_outer) + with PeripheryMasterAXI4MemBundle + with PeripheryTestRAMBundle { + val success = Bool(OUTPUT) +} + +class GroundTestTopModule[+L <: GroundTestTop[GroundTestCoreplex], +B <: GroundTestTopBundle[L]](_outer: L, _io: () => B) extends BaseTopModule(_outer, _io) + with PeripheryMasterAXI4MemModule + with PeripheryTestRAMModule { + io.success := outer.coreplex.module.io.success +} diff --git a/src/main/scala/groundtest/TraceGen.scala b/src/main/scala/groundtest/TraceGen.scala index 97ec6682..1280a7db 100644 --- a/src/main/scala/groundtest/TraceGen.scala +++ b/src/main/scala/groundtest/TraceGen.scala @@ -24,7 +24,7 @@ import junctions._ import rocket._ import util.{Timer, DynamicTimer} import scala.util.Random -import cde.{Parameters, Field} +import config._ // ======= // Outline @@ -178,7 +178,6 @@ class TagMan(val logNumTags : Int) extends Module { class TraceGenerator(id: Int) (implicit p: Parameters) extends L1HellaCacheModule()(p) - with HasAddrMapParameters with HasTraceGenParams with 
HasGroundTestParameters { val io = new Bundle { diff --git a/src/main/scala/groundtest/TrafficGenerator.scala b/src/main/scala/groundtest/TrafficGenerator.scala index af2aeb50..fd1f7194 100644 --- a/src/main/scala/groundtest/TrafficGenerator.scala +++ b/src/main/scala/groundtest/TrafficGenerator.scala @@ -8,7 +8,7 @@ import junctions._ import rocket._ import util.SimpleTimer import scala.util.Random -import cde.{Parameters, Field} +import config._ case class TrafficGeneratorParameters( maxRequests: Int, diff --git a/src/main/scala/junctions/NastiDriver.scala b/src/main/scala/junctions/NastiDriver.scala index 685743d2..f7de9da4 100644 --- a/src/main/scala/junctions/NastiDriver.scala +++ b/src/main/scala/junctions/NastiDriver.scala @@ -1,7 +1,7 @@ package junctions import Chisel._ -import cde.Parameters +import config._ class NastiDriver(dataWidth: Int, burstLen: Int, nBursts: Int) (implicit p: Parameters) extends NastiModule { diff --git a/src/main/scala/junctions/addrmap.scala b/src/main/scala/junctions/addrmap.scala index 86fdd276..a94ea32d 100644 --- a/src/main/scala/junctions/addrmap.scala +++ b/src/main/scala/junctions/addrmap.scala @@ -3,7 +3,7 @@ package junctions import Chisel._ -import cde.{Parameters, Field} +import config._ import scala.collection.mutable.HashMap case object PAddrBits extends Field[Int] diff --git a/src/main/scala/junctions/hasti.scala b/src/main/scala/junctions/hasti.scala index 9a36048f..d9c08d43 100644 --- a/src/main/scala/junctions/hasti.scala +++ b/src/main/scala/junctions/hasti.scala @@ -1,7 +1,7 @@ package junctions import Chisel._ -import cde.{Parameters, Field} +import config._ import unittest.UnitTest import util.ParameterizedBundle diff --git a/src/main/scala/junctions/jtag.scala b/src/main/scala/junctions/jtag.scala index a9613637..485ab4da 100644 --- a/src/main/scala/junctions/jtag.scala +++ b/src/main/scala/junctions/jtag.scala @@ -1,6 +1,6 @@ package junctions import Chisel._ -import cde.{Parameters} +import config._ class 
JTAGIO(drvTdo: Boolean = false) extends Bundle { val TCK = Clock(OUTPUT) diff --git a/src/main/scala/junctions/nasti.scala b/src/main/scala/junctions/nasti.scala index cab7ecd0..acc9e21f 100644 --- a/src/main/scala/junctions/nasti.scala +++ b/src/main/scala/junctions/nasti.scala @@ -5,7 +5,7 @@ import Chisel._ import scala.math.max import scala.collection.mutable.ArraySeq import util._ -import cde.{Parameters, Field} +import config._ case object NastiKey extends Field[NastiParameters] diff --git a/src/main/scala/junctions/poci.scala b/src/main/scala/junctions/poci.scala index ac089164..7fd40205 100644 --- a/src/main/scala/junctions/poci.scala +++ b/src/main/scala/junctions/poci.scala @@ -1,7 +1,7 @@ package junctions import Chisel._ -import cde.{Parameters, Field} +import config._ class PociIO(implicit p: Parameters) extends HastiBundle()(p) { diff --git a/src/main/scala/junctions/stream.scala b/src/main/scala/junctions/stream.scala index 24f9ad58..c71dfd8d 100644 --- a/src/main/scala/junctions/stream.scala +++ b/src/main/scala/junctions/stream.scala @@ -2,7 +2,7 @@ package junctions import Chisel._ import NastiConstants._ -import cde.Parameters +import config._ class StreamChannel(w: Int) extends Bundle { val data = UInt(width = w) diff --git a/src/main/scala/rocket/dcache.scala b/src/main/scala/rocket/Dcache.scala similarity index 58% rename from src/main/scala/rocket/dcache.scala rename to src/main/scala/rocket/Dcache.scala index 8e908dcd..4c44943f 100644 --- a/src/main/scala/rocket/dcache.scala +++ b/src/main/scala/rocket/Dcache.scala @@ -3,17 +3,15 @@ package rocket import Chisel._ -import junctions._ +import Chisel.ImplicitConversions._ import diplomacy._ -import uncore.tilelink._ -import uncore.tilelink2._ import uncore.agents._ -import uncore.coherence._ import uncore.constants._ +import uncore.tilelink2._ import uncore.util._ import util._ -import Chisel.ImplicitConversions._ -import cde.{Parameters, Field} +import TLMessages._ +import config._ class 
DCacheDataReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) { val addr = Bits(width = untagBits) @@ -41,18 +39,17 @@ class DCacheDataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) { } } -class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { - val io = new Bundle { - val cpu = (new HellaCacheIO).flip - val ptw = new TLBPTWIO() - val mem = new ClientTileLinkIO - } +class DCache(cfg: DCacheConfig)(implicit p: Parameters) extends HellaCache(cfg)(p) { + override lazy val module = new DCacheModule(this) +} - val fq = Module(new FinishQueue(1)) +class DCacheModule(outer: DCache)(implicit p: Parameters) extends HellaCacheModule(outer)(p) { + + val maxUncachedInFlight = cfg.nMMIOs require(rowBits == encRowBits) // no ECC - require(refillCyclesPerBeat == 1) - require(rowBits >= coreDataBits) + + val grantackq = Module(new Queue(tl_out.e.bits,1)) // TODO don't need this in scratchpad mode // tags val replacer = p(Replacer)() @@ -67,8 +64,8 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { dataArb.io.out.ready := true val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false)) - val s1_probe = Reg(next=io.mem.probe.fire(), init=Bool(false)) - val probe_bits = RegEnable(io.mem.probe.bits, io.mem.probe.fire()) + val s1_probe = Reg(next=tl_out.b.fire(), init=Bool(false)) + val probe_bits = RegEnable(tl_out.b.bits, tl_out.b.fire()) // TODO has data now :( val s1_nack = Wire(init=Bool(false)) val s1_valid_masked = s1_valid && !io.cpu.s1_kill && !io.cpu.xcpt.asUInt.orR val s1_valid_not_nacked = s1_valid_masked && !s1_nack @@ -93,7 +90,6 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.cpu.req.ready := (release_state === s_ready) && !cached_grant_wait && !s1_nack // I/O MSHRs - val maxUncachedInFlight = (1 << io.mem.acquire.bits.client_xact_id.getWidth) - 1 val uncachedInFlight = Reg(init=Vec.fill(maxUncachedInFlight)(Bool(false))) val uncachedReqs = Reg(Vec(maxUncachedInFlight, new 
HellaCacheReq)) @@ -120,15 +116,15 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { when (s1_valid && s1_readwrite && tlb.io.resp.miss) { s1_nack := true } val s1_paddr = Cat(tlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0)) - val s1_tag = Mux(s1_probe, probe_bits.addr_block >> idxBits, s1_paddr(paddrBits-1, untagBits)) + val s1_tag = Mux(s1_probe, probe_bits.address, s1_paddr)(paddrBits-1, untagBits) val s1_victim_way = Wire(init = replacer.way) val (s1_hit_way, s1_hit_state, s1_victim_meta) = if (usingDataScratchpad) { require(nWays == 1) metaWriteArb.io.out.ready := true metaReadArb.io.out.ready := !metaWriteArb.io.out.valid - val inScratchpad = addrMap(s"TL2:dmem${tileId}").containsAddress(s1_paddr) - val hitState = Mux(inScratchpad, ClientMetadata.onReset.onHit(M_XWR), ClientMetadata.onReset) + val inScratchpad = addrMap(s"TL2:dmem${p(TileId)}").containsAddress(s1_paddr) + val hitState = Mux(inScratchpad, ClientMetadata.maximum, ClientMetadata.onReset) (inScratchpad, hitState, L1Metadata(UInt(0), ClientMetadata.onReset)) } else { val meta = Module(new MetadataArray(onReset _)) @@ -149,6 +145,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val releaseInFlight = s1_probe || s2_probe || release_state =/= s_ready val s2_valid_masked = s2_valid && Reg(next = !s1_nack) val s2_req = Reg(io.cpu.req.bits) + val s2_req_block_addr = (s2_req.addr >> idxLSB) << idxLSB val s2_uncached = Reg(Bool()) when (s1_valid_not_nacked || s1_flush_valid) { s2_req := s1_req @@ -164,20 +161,21 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val s2_probe_state = RegEnable(s1_hit_state, s1_probe) val s2_hit_way = RegEnable(s1_hit_way, s1_valid_not_nacked) val s2_hit_state = RegEnable(s1_hit_state, s1_valid_not_nacked) - val s2_hit = s2_hit_state.isHit(s2_req.cmd) + val s2_hit_valid = s2_hit_state.isValid() + val (s2_hit, s2_grow_param, s2_new_hit_state) = s2_hit_state.onAccess(s2_req.cmd) val s2_valid_hit = 
s2_valid_masked && s2_readwrite && s2_hit val s2_valid_miss = s2_valid_masked && s2_readwrite && !s2_hit && !(pstore1_valid || pstore2_valid) && !release_ack_wait val s2_valid_cached_miss = s2_valid_miss && !s2_uncached val s2_victimize = s2_valid_cached_miss || s2_flush_valid val s2_valid_uncached = s2_valid_miss && s2_uncached - val s2_victim_way = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_way, UIntToOH(RegEnable(s1_victim_way, s1_valid_not_nacked || s1_flush_valid))) + val s2_victim_way = Mux(s2_hit_valid && !s2_flush_valid, s2_hit_way, UIntToOH(RegEnable(s1_victim_way, s1_valid_not_nacked || s1_flush_valid))) val s2_victim_tag = RegEnable(s1_victim_meta.tag, s1_valid_not_nacked || s1_flush_valid) - val s2_victim_state = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_state, RegEnable(s1_victim_meta.coh, s1_valid_not_nacked || s1_flush_valid)) + val s2_victim_state = Mux(s2_hit_valid && !s2_flush_valid, s2_hit_state, RegEnable(s1_victim_meta.coh, s1_valid_not_nacked || s1_flush_valid)) val s2_victim_valid = s2_victim_state.isValid() - val s2_victim_dirty = s2_victim_state.requiresVoluntaryWriteback() - val s2_new_hit_state = s2_hit_state.onHit(s2_req.cmd) + val (s2_prb_ack_data, s2_report_param, probeNewCoh)= s2_probe_state.onProbe(probe_bits.param) + val (s2_victim_dirty, s2_shrink_param, voluntaryNewCoh) = s2_victim_state.onCacheControl(M_FLUSH) val s2_update_meta = s2_hit_state =/= s2_new_hit_state - io.cpu.s2_nack := s2_valid && !s2_valid_hit && !(s2_valid_uncached && io.mem.acquire.ready && !uncachedInFlight.asUInt.andR) + io.cpu.s2_nack := s2_valid && !s2_valid_hit && !(s2_valid_uncached && tl_out.a.ready && !uncachedInFlight.asUInt.andR) when (s2_valid && (!s2_valid_hit || s2_update_meta)) { s1_nack := true } // exceptions @@ -250,105 +248,100 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { metaWriteArb.io.in(0).bits.data.coh := Mux(s2_valid_hit, s2_new_hit_state, ClientMetadata.onReset) 
metaWriteArb.io.in(0).bits.data.tag := s2_req.addr(paddrBits-1, untagBits) - // acquire - val xact_id = PriorityEncoder(~uncachedInFlight.asUInt) - val cachedGetMessage = s2_hit_state.makeAcquire( - client_xact_id = UInt(maxUncachedInFlight - 1), - addr_block = s2_req.addr(paddrBits-1, blockOffBits), - op_code = s2_req.cmd) - val uncachedGetMessage = Get( - client_xact_id = xact_id, - addr_block = s2_req.addr(paddrBits-1, blockOffBits), - addr_beat = s2_req.addr(blockOffBits-1, beatOffBits), - addr_byte = s2_req.addr(beatOffBits-1, 0), - operand_size = s2_req.typ, - alloc = Bool(false)) - val uncachedPutOffset = s2_req.addr.extract(beatOffBits-1, wordOffBits) - val uncachedPutMessage = Put( - client_xact_id = xact_id, - addr_block = s2_req.addr(paddrBits-1, blockOffBits), - addr_beat = s2_req.addr(blockOffBits-1, beatOffBits), - data = Fill(beatWords, pstore1_storegen.data), - wmask = Some(pstore1_storegen.mask << (uncachedPutOffset << wordOffBits)), - alloc = Bool(false)) - val uncachedPutAtomicMessage = PutAtomic( - client_xact_id = xact_id, - addr_block = s2_req.addr(paddrBits-1, blockOffBits), - addr_beat = s2_req.addr(blockOffBits-1, beatOffBits), - addr_byte = s2_req.addr(beatOffBits-1, 0), - atomic_opcode = s2_req.cmd, - operand_size = s2_req.typ, - data = Fill(beatWords, pstore1_storegen.data)) - io.mem.acquire.valid := ((s2_valid_cached_miss && !s2_victim_dirty) || (s2_valid_uncached && !uncachedInFlight.asUInt.andR)) && fq.io.enq.ready - io.mem.acquire.bits := cachedGetMessage - when (s2_uncached) { - if (!usingDataScratchpad) - assert(!s2_valid_masked || !s2_hit_state.isValid(), "cache hit on uncached access") - io.mem.acquire.bits := uncachedGetMessage - when (s2_write) { - io.mem.acquire.bits := uncachedPutMessage - when (pstore1_amo) { - io.mem.acquire.bits := uncachedPutAtomicMessage - } - } + // Prepare a TileLink request message that initiates a transaction + val a_source = PriorityEncoder(~uncachedInFlight.asUInt) + val acquire_address = 
s2_req_block_addr + val access_address = s2_req.addr + val a_size = s2_req.typ + val a_data = Fill(beatWords, pstore1_storegen.data) + val acquire = if (edge.manager.anySupportAcquire) { + edge.Acquire(a_source, acquire_address, lgCacheBlockBytes, s2_grow_param)._2 // Cacheability checked by tlb + } else { + Wire(new TLBundleA(edge.bundle)) } - when (io.mem.acquire.fire()) { + val get = edge.Get(a_source, access_address, a_size)._2 + val put = edge.Put(a_source, access_address, a_size, a_data)._2 + val atomics = if (edge.manager.anySupportLogical) { + MuxLookup(s2_req.cmd, Wire(new TLBundleA(edge.bundle)), Array( + M_XA_SWAP -> edge.Logical(a_source, access_address, a_size, a_data, TLAtomics.SWAP)._2, + M_XA_XOR -> edge.Logical(a_source, access_address, a_size, a_data, TLAtomics.XOR) ._2, + M_XA_OR -> edge.Logical(a_source, access_address, a_size, a_data, TLAtomics.OR) ._2, + M_XA_AND -> edge.Logical(a_source, access_address, a_size, a_data, TLAtomics.AND) ._2, + M_XA_ADD -> edge.Arithmetic(a_source, access_address, a_size, a_data, TLAtomics.ADD)._2, + M_XA_MIN -> edge.Arithmetic(a_source, access_address, a_size, a_data, TLAtomics.MIN)._2, + M_XA_MAX -> edge.Arithmetic(a_source, access_address, a_size, a_data, TLAtomics.MAX)._2, + M_XA_MINU -> edge.Arithmetic(a_source, access_address, a_size, a_data, TLAtomics.MINU)._2, + M_XA_MAXU -> edge.Arithmetic(a_source, access_address, a_size, a_data, TLAtomics.MAXU)._2)) + } else { + // If no managers support atomics, assert fail if processor asks for them + assert (!(tl_out.a.valid && pstore1_amo && s2_write && s2_uncached)) + Wire(new TLBundleA(edge.bundle)) + } + + tl_out.a.valid := grantackq.io.enq.ready && ((s2_valid_cached_miss && !s2_victim_dirty) || + (s2_valid_uncached && !uncachedInFlight.asUInt.andR)) + tl_out.a.bits := Mux(!s2_uncached, acquire, Mux(!s2_write, get, Mux(!pstore1_amo, put, atomics))) + + // Set pending bits for outstanding TileLink transaction + when (tl_out.a.fire()) { when (s2_uncached) { - 
uncachedInFlight(xact_id) := true - uncachedReqs(xact_id) := s2_req + uncachedInFlight(a_source) := true + uncachedReqs(a_source) := s2_req }.otherwise { cached_grant_wait := true } } // grant - val grantIsRefill = io.mem.grant.bits.hasMultibeatData() - val grantIsVoluntary = io.mem.grant.bits.isVoluntary() - val grantIsUncached = !grantIsRefill && !grantIsVoluntary - io.mem.grant.ready := true - when (io.mem.grant.fire()) { - when (grantIsRefill) { assert(cached_grant_wait) } - when (grantIsUncached) { - assert(uncachedInFlight(io.mem.grant.bits.client_xact_id)) - uncachedInFlight(io.mem.grant.bits.client_xact_id) := false - s2_data := io.mem.grant.bits.data - val req = uncachedReqs(io.mem.grant.bits.client_xact_id) + val (d_first, d_last, d_done, d_address_inc) = edge.addr_inc(tl_out.d) + val grantIsCached = tl_out.d.bits.opcode.isOneOf(Grant, GrantData) + val grantIsUncached = tl_out.d.bits.opcode.isOneOf(AccessAck, AccessAckData, HintAck) + val grantIsVoluntary = tl_out.d.bits.opcode === ReleaseAck // Clears a different pending bit + val grantIsRefill = tl_out.d.bits.opcode === GrantData // Writes the data array + tl_out.d.ready := true + when (tl_out.d.fire()) { + when (grantIsCached) { + assert(cached_grant_wait, "A GrantData was unexpected by the dcache.") + when(d_last) { cached_grant_wait := false } + } .elsewhen (grantIsUncached) { + val id = tl_out.d.bits.source + val req = uncachedReqs(id) + assert(uncachedInFlight(id), "An AccessAck was unexpected by the dcache.") // TODO must handle Ack coming back on same cycle! 
+ when(d_last) { uncachedInFlight(id) := false } + s2_data := tl_out.d.bits.data s2_req.cmd := req.cmd s2_req.typ := req.typ s2_req.tag := req.tag s2_req.addr := Cat(s1_paddr >> wordOffBits /* don't-care */, req.addr(wordOffBits-1, 0)) - } - when (grantIsVoluntary) { - assert(release_ack_wait) + } .elsewhen (grantIsVoluntary) { + assert(release_ack_wait, "A ReleaseAck was unexpected by the dcache.") // TODO should handle Ack coming back on same cycle! release_ack_wait := false } } - val (refillCount, refillDone) = Counter(io.mem.grant.fire() && grantIsRefill, refillCycles) - when (io.mem.grant.fire() && refillDone) { cached_grant_wait := false } // data refill - val doRefillBeat = grantIsRefill && io.mem.grant.valid + val doRefillBeat = grantIsRefill && tl_out.d.valid dataArb.io.in(1).valid := doRefillBeat assert(dataArb.io.in(1).ready || !doRefillBeat) dataArb.io.in(1).bits.write := true - dataArb.io.in(1).bits.addr := Cat(s2_req.addr(paddrBits-1, blockOffBits), io.mem.grant.bits.addr_beat) << beatOffBits + dataArb.io.in(1).bits.addr := s2_req_block_addr | d_address_inc dataArb.io.in(1).bits.way_en := s2_victim_way - dataArb.io.in(1).bits.wdata := io.mem.grant.bits.data + dataArb.io.in(1).bits.wdata := tl_out.d.bits.data dataArb.io.in(1).bits.wmask := ~UInt(0, rowBytes) // tag updates on refill - metaWriteArb.io.in(1).valid := refillDone + metaWriteArb.io.in(1).valid := grantIsCached && d_done assert(!metaWriteArb.io.in(1).valid || metaWriteArb.io.in(1).ready) metaWriteArb.io.in(1).bits.way_en := s2_victim_way metaWriteArb.io.in(1).bits.idx := s2_req.addr(idxMSB, idxLSB) - metaWriteArb.io.in(1).bits.data.coh := s2_hit_state.onGrant(io.mem.grant.bits, s2_req.cmd) + metaWriteArb.io.in(1).bits.data.coh := s2_hit_state.onGrant(s2_req.cmd, tl_out.d.bits.param) metaWriteArb.io.in(1).bits.data.tag := s2_req.addr(paddrBits-1, untagBits) // don't accept uncached grants if there's a structural hazard on s2_data... 
val blockUncachedGrant = Reg(Bool()) blockUncachedGrant := dataArb.io.out.valid when (grantIsUncached) { - io.mem.grant.ready := !(blockUncachedGrant || s1_valid) + tl_out.d.ready := !(blockUncachedGrant || s1_valid) // ...but insert bubble to guarantee grant's eventual forward progress - when (io.mem.grant.valid && !io.mem.grant.ready) { + when (tl_out.d.valid && !tl_out.d.ready) { io.cpu.req.ready := false dataArb.io.in(1).valid := true dataArb.io.in(1).bits.write := false @@ -356,58 +349,79 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { } } - // finish - fq.io.enq.valid := io.mem.grant.fire() && io.mem.grant.bits.requiresAck() && (!grantIsRefill || refillDone) - fq.io.enq.bits := io.mem.grant.bits.makeFinish() - io.mem.finish <> fq.io.deq - when (fq.io.enq.valid) { assert(fq.io.enq.ready) } - when (refillDone) { replacer.miss } + // Finish TileLink transaction by issuing a GrantAck + grantackq.io.enq.valid := d_done && edge.hasFollowUp(tl_out.d.bits) + grantackq.io.enq.bits := edge.GrantAck(tl_out.d.bits) + tl_out.e <> grantackq.io.deq + assert(!grantackq.io.enq.valid || grantackq.io.enq.ready, "Too many Grants received by dcache.") + when (d_done) { replacer.miss } - // probe + // Handle an incoming TileLink Probe message val block_probe = releaseInFlight || lrscValid || (s2_valid_hit && s2_lr) - metaReadArb.io.in(1).valid := io.mem.probe.valid && !block_probe - io.mem.probe.ready := metaReadArb.io.in(1).ready && !block_probe && !s1_valid && (!s2_valid || s2_valid_hit) - metaReadArb.io.in(1).bits.idx := io.mem.probe.bits.addr_block + metaReadArb.io.in(1).valid := tl_out.b.valid && !block_probe + tl_out.b.ready := metaReadArb.io.in(1).ready && !block_probe && !s1_valid && (!s2_valid || s2_valid_hit) + metaReadArb.io.in(1).bits.idx := tl_out.b.bits.address(idxMSB, idxLSB) metaReadArb.io.in(1).bits.way_en := ~UInt(0, nWays) // release - val (writebackCount, writebackDone) = Counter(io.mem.release.fire() && inWriteback, refillCycles) - 
val releaseDone = writebackDone || (io.mem.release.fire() && !inWriteback) - val releaseRejected = io.mem.release.valid && !io.mem.release.ready + val (_, c_last, releaseDone, c_count) = edge.count(tl_out.c) + val releaseRejected = tl_out.c.valid && !tl_out.c.ready val s1_release_data_valid = Reg(next = dataArb.io.in(2).fire()) val s2_release_data_valid = Reg(next = s1_release_data_valid && !releaseRejected) - val releaseDataBeat = Cat(UInt(0), writebackCount) + Mux(releaseRejected, UInt(0), s1_release_data_valid + Cat(UInt(0), s2_release_data_valid)) - io.mem.release.valid := s2_release_data_valid - io.mem.release.bits := ClientMetadata.onReset.makeRelease(probe_bits) - val voluntaryReleaseMessage = s2_victim_state.makeVoluntaryWriteback(UInt(maxUncachedInFlight - 1), UInt(0)) - val voluntaryNewCoh = s2_victim_state.onCacheControl(M_FLUSH) - val probeResponseMessage = s2_probe_state.makeRelease(probe_bits) - val probeNewCoh = s2_probe_state.onProbe(probe_bits) + val releaseDataBeat = Cat(UInt(0), c_count) + Mux(releaseRejected, UInt(0), s1_release_data_valid + Cat(UInt(0), s2_release_data_valid)) + + val nackResponseMessage = edge.ProbeAck( + b = probe_bits, + reportPermissions = TLPermissions.NtoN) + + val voluntaryReleaseMessage = if (edge.manager.anySupportAcquire) { + edge.Release( + fromSource = UInt(maxUncachedInFlight - 1), + toAddress = probe_bits.address, + lgSize = lgCacheBlockBytes, + shrinkPermissions = s2_shrink_param, + data = s2_data)._2 + } else { + Wire(new TLBundleC(edge.bundle)) + } + + val probeResponseMessage = Mux(!s2_prb_ack_data, + edge.ProbeAck( + b = probe_bits, + reportPermissions = s2_report_param), + edge.ProbeAck( + b = probe_bits, + reportPermissions = s2_report_param, + data = s2_data)) + + tl_out.c.valid := s2_release_data_valid + tl_out.c.bits := nackResponseMessage val newCoh = Wire(init = probeNewCoh) releaseWay := s2_probe_way + when (s2_victimize && s2_victim_dirty) { - assert(!(s2_valid && s2_hit_state.isValid())) + 
assert(!(s2_valid && s2_hit_valid)) release_state := s_voluntary_writeback - probe_bits.addr_block := Cat(s2_victim_tag, s2_req.addr(idxMSB, idxLSB)) + probe_bits.address := Cat(s2_victim_tag, s2_req.addr(idxMSB, idxLSB)) << idxLSB } when (s2_probe) { - when (s2_probe_state.requiresVoluntaryWriteback()) { release_state := s_probe_rep_dirty } + when (s2_prb_ack_data) { release_state := s_probe_rep_dirty } .elsewhen (s2_probe_state.isValid()) { release_state := s_probe_rep_clean } .otherwise { - io.mem.release.valid := true + tl_out.c.valid := true release_state := s_probe_rep_miss } } when (releaseDone) { release_state := s_ready } when (release_state.isOneOf(s_probe_rep_miss, s_probe_rep_clean)) { - io.mem.release.valid := true + tl_out.c.valid := true } when (release_state.isOneOf(s_probe_rep_clean, s_probe_rep_dirty)) { - io.mem.release.bits := probeResponseMessage + tl_out.c.bits := probeResponseMessage when (releaseDone) { release_state := s_probe_write_meta } } when (release_state.isOneOf(s_voluntary_writeback, s_voluntary_write_meta)) { - io.mem.release.bits := voluntaryReleaseMessage + tl_out.c.bits := voluntaryReleaseMessage newCoh := voluntaryNewCoh releaseWay := s2_victim_way when (releaseDone) { @@ -415,21 +429,20 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { release_ack_wait := true } } - when (s2_probe && !io.mem.release.fire()) { s1_nack := true } - io.mem.release.bits.addr_block := probe_bits.addr_block - io.mem.release.bits.addr_beat := writebackCount - io.mem.release.bits.data := s2_data + when (s2_probe && !tl_out.c.fire()) { s1_nack := true } + tl_out.c.bits.address := probe_bits.address + tl_out.c.bits.data := s2_data dataArb.io.in(2).valid := inWriteback && releaseDataBeat < refillCycles dataArb.io.in(2).bits.write := false - dataArb.io.in(2).bits.addr := Cat(io.mem.release.bits.addr_block, releaseDataBeat(log2Up(refillCycles)-1,0)) << rowOffBits + dataArb.io.in(2).bits.addr := tl_out.c.bits.address | 
(releaseDataBeat(log2Up(refillCycles)-1,0) << rowOffBits) dataArb.io.in(2).bits.way_en := ~UInt(0, nWays) metaWriteArb.io.in(2).valid := release_state.isOneOf(s_voluntary_write_meta, s_probe_write_meta) metaWriteArb.io.in(2).bits.way_en := releaseWay - metaWriteArb.io.in(2).bits.idx := io.mem.release.bits.full_addr()(idxMSB, idxLSB) + metaWriteArb.io.in(2).bits.idx := tl_out.c.bits.address(idxMSB, idxLSB) metaWriteArb.io.in(2).bits.data.coh := newCoh - metaWriteArb.io.in(2).bits.data.tag := io.mem.release.bits.full_addr()(paddrBits-1, untagBits) + metaWriteArb.io.in(2).bits.data.tag := tl_out.c.bits.address(paddrBits-1, untagBits) when (metaWriteArb.io.in(2).fire()) { release_state := s_ready } // cached response @@ -440,7 +453,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.cpu.ordered := !(s1_valid || s2_valid || cached_grant_wait || uncachedInFlight.asUInt.orR) // uncached response - io.cpu.replay_next := io.mem.grant.fire() && grantIsUncached + io.cpu.replay_next := tl_out.d.fire() && grantIsUncached val doUncachedResp = Reg(next = io.cpu.replay_next) when (doUncachedResp) { assert(!s2_valid_hit) @@ -473,7 +486,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val flushed = Reg(init=Bool(true)) val flushing = Reg(init=Bool(false)) val flushCounter = Counter(nSets * nWays) - when (io.mem.acquire.fire() && !s2_uncached) { flushed := false } + when (tl_out.a.fire() && !s2_uncached) { flushed := false } when (s2_valid_masked && s2_req.cmd === M_FLUSH_ALL) { io.cpu.s2_nack := !flushed when (!flushed) { @@ -496,98 +509,3 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { } } } - -class ScratchpadSlavePort(implicit val p: Parameters) extends LazyModule with HasCoreParameters { - val node = TLManagerNode(TLManagerPortParameters( - Seq(TLManagerParameters( - address = List(AddressSet(0x80000000L, BigInt(p(DataScratchpadSize)-1))), - regionType = RegionType.UNCACHED, - executable = true, - 
supportsArithmetic = if (p(UseAtomics)) TransferSizes(1, coreDataBytes) else TransferSizes.none, - supportsLogical = if (p(UseAtomics)) TransferSizes(1, coreDataBytes) else TransferSizes.none, - supportsPutPartial = TransferSizes(1, coreDataBytes), - supportsPutFull = TransferSizes(1, coreDataBytes), - supportsGet = TransferSizes(1, coreDataBytes), - fifoId = Some(0))), // requests handled in FIFO order - beatBytes = coreDataBytes, - minLatency = 1)) - - // Make sure this ends up with the same name as before - override def name = "dmem0" - - lazy val module = new LazyModuleImp(this) { - val io = new Bundle { - val tl_in = node.bundleIn - val dmem = new HellaCacheIO - } - - val tl_in = io.tl_in(0) - val edge = node.edgesIn(0) - - require(usingDataScratchpad) - - val s_ready :: s_wait :: s_replay :: s_grant :: Nil = Enum(UInt(), 4) - val state = Reg(init = s_ready) - when (io.dmem.resp.valid) { state := s_grant } - when (tl_in.d.fire()) { state := s_ready } - when (io.dmem.s2_nack) { state := s_replay } - when (io.dmem.req.fire()) { state := s_wait } - - val acq = Reg(tl_in.a.bits) - when (io.dmem.resp.valid) { acq.data := io.dmem.resp.bits.data } - when (tl_in.a.fire()) { acq := tl_in.a.bits } - - val isWrite = acq.opcode === TLMessages.PutFullData || acq.opcode === TLMessages.PutPartialData - val isRead = !edge.hasData(acq) - - def formCacheReq(acq: TLBundleA) = { - val req = Wire(new HellaCacheReq) - req.cmd := MuxLookup(acq.opcode, Wire(M_XRD), Array( - TLMessages.PutFullData -> M_XWR, - TLMessages.PutPartialData -> M_XWR, - TLMessages.ArithmeticData -> MuxLookup(acq.param, Wire(M_XRD), Array( - TLAtomics.MIN -> M_XA_MIN, - TLAtomics.MAX -> M_XA_MAX, - TLAtomics.MINU -> M_XA_MINU, - TLAtomics.MAXU -> M_XA_MAXU, - TLAtomics.ADD -> M_XA_ADD)), - TLMessages.LogicalData -> MuxLookup(acq.param, Wire(M_XRD), Array( - TLAtomics.XOR -> M_XA_XOR, - TLAtomics.OR -> M_XA_OR, - TLAtomics.AND -> M_XA_AND, - TLAtomics.SWAP -> M_XA_SWAP)), - TLMessages.Get -> M_XRD)) - // treat 
all loads as full words, so bytes appear in correct lane - req.typ := Mux(isRead, log2Ceil(coreDataBytes), acq.size) - req.addr := Mux(isRead, ~(~acq.address | (coreDataBytes-1)), acq.address) - req.tag := UInt(0) - req - } - - val ready = state === s_ready || tl_in.d.fire() - io.dmem.req.valid := (tl_in.a.valid && ready) || state === s_replay - tl_in.a.ready := io.dmem.req.ready && ready - io.dmem.req.bits := formCacheReq(Mux(state === s_replay, acq, tl_in.a.bits)) - // the TL data is already in the correct byte lane, but the D$ - // expects right-justified store data, so that it can steer the bytes. - io.dmem.s1_data := new LoadGen(acq.size, Bool(false), acq.address(log2Ceil(coreDataBytes)-1,0), acq.data, Bool(false), coreDataBytes).data - io.dmem.s1_kill := false - io.dmem.invalidate_lr := false - - // place AMO data in correct word lane - val minAMOBytes = 4 - val grantData = Mux(io.dmem.resp.valid, io.dmem.resp.bits.data, acq.data) - val alignedGrantData = Mux(acq.size <= log2Ceil(minAMOBytes), Fill(coreDataBytes/minAMOBytes, grantData(8*minAMOBytes-1, 0)), grantData) - - tl_in.d.valid := io.dmem.resp.valid || state === s_grant - tl_in.d.bits := Mux(isWrite, - edge.AccessAck(acq, UInt(0)), - edge.AccessAck(acq, UInt(0), UInt(0))) - tl_in.d.bits.data := alignedGrantData - - // Tie off unused channels - tl_in.b.valid := Bool(false) - tl_in.c.ready := Bool(true) - tl_in.e.ready := Bool(true) - } -} diff --git a/src/main/scala/rocket/HellaCache.scala b/src/main/scala/rocket/HellaCache.scala new file mode 100644 index 00000000..eb1a91f9 --- /dev/null +++ b/src/main/scala/rocket/HellaCache.scala @@ -0,0 +1,167 @@ +// See LICENSE for license details. 
+ +package rocket + +import Chisel._ +import config.{Parameters, Field} +import diplomacy._ +import uncore.tilelink2._ +import uncore.agents._ +import uncore.constants._ +import uncore.tilelink.{TLKey, TLId} +import util.ParameterizedBundle + +case class DCacheConfig( + nMSHRs: Int = 1, + nSDQ: Int = 17, + nRPQ: Int = 16, + nMMIOs: Int = 1) + +case object DCacheKey extends Field[DCacheConfig] + +trait HasL1HellaCacheParameters extends HasCacheParameters with HasCoreParameters { + val outerDataBeats = p(TLKey(p(TLId))).dataBeats + val outerDataBits = p(TLKey(p(TLId))).dataBitsPerBeat + + val refillCyclesPerBeat = outerDataBits/rowBits + require(refillCyclesPerBeat == 1) + + val refillCycles = refillCyclesPerBeat*outerDataBeats + + val cacheBlockBytes = p(CacheBlockBytes) + val lgCacheBlockBytes = log2Up(cacheBlockBytes) + + val wordBits = xLen // really, xLen max + val wordBytes = wordBits/8 + val wordOffBits = log2Up(wordBytes) + val beatBytes = cacheBlockBytes / outerDataBeats + val beatWords = beatBytes / wordBytes + val beatOffBits = log2Up(beatBytes) + val idxMSB = untagBits-1 + val idxLSB = blockOffBits + val offsetmsb = idxLSB-1 + val offsetlsb = wordOffBits + val rowWords = rowBits/wordBits + val doNarrowRead = coreDataBits * nWays % rowBits == 0 + val encDataBits = code.width(coreDataBits) + val encRowBits = encDataBits*rowWords + val nIOMSHRs = 1 + val lrscCycles = 32 // ISA requires 16-insn LRSC sequences to succeed + + require(isPow2(nSets)) + require(rowBits >= coreDataBits) + require(rowBits <= outerDataBits) + require(xLen <= outerDataBits) // would need offset addr for puts if data width < xlen + require(!usingVM || untagBits <= pgIdxBits) +} + +abstract class L1HellaCacheModule(implicit val p: Parameters) extends Module + with HasL1HellaCacheParameters + +abstract class L1HellaCacheBundle(implicit val p: Parameters) extends ParameterizedBundle()(p) + with HasL1HellaCacheParameters + +class L1Metadata(implicit p: Parameters) extends Metadata()(p) 
with HasL1HellaCacheParameters { + val coh = new ClientMetadata +} +object L1Metadata { + def apply(tag: Bits, coh: ClientMetadata)(implicit p: Parameters) = { + val meta = Wire(new L1Metadata) + meta.tag := tag + meta.coh := coh + meta + } +} + +class L1MetaReadReq(implicit p: Parameters) extends MetaReadReq { + val tag = Bits(width = tagBits) + override def cloneType = new L1MetaReadReq()(p).asInstanceOf[this.type] //TODO remove +} + +class L1MetaWriteReq(implicit p: Parameters) extends + MetaWriteReq[L1Metadata](new L1Metadata) + +trait HasCoreMemOp extends HasCoreParameters { + val addr = UInt(width = coreMaxAddrBits) + val tag = Bits(width = dcacheReqTagBits) + val cmd = Bits(width = M_SZ) + val typ = Bits(width = MT_SZ) +} + +trait HasCoreData extends HasCoreParameters { + val data = Bits(width = coreDataBits) +} + +class HellaCacheReq(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasCoreData + +class HellaCacheResp(implicit p: Parameters) extends CoreBundle()(p) + with HasCoreMemOp + with HasCoreData { + val replay = Bool() + val has_data = Bool() + val data_word_bypass = Bits(width = coreDataBits) + val store_data = Bits(width = coreDataBits) +} + +class AlignmentExceptions extends Bundle { + val ld = Bool() + val st = Bool() +} + +class HellaCacheExceptions extends Bundle { + val ma = new AlignmentExceptions + val pf = new AlignmentExceptions +} + + +// interface between D$ and processor/DTLB +class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) { + val req = Decoupled(new HellaCacheReq) + val s1_kill = Bool(OUTPUT) // kill previous cycle's req + val s1_data = Bits(OUTPUT, coreDataBits) // data for previous cycle's req + val s2_nack = Bool(INPUT) // req from two cycles ago is rejected + + val resp = Valid(new HellaCacheResp).flip + val replay_next = Bool(INPUT) + val xcpt = (new HellaCacheExceptions).asInput + val invalidate_lr = Bool(OUTPUT) + val ordered = Bool(INPUT) +} + +abstract class HellaCache(val cfg: 
DCacheConfig)(implicit val p: Parameters) extends LazyModule { + val node = TLClientNode(TLClientParameters( + sourceId = IdRange(0, cfg.nMSHRs + cfg.nMMIOs), + supportsProbe = TransferSizes(p(CacheBlockBytes)))) + val module: HellaCacheModule +} + +class HellaCacheBundle(outer: HellaCache)(implicit p: Parameters) extends Bundle { + val cpu = (new HellaCacheIO).flip + val ptw = new TLBPTWIO() + val mem = outer.node.bundleOut +} + +class HellaCacheModule(outer: HellaCache)(implicit val p: Parameters) extends LazyModuleImp(outer) + with HasL1HellaCacheParameters { + implicit val cfg = outer.cfg + val io = new HellaCacheBundle(outer) + val edge = outer.node.edgesOut(0) + val tl_out = io.mem(0) + + /* TODO + edge.manager.managers.foreach { m => + if (m.supportsGet) { + require (m.supportsGet.contains(TransferSizes(1, tlDataBytes))) + ....etc + } + } + */ + +} + +object HellaCache { + def apply(cfg: DCacheConfig)(implicit p: Parameters) = { + if (cfg.nMSHRs == 0) LazyModule(new DCache(cfg)) + else LazyModule(new NonBlockingDCache(cfg)) + } +} diff --git a/src/main/scala/rocket/nbdcache.scala b/src/main/scala/rocket/NBDcache.scala similarity index 60% rename from src/main/scala/rocket/nbdcache.scala rename to src/main/scala/rocket/NBDcache.scala index f0be7c07..8858d51f 100644 --- a/src/main/scala/rocket/nbdcache.scala +++ b/src/main/scala/rocket/NBDcache.scala @@ -3,60 +3,15 @@ package rocket import Chisel._ -import uncore.tilelink._ -import uncore.coherence._ +import Chisel.ImplicitConversions._ +import diplomacy._ import uncore.agents._ import uncore.constants._ +import uncore.tilelink._ +import uncore.tilelink2._ import uncore.util._ import util._ -import Chisel.ImplicitConversions._ -import cde.{Parameters, Field} - -case class DCacheConfig( - nMSHRs: Int = 1, - nSDQ: Int = 17, - nRPQ: Int = 16) - -case object DCacheKey extends Field[DCacheConfig] - -trait HasL1HellaCacheParameters extends HasL1CacheParameters { - val wordBits = xLen // really, xLen max fLen - val 
wordBytes = wordBits/8 - val wordOffBits = log2Up(wordBytes) - val beatBytes = p(CacheBlockBytes) / outerDataBeats - val beatWords = beatBytes / wordBytes - val beatOffBits = log2Up(beatBytes) - val idxMSB = untagBits-1 - val idxLSB = blockOffBits - val offsetmsb = idxLSB-1 - val offsetlsb = wordOffBits - val rowWords = rowBits/wordBits - val doNarrowRead = coreDataBits * nWays % rowBits == 0 - val encDataBits = code.width(coreDataBits) - val encRowBits = encDataBits*rowWords - val nIOMSHRs = 1 - val lrscCycles = 32 // ISA requires 16-insn LRSC sequences to succeed - - require(isPow2(nSets)) - require(rowBits <= outerDataBits) - require(!usingVM || untagBits <= pgIdxBits) -} - -abstract class L1HellaCacheModule(implicit val p: Parameters) extends Module - with HasL1HellaCacheParameters -abstract class L1HellaCacheBundle(implicit val p: Parameters) extends ParameterizedBundle()(p) - with HasL1HellaCacheParameters - -trait HasCoreMemOp extends HasCoreParameters { - val addr = UInt(width = coreMaxAddrBits) - val tag = Bits(width = dcacheReqTagBits) - val cmd = Bits(width = M_SZ) - val typ = Bits(width = MT_SZ) -} - -trait HasCoreData extends HasCoreParameters { - val data = Bits(width = coreDataBits) -} +import config._ trait HasMissInfo extends HasL1HellaCacheParameters { val tag_match = Bool() @@ -64,46 +19,11 @@ trait HasMissInfo extends HasL1HellaCacheParameters { val way_en = Bits(width = nWays) } -class HellaCacheReqInternal(implicit p: Parameters) extends L1HellaCacheBundle()(p) +class HellaCacheReqInternal(implicit p: Parameters) extends CoreBundle()(p) with HasCoreMemOp { val phys = Bool() } -class HellaCacheReq(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasCoreData - -class HellaCacheResp(implicit p: Parameters) extends L1HellaCacheBundle()(p) - with HasCoreMemOp - with HasCoreData { - val replay = Bool() - val has_data = Bool() - val data_word_bypass = Bits(width = coreDataBits) - val store_data = Bits(width = coreDataBits) -} - -class 
AlignmentExceptions extends Bundle { - val ld = Bool() - val st = Bool() -} - -class HellaCacheExceptions extends Bundle { - val ma = new AlignmentExceptions - val pf = new AlignmentExceptions -} - -// interface between D$ and processor/DTLB -class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) { - val req = Decoupled(new HellaCacheReq) - val s1_kill = Bool(OUTPUT) // kill previous cycle's req - val s1_data = Bits(OUTPUT, coreDataBits) // data for previous cycle's req - val s2_nack = Bool(INPUT) // req from two cycles ago is rejected - - val resp = Valid(new HellaCacheResp).flip - val replay_next = Bool(INPUT) - val xcpt = (new HellaCacheExceptions).asInput - val invalidate_lr = Bool(OUTPUT) - val ordered = Bool(INPUT) -} - class L1DataReadReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) { val way_en = Bits(width = nWays) val addr = Bits(width = untagBits) @@ -116,27 +36,8 @@ class L1DataWriteReq(implicit p: Parameters) extends L1DataReadReq()(p) { class L1RefillReq(implicit p: Parameters) extends L1DataReadReq()(p) -class L1MetaReadReq(implicit p: Parameters) extends MetaReadReq { - val tag = Bits(width = tagBits) - override def cloneType = new L1MetaReadReq()(p).asInstanceOf[this.type] //TODO remove -} - -class L1MetaWriteReq(implicit p: Parameters) extends - MetaWriteReq[L1Metadata](new L1Metadata) - -object L1Metadata { - def apply(tag: Bits, coh: ClientMetadata)(implicit p: Parameters) = { - val meta = Wire(new L1Metadata) - meta.tag := tag - meta.coh := coh - meta - } -} -class L1Metadata(implicit p: Parameters) extends Metadata()(p) with HasL1HellaCacheParameters { - val coh = new ClientMetadata -} - class Replay(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasCoreData + class ReplayInternal(cfg: DCacheConfig)(implicit p: Parameters) extends HellaCacheReqInternal()(p) { val sdq_id = UInt(width = log2Up(cfg.nSDQ)) @@ -144,24 +45,29 @@ class ReplayInternal(cfg: DCacheConfig)(implicit p: Parameters) extends HellaCac } 
class MSHRReq(implicit p: Parameters) extends Replay()(p) with HasMissInfo + class MSHRReqInternal(cfg: DCacheConfig)(implicit p: Parameters) extends ReplayInternal(cfg)(p) with HasMissInfo { override def cloneType = new MSHRReqInternal(cfg)(p).asInstanceOf[this.type] } -class ProbeInternal(implicit p: Parameters) extends Probe()(p) with HasClientTransactionId - -class WritebackReq(implicit p: Parameters) extends Release()(p) with HasCacheParameters { +class WritebackReq(params: TLBundleParameters)(implicit p: Parameters) extends L1HellaCacheBundle()(p) { + val tag = Bits(width = tagBits) + val idx = Bits(width = idxBits) + val source = UInt(width = params.sourceBits) + val param = UInt(width = TLPermissions.cWidth) val way_en = Bits(width = nWays) + val voluntary = Bool() + + override def cloneType = new WritebackReq(params)(p).asInstanceOf[this.type] } -class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { +class IOMSHR(id: Int, edge: TLEdgeOut)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val io = new Bundle { val req = Decoupled(new HellaCacheReq).flip - val acquire = Decoupled(new Acquire) - val grant = Valid(new GrantFromSrc).flip - val finish = Decoupled(new FinishToDst) val resp = Decoupled(new HellaCacheResp) + val mem_access = Decoupled(new TLBundleA(edge.bundle)) + val mem_ack = Valid(new TLBundleD(edge.bundle)).flip val replay_next = Bool(OUTPUT) } @@ -175,57 +81,42 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val req = Reg(new HellaCacheReq) val req_cmd_sc = req.cmd === M_XSC val grant_word = Reg(UInt(width = wordBits)) - val fq = Module(new FinishQueue(1)) - val s_idle :: s_acquire :: s_grant :: s_resp :: s_finish :: Nil = Enum(Bits(), 5) + val s_idle :: s_mem_access :: s_mem_ack :: s_resp :: Nil = Enum(Bits(), 4) val state = Reg(init = s_idle) io.req.ready := (state === s_idle) - fq.io.enq.valid := io.grant.valid && io.grant.bits.requiresAck() - fq.io.enq.bits := 
io.grant.bits.makeFinish() - io.finish.valid := fq.io.deq.valid && (state === s_finish) - io.finish.bits := fq.io.deq.bits - fq.io.deq.ready := io.finish.ready && (state === s_finish) - val storegen = new StoreGen(req.typ, req.addr, req.data, wordBytes) val loadgen = new LoadGen(req.typ, mtSigned(req.typ), req.addr, grant_word, req_cmd_sc, wordBytes) + + val a_source = UInt(id) + val a_address = req.addr + val a_size = storegen.size + val a_data = Fill(beatWords, storegen.data) - val beat_mask = (storegen.mask << Cat(beatOffset(req.addr), UInt(0, wordOffBits))) - val beat_data = Fill(beatWords, storegen.data) + val get = edge.Get(a_source, a_address, a_size)._2 + val put = edge.Put(a_source, a_address, a_size, a_data)._2 + val atomics = if (edge.manager.anySupportLogical) { + MuxLookup(req.cmd, Wire(new TLBundleA(edge.bundle)), Array( + M_XA_SWAP -> edge.Logical(a_source, a_address, a_size, a_data, TLAtomics.SWAP)._2, + M_XA_XOR -> edge.Logical(a_source, a_address, a_size, a_data, TLAtomics.XOR) ._2, + M_XA_OR -> edge.Logical(a_source, a_address, a_size, a_data, TLAtomics.OR) ._2, + M_XA_AND -> edge.Logical(a_source, a_address, a_size, a_data, TLAtomics.AND) ._2, + M_XA_ADD -> edge.Arithmetic(a_source, a_address, a_size, a_data, TLAtomics.ADD)._2, + M_XA_MIN -> edge.Arithmetic(a_source, a_address, a_size, a_data, TLAtomics.MIN)._2, + M_XA_MAX -> edge.Arithmetic(a_source, a_address, a_size, a_data, TLAtomics.MAX)._2, + M_XA_MINU -> edge.Arithmetic(a_source, a_address, a_size, a_data, TLAtomics.MINU)._2, + M_XA_MAXU -> edge.Arithmetic(a_source, a_address, a_size, a_data, TLAtomics.MAXU)._2)) + } else { + // If no managers support atomics, assert fail if processor asks for them + assert (!isAMO(req.cmd)) + Wire(new TLBundleA(edge.bundle)) + } - val addr_block = req.addr(paddrBits - 1, blockOffBits) - val addr_beat = req.addr(blockOffBits - 1, beatOffBits) - val addr_byte = req.addr(beatOffBits - 1, 0) + io.mem_access.valid := (state === s_mem_access) + 
io.mem_access.bits := Mux(isAMO(req.cmd), atomics, Mux(isRead(req.cmd), get, put)) - val get_acquire = Get( - client_xact_id = UInt(id), - addr_block = addr_block, - addr_beat = addr_beat, - addr_byte = addr_byte, - operand_size = req.typ, - alloc = Bool(false)) - - val put_acquire = Put( - client_xact_id = UInt(id), - addr_block = addr_block, - addr_beat = addr_beat, - data = beat_data, - wmask = Some(beat_mask), - alloc = Bool(false)) - - val putAtomic_acquire = PutAtomic( - client_xact_id = UInt(id), - addr_block = addr_block, - addr_beat = addr_beat, - addr_byte = addr_byte, - atomic_opcode = req.cmd, - operand_size = req.typ, - data = beat_data) - - io.acquire.valid := (state === s_acquire) - io.acquire.bits := Mux(isAMO(req.cmd), putAtomic_acquire, Mux(isRead(req.cmd), get_acquire, put_acquire)) - - io.replay_next := (state === s_grant) || io.resp.valid && !io.resp.ready + io.replay_next := (state === s_mem_ack) || io.resp.valid && !io.resp.ready io.resp.valid := (state === s_resp) io.resp.bits := req io.resp.bits.has_data := isRead(req.cmd) @@ -235,30 +126,26 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { when (io.req.fire()) { req := io.req.bits - state := s_acquire + state := s_mem_access } - when (io.acquire.fire()) { - state := s_grant + when (io.mem_access.fire()) { + state := s_mem_ack } - when (state === s_grant && io.grant.valid) { + when (state === s_mem_ack && io.mem_ack.valid) { state := s_resp when (isRead(req.cmd)) { - grant_word := wordFromBeat(req.addr, io.grant.bits.data) + grant_word := wordFromBeat(req.addr, io.mem_ack.bits.data) } } when (io.resp.fire()) { - state := s_finish - } - - when (io.finish.fire()) { state := s_idle } } -class MSHR(id: Int)(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCacheModule()(p) { +class MSHR(id: Int, edge: TLEdgeOut)(implicit cfg: DCacheConfig, p: Parameters) extends L1HellaCacheModule()(p) { val io = new Bundle { val req_pri_val = Bool(INPUT) val req_pri_rdy 
= Bool(OUTPUT) @@ -269,23 +156,25 @@ class MSHR(id: Int)(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCa val idx_match = Bool(OUTPUT) val tag = Bits(OUTPUT, tagBits) - val mem_req = Decoupled(new Acquire) + val mem_acquire = Decoupled(new TLBundleA(edge.bundle)) + val mem_grant = Valid(new TLBundleD(edge.bundle)).flip + val mem_finish = Decoupled(new TLBundleE(edge.bundle)) + val refill = new L1RefillReq().asOutput // Data is bypassed val meta_read = Decoupled(new L1MetaReadReq) val meta_write = Decoupled(new L1MetaWriteReq) val replay = Decoupled(new ReplayInternal(cfg)) - val mem_grant = Valid(new GrantFromSrc).flip - val mem_finish = Decoupled(new FinishToDst) - val wb_req = Decoupled(new WritebackReq) + val wb_req = Decoupled(new WritebackReq(edge.bundle)) val probe_rdy = Bool(OUTPUT) } val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_meta_write_req :: s_meta_write_resp :: s_drain_rpq :: Nil = Enum(UInt(), 9) val state = Reg(init=s_invalid) - val new_coh_state = Reg(init=ClientMetadata.onReset) val req = Reg(new MSHRReqInternal(cfg)) val req_idx = req.addr(untagBits-1,blockOffBits) + val req_tag = req.addr >> untagBits + val req_block_addr = (req.addr >> blockOffBits) << blockOffBits val idx_match = req_idx === io.req_bits.addr(untagBits-1,blockOffBits) // We only accept secondary misses if we haven't yet sent an Acquire to outer memory // or if the Acquire that was sent will obtain a Grant with sufficient permissions @@ -297,9 +186,7 @@ class MSHR(id: Int)(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCa // to go from clean to dirty. 
val dirties_coh = Reg(Bool()) val states_before_refill = Seq(s_wb_req, s_wb_resp, s_meta_clear) - val gnt_multi_data = io.mem_grant.bits.hasMultibeatData() - val (refill_cnt, refill_count_done) = Counter(io.mem_grant.valid && gnt_multi_data, refillCycles) - val refill_done = io.mem_grant.valid && (!gnt_multi_data || refill_count_done) + val (_, _, refill_done, refill_address_inc) = edge.addr_inc(io.mem_grant) val sec_rdy = idx_match && (state.isOneOf(states_before_refill) || (state.isOneOf(s_refill_req, s_refill_resp) && @@ -310,10 +197,18 @@ class MSHR(id: Int)(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCa rpq.io.enq.bits := io.req_bits rpq.io.deq.ready := (io.replay.ready && state === s_drain_rpq) || state === s_invalid - val coh_on_grant = req.old_meta.coh.onGrant( - incoming = io.mem_grant.bits, - pending = Mux(dirties_coh, M_XWR, req.cmd)) - val coh_on_hit = io.req_bits.old_meta.coh.onHit(io.req_bits.cmd) + // TODO clean all this coh state business up + val new_coh_state = Reg(init=ClientMetadata.onReset) + val grow_param = Reg(init=UInt(0)) + val coh_on_grant = Mux(dirties_coh, + ClientMetadata.maximum, + req.old_meta.coh.onGrant(req.cmd, io.mem_grant.bits.param)) + val (is_hit, missed_param, coh_on_hit) = io.req_bits.old_meta.coh.onAccess(io.req_bits.cmd) + val (needs_wb, _, _) = io.req_bits.old_meta.coh.onCacheControl(M_FLUSH) + val (_, shrink_param, _) = req.old_meta.coh.onCacheControl(M_FLUSH) + val (hit_again, missed_again_param, _) = req.old_meta.coh.onCacheControl(io.req_bits.cmd) + val (_, _, clear_coh_state) = req.old_meta.coh.onCacheControl(M_FLUSH) + val (_, after_wb_param, _) = ClientMetadata.onReset.onAccess(req.cmd) when (state === s_drain_rpq && !rpq.io.deq.valid) { state := s_invalid @@ -329,17 +224,18 @@ class MSHR(id: Int)(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCa state := s_meta_write_req new_coh_state := coh_on_grant } - when (io.mem_req.fire()) { // s_refill_req + when (io.mem_acquire.fire()) { // 
s_refill_req state := s_refill_resp } when (state === s_meta_clear && io.meta_write.ready) { + grow_param := after_wb_param state := s_refill_req } when (state === s_wb_resp && io.mem_grant.valid) { state := s_meta_clear } when (io.wb_req.fire()) { // s_wb_req - state := Mux(io.wb_req.bits.requiresAck(), s_wb_resp, s_meta_clear) + state := s_wb_resp } when (io.req_sec_val && io.req_sec_rdy) { // s_wb_req, s_wb_resp, s_refill_req //If we get a secondary miss that needs more permissions before we've sent @@ -347,38 +243,38 @@ class MSHR(id: Int)(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCa // going to ask for in s_refill_req when(cmd_requires_second_acquire) { req.cmd := io.req_bits.cmd + when(!hit_again) { grow_param := missed_again_param } } dirties_coh := dirties_coh || isWrite(io.req_bits.cmd) } when (io.req_pri_val && io.req_pri_rdy) { - val coh = io.req_bits.old_meta.coh req := io.req_bits dirties_coh := isWrite(io.req_bits.cmd) when (io.req_bits.tag_match) { - when(coh.isHit(io.req_bits.cmd)) { // set dirty bit + when (is_hit) { // set dirty bit state := s_meta_write_req new_coh_state := coh_on_hit }.otherwise { // upgrade permissions state := s_refill_req + grow_param := missed_param } }.otherwise { // writback if necessary and refill - state := Mux(coh.requiresVoluntaryWriteback(), s_wb_req, s_meta_clear) + state := Mux(needs_wb, s_wb_req, s_meta_clear) } } - val fq = Module(new FinishQueue(1)) - val g = io.mem_grant.bits + val grantackq = Module(new Queue(io.mem_finish.bits, 1)) val can_finish = state.isOneOf(s_invalid, s_refill_req) - fq.io.enq.valid := io.mem_grant.valid && g.requiresAck() && refill_done - fq.io.enq.bits := g.makeFinish() - io.mem_finish.valid := fq.io.deq.valid && can_finish - fq.io.deq.ready := io.mem_finish.ready && can_finish - io.mem_finish.bits := fq.io.deq.bits + grantackq.io.enq.valid := refill_done && edge.hasFollowUp(io.mem_grant.bits) + grantackq.io.enq.bits := edge.GrantAck(io.mem_grant.bits) + 
io.mem_finish.valid := grantackq.io.deq.valid && can_finish + io.mem_finish.bits := grantackq.io.deq.bits + grantackq.io.deq.ready := io.mem_finish.ready && can_finish io.idx_match := (state =/= s_invalid) && idx_match io.refill.way_en := req.way_en - io.refill.addr := ((req_idx << log2Ceil(refillCycles)) | refill_cnt) << rowOffBits - io.tag := req.addr >> untagBits + io.refill.addr := req_block_addr | refill_address_inc + io.tag := req_tag io.req_pri_rdy := state === s_invalid io.req_sec_rdy := sec_rdy && rpq.io.enq.ready @@ -389,23 +285,24 @@ class MSHR(id: Int)(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCa io.meta_write.valid := state.isOneOf(s_meta_write_req, s_meta_clear) io.meta_write.bits.idx := req_idx - io.meta_write.bits.data.coh := Mux(state === s_meta_clear, - req.old_meta.coh.onCacheControl(M_FLUSH), - new_coh_state) + io.meta_write.bits.data.coh := Mux(state === s_meta_clear, clear_coh_state, new_coh_state) io.meta_write.bits.data.tag := io.tag io.meta_write.bits.way_en := req.way_en io.wb_req.valid := state === s_wb_req - io.wb_req.bits := req.old_meta.coh.makeVoluntaryWriteback( - client_xact_id = UInt(id), - addr_block = Cat(req.old_meta.tag, req_idx)) + io.wb_req.bits.source := UInt(id) + io.wb_req.bits.tag := req.old_meta.tag + io.wb_req.bits.idx := req_idx + io.wb_req.bits.param := shrink_param io.wb_req.bits.way_en := req.way_en + io.wb_req.bits.voluntary := Bool(true) - io.mem_req.valid := state === s_refill_req && fq.io.enq.ready - io.mem_req.bits := req.old_meta.coh.makeAcquire( - addr_block = Cat(io.tag, req_idx), - client_xact_id = Bits(id), - op_code = req.cmd) + io.mem_acquire.valid := state === s_refill_req && grantackq.io.enq.ready + io.mem_acquire.bits := edge.Acquire( + fromSource = UInt(id), + toAddress = Cat(io.tag, req_idx) << blockOffBits, + lgSize = lgCacheBlockBytes, + growPermissions = grow_param)._2 io.meta_read.valid := state === s_drain_rpq io.meta_read.bits.idx := req_idx @@ -422,20 +319,21 @@ class MSHR(id: 
Int)(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCa } } -class MSHRFile(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCacheModule()(p) { +class MSHRFile(edge: TLEdgeOut)(implicit cfg: DCacheConfig, p: Parameters) extends L1HellaCacheModule()(p) { val io = new Bundle { val req = Decoupled(new MSHRReq).flip val resp = Decoupled(new HellaCacheResp) val secondary_miss = Bool(OUTPUT) - val mem_req = Decoupled(new Acquire) + val mem_acquire = Decoupled(new TLBundleA(edge.bundle)) + val mem_grant = Valid(new TLBundleD(edge.bundle)).flip + val mem_finish = Decoupled(new TLBundleE(edge.bundle)) + val refill = new L1RefillReq().asOutput val meta_read = Decoupled(new L1MetaReadReq) val meta_write = Decoupled(new L1MetaWriteReq) val replay = Decoupled(new Replay) - val mem_grant = Valid(new GrantFromSrc).flip - val mem_finish = Decoupled(new FinishToDst) - val wb_req = Decoupled(new WritebackReq) + val wb_req = Decoupled(new WritebackReq(edge.bundle)) val probe_rdy = Bool(OUTPUT) val fence_rdy = Bool(OUTPUT) @@ -443,7 +341,7 @@ class MSHRFile(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCacheMo } // determine if the request is cacheable or not - val cacheable = addrMap.isCacheable(io.req.bits.addr) + val cacheable = edge.manager.supportsAcquireFast(io.req.bits.addr, lgCacheBlockBytes) val sdq_val = Reg(init=Bits(0, cfg.nSDQ)) val sdq_alloc_id = PriorityEncoder(~sdq_val(cfg.nSDQ-1,0)) @@ -460,13 +358,7 @@ class MSHRFile(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCacheMo val refillMux = Wire(Vec(cfg.nMSHRs, new L1RefillReq)) val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, cfg.nMSHRs)) val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, cfg.nMSHRs)) - val mem_req_arb = Module(new LockingArbiter( - new Acquire, - cfg.nMSHRs + nIOMSHRs, - outerDataBeats, - Some((a: Acquire) => a.hasMultibeatData()))) - val mem_finish_arb = Module(new Arbiter(new FinishToDst, cfg.nMSHRs + nIOMSHRs)) - val wb_req_arb = Module(new 
Arbiter(new WritebackReq, cfg.nMSHRs)) + val wb_req_arb = Module(new Arbiter(new WritebackReq(edge.bundle), cfg.nMSHRs)) val replay_arb = Module(new Arbiter(new ReplayInternal(cfg), cfg.nMSHRs)) val alloc_arb = Module(new Arbiter(Bool(), cfg.nMSHRs)) @@ -477,12 +369,12 @@ class MSHRFile(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCacheMo io.fence_rdy := true io.probe_rdy := true - for (i <- 0 until cfg.nMSHRs) { - val mshr = Module(new MSHR(i)(cfg)) + val mshrs = (0 until cfg.nMSHRs) map { i => + val mshr = Module(new MSHR(i,edge)(cfg,p)) idxMatch(i) := mshr.io.idx_match tagList(i) := mshr.io.tag - wbTagList(i) := mshr.io.wb_req.bits.addr_block >> idxBits + wbTagList(i) := mshr.io.wb_req.bits.tag alloc_arb.io.in(i).valid := mshr.io.req_pri_rdy mshr.io.req_pri_val := alloc_arb.io.in(i).ready @@ -493,13 +385,10 @@ class MSHRFile(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCacheMo meta_read_arb.io.in(i) <> mshr.io.meta_read meta_write_arb.io.in(i) <> mshr.io.meta_write - mem_req_arb.io.in(i) <> mshr.io.mem_req - mem_finish_arb.io.in(i) <> mshr.io.mem_finish wb_req_arb.io.in(i) <> mshr.io.wb_req replay_arb.io.in(i) <> mshr.io.replay - mshr.io.mem_grant.valid := io.mem_grant.valid && - io.mem_grant.bits.client_xact_id === UInt(i) + mshr.io.mem_grant.valid := io.mem_grant.valid && io.mem_grant.bits.source === UInt(i) mshr.io.mem_grant.bits := io.mem_grant.bits refillMux(i) := mshr.io.refill @@ -509,14 +398,15 @@ class MSHRFile(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCacheMo when (!mshr.io.req_pri_rdy) { io.fence_rdy := false } when (!mshr.io.probe_rdy) { io.probe_rdy := false } + + mshr } + alloc_arb.io.out.ready := io.req.valid && sdq_rdy && cacheable && !idx_match io.meta_read <> meta_read_arb.io.out io.meta_write <> meta_write_arb.io.out - io.mem_req <> mem_req_arb.io.out - io.mem_finish <> mem_finish_arb.io.out io.wb_req <> wb_req_arb.io.out val mmio_alloc_arb = Module(new Arbiter(Bool(), nIOMSHRs)) @@ -525,9 +415,9 @@ 
class MSHRFile(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCacheMo var mmio_rdy = Bool(false) io.replay_next := Bool(false) - for (i <- 0 until nIOMSHRs) { + val mmios = (0 until nIOMSHRs) map { i => val id = cfg.nMSHRs + i - val mshr = Module(new IOMSHR(id)) + val mshr = Module(new IOMSHR(id, edge)) mmio_alloc_arb.io.in(i).valid := mshr.io.req.ready mshr.io.req.valid := mmio_alloc_arb.io.in(i).ready @@ -535,26 +425,28 @@ class MSHRFile(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCacheMo mmio_rdy = mmio_rdy || mshr.io.req.ready - mem_req_arb.io.in(id) <> mshr.io.acquire - mem_finish_arb.io.in(id) <> mshr.io.finish - - mshr.io.grant.bits := io.mem_grant.bits - mshr.io.grant.valid := io.mem_grant.valid && - io.mem_grant.bits.client_xact_id === UInt(id) + mshr.io.mem_ack.bits := io.mem_grant.bits + mshr.io.mem_ack.valid := io.mem_grant.valid && io.mem_grant.bits.source === UInt(id) resp_arb.io.in(i) <> mshr.io.resp when (!mshr.io.req.ready) { io.fence_rdy := Bool(false) } when (mshr.io.replay_next) { io.replay_next := Bool(true) } + + mshr } mmio_alloc_arb.io.out.ready := io.req.valid && !cacheable + TLArbiter.lowestFromSeq(edge, io.mem_acquire, mshrs.map(_.io.mem_acquire) ++ mmios.map(_.io.mem_access)) + TLArbiter.lowestFromSeq(edge, io.mem_finish, mshrs.map(_.io.mem_finish)) + io.resp <> resp_arb.io.out - io.req.ready := Mux(!cacheable, mmio_rdy, - Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy) + io.req.ready := Mux(!cacheable, + mmio_rdy, + sdq_rdy && Mux(idx_match, tag_match && sec_rdy, pri_rdy)) io.secondary_miss := idx_match - io.refill := refillMux(io.mem_grant.bits.client_xact_id) + io.refill := refillMux(io.mem_grant.bits.source) val free_sdq = io.replay.fire() && isWrite(io.replay.bits.cmd) io.replay.bits.data := sdq(RegEnable(replay_arb.io.out.bits.sdq_id, free_sdq)) @@ -566,23 +458,21 @@ class MSHRFile(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCacheMo } } -class WritebackUnit(implicit p: Parameters) 
extends L1HellaCacheModule()(p) { +class WritebackUnit(edge: TLEdgeOut)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val io = new Bundle { - val req = Decoupled(new WritebackReq).flip + val req = Decoupled(new WritebackReq(edge.bundle)).flip val meta_read = Decoupled(new L1MetaReadReq) val data_req = Decoupled(new L1DataReadReq) val data_resp = Bits(INPUT, encRowBits) - val release = Decoupled(new Release) + val release = Decoupled(new TLBundleC(edge.bundle)) } + val req = Reg(new WritebackReq(edge.bundle)) val active = Reg(init=Bool(false)) val r1_data_req_fired = Reg(init=Bool(false)) val r2_data_req_fired = Reg(init=Bool(false)) val data_req_cnt = Reg(init = UInt(0, width = log2Up(refillCycles+1))) //TODO Zero width - val buf_v = (if(refillCyclesPerBeat > 1) Reg(init=Bits(0, width = refillCyclesPerBeat-1)) else Bits(1)) - val beat_done = buf_v.andR - val (beat_cnt, all_beats_done) = Counter(io.release.fire(), outerDataBeats) - val req = Reg(new WritebackReq) + val (_, last_beat, all_beats_done, beat_count) = edge.count(io.release) io.release.valid := false when (active) { @@ -593,13 +483,11 @@ class WritebackUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) { data_req_cnt := data_req_cnt + 1 } when (r2_data_req_fired) { - io.release.valid := beat_done - when(beat_done) { - when(!io.release.ready) { - r1_data_req_fired := false - r2_data_req_fired := false - data_req_cnt := data_req_cnt - Mux[UInt](Bool(refillCycles > 1) && r1_data_req_fired, 2, 1) - } .otherwise { if(refillCyclesPerBeat > 1) buf_v := 0 } + io.release.valid := true + when(!io.release.ready) { + r1_data_req_fired := false + r2_data_req_fired := false + data_req_cnt := data_req_cnt - Mux[UInt](Bool(refillCycles > 1) && r1_data_req_fired, 2, 1) } when(!r1_data_req_fired) { // We're done if this is the final data request and the Release can be sent @@ -610,49 +498,49 @@ class WritebackUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) { when (io.req.fire()) { active 
:= true data_req_cnt := 0 - if(refillCyclesPerBeat > 1) buf_v := 0 req := io.req.bits } io.req.ready := !active - val req_idx = req.addr_block(idxBits-1, 0) val fire = active && data_req_cnt < UInt(refillCycles) // We reissue the meta read as it sets up the mux ctrl for s2_data_muxed io.meta_read.valid := fire - io.meta_read.bits.idx := req_idx - io.meta_read.bits.tag := req.addr_block >> idxBits + io.meta_read.bits.idx := req.idx + io.meta_read.bits.tag := req.tag io.data_req.valid := fire io.data_req.bits.way_en := req.way_en io.data_req.bits.addr := (if(refillCycles > 1) - Cat(req_idx, data_req_cnt(log2Up(refillCycles)-1,0)) - else req_idx) << rowOffBits + Cat(req.idx, data_req_cnt(log2Up(refillCycles)-1,0)) + else req.idx) << rowOffBits - io.release.bits := req - io.release.bits.addr_beat := beat_cnt - io.release.bits.data := (if(refillCyclesPerBeat > 1) { - // If the cache rows are narrower than a TLDataBeat, - // then buffer enough data_resps to make a whole beat - val data_buf = Reg(Bits()) - when(active && r2_data_req_fired && !beat_done) { - data_buf := Cat(io.data_resp, data_buf((refillCyclesPerBeat)*encRowBits-1, encRowBits)) - buf_v := (if(refillCyclesPerBeat > 2) - Cat(UInt(1), buf_v(refillCyclesPerBeat-2,1)) - else UInt(1)) - } - Cat(io.data_resp, data_buf) - } else { io.data_resp }) + val r_address = Cat(req.tag, req.idx) << blockOffBits + val probeResponse = edge.ProbeAck( + fromSource = req.source, + toAddress = r_address, + lgSize = lgCacheBlockBytes, + reportPermissions = req.param, + data = io.data_resp) + + val voluntaryRelease = edge.Release( + fromSource = req.source, + toAddress = r_address, + lgSize = lgCacheBlockBytes, + shrinkPermissions = req.param, + data = io.data_resp)._2 + + io.release.bits := Mux(req.voluntary, voluntaryRelease, probeResponse) } -class ProbeUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) { +class ProbeUnit(edge: TLEdgeOut)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val io = new Bundle { - 
val req = Decoupled(new ProbeInternal).flip - val rep = Decoupled(new Release) + val req = Decoupled(new TLBundleB(edge.bundle)).flip + val rep = Decoupled(new TLBundleC(edge.bundle)) val meta_read = Decoupled(new L1MetaReadReq) val meta_write = Decoupled(new L1MetaWriteReq) - val wb_req = Decoupled(new WritebackReq) + val wb_req = Decoupled(new WritebackReq(edge.bundle)) val way_en = Bits(INPUT, nWays) val mshr_rdy = Bool(INPUT) val block_state = new ClientMetadata().asInput @@ -662,34 +550,42 @@ class ProbeUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) { s_mshr_resp :: s_release :: s_writeback_req :: s_writeback_resp :: s_meta_write :: Nil) = Enum(UInt(), 9) val state = Reg(init=s_invalid) - val old_coh = Reg(new ClientMetadata) - val way_en = Reg(Bits()) - val req = Reg(new ProbeInternal) - val tag_matches = way_en.orR + val req = Reg(new TLBundleB(edge.bundle)) + val req_idx = req.address(idxMSB, idxLSB) + val req_tag = req.address >> untagBits + + val way_en = Reg(Bits()) + val tag_matches = way_en.orR + val old_coh = Reg(new ClientMetadata) val miss_coh = ClientMetadata.onReset val reply_coh = Mux(tag_matches, old_coh, miss_coh) - val reply = reply_coh.makeRelease(req) + val (is_dirty, report_param, new_coh) = reply_coh.onProbe(req.param) + io.req.ready := state === s_invalid io.rep.valid := state === s_release - io.rep.bits := reply + io.rep.bits := edge.ProbeAck(req, report_param) - assert(!io.rep.valid || !io.rep.bits.hasData(), - "ProbeUnit should not send releases with data") + assert(!io.rep.valid || !edge.hasData(io.rep.bits), + "ProbeUnit should not send ProbeAcks with data, WritebackUnit should handle it") io.meta_read.valid := state === s_meta_read - io.meta_read.bits.idx := req.addr_block - io.meta_read.bits.tag := req.addr_block >> idxBits + io.meta_read.bits.idx := req_idx + io.meta_read.bits.tag := req_tag io.meta_write.valid := state === s_meta_write io.meta_write.bits.way_en := way_en - io.meta_write.bits.idx := req.addr_block - 
io.meta_write.bits.data.tag := req.addr_block >> idxBits - io.meta_write.bits.data.coh := old_coh.onProbe(req) + io.meta_write.bits.idx := req_idx + io.meta_write.bits.data.tag := req_tag + io.meta_write.bits.data.coh := new_coh io.wb_req.valid := state === s_writeback_req - io.wb_req.bits := reply + io.wb_req.bits.source := req.source + io.wb_req.bits.idx := req_idx + io.wb_req.bits.tag := req_tag + io.wb_req.bits.param := report_param io.wb_req.bits.way_en := way_en + io.wb_req.bits.voluntary := Bool(false) // state === s_invalid when (io.req.fire()) { @@ -708,16 +604,14 @@ class ProbeUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) { } when (state === s_mshr_req) { - state := s_mshr_resp old_coh := io.block_state way_en := io.way_en // if the read didn't go through, we need to retry - when (!io.mshr_rdy) { state := s_meta_read } + state := Mux(io.mshr_rdy, s_mshr_resp, s_meta_read) } when (state === s_mshr_resp) { - val needs_writeback = tag_matches && old_coh.requiresVoluntaryWriteback() - state := Mux(needs_writeback, s_writeback_req, s_release) + state := Mux(tag_matches && is_dirty, s_writeback_req, s_release) } when (state === s_release && io.rep.ready) { @@ -786,18 +680,17 @@ class DataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.write.ready := Bool(true) } -class HellaCache(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCacheModule()(p) { - val io = new Bundle { - val cpu = (new HellaCacheIO).flip - val ptw = new TLBPTWIO() - val mem = new ClientTileLinkIO - } - +class NonBlockingDCache(cfg: DCacheConfig)(implicit p: Parameters) extends HellaCache(cfg)(p) { + override lazy val module = new NonBlockingDCacheModule(this) +} + +class NonBlockingDCacheModule(outer: NonBlockingDCache)(implicit p: Parameters) extends HellaCacheModule(outer)(p) { + require(isPow2(nWays)) // TODO: relax this - val wb = Module(new WritebackUnit) - val prober = Module(new ProbeUnit) - val mshrs = Module(new MSHRFile(cfg)) + val wb = 
Module(new WritebackUnit(edge)) + val prober = Module(new ProbeUnit(edge)) + val mshrs = Module(new MSHRFile(edge)) io.cpu.req.ready := Bool(true) val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false)) @@ -912,9 +805,8 @@ class HellaCache(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCache val s2_tag_match_way = RegEnable(s1_tag_match_way, s1_clk_en) val s2_tag_match = s2_tag_match_way.orR val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEnable(meta.io.resp(w).coh, s1_clk_en))) - val s2_hit = s2_tag_match && - s2_hit_state.isHit(s2_req.cmd) && - s2_hit_state === s2_hit_state.onHit(s2_req.cmd) + val (s2_has_permission, _, s2_new_hit_state) = s2_hit_state.onAccess(s2_req.cmd) + val s2_hit = s2_tag_match && s2_has_permission && s2_hit_state === s2_new_hit_state // load-reserved/store-conditional val lrsc_count = Reg(init=UInt(0)) @@ -981,7 +873,7 @@ class HellaCache(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCache mshrs.io.req.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en) mshrs.io.req.bits.data := s2_req.data when (mshrs.io.req.fire()) { replacer.miss } - io.mem.acquire <> mshrs.io.mem_req + tl_out.a <> mshrs.io.mem_acquire // replays readArb.io.in(1).valid := mshrs.io.replay.valid @@ -993,15 +885,9 @@ class HellaCache(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCache metaWriteArb.io.in(0) <> mshrs.io.meta_write // probes and releases - val releaseArb = Module(new LockingArbiter( - new Release, 2, outerDataBeats, - Some((r: Release) => r.hasMultibeatData()))) - io.mem.release <> releaseArb.io.out - - prober.io.req.valid := io.mem.probe.valid && !lrsc_valid - io.mem.probe.ready := prober.io.req.ready && !lrsc_valid - prober.io.req.bits := io.mem.probe.bits - releaseArb.io.in(1) <> prober.io.rep + prober.io.req.valid := tl_out.b.valid && !lrsc_valid + tl_out.b.ready := prober.io.req.ready && !lrsc_valid + prober.io.req.bits := tl_out.b.bits prober.io.way_en := s2_tag_match_way 
prober.io.block_state := s2_hit_state metaReadArb.io.in(2) <> prober.io.meta_read @@ -1009,32 +895,32 @@ class HellaCache(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCache prober.io.mshr_rdy := mshrs.io.probe_rdy // refills - val narrow_grant = FlowThroughSerializer(io.mem.grant, refillCyclesPerBeat) - mshrs.io.mem_grant.valid := narrow_grant.fire() - mshrs.io.mem_grant.bits := narrow_grant.bits - narrow_grant.ready := writeArb.io.in(1).ready || !narrow_grant.bits.hasData() + val grant_has_data = edge.hasData(tl_out.d.bits) + mshrs.io.mem_grant.valid := tl_out.d.fire() + mshrs.io.mem_grant.bits := tl_out.d.bits + tl_out.d.ready := writeArb.io.in(1).ready || !grant_has_data /* The last clause here is necessary in order to prevent the responses for * the IOMSHRs from being written into the data array. It works because the * IOMSHR ids start right the ones for the regular MSHRs. */ - writeArb.io.in(1).valid := narrow_grant.valid && narrow_grant.bits.hasData() && - narrow_grant.bits.client_xact_id < UInt(cfg.nMSHRs) + writeArb.io.in(1).valid := tl_out.d.valid && grant_has_data && + tl_out.d.bits.source < UInt(cfg.nMSHRs) writeArb.io.in(1).bits.addr := mshrs.io.refill.addr writeArb.io.in(1).bits.way_en := mshrs.io.refill.way_en writeArb.io.in(1).bits.wmask := ~UInt(0, rowWords) - writeArb.io.in(1).bits.data := narrow_grant.bits.data(encRowBits-1,0) + writeArb.io.in(1).bits.data := tl_out.d.bits.data(encRowBits-1,0) data.io.read <> readArb.io.out - readArb.io.out.ready := !narrow_grant.valid || narrow_grant.ready // insert bubble if refill gets blocked - io.mem.finish <> mshrs.io.mem_finish + readArb.io.out.ready := !tl_out.d.valid || tl_out.d.ready // insert bubble if refill gets blocked + tl_out.e <> mshrs.io.mem_finish // writebacks - val wbArb = Module(new Arbiter(new WritebackReq, 2)) + val wbArb = Module(new Arbiter(new WritebackReq(edge.bundle), 2)) wbArb.io.in(0) <> prober.io.wb_req wbArb.io.in(1) <> mshrs.io.wb_req wb.io.req <> wbArb.io.out 
metaReadArb.io.in(3) <> wb.io.meta_read readArb.io.in(2) <> wb.io.data_req wb.io.data_resp := s2_data_corrected - releaseArb.io.in(0) <> wb.io.release + TLArbiter.lowest(edge, tl_out.c, wb.io.release, prober.io.rep) // store->load bypassing val s4_valid = Reg(next=s3_valid, init=Bool(false)) @@ -1106,137 +992,3 @@ class HellaCache(cfg: DCacheConfig)(implicit p: Parameters) extends L1HellaCache io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid io.cpu.replay_next := (s1_replay && s1_read) || mshrs.io.replay_next } - -/** - * This module buffers requests made by the SimpleHellaCacheIF in case they - * are nacked. Nacked requests must be replayed in order, and no other requests - * must be allowed to go through until the replayed requests are successfully - * completed. - */ -class SimpleHellaCacheIFReplayQueue(depth: Int) - (implicit val p: Parameters) extends Module - with HasL1HellaCacheParameters { - val io = new Bundle { - val req = Decoupled(new HellaCacheReq).flip - val nack = Valid(Bits(width = coreDCacheReqTagBits)).flip - val resp = Valid(new HellaCacheResp).flip - val replay = Decoupled(new HellaCacheReq) - } - - // Registers to store the sent request - // When a request is sent the first time, - // it is stored in one of the reqs registers - // and the corresponding inflight bit is set. - // The reqs register will be deallocated once the request is - // successfully completed. - val inflight = Reg(init = UInt(0, depth)) - val reqs = Reg(Vec(depth, new HellaCacheReq)) - - // The nack queue stores the index of nacked requests (in the reqs vector) - // in the order that they were nacked. A request is enqueued onto nackq - // when it is newly nacked (i.e. not a nack for a previous replay). - // The head of the nack queue will be replayed until it is - // successfully completed, at which time the request is dequeued. - // No new requests will be made or other replays attempted until the head - // of the nackq is successfully completed. 
- val nackq = Module(new Queue(UInt(width = log2Up(depth)), depth)) - val replaying = Reg(init = Bool(false)) - - val next_inflight_onehot = PriorityEncoderOH(~inflight) - val next_inflight = OHToUInt(next_inflight_onehot) - - val next_replay = nackq.io.deq.bits - val next_replay_onehot = UIntToOH(next_replay) - val next_replay_req = reqs(next_replay) - - // Keep sending the head of the nack queue until it succeeds - io.replay.valid := nackq.io.deq.valid && !replaying - io.replay.bits := next_replay_req - // Don't allow new requests if there is are replays waiting - // or something being nacked. - io.req.ready := !inflight.andR && !nackq.io.deq.valid && !io.nack.valid - - // Match on the tags to determine the index of nacks or responses - val nack_onehot = Cat(reqs.map(_.tag === io.nack.bits).reverse) & inflight - val resp_onehot = Cat(reqs.map(_.tag === io.resp.bits.tag).reverse) & inflight - - val replay_complete = io.resp.valid && replaying && io.resp.bits.tag === next_replay_req.tag - val nack_head = io.nack.valid && nackq.io.deq.valid && io.nack.bits === next_replay_req.tag - - // Enqueue to the nack queue if there is a nack that is not in response to - // the previous replay - nackq.io.enq.valid := io.nack.valid && !nack_head - nackq.io.enq.bits := OHToUInt(nack_onehot) - assert(!nackq.io.enq.valid || nackq.io.enq.ready, - "SimpleHellaCacheIF: ReplayQueue nack queue overflow") - - // Dequeue from the nack queue if the last replay was successfully completed - nackq.io.deq.ready := replay_complete - assert(!nackq.io.deq.ready || nackq.io.deq.valid, - "SimpleHellaCacheIF: ReplayQueue nack queue underflow") - - // Set inflight bit when a request is made - // Clear it when it is successfully completed - inflight := (inflight | Mux(io.req.fire(), next_inflight_onehot, UInt(0))) & - ~Mux(io.resp.valid, resp_onehot, UInt(0)) - - when (io.req.fire()) { - reqs(next_inflight) := io.req.bits - } - - // Only one replay outstanding at a time - when (io.replay.fire()) { 
replaying := Bool(true) } - when (nack_head || replay_complete) { replaying := Bool(false) } -} - -// exposes a sane decoupled request interface -class SimpleHellaCacheIF(implicit p: Parameters) extends Module -{ - val io = new Bundle { - val requestor = new HellaCacheIO().flip - val cache = new HellaCacheIO - } - - val replayq = Module(new SimpleHellaCacheIFReplayQueue(2)) - val req_arb = Module(new Arbiter(new HellaCacheReq, 2)) - - val req_helper = DecoupledHelper( - req_arb.io.in(1).ready, - replayq.io.req.ready, - io.requestor.req.valid) - - req_arb.io.in(0) <> replayq.io.replay - req_arb.io.in(1).valid := req_helper.fire(req_arb.io.in(1).ready) - req_arb.io.in(1).bits := io.requestor.req.bits - io.requestor.req.ready := req_helper.fire(io.requestor.req.valid) - replayq.io.req.valid := req_helper.fire(replayq.io.req.ready) - replayq.io.req.bits := io.requestor.req.bits - - val s0_req_fire = io.cache.req.fire() - val s1_req_fire = Reg(next = s0_req_fire) - val s2_req_fire = Reg(next = s1_req_fire) - val s1_req_tag = Reg(next = io.cache.req.bits.tag) - val s2_req_tag = Reg(next = s1_req_tag) - val s2_kill = Reg(next = io.cache.s1_kill) - - io.cache.invalidate_lr := io.requestor.invalidate_lr - io.cache.req <> req_arb.io.out - io.cache.s1_kill := io.cache.s2_nack - io.cache.s1_data := RegEnable(req_arb.io.out.bits.data, s0_req_fire) - - replayq.io.nack.valid := (io.cache.s2_nack || s2_kill) && s2_req_fire - replayq.io.nack.bits := s2_req_tag - replayq.io.resp := io.cache.resp - io.requestor.resp := io.cache.resp - - assert(!Reg(next = io.cache.req.fire()) || - !(io.cache.xcpt.ma.ld || io.cache.xcpt.ma.st || - io.cache.xcpt.pf.ld || io.cache.xcpt.pf.st), - "SimpleHellaCacheIF exception") -} - -object HellaCache { - def apply(cfg: DCacheConfig)(implicit p: Parameters) = - if (cfg.nMSHRs == 0) Module(new DCache()).io - else Module(new HellaCache(cfg)).io -} diff --git a/src/main/scala/rocket/ScratchpadSlavePort.scala b/src/main/scala/rocket/ScratchpadSlavePort.scala 
new file mode 100644 index 00000000..b766082a --- /dev/null +++ b/src/main/scala/rocket/ScratchpadSlavePort.scala @@ -0,0 +1,107 @@ +// See LICENSE for license details. + +package rocket + +import Chisel._ +import Chisel.ImplicitConversions._ +import junctions._ +import diplomacy._ +import config._ +import uncore.constants._ +import uncore.tilelink2._ +import uncore.util._ + +class ScratchpadSlavePort(implicit val p: Parameters) extends LazyModule with HasCoreParameters { + val node = TLManagerNode(TLManagerPortParameters( + Seq(TLManagerParameters( + address = List(AddressSet(0x80000000L, BigInt(p(DataScratchpadSize)-1))), + regionType = RegionType.UNCACHED, + executable = true, + supportsArithmetic = if (p(UseAtomics)) TransferSizes(1, coreDataBytes) else TransferSizes.none, + supportsLogical = if (p(UseAtomics)) TransferSizes(1, coreDataBytes) else TransferSizes.none, + supportsPutPartial = TransferSizes(1, coreDataBytes), + supportsPutFull = TransferSizes(1, coreDataBytes), + supportsGet = TransferSizes(1, coreDataBytes), + fifoId = Some(0))), // requests handled in FIFO order + beatBytes = coreDataBytes, + minLatency = 1)) + + // Make sure this ends up with the same name as before + override def name = "dmem0" + + lazy val module = new LazyModuleImp(this) { + val io = new Bundle { + val tl_in = node.bundleIn + val dmem = new HellaCacheIO + } + + val tl_in = io.tl_in(0) + val edge = node.edgesIn(0) + + require(usingDataScratchpad) + + val s_ready :: s_wait :: s_replay :: s_grant :: Nil = Enum(UInt(), 4) + val state = Reg(init = s_ready) + when (io.dmem.resp.valid) { state := s_grant } + when (tl_in.d.fire()) { state := s_ready } + when (io.dmem.s2_nack) { state := s_replay } + when (io.dmem.req.fire()) { state := s_wait } + + val acq = Reg(tl_in.a.bits) + when (io.dmem.resp.valid) { acq.data := io.dmem.resp.bits.data } + when (tl_in.a.fire()) { acq := tl_in.a.bits } + + val isWrite = acq.opcode === TLMessages.PutFullData || acq.opcode === 
TLMessages.PutPartialData + val isRead = !edge.hasData(acq) + + def formCacheReq(acq: TLBundleA) = { + val req = Wire(new HellaCacheReq) + req.cmd := MuxLookup(acq.opcode, Wire(M_XRD), Array( + TLMessages.PutFullData -> M_XWR, + TLMessages.PutPartialData -> M_XWR, + TLMessages.ArithmeticData -> MuxLookup(acq.param, Wire(M_XRD), Array( + TLAtomics.MIN -> M_XA_MIN, + TLAtomics.MAX -> M_XA_MAX, + TLAtomics.MINU -> M_XA_MINU, + TLAtomics.MAXU -> M_XA_MAXU, + TLAtomics.ADD -> M_XA_ADD)), + TLMessages.LogicalData -> MuxLookup(acq.param, Wire(M_XRD), Array( + TLAtomics.XOR -> M_XA_XOR, + TLAtomics.OR -> M_XA_OR, + TLAtomics.AND -> M_XA_AND, + TLAtomics.SWAP -> M_XA_SWAP)), + TLMessages.Get -> M_XRD)) + // treat all loads as full words, so bytes appear in correct lane + req.typ := Mux(isRead, log2Ceil(coreDataBytes), acq.size) + req.addr := Mux(isRead, ~(~acq.address | (coreDataBytes-1)), acq.address) + req.tag := UInt(0) + req + } + + val ready = state === s_ready || tl_in.d.fire() + io.dmem.req.valid := (tl_in.a.valid && ready) || state === s_replay + tl_in.a.ready := io.dmem.req.ready && ready + io.dmem.req.bits := formCacheReq(Mux(state === s_replay, acq, tl_in.a.bits)) + // the TL data is already in the correct byte lane, but the D$ + // expects right-justified store data, so that it can steer the bytes. 
+ io.dmem.s1_data := new LoadGen(acq.size, Bool(false), acq.address(log2Ceil(coreDataBytes)-1,0), acq.data, Bool(false), coreDataBytes).data + io.dmem.s1_kill := false + io.dmem.invalidate_lr := false + + // place AMO data in correct word lane + val minAMOBytes = 4 + val grantData = Mux(io.dmem.resp.valid, io.dmem.resp.bits.data, acq.data) + val alignedGrantData = Mux(acq.size <= log2Ceil(minAMOBytes), Fill(coreDataBytes/minAMOBytes, grantData(8*minAMOBytes-1, 0)), grantData) + + tl_in.d.valid := io.dmem.resp.valid || state === s_grant + tl_in.d.bits := Mux(isWrite, + edge.AccessAck(acq, UInt(0)), + edge.AccessAck(acq, UInt(0), UInt(0))) + tl_in.d.bits.data := alignedGrantData + + // Tie off unused channels + tl_in.b.valid := Bool(false) + tl_in.c.ready := Bool(true) + tl_in.e.ready := Bool(true) + } +} diff --git a/src/main/scala/rocket/SimpleHellaCacheIF.scala b/src/main/scala/rocket/SimpleHellaCacheIF.scala new file mode 100644 index 00000000..0d8b484c --- /dev/null +++ b/src/main/scala/rocket/SimpleHellaCacheIF.scala @@ -0,0 +1,136 @@ +// See LICENSE for license details. + +package rocket + +import Chisel._ +import Chisel.ImplicitConversions._ +import config._ +import util._ + +/** + * This module buffers requests made by the SimpleHellaCacheIF in case they + * are nacked. Nacked requests must be replayed in order, and no other requests + * must be allowed to go through until the replayed requests are successfully + * completed. + */ +class SimpleHellaCacheIFReplayQueue(depth: Int) + (implicit val p: Parameters) extends Module + with HasL1HellaCacheParameters { + val io = new Bundle { + val req = Decoupled(new HellaCacheReq).flip + val nack = Valid(Bits(width = coreDCacheReqTagBits)).flip + val resp = Valid(new HellaCacheResp).flip + val replay = Decoupled(new HellaCacheReq) + } + + // Registers to store the sent request + // When a request is sent the first time, + // it is stored in one of the reqs registers + // and the corresponding inflight bit is set. 
+ // The reqs register will be deallocated once the request is + // successfully completed. + val inflight = Reg(init = UInt(0, depth)) + val reqs = Reg(Vec(depth, new HellaCacheReq)) + + // The nack queue stores the index of nacked requests (in the reqs vector) + // in the order that they were nacked. A request is enqueued onto nackq + // when it is newly nacked (i.e. not a nack for a previous replay). + // The head of the nack queue will be replayed until it is + // successfully completed, at which time the request is dequeued. + // No new requests will be made or other replays attempted until the head + // of the nackq is successfully completed. + val nackq = Module(new Queue(UInt(width = log2Up(depth)), depth)) + val replaying = Reg(init = Bool(false)) + + val next_inflight_onehot = PriorityEncoderOH(~inflight) + val next_inflight = OHToUInt(next_inflight_onehot) + + val next_replay = nackq.io.deq.bits + val next_replay_onehot = UIntToOH(next_replay) + val next_replay_req = reqs(next_replay) + + // Keep sending the head of the nack queue until it succeeds + io.replay.valid := nackq.io.deq.valid && !replaying + io.replay.bits := next_replay_req + // Don't allow new requests if there are replays waiting + // or something being nacked.
+ io.req.ready := !inflight.andR && !nackq.io.deq.valid && !io.nack.valid + + // Match on the tags to determine the index of nacks or responses + val nack_onehot = Cat(reqs.map(_.tag === io.nack.bits).reverse) & inflight + val resp_onehot = Cat(reqs.map(_.tag === io.resp.bits.tag).reverse) & inflight + + val replay_complete = io.resp.valid && replaying && io.resp.bits.tag === next_replay_req.tag + val nack_head = io.nack.valid && nackq.io.deq.valid && io.nack.bits === next_replay_req.tag + + // Enqueue to the nack queue if there is a nack that is not in response to + // the previous replay + nackq.io.enq.valid := io.nack.valid && !nack_head + nackq.io.enq.bits := OHToUInt(nack_onehot) + assert(!nackq.io.enq.valid || nackq.io.enq.ready, + "SimpleHellaCacheIF: ReplayQueue nack queue overflow") + + // Dequeue from the nack queue if the last replay was successfully completed + nackq.io.deq.ready := replay_complete + assert(!nackq.io.deq.ready || nackq.io.deq.valid, + "SimpleHellaCacheIF: ReplayQueue nack queue underflow") + + // Set inflight bit when a request is made + // Clear it when it is successfully completed + inflight := (inflight | Mux(io.req.fire(), next_inflight_onehot, UInt(0))) & + ~Mux(io.resp.valid, resp_onehot, UInt(0)) + + when (io.req.fire()) { + reqs(next_inflight) := io.req.bits + } + + // Only one replay outstanding at a time + when (io.replay.fire()) { replaying := Bool(true) } + when (nack_head || replay_complete) { replaying := Bool(false) } +} + +// exposes a sane decoupled request interface +class SimpleHellaCacheIF(implicit p: Parameters) extends Module +{ + val io = new Bundle { + val requestor = new HellaCacheIO().flip + val cache = new HellaCacheIO + } + + val replayq = Module(new SimpleHellaCacheIFReplayQueue(2)) + val req_arb = Module(new Arbiter(new HellaCacheReq, 2)) + + val req_helper = DecoupledHelper( + req_arb.io.in(1).ready, + replayq.io.req.ready, + io.requestor.req.valid) + + req_arb.io.in(0) <> replayq.io.replay + 
req_arb.io.in(1).valid := req_helper.fire(req_arb.io.in(1).ready) + req_arb.io.in(1).bits := io.requestor.req.bits + io.requestor.req.ready := req_helper.fire(io.requestor.req.valid) + replayq.io.req.valid := req_helper.fire(replayq.io.req.ready) + replayq.io.req.bits := io.requestor.req.bits + + val s0_req_fire = io.cache.req.fire() + val s1_req_fire = Reg(next = s0_req_fire) + val s2_req_fire = Reg(next = s1_req_fire) + val s1_req_tag = Reg(next = io.cache.req.bits.tag) + val s2_req_tag = Reg(next = s1_req_tag) + val s2_kill = Reg(next = io.cache.s1_kill) + + io.cache.invalidate_lr := io.requestor.invalidate_lr + io.cache.req <> req_arb.io.out + io.cache.s1_kill := io.cache.s2_nack + io.cache.s1_data := RegEnable(req_arb.io.out.bits.data, s0_req_fire) + + replayq.io.nack.valid := (io.cache.s2_nack || s2_kill) && s2_req_fire + replayq.io.nack.bits := s2_req_tag + replayq.io.resp := io.cache.resp + io.requestor.resp := io.cache.resp + + assert(!Reg(next = io.cache.req.fire()) || + !(io.cache.xcpt.ma.ld || io.cache.xcpt.ma.st || + io.cache.xcpt.pf.ld || io.cache.xcpt.pf.st), + "SimpleHellaCacheIF exception") +} diff --git a/src/main/scala/rocket/arbiter.scala b/src/main/scala/rocket/arbiter.scala index de43dd6d..6a481287 100644 --- a/src/main/scala/rocket/arbiter.scala +++ b/src/main/scala/rocket/arbiter.scala @@ -3,7 +3,7 @@ package rocket import Chisel._ -import cde.{Parameters, Field} +import config._ import util.{ParameterizedBundle, DecoupledHelper} class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module diff --git a/src/main/scala/rocket/breakpoint.scala b/src/main/scala/rocket/breakpoint.scala index 36767e70..e91db306 100644 --- a/src/main/scala/rocket/breakpoint.scala +++ b/src/main/scala/rocket/breakpoint.scala @@ -5,7 +5,7 @@ package rocket import Chisel._ import util._ import Chisel.ImplicitConversions._ -import cde.Parameters +import config._ class BPControl(implicit p: Parameters) extends CoreBundle()(p) { val ttype = UInt(width = 4) diff 
--git a/src/main/scala/rocket/btb.scala b/src/main/scala/rocket/btb.scala index 3bf9e351..98d06f04 100644 --- a/src/main/scala/rocket/btb.scala +++ b/src/main/scala/rocket/btb.scala @@ -3,7 +3,7 @@ package rocket import Chisel._ -import cde.{Parameters, Field} +import config._ import util._ import Chisel.ImplicitConversions._ import uncore.agents.PseudoLRU diff --git a/src/main/scala/rocket/csr.scala b/src/main/scala/rocket/csr.scala index c25cd682..4c67d2c2 100644 --- a/src/main/scala/rocket/csr.scala +++ b/src/main/scala/rocket/csr.scala @@ -4,7 +4,7 @@ package rocket import Chisel._ import Instructions._ -import cde.{Parameters, Field} +import config._ import uncore.devices._ import util._ import Chisel.ImplicitConversions._ diff --git a/src/main/scala/rocket/dpath_alu.scala b/src/main/scala/rocket/dpath_alu.scala index 0f9480cb..90554656 100644 --- a/src/main/scala/rocket/dpath_alu.scala +++ b/src/main/scala/rocket/dpath_alu.scala @@ -3,7 +3,7 @@ package rocket import Chisel._ -import cde.{Parameters, Field} +import config._ import Instructions._ object ALU diff --git a/src/main/scala/rocket/fpu.scala b/src/main/scala/rocket/fpu.scala index b825d1a9..691354d5 100644 --- a/src/main/scala/rocket/fpu.scala +++ b/src/main/scala/rocket/fpu.scala @@ -8,7 +8,7 @@ import util._ import Chisel.ImplicitConversions._ import FPConstants._ import uncore.constants.MemoryOpConstants._ -import cde.{Parameters, Field} +import config._ case class FPUConfig( divSqrt: Boolean = true, diff --git a/src/main/scala/rocket/frontend.scala b/src/main/scala/rocket/frontend.scala index cb22bd48..8ffd43ed 100644 --- a/src/main/scala/rocket/frontend.scala +++ b/src/main/scala/rocket/frontend.scala @@ -4,7 +4,7 @@ import Chisel._ import uncore.tilelink._ import util._ import Chisel.ImplicitConversions._ -import cde.{Parameters, Field} +import config._ class FrontendReq(implicit p: Parameters) extends CoreBundle()(p) { val pc = UInt(width = vaddrBitsExtended) diff --git 
a/src/main/scala/rocket/ibuf.scala b/src/main/scala/rocket/ibuf.scala index 9ae99883..3f076008 100644 --- a/src/main/scala/rocket/ibuf.scala +++ b/src/main/scala/rocket/ibuf.scala @@ -5,7 +5,7 @@ package rocket import Chisel._ import util._ import Chisel.ImplicitConversions._ -import cde.{Parameters, Field} +import config._ class Instruction(implicit val p: Parameters) extends ParameterizedBundle with HasCoreParameters { val pf0 = Bool() // page fault on first half of instruction diff --git a/src/main/scala/rocket/icache.scala b/src/main/scala/rocket/icache.scala index ef90453e..028d6fef 100644 --- a/src/main/scala/rocket/icache.scala +++ b/src/main/scala/rocket/icache.scala @@ -6,7 +6,7 @@ import uncore.tilelink._ import uncore.util._ import util._ import Chisel.ImplicitConversions._ -import cde.{Parameters, Field} +import config._ trait HasL1CacheParameters extends HasCacheParameters with HasCoreParameters { val outerDataBeats = p(TLKey(p(TLId))).dataBeats @@ -72,7 +72,8 @@ class ICache(latency: Int)(implicit p: Parameters) extends CoreModule()(p) with } val refill_tag = refill_addr(tagBits+untagBits-1,untagBits) - val narrow_grant = FlowThroughSerializer(io.mem.grant, refillCyclesPerBeat) + require(refillCyclesPerBeat == 1) + val narrow_grant = io.mem.grant val (refill_cnt, refill_wrap) = Counter(narrow_grant.fire(), refillCycles) val refill_done = state === s_refill && refill_wrap narrow_grant.ready := Bool(true) diff --git a/src/main/scala/rocket/idecode.scala b/src/main/scala/rocket/idecode.scala index 30fa7a8d..018e723c 100644 --- a/src/main/scala/rocket/idecode.scala +++ b/src/main/scala/rocket/idecode.scala @@ -6,7 +6,7 @@ import Chisel._ import Instructions._ import uncore.constants.MemoryOpConstants._ import ALU._ -import cde.Parameters +import config._ import util._ import Chisel.ImplicitConversions._ diff --git a/src/main/scala/rocket/ptw.scala b/src/main/scala/rocket/ptw.scala index 31da1358..60e8fa92 100644 --- a/src/main/scala/rocket/ptw.scala +++ 
b/src/main/scala/rocket/ptw.scala @@ -7,7 +7,7 @@ import uncore.agents._ import uncore.constants._ import util._ import Chisel.ImplicitConversions._ -import cde.{Parameters, Field} +import config._ class PTWReq(implicit p: Parameters) extends CoreBundle()(p) { val prv = Bits(width = 2) diff --git a/src/main/scala/rocket/rocc.scala b/src/main/scala/rocket/rocc.scala index c0f9de9a..4503c984 100644 --- a/src/main/scala/rocket/rocc.scala +++ b/src/main/scala/rocket/rocc.scala @@ -8,7 +8,7 @@ import uncore.constants._ import uncore.agents.CacheName import util._ import Chisel.ImplicitConversions._ -import cde.{Parameters, Field} +import config._ case object RoccMaxTaggedMemXacts extends Field[Int] case object RoccNMemChannels extends Field[Int] @@ -41,7 +41,7 @@ class RoCCResponse(implicit p: Parameters) extends CoreBundle()(p) { class RoCCInterface(implicit p: Parameters) extends CoreBundle()(p) { val cmd = Decoupled(new RoCCCommand).flip val resp = Decoupled(new RoCCResponse) - val mem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" })) + val mem = new HellaCacheIO()(p.alterPartial({ case CacheName => CacheName("L1D") })) val busy = Bool(OUTPUT) val interrupt = Bool(OUTPUT) diff --git a/src/main/scala/rocket/rocket.scala b/src/main/scala/rocket/rocket.scala index 64d90783..ea350c84 100644 --- a/src/main/scala/rocket/rocket.scala +++ b/src/main/scala/rocket/rocket.scala @@ -9,7 +9,7 @@ import uncore.constants._ import junctions.HasAddrMapParameters import util._ import Chisel.ImplicitConversions._ -import cde.{Parameters, Field} +import config._ case object XLen extends Field[Int] case object FetchWidth extends Field[Int] @@ -75,8 +75,6 @@ trait HasCoreParameters extends HasAddrMapParameters { val vaddrBitsExtended = vpnBitsExtended + pgIdxBits val coreMaxAddrBits = paddrBits max vaddrBitsExtended val nCustomMrwCsrs = p(NCustomMRWCSRs) - val nCores = p(NTiles) - val tileId = p(TileId) // fetchWidth doubled, but coreInstBytes halved, for RVC val 
decodeWidth = fetchWidth / (if (usingCompressed) 2 else 1) @@ -144,8 +142,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val io = new Bundle { val interrupts = new TileInterrupts().asInput val hartid = UInt(INPUT, xLen) - val imem = new FrontendIO()(p.alterPartial({case CacheName => "L1I" })) - val dmem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" })) + val imem = new FrontendIO()(p.alterPartial({case CacheName => CacheName("L1I") })) + val dmem = new HellaCacheIO()(p.alterPartial({ case CacheName => CacheName("L1D") })) val ptw = new DatapathPTWIO().flip val fpu = new FPUIO().flip val rocc = new RoCCInterface().flip diff --git a/src/main/scala/rocket/rvc.scala b/src/main/scala/rocket/rvc.scala index e7ae1fae..d7529f9d 100644 --- a/src/main/scala/rocket/rvc.scala +++ b/src/main/scala/rocket/rvc.scala @@ -3,7 +3,7 @@ package rocket import Chisel._ import Chisel.ImplicitConversions._ import util._ -import cde.Parameters +import config._ class ExpandedInstruction extends Bundle { val bits = UInt(width = 32) diff --git a/src/main/scala/rocket/tile.scala b/src/main/scala/rocket/tile.scala index c497a98e..1ebc8836 100644 --- a/src/main/scala/rocket/tile.scala +++ b/src/main/scala/rocket/tile.scala @@ -10,11 +10,10 @@ import uncore.agents._ import uncore.converters._ import uncore.devices._ import util._ -import cde.{Parameters, Field} +import config._ +import scala.collection.mutable.ListBuffer case object BuildRoCC extends Field[Seq[RoccParameters]] -case object NCachedTileLinkPorts extends Field[Int] -case object NUncachedTileLinkPorts extends Field[Int] case object TileId extends Field[Int] case class RoccParameters( @@ -24,61 +23,52 @@ case class RoccParameters( nPTWPorts : Int = 0, useFPU: Boolean = false) -case class TileBundleConfig( - nCachedTileLinkPorts: Int, - nUncachedTileLinkPorts: Int, - xLen: Int) +class RocketTile(tileId: Int)(implicit p: Parameters) extends LazyModule { + val dcacheParams = p.alterPartial({ + case 
CacheName => CacheName("L1D") + case TLId => "L1toL2" + case TileId => tileId // TODO using this messes with Hierarchical P&R: change to io.hartid? + }) + val icacheParams = p.alterPartial({ + case CacheName => CacheName("L1I") + case TLId => "L1toL2" + }) -class TileIO(c: TileBundleConfig, node: Option[TLInwardNode] = None)(implicit p: Parameters) extends Bundle { - val cached = Vec(c.nCachedTileLinkPorts, new ClientTileLinkIO) - val uncached = Vec(c.nUncachedTileLinkPorts, new ClientUncachedTileLinkIO) - val hartid = UInt(INPUT, c.xLen) - val interrupts = new TileInterrupts().asInput - val slave = node.map(_.inward.bundleIn) - val resetVector = UInt(INPUT, c.xLen) - - override def cloneType = new TileIO(c).asInstanceOf[this.type] -} - -abstract class TileImp(l: LazyTile)(implicit val p: Parameters) extends LazyModuleImp(l) { - val io: TileIO -} - -abstract class LazyTile(implicit p: Parameters) extends LazyModule { - val nCachedTileLinkPorts = p(NCachedTileLinkPorts) - val nUncachedTileLinkPorts = p(NUncachedTileLinkPorts) - val dcacheParams = p.alterPartial({ case CacheName => "L1D" }) - val bc = TileBundleConfig( - nCachedTileLinkPorts = nCachedTileLinkPorts, - nUncachedTileLinkPorts = nUncachedTileLinkPorts, - xLen = p(XLen)) - - val module: TileImp - val slave: Option[TLInputNode] -} - -class RocketTile(implicit p: Parameters) extends LazyTile { - val slave = if (p(DataScratchpadSize) == 0) None else Some(TLInputNode()) + //TODO val intNode = IntInputNode() + val slaveNode = if (p(DataScratchpadSize) == 0) None else Some(TLInputNode()) val scratch = if (p(DataScratchpadSize) == 0) None else Some(LazyModule(new ScratchpadSlavePort()(dcacheParams))) + val dcache = HellaCache(p(DCacheKey))(dcacheParams) + val ucLegacy = LazyModule(new TLLegacy()(icacheParams)) - (slave zip scratch) foreach { case (node, lm) => lm.node := TLFragmenter(p(XLen)/8, p(CacheBlockBytes))(node) } + val cachedOut = TLOutputNode() + val uncachedOut = TLOutputNode() + cachedOut :=
dcache.node + uncachedOut := TLHintHandler()(ucLegacy.node) + val masterNodes = List(cachedOut, uncachedOut) + + (slaveNode zip scratch) foreach { case (node, lm) => lm.node := TLFragmenter(p(XLen)/8, p(CacheBlockBytes))(node) } + + lazy val module = new LazyModuleImp(this) { + val io = new Bundle { + val cached = cachedOut.bundleOut + val uncached = uncachedOut.bundleOut + val slave = slaveNode.map(_.bundleIn) + val hartid = UInt(INPUT, p(XLen)) + val interrupts = new TileInterrupts().asInput + val resetVector = UInt(INPUT, p(XLen)) + } - lazy val module = new TileImp(this) { - val io = new TileIO(bc, slave) val buildRocc = p(BuildRoCC) val usingRocc = !buildRocc.isEmpty val nRocc = buildRocc.size val nFPUPorts = buildRocc.filter(_.useFPU).size - val core = Module(new Rocket) - val icache = Module(new Frontend()(p.alterPartial({ case CacheName => "L1I" }))) - val dcache = HellaCache(p(DCacheKey))(dcacheParams) + val core = Module(new Rocket()(dcacheParams)) + val icache = Module(new Frontend()(icacheParams)) - val ptwPorts = collection.mutable.ArrayBuffer(icache.io.ptw, dcache.ptw) - val dcPorts = collection.mutable.ArrayBuffer(core.io.dmem) - val uncachedArbPorts = collection.mutable.ArrayBuffer(icache.io.mem) - val uncachedPorts = collection.mutable.ArrayBuffer[ClientUncachedTileLinkIO]() - val cachedPorts = collection.mutable.ArrayBuffer(dcache.mem) + val ptwPorts = ListBuffer(icache.io.ptw, dcache.module.io.ptw) + val dcPorts = ListBuffer(core.io.dmem) + val uncachedArbPorts = ListBuffer(icache.io.mem) core.io.interrupts := io.interrupts core.io.hartid := io.hartid icache.io.cpu <> core.io.imem @@ -129,19 +119,12 @@ class RocketTile(implicit p: Parameters) extends LazyTile { respArb.io.in <> roccs.map(rocc => Queue(rocc.io.resp)) ptwPorts ++= roccs.flatMap(_.io.ptw) - uncachedPorts ++= roccs.flatMap(_.io.utl) + uncachedArbPorts ++= roccs.flatMap(_.io.utl) // TODO no difference between io.autl and io.utl for now } - val uncachedArb = Module(new 
ClientUncachedTileLinkIOArbiter(uncachedArbPorts.size)) + val uncachedArb = Module(new ClientUncachedTileLinkIOArbiter(uncachedArbPorts.size)(icacheParams)) uncachedArb.io.in <> uncachedArbPorts - uncachedArb.io.out +=: uncachedPorts - - // Connect the caches and RoCC to the outer memory system - io.uncached <> uncachedPorts - io.cached <> cachedPorts - // TODO remove nCached/nUncachedTileLinkPorts parameters and these assertions - require(uncachedPorts.size == nUncachedTileLinkPorts) - require(cachedPorts.size == nCachedTileLinkPorts) + ucLegacy.module.io.legacy <> uncachedArb.io.out if (p(UseVM)) { val ptw = Module(new PTW(ptwPorts.size)(dcacheParams)) @@ -155,7 +138,7 @@ class RocketTile(implicit p: Parameters) extends LazyTile { require(dcPorts.size == core.dcacheArbPorts) val dcArb = Module(new HellaCacheArbiter(dcPorts.size)(dcacheParams)) dcArb.io.requestor <> dcPorts - dcache.cpu <> dcArb.io.mem + dcache.module.io.cpu <> dcArb.io.mem if (nFPUPorts == 0) { fpuOpt.foreach { fpu => diff --git a/src/main/scala/rocket/tlb.scala b/src/main/scala/rocket/tlb.scala index 8bb1e666..339f2e84 100644 --- a/src/main/scala/rocket/tlb.scala +++ b/src/main/scala/rocket/tlb.scala @@ -7,16 +7,15 @@ import util._ import Chisel.ImplicitConversions._ import junctions._ import scala.math._ -import cde.{Parameters, Field} -import uncore.agents.PseudoLRU +import config._ +import uncore.agents._ import uncore.coherence._ case object PgLevels extends Field[Int] case object ASIdBits extends Field[Int] -case object NTLBEntries extends Field[Int] trait HasTLBParameters extends HasCoreParameters { - val entries = p(NTLBEntries) + val entries = p(p(CacheName)).nTLBEntries val camAddrBits = log2Ceil(entries) val camTagBits = asIdBits + vpnBits } diff --git a/src/main/scala/rocketchip/BaseTop.scala b/src/main/scala/rocketchip/BaseTop.scala index 99a5521e..a5f100ab 100644 --- a/src/main/scala/rocketchip/BaseTop.scala +++ b/src/main/scala/rocketchip/BaseTop.scala @@ -3,7 +3,7 @@ package 
rocketchip import Chisel._ -import cde.{Parameters, Field} +import config._ import junctions._ import diplomacy._ import uncore.tilelink._ @@ -15,16 +15,13 @@ import coreplex._ // the following parameters will be refactored properly with TL2 case object GlobalAddrMap extends Field[AddrMap] -case object NCoreplexExtClients extends Field[Int] /** Enable or disable monitoring of Diplomatic buses */ -case object TLEmitMonitors extends Field[Bool] +case object TLEmitMonitors extends Field[Boolean] -abstract class BareTop[+C <: BaseCoreplex](_coreplex: Parameters => C)(implicit val q: Parameters) extends LazyModule { +abstract class BareTop[+C <: BaseCoreplex](_coreplex: Parameters => C)(implicit val p: Parameters) extends LazyModule { // Fill in the TL1 legacy parameters; remove these once rocket/groundtest/unittest are TL2 - val pBusMasters = new RangeManager - lazy val legacyAddrMap = GenerateGlobalAddrMap(q, coreplex.l1tol2.node.edgesIn(0).manager.managers) - val coreplex : C = LazyModule(_coreplex(q.alterPartial { - case NCoreplexExtClients => pBusMasters.sum + lazy val legacyAddrMap = GenerateGlobalAddrMap(p, coreplex.l1tol2.node.edgesIn(0).manager.managers) + val coreplex : C = LazyModule(_coreplex(p.alterPartial { case GlobalAddrMap => legacyAddrMap })) @@ -42,8 +39,8 @@ abstract class BareTopModule[+L <: BareTop[BaseCoreplex], +B <: BareTopBundle[L] /** Base Top with no Periphery */ trait TopNetwork extends HasPeripheryParameters { - this: BareTop[BaseCoreplex] => - implicit val p = q + val module: TopNetworkModule + TLImp.emitMonitors = p(TLEmitMonitors) // Add a SoC and peripheral bus @@ -52,33 +49,22 @@ trait TopNetwork extends HasPeripheryParameters { val intBus = LazyModule(new IntXbar) peripheryBus.node := - TLWidthWidget(p(SOCBusKey).beatBytes)( - TLAtomicAutomata(arithmetic = p(PeripheryBusKey).arithAMO)( + TLWidthWidget(socBusConfig.beatBytes)( + TLAtomicAutomata(arithmetic = peripheryBusArithmetic)( socBus.node)) + + var coreplexMem = 
Seq[TLOutwardNode]() } trait TopNetworkBundle extends HasPeripheryParameters { - this: BareTopBundle[BareTop[BaseCoreplex]] => - implicit val p = outer.q - val success = Bool(OUTPUT) + val outer: TopNetwork + implicit val p = outer.p } trait TopNetworkModule extends HasPeripheryParameters { - this: { - val outer: BareTop[BaseCoreplex] with TopNetwork - val io: TopNetworkBundle - } => + val io: TopNetworkBundle + val outer: TopNetwork implicit val p = outer.p - - val coreplexMem : Vec[ClientUncachedTileLinkIO] = Wire(outer.coreplex.module.io.mem) - val coreplexSlave: Vec[ClientUncachedTileLinkIO] = Wire(outer.coreplex.module.io.slave) - val coreplexDebug: DebugBusIO = Wire(outer.coreplex.module.io.debug) - val coreplexRtc : Bool = Wire(outer.coreplex.module.io.rtcTick) - - io.success := outer.coreplex.module.io.success - - outer.coreplex.module.io.rtcTick := coreplexRtc - coreplexRtc := Counter(p(rocketchip.RTCPeriod)).inc() } /** Base Top with no Periphery */ @@ -93,19 +79,11 @@ class BaseTopBundle[+L <: BaseTop[BaseCoreplex]](_outer: L) extends BareTopBundl class BaseTopModule[+L <: BaseTop[BaseCoreplex], +B <: BaseTopBundle[L]](_outer: L, _io: () => B) extends BareTopModule(_outer, _io) with TopNetworkModule -trait DirectConnection { - this: BareTop[BaseCoreplex] with TopNetwork => +trait DirectConnection extends TopNetwork { + val coreplex: BaseCoreplex socBus.node := coreplex.mmio coreplex.mmioInt := intBus.intnode -} -trait DirectConnectionModule { - this: TopNetworkModule { - val outer: BaseTop[BaseCoreplex] - } => - - coreplexMem <> outer.coreplex.module.io.mem - outer.coreplex.module.io.slave <> coreplexSlave - outer.coreplex.module.io.debug <> coreplexDebug + coreplexMem = coreplex.mem } diff --git a/src/main/scala/rocketchip/Configs.scala b/src/main/scala/rocketchip/Configs.scala index 32853a42..47c0937a 100644 --- a/src/main/scala/rocketchip/Configs.scala +++ b/src/main/scala/rocketchip/Configs.scala @@ -16,58 +16,22 @@ import scala.math.max import 
scala.collection.mutable.{LinkedHashSet, ListBuffer} import scala.collection.immutable.HashMap import DefaultTestSuites._ -import cde.{Parameters, Config, Dump, Knob, CDEMatchError} +import config._ class BasePlatformConfig extends Config( - topDefinitions = { - (pname,site,here) => { - type PF = PartialFunction[Any,Any] - def findBy(sname:Any):Any = here[PF](site[Any](sname))(pname) - lazy val edgeDataBits = site(EdgeDataBits) - lazy val edgeDataBeats = (8 * site(CacheBlockBytes)) / edgeDataBits - pname match { - //Memory Parameters - case EdgeDataBits => 64 - case EdgeIDBits => 5 - case NastiKey => NastiParameters( - dataBits = edgeDataBits, - addrBits = site(PAddrBits), - idBits = site(EdgeIDBits)) - case TLEmitMonitors => true - case TLKey("EdgetoSlave") => - site(TLKey("L1toL2")).copy(dataBeats = edgeDataBeats) - case TLKey("MCtoEdge") => - site(TLKey("L2toMC")).copy(dataBeats = edgeDataBeats) - case TLKey("MMIOtoEdge") => - site(TLKey("L2toMMIO")).copy(dataBeats = edgeDataBeats) - case NExtTopInterrupts => 2 - case SOCBusKey => SOCBusConfig(beatBytes = site(TLKey("L2toMMIO")).dataBitsPerBeat/8) - case PeripheryBusKey => PeripheryBusConfig(arithAMO = true, beatBytes = 4) - // Note that PLIC asserts that this is > 0. 
- case AsyncDebugBus => false - case IncludeJtagDTM => false - case AsyncBusChannels => false - case NExtBusAXIChannels => 0 - case HastiId => "Ext" - case HastiKey("TL") => - HastiParameters( - addrBits = site(PAddrBits), - dataBits = site(TLKey(site(TLId))).dataBits / site(TLKey(site(TLId))).dataBeats) - case HastiKey("Ext") => - HastiParameters( - addrBits = site(PAddrBits), - dataBits = edgeDataBits) - case AsyncMemChannels => false - case NMemoryChannels => Dump("N_MEM_CHANNELS", 1) - case TMemoryChannels => BusType.AXI - case ExtMemSize => Dump("MEM_SIZE", 0x10000000L) - case RTCPeriod => 100 // gives 10 MHz RTC assuming 1 GHz uncore clock - case BuildExampleTop => - (p: Parameters) => LazyModule(new ExampleTop(new DefaultCoreplex()(_))(p)) - case SimMemLatency => 0 - case _ => throw new CDEMatchError - } - } + (pname,site,here) => pname match { + //Memory Parameters + case TLEmitMonitors => true + case NExtTopInterrupts => 2 + case SOCBusConfig => site(L1toL2Config) + case PeripheryBusConfig => TLBusConfig(beatBytes = 4) + case PeripheryBusArithmetic => true + // Note that PLIC asserts that this is > 0. 
+ case IncludeJtagDTM => false + case ExtMem => AXIMasterConfig(0x80000000L, 0x10000000L, 8, 4) + case ExtBus => AXIMasterConfig(0x60000000L, 0x20000000L, 8, 4) + case RTCPeriod => 100 // gives 10 MHz RTC assuming 1 GHz uncore clock + case _ => throw new CDEMatchError }) class BaseConfig extends Config(new BaseCoreplexConfig ++ new BasePlatformConfig) @@ -91,15 +55,15 @@ class DefaultL2FPGAConfig extends Config( class PLRUL2Config extends Config(new WithPLRU ++ new DefaultL2Config) class WithNMemoryChannels(n: Int) extends Config( - (pname,site,here) => pname match { - case NMemoryChannels => Dump("N_MEM_CHANNELS", n) + (pname,site,here,up) => pname match { + case BankedL2Config => up(BankedL2Config).copy(nMemoryChannels = n) case _ => throw new CDEMatchError } ) class WithExtMemSize(n: Long) extends Config( - (pname,site,here) => pname match { - case ExtMemSize => Dump("MEM_SIZE", n) + (pname,site,here,up) => pname match { + case ExtMem => up(ExtMem).copy(size = n) case _ => throw new CDEMatchError } ) @@ -129,8 +93,8 @@ class DualChannelDualBankL2Config extends Config( class RoccExampleConfig extends Config(new WithRoccExample ++ new BaseConfig) class WithEdgeDataBits(dataBits: Int) extends Config( - (pname, site, here) => pname match { - case EdgeDataBits => dataBits + (pname, site, here, up) => pname match { + case ExtMem => up(ExtMem).copy(beatBytes = dataBits/8) case _ => throw new CDEMatchError }) @@ -150,8 +114,6 @@ class OctoChannelBenchmarkConfig extends Config(new WithNMemoryChannels(8) ++ ne class EightChannelConfig extends Config(new WithNMemoryChannels(8) ++ new BaseConfig) -class SplitL2MetadataTestConfig extends Config(new WithSplitL2Metadata ++ new DefaultL2Config) - class DualCoreConfig extends Config( new WithNCores(2) ++ new WithL2Cache ++ new BaseConfig) @@ -160,13 +122,6 @@ class TinyConfig extends Config( new WithSmallCores ++ new WithRV32 ++ new WithStatelessBridge ++ new BaseConfig) -class WithAsyncDebug extends Config ( - (pname, site, 
here) => pname match { - case AsyncDebugBus => true - case _ => throw new CDEMatchError - } -) - class WithJtagDTM extends Config ( (pname, site, here) => pname match { case IncludeJtagDTM => true @@ -176,13 +131,13 @@ class WithJtagDTM extends Config ( class WithNoPeripheryArithAMO extends Config ( (pname, site, here) => pname match { - case PeripheryBusKey => PeripheryBusConfig(arithAMO = false, beatBytes = 4) + case PeripheryBusArithmetic => false } ) class With64BitPeriphery extends Config ( (pname, site, here) => pname match { - case PeripheryBusKey => PeripheryBusConfig(arithAMO = true, beatBytes = 8) + case PeripheryBusConfig => TLBusConfig(beatBytes = 8) } ) diff --git a/src/main/scala/rocketchip/DebugTransport.scala b/src/main/scala/rocketchip/DebugTransport.scala index 895bd3b7..1d4949a3 100644 --- a/src/main/scala/rocketchip/DebugTransport.scala +++ b/src/main/scala/rocketchip/DebugTransport.scala @@ -1,10 +1,10 @@ package rocketchip import Chisel._ -import uncore.devices.{DebugBusIO, AsyncDebugBusCrossing, DebugBusReq, DebugBusResp, DMKey} +import uncore.devices._ import junctions._ import util._ -import cde.{Parameters, Field} +import config._ case object IncludeJtagDTM extends Field[Boolean] @@ -45,13 +45,13 @@ class JtagDTMWithSync(depth: Int = 1, sync: Int = 3)(implicit val p: Parameters) val io = new Bundle { - val jtag = new JTAGIO(true).flip() - val debug = new DebugBusIO()(p) + val jtag = new JTAGIO(true).flip + val debug = new AsyncDebugBusIO } - val req_width = io.debug.req.bits.getWidth - val resp_width = io.debug.resp.bits.getWidth + val req_width = io.debug.req.mem(0).getWidth + val resp_width = io.debug.resp.mem(0).getWidth val jtag_dtm = Module (new DebugTransportModuleJtag(req_width, resp_width)) @@ -62,7 +62,8 @@ class JtagDTMWithSync(depth: Int = 1, sync: Int = 3)(implicit val p: Parameters) val io_debug_bus = Wire (new DebugBusIO) - io.debug <> AsyncDebugBusCrossing(io.jtag.TCK, io.jtag.TRST, io_debug_bus, clock, reset, depth, sync) + 
io.debug.req <> ToAsyncBundle(io_debug_bus.req) + io_debug_bus.resp <> FromAsyncBundle(io.debug.resp) // Translate from straight 'bits' interface of the blackboxes // into the Resp/Req data structures. diff --git a/src/main/scala/rocketchip/ExampleTop.scala b/src/main/scala/rocketchip/ExampleTop.scala index 5b17b242..a8ff4adf 100644 --- a/src/main/scala/rocketchip/ExampleTop.scala +++ b/src/main/scala/rocketchip/ExampleTop.scala @@ -3,40 +3,49 @@ package rocketchip import Chisel._ -import cde.{Parameters, Field} +import config._ import junctions._ import coreplex._ import rocketchip._ /** Example Top with Periphery */ class ExampleTop[+C <: BaseCoreplex](_coreplex: Parameters => C)(implicit p: Parameters) extends BaseTop(_coreplex) + with DirectConnection with PeripheryBootROM - with PeripheryDebug with PeripheryExtInterrupts - with PeripheryMasterMem - with PeripheryMasterAXI4MMIO - with PeripherySlave - with DirectConnection { + with PeripheryMasterAXI4Mem + with PeripheryMasterAXI4MMIO { override lazy val module = new ExampleTopModule(this, () => new ExampleTopBundle(this)) } class ExampleTopBundle[+L <: ExampleTop[BaseCoreplex]](_outer: L) extends BaseTopBundle(_outer) with PeripheryBootROMBundle - with PeripheryDebugBundle with PeripheryExtInterruptsBundle - with PeripheryMasterMemBundle + with PeripheryMasterAXI4MemBundle with PeripheryMasterAXI4MMIOBundle - with PeripherySlaveBundle class ExampleTopModule[+L <: ExampleTop[BaseCoreplex], +B <: ExampleTopBundle[L]](_outer: L, _io: () => B) extends BaseTopModule(_outer, _io) with PeripheryBootROMModule - with PeripheryDebugModule with PeripheryExtInterruptsModule - with PeripheryMasterMemModule + with PeripheryMasterAXI4MemModule with PeripheryMasterAXI4MMIOModule - with PeripherySlaveModule - with HardwiredResetVector - with DirectConnectionModule + +class ExampleRocketTop[+C <: DefaultCoreplex](_coreplex: Parameters => C)(implicit p: Parameters) extends ExampleTop(_coreplex) + with PeripheryDTM + with 
PeripheryCounter + with HardwiredResetVector { + override lazy val module = new ExampleRocketTopModule(this, () => new ExampleRocketTopBundle(this)) +} + +class ExampleRocketTopBundle[+L <: ExampleRocketTop[DefaultCoreplex]](_outer: L) extends ExampleTopBundle(_outer) + with PeripheryDTMBundle + with PeripheryCounterBundle + with HardwiredResetVectorBundle + +class ExampleRocketTopModule[+L <: ExampleRocketTop[DefaultCoreplex], +B <: ExampleRocketTopBundle[L]](_outer: L, _io: () => B) extends ExampleTopModule(_outer, _io) + with PeripheryDTMModule + with PeripheryCounterModule + with HardwiredResetVectorModule /** Example Top with TestRAM */ class ExampleTopWithTestRAM[+C <: BaseCoreplex](_coreplex: Parameters => C)(implicit p: Parameters) extends ExampleTop(_coreplex) diff --git a/src/main/scala/rocketchip/Generator.scala b/src/main/scala/rocketchip/Generator.scala index 28e101a3..a1eb5356 100644 --- a/src/main/scala/rocketchip/Generator.scala +++ b/src/main/scala/rocketchip/Generator.scala @@ -77,7 +77,6 @@ object Generator extends util.GeneratorApp { val longName = names.topModuleProject + "." 
+ names.configs generateFirrtl generateTestSuiteMakefrags - generateDSEConstraints generateConfigString generateGraphML generateParameterDump diff --git a/src/main/scala/rocketchip/Periphery.scala b/src/main/scala/rocketchip/Periphery.scala index 0b6783d8..aaf6fd4f 100644 --- a/src/main/scala/rocketchip/Periphery.scala +++ b/src/main/scala/rocketchip/Periphery.scala @@ -3,7 +3,7 @@ package rocketchip import Chisel._ -import cde.{Parameters, Field} +import config._ import junctions._ import junctions.NastiConstants._ import diplomacy._ @@ -19,110 +19,27 @@ import rocket.XLen import scala.math.max import coreplex._ -/** Options for memory bus interface */ -object BusType { - sealed trait EnumVal - case object AXI extends EnumVal - case object AHB extends EnumVal - case object TL extends EnumVal - val busTypes = Seq(AXI, AHB, TL) -} - -/** Memory channel controls */ -case object TMemoryChannels extends Field[BusType.EnumVal] -/** External Bus controls */ -case object NExtBusAXIChannels extends Field[Int] -/** Async configurations */ -case object AsyncBusChannels extends Field[Boolean] -case object AsyncDebugBus extends Field[Boolean] -case object AsyncMemChannels extends Field[Boolean] /** Specifies the size of external memory */ -case object ExtMemSize extends Field[Long] +case class AXIMasterConfig(base: Long, size: Long, beatBytes: Int, idBits: Int) +case object ExtMem extends Field[AXIMasterConfig] +case object ExtBus extends Field[AXIMasterConfig] /** Specifies the number of external interrupts */ case object NExtTopInterrupts extends Field[Int] /** Source of RTC. 
First bundle is TopIO.extra, Second bundle is periphery.io.extra **/ case object RTCPeriod extends Field[Int] /* Specifies the periphery bus configuration */ -case class PeripheryBusConfig(arithAMO: Boolean, beatBytes: Int = 4) -case object PeripheryBusKey extends Field[PeripheryBusConfig] +case object PeripheryBusConfig extends Field[TLBusConfig] +case object PeripheryBusArithmetic extends Field[Boolean] /* Specifies the SOC-bus configuration */ -case class SOCBusConfig(beatBytes: Int = 4) -case object SOCBusKey extends Field[SOCBusConfig] - -/* Specifies the data and id width at the chip boundary */ -case object EdgeDataBits extends Field[Int] -case object EdgeIDBits extends Field[Int] - -object PeripheryUtils { - def addQueueAXI(source: NastiIO) = { - val sink = Wire(source) - sink.ar <> Queue(source.ar, 1) - sink.aw <> Queue(source.aw, 1) - sink.w <> Queue(source.w) - source.r <> Queue(sink.r) - source.b <> Queue(sink.b, 1) - sink - } - def convertTLtoAXI(tl: ClientUncachedTileLinkIO) = { - val bridge = Module(new NastiIOTileLinkIOConverter()(tl.p)) - bridge.io.tl <> tl - addQueueAXI(bridge.io.nasti) - } - def convertTLtoAHB(tl: ClientUncachedTileLinkIO, atomics: Boolean) = { - val bridge = Module(new AHBBridge(atomics)(tl.p)) - bridge.io.tl <> tl - bridge.io.ahb - } -} +case object SOCBusConfig extends Field[TLBusConfig] /** Utility trait for quick access to some relevant parameters */ trait HasPeripheryParameters { implicit val p: Parameters - lazy val tMemChannels = p(TMemoryChannels) - lazy val nMemChannels = p(NMemoryChannels) - lazy val nMemAXIChannels = if (tMemChannels == BusType.AXI) nMemChannels else 0 - lazy val nMemAHBChannels = if (tMemChannels == BusType.AHB) nMemChannels else 0 - lazy val nMemTLChannels = if (tMemChannels == BusType.TL) nMemChannels else 0 - lazy val edgeSlaveParams = p.alterPartial({ case TLId => "EdgetoSlave" }) - lazy val edgeMemParams = p.alterPartial({ case TLId => "MCtoEdge" }) - lazy val peripheryBusConfig = 
p(PeripheryBusKey) - lazy val socBusConfig = p(SOCBusKey) + lazy val peripheryBusConfig = p(PeripheryBusConfig) + lazy val socBusConfig = p(SOCBusConfig) lazy val cacheBlockBytes = p(CacheBlockBytes) -} - -///// - -trait PeripheryDebug { - this: TopNetwork => -} - -trait PeripheryDebugBundle { - this: TopNetworkBundle { - val outer: PeripheryDebug - } => - val debug_clk = (p(AsyncDebugBus) && !p(IncludeJtagDTM)).option(Clock(INPUT)) - val debug_rst = (p(AsyncDebugBus) && !p(IncludeJtagDTM)).option(Bool(INPUT)) - val debug = (!p(IncludeJtagDTM)).option(new DebugBusIO()(p).flip) - val jtag = p(IncludeJtagDTM).option(new JTAGIO(true).flip) -} - -trait PeripheryDebugModule { - this: TopNetworkModule { - val outer: PeripheryDebug - val io: PeripheryDebugBundle - } => - - if (p(IncludeJtagDTM)) { - // JtagDTMWithSync is a wrapper which - // handles the synchronization as well. - val dtm = Module (new JtagDTMWithSync()(p)) - dtm.io.jtag <> io.jtag.get - coreplexDebug <> dtm.io.debug - } else { - coreplexDebug <> - (if (p(AsyncDebugBus)) AsyncDebugBusFrom(io.debug_clk.get, io.debug_rst.get, io.debug.get) - else io.debug.get) - } + lazy val peripheryBusArithmetic = p(PeripheryBusArithmetic) } ///// @@ -153,47 +70,48 @@ trait PeripheryExtInterruptsModule { ///// -trait PeripheryMasterMem { - this: TopNetwork => +trait PeripheryMasterAXI4Mem { + this: BaseTop[BaseCoreplex] with TopNetwork => + + private val config = p(ExtMem) + private val channels = coreplexMem.size + + val mem_axi4 = coreplexMem.zipWithIndex.map { case (node, i) => + val c_size = config.size/channels + val c_base = config.base + c_size*i + + val axi4 = AXI4BlindOutputNode(AXI4SlavePortParameters( + slaves = Seq(AXI4SlaveParameters( + address = List(AddressSet(c_base, c_size-1)), + regionType = RegionType.UNCACHED, // cacheable + executable = true, + supportsWrite = TransferSizes(1, 256), // The slave supports 1-256 byte transfers + supportsRead = TransferSizes(1, 256), + interleavedId = Some(0))), // slave 
does not interleave read responses + beatBytes = config.beatBytes)) + + axi4 := + // AXI4Fragmenter(lite=false, maxInFlight = 20)( // beef device up to support awlen = 0xff + TLToAXI4(idBits = config.idBits)( // use idBits = 0 for AXI4-Lite + TLWidthWidget(coreplex.l1tol2_beatBytes)( // convert width before attaching to the l1tol2 + node)) + + axi4 + } } -trait PeripheryMasterMemBundle { +trait PeripheryMasterAXI4MemBundle { this: TopNetworkBundle { - val outer: PeripheryMasterMem + val outer: PeripheryMasterAXI4Mem } => - val mem_clk = p(AsyncMemChannels).option(Vec(nMemChannels, Clock(INPUT))) - val mem_rst = p(AsyncMemChannels).option(Vec(nMemChannels, Bool (INPUT))) - val mem_axi = Vec(nMemAXIChannels, new NastiIO) - val mem_ahb = Vec(nMemAHBChannels, new HastiMasterIO) - val mem_tl = Vec(nMemTLChannels, new ClientUncachedTileLinkIO()(edgeMemParams)) + val mem_axi4 = outer.mem_axi4.map(_.bundleOut).toList.headOption // !!! remove headOption when Seq supported } -trait PeripheryMasterMemModule { +trait PeripheryMasterAXI4MemModule { this: TopNetworkModule { - val outer: PeripheryMasterMem - val io: PeripheryMasterMemBundle + val outer: PeripheryMasterAXI4Mem + val io: PeripheryMasterAXI4MemBundle } => - - val edgeMem = coreplexMem.map(TileLinkWidthAdapter(_, edgeMemParams)) - - // Abuse the fact that zip takes the shorter of the two lists - ((io.mem_axi zip edgeMem) zipWithIndex) foreach { case ((axi, mem), idx) => - val axi_sync = PeripheryUtils.convertTLtoAXI(mem) - axi_sync.ar.bits.cache := CACHE_NORMAL_NOCACHE_BUF - axi_sync.aw.bits.cache := CACHE_NORMAL_NOCACHE_BUF - axi <> ( - if (!p(AsyncMemChannels)) axi_sync - else AsyncNastiTo(io.mem_clk.get(idx), io.mem_rst.get(idx), axi_sync) - ) - } - - (io.mem_ahb zip edgeMem) foreach { case (ahb, mem) => - ahb <> PeripheryUtils.convertTLtoAHB(mem, atomics = false) - } - - (io.mem_tl zip edgeMem) foreach { case (tl, mem) => - tl <> TileLinkEnqueuer(mem, 2) - } } ///// @@ -202,18 +120,19 @@ trait 
PeripheryMasterMemModule { trait PeripheryMasterAXI4MMIO { this: TopNetwork => + private val config = p(ExtBus) val mmio_axi4 = AXI4BlindOutputNode(AXI4SlavePortParameters( slaves = Seq(AXI4SlaveParameters( - address = List(AddressSet(0x60000000L, 0x1fffffffL)), + address = List(AddressSet(BigInt(config.base), config.size-1)), executable = true, // Can we run programs on this memory? supportsWrite = TransferSizes(1, 256), // The slave supports 1-256 byte transfers supportsRead = TransferSizes(1, 256), interleavedId = Some(0))), // slave does not interleave read responses - beatBytes = 8)) // 64-bit AXI interface + beatBytes = config.beatBytes)) mmio_axi4 := // AXI4Fragmenter(lite=false, maxInFlight = 20)( // beef device up to support awlen = 0xff - TLToAXI4(idBits = 4)( // use idBits = 0 for AXI4-Lite + TLToAXI4(idBits = config.idBits)( // use idBits = 0 for AXI4-Lite TLWidthWidget(socBusConfig.beatBytes)( // convert width before attaching to socBus socBus.node)) } @@ -235,48 +154,6 @@ trait PeripheryMasterAXI4MMIOModule { ///// -trait PeripherySlave { - this: TopNetwork { - val pBusMasters: RangeManager - } => - - if (p(NExtBusAXIChannels) > 0) pBusMasters.add("ext", 1) // NExtBusAXIChannels are arbitrated into one TL port -} - -trait PeripherySlaveBundle { - this: TopNetworkBundle { - val outer: PeripherySlave - } => - val bus_clk = p(AsyncBusChannels).option(Vec(p(NExtBusAXIChannels), Clock(INPUT))) - val bus_rst = p(AsyncBusChannels).option(Vec(p(NExtBusAXIChannels), Bool (INPUT))) - val bus_axi = Vec(p(NExtBusAXIChannels), new NastiIO).flip -} - -trait PeripherySlaveModule { - this: TopNetworkModule { - val outer: PeripherySlave { val pBusMasters: RangeManager } - val io: PeripherySlaveBundle - } => - - if (p(NExtBusAXIChannels) > 0) { - val arb = Module(new NastiArbiter(p(NExtBusAXIChannels))) - ((io.bus_axi zip arb.io.master) zipWithIndex) foreach { case ((bus, port), idx) => - port <> ( - if (!p(AsyncBusChannels)) bus - else 
AsyncNastiFrom(io.bus_clk.get(idx), io.bus_rst.get(idx), bus) - ) - } - val conv = Module(new TileLinkIONastiIOConverter()(edgeSlaveParams)) - conv.io.nasti <> arb.io.slave - - val (r_start, r_end) = outer.pBusMasters.range("ext") - require(r_end - r_start == 1, "RangeManager should return 1 slot") - TileLinkWidthAdapter(coreplexSlave(r_start), conv.io.tl) - } -} - -///// - trait PeripheryBootROM { this: TopNetwork => @@ -341,12 +218,3 @@ trait PeripheryTestBusMasterModule { val io: PeripheryTestBusMasterBundle } => } - -///// - -trait HardwiredResetVector { - this: TopNetworkModule { - val outer: BaseTop[BaseCoreplex] - } => - outer.coreplex.module.io.resetVector := UInt(0x1000) // boot ROM -} diff --git a/src/main/scala/rocketchip/RISCVPlatform.scala b/src/main/scala/rocketchip/RISCVPlatform.scala new file mode 100644 index 00000000..29c930c3 --- /dev/null +++ b/src/main/scala/rocketchip/RISCVPlatform.scala @@ -0,0 +1,92 @@ +// See LICENSE for license details. + +package rocketchip + +import Chisel._ +import config._ +import diplomacy._ +import uncore.tilelink2._ +import uncore.devices._ +import util._ +import junctions.JTAGIO +import coreplex._ + +trait PeripheryJTAG extends TopNetwork { + val module: PeripheryJTAGModule + val coreplex: CoreplexRISCVPlatform +} + +trait PeripheryJTAGBundle extends TopNetworkBundle { + val outer: PeripheryJTAG + + val jtag = new JTAGIO(true).flip +} + +trait PeripheryJTAGModule extends TopNetworkModule { + val outer: PeripheryJTAG + val io: PeripheryJTAGBundle + + val dtm = Module (new JtagDTMWithSync) + dtm.io.jtag <> io.jtag + outer.coreplex.module.io.debug <> dtm.io.debug + + dtm.clock := io.jtag.TCK + dtm.reset := io.jtag.TRST +} + +trait PeripheryDTM extends TopNetwork { + val module: PeripheryDTMModule + val coreplex: CoreplexRISCVPlatform +} + +trait PeripheryDTMBundle extends TopNetworkBundle { + val outer: PeripheryDTM + + val debug = new DebugBusIO().flip +} + +trait PeripheryDTMModule extends TopNetworkModule { + val 
outer: PeripheryDTM + val io: PeripheryDTMBundle + + outer.coreplex.module.io.debug <> ToAsyncDebugBus(io.debug) +} + +trait PeripheryCounter extends TopNetwork { + val module: PeripheryCounterModule + val coreplex: CoreplexRISCVPlatform +} + +trait PeripheryCounterBundle extends TopNetworkBundle { + val outer: PeripheryCounter +} + +trait PeripheryCounterModule extends TopNetworkModule { + val outer: PeripheryCounter + val io: PeripheryCounterBundle + + { + val period = p(rocketchip.RTCPeriod) + val rtcCounter = RegInit(UInt(0, width = log2Up(period))) + val rtcWrap = rtcCounter === UInt(period-1) + rtcCounter := Mux(rtcWrap, UInt(0), rtcCounter + UInt(1)) + + outer.coreplex.module.io.rtcToggle := rtcCounter(log2Up(period)-1) + } +} + +trait HardwiredResetVector extends TopNetwork { + val module: HardwiredResetVectorModule + val coreplex: CoreplexRISCVPlatform +} + +trait HardwiredResetVectorBundle extends TopNetworkBundle { + val outer: HardwiredResetVector +} + +trait HardwiredResetVectorModule extends TopNetworkModule { + val outer: HardwiredResetVector + val io: HardwiredResetVectorBundle + + outer.coreplex.module.io.resetVector := UInt(0x1000) // boot ROM +} diff --git a/src/main/scala/rocketchip/TestHarness.scala b/src/main/scala/rocketchip/TestHarness.scala index 5d07d1cd..092a290b 100644 --- a/src/main/scala/rocketchip/TestHarness.scala +++ b/src/main/scala/rocketchip/TestHarness.scala @@ -3,125 +3,48 @@ package rocketchip import Chisel._ -import cde.{Parameters, Field} +import config._ import junctions._ -import junctions.NastiConstants._ -import util.LatencyPipe - -case object BuildExampleTop extends Field[Parameters => ExampleTop[coreplex.BaseCoreplex]] -case object SimMemLatency extends Field[Int] +import diplomacy._ +import coreplex._ +import uncore.axi4._ class TestHarness(q: Parameters) extends Module { val io = new Bundle { val success = Bool(OUTPUT) } - val dut = Module(q(BuildExampleTop)(q).module) - implicit val p = dut.p - - // This test 
harness isn't especially flexible yet - require(dut.io.mem_clk.isEmpty) - require(dut.io.mem_rst.isEmpty) - require(dut.io.mem_ahb.isEmpty) - require(dut.io.mem_tl.isEmpty) - require(dut.io.bus_clk.isEmpty) - require(dut.io.bus_rst.isEmpty) + implicit val p = q + val dut = Module(LazyModule(new ExampleRocketTop(new DefaultCoreplex()(_))).module) for (int <- dut.io.interrupts(0)) int := Bool(false) - if (dut.io.mem_axi.nonEmpty) { - val memSize = p(ExtMemSize) - require(memSize % dut.io.mem_axi.size == 0) - for (axi <- dut.io.mem_axi) { - val mem = Module(new SimAXIMem(memSize / dut.io.mem_axi.size)) - mem.io.axi.ar <> axi.ar - mem.io.axi.aw <> axi.aw - mem.io.axi.w <> axi.w - axi.r <> LatencyPipe(mem.io.axi.r, p(SimMemLatency)) - axi.b <> LatencyPipe(mem.io.axi.b, p(SimMemLatency)) + if (dut.io.mem_axi4.nonEmpty) { + val memSize = p(ExtMem).size + require(memSize % dut.io.mem_axi4.size == 0) + for (axi <- dut.io.mem_axi4) { + Module(LazyModule(new SimAXIMem(memSize / dut.io.mem_axi4.size)).module).io.axi <> axi } } - if (!p(IncludeJtagDTM)) { - // Todo: enable the usage of different clocks - // to test the synchronizer more aggressively. 
- val dtm_clock = clock - val dtm_reset = reset - if (dut.io.debug_clk.isDefined) dut.io.debug_clk.get := dtm_clock - if (dut.io.debug_rst.isDefined) dut.io.debug_rst.get := dtm_reset - val dtm = Module(new SimDTM).connect(dtm_clock, dtm_reset, dut.io.debug.get, - dut.io.success, io.success) - } else { - val jtag = Module(new JTAGVPI).connect(dut.io.jtag.get, reset, io.success) - } - - for (bus_axi <- dut.io.bus_axi) { - bus_axi.ar.valid := Bool(false) - bus_axi.aw.valid := Bool(false) - bus_axi.w.valid := Bool(false) - bus_axi.r.ready := Bool(false) - bus_axi.b.ready := Bool(false) - } - - for (mmio_axi <- dut.io.mmio_axi) { - val slave = Module(new NastiErrorSlave) - slave.io <> mmio_axi - } + val dtm = Module(new SimDTM).connect(clock, reset, dut.io.debug, io.success) + val mmio_sim = Module(LazyModule(new SimAXIMem(4096)).module) + mmio_sim.io.axi <> dut.io.mmio_axi } -class SimAXIMem(size: BigInt)(implicit p: Parameters) extends NastiModule()(p) { - val io = new Bundle { - val axi = new NastiIO().flip - } +class SimAXIMem(size: BigInt)(implicit p: Parameters) extends LazyModule { + val config = p(ExtMem) - val rValid = Reg(init = Bool(false)) - val ar = RegEnable(io.axi.ar.bits, io.axi.ar.fire()) - io.axi.ar.ready := !rValid - when (io.axi.ar.fire()) { rValid := Bool(true) } - when (io.axi.r.fire()) { - assert(ar.burst === NastiConstants.BURST_INCR) - ar.addr := ar.addr + (UInt(1) << ar.size) - ar.len := ar.len - UInt(1) - when (ar.len === UInt(0)) { rValid := Bool(false) } - } + val node = AXI4BlindInputNode(AXI4MasterPortParameters(Seq(AXI4MasterParameters(IdRange(0, 1 << config.idBits))))) + val sram = LazyModule(new AXI4RAM(AddressSet(0, size-1), beatBytes = config.beatBytes)) + sram.node := AXI4Buffer()(AXI4Fragmenter(maxInFlight = 4)(node)) - val w = io.axi.w.bits - require((size * 8) % nastiXDataBits == 0) - val depth = (size * 8) / nastiXDataBits - val mem = Mem(depth.toInt, w.data) - - val wValid = Reg(init = Bool(false)) - val bValid = Reg(init = 
Bool(false)) - val aw = RegEnable(io.axi.aw.bits, io.axi.aw.fire()) - io.axi.aw.ready := !wValid && !bValid - io.axi.w.ready := wValid - when (io.axi.b.fire()) { bValid := Bool(false) } - when (io.axi.aw.fire()) { wValid := Bool(true) } - when (io.axi.w.fire()) { - assert(aw.burst === NastiConstants.BURST_INCR) - aw.addr := aw.addr + (UInt(1) << aw.size) - aw.len := aw.len - UInt(1) - when (aw.len === UInt(0)) { - wValid := Bool(false) - bValid := Bool(true) + lazy val module = new LazyModuleImp(this) { + val io = new Bundle { + val axi = node.bundleIn } - - def row = mem((aw.addr >> log2Ceil(nastiXDataBits/8))(log2Ceil(depth)-1, 0)) - val mask = FillInterleaved(8, w.strb) - val newData = mask & w.data | ~mask & row - row := newData } - - io.axi.b.valid := bValid - io.axi.b.bits.id := aw.id - io.axi.b.bits.resp := RESP_OKAY - - io.axi.r.valid := rValid - io.axi.r.bits.id := ar.id - io.axi.r.bits.data := mem((ar.addr >> log2Ceil(nastiXDataBits/8))(log2Ceil(depth)-1, 0)) - io.axi.r.bits.resp := RESP_OKAY - io.axi.r.bits.last := ar.len === UInt(0) } class SimDTM(implicit p: Parameters) extends BlackBox { @@ -132,13 +55,12 @@ class SimDTM(implicit p: Parameters) extends BlackBox { val exit = UInt(OUTPUT, 32) } - def connect(tbclk: Clock, tbreset: Bool, dutio: uncore.devices.DebugBusIO, - dutsuccess: Bool, tbsuccess: Bool) = { + def connect(tbclk: Clock, tbreset: Bool, dutio: uncore.devices.DebugBusIO, tbsuccess: Bool) = { io.clk := tbclk io.reset := tbreset dutio <> io.debug - tbsuccess := dutsuccess || io.exit === UInt(1) + tbsuccess := io.exit === UInt(1) when (io.exit >= UInt(2)) { printf("*** FAILED *** (exit code = %d)\n", io.exit >> UInt(1)) stop(1) diff --git a/src/main/scala/rocketchip/Utils.scala b/src/main/scala/rocketchip/Utils.scala index d4839120..f0aaeb9d 100644 --- a/src/main/scala/rocketchip/Utils.scala +++ b/src/main/scala/rocketchip/Utils.scala @@ -2,7 +2,7 @@ package rocketchip -import cde.{Parameters, Dump} +import config._ import junctions._ import 
diplomacy._ import uncore.devices._ @@ -77,31 +77,17 @@ object GenerateGlobalAddrMap { case (e, i) => if (i == 0) e else e.copy(name = e.name + "_" + i) }).flatten.toList - val memBase = 0x80000000L - val memSize = p(ExtMemSize) - Dump("MEM_BASE", memBase) - val tl2 = AddrMapEntry("TL2", new AddrMap(uniquelyNamedTL2Devices, collapse = true)) - val mem = AddrMapEntry("mem", MemRange(memBase, memSize, MemAttr(AddrMapProt.RWX, true))) - AddrMap((tl2 +: (p(NMemoryChannels) > 0).option(mem).toSeq):_*) + AddrMap(tl2) } } object GenerateConfigString { def apply(p: Parameters, clint: CoreplexLocalInterrupter, plic: TLPLIC, peripheryManagers: Seq[TLManagerParameters]) = { val c = CoreplexParameters()(p) - val addrMap = p(GlobalAddrMap) val res = new StringBuilder res append plic.module.globalConfigString res append clint.module.globalConfigString - if (addrMap contains "mem") { - res append "ram {\n" - res append " 0 {\n" - res append s" addr 0x${addrMap("mem").start.toString(16)};\n" - res append s" size 0x${addrMap("mem").size.toString(16)};\n" - res append " };\n" - res append "};\n" - } res append "core {\n" for (i <- 0 until c.nTiles) { // TODO heterogeneous tiles val isa = { diff --git a/src/main/scala/uncore/Builder.scala b/src/main/scala/uncore/Builder.scala index a75f5688..e55cdaa4 100644 --- a/src/main/scala/uncore/Builder.scala +++ b/src/main/scala/uncore/Builder.scala @@ -1,7 +1,7 @@ package uncore import Chisel._ -import cde.{Config, Parameters, ParameterDump, Knob, Dump, CDEMatchError} +import config._ import junctions.PAddrBits import uncore.tilelink._ import uncore.agents._ @@ -22,99 +22,13 @@ object UncoreBuilder extends App { val gen = () => Class.forName(s"uncore.$topModuleName") - .getConstructor(classOf[cde.Parameters]) + .getConstructor(classOf[Parameters]) .newInstance(paramsFromConfig) .asInstanceOf[Module] chiselMain.run(args.drop(2), gen) val pdFile = new java.io.FileWriter(s"${Driver.targetDir}/$topModuleName.prm") - 
pdFile.write(ParameterDump.getDump) pdFile.close } - -class DefaultL2Config extends Config ( - topDefinitions = { (pname,site,here) => - pname match { - case PAddrBits => 32 - case CacheId => 0 - case CacheName => "L2Bank" - case TLId => "L1toL2" - case InnerTLId => "L1toL2" - case OuterTLId => "L2toMC" - case "N_CACHED" => Dump("N_CACHED",here[Int]("CACHED_CLIENTS_PER_PORT")) - case "N_UNCACHED" => Dump("N_UNCACHED",here[Int]("MAX_CLIENTS_PER_PORT") - here[Int]("N_CACHED")) - case "MAX_CLIENT_XACTS" => 4 - case "MAX_CLIENTS_PER_PORT" => Knob("NTILES") - case "CACHED_CLIENTS_PER_PORT" => Knob("N_CACHED_TILES") - case TLKey("L1toL2") => - TileLinkParameters( - coherencePolicy = new MESICoherence(site(L2DirectoryRepresentation)), - nManagers = 1, - nCachingClients = here[Int]("N_CACHED"), - nCachelessClients = here[Int]("N_UNCACHED"), - maxClientXacts = here[Int]("MAX_CLIENT_XACTS"), - maxClientsPerPort = here[Int]("MAX_CLIENTS_PER_PORT"), - maxManagerXacts = site(NAcquireTransactors) + 2, - dataBits = site(CacheBlockBytes)*8, - dataBeats = 2) - case TLKey("L2toMC") => - TileLinkParameters( - coherencePolicy = new MEICoherence(new NullRepresentation(1)), - nManagers = 1, - nCachingClients = 1, - nCachelessClients = 0, - maxClientXacts = 1, - maxClientsPerPort = site(NAcquireTransactors) + 2, - maxManagerXacts = 1, - dataBits = site(CacheBlockBytes)*8, - dataBeats = 2) - case CacheBlockBytes => 64 - case CacheBlockOffsetBits => log2Up(here(CacheBlockBytes)) - case "L2_SETS" => Knob("L2_SETS") - case NSets => Dump("L2_SETS",here[Int]("L2_SETS")) - case NWays => Knob("L2_WAYS") - case RowBits => site(TLKey(site(TLId))).dataBitsPerBeat - case CacheIdBits => Dump("CACHE_ID_BITS",1) - case L2StoreDataQueueDepth => 1 - case NAcquireTransactors => Dump("N_ACQUIRE_TRANSACTORS",2) - case NSecondaryMisses => 4 - case L2DirectoryRepresentation => new FullRepresentation(here[Int]("N_CACHED")) - case L2Replacer => () => new SeqRandom(site(NWays)) - case ECCCode => None - case 
AmoAluOperandBits => 64 - case SplitMetadata => false - case _ => throw new CDEMatchError - // case XLen => 128 - }}, - knobValues = { - case "L2_WAYS" => 1 - case "L2_SETS" => 1024 - case "NTILES" => 2 - case "N_CACHED_TILES" => 2 - case "L2_CAPACITY_IN_KB" => 256 - case _ => throw new CDEMatchError - } -) - -class WithPLRU extends Config( - (pname, site, here) => pname match { - case L2Replacer => () => new SeqPLRU(site(NSets), site(NWays)) - case _ => throw new CDEMatchError - }) - -class PLRUL2Config extends Config(new WithPLRU ++ new DefaultL2Config) - -class With1L2Ways extends Config(knobValues = { case "L2_WAYS" => 1; case _ => throw new CDEMatchError }) -class With2L2Ways extends Config(knobValues = { case "L2_WAYS" => 2; case _ => throw new CDEMatchError }) -class With4L2Ways extends Config(knobValues = { case "L2_WAYS" => 4; case _ => throw new CDEMatchError }) - -class With1Cached extends Config(knobValues = { case "N_CACHED_TILES" => 1; case _ => throw new CDEMatchError }) -class With2Cached extends Config(knobValues = { case "N_CACHED_TILES" => 2; case _ => throw new CDEMatchError }) - - -class W1Cached1WaysConfig extends Config(new With1L2Ways ++ new With1Cached ++ new DefaultL2Config) -class W1Cached2WaysConfig extends Config(new With2L2Ways ++ new With1Cached ++ new DefaultL2Config) -class W2Cached1WaysConfig extends Config(new With1L2Ways ++ new With2Cached ++ new DefaultL2Config) -class W2Cached2WaysConfig extends Config(new With2L2Ways ++ new With2Cached ++ new DefaultL2Config) diff --git a/src/main/scala/uncore/agents/Agents.scala b/src/main/scala/uncore/agents/Agents.scala index afed6636..0c3725a7 100644 --- a/src/main/scala/uncore/agents/Agents.scala +++ b/src/main/scala/uncore/agents/Agents.scala @@ -3,7 +3,7 @@ package uncore.agents import Chisel._ -import cde.{Parameters, Field} +import config._ import junctions.PAddrBits import uncore.tilelink._ import uncore.converters._ diff --git a/src/main/scala/uncore/agents/Broadcast.scala 
b/src/main/scala/uncore/agents/Broadcast.scala index 6226b83e..f8ba7d17 100644 --- a/src/main/scala/uncore/agents/Broadcast.scala +++ b/src/main/scala/uncore/agents/Broadcast.scala @@ -8,7 +8,7 @@ import uncore.tilelink._ import uncore.constants._ import uncore.util._ import util._ -import cde.Parameters +import config._ class L2BroadcastHub(implicit p: Parameters) extends HierarchicalCoherenceAgent()(p) { diff --git a/src/main/scala/uncore/agents/Bufferless.scala b/src/main/scala/uncore/agents/Bufferless.scala index 5371d74a..985508d1 100644 --- a/src/main/scala/uncore/agents/Bufferless.scala +++ b/src/main/scala/uncore/agents/Bufferless.scala @@ -6,7 +6,7 @@ import Chisel._ import uncore.coherence._ import uncore.tilelink._ import uncore.constants._ -import cde.Parameters +import config._ class BufferlessBroadcastHub(implicit p: Parameters) extends HierarchicalCoherenceAgent()(p) { diff --git a/src/main/scala/uncore/agents/Cache.scala b/src/main/scala/uncore/agents/Cache.scala index 47b3628e..6f79a945 100644 --- a/src/main/scala/uncore/agents/Cache.scala +++ b/src/main/scala/uncore/agents/Cache.scala @@ -12,38 +12,43 @@ import uncore.tilelink._ import uncore.constants._ import uncore.util._ import util._ -import cde.{Parameters, Field} +import config._ + +case class CacheConfig( + nSets: Int, + nWays: Int, + rowBits: Int, + nTLBEntries: Int, + cacheIdBits: Int, + splitMetadata: Boolean) +case class CacheName(id: String) extends Field[CacheConfig] +case object CacheName extends Field[CacheName] -case object CacheName extends Field[String] -case object NSets extends Field[Int] -case object NWays extends Field[Int] -case object RowBits extends Field[Int] case object Replacer extends Field[() => ReplacementPolicy] case object L2Replacer extends Field[() => SeqReplacementPolicy] case object NPrimaryMisses extends Field[Int] case object NSecondaryMisses extends Field[Int] case object CacheBlockBytes extends Field[Int] case object ECCCode extends Field[Option[Code]] 
-case object CacheIdBits extends Field[Int] case object CacheId extends Field[Int] -case object SplitMetadata extends Field[Boolean] trait HasCacheParameters { implicit val p: Parameters - val nSets = p(NSets) + val cacheConfig = p(p(CacheName)) + val nSets = cacheConfig.nSets val blockOffBits = p(CacheBlockOffsetBits) - val cacheIdBits = p(CacheIdBits) - val idxBits = log2Up(nSets) + val cacheIdBits = cacheConfig.cacheIdBits + val idxBits = log2Up(cacheConfig.nSets) val untagBits = blockOffBits + cacheIdBits + idxBits val tagBits = p(PAddrBits) - untagBits - val nWays = p(NWays) + val nWays = cacheConfig.nWays val wayBits = log2Up(nWays) val isDM = nWays == 1 - val rowBits = p(RowBits) + val rowBits = cacheConfig.rowBits val rowBytes = rowBits/8 val rowOffBits = log2Up(rowBytes) val code = p(ECCCode).getOrElse(new IdentityCode) - val hasSplitMetadata = p(SplitMetadata) + val hasSplitMetadata = cacheConfig.splitMetadata } abstract class CacheModule(implicit val p: Parameters) extends Module @@ -130,7 +135,6 @@ class SeqPLRU(n_sets: Int, n_ways: Int) extends SeqReplacementPolicy { abstract class Metadata(implicit p: Parameters) extends CacheBundle()(p) { val tag = Bits(width = tagBits) - val coh: CoherenceMetadata } class MetaReadReq(implicit p: Parameters) extends CacheBundle()(p) { diff --git a/src/main/scala/uncore/agents/Mmio.scala b/src/main/scala/uncore/agents/Mmio.scala index 08ccc4d3..39ee70cd 100644 --- a/src/main/scala/uncore/agents/Mmio.scala +++ b/src/main/scala/uncore/agents/Mmio.scala @@ -2,7 +2,7 @@ package uncore.agents import Chisel._ import uncore.tilelink._ -import cde.Parameters +import config._ class MMIOTileLinkManagerData(implicit p: Parameters) extends TLBundle()(p) diff --git a/src/main/scala/uncore/agents/StatelessBridge.scala b/src/main/scala/uncore/agents/StatelessBridge.scala index 0ed818cf..02edacf0 100644 --- a/src/main/scala/uncore/agents/StatelessBridge.scala +++ b/src/main/scala/uncore/agents/StatelessBridge.scala @@ -7,7 +7,7 @@ 
import uncore.coherence._ import uncore.tilelink._ import uncore.constants._ import uncore.devices._ -import cde.{Parameters, Field, Config} +import config._ /** The ManagerToClientStateless Bridge does not maintain any state for the messages * which pass through it. It simply passes the messages back and forth without any diff --git a/src/main/scala/uncore/agents/StoreDataQueue.scala b/src/main/scala/uncore/agents/StoreDataQueue.scala index 041d04db..71aee4ed 100644 --- a/src/main/scala/uncore/agents/StoreDataQueue.scala +++ b/src/main/scala/uncore/agents/StoreDataQueue.scala @@ -3,7 +3,7 @@ package uncore.agents import Chisel._ import uncore.tilelink._ -import cde.{Parameters, Field} +import config._ case object L2StoreDataQueueDepth extends Field[Int] diff --git a/src/main/scala/uncore/agents/Trackers.scala b/src/main/scala/uncore/agents/Trackers.scala index a63735ac..7813a445 100644 --- a/src/main/scala/uncore/agents/Trackers.scala +++ b/src/main/scala/uncore/agents/Trackers.scala @@ -7,7 +7,7 @@ import uncore.coherence._ import uncore.tilelink._ import uncore.util._ import util._ -import cde.{Field, Parameters} +import config._ import scala.math.max case object EnableL2Logging extends Field[Boolean] diff --git a/src/main/scala/uncore/axi4/ToTL.scala b/src/main/scala/uncore/axi4/ToTL.scala index 1921c37a..14a749bc 100644 --- a/src/main/scala/uncore/axi4/ToTL.scala +++ b/src/main/scala/uncore/axi4/ToTL.scala @@ -102,7 +102,7 @@ class AXI4ToTL extends LazyModule val d_resp = Mux(out.d.bits.error, AXI4Parameters.RESP_SLVERR, AXI4Parameters.RESP_OKAY) val d_hasData = edgeOut.hasData(out.d.bits) - val (_, d_last, _) = edgeOut.firstlast(out.d.bits, out.d.fire()) + val d_last = edgeOut.last(out.d) out.d.ready := Mux(d_hasData, ok_r.ready, ok_b.ready) ok_r.valid := out.d.valid && d_hasData diff --git a/src/main/scala/uncore/coherence/Metadata.scala b/src/main/scala/uncore/coherence/Metadata.scala index c0d7a6bf..34fae81a 100644 --- 
a/src/main/scala/uncore/coherence/Metadata.scala +++ b/src/main/scala/uncore/coherence/Metadata.scala @@ -5,7 +5,7 @@ package uncore.coherence import Chisel._ import uncore.tilelink._ import uncore.constants._ -import cde.{Parameters, Field} +import config._ /** Identifies the TLId of the inner network in a hierarchical cache controller */ case object InnerTLId extends Field[String] diff --git a/src/main/scala/uncore/converters/Ahb.scala b/src/main/scala/uncore/converters/Ahb.scala index c298e7b3..d6f236d2 100644 --- a/src/main/scala/uncore/converters/Ahb.scala +++ b/src/main/scala/uncore/converters/Ahb.scala @@ -5,7 +5,7 @@ import junctions._ import uncore.tilelink._ import uncore.util._ import uncore.constants._ -import cde.{Parameters, Field} +import config._ import HastiConstants._ /* We need to translate TileLink requests into operations we can actually execute on AHB. @@ -35,8 +35,7 @@ class AHBRequestIO(implicit p: Parameters) extends HastiMasterIO // AHB stage1: translate TileLink Acquires into AHBRequests class AHBTileLinkIn(supportAtomics: Boolean = false)(implicit val p: Parameters) extends Module with HasHastiParameters - with HasTileLinkParameters - with HasAddrMapParameters { + with HasTileLinkParameters { val io = new Bundle { val acquire = new DecoupledIO(new Acquire).flip // NOTE: acquire must be either a Queue or a Pipe val request = new DecoupledIO(new AHBRequestIO) @@ -238,8 +237,7 @@ class AHBTileLinkIn(supportAtomics: Boolean = false)(implicit val p: Parameters) // AHB stage2: execute AHBRequests class AHBBusMaster(supportAtomics: Boolean = false)(implicit val p: Parameters) extends Module with HasHastiParameters - with HasTileLinkParameters - with HasAddrMapParameters { + with HasTileLinkParameters { val io = new Bundle { val request = new DecoupledIO(new AHBRequestIO).flip val grant = new DecoupledIO(new Grant) @@ -390,8 +388,7 @@ class AHBBusMaster(supportAtomics: Boolean = false)(implicit val p: Parameters) class AHBBridge(supportAtomics: 
Boolean = true)(implicit val p: Parameters) extends Module with HasHastiParameters - with HasTileLinkParameters - with HasAddrMapParameters { + with HasTileLinkParameters { val io = new Bundle { val tl = new ClientUncachedTileLinkIO().flip val ahb = new HastiMasterIO() diff --git a/src/main/scala/uncore/converters/Nasti.scala b/src/main/scala/uncore/converters/Nasti.scala index 3477e976..8ff46afb 100644 --- a/src/main/scala/uncore/converters/Nasti.scala +++ b/src/main/scala/uncore/converters/Nasti.scala @@ -6,7 +6,7 @@ import util.{ReorderQueue, DecoupledHelper} import junctions.NastiConstants._ import uncore.tilelink._ import uncore.constants._ -import cde.Parameters +import config._ import scala.math.min class IdMapper(val inIdBits: Int, val outIdBits: Int, diff --git a/src/main/scala/uncore/converters/Tilelink.scala b/src/main/scala/uncore/converters/Tilelink.scala index 94cc768f..7d6c3f13 100644 --- a/src/main/scala/uncore/converters/Tilelink.scala +++ b/src/main/scala/uncore/converters/Tilelink.scala @@ -8,7 +8,7 @@ import uncore.util._ import uncore.constants._ import uncore.devices.TileLinkTestRAM import unittest.UnitTest -import cde.Parameters +import config._ /** Utilities for safely wrapping a *UncachedTileLink by pinning probe.ready and release.valid low */ object TileLinkIOWrapper { diff --git a/src/main/scala/uncore/devices/Bram.scala b/src/main/scala/uncore/devices/Bram.scala index 447dccfe..e7d6751d 100644 --- a/src/main/scala/uncore/devices/Bram.scala +++ b/src/main/scala/uncore/devices/Bram.scala @@ -1,7 +1,7 @@ package uncore.devices import Chisel._ -import cde.{Parameters, Field} +import config._ import unittest.UnitTest import junctions._ import uncore.tilelink._ diff --git a/src/main/scala/uncore/devices/Debug.scala b/src/main/scala/uncore/devices/Debug.scala index 3ee83e9a..cf3010e3 100644 --- a/src/main/scala/uncore/devices/Debug.scala +++ b/src/main/scala/uncore/devices/Debug.scala @@ -7,7 +7,7 @@ import junctions._ import util._ import 
regmapper._ import uncore.tilelink2._ -import cde.{Parameters, Config, Field} +import config._ // ***************************************** // Constants which are interesting even @@ -301,11 +301,36 @@ class DebugBusResp( ) extends Bundle { * Therefore it has the 'flipped' version of this. */ -class DebugBusIO(implicit val p: cde.Parameters) extends ParameterizedBundle()(p) { +class DebugBusIO(implicit val p: Parameters) extends ParameterizedBundle()(p) { val req = new DecoupledIO(new DebugBusReq(p(DMKey).nDebugBusAddrSize)) val resp = new DecoupledIO(new DebugBusResp).flip() } +class AsyncDebugBusIO(implicit val p: Parameters) extends ParameterizedBundle()(p) { + val req = new AsyncBundle(1, new DebugBusReq(p(DMKey).nDebugBusAddrSize)) + val resp = new AsyncBundle(1, new DebugBusResp).flip +} + +object FromAsyncDebugBus +{ + def apply(x: AsyncDebugBusIO) = { + val out = Wire(new DebugBusIO()(x.p)) + out.req <> FromAsyncBundle(x.req) + x.resp <> ToAsyncBundle(out.resp, 1) + out + } +} + +object ToAsyncDebugBus +{ + def apply(x: DebugBusIO) = { + val out = Wire(new AsyncDebugBusIO()(x.p)) + out.req <> ToAsyncBundle(x.req, 1) + x.resp <> FromAsyncBundle(out.resp) + out + } +} + trait HasDebugModuleParameters { val params : Parameters implicit val p = params diff --git a/src/main/scala/uncore/devices/Plic.scala b/src/main/scala/uncore/devices/Plic.scala index 830cd88a..4673b8cf 100644 --- a/src/main/scala/uncore/devices/Plic.scala +++ b/src/main/scala/uncore/devices/Plic.scala @@ -9,7 +9,7 @@ import junctions._ import diplomacy._ import regmapper._ import uncore.tilelink2._ -import cde.Parameters +import config._ import scala.math.min class GatewayPLICIO extends Bundle { @@ -135,7 +135,7 @@ class TLPLIC(supervisor: Boolean, maxPriorities: Int, address: BigInt = 0xC00000 val gateway = Module(new LevelGateway) gateway.io.interrupt := i gateway.io.plic - }) + } ++ (if (interrupts.isEmpty) Some(Wire(new GatewayPLICIO)) else None)) val priority = if (nPriorities > 0) 
Reg(Vec(nDevices+1, UInt(width=log2Up(nPriorities+1)))) diff --git a/src/main/scala/uncore/devices/Prci.scala b/src/main/scala/uncore/devices/Prci.scala index 604c89ab..ef73294c 100644 --- a/src/main/scala/uncore/devices/Prci.scala +++ b/src/main/scala/uncore/devices/Prci.scala @@ -11,7 +11,7 @@ import uncore.tilelink2._ import uncore.util._ import util._ import scala.math.{min,max} -import cde.{Parameters, Field} +import config._ /** Number of tiles */ case object NTiles extends Field[Int] diff --git a/src/main/scala/uncore/devices/Rom.scala b/src/main/scala/uncore/devices/Rom.scala index c47f2ab1..3cfab1e4 100644 --- a/src/main/scala/uncore/devices/Rom.scala +++ b/src/main/scala/uncore/devices/Rom.scala @@ -7,7 +7,7 @@ import diplomacy._ import uncore.tilelink._ import uncore.tilelink2._ import uncore.util._ -import cde.{Parameters, Field} +import config._ class TLROM(val base: BigInt, val size: Int, contentsDelayed: => Seq[Byte], executable: Boolean = true, beatBytes: Int = 4) extends LazyModule { @@ -47,8 +47,7 @@ class TLROM(val base: BigInt, val size: Int, contentsDelayed: => Seq[Byte], exec } class ROMSlave(contents: Seq[Byte])(implicit val p: Parameters) extends Module - with HasTileLinkParameters - with HasAddrMapParameters { + with HasTileLinkParameters { val io = new ClientUncachedTileLinkIO().flip val acq = Queue(io.acquire, 1) diff --git a/src/main/scala/uncore/tilelink/Arbiters.scala b/src/main/scala/uncore/tilelink/Arbiters.scala index 3b76b88f..c81ccee8 100644 --- a/src/main/scala/uncore/tilelink/Arbiters.scala +++ b/src/main/scala/uncore/tilelink/Arbiters.scala @@ -1,7 +1,7 @@ package uncore.tilelink import Chisel._ import junctions._ -import cde.{Parameters, Field} +import config._ /** Utility functions for constructing TileLinkIO arbiters */ trait TileLinkArbiterLike extends HasTileLinkParameters { diff --git a/src/main/scala/uncore/tilelink/Definitions.scala b/src/main/scala/uncore/tilelink/Definitions.scala index c1f12f90..88becdd4 100644 --- 
a/src/main/scala/uncore/tilelink/Definitions.scala +++ b/src/main/scala/uncore/tilelink/Definitions.scala @@ -7,7 +7,7 @@ import uncore.coherence.CoherencePolicy import uncore.constants._ import util._ import scala.math.max -import cde.{Parameters, Field} +import config._ case object CacheBlockOffsetBits extends Field[Int] case object AmoAluOperandBits extends Field[Int] diff --git a/src/main/scala/uncore/tilelink/Drivers.scala b/src/main/scala/uncore/tilelink/Drivers.scala index 43d127a5..c926779a 100644 --- a/src/main/scala/uncore/tilelink/Drivers.scala +++ b/src/main/scala/uncore/tilelink/Drivers.scala @@ -5,7 +5,7 @@ import junctions._ import uncore.constants._ import uncore.util._ import util._ -import cde.Parameters +import config._ abstract class Driver(implicit p: Parameters) extends TLModule()(p) { val io = new Bundle { diff --git a/src/main/scala/uncore/tilelink/Interconnect.scala b/src/main/scala/uncore/tilelink/Interconnect.scala index 8a2de1b7..544a6c51 100644 --- a/src/main/scala/uncore/tilelink/Interconnect.scala +++ b/src/main/scala/uncore/tilelink/Interconnect.scala @@ -4,7 +4,7 @@ import Chisel._ import junctions._ import scala.collection.mutable.ArraySeq import uncore.util._ -import cde.{Parameters, Field} +import config._ /** PortedTileLinkNetworks combine a TileLink protocol with a particular physical diff --git a/src/main/scala/uncore/tilelink/Network.scala b/src/main/scala/uncore/tilelink/Network.scala index 2a5fd8ab..51702b32 100644 --- a/src/main/scala/uncore/tilelink/Network.scala +++ b/src/main/scala/uncore/tilelink/Network.scala @@ -4,7 +4,7 @@ package uncore.tilelink import Chisel._ import uncore.util._ -import cde.{Parameters, Field} +import config._ case object LNEndpoints extends Field[Int] case object LNHeaderBits extends Field[Int] diff --git a/src/main/scala/uncore/tilelink2/Arbiter.scala b/src/main/scala/uncore/tilelink2/Arbiter.scala index 3ef0ca7e..a42ca60c 100644 --- a/src/main/scala/uncore/tilelink2/Arbiter.scala +++ 
b/src/main/scala/uncore/tilelink2/Arbiter.scala @@ -13,6 +13,14 @@ object TLArbiter val lowestIndexFirst: Policy = (valids, granted) => valids.scanLeft(Bool(true))(_ && !_).init + def lowestFromSeq[T <: TLChannel](edge: TLEdge, sink: DecoupledIO[T], sources: Seq[DecoupledIO[T]]) { + apply(lowestIndexFirst)(sink, sources.map(s => (edge.numBeats1(s.bits), s)):_*) + } + + def lowest[T <: TLChannel](edge: TLEdge, sink: DecoupledIO[T], sources: DecoupledIO[T]*) { + apply(lowestIndexFirst)(sink, sources.toList.map(s => (edge.numBeats1(s.bits), s)):_*) + } + def apply[T <: Data](policy: Policy)(sink: DecoupledIO[T], sources: (UInt, DecoupledIO[T])*) { if (sources.isEmpty) { sink.valid := Bool(false) diff --git a/src/main/scala/uncore/tilelink2/Broadcast.scala b/src/main/scala/uncore/tilelink2/Broadcast.scala index 582c5405..8bc40111 100644 --- a/src/main/scala/uncore/tilelink2/Broadcast.scala +++ b/src/main/scala/uncore/tilelink2/Broadcast.scala @@ -71,7 +71,7 @@ class TLBroadcast(lineBytes: Int, numTrackers: Int = 4, bufferless: Boolean = fa // Create the request tracker queues val trackers = Seq.tabulate(numTrackers) { id => - Module(new TLBroadcastTracker(id, lineBytes, log2Up(caches.size), bufferless, edgeIn, edgeOut)).io + Module(new TLBroadcastTracker(id, lineBytes, log2Up(caches.size+1), bufferless, edgeIn, edgeOut)).io } // We always accept E @@ -96,11 +96,11 @@ class TLBroadcast(lineBytes: Int, numTrackers: Int = 4, bufferless: Boolean = fa d_normal.bits.param := Mux(d_hasData, Mux(d_what(0), TLPermissions.toT, TLPermissions.toB), UInt(0)) } d_normal.bits.sink := OHToUInt(d_trackerOH) - assert (!d_normal.valid || d_trackerOH.orR()) + assert (!d_normal.valid || (d_trackerOH.orR() || d_normal.bits.opcode === TLMessages.ReleaseAck)) // A tracker response is anything neither dropped nor a ReleaseAck val d_response = d_hasData || !d_what(1) - val (_, d_last, _) = edgeIn.firstlast(d_normal) + val d_last = edgeIn.last(d_normal) (trackers zip d_trackerOH.toBools) foreach 
{ case (tracker, select) => tracker.d_last := select && d_normal.fire() && d_response && d_last } @@ -118,7 +118,7 @@ class TLBroadcast(lineBytes: Int, numTrackers: Int = 4, bufferless: Boolean = fa // Decrement the tracker's outstanding probe counter val c_decrement = in.c.fire() && (c_probeack || c_probeackdata) - val (_, c_last, _) = edgeIn.firstlast(in.c) + val c_last = edgeIn.last(in.c) trackers foreach { tracker => tracker.probeack := c_decrement && c_last && tracker.line === (in.c.bits.address >> lineShift) } @@ -136,11 +136,9 @@ class TLBroadcast(lineBytes: Int, numTrackers: Int = 4, bufferless: Boolean = fa putfull.bits := edgeOut.Put(Cat(put_what, in.c.bits.source), in.c.bits.address, in.c.bits.size, in.c.bits.data)._2 // Combine ReleaseAck or the modified D - TLArbiter(TLArbiter.lowestIndexFirst)(in.d, (UInt(0), releaseack), (edgeOut.numBeats1(d_normal.bits), d_normal)) + TLArbiter.lowest(edgeOut, in.d, releaseack, d_normal) // Combine the PutFull with the trackers - TLArbiter(TLArbiter.lowestIndexFirst)(out.a, - ((edgeOut.numBeats1(putfull.bits), putfull) +: - trackers.map { t => (edgeOut.numBeats1(t.out_a.bits), t.out_a) }):_*) + TLArbiter.lowestFromSeq(edgeOut, out.a, putfull +: trackers.map(_.out_a)) // The Probe FSM walks all caches and probes them val probe_todo = RegInit(UInt(0, width = max(1, caches.size))) @@ -159,7 +157,7 @@ class TLBroadcast(lineBytes: Int, numTrackers: Int = 4, bufferless: Boolean = fa // Which cache does a request come from? 
val a_cache = if (caches.size == 0) UInt(1) else Vec(caches.map(_.contains(in.a.bits.source))).asUInt - val (a_first, _, _) = edgeIn.firstlast(in.a) + val a_first = edgeIn.first(in.a) // To accept a request from A, the probe FSM must be idle and there must be a matching tracker val freeTrackers = Vec(trackers.map { t => t.idle }).asUInt @@ -259,7 +257,7 @@ class TLBroadcastTracker(id: Int, lineBytes: Int, probeCountBits: Int, bufferles io.line := address >> lineShift val i_data = Wire(Decoupled(new TLBroadcastData(edgeIn.bundle))) - val o_data = Queue(i_data, if (bufferless) 1 else (lineBytes / edgeIn.manager.beatBytes)) + val o_data = Queue(i_data, if (bufferless) 1 else (lineBytes / edgeIn.manager.beatBytes), pipe=bufferless) io.in_a.ready := (idle || !io.in_a_first) && i_data.ready i_data.valid := (idle || !io.in_a_first) && io.in_a.valid diff --git a/src/main/scala/uncore/tilelink2/Bundles.scala b/src/main/scala/uncore/tilelink2/Bundles.scala index cee73942..b61970f7 100644 --- a/src/main/scala/uncore/tilelink2/Bundles.scala +++ b/src/main/scala/uncore/tilelink2/Bundles.scala @@ -5,7 +5,7 @@ package uncore.tilelink2 import Chisel._ import chisel3.util.{ReadyValidIO} import diplomacy._ -import util.{AsyncQueueSource, AsyncQueueSink, GenericParameterizedBundle} +import util._ abstract class TLBundleBase(params: TLBundleParameters) extends GenericParameterizedBundle(params) @@ -27,7 +27,6 @@ object TLMessages val AccessAck = UInt(0) // . . val AccessAckData = UInt(1) // . . val HintAck = UInt(2) // . . -//val PutThroughData = UInt(3) // . // future extension ? val ProbeAck = UInt(4) // . val ProbeAckData = UInt(5) // . val Release = UInt(6) // . => ReleaseAck @@ -43,55 +42,72 @@ object TLMessages def isD(x: UInt) = x <= ReleaseAck } +/** + * The three primary TileLink permissions are: + * (T)runk: the agent is (or is on inwards path to) the global point of serialization. 
+ * (B)ranch: the agent is on an outwards path to + * (N)one: + * These permissions are permuted by transfer operations in various ways. + * Operations can cap permissions, request for them to be grown or shrunk, + * or for a report on their current status. + */ object TLPermissions { + val aWidth = 2 + val bdWidth = 2 + val cWidth = 3 + // Cap types (Grant = new permissions, Probe = permisions <= target) - val toT = UInt(0) - val toB = UInt(1) - val toN = UInt(2) + val toT = UInt(0, bdWidth) + val toB = UInt(1, bdWidth) + val toN = UInt(2, bdWidth) def isCap(x: UInt) = x <= toN // Grow types (Acquire = permissions >= target) - val NtoB = UInt(0) - val NtoT = UInt(1) - val BtoT = UInt(2) + val NtoB = UInt(0, aWidth) + val NtoT = UInt(1, aWidth) + val BtoT = UInt(2, aWidth) def isGrow(x: UInt) = x <= BtoT // Shrink types (ProbeAck, Release) - val TtoB = UInt(0) - val TtoN = UInt(1) - val BtoN = UInt(2) + val TtoB = UInt(0, cWidth) + val TtoN = UInt(1, cWidth) + val BtoN = UInt(2, cWidth) def isShrink(x: UInt) = x <= BtoN // Report types (ProbeAck) - val TtoT = UInt(3) - val BtoB = UInt(4) - val NtoN = UInt(5) + val TtoT = UInt(3, cWidth) + val BtoB = UInt(4, cWidth) + val NtoN = UInt(5, cWidth) def isReport(x: UInt) = x <= NtoN } object TLAtomics { + val width = 3 + // Arithmetic types - val MIN = UInt(0) - val MAX = UInt(1) - val MINU = UInt(2) - val MAXU = UInt(3) - val ADD = UInt(4) + val MIN = UInt(0, width) + val MAX = UInt(1, width) + val MINU = UInt(2, width) + val MAXU = UInt(3, width) + val ADD = UInt(4, width) def isArithmetic(x: UInt) = x <= ADD // Logical types - val XOR = UInt(0) - val OR = UInt(1) - val AND = UInt(2) - val SWAP = UInt(3) + val XOR = UInt(0, width) + val OR = UInt(1, width) + val AND = UInt(2, width) + val SWAP = UInt(3, width) def isLogical(x: UInt) = x <= SWAP } object TLHints { - val PREFETCH_READ = UInt(0) - val PREFETCH_WRITE = UInt(1) + val width = 1 + + val PREFETCH_READ = UInt(0, width) + val PREFETCH_WRITE = UInt(1, width) } 
sealed trait TLChannel extends TLBundleBase { @@ -106,7 +122,7 @@ final class TLBundleA(params: TLBundleParameters) val channelName = "'A' channel" // fixed fields during multibeat: val opcode = UInt(width = 3) - val param = UInt(width = 3) // amo_opcode || perms || hint + val param = UInt(width = List(TLAtomics.width, TLPermissions.aWidth, TLHints.width).max) // amo_opcode || grow perms || hint val size = UInt(width = params.sizeBits) val source = UInt(width = params.sourceBits) // from val address = UInt(width = params.addressBits) // to @@ -121,7 +137,7 @@ final class TLBundleB(params: TLBundleParameters) val channelName = "'B' channel" // fixed fields during multibeat: val opcode = UInt(width = 3) - val param = UInt(width = 3) + val param = UInt(width = TLPermissions.bdWidth) // cap perms val size = UInt(width = params.sizeBits) val source = UInt(width = params.sourceBits) // to val address = UInt(width = params.addressBits) // from @@ -136,7 +152,7 @@ final class TLBundleC(params: TLBundleParameters) val channelName = "'C' channel" // fixed fields during multibeat: val opcode = UInt(width = 3) - val param = UInt(width = 3) + val param = UInt(width = TLPermissions.cWidth) // shrink or report perms val size = UInt(width = params.sizeBits) val source = UInt(width = params.sourceBits) // from val address = UInt(width = params.addressBits) // to @@ -151,7 +167,7 @@ final class TLBundleD(params: TLBundleParameters) val channelName = "'D' channel" // fixed fields during multibeat: val opcode = UInt(width = 3) - val param = UInt(width = 2) + val param = UInt(width = TLPermissions.bdWidth) // cap perms val size = UInt(width = params.sizeBits) val source = UInt(width = params.sourceBits) // to val sink = UInt(width = params.sinkBits) // from @@ -225,58 +241,6 @@ object TLBundleSnoop } } -final class AsyncBundle[T <: Data](val depth: Int, gen: T) extends Bundle -{ - require (isPow2(depth)) - val mem = Vec(depth, gen) - val ridx = UInt(width = log2Up(depth)+1).flip - val 
widx = UInt(width = log2Up(depth)+1) - val ridx_valid = Bool().flip - val widx_valid = Bool() - val source_reset_n = Bool() - val sink_reset_n = Bool().flip - - override def cloneType: this.type = new AsyncBundle(depth, gen).asInstanceOf[this.type] -} - -object FromAsyncBundle -{ - def apply[T <: Data](x: AsyncBundle[T], sync: Int = 3): DecoupledIO[T] = { - val sink = Module(new AsyncQueueSink(x.mem(0), x.depth, sync)) - x.ridx := sink.io.ridx - x.ridx_valid := sink.io.ridx_valid - sink.io.widx := x.widx - sink.io.widx_valid := x.widx_valid - sink.io.mem := x.mem - sink.io.source_reset_n := x.source_reset_n - x.sink_reset_n := !sink.reset - val out = Wire(Decoupled(x.mem(0))) - out.valid := sink.io.deq.valid - out.bits := sink.io.deq.bits - sink.io.deq.ready := out.ready - out - } -} - -object ToAsyncBundle -{ - def apply[T <: Data](x: ReadyValidIO[T], depth: Int = 8, sync: Int = 3): AsyncBundle[T] = { - val source = Module(new AsyncQueueSource(x.bits, depth, sync)) - source.io.enq.valid := x.valid - source.io.enq.bits := x.bits - x.ready := source.io.enq.ready - val out = Wire(new AsyncBundle(depth, x.bits)) - source.io.ridx := out.ridx - source.io.ridx_valid := out.ridx_valid - out.mem := source.io.mem - out.widx := source.io.widx - out.widx_valid := source.io.widx_valid - source.io.sink_reset_n := out.sink_reset_n - out.source_reset_n := !source.reset - out - } -} - class TLAsyncBundleBase(params: TLAsyncBundleParameters) extends GenericParameterizedBundle(params) class TLAsyncBundle(params: TLAsyncBundleParameters) extends TLAsyncBundleBase(params) diff --git a/src/main/scala/uncore/tilelink2/Edges.scala b/src/main/scala/uncore/tilelink2/Edges.scala index a7cb2df6..7ce91a8d 100644 --- a/src/main/scala/uncore/tilelink2/Edges.scala +++ b/src/main/scala/uncore/tilelink2/Edges.scala @@ -175,19 +175,48 @@ class TLEdge( } } - def firstlast(bits: TLChannel, fire: Bool): (Bool, Bool, UInt) = { + def firstlastHelper(bits: TLChannel, fire: Bool): (Bool, Bool, Bool, UInt) 
= { val beats1 = numBeats1(bits) val counter = RegInit(UInt(0, width = log2Up(maxTransfer / manager.beatBytes))) val counter1 = counter - UInt(1) val first = counter === UInt(0) val last = counter === UInt(1) || beats1 === UInt(0) + val done = last && fire + val count = (beats1 & ~counter1) when (fire) { counter := Mux(first, beats1, counter1) } - (first, last, (beats1 & ~counter1) << log2Ceil(manager.beatBytes)) + (first, last, done, count) } - def firstlast(x: DecoupledIO[TLChannel]): (Bool, Bool, UInt) = firstlast(x.bits, x.fire()) + def first(bits: TLChannel, fire: Bool): Bool = firstlastHelper(bits, fire)._1 + def first(x: DecoupledIO[TLChannel]): Bool = first(x.bits, x.fire()) + def first(x: ValidIO[TLChannel]): Bool = first(x.bits, x.valid) + + def last(bits: TLChannel, fire: Bool): Bool = firstlastHelper(bits, fire)._2 + def last(x: DecoupledIO[TLChannel]): Bool = last(x.bits, x.fire()) + def last(x: ValidIO[TLChannel]): Bool = last(x.bits, x.valid) + + def firstlast(bits: TLChannel, fire: Bool): (Bool, Bool, Bool) = { + val r = firstlastHelper(bits, fire) + (r._1, r._2, r._3) + } + def firstlast(x: DecoupledIO[TLChannel]): (Bool, Bool, Bool) = firstlast(x.bits, x.fire()) + def firstlast(x: ValidIO[TLChannel]): (Bool, Bool, Bool) = firstlast(x.bits, x.valid) + + def count(bits: TLChannel, fire: Bool): (Bool, Bool, Bool, UInt) = { + val r = firstlastHelper(bits, fire) + (r._1, r._2, r._3, r._4) + } + def count(x: DecoupledIO[TLChannel]): (Bool, Bool, Bool, UInt) = count(x.bits, x.fire()) + def count(x: ValidIO[TLChannel]): (Bool, Bool, Bool, UInt) = count(x.bits, x.valid) + + def addr_inc(bits: TLChannel, fire: Bool): (Bool, Bool, Bool, UInt) = { + val r = firstlastHelper(bits, fire) + (r._1, r._2, r._3, r._4 << log2Ceil(manager.beatBytes)) + } + def addr_inc(x: DecoupledIO[TLChannel]): (Bool, Bool, Bool, UInt) = addr_inc(x.bits, x.fire()) + def addr_inc(x: ValidIO[TLChannel]): (Bool, Bool, Bool, UInt) = addr_inc(x.bits, x.valid) } class TLEdgeOut( @@ -205,7 
+234,7 @@ class TLEdgeOut( a.size := lgSize a.source := fromSource a.address := toAddress - a.mask := SInt(-1).asUInt + a.mask := mask(toAddress, lgSize) a.data := UInt(0) (legal, a) } @@ -238,7 +267,10 @@ class TLEdgeOut( (legal, c) } - def ProbeAck(fromSource: UInt, toAddress: UInt, lgSize: UInt, reportPermissions: UInt) = { + def ProbeAck(b: TLBundleB, reportPermissions: UInt): TLBundleC = + ProbeAck(b.source, b.address, b.size, reportPermissions) + + def ProbeAck(fromSource: UInt, toAddress: UInt, lgSize: UInt, reportPermissions: UInt): TLBundleC = { val c = Wire(new TLBundleC(bundle)) c.opcode := TLMessages.ProbeAck c.param := reportPermissions @@ -250,7 +282,10 @@ class TLEdgeOut( c } - def ProbeAck(fromSource: UInt, toAddress: UInt, lgSize: UInt, reportPermissions: UInt, data: UInt) = { + def ProbeAck(b: TLBundleB, reportPermissions: UInt, data: UInt): TLBundleC = + ProbeAck(b.source, b.address, b.size, reportPermissions, data) + + def ProbeAck(fromSource: UInt, toAddress: UInt, lgSize: UInt, reportPermissions: UInt, data: UInt): TLBundleC = { val c = Wire(new TLBundleC(bundle)) c.opcode := TLMessages.ProbeAckData c.param := reportPermissions @@ -262,7 +297,8 @@ class TLEdgeOut( c } - def GrantAck(toSink: UInt) = { + def GrantAck(d: TLBundleD): TLBundleE = GrantAck(d.sink) + def GrantAck(toSink: UInt): TLBundleE = { val e = Wire(new TLBundleE(bundle)) e.sink := toSink e @@ -412,7 +448,7 @@ class TLEdgeIn( b.size := lgSize b.source := toSource b.address := fromAddress - b.mask := SInt(-1).asUInt + b.mask := mask(fromAddress, lgSize) b.data := UInt(0) (legal, b) } diff --git a/src/main/scala/uncore/tilelink2/Fuzzer.scala b/src/main/scala/uncore/tilelink2/Fuzzer.scala index de9c9fa5..c7ff80b9 100644 --- a/src/main/scala/uncore/tilelink2/Fuzzer.scala +++ b/src/main/scala/uncore/tilelink2/Fuzzer.scala @@ -113,12 +113,10 @@ class TLFuzzer( // Progress within each operation val a = out.a.bits - val (a_first, a_last, _) = edge.firstlast(out.a) - val req_done = 
out.a.fire() && a_last + val (a_first, a_last, req_done) = edge.firstlast(out.a) val d = out.d.bits - val (d_first, d_last, _) = edge.firstlast(out.d) - val resp_done = out.d.fire() && d_last + val (d_first, d_last, resp_done) = edge.firstlast(out.d) // Source ID generation val idMap = Module(new IDMapGenerator(inFlight)) diff --git a/src/main/scala/uncore/tilelink2/IntNodes.scala b/src/main/scala/uncore/tilelink2/IntNodes.scala index d98e93e7..86d441b7 100644 --- a/src/main/scala/uncore/tilelink2/IntNodes.scala +++ b/src/main/scala/uncore/tilelink2/IntNodes.scala @@ -41,7 +41,7 @@ case class IntSourcePortParameters(sources: Seq[IntSourceParameters]) // The interrupts mapping must not overlap sources.map(_.range).combinations(2).foreach { case Seq(a, b) => require (!a.overlaps(b)) } // The interrupts must perfectly cover the range - require (sources.map(_.range.end).max == num) + require (sources.isEmpty || sources.map(_.range.end).max == num) } case class IntSinkPortParameters(sinks: Seq[IntSinkParameters]) @@ -57,7 +57,7 @@ object IntImp extends NodeImp[IntSourcePortParameters, IntSinkPortParameters, In Vec(eo.size, Vec(eo.map(_.source.num).max, Bool())) } def bundleI(ei: Seq[IntEdge]): Vec[Vec[Bool]] = { - require (!ei.isEmpty) + if (ei.isEmpty) Vec(0, Vec(0, Bool())) else Vec(ei.size, Vec(ei.map(_.source.num).max, Bool())) } @@ -103,7 +103,7 @@ case class IntInternalInputNode(num: Int) extends InternalInputNode(IntImp)(IntS class IntXbar extends LazyModule { val intnode = IntAdapterNode( - numSourcePorts = 1 to 1, // does it make sense to have more than one interrupt sink? 
+ numSourcePorts = 0 to 128, numSinkPorts = 0 to 128, sinkFn = { _ => IntSinkPortParameters(Seq(IntSinkParameters())) }, sourceFn = { seq => diff --git a/src/main/scala/uncore/tilelink2/Isolation.scala b/src/main/scala/uncore/tilelink2/Isolation.scala index 3be5caca..6f592218 100644 --- a/src/main/scala/uncore/tilelink2/Isolation.scala +++ b/src/main/scala/uncore/tilelink2/Isolation.scala @@ -5,6 +5,7 @@ package uncore.tilelink2 import Chisel._ import chisel3.internal.sourceinfo.SourceInfo import diplomacy._ +import util.AsyncBundle // READ the comments in the TLIsolation object before you instantiate this module class TLIsolation(fOut: (Bool, UInt) => UInt, fIn: (Bool, UInt) => UInt) extends LazyModule diff --git a/src/main/scala/uncore/tilelink2/Legacy.scala b/src/main/scala/uncore/tilelink2/Legacy.scala index 3fa75774..2a8b0341 100644 --- a/src/main/scala/uncore/tilelink2/Legacy.scala +++ b/src/main/scala/uncore/tilelink2/Legacy.scala @@ -4,7 +4,7 @@ package uncore.tilelink2 import Chisel._ import diplomacy._ -import cde.Parameters +import config._ import uncore.tilelink._ import uncore.constants._ @@ -47,9 +47,6 @@ class TLLegacy(implicit val p: Parameters) extends LazyModule with HasTileLinkPa // TL legacy will not generate PutFull // During conversion from TL Legacy, we won't support Acquire - // Must be able to fit TL2 sink_id into TL legacy - require ((1 << tlManagerXactIdBits) >= edge.manager.endSinkId || !edge.manager.anySupportAcquire) - val out = io.out(0) out.a.valid := io.legacy.acquire.valid out.d.ready := io.legacy.grant .ready @@ -134,7 +131,7 @@ class TLLegacy(implicit val p: Parameters) extends LazyModule with HasTileLinkPa TLMessages.HintAck -> Grant.prefetchAckType)) grant.is_builtin_type := Bool(true) grant.client_xact_id := out.d.bits.source - grant.manager_xact_id := out.d.bits.sink + grant.manager_xact_id := UInt(0) grant.data := out.d.bits.data grant.addr_beat := beatCounter diff --git a/src/main/scala/uncore/tilelink2/Metadata.scala 
b/src/main/scala/uncore/tilelink2/Metadata.scala new file mode 100644 index 00000000..2e23ae7b --- /dev/null +++ b/src/main/scala/uncore/tilelink2/Metadata.scala @@ -0,0 +1,157 @@ +// See LICENSE for license details. + +package uncore.tilelink2 + +import Chisel._ +import chisel3.internal.sourceinfo.SourceInfo +import util._ +import uncore.constants.MemoryOpConstants + +object ClientStates { + val width = 2 + + val Nothing = UInt(0, width) + val Branch = UInt(1, width) + val Trunk = UInt(2, width) + val Dirty = UInt(3, width) + + def hasReadPermission(state: UInt): Bool = state > Nothing + def hasWritePermission(state: UInt): Bool = state > Branch +} + +object MemoryOpCategories extends MemoryOpConstants { + def wr = Cat(Bool(true), Bool(true)) // Op actually writes + def wi = Cat(Bool(false), Bool(true)) // Future op will write + def rd = Cat(Bool(false), Bool(false)) // Op only reads + + def categorize(cmd: UInt): UInt = { + val cat = Cat(isWrite(cmd), isWriteIntent(cmd)) + //assert(cat.isOneOf(wr,wi,rd), "Could not categorize command.") + cat + } +} + +/** Stores the client-side coherence information, + * such as permissions on the data and whether the data is dirty. + * Its API can be used to make TileLink messages in response to + * memory operations, cache control operations, or Probe messages.
+ */ +class ClientMetadata extends Bundle { + /** Actual state information stored in this bundle */ + val state = UInt(width = ClientStates.width) + + /** Metadata equality */ + def ===(rhs: UInt): Bool = state === rhs + def ===(rhs: ClientMetadata): Bool = state === rhs.state + def =/=(rhs: ClientMetadata): Bool = !this.===(rhs) + + /** Is the block's data present in this cache */ + def isValid(dummy: Int = 0): Bool = state > ClientStates.Nothing + + /** Determine whether this cmd misses, and the new state (on hit) or param to be sent (on miss) */ + private def growStarter(cmd: UInt): (Bool, UInt) = { + import MemoryOpCategories._ + import TLPermissions._ + import ClientStates._ + val c = categorize(cmd) + MuxTLookup(Cat(c, state), (Bool(false), UInt(0)), Seq( + //(effect, am now) -> (was a hit, next) + Cat(rd, Dirty) -> (Bool(true), Dirty), + Cat(rd, Trunk) -> (Bool(true), Trunk), + Cat(rd, Branch) -> (Bool(true), Branch), + Cat(wi, Dirty) -> (Bool(true), Dirty), + Cat(wi, Trunk) -> (Bool(true), Trunk), + Cat(wr, Dirty) -> (Bool(true), Dirty), + Cat(wr, Trunk) -> (Bool(true), Dirty), + //(effect, am now) -> (was a miss, param) + Cat(rd, Nothing) -> (Bool(false), NtoB), + Cat(wi, Branch) -> (Bool(false), BtoT), + Cat(wi, Nothing) -> (Bool(false), NtoT), + Cat(wr, Branch) -> (Bool(false), BtoT), + Cat(wr, Nothing) -> (Bool(false), NtoT))) + } + + /** Determine what state to go to after miss based on Grant param + * For now, doesn't depend on state (which may have been Probed). 
+ */ + private def growFinisher(cmd: UInt, param: UInt): UInt = { + import MemoryOpCategories._ + import TLPermissions._ + import ClientStates._ + val c = categorize(cmd) + //assert(c === rd || param === toT, "Client was expecting trunk permissions.") + MuxLookup(Cat(c, param), Nothing, Seq( + //(effect param) -> (next) + Cat(rd, toB) -> Branch, + Cat(rd, toT) -> Trunk, + Cat(wi, toT) -> Trunk, + Cat(wr, toT) -> Dirty)) + } + + /** Does a secondary miss on the block require another Acquire message */ + def requiresAcquireOnSecondaryMiss(first_cmd: UInt, second_cmd: UInt): Bool = { + import MemoryOpCategories._ + isWriteIntent(second_cmd) && !isWriteIntent(first_cmd) + } + + /** Does this cache have permissions on this block sufficient to perform op, + * and what to do next (Acquire message param or updated metadata). */ + def onAccess(cmd: UInt): (Bool, UInt, ClientMetadata) = { + val r = growStarter(cmd) + (r._1, r._2, ClientMetadata(r._2)) + } + + /** Metadata change on a returned Grant */ + def onGrant(cmd: UInt, param: UInt): ClientMetadata = ClientMetadata(growFinisher(cmd, param)) + + /** Determine what state to go to based on Probe param */ + private def shrinkHelper(param: UInt): (Bool, UInt, UInt) = { + import ClientStates._ + import TLPermissions._ + MuxTLookup(Cat(param, state), (Bool(false), UInt(0), UInt(0)), Seq( + //(wanted, am now) -> (hasDirtyData resp, next) + Cat(toT, Dirty) -> (Bool(true), TtoT, Trunk), + Cat(toT, Trunk) -> (Bool(false), TtoT, Trunk), + Cat(toT, Branch) -> (Bool(false), BtoB, Branch), + Cat(toT, Nothing) -> (Bool(false), NtoN, Nothing), + Cat(toB, Dirty) -> (Bool(true), TtoB, Branch), + Cat(toB, Trunk) -> (Bool(false), TtoB, Branch), // Policy: Don't notify on clean downgrade + Cat(toB, Branch) -> (Bool(false), BtoB, Branch), + Cat(toB, Nothing) -> (Bool(false), NtoN, Nothing), + Cat(toN, Dirty) -> (Bool(true), TtoN, Nothing), + Cat(toN, Trunk) -> (Bool(false), TtoN, Nothing), // Policy: Don't notify on clean downgrade +
Cat(toN, Branch) -> (Bool(false), BtoN, Nothing), // Policy: Don't notify on clean downgrade + Cat(toN, Nothing) -> (Bool(false), NtoN, Nothing))) + } + + /** Translate cache control cmds into Probe param */ + private def cmdToPermCap(cmd: UInt): UInt = { + import MemoryOpCategories._ + import TLPermissions._ + MuxLookup(cmd, toN, Seq( + M_FLUSH -> toN, + M_PRODUCE -> toB, + M_CLEAN -> toT)) + } + + def onCacheControl(cmd: UInt): (Bool, UInt, ClientMetadata) = { + val r = shrinkHelper(cmdToPermCap(cmd)) + (r._1, r._2, ClientMetadata(r._3)) + } + + def onProbe(param: UInt): (Bool, UInt, ClientMetadata) = { + val r = shrinkHelper(param) + (r._1, r._2, ClientMetadata(r._3)) + } +} + +/** Factories for ClientMetadata, including on reset */ +object ClientMetadata { + def apply(perm: UInt) = { + val meta = Wire(new ClientMetadata) + meta.state := perm + meta + } + def onReset = ClientMetadata(ClientStates.Nothing) + def maximum = ClientMetadata(ClientStates.Dirty) +} diff --git a/src/main/scala/uncore/tilelink2/Monitor.scala b/src/main/scala/uncore/tilelink2/Monitor.scala index a8af77e3..0da47da2 100644 --- a/src/main/scala/uncore/tilelink2/Monitor.scala +++ b/src/main/scala/uncore/tilelink2/Monitor.scala @@ -94,7 +94,7 @@ class TLMonitor(gen: () => TLBundleSnoop, edge: () => TLEdge, sourceInfo: Source assert (bundle.size >= UInt(log2Ceil(edge.manager.beatBytes)), "'B' channel Probe smaller than a beat" + extra) assert (is_aligned, "'B' channel Probe address not aligned to size" + extra) assert (TLPermissions.isCap(bundle.param), "'B' channel Probe carries invalid cap param" + extra) - assert (~bundle.mask === UInt(0).asUInt, "'B' channel Probe contains invalid mask" + extra) + assert (~bundle.mask === UInt(0), "'B' channel Probe contains invalid mask" + extra) } when (bundle.opcode === TLMessages.Get) { @@ -102,7 +102,7 @@ class TLMonitor(gen: () => TLBundleSnoop, edge: () => TLEdge, sourceInfo: Source assert (address_ok, "'B' channel Get carries unmanaged address" + 
extra) assert (is_aligned, "'B' channel Get address not aligned to size" + extra) assert (bundle.param === UInt(0), "'B' channel Get carries invalid param" + extra) - assert (bundle.mask === mask, "'A' channel Get contains invalid mask" + extra) + assert (bundle.mask === mask, "'B' channel Get contains invalid mask" + extra) } when (bundle.opcode === TLMessages.PutFullData) { @@ -283,7 +283,7 @@ class TLMonitor(gen: () => TLBundleSnoop, edge: () => TLEdge, sourceInfo: Source } def legalizeMultibeatA(a: DecoupledSnoop[TLBundleA], edge: TLEdge)(implicit sourceInfo: SourceInfo) { - val (a_first, _, _) = edge.firstlast(a.bits, a.fire()) + val a_first = edge.first(a.bits, a.fire()) val opcode = Reg(UInt()) val param = Reg(UInt()) val size = Reg(UInt()) @@ -306,7 +306,7 @@ class TLMonitor(gen: () => TLBundleSnoop, edge: () => TLEdge, sourceInfo: Source } def legalizeMultibeatB(b: DecoupledSnoop[TLBundleB], edge: TLEdge)(implicit sourceInfo: SourceInfo) { - val (b_first, _, _) = edge.firstlast(b.bits, b.fire()) + val b_first = edge.first(b.bits, b.fire()) val opcode = Reg(UInt()) val param = Reg(UInt()) val size = Reg(UInt()) @@ -329,7 +329,7 @@ class TLMonitor(gen: () => TLBundleSnoop, edge: () => TLEdge, sourceInfo: Source } def legalizeMultibeatC(c: DecoupledSnoop[TLBundleC], edge: TLEdge)(implicit sourceInfo: SourceInfo) { - val (c_first, _, _) = edge.firstlast(c.bits, c.fire()) + val c_first = edge.first(c.bits, c.fire()) val opcode = Reg(UInt()) val param = Reg(UInt()) val size = Reg(UInt()) @@ -352,7 +352,7 @@ class TLMonitor(gen: () => TLBundleSnoop, edge: () => TLEdge, sourceInfo: Source } def legalizeMultibeatD(d: DecoupledSnoop[TLBundleD], edge: TLEdge)(implicit sourceInfo: SourceInfo) { - val (d_first, _, _) = edge.firstlast(d.bits, d.fire()) + val d_first = edge.first(d.bits, d.fire()) val opcode = Reg(UInt()) val param = Reg(UInt()) val size = Reg(UInt()) @@ -387,8 +387,8 @@ class TLMonitor(gen: () => TLBundleSnoop, edge: () => TLEdge, sourceInfo: Source def 
legalizeSourceUnique(bundle: TLBundleSnoop, edge: TLEdge)(implicit sourceInfo: SourceInfo) { val inflight = RegInit(UInt(0, width = edge.client.endSourceId)) - val (_, a_last, _) = edge.firstlast(bundle.a.bits, bundle.a.fire()) - val (_, d_last, _) = edge.firstlast(bundle.d.bits, bundle.d.fire()) + val a_last = edge.last(bundle.a.bits, bundle.a.fire()) + val d_last = edge.last(bundle.d.bits, bundle.d.fire()) if (edge.manager.minLatency > 0) { assert(bundle.a.bits.source =/= bundle.d.bits.source || !bundle.a.valid || !bundle.d.valid, s"'A' and 'D' concurrent, despite minlatency ${edge.manager.minLatency}" + extra) diff --git a/src/main/scala/uncore/tilelink2/RAMModel.scala b/src/main/scala/uncore/tilelink2/RAMModel.scala index 37dc2878..ec5c8d90 100644 --- a/src/main/scala/uncore/tilelink2/RAMModel.scala +++ b/src/main/scala/uncore/tilelink2/RAMModel.scala @@ -110,7 +110,7 @@ class TLRAMModel(log: String = "") extends LazyModule // Process A access requests val a = Reg(next = in.a.bits) val a_fire = Reg(next = in.a.fire(), init = Bool(false)) - val (a_first, a_last, a_address_inc) = edge.firstlast(a, a_fire) + val (a_first, a_last, _, a_address_inc) = edge.addr_inc(a, a_fire) val a_size = edge.size(a) val a_sizeOH = UIntToOH(a_size) val a_address = a.address | a_address_inc @@ -196,7 +196,7 @@ class TLRAMModel(log: String = "") extends LazyModule // Process D access responses val d = RegNext(out.d.bits) val d_fire = Reg(next = out.d.fire(), init = Bool(false)) - val (d_first, d_last, d_address_inc) = edge.firstlast(d, d_fire) + val (d_first, d_last, _, d_address_inc) = edge.addr_inc(d, d_fire) val d_size = edge.size(d) val d_sizeOH = UIntToOH(d_size) val d_base = d_flight.base diff --git a/src/main/scala/uncore/tilelink2/ToAXI4.scala b/src/main/scala/uncore/tilelink2/ToAXI4.scala index 6b476815..ae2f41e3 100644 --- a/src/main/scala/uncore/tilelink2/ToAXI4.scala +++ b/src/main/scala/uncore/tilelink2/ToAXI4.scala @@ -80,7 +80,7 @@ class TLToAXI4(idBits: Int, 
combinational: Boolean = true) extends LazyModule val a_source = in.a.bits.source val a_size = edgeIn.size(in.a.bits) val a_isPut = edgeIn.hasData(in.a.bits) - val (_, a_last, _) = edgeIn.firstlast(in.a) + val a_last = edgeIn.last(in.a) // Make sure the fields are within the bounds we assumed assert (a_source < UInt(1 << sourceBits)) diff --git a/src/main/scala/uncore/tilelink2/Xbar.scala b/src/main/scala/uncore/tilelink2/Xbar.scala index 6e2b64a0..5c32ca94 100644 --- a/src/main/scala/uncore/tilelink2/Xbar.scala +++ b/src/main/scala/uncore/tilelink2/Xbar.scala @@ -55,6 +55,7 @@ class TLXbar(policy: TLArbiter.Policy = TLArbiter.lowestIndexFirst) extends Lazy minLatency = seq.map(_.minLatency).min, endSinkId = outputIdRanges.map(_.end).max, managers = ManagerUnification(seq.flatMap { port => + // println(s"${port.managers.map(_.name)} ${port.beatBytes} vs ${seq(0).managers.map(_.name)} ${seq(0).beatBytes}") require (port.beatBytes == seq(0).beatBytes) val fifoIdMapper = fifoIdFactory() port.managers map { manager => manager.copy( diff --git a/src/main/scala/uncore/util/AmoAlu.scala b/src/main/scala/uncore/util/AmoAlu.scala index 40ae8d72..ce80bab0 100644 --- a/src/main/scala/uncore/util/AmoAlu.scala +++ b/src/main/scala/uncore/util/AmoAlu.scala @@ -4,7 +4,7 @@ package uncore.util import Chisel._ import uncore.tilelink._ -import cde.Parameters +import config._ import uncore.constants._ class StoreGen(typ: UInt, addr: UInt, dat: UInt, maxSize: Int) { diff --git a/src/main/scala/uncore/util/Counters.scala b/src/main/scala/uncore/util/Counters.scala index 52209797..2ad49f0b 100644 --- a/src/main/scala/uncore/util/Counters.scala +++ b/src/main/scala/uncore/util/Counters.scala @@ -3,7 +3,7 @@ package uncore.util import Chisel._ import uncore.tilelink._ import util.TwoWayCounter -import cde.Parameters +import config._ class BeatCounterStatus extends Bundle { val idx = UInt() diff --git a/src/main/scala/uncore/util/Enqueuer.scala b/src/main/scala/uncore/util/Enqueuer.scala 
index 3018821d..163577dc 100644 --- a/src/main/scala/uncore/util/Enqueuer.scala +++ b/src/main/scala/uncore/util/Enqueuer.scala @@ -2,7 +2,7 @@ package uncore.util import Chisel._ import uncore.tilelink._ -import cde.Parameters +import config._ /** Struct for describing per-channel queue depths */ case class TileLinkDepths(acq: Int, prb: Int, rel: Int, gnt: Int, fin: Int) diff --git a/src/main/scala/uncore/util/Serializer.scala b/src/main/scala/uncore/util/Serializer.scala deleted file mode 100644 index 8cc0caa2..00000000 --- a/src/main/scala/uncore/util/Serializer.scala +++ /dev/null @@ -1,69 +0,0 @@ -// See LICENSE for license details. - -package uncore.util - -import Chisel._ -import uncore.tilelink._ - -class FlowThroughSerializer[T <: Bundle with HasTileLinkData](gen: T, n: Int) extends Module { - val io = new Bundle { - val in = Decoupled(gen).flip - val out = Decoupled(gen) - val cnt = UInt(OUTPUT, log2Up(n)) - val done = Bool(OUTPUT) - } - val narrowWidth = io.in.bits.data.getWidth / n - require(io.in.bits.data.getWidth % narrowWidth == 0) - - if(n == 1) { - io.out <> io.in - io.cnt := UInt(0) - io.done := Bool(true) - } else { - val cnt = Reg(init=UInt(0, width = log2Up(n))) - val wrap = cnt === UInt(n-1) - val rbits = Reg{io.in.bits} - val active = Reg(init=Bool(false)) - - val shifter = Wire(Vec(n, Bits(width = narrowWidth))) - (0 until n).foreach { - i => shifter(i) := rbits.data((i+1)*narrowWidth-1,i*narrowWidth) - } - - io.done := Bool(false) - io.cnt := cnt - io.in.ready := !active - io.out.valid := active || io.in.valid - io.out.bits := io.in.bits - when(!active && io.in.valid) { - when(io.in.bits.hasData()) { - cnt := Mux(io.out.ready, UInt(1), UInt(0)) - rbits := io.in.bits - active := Bool(true) - } - io.done := !io.in.bits.hasData() - } - when(active) { - io.out.bits := rbits - io.out.bits.data := shifter(cnt) - when(io.out.ready) { - cnt := cnt + UInt(1) - when(wrap) { - cnt := UInt(0) - io.done := Bool(true) - active := Bool(false) - } - } - } 
- } -} - -object FlowThroughSerializer { - def apply[T <: Bundle with HasTileLinkData](in: DecoupledIO[T], n: Int): DecoupledIO[T] = { - val fs = Module(new FlowThroughSerializer(in.bits, n)) - fs.io.in.valid := in.valid - fs.io.in.bits := in.bits - in.ready := fs.io.in.ready - fs.io.out - } -} diff --git a/src/main/scala/unittest/Configs.scala b/src/main/scala/unittest/Configs.scala index e968da04..48fbf81f 100644 --- a/src/main/scala/unittest/Configs.scala +++ b/src/main/scala/unittest/Configs.scala @@ -3,11 +3,15 @@ package unittest import Chisel._ -import cde.{Parameters, Config, CDEMatchError} +import config._ +import junctions._ import rocketchip.{BaseConfig, BasePlatformConfig} class WithJunctionsUnitTests extends Config( (pname, site, here) => pname match { + case HastiId => "HastiTest" + case HastiKey("HastiTest") => HastiParameters(addrBits = 32, dataBits = 64) + case NastiKey => NastiParameters(addrBits = 32, dataBits = 64, idBits = 4) case junctions.PAddrBits => 32 case rocket.XLen => 64 case UnitTests => (p: Parameters) => Seq( @@ -20,7 +24,6 @@ class JunctionsUnitTestConfig extends Config(new WithJunctionsUnitTests ++ new B class WithUncoreUnitTests extends Config( (pname, site, here) => pname match { - case rocketchip.NCoreplexExtClients => 0 case uncore.tilelink.TLId => "L1toL2" case UnitTests => (p: Parameters) => Seq( Module(new uncore.devices.ROMSlaveTest()(p)), diff --git a/src/main/scala/unittest/TestHarness.scala b/src/main/scala/unittest/TestHarness.scala index 8ddc1364..1f979b74 100644 --- a/src/main/scala/unittest/TestHarness.scala +++ b/src/main/scala/unittest/TestHarness.scala @@ -3,8 +3,9 @@ package unittest import Chisel._ +import config._ -class TestHarness(implicit val p: cde.Parameters) extends Module { +class TestHarness(implicit val p: Parameters) extends Module { val io = new Bundle { val success = Bool(OUTPUT) } io.success := Module(new UnitTestSuite).io.finished } diff --git a/src/main/scala/unittest/UnitTest.scala 
b/src/main/scala/unittest/UnitTest.scala index 1103cf30..4dbc76e5 100644 --- a/src/main/scala/unittest/UnitTest.scala +++ b/src/main/scala/unittest/UnitTest.scala @@ -1,7 +1,7 @@ package unittest import Chisel._ -import cde.{Field, Parameters} +import config._ import util.SimpleTimer trait HasUnitTestIO { diff --git a/src/main/scala/util/Arbiters.scala b/src/main/scala/util/Arbiters.scala index b5518720..b3421496 100644 --- a/src/main/scala/util/Arbiters.scala +++ b/src/main/scala/util/Arbiters.scala @@ -1,6 +1,6 @@ package util import Chisel._ -import cde.Parameters +import config._ /** A generalized locking RR arbiter that addresses the limitations of the * version in the Chisel standard library */ diff --git a/src/main/scala/util/AsyncBundle.scala b/src/main/scala/util/AsyncBundle.scala new file mode 100644 index 00000000..fb178666 --- /dev/null +++ b/src/main/scala/util/AsyncBundle.scala @@ -0,0 +1,59 @@ +// See LICENSE for license details. + +package util + +import Chisel._ +import chisel3.util.{ReadyValidIO} + +final class AsyncBundle[T <: Data](val depth: Int, gen: T) extends Bundle +{ + require (isPow2(depth)) + val mem = Vec(depth, gen) + val ridx = UInt(width = log2Up(depth)+1).flip + val widx = UInt(width = log2Up(depth)+1) + val ridx_valid = Bool().flip + val widx_valid = Bool() + val source_reset_n = Bool() + val sink_reset_n = Bool().flip + + override def cloneType: this.type = new AsyncBundle(depth, gen).asInstanceOf[this.type] +} + +object FromAsyncBundle +{ + def apply[T <: Data](x: AsyncBundle[T], sync: Int = 3): DecoupledIO[T] = { + val sink = Module(new AsyncQueueSink(x.mem(0), x.depth, sync)) + x.ridx := sink.io.ridx + x.ridx_valid := sink.io.ridx_valid + sink.io.widx := x.widx + sink.io.widx_valid := x.widx_valid + sink.io.mem := x.mem + sink.io.source_reset_n := x.source_reset_n + x.sink_reset_n := !sink.reset + val out = Wire(Decoupled(x.mem(0))) + out.valid := sink.io.deq.valid + out.bits := sink.io.deq.bits + sink.io.deq.ready := out.ready 
+ out + } +} + +object ToAsyncBundle +{ + def apply[T <: Data](x: ReadyValidIO[T], depth: Int = 8, sync: Int = 3): AsyncBundle[T] = { + val source = Module(new AsyncQueueSource(x.bits, depth, sync)) + source.io.enq.valid := x.valid + source.io.enq.bits := x.bits + x.ready := source.io.enq.ready + val out = Wire(new AsyncBundle(depth, x.bits)) + source.io.ridx := out.ridx + source.io.ridx_valid := out.ridx_valid + out.mem := source.io.mem + out.widx := source.io.widx + out.widx_valid := source.io.widx_valid + source.io.sink_reset_n := out.sink_reset_n + out.source_reset_n := !source.reset + out + } +} + diff --git a/src/main/scala/util/BlackBoxRegs.scala b/src/main/scala/util/BlackBoxRegs.scala index a03c3bfa..14bba000 100644 --- a/src/main/scala/util/BlackBoxRegs.scala +++ b/src/main/scala/util/BlackBoxRegs.scala @@ -2,7 +2,7 @@ package util import Chisel._ -import cde.{Parameters} +import config._ /** This black-boxes an Async Reset * (or Set) diff --git a/src/main/scala/util/Config.scala b/src/main/scala/util/Config.scala new file mode 100644 index 00000000..966b594b --- /dev/null +++ b/src/main/scala/util/Config.scala @@ -0,0 +1,84 @@ +package config + +class Field[T] +class CDEMatchError() extends Exception { + override def fillInStackTrace() = this +} + +abstract class View { + final def apply[T](pname: Field[T]): T = find(pname, this).asInstanceOf[T] + + protected[config] def find(pname: Any, site: View): Any +} + +abstract class Parameters extends View { + final def ++ (x: Parameters): Parameters = new ChainParameters(this, x) + final def alter(f: (Any, View, View, View) => Any): Parameters = Parameters(f) ++ this + final def alter(m: Map[Any,Any]): Parameters = Parameters(m) ++ this + final def alter(f: PartialFunction[Any,Any]): Parameters = Parameters(f) ++ this + final def alterPartial(f: PartialFunction[Any,Any]): Parameters = Parameters(f) ++ this + + protected[config] def chain(site: View, tail: View, pname: Any): Any + protected[config] def 
find(pname: Any, site: View) = chain(site, new TerminalView, pname) +} + +object Parameters { + def empty: Parameters = new EmptyParameters + def apply(f: (Any, View, View, View) => Any): Parameters = new FunctionParameters(f) + def apply(m: Map[Any,Any]): Parameters = new MapParameters(m) + def apply(f: PartialFunction[Any,Any]): Parameters = new PartialParameters(f) + def partial(f: PartialFunction[Any,Any]): Parameters = new PartialParameters(f) + def root(p: Parameters) = p +} + +class Config(p: Parameters) extends Parameters { + def this(f: (Any, View, View) => Any) = this(Parameters((p,s,h,u) => f(p,s,h))) // backwards compat; don't use + def this(f: (Any, View, View, View) => Any) = this(Parameters(f)) + def this(m: Map[Any,Any]) = this(Parameters(m)) + def this(f: PartialFunction[Any,Any]) = this(Parameters(f)) + + protected[config] def chain(site: View, tail: View, pname: Any) = p.chain(site, tail, pname) + override def toString = this.getClass.getSimpleName + def toInstance = this +} + +class ConfigPartial(f: PartialFunction[Any,Any]) extends Config(Parameters(f)) + +// Internal implementation: + +private class TerminalView extends View { + private class Unusable + def find(pname: Any, site: View): Any = pname match { case x: Unusable => () } +} + +private class ChainView(head: Parameters, tail: View) extends View { + def find(pname: Any, site: View) = head.chain(site, tail, pname) +} + +private class ChainParameters(x: Parameters, y: Parameters) extends Parameters { + def chain(site: View, tail: View, pname: Any) = x.chain(site, new ChainView(y, tail), pname) +} + +private class EmptyParameters extends Parameters { + def chain(site: View, tail: View, pname: Any) = tail.find(pname, site) +} + +private class FunctionParameters(f: (Any, View, View, View) => Any) extends Parameters { + protected[config] def chain(site: View, tail: View, pname: Any) = { + try f(pname, site, this, tail) + catch { + case e: CDEMatchError => tail.find(pname, site) + case e: 
scala.MatchError => tail.find(pname, site) + } + } +} + +private class MapParameters(map: Map[Any, Any]) extends Parameters { + protected[config] def chain(site: View, tail: View, pname: Any) = + map.get(pname).getOrElse(tail.find(pname, site)) +} + +private class PartialParameters(f: PartialFunction[Any,Any]) extends Parameters { + protected[config] def chain(site: View, tail: View, pname: Any) = + if (f.isDefinedAt(pname)) f.apply(pname) else tail.find(pname, site) +} diff --git a/src/main/scala/util/Counters.scala b/src/main/scala/util/Counters.scala index 2f27ccaf..0d9bccc1 100644 --- a/src/main/scala/util/Counters.scala +++ b/src/main/scala/util/Counters.scala @@ -1,7 +1,7 @@ package util import Chisel._ -import cde.Parameters +import config._ import scala.math.max // Produces 0-width value when counting to 1 diff --git a/src/main/scala/util/GeneratorUtils.scala b/src/main/scala/util/GeneratorUtils.scala index 5ae69265..29161912 100644 --- a/src/main/scala/util/GeneratorUtils.scala +++ b/src/main/scala/util/GeneratorUtils.scala @@ -3,7 +3,7 @@ package util import Chisel._ -import cde._ +import config._ import diplomacy.LazyModule import java.io.{File, FileWriter} @@ -24,7 +24,7 @@ case class ParsedInputNames( */ trait HasGeneratorUtilities { def getConfig(names: ParsedInputNames): Config = { - names.fullConfigClasses.foldRight(new Config()) { case (currentName, config) => + new Config(names.fullConfigClasses.foldRight(Parameters.empty) { case (currentName, config) => val currentConfig = try { Class.forName(currentName).newInstance.asInstanceOf[Config] } catch { @@ -32,7 +32,7 @@ trait HasGeneratorUtilities { throwException(s"""Unable to find part "$currentName" from "${names.configs}", did you misspell it?""", e) } currentConfig ++ config - } + }) } def getParameters(names: ParsedInputNames): Parameters = getParameters(getConfig(names)) @@ -43,7 +43,7 @@ trait HasGeneratorUtilities { def elaborate(names: ParsedInputNames, params: Parameters): Circuit = { val gen =
() => Class.forName(names.fullTopModuleClass) - .getConstructor(classOf[cde.Parameters]) + .getConstructor(classOf[Parameters]) .newInstance(params) .asInstanceOf[Module] @@ -103,15 +103,9 @@ trait GeneratorApp extends App with HasGeneratorUtilities { TestGeneration.addSuite(DefaultTestSuites.singleRegression) } - /** Output Design Space Exploration knobs and constraints. */ - def generateDSEConstraints { - writeOutputFile(td, s"${names.configs}.knb", world.getKnobs) // Knobs for DSE - writeOutputFile(td, s"${names.configs}.cst", world.getConstraints) // Constraints for DSE - } - /** Output a global Parameter dump, which an external script can turn into Verilog headers. */ def generateParameterDump { - writeOutputFile(td, s"$longName.prm", ParameterDump.getDump) // Parameters flagged with Dump() + writeOutputFile(td, s"$longName.prm", "") } /** Output a global ConfigString, for use by the RISC-V software ecosystem. */ diff --git a/src/main/scala/util/HellaQueue.scala b/src/main/scala/util/HellaQueue.scala index acaa800b..53a7e5f9 100644 --- a/src/main/scala/util/HellaQueue.scala +++ b/src/main/scala/util/HellaQueue.scala @@ -1,7 +1,7 @@ package util import Chisel._ -import cde.Parameters +import config._ class HellaFlowQueue[T <: Data](val entries: Int)(data: => T) extends Module { val io = new QueueIO(data, entries) diff --git a/src/main/scala/util/Misc.scala b/src/main/scala/util/Misc.scala index 6ce8583b..f674160b 100644 --- a/src/main/scala/util/Misc.scala +++ b/src/main/scala/util/Misc.scala @@ -1,7 +1,7 @@ package util import Chisel._ -import cde.Parameters +import config._ import scala.math._ class ParameterizedBundle(implicit p: Parameters) extends Bundle { @@ -36,6 +36,23 @@ object MuxT { (Mux(cond, con._1, alt._1), Mux(cond, con._2, alt._2), Mux(cond, con._3, alt._3)) } +/** Creates a cascade of n MuxTs to search for a key value. 
*/ +object MuxTLookup { + def apply[S <: UInt, T <: Data, U <: Data](key: S, default: (T, U), mapping: Seq[(S, (T, U))]): (T, U) = { + /* Fold right-to-left: default is innermost, the first entry of mapping ends up outermost. */ + mapping.foldRight(default) { case ((candidate, value), fallback) => + MuxT(candidate === key, value, fallback) + } + } + + def apply[S <: UInt, T <: Data, U <: Data, W <: Data](key: S, default: (T, U, W), mapping: Seq[(S, (T, U, W))]): (T, U, W) = { + /* Same cascade as the pair overload, applied to triples. */ + mapping.foldRight(default) { case ((candidate, value), fallback) => + MuxT(candidate === key, value, fallback) + } + } +} + object Str { def apply(s: String): UInt = { diff --git a/src/main/scala/util/ReorderQueue.scala b/src/main/scala/util/ReorderQueue.scala index 458001af..c19115a0 100644 --- a/src/main/scala/util/ReorderQueue.scala +++ b/src/main/scala/util/ReorderQueue.scala @@ -1,7 +1,7 @@ package util import Chisel._ -import cde.Parameters +import config._ class ReorderQueueWrite[T <: Data](dType: T, tagWidth: Int) extends Bundle { val data = dType.cloneType