From 06c3f9b6550502869aa80584cce9d1a019e88d4a Mon Sep 17 00:00:00 2001
From: Howard Mao
Date: Thu, 14 Jan 2016 15:10:40 -0800
Subject: [PATCH] Rocket Chip fixes in response to lowRISC team's comments

* DMA frontend-backend communication tunneled over TileLink/AXI
* Split MMIO and Mem requests in l1tol2net instead of in AXI interconnect
* Don't make NIOMSHRs configurable. Fixed at 1.
* Connect accelerator-internal CSRs into the CSR file
* Make mtvec register configurable and writeable
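
The second bullet is the heart of the change: rather than letting the AXI
interconnect separate memory traffic from MMIO traffic, the L1-to-L2 crossbar
now routes by address and steers anything at or above MMIOBase to an extra
manager port backed by an MMIOTileLinkManager. A rough plain-Scala model of
that routing decision (not the Chisel code in the diff below; the bank hash is
simplified to a modulo and the constants are illustrative):

    object BankRouteModel {
      // 'blockAddr' is a cache-block address (byte address / block size); ports
      // 0 .. nBanks-1 are the L2 banks, port nBanks is the new MMIO manager.
      def route(blockAddr: BigInt, mmioBaseBlock: BigInt, nBanks: Int): Int =
        if (blockAddr < mmioBaseBlock) (blockAddr % nBanks).toInt // cached memory bank
        else nBanks                                               // MMIO TileLink manager

      def main(args: Array[String]): Unit = {
        val mmioBaseBlock = BigInt(0x80000000L) >> 6 // assumes MMIOBase = 2 GiB, 64-byte blocks
        println(route(BigInt(0x1000) >> 6, mmioBaseBlock, nBanks = 2))      // -> 0 (memory bank)
        println(route(BigInt(0x80001000L) >> 6, mmioBaseBlock, nBanks = 2)) // -> 2 (MMIO port)
      }
    }
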
---
 groundtest                       |  2 +-
 junctions                        |  2 +-
 rocket                           |  2 +-
 src/main/scala/Configs.scala     | 30 +++++++-----
 src/main/scala/RocketChip.scala  | 80 +++++++++++++++++---------------
 src/main/scala/TestConfigs.scala | 10 ++--
 uncore                           |  2 +-
 7 files changed, 68 insertions(+), 60 deletions(-)

diff --git a/groundtest b/groundtest
index c68ceb65..5cc2e28f 160000
--- a/groundtest
+++ b/groundtest
@@ -1 +1 @@
-Subproject commit c68ceb653184ad06998cb0702f30e39aaaf50b5c
+Subproject commit 5cc2e28f1f8af3fa7cb35b1ba53c667f8f516690
diff --git a/junctions b/junctions
index 8c947fc2..4069e88d 160000
--- a/junctions
+++ b/junctions
@@ -1 +1 @@
-Subproject commit 8c947fc25d76ee7d210dcc781878338023a93e93
+Subproject commit 4069e88d849a5fd6e82d238e45e4c97f9a0d2b44
diff --git a/rocket b/rocket
index 480beb8b..bcf035f4 160000
--- a/rocket
+++ b/rocket
@@ -1 +1 @@
-Subproject commit 480beb8b84789ee2747b20d951451e34d5ddb5fd
+Subproject commit bcf035f4e4ac6685ef811013a20b3dab5a9c9046
diff --git a/src/main/scala/Configs.scala b/src/main/scala/Configs.scala
index 5291e005..a0e18ab4 100644
--- a/src/main/scala/Configs.scala
+++ b/src/main/scala/Configs.scala
@@ -28,7 +28,7 @@ class DefaultConfig extends Config (
       new AddrMap(deviceTree +: csrs :+ scr)
     }
     def makeDeviceTree() = {
-      val addrMap = new AddrHashMap(site(GlobalAddrMap))
+      val addrMap = new AddrHashMap(site(GlobalAddrMap), site(MMIOBase))
       val devices = site(GlobalDeviceSet)
       val dt = new DeviceTreeGenerator
       dt.beginNode("")
@@ -75,6 +75,7 @@ class DefaultConfig extends Config (
       case HtifKey => HtifParameters(
                        width = Dump("HTIF_WIDTH", 16),
                        nSCR = 64,
+                       csrDataBits = site(XLen),
                        offsetBits = site(CacheBlockOffsetBits),
                        nCores = site(NTiles))
       //Memory Parameters
@@ -89,8 +90,8 @@ class DefaultConfig extends Config (
       case MIFTagBits => // Bits needed at the L2 agent
                          log2Up(site(NAcquireTransactors)+2) +
                          // Bits added by NASTI interconnect
-                         log2Up(site(NMemoryChannels) * site(NBanksPerMemoryChannel) +
-                           (if (site(UseDma)) 2 else 1))
+                         max(log2Up(site(NBanksPerMemoryChannel)),
+                           (if (site(UseDma)) 3 else 2))
       case MIFDataBits => 64
       case MIFAddrBits => site(PAddrBits) - site(CacheBlockOffsetBits)
       case MIFDataBeats => site(CacheBlockBytes) * 8 / site(MIFDataBits)
@@ -132,7 +133,6 @@ class DefaultConfig extends Config (
       case StoreDataQueueDepth => 17
       case ReplayQueueDepth => 16
       case NMSHRs => Knob("L1D_MSHRS")
-      case NIOMSHRs => 1
       case LRSCCycles => 32
       //L2 Memory System Params
       case NAcquireTransactors => 7
@@ -153,11 +153,12 @@ class DefaultConfig extends Config (
         }
       case BuildRoCC => Nil
       case RoccNMemChannels => site(BuildRoCC).map(_.nMemChannels).foldLeft(0)(_ + _)
+      case RoccNCSRs => site(BuildRoCC).map(_.csrs.size).foldLeft(0)(_ + _)
       case UseDma => false
       case UseStreamLoopback => false
       case NDmaTransactors => 3
+      case NDmaXacts => site(NDmaTransactors) * site(NTiles)
       case NDmaClients => site(NTiles)
-      case NDmaXactsPerClient => site(NDmaTransactors)
       //Rocket Core Constants
       case FetchWidth => 1
       case RetireWidth => 1
@@ -179,6 +180,7 @@ class DefaultConfig extends Config (
       case CoreInstBits => 32
       case CoreDataBits => site(XLen)
       case NCustomMRWCSRs => 0
+      case MtvecInit => BigInt(0x100)
       //Uncore Paramters
       case RTCPeriod => 100 // gives 10 MHz RTC assuming 1 GHz uncore clock
       case LNEndpoints => site(TLKey(site(TLId))).nManagers + site(TLKey(site(TLId))).nClients
@@ -187,17 +189,17 @@ class DefaultConfig extends Config (
       case TLKey("L1toL2") =>
         TileLinkParameters(
           coherencePolicy = new MESICoherence(site(L2DirectoryRepresentation)),
-          nManagers = site(NBanksPerMemoryChannel)*site(NMemoryChannels),
+          nManagers = site(NBanksPerMemoryChannel)*site(NMemoryChannels) + 1,
           nCachingClients = site(NTiles),
           nCachelessClients = (if (site(UseDma)) 2 else 1) +
                               site(NTiles) *
                                 (1 + (if(site(BuildRoCC).isEmpty) 0
                                       else site(RoccNMemChannels))),
-          maxClientXacts = max(site(NMSHRs) + site(NIOMSHRs),
+          maxClientXacts = max(site(NMSHRs) + 1,
                                max(if (site(BuildRoCC).isEmpty) 1 else site(RoccMaxTaggedMemXacts),
                                    if (site(UseDma)) 4 else 1)),
           maxClientsPerPort = max(if (site(BuildRoCC).isEmpty) 1 else 2,
-                                  if (site(UseDma)) site(NDmaTransactors) else 1),
+                                  if (site(UseDma)) site(NDmaTransactors) + 1 else 1),
           maxManagerXacts = site(NAcquireTransactors) + 2,
           dataBits = site(CacheBlockBytes)*8)
       case TLKey("L2toMC") =>
@@ -225,7 +227,6 @@ class DefaultConfig extends Config (
       case GlobalAddrMap => {
         val extraSize = site(ExternalIOStart) - site(MMIOBase)
         AddrMap(
-          AddrMapEntry("mem", None, MemChannels(site(MMIOBase), site(NMemoryChannels), AddrMapConsts.RWX)),
           AddrMapEntry("conf", None, MemSubmap(extraSize / 2, genCsrAddrMap)),
           AddrMapEntry("devices", None, MemSubmap(extraSize / 2, site(GlobalDeviceSet).getAddrMap)),
           AddrMapEntry("io", Some(site(ExternalIOStart)), MemSize(2 * site(MMIOBase), AddrMapConsts.RW)))
@@ -235,6 +236,9 @@ class DefaultConfig extends Config (
         if (site(UseStreamLoopback)) {
           devset.addDevice("loopback", site(StreamLoopbackWidth) / 8, "stream")
         }
+        if (site(UseDma)) {
+          devset.addDevice("dma", site(CacheBlockBytes), "dma")
+        }
         devset
       }
     }},
@@ -403,7 +407,9 @@ class WithDmaController extends Config(
       RoccParameters(
         opcodes = OpcodeSet.custom2,
         generator = (p: Parameters) => Module(new DmaController()(p)),
-        useDma = true))
+        csrs = Seq.range(
+          DmaCtrlRegNumbers.CSR_BASE,
+          DmaCtrlRegNumbers.CSR_END)))
     case RoccMaxTaggedMemXacts => 1
   })
 
@@ -414,7 +420,9 @@ class WithStreamLoopback extends Config(
     case StreamLoopbackWidth => 64
   })
 
-class DmaControllerConfig extends Config(new WithDmaController ++ new DefaultL2Config)
+class DmaControllerConfig extends Config(new WithDmaController ++ new WithStreamLoopback ++ new DefaultL2Config)
+class DualCoreDmaControllerConfig extends Config(new With2Cores ++ new DmaControllerConfig)
+class DmaControllerFPGAConfig extends Config(new WithDmaController ++ new WithStreamLoopback ++ new DefaultFPGAConfig)
 
 class SmallL2Config extends Config(
   new With2MemoryChannels ++ new With4BanksPerMemChannel ++
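
Two of the new parameters above are worth calling out: MtvecInit supplies the
reset value for the now-writeable mtvec register, and RoccNCSRs totals the CSRs
each RoCC accelerator advertises so the core's CSR file can map them in (this
is how WithDmaController exposes its control registers instead of a useDma
flag). A self-contained sketch of that bookkeeping, using stand-in types and
made-up register numbers in place of the real RoccParameters and
DmaCtrlRegNumbers:

    object RoccCsrSketch {
      // Stand-in for rocket's RoccParameters; only the fields used here are modeled.
      case class RoccParams(opcodes: String, csrs: Seq[Int] = Nil, nMemChannels: Int = 1)

      val CSR_BASE = 0x800 // hypothetical, stands in for DmaCtrlRegNumbers.CSR_BASE
      val CSR_END  = 0x808 // hypothetical, stands in for DmaCtrlRegNumbers.CSR_END

      def main(args: Array[String]): Unit = {
        // The accelerator declares the CSR numbers it owns...
        val buildRoCC = Seq(RoccParams("custom2", csrs = Seq.range(CSR_BASE, CSR_END)))
        // ...and the config sums them up, mirroring:
        //   case RoccNCSRs => site(BuildRoCC).map(_.csrs.size).foldLeft(0)(_ + _)
        val roccNCSRs = buildRoCC.map(_.csrs.size).foldLeft(0)(_ + _)
        println(s"RoccNCSRs = $roccNCSRs") // 8
      }
    }
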
diff --git a/src/main/scala/RocketChip.scala b/src/main/scala/RocketChip.scala
index 8c29ccf3..60a7f9e9 100644
--- a/src/main/scala/RocketChip.scala
+++ b/src/main/scala/RocketChip.scala
@@ -117,7 +117,6 @@ class Top(topParams: Parameters) extends Module with HasTopLevelParameters {
   uncore.io.tiles_uncached <> tileList.map(_.io.uncached).flatten
   io.host <> uncore.io.host
   if (p(UseBackupMemoryPort)) { io.mem_backup_ctrl <> uncore.io.mem_backup_ctrl }
-  if (p(UseDma)) { uncore.io.dma <> tileList.map(_.io.dma) }
 
   io.mem.zip(uncore.io.mem).foreach { case (outer, inner) =>
     TopUtils.connectNasti(outer, inner)
@@ -146,7 +145,6 @@ class Uncore(implicit val p: Parameters) extends Module
     val htif = Vec(nTiles, new HtifIO).flip
     val mem_backup_ctrl = new MemBackupCtrlIO
     val mmio = new NastiIO
-    val dma = Vec(nTiles, new DmaIO).flip
   }
 
   val htif = Module(new Htif(CSRs.mreset)) // One HTIF module per chip
@@ -155,11 +153,6 @@ class Uncore(implicit val p: Parameters) extends Module
   outmemsys.io.htif_uncached <> htif.io.mem
   outmemsys.io.tiles_uncached <> io.tiles_uncached
   outmemsys.io.tiles_cached <> io.tiles_cached
-  if (p(UseDma)) {
-    val dma_arb = Module(new DmaArbiter(nTiles))
-    dma_arb.io.in <> io.dma
-    outmemsys.io.dma <> dma_arb.io.out
-  }
 
   for (i <- 0 until nTiles) {
     io.htif(i).reset := htif.io.cpu(i).reset
@@ -213,34 +206,44 @@ class OuterMemorySystem(implicit val p: Parameters) extends Module with HasTopLe
     val scr = new SmiIO(xLen, scrAddrBits)
     val mmio = new NastiIO
     val deviceTree = new NastiIO
-    val dma = (new DmaIO).flip
   }
 
-  val dmaOpt = if (p(UseDma)) Some(Module(new DmaEngine)) else None
+  val dmaOpt = if (p(UseDma))
+    Some(Module(new DmaEngine(
+      DmaCtrlRegNumbers.CSR_BASE + DmaCtrlRegNumbers.OUTSTANDING)))
+    else None
+  val mmioBase = p(MMIOBase)
 
   // Create a simple L1toL2 NoC between the tiles+htif and the banks of outer memory
   // Cached ports are first in client list, making sharerToClientId just an indentity function
   // addrToBank is sed to hash physical addresses (of cache blocks) to banks (and thereby memory channels)
   val ordered_clients = (io.tiles_cached ++
-    (io.tiles_uncached ++ dmaOpt.map(_.io.inner) :+ io.htif_uncached)
+    (io.tiles_uncached ++ dmaOpt.map(_.io.mem) :+ io.htif_uncached)
       .map(TileLinkIOWrapper(_)))
   def sharerToClientId(sharerId: UInt) = sharerId
-  def addrToBank(addr: Bits): UInt = if(nBanks > 1) addr(lsb + log2Up(nBanks) - 1, lsb) else UInt(0)
+  def addrToBank(addr: Bits): UInt = {
+    Mux(addr.toUInt < UInt(mmioBase >> log2Up(p(CacheBlockBytes))),
+      if (nBanks > 1) addr(lsb + log2Up(nBanks) - 1, lsb) else UInt(0),
+      UInt(nBanks))
+  }
   val preBuffering = TileLinkDepths(2,2,2,2,2)
   val postBuffering = TileLinkDepths(0,0,1,0,0) //TODO: had EOS24 crit path on inner.release
-  val l1tol2net = Module(
-    if(nBanks == 1) new RocketChipTileLinkArbiter(sharerToClientId, preBuffering, postBuffering)
-    else new RocketChipTileLinkCrossbar(addrToBank, sharerToClientId, preBuffering, postBuffering))
+  val l1tol2net = Module(new RocketChipTileLinkCrossbar(addrToBank, sharerToClientId, preBuffering, postBuffering))
 
   // Create point(s) of coherence serialization
-  val nManagers = nMemChannels * nBanksPerMemChannel
-  val managerEndpoints = List.tabulate(nManagers){id => p(BuildL2CoherenceManager)(id, p)}
+  val managerEndpoints = List.tabulate(nBanks){id => p(BuildL2CoherenceManager)(id, p)}
   managerEndpoints.foreach { _.incoherent := io.incoherent }
 
+  val mmioManager = Module(new MMIOTileLinkManager()(p.alterPartial({
+    case TLId => "L1toL2"
+    case InnerTLId => "L1toL2"
+    case OuterTLId => "L2toMC"
+  })))
+
   // Wire the tiles and htif to the TileLink client ports of the L1toL2 network,
   // and coherence manager(s) to the other side
   l1tol2net.io.clients <> ordered_clients
-  l1tol2net.io.managers <> managerEndpoints.map(_.innerTL)
+  l1tol2net.io.managers <> managerEndpoints.map(_.innerTL) :+ mmioManager.io.inner
 
   // Create a converter between TileLinkIO and MemIO for each channel
   val outerTLParams = p.alterPartial({ case TLId => "L2toMC" })
@@ -248,8 +251,8 @@ class OuterMemorySystem(implicit val p: Parameters) extends Module with HasTopLe
   val backendBuffering = TileLinkDepths(0,0,0,0,0)
 
   val addrMap = p(GlobalAddrMap)
-  val addrHashMap = new AddrHashMap(addrMap)
-  val nMasters = managerEndpoints.size + (if (dmaOpt.isEmpty) 1 else 2)
+  val addrHashMap = new AddrHashMap(addrMap, mmioBase)
+  val nMasters = (if (dmaOpt.isEmpty) 2 else 3)
   val nSlaves = addrHashMap.nEntries
 
   println("Generated Address Map")
@@ -257,12 +260,8 @@ class OuterMemorySystem(implicit val p: Parameters) extends Module with HasTopLe
     println(f"\t$name%s $base%x - ${base + size - 1}%x")
   }
 
-  val interconnect = if (nMemChannels == 1)
-    Module(new NastiRecursiveInterconnect(
-      nMasters, nSlaves, addrMap))
-  else
-    Module(new NastiPerformanceInterconnect(
-      nBanksPerMemChannel, nMemChannels, 1, nSlaves - nMemChannels, addrMap))
+  val mmio_ic = Module(new NastiRecursiveInterconnect(nMasters, nSlaves, addrMap, mmioBase))
+  val mem_ic = Module(new NastiMemoryInterconnect(nBanksPerMemChannel, nMemChannels))
 
   for ((bank, i) <- managerEndpoints.zipWithIndex) {
     val unwrap = Module(new ClientTileLinkIOUnwrapper()(outerTLParams))
@@ -271,42 +270,47 @@ class OuterMemorySystem(implicit val p: Parameters) extends Module with HasTopLe
     unwrap.io.in <> ClientTileLinkEnqueuer(bank.outerTL, backendBuffering)(outerTLParams)
     narrow.io.in <> unwrap.io.out
     conv.io.tl <> narrow.io.out
-    TopUtils.connectNasti(interconnect.io.masters(i), conv.io.nasti)
+    TopUtils.connectNasti(mem_ic.io.masters(i), conv.io.nasti)
   }
 
+  val mmio_narrow = Module(new TileLinkIONarrower("L2toMC", "Outermost"))
+  val mmio_conv = Module(new NastiIOTileLinkIOConverter()(outermostTLParams))
+  mmio_narrow.io.in <> mmioManager.io.outer
+  mmio_conv.io.tl <> mmio_narrow.io.out
+  TopUtils.connectNasti(mmio_ic.io.masters(0), mmio_conv.io.nasti)
+
   val rtc = Module(new RTC(CSRs.mtime))
-  interconnect.io.masters(nManagers) <> rtc.io
+  mmio_ic.io.masters(1) <> rtc.io
 
   dmaOpt.foreach { dma =>
-    dma.io.dma <> io.dma
-    interconnect.io.masters(nManagers + 1) <> dma.io.outer
+    mmio_ic.io.masters(2) <> dma.io.mmio
+    dma.io.ctrl <> mmio_ic.io.slaves(addrHashMap("devices:dma").port)
   }
 
   for (i <- 0 until nTiles) {
     val csrName = s"conf:csr$i"
    val csrPort = addrHashMap(csrName).port
     val conv = Module(new SmiIONastiIOConverter(xLen, csrAddrBits))
-    conv.io.nasti <> interconnect.io.slaves(csrPort)
+    conv.io.nasti <> mmio_ic.io.slaves(csrPort)
     io.csr(i) <> conv.io.smi
   }
 
-  val src_conv = Module(new SmiIONastiIOConverter(scrDataBits, scrAddrBits))
-  src_conv.io.nasti <> interconnect.io.slaves(addrHashMap("conf:scr").port)
-  io.scr <> src_conv.io.smi
+  val scr_conv = Module(new SmiIONastiIOConverter(scrDataBits, scrAddrBits))
+  scr_conv.io.nasti <> mmio_ic.io.slaves(addrHashMap("conf:scr").port)
+  io.scr <> scr_conv.io.smi
 
   if (p(UseStreamLoopback)) {
     val lo_width = p(StreamLoopbackWidth)
     val lo_size = p(StreamLoopbackSize)
     val lo_conv = Module(new NastiIOStreamIOConverter(lo_width))
-    lo_conv.io.nasti <> interconnect.io.slaves(addrHashMap("devices:loopback").port)
+    lo_conv.io.nasti <> mmio_ic.io.slaves(addrHashMap("devices:loopback").port)
     lo_conv.io.stream.in <> Queue(lo_conv.io.stream.out, lo_size)
   }
 
-  io.mmio <> interconnect.io.slaves(addrHashMap("io").port)
-  io.deviceTree <> interconnect.io.slaves(addrHashMap("conf:devicetree").port)
-
-  val mem_channels = interconnect.io.slaves.take(nMemChannels)
+  io.mmio <> mmio_ic.io.slaves(addrHashMap("io").port)
+  io.deviceTree <> mmio_ic.io.slaves(addrHashMap("conf:devicetree").port)
+  val mem_channels = mem_ic.io.slaves
 
   // Create a SerDes for backup memory port
   if(p(UseBackupMemoryPort)) {
     VLSIUtils.doOuterMemorySystemSerdes(
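
With the "mem" region gone from the global address map, OuterMemorySystem now
drives two NASTI interconnects instead of one: mem_ic carries only the cached
traffic from the L2 banks out to the memory channels, while mmio_ic carries
everything else, with the MMIOTileLinkManager, the RTC and (when UseDma is set)
the DMA engine's frontend as its masters. A small sketch of that port
bookkeeping; the index constants are written out here purely for illustration
and simply mirror the wiring order used above:

    object MmioInterconnectSketch {
      // mmio_ic master ports, in the order they are connected in OuterMemorySystem.
      val MMIO_MANAGER_PORT = 0 // MMIOTileLinkManager -> TileLinkIONarrower -> NASTI converter
      val RTC_PORT          = 1 // RTC master (periodic mtime updates)
      val DMA_FRONTEND_PORT = 2 // DmaEngine MMIO master, present only when UseDma is set

      // Mirrors: val nMasters = (if (dmaOpt.isEmpty) 2 else 3)
      def nMmioMasters(useDma: Boolean): Int = if (useDma) 3 else 2

      def main(args: Array[String]): Unit = {
        println(nMmioMasters(useDma = false)) // 2
        println(nMmioMasters(useDma = true))  // 3
      }
    }
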
diff --git a/src/main/scala/TestConfigs.scala b/src/main/scala/TestConfigs.scala
index d58e7f6b..7bf6bcae 100644
--- a/src/main/scala/TestConfigs.scala
+++ b/src/main/scala/TestConfigs.scala
@@ -13,14 +13,14 @@ class WithGroundTest extends Config(
     case TLKey("L1toL2") =>
       TileLinkParameters(
         coherencePolicy = new MESICoherence(site(L2DirectoryRepresentation)),
-        nManagers = site(NBanksPerMemoryChannel)*site(NMemoryChannels),
+        nManagers = site(NBanksPerMemoryChannel)*site(NMemoryChannels) + 1,
         nCachingClients = site(NTiles),
         nCachelessClients = site(NTiles) + (if (site(UseDma)) 2 else 1),
-        maxClientXacts = max(site(NMSHRs) + site(NIOMSHRs),
+        maxClientXacts = max(site(NMSHRs) + 1,
                              max(site(GroundTestMaxXacts),
                                  if (site(UseDma)) 4 else 1)),
         maxClientsPerPort = max(if (site(BuildRoCC).isEmpty) 1 else 2,
-                                if (site(UseDma)) site(NDmaTransactors) else 1),
+                                if (site(UseDma)) site(NDmaTransactors) + 1 else 1),
         maxManagerXacts = site(NAcquireTransactors) + 2,
         dataBits = site(CacheBlockBytes)*8)
     case BuildTiles => {
@@ -93,10 +93,6 @@ class WithDmaStreamTest extends Config(
     case UseDma => true
     case BuildGroundTest =>
      (id: Int, p: Parameters) => Module(new DmaStreamTest()(p))
-    case DmaStreamLoopbackAddr => {
-      val addrMap = new AddrHashMap(site(GlobalAddrMap))
-      addrMap("devices:loopback").start
-    }
     case DmaStreamTestSettings => DmaStreamTestConfig(
       source = 0x10, dest = 0x28, len = 0x18,
       size = site(StreamLoopbackWidth) / 8)
diff --git a/uncore b/uncore
index fc9ca366..dbdc7ddd 160000
--- a/uncore
+++ b/uncore
@@ -1 +1 @@
-Subproject commit fc9ca36673e3cca42687a4d8e77ccda755c71389
+Subproject commit dbdc7ddd4c368b06b14799ed76b2069c25a6c313
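
For reference, the TileLink sizing now shared by DefaultConfig and
WithGroundTest can be written out as plain arithmetic. The numbers below are
only illustrative (UseDma on, a RoCC accelerator present, NMSHRs taken as the
default L1D_MSHRS knob value of 2), not a restatement of either config:

    object L1toL2Sizing {
      def main(args: Array[String]): Unit = {
        val nTiles = 1; val nMemChannels = 1; val nBanksPerMemChannel = 1
        val nMSHRs = 2; val nDmaTransactors = 3
        val useDma = true; val roccMaxTaggedMemXacts = 1

        // The memory banks plus the single new MMIOTileLinkManager.
        val nManagers = nBanksPerMemChannel * nMemChannels + 1

        // NIOMSHRs is no longer a parameter; the lone uncached MSHR is the "+ 1".
        val maxClientXacts =
          math.max(nMSHRs + 1, math.max(roccMaxTaggedMemXacts, if (useDma) 4 else 1))

        // The DMA engine now needs one extra client ID on its port.
        val maxClientsPerPort = math.max(2, if (useDma) nDmaTransactors + 1 else 1)

        // DMA transaction IDs are sized globally rather than per client.
        val nDmaXacts = nDmaTransactors * nTiles

        println(s"nManagers=$nManagers maxClientXacts=$maxClientXacts " +
          s"maxClientsPerPort=$maxClientsPerPort nDmaXacts=$nDmaXacts")
      }
    }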