From 56ecdff52d5b0097bc6be54a7ec22bf1c79212b5 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Thu, 6 Aug 2015 12:51:18 -0700 Subject: [PATCH] Implement NASTI-based Mem/IO interconnect --- chisel | 2 +- hardfloat | 2 +- junctions | 2 +- rocket | 2 +- src/main/scala/Configs.scala | 26 +++++-- src/main/scala/RocketChip.scala | 130 ++++++++++++++++++++++---------- src/main/scala/Vlsi.scala | 79 +++++++++++-------- uncore | 2 +- 8 files changed, 166 insertions(+), 79 deletions(-) diff --git a/chisel b/chisel index 1f01401e..179f5c6a 160000 --- a/chisel +++ b/chisel @@ -1 +1 @@ -Subproject commit 1f01401e9b4b0136303e5ae75a1196aaa222d80f +Subproject commit 179f5c6a6fd8b9f0195073ed204ddc07b1a50363 diff --git a/hardfloat b/hardfloat index 1136e89f..83b76aa2 160000 --- a/hardfloat +++ b/hardfloat @@ -1 +1 @@ -Subproject commit 1136e89f0f4037c31e48aad4b13260ff17039811 +Subproject commit 83b76aa258dccdf4b7b9f3d4c7756549ed37be9d diff --git a/junctions b/junctions index f98a4d64..3ad77802 160000 --- a/junctions +++ b/junctions @@ -1 +1 @@ -Subproject commit f98a4d64e7fa2e19968f8275be94efc8415d20a4 +Subproject commit 3ad77802d200be7e1506a9add0faef3acf30bcd1 diff --git a/rocket b/rocket index caa109c3..7a520740 160000 --- a/rocket +++ b/rocket @@ -1 +1 @@ -Subproject commit caa109c376a3c5fd12aea9d976c140982d9cfd8c +Subproject commit 7a520740dc4c41694491e6628ff6233b4c76acd8 diff --git a/src/main/scala/Configs.scala b/src/main/scala/Configs.scala index a71def02..b55d1040 100644 --- a/src/main/scala/Configs.scala +++ b/src/main/scala/Configs.scala @@ -35,15 +35,13 @@ class DefaultConfig extends ChiselConfig ( case MIFTagBits => Dump("MEM_TAG_BITS", log2Up(site(NAcquireTransactors)+2) + log2Up(site(NBanksPerMemoryChannel)) + - log2Up(site(NMemoryChannels)) + /* TODO: Remove for multichannel Top */ - 1) + log2Up(site(NMemoryChannels))) case MIFDataBits => Dump("MEM_DATA_BITS", 128) case MIFAddrBits => Dump("MEM_ADDR_BITS", site(PAddrBits) - site(CacheBlockOffsetBits)) case MIFDataBeats => site(TLDataBits)*site(TLDataBeats)/site(MIFDataBits) case NASTIDataBits => site(MIFDataBits) - case NASTIAddrBits => site(MIFAddrBits) + case NASTIAddrBits => site(PAddrBits) case NASTIIdBits => site(MIFTagBits) - case UseNASTI => false //Params used by all caches case NSets => findBy(CacheName) case NWays => findBy(CacheName) @@ -72,6 +70,7 @@ class DefaultConfig extends ChiselConfig ( case StoreDataQueueDepth => 17 case ReplayQueueDepth => 16 case NMSHRs => Knob("L1D_MSHRS") + case NIOMSHRs => 1 case LRSCCycles => 32 //L2 Memory System Params case NAcquireTransactors => 7 @@ -119,6 +118,7 @@ class DefaultConfig extends ChiselConfig ( case TLNClients => site(TLNCachingClients) + site(TLNCachelessClients) case TLDataBits => site(CacheBlockBytes)*8/site(TLDataBeats) case TLDataBeats => 4 + case TLWriteMaskBits => (site(TLDataBits) - 1) / 8 + 1 case TLNetworkIsOrderedP2P => false case TLNManagers => findBy(TLId) case TLNCachingClients => findBy(TLId) @@ -133,7 +133,7 @@ class DefaultConfig extends ChiselConfig ( case TLNCachelessClients => site(NTiles) + 1 case TLCoherencePolicy => new MESICoherence(site(L2DirectoryRepresentation)) case TLMaxManagerXacts => site(NAcquireTransactors) + 2 - case TLMaxClientXacts => max(site(NMSHRs), + case TLMaxClientXacts => max(site(NMSHRs) + site(NIOMSHRs), if(site(BuildRoCC).isEmpty) 1 else site(RoCCMaxTaggedMemXacts)) case TLMaxClientsPerPort => if(site(BuildRoCC).isEmpty) 1 else 3 @@ -155,6 +155,18 @@ class DefaultConfig extends ChiselConfig ( case CacheBlockBytes => 64 case CacheBlockOffsetBits => log2Up(here(CacheBlockBytes)) case UseBackupMemoryPort => true + case MMIOBase => BigInt(1 << 30) // 1 GB + case ExternalIOStart => 2 * site(MMIOBase) + case NASTIAddrMap => Seq( + ("mem", None, MemSize(site(MMIOBase), AddrMap.RWX)), + ("conf", None, Submap(site(ExternalIOStart) - site(MMIOBase), + ("csr0", None, MemSize(1 << 15, AddrMap.RW)), + ("scr", None, MemSize(site(HTIFNSCR) * 8, AddrMap.RW)))), + ("io", Some(site(ExternalIOStart)), + MemSize(2 * site(MMIOBase), AddrMap.RW))) + case NASTIAddrHashMap => new AddrHashMap(site(NASTIAddrMap)) + case NASTINMasters => site(TLNManagers) + 1 + case NASTINSlaves => site(NASTIAddrHashMap).nEntries }}, knobValues = { case "NTILES" => 1 @@ -254,3 +266,7 @@ class SmallConfig extends ChiselConfig ( class DefaultFPGASmallConfig extends ChiselConfig(new SmallConfig ++ new DefaultFPGAConfig) class ExampleSmallConfig extends ChiselConfig(new SmallConfig ++ new DefaultConfig) + +class MultibankConfig extends ChiselConfig(new With2Banks ++ new DefaultConfig) +class MultibankL2Config extends ChiselConfig( + new With2Banks ++ new WithL2Cache ++ new DefaultConfig) diff --git a/src/main/scala/RocketChip.scala b/src/main/scala/RocketChip.scala index fadffba8..5a904a2f 100644 --- a/src/main/scala/RocketChip.scala +++ b/src/main/scala/RocketChip.scala @@ -26,8 +26,8 @@ case object UseBackupMemoryPort extends Field[Boolean] case object BuildL2CoherenceManager extends Field[() => CoherenceAgent] /** Function for building some kind of tile connected to a reset signal */ case object BuildTiles extends Field[Seq[(Bool) => Tile]] -/** Which protocol to use to talk to memory/devices */ -case object UseNASTI extends Field[Boolean] +/** Start address of the "io" region in the memory map */ +case object ExternalIOStart extends Field[BigInt] /** Utility trait for quick access to some relevant parameters */ trait TopLevelParameters extends UsesParameters { @@ -40,6 +40,7 @@ trait TopLevelParameters extends UsesParameters { val nMemReqs = params(NOutstandingMemReqsPerChannel) val mifAddrBits = params(MIFAddrBits) val mifDataBeats = params(MIFDataBeats) + val scrAddrBits = log2Up(params(HTIFNSCR)) require(lsb + log2Up(nBanks) < mifAddrBits) } @@ -61,7 +62,8 @@ class TopIO extends BasicTopIO { } class MultiChannelTopIO extends BasicTopIO with TopLevelParameters { - val mem = Vec(new MemIO, nMemChannels) + val mem = Vec(new NASTIMasterIO, nMemChannels) + val mmio = new NASTIMasterIO } /** Top-level module for the chip */ @@ -70,11 +72,19 @@ class Top extends Module with TopLevelParameters { val io = new TopIO if(!params(UseZscale)) { val temp = Module(new MultiChannelTop) - val arb = Module(new MemIOArbiter(nMemChannels)) - arb.io.inner <> temp.io.mem - io.mem <> arb.io.outer + val arb = Module(new NASTIArbiter(nMemChannels)) + val conv = Module(new MemIONASTISlaveIOConverter(params(CacheBlockOffsetBits))) + arb.io.master <> temp.io.mem + conv.io.nasti <> arb.io.slave + io.mem.req_cmd <> Queue(conv.io.mem.req_cmd) + io.mem.req_data <> Queue(conv.io.mem.req_data, mifDataBeats) + conv.io.mem.resp <> Queue(io.mem.resp, mifDataBeats) io.mem_backup_ctrl <> temp.io.mem_backup_ctrl io.host <> temp.io.host + + // tie off the mmio port + val errslave = Module(new NASTIErrorSlave) + errslave.io <> temp.io.mmio } else { val temp = Module(new ZscaleTop) io.host <> temp.io.host @@ -93,8 +103,8 @@ class MultiChannelTop extends Module with TopLevelParameters { case ((hl, tile), i) => tile.io.host.id := UInt(i) tile.io.host.reset := Reg(next=Reg(next=hl.reset)) - tile.io.host.pcr_req <> Queue(hl.pcr_req) - hl.pcr_rep <> Queue(tile.io.host.pcr_rep) + tile.io.host.pcr.req <> Queue(hl.pcr.req) + hl.pcr.resp <> Queue(tile.io.host.pcr.resp) hl.ipi_req <> Queue(tile.io.host.ipi_req) tile.io.host.ipi_rep <> Queue(hl.ipi_rep) hl.debug_stats_pcr := tile.io.host.debug_stats_pcr @@ -105,6 +115,7 @@ class MultiChannelTop extends Module with TopLevelParameters { uncore.io.tiles_uncached <> tileList.map(_.io.uncached) io.host <> uncore.io.host io.mem <> uncore.io.mem + io.mmio <> uncore.io.mmio if(params(UseBackupMemoryPort)) { io.mem_backup_ctrl <> uncore.io.mem_backup_ctrl } } @@ -116,11 +127,12 @@ class MultiChannelTop extends Module with TopLevelParameters { class Uncore extends Module with TopLevelParameters { val io = new Bundle { val host = new HostIO - val mem = Vec(new MemIO, nMemChannels) + val mem = Vec(new NASTIMasterIO, nMemChannels) val tiles_cached = Vec(new ClientTileLinkIO, nTiles).flip val tiles_uncached = Vec(new ClientUncachedTileLinkIO, nTiles).flip val htif = Vec(new HTIFIO, nTiles).flip val mem_backup_ctrl = new MemBackupCtrlIO + val mmio = new NASTIMasterIO } val htif = Module(new HTIF(CSRs.mreset)) // One HTIF module per chip @@ -130,13 +142,36 @@ class Uncore extends Module with TopLevelParameters { outmemsys.io.tiles_uncached <> io.tiles_uncached outmemsys.io.tiles_cached <> io.tiles_cached + for (i <- 0 until nTiles) { + io.htif(i).reset := htif.io.cpu(i).reset + io.htif(i).id := htif.io.cpu(i).id + htif.io.cpu(i).ipi_req <> io.htif(i).ipi_req + io.htif(i).ipi_rep <> htif.io.cpu(i).ipi_rep + htif.io.cpu(i).debug_stats_pcr <> io.htif(i).debug_stats_pcr + + val pcr_arb = Module(new SMIArbiter(2, 64, 12)) + pcr_arb.io.in(0) <> htif.io.cpu(i).pcr + pcr_arb.io.in(1) <> outmemsys.io.pcr(i) + io.htif(i).pcr <> pcr_arb.io.out + } + + // Arbitrate SCR access between MMIO and HTIF + val scrArb = Module(new SMIArbiter(2, 64, scrAddrBits)) + val scrFile = Module(new SCRFile) + + scrArb.io.in(0) <> htif.io.scr + scrArb.io.in(1) <> outmemsys.io.scr + scrFile.io.smi <> scrArb.io.out + // scrFile.io.scr <> (... your SCR connections ...) + // Wire the htif to the memory port(s) and host interface io.host.debug_stats_pcr := htif.io.host.debug_stats_pcr - htif.io.cpu <> io.htif io.mem <> outmemsys.io.mem + io.mmio <> outmemsys.io.mmio if(params(UseBackupMemoryPort)) { outmemsys.io.mem_backup_en := io.mem_backup_ctrl.en - VLSIUtils.padOutHTIFWithDividedClock(htif.io, outmemsys.io.mem_backup, io.mem_backup_ctrl, io.host, htifW) + VLSIUtils.padOutHTIFWithDividedClock(htif.io.host, scrFile.io.scr, + outmemsys.io.mem_backup, io.mem_backup_ctrl, io.host, htifW) } else { htif.io.host.out <> io.host.out htif.io.host.in <> io.host.in @@ -152,9 +187,12 @@ class OuterMemorySystem extends Module with TopLevelParameters { val tiles_uncached = Vec(new ClientUncachedTileLinkIO, nTiles).flip val htif_uncached = (new ClientUncachedTileLinkIO).flip val incoherent = Vec(Bool(), nTiles).asInput - val mem = Vec(new MemIO, nMemChannels) + val mem = Vec(new NASTIMasterIO, nMemChannels) val mem_backup = new MemSerializedIO(htifW) val mem_backup_en = Bool(INPUT) + val pcr = Vec(new SMIIO(64, 12), nTiles) + val scr = new SMIIO(64, scrAddrBits) + val mmio = new NASTIMasterIO } // Create a simple L1toL2 NoC between the tiles+htif and the banks of outer memory @@ -170,43 +208,59 @@ class OuterMemorySystem extends Module with TopLevelParameters { else new RocketChipTileLinkCrossbar(addrToBank, sharerToClientId, preBuffering, postBuffering)) // Create point(s) of coherence serialization - val managerEndpoints = List.fill(nMemChannels) { - List.fill(nBanksPerMemChannel) { - params(BuildL2CoherenceManager)()}} - managerEndpoints.flatten.foreach { _.incoherent := io.incoherent } + val nManagers = nMemChannels * nBanksPerMemChannel + val managerEndpoints = List.fill(nManagers) { params(BuildL2CoherenceManager)()} + managerEndpoints.foreach { _.incoherent := io.incoherent } // Wire the tiles and htif to the TileLink client ports of the L1toL2 network, // and coherence manager(s) to the other side l1tol2net.io.clients <> ordered_clients - l1tol2net.io.managers <> managerEndpoints.flatMap(_.map(_.innerTL)) + l1tol2net.io.managers <> managerEndpoints.map(_.innerTL) // Create a converter between TileLinkIO and MemIO for each channel val outerTLParams = params.alterPartial({ case TLId => "L2ToMC" }) val backendBuffering = TileLinkDepths(0,0,0,0,0) - val mem_channels = managerEndpoints.map { banks => - if(!params(UseNASTI)) { - val arb = Module(new RocketChipTileLinkArbiter(managerDepths = backendBuffering))(outerTLParams) - val conv = Module(new MemPipeIOTileLinkIOConverter(nMemReqs))(outerTLParams) - arb.io.clients <> banks.map(_.outerTL) - arb.io.managers.head <> conv.io.tl - MemIOMemPipeIOConverter(conv.io.mem) - } else { - val arb = Module(new RocketChipTileLinkArbiter(managerDepths = backendBuffering))(outerTLParams) - val conv1 = Module(new NASTIMasterIOTileLinkIOConverter)(outerTLParams) - val conv2 = Module(new MemIONASTISlaveIOConverter(params(CacheBlockOffsetBits))) - val conv3 = Module(new MemPipeIOMemIOConverter(nMemReqs)) - arb.io.clients <> banks.map(_.outerTL) - arb.io.managers.head <> conv1.io.tl - conv2.io.nasti <> conv1.io.nasti - conv3.io.cpu.req_cmd <> Queue(conv2.io.mem.req_cmd, 2) - conv3.io.cpu.req_data <> Queue(conv2.io.mem.req_data, mifDataBeats) - conv2.io.mem.resp <> conv3.io.cpu.resp - MemIOMemPipeIOConverter(conv3.io.mem) - } + + val addrMap = params(NASTIAddrHashMap) + + println("Generated Address Map") + for ((name, base, size, _) <- addrMap.sortedEntries) { + println(f"\t$name%s $base%x - ${base + size - 1}%x") } + val interconnect = Module(new NASTITopInterconnect) + + for ((bank, i) <- managerEndpoints.zipWithIndex) { + val unwrap = Module(new ClientTileLinkIOUnwrapper)(outerTLParams) + val conv = Module(new NASTIMasterIOTileLinkIOConverter)(outerTLParams) + unwrap.io.in <> bank.outerTL + conv.io.tl <> unwrap.io.out + interconnect.io.masters(i) <> conv.io.nasti + } + + val rtc = Module(new RTC(CSRs.mtime)) + interconnect.io.masters(nManagers) <> rtc.io + + for (i <- 0 until nTiles) { + val csrName = s"conf:csr$i" + val csrPort = addrMap(csrName).port + val conv = Module(new SMIIONASTISlaveIOConverter(64, 12)) + conv.io.nasti <> interconnect.io.slaves(csrPort) + io.pcr(i) <> conv.io.smi + } + + val conv = Module(new SMIIONASTISlaveIOConverter(64, scrAddrBits)) + conv.io.nasti <> interconnect.io.slaves(addrMap("conf:scr").port) + io.scr <> conv.io.smi + + io.mmio <> interconnect.io.slaves(addrMap("io").port) + + val mem_channels = interconnect.io.slaves.take(nMemChannels) + // Create a SerDes for backup memory port if(params(UseBackupMemoryPort)) { - VLSIUtils.doOuterMemorySystemSerdes(mem_channels, io.mem, io.mem_backup, io.mem_backup_en, nMemChannels, params(HTIFWidth)) + VLSIUtils.doOuterMemorySystemSerdes( + mem_channels, io.mem, io.mem_backup, io.mem_backup_en, + nMemChannels, params(HTIFWidth), params(CacheBlockOffsetBits)) } else { io.mem <> mem_channels } } diff --git a/src/main/scala/Vlsi.scala b/src/main/scala/Vlsi.scala index 1e71ec84..942495b4 100644 --- a/src/main/scala/Vlsi.scala +++ b/src/main/scala/Vlsi.scala @@ -15,52 +15,69 @@ class MemDessert extends Module { object VLSIUtils { def doOuterMemorySystemSerdes( - llcs: Seq[MemIO], - mems: Seq[MemIO], + llcs: Seq[NASTIMasterIO], + mems: Seq[NASTIMasterIO], backup: MemSerializedIO, en: Bool, nMemChannels: Int, - htifWidth: Int) { - val arb = Module(new MemIOArbiter(nMemChannels)) + htifWidth: Int, + blockOffsetBits: Int) { + + val arb = Module(new NASTIArbiter(nMemChannels)) + val conv = Module(new MemIONASTISlaveIOConverter(blockOffsetBits)) val mem_serdes = Module(new MemSerdes(htifWidth)) - mem_serdes.io.wide <> arb.io.outer + + conv.io.nasti <> arb.io.slave + mem_serdes.io.wide <> conv.io.mem backup <> mem_serdes.io.narrow - llcs zip mems zip arb.io.inner foreach { case ((llc, mem), wide) => - llc.req_cmd.ready := Mux(en, wide.req_cmd.ready, mem.req_cmd.ready) - mem.req_cmd.valid := llc.req_cmd.valid && !en - mem.req_cmd.bits := llc.req_cmd.bits - wide.req_cmd.valid := llc.req_cmd.valid && en - wide.req_cmd.bits := llc.req_cmd.bits + llcs zip mems zip arb.io.master foreach { case ((llc, mem), wide) => + llc.ar.ready := Mux(en, wide.ar.ready, mem.ar.ready) + mem.ar.valid := llc.ar.valid && !en + mem.ar.bits := llc.ar.bits + wide.ar.valid := llc.ar.valid && en + wide.ar.bits := llc.ar.bits - llc.req_data.ready := Mux(en, wide.req_data.ready, mem.req_data.ready) - mem.req_data.valid := llc.req_data.valid && !en - mem.req_data.bits := llc.req_data.bits - wide.req_data.valid := llc.req_data.valid && en - wide.req_data.bits := llc.req_data.bits + llc.aw.ready := Mux(en, wide.aw.ready, mem.aw.ready) + mem.aw.valid := llc.aw.valid && !en + mem.aw.bits := llc.aw.bits + wide.aw.valid := llc.aw.valid && en + wide.aw.bits := llc.aw.bits - llc.resp.valid := Mux(en, wide.resp.valid, mem.resp.valid) - llc.resp.bits := Mux(en, wide.resp.bits, mem.resp.bits) - mem.resp.ready := llc.resp.ready && !en - wide.resp.ready := llc.resp.ready && en + llc.w.ready := Mux(en, wide.w.ready, mem.w.ready) + mem.w.valid := llc.w.valid && !en + mem.w.bits := llc.w.bits + wide.w.valid := llc.w.valid && en + wide.w.bits := llc.w.bits + + llc.b.valid := Mux(en, wide.b.valid, mem.b.valid) + llc.b.bits := Mux(en, wide.b.bits, mem.b.bits) + mem.b.ready := llc.b.ready && !en + wide.b.ready := llc.b.ready && en + + llc.r.valid := Mux(en, wide.r.valid, mem.r.valid) + llc.r.bits := Mux(en, wide.r.bits, mem.r.bits) + mem.r.ready := llc.r.ready && !en + wide.r.ready := llc.r.ready && en } } def padOutHTIFWithDividedClock( - htif: HTIFModuleIO, + htif: HostIO, + scr: SCRIO, child: MemSerializedIO, parent: MemBackupCtrlIO, host: HostIO, htifW: Int) { val hio = Module((new SlowIO(512)) { Bits(width = htifW+1) }) - hio.io.set_divisor.valid := htif.scr.wen && (htif.scr.waddr === UInt(63)) - hio.io.set_divisor.bits := htif.scr.wdata - htif.scr.rdata(63) := hio.io.divisor + hio.io.set_divisor.valid := scr.wen && (scr.waddr === UInt(63)) + hio.io.set_divisor.bits := scr.wdata + scr.rdata(63) := hio.io.divisor - hio.io.out_fast.valid := htif.host.out.valid || child.req.valid - hio.io.out_fast.bits := Cat(htif.host.out.valid, Mux(htif.host.out.valid, htif.host.out.bits, child.req.bits)) - htif.host.out.ready := hio.io.out_fast.ready - child.req.ready := hio.io.out_fast.ready && !htif.host.out.valid + hio.io.out_fast.valid := htif.out.valid || child.req.valid + hio.io.out_fast.bits := Cat(htif.out.valid, Mux(htif.out.valid, htif.out.bits, child.req.bits)) + htif.out.ready := hio.io.out_fast.ready + child.req.ready := hio.io.out_fast.ready && !htif.out.valid host.out.valid := hio.io.out_slow.valid && hio.io.out_slow.bits(htifW) host.out.bits := hio.io.out_slow.bits parent.out_valid := hio.io.out_slow.valid && !hio.io.out_slow.bits(htifW) @@ -72,9 +89,9 @@ object VLSIUtils { host.in.ready := hio.io.in_slow.ready child.resp.valid := hio.io.in_fast.valid && hio.io.in_fast.bits(htifW) child.resp.bits := hio.io.in_fast.bits - htif.host.in.valid := hio.io.in_fast.valid && !hio.io.in_fast.bits(htifW) - htif.host.in.bits := hio.io.in_fast.bits - hio.io.in_fast.ready := Mux(hio.io.in_fast.bits(htifW), Bool(true), htif.host.in.ready) + htif.in.valid := hio.io.in_fast.valid && !hio.io.in_fast.bits(htifW) + htif.in.bits := hio.io.in_fast.bits + hio.io.in_fast.ready := Mux(hio.io.in_fast.bits(htifW), Bool(true), htif.in.ready) host.clk := hio.io.clk_slow host.clk_edge := Reg(next=host.clk && !Reg(next=host.clk)) } diff --git a/uncore b/uncore index 5b76a91b..d6895713 160000 --- a/uncore +++ b/uncore @@ -1 +1 @@ -Subproject commit 5b76a91b2ed22ab203730d32202fa653431cf17c +Subproject commit d6895713cf4c0fcc53a3507f0c376716be8b0dce