
Implement NASTI-based Mem/IO interconnect

Howard Mao 2015-08-06 12:51:18 -07:00
parent c6bcc832a1
commit 56ecdff52d
8 changed files with 166 additions and 79 deletions

chisel

@@ -1 +1 @@
-Subproject commit 1f01401e9b4b0136303e5ae75a1196aaa222d80f
+Subproject commit 179f5c6a6fd8b9f0195073ed204ddc07b1a50363

@@ -1 +1 @@
-Subproject commit 1136e89f0f4037c31e48aad4b13260ff17039811
+Subproject commit 83b76aa258dccdf4b7b9f3d4c7756549ed37be9d

@@ -1 +1 @@
-Subproject commit f98a4d64e7fa2e19968f8275be94efc8415d20a4
+Subproject commit 3ad77802d200be7e1506a9add0faef3acf30bcd1

rocket

@@ -1 +1 @@
-Subproject commit caa109c376a3c5fd12aea9d976c140982d9cfd8c
+Subproject commit 7a520740dc4c41694491e6628ff6233b4c76acd8


@@ -35,15 +35,13 @@ class DefaultConfig extends ChiselConfig (
case MIFTagBits => Dump("MEM_TAG_BITS",
log2Up(site(NAcquireTransactors)+2) +
log2Up(site(NBanksPerMemoryChannel)) +
log2Up(site(NMemoryChannels)) + /* TODO: Remove for multichannel Top */
1)
log2Up(site(NMemoryChannels)))
case MIFDataBits => Dump("MEM_DATA_BITS", 128)
case MIFAddrBits => Dump("MEM_ADDR_BITS", site(PAddrBits) - site(CacheBlockOffsetBits))
case MIFDataBeats => site(TLDataBits)*site(TLDataBeats)/site(MIFDataBits)
case NASTIDataBits => site(MIFDataBits)
case NASTIAddrBits => site(MIFAddrBits)
case NASTIAddrBits => site(PAddrBits)
case NASTIIdBits => site(MIFTagBits)
case UseNASTI => false
//Params used by all caches
case NSets => findBy(CacheName)
case NWays => findBy(CacheName)
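A note on the NASTIAddrBits change above: MemIO carries block-granularity addresses (PAddrBits minus the cache-block offset), while the NASTI bus now carries full byte addresses so sub-block MMIO accesses can be routed. A throwaway plain-Scala sketch of the arithmetic, with paddrBits = 32 and 64-byte blocks as illustrative assumptions rather than this config's actual values:

object AddrWidthSketch extends App {
  // Chisel-style log2Up: bits needed to index x distinct values
  def log2Up(x: Int): Int = if (x <= 1) 1 else BigInt(x - 1).bitLength

  val paddrBits       = 32                // assumed physical address width
  val blockOffsetBits = log2Up(64)        // 64-byte cache blocks -> 6 bits
  val mifAddrBits     = paddrBits - blockOffsetBits

  println(s"old NASTIAddrBits (MIFAddrBits) = $mifAddrBits")  // 26, block addresses
  println(s"new NASTIAddrBits (PAddrBits)   = $paddrBits")    // 32, byte addresses
}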
@@ -72,6 +70,7 @@ class DefaultConfig extends ChiselConfig (
case StoreDataQueueDepth => 17
case ReplayQueueDepth => 16
case NMSHRs => Knob("L1D_MSHRS")
case NIOMSHRs => 1
case LRSCCycles => 32
//L2 Memory System Params
case NAcquireTransactors => 7
@@ -119,6 +118,7 @@ class DefaultConfig extends ChiselConfig (
case TLNClients => site(TLNCachingClients) + site(TLNCachelessClients)
case TLDataBits => site(CacheBlockBytes)*8/site(TLDataBeats)
case TLDataBeats => 4
case TLWriteMaskBits => (site(TLDataBits) - 1) / 8 + 1
case TLNetworkIsOrderedP2P => false
case TLNManagers => findBy(TLId)
case TLNCachingClients => findBy(TLId)
@@ -133,7 +133,7 @@ class DefaultConfig extends ChiselConfig (
case TLNCachelessClients => site(NTiles) + 1
case TLCoherencePolicy => new MESICoherence(site(L2DirectoryRepresentation))
case TLMaxManagerXacts => site(NAcquireTransactors) + 2
case TLMaxClientXacts => max(site(NMSHRs),
case TLMaxClientXacts => max(site(NMSHRs) + site(NIOMSHRs),
if(site(BuildRoCC).isEmpty) 1
else site(RoCCMaxTaggedMemXacts))
case TLMaxClientsPerPort => if(site(BuildRoCC).isEmpty) 1 else 3
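TLMaxClientXacts grows because each L1 client can now have its data-cache misses plus NIOMSHRs uncached MMIO requests outstanding at once. A quick plain-Scala sketch of the computation (the knob values here are examples, not the configuration's):

object ClientXactsSketch extends App {
  def maxClientXacts(nMSHRs: Int, nIOMSHRs: Int, roccXacts: Option[Int]): Int =
    math.max(nMSHRs + nIOMSHRs, roccXacts.getOrElse(1))

  println(maxClientXacts(2, 1, None))     // 3: D$ MSHRs plus the new IO MSHR
  println(maxClientXacts(2, 1, Some(8)))  // 8: a RoCC accelerator dominates
}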
@@ -155,6 +155,18 @@ class DefaultConfig extends ChiselConfig (
case CacheBlockBytes => 64
case CacheBlockOffsetBits => log2Up(here(CacheBlockBytes))
case UseBackupMemoryPort => true
case MMIOBase => BigInt(1 << 30) // 1 GB
case ExternalIOStart => 2 * site(MMIOBase)
case NASTIAddrMap => Seq(
("mem", None, MemSize(site(MMIOBase), AddrMap.RWX)),
("conf", None, Submap(site(ExternalIOStart) - site(MMIOBase),
("csr0", None, MemSize(1 << 15, AddrMap.RW)),
("scr", None, MemSize(site(HTIFNSCR) * 8, AddrMap.RW)))),
("io", Some(site(ExternalIOStart)),
MemSize(2 * site(MMIOBase), AddrMap.RW)))
case NASTIAddrHashMap => new AddrHashMap(site(NASTIAddrMap))
case NASTINMasters => site(TLNManagers) + 1
case NASTINSlaves => site(NASTIAddrHashMap).nEntries
}},
knobValues = {
case "NTILES" => 1
@@ -254,3 +266,7 @@ class SmallConfig extends ChiselConfig (
class DefaultFPGASmallConfig extends ChiselConfig(new SmallConfig ++ new DefaultFPGAConfig)
class ExampleSmallConfig extends ChiselConfig(new SmallConfig ++ new DefaultConfig)
class MultibankConfig extends ChiselConfig(new With2Banks ++ new DefaultConfig)
class MultibankL2Config extends ChiselConfig(
new With2Banks ++ new WithL2Cache ++ new DefaultConfig)


@@ -26,8 +26,8 @@ case object UseBackupMemoryPort extends Field[Boolean]
case object BuildL2CoherenceManager extends Field[() => CoherenceAgent]
/** Function for building some kind of tile connected to a reset signal */
case object BuildTiles extends Field[Seq[(Bool) => Tile]]
/** Which protocol to use to talk to memory/devices */
case object UseNASTI extends Field[Boolean]
/** Start address of the "io" region in the memory map */
case object ExternalIOStart extends Field[BigInt]
/** Utility trait for quick access to some relevant parameters */
trait TopLevelParameters extends UsesParameters {
@@ -40,6 +40,7 @@ trait TopLevelParameters extends UsesParameters {
val nMemReqs = params(NOutstandingMemReqsPerChannel)
val mifAddrBits = params(MIFAddrBits)
val mifDataBeats = params(MIFDataBeats)
val scrAddrBits = log2Up(params(HTIFNSCR))
require(lsb + log2Up(nBanks) < mifAddrBits)
}
@@ -61,7 +62,8 @@ class TopIO extends BasicTopIO {
}
class MultiChannelTopIO extends BasicTopIO with TopLevelParameters {
val mem = Vec(new MemIO, nMemChannels)
val mem = Vec(new NASTIMasterIO, nMemChannels)
val mmio = new NASTIMasterIO
}
/** Top-level module for the chip */
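MultiChannelTopIO's mem ports (and the new mmio port) are now five-channel, AXI-style NASTI interfaces instead of MemIO. NASTIMasterIO itself is defined in the uncore submodule; the sketch below only suggests its shape, inferred from the ar/aw/w/r/b wiring later in this diff, so the field names and widths are assumptions following AXI4 convention:

import Chisel._

// Illustrative widths; the real ones derive from NASTIAddrBits,
// NASTIDataBits, and NASTIIdBits in the config above.
class NASTIAddrChannelSketch extends Bundle {
  val id   = UInt(width = 6)    // transaction ID
  val addr = UInt(width = 32)   // byte address
  val len  = UInt(width = 8)    // beats per burst, minus one
  val size = UInt(width = 3)    // log2(bytes per beat)
}

class NASTIMasterIOSketch extends Bundle {
  val aw = Decoupled(new NASTIAddrChannelSketch)  // write address
  val w  = Decoupled(UInt(width = 64))            // write data
  val b  = Decoupled(UInt(width = 2)).flip        // write response
  val ar = Decoupled(new NASTIAddrChannelSketch)  // read address
  val r  = Decoupled(UInt(width = 64)).flip       // read data + status
}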
@@ -70,11 +72,19 @@ class Top extends Module with TopLevelParameters {
val io = new TopIO
if(!params(UseZscale)) {
val temp = Module(new MultiChannelTop)
val arb = Module(new MemIOArbiter(nMemChannels))
arb.io.inner <> temp.io.mem
io.mem <> arb.io.outer
val arb = Module(new NASTIArbiter(nMemChannels))
val conv = Module(new MemIONASTISlaveIOConverter(params(CacheBlockOffsetBits)))
arb.io.master <> temp.io.mem
conv.io.nasti <> arb.io.slave
io.mem.req_cmd <> Queue(conv.io.mem.req_cmd)
io.mem.req_data <> Queue(conv.io.mem.req_data, mifDataBeats)
conv.io.mem.resp <> Queue(io.mem.resp, mifDataBeats)
io.mem_backup_ctrl <> temp.io.mem_backup_ctrl
io.host <> temp.io.host
// tie off the mmio port
val errslave = Module(new NASTIErrorSlave)
errslave.io <> temp.io.mmio
} else {
val temp = Module(new ZscaleTop)
io.host <> temp.io.host
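Two things happen in Top above: a NASTIArbiter funnels the memory channels into one MemIONASTISlaveIOConverter so the chip's external MemIO pins are unchanged, and the new mmio port is tied off. NASTIErrorSlave answers every request with an error, so stray accesses to the unused port fail fast instead of hanging the core. Below is a much-simplified, single-channel model of that contract (the real slave, defined in uncore, handles all five NASTI channels and bursts):

import Chisel._

// Accept any request, answer with an error code, one request at a time.
class ErrorSlaveSketch extends Module {
  val io = new Bundle {
    val req  = Decoupled(UInt(width = 32)).flip  // e.g. a read address
    val resp = Decoupled(UInt(width = 2))        // e.g. an r-channel status
  }
  val busy = Reg(init = Bool(false))

  io.req.ready := !busy
  when (io.req.valid && io.req.ready) { busy := Bool(true) }

  io.resp.valid := busy
  io.resp.bits  := UInt(3)  // AXI-style DECERR encoding, assumed
  when (io.resp.valid && io.resp.ready) { busy := Bool(false) }
}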
@@ -93,8 +103,8 @@ class MultiChannelTop extends Module with TopLevelParameters {
case ((hl, tile), i) =>
tile.io.host.id := UInt(i)
tile.io.host.reset := Reg(next=Reg(next=hl.reset))
tile.io.host.pcr_req <> Queue(hl.pcr_req)
hl.pcr_rep <> Queue(tile.io.host.pcr_rep)
tile.io.host.pcr.req <> Queue(hl.pcr.req)
hl.pcr.resp <> Queue(tile.io.host.pcr.resp)
hl.ipi_req <> Queue(tile.io.host.ipi_req)
tile.io.host.ipi_rep <> Queue(hl.ipi_rep)
hl.debug_stats_pcr := tile.io.host.debug_stats_pcr
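The host-to-tile PCR channel above changes from separate pcr_req/pcr_rep queues to a single SMI (simple memory interface) port with req and resp subchannels, which is what allows Uncore to arbitrate it below. A sketch of the likely port shape; the field names are assumptions, with widths taken from the SMIArbiter(2, 64, 12) instantiation (64-bit data, 12-bit CSR addresses):

import Chisel._

// Assumed SMI request: read/write flag, address, and write data.
class SMIReqSketch extends Bundle {
  val rw   = Bool()              // true = write
  val addr = UInt(width = 12)
  val data = UInt(width = 64)
}

class SMIIOSketch extends Bundle {
  val req  = Decoupled(new SMIReqSketch)
  val resp = Decoupled(UInt(width = 64)).flip  // read data comes back here
}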
@@ -105,6 +115,7 @@ class MultiChannelTop extends Module with TopLevelParameters {
uncore.io.tiles_uncached <> tileList.map(_.io.uncached)
io.host <> uncore.io.host
io.mem <> uncore.io.mem
io.mmio <> uncore.io.mmio
if(params(UseBackupMemoryPort)) { io.mem_backup_ctrl <> uncore.io.mem_backup_ctrl }
}
@@ -116,11 +127,12 @@ class MultiChannelTop extends Module with TopLevelParameters {
class Uncore extends Module with TopLevelParameters {
val io = new Bundle {
val host = new HostIO
val mem = Vec(new MemIO, nMemChannels)
val mem = Vec(new NASTIMasterIO, nMemChannels)
val tiles_cached = Vec(new ClientTileLinkIO, nTiles).flip
val tiles_uncached = Vec(new ClientUncachedTileLinkIO, nTiles).flip
val htif = Vec(new HTIFIO, nTiles).flip
val mem_backup_ctrl = new MemBackupCtrlIO
val mmio = new NASTIMasterIO
}
val htif = Module(new HTIF(CSRs.mreset)) // One HTIF module per chip
@@ -130,13 +142,36 @@ class Uncore extends Module with TopLevelParameters {
outmemsys.io.tiles_uncached <> io.tiles_uncached
outmemsys.io.tiles_cached <> io.tiles_cached
for (i <- 0 until nTiles) {
io.htif(i).reset := htif.io.cpu(i).reset
io.htif(i).id := htif.io.cpu(i).id
htif.io.cpu(i).ipi_req <> io.htif(i).ipi_req
io.htif(i).ipi_rep <> htif.io.cpu(i).ipi_rep
htif.io.cpu(i).debug_stats_pcr <> io.htif(i).debug_stats_pcr
val pcr_arb = Module(new SMIArbiter(2, 64, 12))
pcr_arb.io.in(0) <> htif.io.cpu(i).pcr
pcr_arb.io.in(1) <> outmemsys.io.pcr(i)
io.htif(i).pcr <> pcr_arb.io.out
}
// Arbitrate SCR access between MMIO and HTIF
val scrArb = Module(new SMIArbiter(2, 64, scrAddrBits))
val scrFile = Module(new SCRFile)
scrArb.io.in(0) <> htif.io.scr
scrArb.io.in(1) <> outmemsys.io.scr
scrFile.io.smi <> scrArb.io.out
// scrFile.io.scr <> (... your SCR connections ...)
// Wire the htif to the memory port(s) and host interface
io.host.debug_stats_pcr := htif.io.host.debug_stats_pcr
htif.io.cpu <> io.htif
io.mem <> outmemsys.io.mem
io.mmio <> outmemsys.io.mmio
if(params(UseBackupMemoryPort)) {
outmemsys.io.mem_backup_en := io.mem_backup_ctrl.en
VLSIUtils.padOutHTIFWithDividedClock(htif.io, outmemsys.io.mem_backup, io.mem_backup_ctrl, io.host, htifW)
VLSIUtils.padOutHTIFWithDividedClock(htif.io.host, scrFile.io.scr,
outmemsys.io.mem_backup, io.mem_backup_ctrl, io.host, htifW)
} else {
htif.io.host.out <> io.host.out
htif.io.host.in <> io.host.in
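SMIArbiter(2, 64, scrAddrBits) gives both the HTIF and the new memory-mapped path access to the SCR file (and, per tile, to the PCR file). The essential behavior: grant one requester, hold that grant until its response returns, then release. A simplified two-way version with split request/response channels; the real arbiter bundles them as SMI ports, and its arbitration policy is not shown in this diff, so fixed priority is used here for brevity:

import Chisel._

class TwoWayArbiterSketch extends Module {
  val io = new Bundle {
    val in       = Vec.fill(2)(Decoupled(UInt(width = 64)).flip)
    val out      = Decoupled(UInt(width = 64))          // to the slave
    val resp_in  = Decoupled(UInt(width = 64)).flip     // from the slave
    val resp_out = Vec.fill(2)(Decoupled(UInt(width = 64)))
  }
  val busy   = Reg(init = Bool(false))
  val winner = Reg(UInt(width = 1))

  // Fixed-priority grant: port 0 wins ties.
  io.out.valid   := !busy && (io.in(0).valid || io.in(1).valid)
  io.out.bits    := Mux(io.in(0).valid, io.in(0).bits, io.in(1).bits)
  io.in(0).ready := !busy && io.out.ready
  io.in(1).ready := !busy && io.out.ready && !io.in(0).valid

  when (io.out.valid && io.out.ready) {
    busy   := Bool(true)
    winner := Mux(io.in(0).valid, UInt(0), UInt(1))
  }

  // Steer the response back to whoever won, then accept a new request.
  for (i <- 0 until 2) {
    io.resp_out(i).valid := io.resp_in.valid && busy && winner === UInt(i)
    io.resp_out(i).bits  := io.resp_in.bits
  }
  io.resp_in.ready := busy && io.resp_out(winner).ready
  when (io.resp_in.valid && io.resp_in.ready) { busy := Bool(false) }
}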
@@ -152,9 +187,12 @@ class OuterMemorySystem extends Module with TopLevelParameters {
val tiles_uncached = Vec(new ClientUncachedTileLinkIO, nTiles).flip
val htif_uncached = (new ClientUncachedTileLinkIO).flip
val incoherent = Vec(Bool(), nTiles).asInput
val mem = Vec(new MemIO, nMemChannels)
val mem = Vec(new NASTIMasterIO, nMemChannels)
val mem_backup = new MemSerializedIO(htifW)
val mem_backup_en = Bool(INPUT)
val pcr = Vec(new SMIIO(64, 12), nTiles)
val scr = new SMIIO(64, scrAddrBits)
val mmio = new NASTIMasterIO
}
// Create a simple L1toL2 NoC between the tiles+htif and the banks of outer memory
@@ -170,43 +208,59 @@ class OuterMemorySystem extends Module with TopLevelParameters {
else new RocketChipTileLinkCrossbar(addrToBank, sharerToClientId, preBuffering, postBuffering))
// Create point(s) of coherence serialization
val managerEndpoints = List.fill(nMemChannels) {
List.fill(nBanksPerMemChannel) {
params(BuildL2CoherenceManager)()}}
managerEndpoints.flatten.foreach { _.incoherent := io.incoherent }
val nManagers = nMemChannels * nBanksPerMemChannel
val managerEndpoints = List.fill(nManagers) { params(BuildL2CoherenceManager)()}
managerEndpoints.foreach { _.incoherent := io.incoherent }
// Wire the tiles and htif to the TileLink client ports of the L1toL2 network,
// and coherence manager(s) to the other side
l1tol2net.io.clients <> ordered_clients
l1tol2net.io.managers <> managerEndpoints.flatMap(_.map(_.innerTL))
l1tol2net.io.managers <> managerEndpoints.map(_.innerTL)
// Create a converter between TileLinkIO and MemIO for each channel
val outerTLParams = params.alterPartial({ case TLId => "L2ToMC" })
val backendBuffering = TileLinkDepths(0,0,0,0,0)
val mem_channels = managerEndpoints.map { banks =>
if(!params(UseNASTI)) {
val arb = Module(new RocketChipTileLinkArbiter(managerDepths = backendBuffering))(outerTLParams)
val conv = Module(new MemPipeIOTileLinkIOConverter(nMemReqs))(outerTLParams)
arb.io.clients <> banks.map(_.outerTL)
arb.io.managers.head <> conv.io.tl
MemIOMemPipeIOConverter(conv.io.mem)
} else {
val arb = Module(new RocketChipTileLinkArbiter(managerDepths = backendBuffering))(outerTLParams)
val conv1 = Module(new NASTIMasterIOTileLinkIOConverter)(outerTLParams)
val conv2 = Module(new MemIONASTISlaveIOConverter(params(CacheBlockOffsetBits)))
val conv3 = Module(new MemPipeIOMemIOConverter(nMemReqs))
arb.io.clients <> banks.map(_.outerTL)
arb.io.managers.head <> conv1.io.tl
conv2.io.nasti <> conv1.io.nasti
conv3.io.cpu.req_cmd <> Queue(conv2.io.mem.req_cmd, 2)
conv3.io.cpu.req_data <> Queue(conv2.io.mem.req_data, mifDataBeats)
conv2.io.mem.resp <> conv3.io.cpu.resp
MemIOMemPipeIOConverter(conv3.io.mem)
}
val addrMap = params(NASTIAddrHashMap)
println("Generated Address Map")
for ((name, base, size, _) <- addrMap.sortedEntries) {
println(f"\t$name%s $base%x - ${base + size - 1}%x")
}
val interconnect = Module(new NASTITopInterconnect)
for ((bank, i) <- managerEndpoints.zipWithIndex) {
val unwrap = Module(new ClientTileLinkIOUnwrapper)(outerTLParams)
val conv = Module(new NASTIMasterIOTileLinkIOConverter)(outerTLParams)
unwrap.io.in <> bank.outerTL
conv.io.tl <> unwrap.io.out
interconnect.io.masters(i) <> conv.io.nasti
}
val rtc = Module(new RTC(CSRs.mtime))
interconnect.io.masters(nManagers) <> rtc.io
for (i <- 0 until nTiles) {
val csrName = s"conf:csr$i"
val csrPort = addrMap(csrName).port
val conv = Module(new SMIIONASTISlaveIOConverter(64, 12))
conv.io.nasti <> interconnect.io.slaves(csrPort)
io.pcr(i) <> conv.io.smi
}
val conv = Module(new SMIIONASTISlaveIOConverter(64, scrAddrBits))
conv.io.nasti <> interconnect.io.slaves(addrMap("conf:scr").port)
io.scr <> conv.io.smi
io.mmio <> interconnect.io.slaves(addrMap("io").port)
val mem_channels = interconnect.io.slaves.take(nMemChannels)
// Create a SerDes for backup memory port
if(params(UseBackupMemoryPort)) {
VLSIUtils.doOuterMemorySystemSerdes(mem_channels, io.mem, io.mem_backup, io.mem_backup_en, nMemChannels, params(HTIFWidth))
VLSIUtils.doOuterMemorySystemSerdes(
mem_channels, io.mem, io.mem_backup, io.mem_backup_en,
nMemChannels, params(HTIFWidth), params(CacheBlockOffsetBits))
} else { io.mem <> mem_channels }
}
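Routing in the new OuterMemorySystem follows directly from the flattened map printed above: each TileLink bank reaches the interconnect through a ClientTileLinkIOUnwrapper plus a NASTI converter, the RTC occupies the last master port, and slave selection is an address-range match, with unmatched addresses presumably landing on an error slave inside the interconnect. A plain-Scala model of only the routing step (the port order and bases mirror the map sketched earlier and are assumptions):

object RouteSketch extends App {
  case class Entry(name: String, base: BigInt, size: BigInt, port: Int)

  val gb = BigInt(1) << 30
  val entries = Seq(
    Entry("mem",       BigInt(0),       gb,              0),
    Entry("conf:csr0", gb,              BigInt(1) << 15, 1),
    Entry("conf:scr",  gb + (1 << 15),  BigInt(512),     2),
    Entry("io",        2 * gb,          2 * gb,          3))

  def route(addr: BigInt): Option[Int] =
    entries.find(e => addr >= e.base && addr < e.base + e.size).map(_.port)

  println(route(BigInt("1000", 16)))       // Some(0): main memory
  println(route(BigInt("80000100", 16)))   // Some(3): external IO
  println(route(BigInt("100000000", 16)))  // None: nothing mapped there
}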


@@ -15,52 +15,69 @@ class MemDessert extends Module {
object VLSIUtils {
def doOuterMemorySystemSerdes(
llcs: Seq[MemIO],
mems: Seq[MemIO],
llcs: Seq[NASTIMasterIO],
mems: Seq[NASTIMasterIO],
backup: MemSerializedIO,
en: Bool,
nMemChannels: Int,
htifWidth: Int) {
val arb = Module(new MemIOArbiter(nMemChannels))
htifWidth: Int,
blockOffsetBits: Int) {
val arb = Module(new NASTIArbiter(nMemChannels))
val conv = Module(new MemIONASTISlaveIOConverter(blockOffsetBits))
val mem_serdes = Module(new MemSerdes(htifWidth))
mem_serdes.io.wide <> arb.io.outer
conv.io.nasti <> arb.io.slave
mem_serdes.io.wide <> conv.io.mem
backup <> mem_serdes.io.narrow
llcs zip mems zip arb.io.inner foreach { case ((llc, mem), wide) =>
llc.req_cmd.ready := Mux(en, wide.req_cmd.ready, mem.req_cmd.ready)
mem.req_cmd.valid := llc.req_cmd.valid && !en
mem.req_cmd.bits := llc.req_cmd.bits
wide.req_cmd.valid := llc.req_cmd.valid && en
wide.req_cmd.bits := llc.req_cmd.bits
llcs zip mems zip arb.io.master foreach { case ((llc, mem), wide) =>
llc.ar.ready := Mux(en, wide.ar.ready, mem.ar.ready)
mem.ar.valid := llc.ar.valid && !en
mem.ar.bits := llc.ar.bits
wide.ar.valid := llc.ar.valid && en
wide.ar.bits := llc.ar.bits
llc.req_data.ready := Mux(en, wide.req_data.ready, mem.req_data.ready)
mem.req_data.valid := llc.req_data.valid && !en
mem.req_data.bits := llc.req_data.bits
wide.req_data.valid := llc.req_data.valid && en
wide.req_data.bits := llc.req_data.bits
llc.aw.ready := Mux(en, wide.aw.ready, mem.aw.ready)
mem.aw.valid := llc.aw.valid && !en
mem.aw.bits := llc.aw.bits
wide.aw.valid := llc.aw.valid && en
wide.aw.bits := llc.aw.bits
llc.resp.valid := Mux(en, wide.resp.valid, mem.resp.valid)
llc.resp.bits := Mux(en, wide.resp.bits, mem.resp.bits)
mem.resp.ready := llc.resp.ready && !en
wide.resp.ready := llc.resp.ready && en
llc.w.ready := Mux(en, wide.w.ready, mem.w.ready)
mem.w.valid := llc.w.valid && !en
mem.w.bits := llc.w.bits
wide.w.valid := llc.w.valid && en
wide.w.bits := llc.w.bits
llc.b.valid := Mux(en, wide.b.valid, mem.b.valid)
llc.b.bits := Mux(en, wide.b.bits, mem.b.bits)
mem.b.ready := llc.b.ready && !en
wide.b.ready := llc.b.ready && en
llc.r.valid := Mux(en, wide.r.valid, mem.r.valid)
llc.r.bits := Mux(en, wide.r.bits, mem.r.bits)
mem.r.ready := llc.r.ready && !en
wide.r.ready := llc.r.ready && en
}
}
def padOutHTIFWithDividedClock(
htif: HTIFModuleIO,
htif: HostIO,
scr: SCRIO,
child: MemSerializedIO,
parent: MemBackupCtrlIO,
host: HostIO,
htifW: Int) {
val hio = Module((new SlowIO(512)) { Bits(width = htifW+1) })
hio.io.set_divisor.valid := htif.scr.wen && (htif.scr.waddr === UInt(63))
hio.io.set_divisor.bits := htif.scr.wdata
htif.scr.rdata(63) := hio.io.divisor
hio.io.set_divisor.valid := scr.wen && (scr.waddr === UInt(63))
hio.io.set_divisor.bits := scr.wdata
scr.rdata(63) := hio.io.divisor
hio.io.out_fast.valid := htif.host.out.valid || child.req.valid
hio.io.out_fast.bits := Cat(htif.host.out.valid, Mux(htif.host.out.valid, htif.host.out.bits, child.req.bits))
htif.host.out.ready := hio.io.out_fast.ready
child.req.ready := hio.io.out_fast.ready && !htif.host.out.valid
hio.io.out_fast.valid := htif.out.valid || child.req.valid
hio.io.out_fast.bits := Cat(htif.out.valid, Mux(htif.out.valid, htif.out.bits, child.req.bits))
htif.out.ready := hio.io.out_fast.ready
child.req.ready := hio.io.out_fast.ready && !htif.out.valid
host.out.valid := hio.io.out_slow.valid && hio.io.out_slow.bits(htifW)
host.out.bits := hio.io.out_slow.bits
parent.out_valid := hio.io.out_slow.valid && !hio.io.out_slow.bits(htifW)
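The rewritten doOuterMemorySystemSerdes repeats one steering pattern five times, once per NASTI channel: when en is set, traffic detours through the serialized backup port, otherwise it flows to the regular memory pins, and only the selected sink's ready is reflected back to the source. A hypothetical generic helper capturing the request-direction half of that pattern (ar, aw, w); the response channels (r, b) mux valid and bits in the opposite direction:

import Chisel._

object SteerSketch {
  // Steer one decoupled source into one of two sinks under an enable.
  def steer[T <: Data](src: DecoupledIO[T], whenEn: DecoupledIO[T],
                       whenNot: DecoupledIO[T], en: Bool): Unit = {
    src.ready     := Mux(en, whenEn.ready, whenNot.ready)
    whenEn.valid  := src.valid && en
    whenEn.bits   := src.bits
    whenNot.valid := src.valid && !en
    whenNot.bits  := src.bits
  }
}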
@@ -72,9 +89,9 @@ object VLSIUtils {
host.in.ready := hio.io.in_slow.ready
child.resp.valid := hio.io.in_fast.valid && hio.io.in_fast.bits(htifW)
child.resp.bits := hio.io.in_fast.bits
htif.host.in.valid := hio.io.in_fast.valid && !hio.io.in_fast.bits(htifW)
htif.host.in.bits := hio.io.in_fast.bits
hio.io.in_fast.ready := Mux(hio.io.in_fast.bits(htifW), Bool(true), htif.host.in.ready)
htif.in.valid := hio.io.in_fast.valid && !hio.io.in_fast.bits(htifW)
htif.in.bits := hio.io.in_fast.bits
hio.io.in_fast.ready := Mux(hio.io.in_fast.bits(htifW), Bool(true), htif.in.ready)
host.clk := hio.io.clk_slow
host.clk_edge := Reg(next=host.clk && !Reg(next=host.clk))
}

uncore

@@ -1 +1 @@
-Subproject commit 5b76a91b2ed22ab203730d32202fa653431cf17c
+Subproject commit d6895713cf4c0fcc53a3507f0c376716be8b0dce