From 64e1de751d1321d263922b1fb5e508f3f41a377b Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Mon, 23 Jan 2017 17:54:27 -0800 Subject: [PATCH 1/2] axi4: add a minLatency parameter --- src/main/scala/uncore/axi4/Buffer.scala | 6 ++++-- src/main/scala/uncore/axi4/Parameters.scala | 5 +++-- src/main/scala/uncore/axi4/RegisterRouter.scala | 3 ++- src/main/scala/uncore/axi4/SRAM.scala | 3 ++- src/main/scala/uncore/axi4/ToTL.scala | 9 +++++---- src/main/scala/uncore/tilelink2/Buffer.scala | 4 ++-- src/main/scala/uncore/tilelink2/Crossing.scala | 4 ++-- src/main/scala/uncore/tilelink2/Nodes.scala | 12 ++++++------ src/main/scala/uncore/tilelink2/RegisterRouter.scala | 2 +- src/main/scala/uncore/tilelink2/ToAXI4.scala | 10 +++++----- 10 files changed, 32 insertions(+), 26 deletions(-) diff --git a/src/main/scala/uncore/axi4/Buffer.scala b/src/main/scala/uncore/axi4/Buffer.scala index 2dcefeb4..322699c2 100644 --- a/src/main/scala/uncore/axi4/Buffer.scala +++ b/src/main/scala/uncore/axi4/Buffer.scala @@ -6,7 +6,7 @@ import Chisel._ import chisel3.internal.sourceinfo.SourceInfo import config._ import diplomacy._ -import scala.math.max +import scala.math.{min,max} // pipe is only used if a queue has depth = 1 class AXI4Buffer(aw: Int = 2, w: Int = 2, b: Int = 2, ar: Int = 2, r: Int = 2, pipe: Boolean = true)(implicit p: Parameters) extends LazyModule @@ -17,7 +17,9 @@ class AXI4Buffer(aw: Int = 2, w: Int = 2, b: Int = 2, ar: Int = 2, r: Int = 2, p require (ar >= 0) require (r >= 0) - val node = AXI4IdentityNode() + val node = AXI4AdapterNode( + masterFn = { case Seq(p) => p }, + slaveFn = { case Seq(p) => p.copy(minLatency = p.minLatency + min(1,min(aw,ar)) + min(1,min(r,b))) }) lazy val module = new LazyModuleImp(this) { val io = new Bundle { diff --git a/src/main/scala/uncore/axi4/Parameters.scala b/src/main/scala/uncore/axi4/Parameters.scala index 3e6b267f..8d151645 100644 --- a/src/main/scala/uncore/axi4/Parameters.scala +++ b/src/main/scala/uncore/axi4/Parameters.scala @@ -29,8 +29,9 @@ case class AXI4SlaveParameters( } case class AXI4SlavePortParameters( - slaves: Seq[AXI4SlaveParameters], - beatBytes: Int) + slaves: Seq[AXI4SlaveParameters], + beatBytes: Int, + minLatency: Int = 1) { require (!slaves.isEmpty) require (isPow2(beatBytes)) diff --git a/src/main/scala/uncore/axi4/RegisterRouter.scala b/src/main/scala/uncore/axi4/RegisterRouter.scala index df78c647..679c16c1 100644 --- a/src/main/scala/uncore/axi4/RegisterRouter.scala +++ b/src/main/scala/uncore/axi4/RegisterRouter.scala @@ -16,7 +16,8 @@ class AXI4RegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int supportsWrite = TransferSizes(1, beatBytes), supportsRead = TransferSizes(1, beatBytes), interleavedId = Some(0))), - beatBytes = beatBytes)) + beatBytes = beatBytes, + minLatency = min(concurrency, 1))) // the Queue adds at most one cycle { require (address.contiguous) diff --git a/src/main/scala/uncore/axi4/SRAM.scala b/src/main/scala/uncore/axi4/SRAM.scala index cc04c9fc..c5ecf566 100644 --- a/src/main/scala/uncore/axi4/SRAM.scala +++ b/src/main/scala/uncore/axi4/SRAM.scala @@ -16,7 +16,8 @@ class AXI4RAM(address: AddressSet, executable: Boolean = true, beatBytes: Int = supportsRead = TransferSizes(1, beatBytes), supportsWrite = TransferSizes(1, beatBytes), interleavedId = Some(0))), - beatBytes = beatBytes)) + beatBytes = beatBytes, + minLatency = 0)) // B responds on same cycle // We require the address range to include an entire beat (for the write mask) require ((address.mask & (beatBytes-1)) == beatBytes-1) diff --git a/src/main/scala/uncore/axi4/ToTL.scala b/src/main/scala/uncore/axi4/ToTL.scala index 6519d5c3..0f62b5f8 100644 --- a/src/main/scala/uncore/axi4/ToTL.scala +++ b/src/main/scala/uncore/axi4/ToTL.scala @@ -16,8 +16,8 @@ case class AXI4ToTLNode() extends MixedNode(AXI4Imp, TLImp)( nodePath = m.nodePath) })) }, - uFn = { case (1, Seq(TLManagerPortParameters(managers, beatBytes, _, _))) => - Seq(AXI4SlavePortParameters(beatBytes = beatBytes, slaves = managers.map { m => + uFn = { case (1, Seq(mp)) => Seq(AXI4SlavePortParameters( + slaves = mp.managers.map { m => AXI4SlaveParameters( address = m.address, regionType = m.regionType, @@ -25,8 +25,9 @@ case class AXI4ToTLNode() extends MixedNode(AXI4Imp, TLImp)( nodePath = m.nodePath, supportsWrite = m.supportsPutPartial, supportsRead = m.supportsGet, - interleavedId = Some(0)) // TL2 never interleaves D beats - })) + interleavedId = Some(0))}, // TL2 never interleaves D beats + beatBytes = mp.beatBytes, + minLatency = mp.minLatency)) }, numPO = 1 to 1, numPI = 1 to 1) diff --git a/src/main/scala/uncore/tilelink2/Buffer.scala b/src/main/scala/uncore/tilelink2/Buffer.scala index b481e0d5..c25f5561 100644 --- a/src/main/scala/uncore/tilelink2/Buffer.scala +++ b/src/main/scala/uncore/tilelink2/Buffer.scala @@ -18,8 +18,8 @@ class TLBuffer(a: Int = 2, b: Int = 2, c: Int = 2, d: Int = 2, e: Int = 2, pipe: require (e >= 0) val node = TLAdapterNode( - clientFn = { seq => seq(0).copy(minLatency = seq(0).minLatency + min(1,b) + min(1,c)) }, - managerFn = { seq => seq(0).copy(minLatency = seq(0).minLatency + min(1,a) + min(1,d)) }) + clientFn = { case Seq(p) => p.copy(minLatency = p.minLatency + min(1,b) + min(1,c)) }, + managerFn = { case Seq(p) => p.copy(minLatency = p.minLatency + min(1,a) + min(1,d)) }) lazy val module = new LazyModuleImp(this) { val io = new Bundle { diff --git a/src/main/scala/uncore/tilelink2/Crossing.scala b/src/main/scala/uncore/tilelink2/Crossing.scala index dae7440d..93e63f2c 100644 --- a/src/main/scala/uncore/tilelink2/Crossing.scala +++ b/src/main/scala/uncore/tilelink2/Crossing.scala @@ -10,7 +10,7 @@ import util._ class TLAsyncCrossingSource(sync: Int = 3)(implicit p: Parameters) extends LazyModule { - val node = TLAsyncSourceNode() + val node = TLAsyncSourceNode(sync) lazy val module = new LazyModuleImp(this) { val io = new Bundle { @@ -44,7 +44,7 @@ class TLAsyncCrossingSource(sync: Int = 3)(implicit p: Parameters) extends LazyM class TLAsyncCrossingSink(depth: Int = 8, sync: Int = 3)(implicit p: Parameters) extends LazyModule { - val node = TLAsyncSinkNode(depth) + val node = TLAsyncSinkNode(depth, sync) lazy val module = new LazyModuleImp(this) { val io = new Bundle { diff --git a/src/main/scala/uncore/tilelink2/Nodes.scala b/src/main/scala/uncore/tilelink2/Nodes.scala index 9fb1c000..28fe22d9 100644 --- a/src/main/scala/uncore/tilelink2/Nodes.scala +++ b/src/main/scala/uncore/tilelink2/Nodes.scala @@ -169,14 +169,14 @@ case class TLAsyncIdentityNode() extends IdentityNode(TLAsyncImp) case class TLAsyncOutputNode() extends OutputNode(TLAsyncImp) case class TLAsyncInputNode() extends InputNode(TLAsyncImp) -case class TLAsyncSourceNode() extends MixedNode(TLImp, TLAsyncImp)( - dFn = { case (1, s) => s.map(TLAsyncClientPortParameters(_)) }, - uFn = { case (1, s) => s.map(_.base) }, +case class TLAsyncSourceNode(sync: Int) extends MixedNode(TLImp, TLAsyncImp)( + dFn = { case (1, Seq(p)) => Seq(TLAsyncClientPortParameters(p)) }, + uFn = { case (1, Seq(p)) => Seq(p.base.copy(minLatency = sync+1)) }, // discard cycles in other clock domain numPO = 1 to 1, numPI = 1 to 1) -case class TLAsyncSinkNode(depth: Int) extends MixedNode(TLAsyncImp, TLImp)( - dFn = { case (1, s) => s.map(_.base) }, - uFn = { case (1, s) => s.map(TLAsyncManagerPortParameters(depth, _)) }, +case class TLAsyncSinkNode(depth: Int, sync: Int) extends MixedNode(TLAsyncImp, TLImp)( + dFn = { case (1, Seq(p)) => Seq(p.base.copy(minLatency = sync+1)) }, + uFn = { case (1, Seq(p)) => Seq(TLAsyncManagerPortParameters(depth, p)) }, numPO = 1 to 1, numPI = 1 to 1) diff --git a/src/main/scala/uncore/tilelink2/RegisterRouter.scala b/src/main/scala/uncore/tilelink2/RegisterRouter.scala index 4269380f..0508c930 100644 --- a/src/main/scala/uncore/tilelink2/RegisterRouter.scala +++ b/src/main/scala/uncore/tilelink2/RegisterRouter.scala @@ -18,7 +18,7 @@ class TLRegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int = supportsPutFull = TransferSizes(1, beatBytes), fifoId = Some(0))), // requests are handled in order beatBytes = beatBytes, - minLatency = min(concurrency, 1))) // the Queue adds at least one cycle + minLatency = min(concurrency, 1))) // the Queue adds at most one cycle { require (address.contiguous) diff --git a/src/main/scala/uncore/tilelink2/ToAXI4.scala b/src/main/scala/uncore/tilelink2/ToAXI4.scala index cd068e97..b7111f87 100644 --- a/src/main/scala/uncore/tilelink2/ToAXI4.scala +++ b/src/main/scala/uncore/tilelink2/ToAXI4.scala @@ -19,8 +19,8 @@ case class TLToAXI4Node(idBits: Int) extends MixedNode(TLImp, AXI4Imp)( aligned = true)) Seq(AXI4MasterPortParameters(masters)) }, - uFn = { case (1, Seq(AXI4SlavePortParameters(slaves, beatBytes))) => - val managers = slaves.map { case s => + uFn = { case (1, Seq(p)) => Seq(TLManagerPortParameters( + managers = p.slaves.map { case s => TLManagerParameters( address = s.address, regionType = s.regionType, @@ -28,10 +28,10 @@ case class TLToAXI4Node(idBits: Int) extends MixedNode(TLImp, AXI4Imp)( nodePath = s.nodePath, supportsGet = s.supportsRead, supportsPutFull = s.supportsWrite, - supportsPutPartial = s.supportsWrite) + supportsPutPartial = s.supportsWrite)}, // AXI4 is NEVER fifo in TL sense (R+W are independent) - } - Seq(TLManagerPortParameters(managers, beatBytes, 1, 0)) + beatBytes = p.beatBytes, + minLatency = p.minLatency)) }, numPO = 1 to 1, numPI = 1 to 1) From 6ff35a387a069819bfd9326874cb68c8fbf3b7a7 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Mon, 23 Jan 2017 18:03:29 -0800 Subject: [PATCH 2/2] tilelink2: disable A=>D bypass in ToAXI4 whenever possible --- src/main/scala/uncore/tilelink2/ToAXI4.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/scala/uncore/tilelink2/ToAXI4.scala b/src/main/scala/uncore/tilelink2/ToAXI4.scala index b7111f87..3cad2a31 100644 --- a/src/main/scala/uncore/tilelink2/ToAXI4.scala +++ b/src/main/scala/uncore/tilelink2/ToAXI4.scala @@ -127,7 +127,8 @@ class TLToAXI4(idBits: Int, combinational: Boolean = true)(implicit p: Parameter // We know there can only be as many outstanding requests as TL sources // However, AXI read and write queues are not mutually FIFO. // Therefore, we want to pop them individually, but share the storage. - PositionalMultiQueue(UInt(width=max(1,bankBits)), positions=bankEntries(i), ways=2, combinational=combinational) + val bypass = combinational && edgeOut.slave.minLatency == 0 + PositionalMultiQueue(UInt(width=max(1,bankBits)), positions=bankEntries(i), ways=2, combinational=bypass) } val a_bankPosition = if (posBits == 0) UInt(0) else a_source(sourceBits-1, idBits)