Merge pull request #529 from ucb-bar/physical-optimization

Physical optimization
2017-01-24 18:59:07 -08:00
parent 46cdfc2b45 6ff35a387a
commit d1dedd25e7
10 changed files with 34 additions and 27 deletions
--- a/src/main/scala/uncore/axi4/Buffer.scala
+++ b/src/main/scala/uncore/axi4/Buffer.scala
@@ -6,7 +6,7 @@ import Chisel._
 import chisel3.internal.sourceinfo.SourceInfo
 import config._
 import diplomacy._
-import scala.math.max
+import scala.math.{min,max}

 // pipe is only used if a queue has depth = 1
 class AXI4Buffer(aw: Int = 2, w: Int = 2, b: Int = 2, ar: Int = 2, r: Int = 2, pipe: Boolean = true)(implicit p: Parameters) extends LazyModule
@@ -17,7 +17,9 @@ class AXI4Buffer(aw: Int = 2, w: Int = 2, b: Int = 2, ar: Int = 2, r: Int = 2, p
  require (ar >= 0)
  require (r  >= 0)

-  val node = AXI4IdentityNode()
+  val node = AXI4AdapterNode(
+    masterFn = { case Seq(p) => p },
+    slaveFn  = { case Seq(p) => p.copy(minLatency = p.minLatency + min(1,min(aw,ar)) + min(1,min(r,b))) })

  lazy val module = new LazyModuleImp(this) {
    val io = new Bundle {
--- a/src/main/scala/uncore/axi4/Parameters.scala
+++ b/src/main/scala/uncore/axi4/Parameters.scala
@@ -29,8 +29,9 @@ case class AXI4SlaveParameters(
 }

 case class AXI4SlavePortParameters(
-  slaves:    Seq[AXI4SlaveParameters],
-  beatBytes: Int)
+  slaves:     Seq[AXI4SlaveParameters],
+  beatBytes:  Int,
+  minLatency: Int = 1)
 {
  require (!slaves.isEmpty)
  require (isPow2(beatBytes))
--- a/src/main/scala/uncore/axi4/RegisterRouter.scala
+++ b/src/main/scala/uncore/axi4/RegisterRouter.scala
@@ -16,7 +16,8 @@ class AXI4RegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int
      supportsWrite = TransferSizes(1, beatBytes),
      supportsRead  = TransferSizes(1, beatBytes),
      interleavedId = Some(0))),
-    beatBytes  = beatBytes))
+    beatBytes  = beatBytes,
+    minLatency = min(concurrency, 1))) // the Queue adds at most one cycle
 {
  require (address.contiguous)

--- a/src/main/scala/uncore/axi4/SRAM.scala
+++ b/src/main/scala/uncore/axi4/SRAM.scala
@@ -16,7 +16,8 @@ class AXI4RAM(address: AddressSet, executable: Boolean = true, beatBytes: Int =
      supportsRead  = TransferSizes(1, beatBytes),
      supportsWrite = TransferSizes(1, beatBytes),
      interleavedId = Some(0))),
-    beatBytes  = beatBytes))
+    beatBytes  = beatBytes,
+    minLatency = 0)) // B responds on same cycle

  // We require the address range to include an entire beat (for the write mask)
  require ((address.mask & (beatBytes-1)) == beatBytes-1)
--- a/src/main/scala/uncore/axi4/ToTL.scala
+++ b/src/main/scala/uncore/axi4/ToTL.scala
@@ -16,8 +16,8 @@ case class AXI4ToTLNode() extends MixedNode(AXI4Imp, TLImp)(
        nodePath = m.nodePath)
    }))
  },
-  uFn = { case (1, Seq(TLManagerPortParameters(managers, beatBytes, _, _))) =>
-    Seq(AXI4SlavePortParameters(beatBytes = beatBytes, slaves = managers.map { m =>
+  uFn = { case (1, Seq(mp)) => Seq(AXI4SlavePortParameters(
+    slaves = mp.managers.map { m =>
      AXI4SlaveParameters(
        address       = m.address,
        regionType    = m.regionType,
@@ -25,8 +25,9 @@ case class AXI4ToTLNode() extends MixedNode(AXI4Imp, TLImp)(
        nodePath      = m.nodePath,
        supportsWrite = m.supportsPutPartial,
        supportsRead  = m.supportsGet,
-        interleavedId = Some(0)) // TL2 never interleaves D beats
-    }))
+        interleavedId = Some(0))}, // TL2 never interleaves D beats
+    beatBytes = mp.beatBytes,
+    minLatency = mp.minLatency))
  },
  numPO = 1 to 1,
  numPI = 1 to 1)
--- a/src/main/scala/uncore/tilelink2/Buffer.scala
+++ b/src/main/scala/uncore/tilelink2/Buffer.scala
@@ -18,8 +18,8 @@ class TLBuffer(a: Int = 2, b: Int = 2, c: Int = 2, d: Int = 2, e: Int = 2, pipe:
  require (e >= 0)

  val node = TLAdapterNode(
-    clientFn  = { seq => seq(0).copy(minLatency = seq(0).minLatency + min(1,b) + min(1,c)) },
-    managerFn = { seq => seq(0).copy(minLatency = seq(0).minLatency + min(1,a) + min(1,d)) })
+    clientFn  = { case Seq(p) => p.copy(minLatency = p.minLatency + min(1,b) + min(1,c)) },
+    managerFn = { case Seq(p) => p.copy(minLatency = p.minLatency + min(1,a) + min(1,d)) })

  lazy val module = new LazyModuleImp(this) {
    val io = new Bundle {
--- a/src/main/scala/uncore/tilelink2/Crossing.scala
+++ b/src/main/scala/uncore/tilelink2/Crossing.scala
@@ -10,7 +10,7 @@ import util._

 class TLAsyncCrossingSource(sync: Int = 3)(implicit p: Parameters) extends LazyModule
 {
-  val node = TLAsyncSourceNode()
+  val node = TLAsyncSourceNode(sync)

  lazy val module = new LazyModuleImp(this) {
    val io = new Bundle {
@@ -44,7 +44,7 @@ class TLAsyncCrossingSource(sync: Int = 3)(implicit p: Parameters) extends LazyM

 class TLAsyncCrossingSink(depth: Int = 8, sync: Int = 3)(implicit p: Parameters) extends LazyModule
 {
-  val node = TLAsyncSinkNode(depth)
+  val node = TLAsyncSinkNode(depth, sync)

  lazy val module = new LazyModuleImp(this) {
    val io = new Bundle {
--- a/src/main/scala/uncore/tilelink2/Nodes.scala
+++ b/src/main/scala/uncore/tilelink2/Nodes.scala
@@ -169,14 +169,14 @@ case class TLAsyncIdentityNode() extends IdentityNode(TLAsyncImp)
 case class TLAsyncOutputNode() extends OutputNode(TLAsyncImp)
 case class TLAsyncInputNode() extends InputNode(TLAsyncImp)

-case class TLAsyncSourceNode() extends MixedNode(TLImp, TLAsyncImp)(
-  dFn = { case (1, s) => s.map(TLAsyncClientPortParameters(_)) },
-  uFn = { case (1, s) => s.map(_.base) },
+case class TLAsyncSourceNode(sync: Int) extends MixedNode(TLImp, TLAsyncImp)(
+  dFn = { case (1, Seq(p)) => Seq(TLAsyncClientPortParameters(p)) },
+  uFn = { case (1, Seq(p)) => Seq(p.base.copy(minLatency = sync+1)) }, // discard cycles in other clock domain
  numPO = 1 to 1,
  numPI = 1 to 1)

-case class TLAsyncSinkNode(depth: Int) extends MixedNode(TLAsyncImp, TLImp)(
-  dFn = { case (1, s) => s.map(_.base) },
-  uFn = { case (1, s) => s.map(TLAsyncManagerPortParameters(depth, _)) },
+case class TLAsyncSinkNode(depth: Int, sync: Int) extends MixedNode(TLAsyncImp, TLImp)(
+  dFn = { case (1, Seq(p)) => Seq(p.base.copy(minLatency = sync+1)) },
+  uFn = { case (1, Seq(p)) => Seq(TLAsyncManagerPortParameters(depth, p)) },
  numPO = 1 to 1,
  numPI = 1 to 1)
--- a/src/main/scala/uncore/tilelink2/RegisterRouter.scala
+++ b/src/main/scala/uncore/tilelink2/RegisterRouter.scala
@@ -18,7 +18,7 @@ class TLRegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int =
      supportsPutFull    = TransferSizes(1, beatBytes),
      fifoId             = Some(0))), // requests are handled in order
    beatBytes  = beatBytes,
-    minLatency = min(concurrency, 1))) // the Queue adds at least one cycle
+    minLatency = min(concurrency, 1))) // the Queue adds at most one cycle
 {
  require (address.contiguous)

--- a/src/main/scala/uncore/tilelink2/ToAXI4.scala
+++ b/src/main/scala/uncore/tilelink2/ToAXI4.scala
@@ -19,8 +19,8 @@ case class TLToAXI4Node(idBits: Int) extends MixedNode(TLImp, AXI4Imp)(
        aligned = true))
    Seq(AXI4MasterPortParameters(masters))
  },
-  uFn = { case (1, Seq(AXI4SlavePortParameters(slaves, beatBytes))) =>
-    val managers = slaves.map { case s =>
+  uFn = { case (1, Seq(p)) => Seq(TLManagerPortParameters(
+    managers = p.slaves.map { case s =>
      TLManagerParameters(
        address            = s.address,
        regionType         = s.regionType,
@@ -28,10 +28,10 @@ case class TLToAXI4Node(idBits: Int) extends MixedNode(TLImp, AXI4Imp)(
        nodePath           = s.nodePath,
        supportsGet        = s.supportsRead,
        supportsPutFull    = s.supportsWrite,
-        supportsPutPartial = s.supportsWrite)
+        supportsPutPartial = s.supportsWrite)},
        // AXI4 is NEVER fifo in TL sense (R+W are independent)
-    }
-    Seq(TLManagerPortParameters(managers, beatBytes, 1, 0))
+      beatBytes = p.beatBytes,
+      minLatency = p.minLatency))
  },
  numPO = 1 to 1,
  numPI = 1 to 1)
@@ -127,7 +127,8 @@ class TLToAXI4(idBits: Int, combinational: Boolean = true)(implicit p: Parameter
        // We know there can only be as many outstanding requests as TL sources
        // However, AXI read and write queues are not mutually FIFO.
        // Therefore, we want to pop them individually, but share the storage.
-        PositionalMultiQueue(UInt(width=max(1,bankBits)), positions=bankEntries(i), ways=2, combinational=combinational)
+        val bypass = combinational && edgeOut.slave.minLatency == 0
+        PositionalMultiQueue(UInt(width=max(1,bankBits)), positions=bankEntries(i), ways=2, combinational=bypass)
      }

      val a_bankPosition = if (posBits == 0) UInt(0) else a_source(sourceBits-1, idBits)