1
0

Merge pull request #529 from ucb-bar/physical-optimization

Physical optimization
This commit is contained in:
Wesley W. Terpstra 2017-01-24 18:59:07 -08:00 committed by GitHub
commit d1dedd25e7
10 changed files with 34 additions and 27 deletions

View File

@@ -6,7 +6,7 @@ import Chisel._
import chisel3.internal.sourceinfo.SourceInfo import chisel3.internal.sourceinfo.SourceInfo
import config._ import config._
import diplomacy._ import diplomacy._
import scala.math.max import scala.math.{min,max}
// pipe is only used if a queue has depth = 1 // pipe is only used if a queue has depth = 1
class AXI4Buffer(aw: Int = 2, w: Int = 2, b: Int = 2, ar: Int = 2, r: Int = 2, pipe: Boolean = true)(implicit p: Parameters) extends LazyModule class AXI4Buffer(aw: Int = 2, w: Int = 2, b: Int = 2, ar: Int = 2, r: Int = 2, pipe: Boolean = true)(implicit p: Parameters) extends LazyModule
@@ -17,7 +17,9 @@ class AXI4Buffer(aw: Int = 2, w: Int = 2, b: Int = 2, ar: Int = 2, r: Int = 2, p
require (ar >= 0) require (ar >= 0)
require (r >= 0) require (r >= 0)
val node = AXI4IdentityNode() val node = AXI4AdapterNode(
masterFn = { case Seq(p) => p },
slaveFn = { case Seq(p) => p.copy(minLatency = p.minLatency + min(1,min(aw,ar)) + min(1,min(r,b))) })
lazy val module = new LazyModuleImp(this) { lazy val module = new LazyModuleImp(this) {
val io = new Bundle { val io = new Bundle {

View File

@@ -29,8 +29,9 @@ case class AXI4SlaveParameters(
} }
case class AXI4SlavePortParameters( case class AXI4SlavePortParameters(
slaves: Seq[AXI4SlaveParameters], slaves: Seq[AXI4SlaveParameters],
beatBytes: Int) beatBytes: Int,
minLatency: Int = 1)
{ {
require (!slaves.isEmpty) require (!slaves.isEmpty)
require (isPow2(beatBytes)) require (isPow2(beatBytes))

View File

@@ -16,7 +16,8 @@ class AXI4RegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int
supportsWrite = TransferSizes(1, beatBytes), supportsWrite = TransferSizes(1, beatBytes),
supportsRead = TransferSizes(1, beatBytes), supportsRead = TransferSizes(1, beatBytes),
interleavedId = Some(0))), interleavedId = Some(0))),
beatBytes = beatBytes)) beatBytes = beatBytes,
minLatency = min(concurrency, 1))) // the Queue adds at most one cycle
{ {
require (address.contiguous) require (address.contiguous)

View File

@@ -16,7 +16,8 @@ class AXI4RAM(address: AddressSet, executable: Boolean = true, beatBytes: Int =
supportsRead = TransferSizes(1, beatBytes), supportsRead = TransferSizes(1, beatBytes),
supportsWrite = TransferSizes(1, beatBytes), supportsWrite = TransferSizes(1, beatBytes),
interleavedId = Some(0))), interleavedId = Some(0))),
beatBytes = beatBytes)) beatBytes = beatBytes,
minLatency = 0)) // B responds on same cycle
// We require the address range to include an entire beat (for the write mask) // We require the address range to include an entire beat (for the write mask)
require ((address.mask & (beatBytes-1)) == beatBytes-1) require ((address.mask & (beatBytes-1)) == beatBytes-1)

View File

@@ -16,8 +16,8 @@ case class AXI4ToTLNode() extends MixedNode(AXI4Imp, TLImp)(
nodePath = m.nodePath) nodePath = m.nodePath)
})) }))
}, },
uFn = { case (1, Seq(TLManagerPortParameters(managers, beatBytes, _, _))) => uFn = { case (1, Seq(mp)) => Seq(AXI4SlavePortParameters(
Seq(AXI4SlavePortParameters(beatBytes = beatBytes, slaves = managers.map { m => slaves = mp.managers.map { m =>
AXI4SlaveParameters( AXI4SlaveParameters(
address = m.address, address = m.address,
regionType = m.regionType, regionType = m.regionType,
@@ -25,8 +25,9 @@ case class AXI4ToTLNode() extends MixedNode(AXI4Imp, TLImp)(
nodePath = m.nodePath, nodePath = m.nodePath,
supportsWrite = m.supportsPutPartial, supportsWrite = m.supportsPutPartial,
supportsRead = m.supportsGet, supportsRead = m.supportsGet,
interleavedId = Some(0)) // TL2 never interleaves D beats interleavedId = Some(0))}, // TL2 never interleaves D beats
})) beatBytes = mp.beatBytes,
minLatency = mp.minLatency))
}, },
numPO = 1 to 1, numPO = 1 to 1,
numPI = 1 to 1) numPI = 1 to 1)

View File

@@ -18,8 +18,8 @@ class TLBuffer(a: Int = 2, b: Int = 2, c: Int = 2, d: Int = 2, e: Int = 2, pipe:
require (e >= 0) require (e >= 0)
val node = TLAdapterNode( val node = TLAdapterNode(
clientFn = { seq => seq(0).copy(minLatency = seq(0).minLatency + min(1,b) + min(1,c)) }, clientFn = { case Seq(p) => p.copy(minLatency = p.minLatency + min(1,b) + min(1,c)) },
managerFn = { seq => seq(0).copy(minLatency = seq(0).minLatency + min(1,a) + min(1,d)) }) managerFn = { case Seq(p) => p.copy(minLatency = p.minLatency + min(1,a) + min(1,d)) })
lazy val module = new LazyModuleImp(this) { lazy val module = new LazyModuleImp(this) {
val io = new Bundle { val io = new Bundle {

View File

@@ -10,7 +10,7 @@ import util._
class TLAsyncCrossingSource(sync: Int = 3)(implicit p: Parameters) extends LazyModule class TLAsyncCrossingSource(sync: Int = 3)(implicit p: Parameters) extends LazyModule
{ {
val node = TLAsyncSourceNode() val node = TLAsyncSourceNode(sync)
lazy val module = new LazyModuleImp(this) { lazy val module = new LazyModuleImp(this) {
val io = new Bundle { val io = new Bundle {
@@ -44,7 +44,7 @@ class TLAsyncCrossingSource(sync: Int = 3)(implicit p: Parameters) extends LazyM
class TLAsyncCrossingSink(depth: Int = 8, sync: Int = 3)(implicit p: Parameters) extends LazyModule class TLAsyncCrossingSink(depth: Int = 8, sync: Int = 3)(implicit p: Parameters) extends LazyModule
{ {
val node = TLAsyncSinkNode(depth) val node = TLAsyncSinkNode(depth, sync)
lazy val module = new LazyModuleImp(this) { lazy val module = new LazyModuleImp(this) {
val io = new Bundle { val io = new Bundle {

View File

@@ -169,14 +169,14 @@ case class TLAsyncIdentityNode() extends IdentityNode(TLAsyncImp)
case class TLAsyncOutputNode() extends OutputNode(TLAsyncImp) case class TLAsyncOutputNode() extends OutputNode(TLAsyncImp)
case class TLAsyncInputNode() extends InputNode(TLAsyncImp) case class TLAsyncInputNode() extends InputNode(TLAsyncImp)
case class TLAsyncSourceNode() extends MixedNode(TLImp, TLAsyncImp)( case class TLAsyncSourceNode(sync: Int) extends MixedNode(TLImp, TLAsyncImp)(
dFn = { case (1, s) => s.map(TLAsyncClientPortParameters(_)) }, dFn = { case (1, Seq(p)) => Seq(TLAsyncClientPortParameters(p)) },
uFn = { case (1, s) => s.map(_.base) }, uFn = { case (1, Seq(p)) => Seq(p.base.copy(minLatency = sync+1)) }, // discard cycles in other clock domain
numPO = 1 to 1, numPO = 1 to 1,
numPI = 1 to 1) numPI = 1 to 1)
case class TLAsyncSinkNode(depth: Int) extends MixedNode(TLAsyncImp, TLImp)( case class TLAsyncSinkNode(depth: Int, sync: Int) extends MixedNode(TLAsyncImp, TLImp)(
dFn = { case (1, s) => s.map(_.base) }, dFn = { case (1, Seq(p)) => Seq(p.base.copy(minLatency = sync+1)) },
uFn = { case (1, s) => s.map(TLAsyncManagerPortParameters(depth, _)) }, uFn = { case (1, Seq(p)) => Seq(TLAsyncManagerPortParameters(depth, p)) },
numPO = 1 to 1, numPO = 1 to 1,
numPI = 1 to 1) numPI = 1 to 1)

View File

@@ -18,7 +18,7 @@ class TLRegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int =
supportsPutFull = TransferSizes(1, beatBytes), supportsPutFull = TransferSizes(1, beatBytes),
fifoId = Some(0))), // requests are handled in order fifoId = Some(0))), // requests are handled in order
beatBytes = beatBytes, beatBytes = beatBytes,
minLatency = min(concurrency, 1))) // the Queue adds at least one cycle minLatency = min(concurrency, 1))) // the Queue adds at most one cycle
{ {
require (address.contiguous) require (address.contiguous)

View File

@@ -19,8 +19,8 @@ case class TLToAXI4Node(idBits: Int) extends MixedNode(TLImp, AXI4Imp)(
aligned = true)) aligned = true))
Seq(AXI4MasterPortParameters(masters)) Seq(AXI4MasterPortParameters(masters))
}, },
uFn = { case (1, Seq(AXI4SlavePortParameters(slaves, beatBytes))) => uFn = { case (1, Seq(p)) => Seq(TLManagerPortParameters(
val managers = slaves.map { case s => managers = p.slaves.map { case s =>
TLManagerParameters( TLManagerParameters(
address = s.address, address = s.address,
regionType = s.regionType, regionType = s.regionType,
@@ -28,10 +28,10 @@ case class TLToAXI4Node(idBits: Int) extends MixedNode(TLImp, AXI4Imp)(
nodePath = s.nodePath, nodePath = s.nodePath,
supportsGet = s.supportsRead, supportsGet = s.supportsRead,
supportsPutFull = s.supportsWrite, supportsPutFull = s.supportsWrite,
supportsPutPartial = s.supportsWrite) supportsPutPartial = s.supportsWrite)},
// AXI4 is NEVER fifo in TL sense (R+W are independent) // AXI4 is NEVER fifo in TL sense (R+W are independent)
} beatBytes = p.beatBytes,
Seq(TLManagerPortParameters(managers, beatBytes, 1, 0)) minLatency = p.minLatency))
}, },
numPO = 1 to 1, numPO = 1 to 1,
numPI = 1 to 1) numPI = 1 to 1)
@@ -127,7 +127,8 @@ class TLToAXI4(idBits: Int, combinational: Boolean = true)(implicit p: Parameter
// We know there can only be as many outstanding requests as TL sources // We know there can only be as many outstanding requests as TL sources
// However, AXI read and write queues are not mutually FIFO. // However, AXI read and write queues are not mutually FIFO.
// Therefore, we want to pop them individually, but share the storage. // Therefore, we want to pop them individually, but share the storage.
PositionalMultiQueue(UInt(width=max(1,bankBits)), positions=bankEntries(i), ways=2, combinational=combinational) val bypass = combinational && edgeOut.slave.minLatency == 0
PositionalMultiQueue(UInt(width=max(1,bankBits)), positions=bankEntries(i), ways=2, combinational=bypass)
} }
val a_bankPosition = if (posBits == 0) UInt(0) else a_source(sourceBits-1, idBits) val a_bankPosition = if (posBits == 0) UInt(0) else a_source(sourceBits-1, idBits)