uncore: switch to new diplomacy Node API

Most adapters should work on multiple ports.
This patch changes them all.
Wesley W. Terpstra 2017-01-29 15:17:52 -08:00
parent 4d646939b0
commit 972953868c
34 changed files with 1681 additions and 1722 deletions
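
In outline, every adapter gets the same treatment (the AXI4 hunks below are representative; this is only a sketch of the pattern, restated from the diff itself):

    // Before: a node carried a single port's parameters plus a port-count range
    case class AXI4SlaveNode(portParams: AXI4SlavePortParameters, numPorts: Range.Inclusive = 1 to 1)
      extends SinkNode(AXI4Imp)(portParams, numPorts)

    // After: a node carries a Seq with one entry per port
    case class AXI4SlaveNode(portParams: Seq[AXI4SlavePortParameters]) extends SinkNode(AXI4Imp)(portParams)

    // The old InteriorNode splits in two: AdapterNode pairs each input port with one
    // output port using per-port functions, while NexusNode keeps the Seq-based
    // functions for fan-in/fan-out structures such as the AHB/APB fanouts and the PLIC.
    case class AXI4AdapterNode(
      masterFn: AXI4MasterPortParameters => AXI4MasterPortParameters,
      slaveFn: AXI4SlavePortParameters => AXI4SlavePortParameters,
      numPorts: Range.Inclusive = 0 to 999)
      extends AdapterNode(AXI4Imp)(masterFn, slaveFn, numPorts)

    // Module implementations stop hard-coding port 0 and instead iterate over all
    // connected ports and their edges:
    ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach {
      case ((in, out), (edgeIn, edgeOut)) =>
        // per-port logic, otherwise unchanged from the single-port version
    }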


@@ -13,7 +13,7 @@ import uncore.util._
 class ScratchpadSlavePort(implicit p: Parameters) extends LazyModule {
   val coreDataBytes = p(XLen)/8
-  val node = TLManagerNode(TLManagerPortParameters(
+  val node = TLManagerNode(Seq(TLManagerPortParameters(
     Seq(TLManagerParameters(
       address = List(AddressSet(0x80000000L, BigInt(p(DataScratchpadSize)-1))),
       regionType = RegionType.UNCACHED,
@@ -25,7 +25,7 @@ class ScratchpadSlavePort(implicit p: Parameters) extends LazyModule {
       supportsGet = TransferSizes(1, coreDataBytes),
       fifoId = Some(0))), // requests handled in FIFO order
     beatBytes = coreDataBytes,
-    minLatency = 1))
+    minLatency = 1)))
   lazy val module = new LazyModuleImp(this) {
     val io = new Bundle {


@@ -31,16 +31,14 @@ object AHBImp extends NodeImp[AHBMasterPortParameters, AHBSlavePortParameters, A
 // Nodes implemented inside modules
 case class AHBIdentityNode() extends IdentityNode(AHBImp)
-case class AHBMasterNode(portParams: AHBMasterPortParameters, numPorts: Range.Inclusive = 1 to 1)
-  extends SourceNode(AHBImp)(portParams, numPorts)
-case class AHBSlaveNode(portParams: AHBSlavePortParameters, numPorts: Range.Inclusive = 1 to 1)
-  extends SinkNode(AHBImp)(portParams, numPorts)
-case class AHBAdapterNode(
+case class AHBMasterNode(portParams: Seq[AHBMasterPortParameters]) extends SourceNode(AHBImp)(portParams)
+case class AHBSlaveNode(portParams: Seq[AHBSlavePortParameters]) extends SinkNode(AHBImp)(portParams)
+case class AHBNexusNode(
   masterFn: Seq[AHBMasterPortParameters] => AHBMasterPortParameters,
   slaveFn: Seq[AHBSlavePortParameters] => AHBSlavePortParameters,
-  numMasterPorts: Range.Inclusive = 1 to 1,
-  numSlavePorts: Range.Inclusive = 1 to 1)
-  extends InteriorNode(AHBImp)(masterFn, slaveFn, numMasterPorts, numSlavePorts)
+  numMasterPorts: Range.Inclusive = 1 to 999,
+  numSlavePorts: Range.Inclusive = 1 to 999)
+  extends NexusNode(AHBImp)(masterFn, slaveFn, numMasterPorts, numSlavePorts)

 // Nodes passed from an inner module
 case class AHBOutputNode() extends OutputNode(AHBImp)


@@ -9,13 +9,13 @@ import regmapper._
 import scala.math.{min,max}
 class AHBRegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int = 4, undefZero: Boolean = true, executable: Boolean = false)
-  extends AHBSlaveNode(AHBSlavePortParameters(
+  extends AHBSlaveNode(Seq(AHBSlavePortParameters(
     Seq(AHBSlaveParameters(
       address = Seq(address),
       executable = executable,
       supportsWrite = TransferSizes(1, min(address.alignment.toInt, beatBytes * AHBParameters.maxTransfer)),
       supportsRead = TransferSizes(1, min(address.alignment.toInt, beatBytes * AHBParameters.maxTransfer)))),
-    beatBytes = beatBytes))
+    beatBytes = beatBytes)))
 {
   require (address.contiguous)


@@ -8,14 +8,14 @@ import diplomacy._
 class AHBRAM(address: AddressSet, executable: Boolean = true, beatBytes: Int = 4)(implicit p: Parameters) extends LazyModule
 {
-  val node = AHBSlaveNode(AHBSlavePortParameters(
+  val node = AHBSlaveNode(Seq(AHBSlavePortParameters(
     Seq(AHBSlaveParameters(
      address = List(address),
      regionType = RegionType.UNCACHED,
      executable = executable,
      supportsRead = TransferSizes(1, beatBytes * AHBParameters.maxTransfer),
      supportsWrite = TransferSizes(1, beatBytes * AHBParameters.maxTransfer))),
-    beatBytes = beatBytes))
+    beatBytes = beatBytes)))
   // We require the address range to include an entire beat (for the write mask)
   require ((address.mask & (beatBytes-1)) == beatBytes-1)


@@ -9,7 +9,7 @@ import regmapper._
 import scala.math.{min,max}
 class AHBFanout()(implicit p: Parameters) extends LazyModule {
-  val node = AHBAdapterNode(
+  val node = AHBNexusNode(
     numSlavePorts = 1 to 1,
     numMasterPorts = 1 to 32,
     masterFn = { case Seq(m) => m },


@@ -31,16 +31,14 @@ object APBImp extends NodeImp[APBMasterPortParameters, APBSlavePortParameters, A
 // Nodes implemented inside modules
 case class APBIdentityNode() extends IdentityNode(APBImp)
-case class APBMasterNode(portParams: APBMasterPortParameters, numPorts: Range.Inclusive = 1 to 1)
-  extends SourceNode(APBImp)(portParams, numPorts)
-case class APBSlaveNode(portParams: APBSlavePortParameters, numPorts: Range.Inclusive = 1 to 1)
-  extends SinkNode(APBImp)(portParams, numPorts)
-case class APBAdapterNode(
+case class APBMasterNode(portParams: Seq[APBMasterPortParameters]) extends SourceNode(APBImp)(portParams)
+case class APBSlaveNode(portParams: Seq[APBSlavePortParameters]) extends SinkNode(APBImp)(portParams)
+case class APBNexusNode(
   masterFn: Seq[APBMasterPortParameters] => APBMasterPortParameters,
   slaveFn: Seq[APBSlavePortParameters] => APBSlavePortParameters,
   numMasterPorts: Range.Inclusive = 1 to 1,
   numSlavePorts: Range.Inclusive = 1 to 1)
-  extends InteriorNode(APBImp)(masterFn, slaveFn, numMasterPorts, numSlavePorts)
+  extends NexusNode(APBImp)(masterFn, slaveFn, numMasterPorts, numSlavePorts)

 // Nodes passed from an inner module
 case class APBOutputNode() extends OutputNode(APBImp)


@@ -9,13 +9,13 @@ import regmapper._
 import scala.math.{min,max}
 class APBRegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int = 4, undefZero: Boolean = true, executable: Boolean = false)
-  extends APBSlaveNode(APBSlavePortParameters(
+  extends APBSlaveNode(Seq(APBSlavePortParameters(
     Seq(APBSlaveParameters(
       address = Seq(address),
       executable = executable,
       supportsWrite = true,
       supportsRead = true)),
-    beatBytes = beatBytes))
+    beatBytes = beatBytes)))
 {
   require (address.contiguous)


@@ -8,14 +8,14 @@ import diplomacy._
 class APBRAM(address: AddressSet, executable: Boolean = true, beatBytes: Int = 4)(implicit p: Parameters) extends LazyModule
 {
-  val node = APBSlaveNode(APBSlavePortParameters(
+  val node = APBSlaveNode(Seq(APBSlavePortParameters(
     Seq(APBSlaveParameters(
      address = List(address),
      regionType = RegionType.UNCACHED,
      executable = executable,
      supportsRead = true,
      supportsWrite = true)),
-    beatBytes = beatBytes))
+    beatBytes = beatBytes)))
   // We require the address range to include an entire beat (for the write mask)
   require ((address.mask & (beatBytes-1)) == beatBytes-1)


@@ -9,7 +9,7 @@ import regmapper._
 import scala.math.{min,max}
 class APBFanout()(implicit p: Parameters) extends LazyModule {
-  val node = APBAdapterNode(
+  val node = APBNexusNode(
     numSlavePorts = 1 to 1,
     numMasterPorts = 1 to 32,
     masterFn = { case Seq(m) => m },


@@ -18,8 +18,8 @@ class AXI4Buffer(aw: Int = 2, w: Int = 2, b: Int = 2, ar: Int = 2, r: Int = 2, p
   require (r >= 0)
   val node = AXI4AdapterNode(
-    masterFn = { case Seq(p) => p },
-    slaveFn = { case Seq(p) => p.copy(minLatency = p.minLatency + min(1,min(aw,ar)) + min(1,min(r,b))) })
+    masterFn = { p => p },
+    slaveFn = { p => p.copy(minLatency = p.minLatency + min(1,min(aw,ar)) + min(1,min(r,b))) })
   lazy val module = new LazyModuleImp(this) {
     val io = new Bundle {


@@ -23,8 +23,8 @@ class AXI4Fragmenter(lite: Boolean = false, maxInFlight: => Int = 32, combinatio
   def mapMaster(m: AXI4MasterParameters) = m.copy(aligned = true)
   val node = AXI4AdapterNode(
-    masterFn = { case Seq(mp) => mp.copy(masters = mp.masters.map(m => mapMaster(m))) },
-    slaveFn = { case Seq(sp) => sp.copy(slaves = sp.slaves .map(s => mapSlave(s, sp.beatBytes))) })
+    masterFn = { mp => mp.copy(masters = mp.masters.map(m => mapMaster(m))) },
+    slaveFn = { sp => sp.copy(slaves = sp.slaves .map(s => mapSlave(s, sp.beatBytes))) })
   lazy val module = new LazyModuleImp(this) {
     val io = new Bundle {
@@ -32,256 +32,253 @@ class AXI4Fragmenter(lite: Boolean = false, maxInFlight: => Int = 32, combinatio
val out = node.bundleOut val out = node.bundleOut
} }
val edgeOut = node.edgesOut(0) ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
val edgeIn = node.edgesIn(0) val slave = edgeOut.slave
val slave = edgeOut.slave val slaves = slave.slaves
val slaves = slave.slaves val beatBytes = slave.beatBytes
val beatBytes = slave.beatBytes val lgBytes = log2Ceil(beatBytes)
val lgBytes = log2Ceil(beatBytes) val master = edgeIn.master
val master = edgeIn.master val masters = master.masters
val masters = master.masters
// If the user claimed this was a lite interface, then there must be only one Id // If the user claimed this was a lite interface, then there must be only one Id
require (!lite || master.endId == 1) require (!lite || master.endId == 1)
// We don't support fragmenting to sub-beat accesses // We don't support fragmenting to sub-beat accesses
slaves.foreach { s => slaves.foreach { s =>
require (!s.supportsRead || s.supportsRead.contains(beatBytes)) require (!s.supportsRead || s.supportsRead.contains(beatBytes))
require (!s.supportsWrite || s.supportsWrite.contains(beatBytes)) require (!s.supportsWrite || s.supportsWrite.contains(beatBytes))
} }
/* We need to decompose a request into /* We need to decompose a request into
* FIXED => each beat is a new request * FIXED => each beat is a new request
* WRAP/INCR => take xfr up to next power of two, capped by max size of target * WRAP/INCR => take xfr up to next power of two, capped by max size of target
* *
* On AR and AW, we fragment one request into many * On AR and AW, we fragment one request into many
* On W we set 'last' on beats which are fragment boundaries * On W we set 'last' on beats which are fragment boundaries
* On R we clear 'last' on the fragments being reassembled * On R we clear 'last' on the fragments being reassembled
* On B we clear 'valid' on the responses for the injected fragments * On B we clear 'valid' on the responses for the injected fragments
* *
* AR=>R and AW+W=>B are completely independent state machines. * AR=>R and AW+W=>B are completely independent state machines.
*/
/* Returns the number of beats to execute and the new address */
def fragment(a: IrrevocableIO[AXI4BundleA], supportedSizes1: Seq[Int]): (IrrevocableIO[AXI4BundleA], Bool, UInt) = {
val out = Wire(a)
val busy = RegInit(Bool(false))
val r_addr = Reg(UInt(width = a.bits.params.addrBits))
val r_len = Reg(UInt(width = AXI4Parameters.lenBits))
val len = Mux(busy, r_len, a.bits.len)
val addr = Mux(busy, r_addr, a.bits.addr)
val lo = if (lgBytes == 0) UInt(0) else addr(lgBytes-1, 0)
val hi = addr >> lgBytes
val alignment = hi(AXI4Parameters.lenBits-1,0)
val allSame = supportedSizes1.filter(_ >= 0).distinct.size <= 1
val dynamic1 = Mux1H(slave.findFast(addr), supportedSizes1.map(s => UInt(max(0, s))))
val fixed1 = UInt(supportedSizes1.filter(_ >= 0).headOption.getOrElse(0))
/* We need to compute the largest transfer allowed by the AXI len.
* len+1 is the number of beats to execute.
* We want the MSB(len+1)-1; one less than the largest power of two we could execute.
* There are two cases; either len is 2^n-1 in which case we leave it unchanged, ELSE
* fill the bits from highest to lowest, and shift right by one bit.
*/ */
val fillLow = rightOR(len) >> 1 // set all bits in positions < a set bit
val wipeHigh = ~leftOR(~len) // clear all bits in position >= a cleared bit
val remain1 = fillLow | wipeHigh // MSB(a.len+1)-1
val align1 = ~leftOR(alignment) // transfer size limited by address alignment
val support1 = if (allSame) fixed1 else dynamic1 // maximum supported size-1 based on target address
val maxSupported1 = remain1 & align1 & support1 // Take the minimum of all the limits
// Things that cause us to degenerate to a single beat /* Returns the number of beats to execute and the new address */
val fixed = a.bits.burst === AXI4Parameters.BURST_FIXED def fragment(a: IrrevocableIO[AXI4BundleA], supportedSizes1: Seq[Int]): (IrrevocableIO[AXI4BundleA], Bool, UInt) = {
val narrow = a.bits.size =/= UInt(lgBytes) val out = Wire(a)
val bad = fixed || narrow
// The number of beats-1 to execute val busy = RegInit(Bool(false))
val beats1 = Mux(bad, UInt(0), maxSupported1) val r_addr = Reg(UInt(width = a.bits.params.addrBits))
val beats = OH1ToOH(beats1) // beats1 + 1 val r_len = Reg(UInt(width = AXI4Parameters.lenBits))
val inc_addr = addr + (beats << a.bits.size) // address after adding transfer val len = Mux(busy, r_len, a.bits.len)
val wrapMask = a.bits.bytes1() // only these bits may change, if wrapping val addr = Mux(busy, r_addr, a.bits.addr)
val mux_addr = Wire(init = inc_addr)
when (a.bits.burst === AXI4Parameters.BURST_WRAP) { val lo = if (lgBytes == 0) UInt(0) else addr(lgBytes-1, 0)
mux_addr := (inc_addr & wrapMask) | ~(~a.bits.addr | wrapMask) val hi = addr >> lgBytes
} val alignment = hi(AXI4Parameters.lenBits-1,0)
when (a.bits.burst === AXI4Parameters.BURST_FIXED) {
mux_addr := a.bits.addr val allSame = supportedSizes1.filter(_ >= 0).distinct.size <= 1
val dynamic1 = Mux1H(slave.findFast(addr), supportedSizes1.map(s => UInt(max(0, s))))
val fixed1 = UInt(supportedSizes1.filter(_ >= 0).headOption.getOrElse(0))
/* We need to compute the largest transfer allowed by the AXI len.
* len+1 is the number of beats to execute.
* We want the MSB(len+1)-1; one less than the largest power of two we could execute.
* There are two cases; either len is 2^n-1 in which case we leave it unchanged, ELSE
* fill the bits from highest to lowest, and shift right by one bit.
*/
val fillLow = rightOR(len) >> 1 // set all bits in positions < a set bit
val wipeHigh = ~leftOR(~len) // clear all bits in position >= a cleared bit
val remain1 = fillLow | wipeHigh // MSB(a.len+1)-1
val align1 = ~leftOR(alignment) // transfer size limited by address alignment
val support1 = if (allSame) fixed1 else dynamic1 // maximum supported size-1 based on target address
val maxSupported1 = remain1 & align1 & support1 // Take the minimum of all the limits
// Things that cause us to degenerate to a single beat
val fixed = a.bits.burst === AXI4Parameters.BURST_FIXED
val narrow = a.bits.size =/= UInt(lgBytes)
val bad = fixed || narrow
// The number of beats-1 to execute
val beats1 = Mux(bad, UInt(0), maxSupported1)
val beats = OH1ToOH(beats1) // beats1 + 1
val inc_addr = addr + (beats << a.bits.size) // address after adding transfer
val wrapMask = a.bits.bytes1() // only these bits may change, if wrapping
val mux_addr = Wire(init = inc_addr)
when (a.bits.burst === AXI4Parameters.BURST_WRAP) {
mux_addr := (inc_addr & wrapMask) | ~(~a.bits.addr | wrapMask)
}
when (a.bits.burst === AXI4Parameters.BURST_FIXED) {
mux_addr := a.bits.addr
}
val last = beats1 === len
a.ready := out.ready && last
out.valid := a.valid
out.bits := a.bits
out.bits.len := beats1
// We forcibly align every access. If the first beat was misaligned, the strb bits
// for the lower addresses must not have been set. Therefore, rounding the address
// down is harmless. We can do this after the address update algorithm, because the
// incremented values will be rounded down the same way. Furthermore, a subword
// offset cannot cause a premature wrap-around.
out.bits.addr := ~(~addr | UIntToOH1(a.bits.size, lgBytes))
when (out.fire()) {
busy := !last
r_addr := mux_addr
r_len := len - beats
}
(out, last, beats)
} }
val last = beats1 === len // The size to which we will fragment the access
a.ready := out.ready && last val readSizes1 = slaves.map(s => s.supportsRead .max/beatBytes-1)
out.valid := a.valid val writeSizes1 = slaves.map(s => s.supportsWrite.max/beatBytes-1)
out.bits := a.bits // Indirection variables for inputs and outputs; makes transformation application easier
out.bits.len := beats1 val (in_ar, ar_last, _) = fragment(Queue.irrevocable(in.ar, 1, flow=true), readSizes1)
val (in_aw, aw_last, w_beats) = fragment(Queue.irrevocable(in.aw, 1, flow=true), writeSizes1)
val in_w = in.w
val in_r = in.r
val in_b = in.b
val out_ar = Wire(out.ar)
val out_aw = out.aw
val out_w = out.w
val out_r = Wire(out.r)
val out_b = Wire(out.b)
// We forcibly align every access. If the first beat was misaligned, the strb bits val depth = if (combinational) 1 else 2
// for the lower addresses must not have been set. Therefore, rounding the address // In case a slave ties arready := rready, we need a queue to break the combinational loop
// down is harmless. We can do this after the address update algorithm, because the // between the two branches (in_ar => {out_ar => out_r, sideband} => in_r).
// incremented values will be rounded down the same way. Furthermore, a subword if (in.ar.bits.getWidth < in.r.bits.getWidth) {
// offset cannot cause a premature wrap-around. out.ar <> Queue(out_ar, depth, flow=combinational)
out.bits.addr := ~(~addr | UIntToOH1(a.bits.size, lgBytes)) out_r <> out.r
} else {
when (out.fire()) { out.ar <> out_ar
busy := !last out_r <> Queue(out.r, depth, flow=combinational)
r_addr := mux_addr
r_len := len - beats
} }
// In case a slave ties awready := bready or wready := bready, we need this queue
out_b <> Queue(out.b, depth, flow=combinational)
(out, last, beats) // Sideband to track which transfers were the last fragment
} def sideband() = if (lite) {
Module(new Queue(Bool(), maxInFlight, flow=combinational)).io
} else {
Module(new AXI4FragmenterSideband(maxInFlight, flow=combinational)).io
}
val sideband_ar_r = sideband()
val sideband_aw_b = sideband()
val in = io.in(0) // AR flow control
val out = io.out(0) out_ar.valid := in_ar.valid && sideband_ar_r.enq.ready
in_ar.ready := sideband_ar_r.enq.ready && out_ar.ready
sideband_ar_r.enq.valid := in_ar.valid && out_ar.ready
out_ar.bits := in_ar.bits
sideband_ar_r.enq.bits := ar_last
// The size to which we will fragment the access // When does W channel start counting a new transfer
val readSizes1 = slaves.map(s => s.supportsRead .max/beatBytes-1) val wbeats_latched = RegInit(Bool(false))
val writeSizes1 = slaves.map(s => s.supportsWrite.max/beatBytes-1) val wbeats_ready = Wire(Bool())
val wbeats_valid = Wire(Bool())
when (wbeats_valid && wbeats_ready) { wbeats_latched := Bool(true) }
when (out_aw.fire()) { wbeats_latched := Bool(false) }
// Indirection variables for inputs and outputs; makes transformation application easier // AW flow control
val (in_ar, ar_last, _) = fragment(Queue.irrevocable(in.ar, 1, flow=true), readSizes1) out_aw.valid := in_aw.valid && sideband_aw_b.enq.ready && (wbeats_ready || wbeats_latched)
val (in_aw, aw_last, w_beats) = fragment(Queue.irrevocable(in.aw, 1, flow=true), writeSizes1) in_aw.ready := sideband_aw_b.enq.ready && out_aw.ready && (wbeats_ready || wbeats_latched)
val in_w = in.w sideband_aw_b.enq.valid := in_aw.valid && out_aw.ready && (wbeats_ready || wbeats_latched)
val in_r = in.r wbeats_valid := in_aw.valid && !wbeats_latched
val in_b = in.b out_aw.bits := in_aw.bits
val out_ar = Wire(out.ar) sideband_aw_b.enq.bits := aw_last
val out_aw = out.aw
val out_w = out.w
val out_r = Wire(out.r)
val out_b = Wire(out.b)
val depth = if (combinational) 1 else 2 // We need to inject 'last' into the W channel fragments, count!
// In case a slave ties arready := rready, we need a queue to break the combinational loop val w_counter = RegInit(UInt(0, width = AXI4Parameters.lenBits+1))
// between the two branches (in_ar => {out_ar => out_r, sideband} => in_r). val w_idle = w_counter === UInt(0)
if (in.ar.bits.getWidth < in.r.bits.getWidth) { val w_todo = Mux(w_idle, Mux(wbeats_valid, w_beats, UInt(0)), w_counter)
out.ar <> Queue(out_ar, depth, flow=combinational) val w_last = w_todo === UInt(1)
out_r <> out.r w_counter := w_todo - out_w.fire()
} else { assert (!out_w.fire() || w_todo =/= UInt(0)) // underflow impossible
out.ar <> out_ar
out_r <> Queue(out.r, depth, flow=combinational)
}
// In case a slave ties awready := bready or wready := bready, we need this queue
out_b <> Queue(out.b, depth, flow=combinational)
// Sideband to track which transfers were the last fragment // W flow control
def sideband() = if (lite) { wbeats_ready := w_idle
Module(new Queue(Bool(), maxInFlight, flow=combinational)).io out_w.valid := in_w.valid && (!wbeats_ready || wbeats_valid)
} else { in_w.ready := out_w.ready && (!wbeats_ready || wbeats_valid)
Module(new AXI4FragmenterSideband(maxInFlight, flow=combinational)).io out_w.bits := in_w.bits
} out_w.bits.last := w_last
val sideband_ar_r = sideband() // We should also recreate the last last
val sideband_aw_b = sideband() assert (!out_w.valid || !in_w.bits.last || w_last)
// AR flow control // R flow control
out_ar.valid := in_ar.valid && sideband_ar_r.enq.ready val r_last = out_r.bits.last
in_ar.ready := sideband_ar_r.enq.ready && out_ar.ready in_r.valid := out_r.valid && (!r_last || sideband_ar_r.deq.valid)
sideband_ar_r.enq.valid := in_ar.valid && out_ar.ready out_r.ready := in_r.ready && (!r_last || sideband_ar_r.deq.valid)
out_ar.bits := in_ar.bits sideband_ar_r.deq.ready := r_last && out_r.valid && in_r.ready
sideband_ar_r.enq.bits := ar_last in_r.bits := out_r.bits
in_r.bits.last := r_last && sideband_ar_r.deq.bits
// When does W channel start counting a new transfer // B flow control
val wbeats_latched = RegInit(Bool(false)) val b_last = sideband_aw_b.deq.bits
val wbeats_ready = Wire(Bool()) in_b.valid := out_b.valid && sideband_aw_b.deq.valid && b_last
val wbeats_valid = Wire(Bool()) out_b.ready := sideband_aw_b.deq.valid && (!b_last || in_b.ready)
when (wbeats_valid && wbeats_ready) { wbeats_latched := Bool(true) } sideband_aw_b.deq.ready := out_b.valid && (!b_last || in_b.ready)
when (out_aw.fire()) { wbeats_latched := Bool(false) } in_b.bits := out_b.bits
// AW flow control // Merge errors from dropped B responses
out_aw.valid := in_aw.valid && sideband_aw_b.enq.ready && (wbeats_ready || wbeats_latched) val r_resp = RegInit(UInt(0, width = AXI4Parameters.respBits))
in_aw.ready := sideband_aw_b.enq.ready && out_aw.ready && (wbeats_ready || wbeats_latched) val resp = out_b.bits.resp | r_resp
sideband_aw_b.enq.valid := in_aw.valid && out_aw.ready && (wbeats_ready || wbeats_latched) when (out_b.fire()) { r_resp := Mux(b_last, UInt(0), resp) }
wbeats_valid := in_aw.valid && !wbeats_latched in_b.bits.resp := resp
out_aw.bits := in_aw.bits
sideband_aw_b.enq.bits := aw_last
// We need to inject 'last' into the W channel fragments, count!
val w_counter = RegInit(UInt(0, width = AXI4Parameters.lenBits+1))
val w_idle = w_counter === UInt(0)
val w_todo = Mux(w_idle, Mux(wbeats_valid, w_beats, UInt(0)), w_counter)
val w_last = w_todo === UInt(1)
w_counter := w_todo - out_w.fire()
assert (!out_w.fire() || w_todo =/= UInt(0)) // underflow impossible
// W flow control
wbeats_ready := w_idle
out_w.valid := in_w.valid && (!wbeats_ready || wbeats_valid)
in_w.ready := out_w.ready && (!wbeats_ready || wbeats_valid)
out_w.bits := in_w.bits
out_w.bits.last := w_last
// We should also recreate the last last
assert (!out_w.valid || !in_w.bits.last || w_last)
// R flow control
val r_last = out_r.bits.last
in_r.valid := out_r.valid && (!r_last || sideband_ar_r.deq.valid)
out_r.ready := in_r.ready && (!r_last || sideband_ar_r.deq.valid)
sideband_ar_r.deq.ready := r_last && out_r.valid && in_r.ready
in_r.bits := out_r.bits
in_r.bits.last := r_last && sideband_ar_r.deq.bits
// B flow control
val b_last = sideband_aw_b.deq.bits
in_b.valid := out_b.valid && sideband_aw_b.deq.valid && b_last
out_b.ready := sideband_aw_b.deq.valid && (!b_last || in_b.ready)
sideband_aw_b.deq.ready := out_b.valid && (!b_last || in_b.ready)
in_b.bits := out_b.bits
// Merge errors from dropped B responses
val r_resp = RegInit(UInt(0, width = AXI4Parameters.respBits))
val resp = out_b.bits.resp | r_resp
when (out_b.fire()) { r_resp := Mux(b_last, UInt(0), resp) }
in_b.bits.resp := resp
}
}
/* We want to put barriers between the fragments of a fragmented transfer and all other transfers.
* This lets us use very little state to reassemble the fragments (else we need one FIFO per ID).
* Furthermore, because all the fragments share the same AXI ID, they come back contiguously.
* This guarantees that no other R responses might get mixed between fragments, ensuring that the
* interleavedId for the slaves remains unaffected by the fragmentation transformation.
* Of course, if you need to fragment, this means there is a potentially hefty serialization cost.
* However, this design allows full concurrency in the common no-fragmentation-needed scenario.
*/
class AXI4FragmenterSideband(maxInFlight: Int, flow: Boolean = false) extends Module
{
val io = new QueueIO(Bool(), maxInFlight)
io.count := UInt(0)
val PASS = UInt(2, width = 2) // allow 'last=1' bits to enque, on 'last=0' if count>0 block else accept+FIND
val FIND = UInt(0, width = 2) // allow 'last=0' bits to enque, accept 'last=1' and switch to WAIT
val WAIT = UInt(1, width = 2) // block all access till count=0
val state = RegInit(PASS)
val count = RegInit(UInt(0, width = log2Up(maxInFlight)))
val full = count === UInt(maxInFlight-1)
val empty = count === UInt(0)
val last = count === UInt(1)
io.deq.bits := state(1) || (last && state(0)) // PASS || (last && WAIT)
io.deq.valid := !empty
io.enq.ready := !full && (empty || (state === FIND) || (state === PASS && io.enq.bits))
// WAIT => count > 0
assert (state =/= WAIT || count =/= UInt(0))
if (flow) {
when (io.enq.valid) {
io.deq.valid := Bool(true)
when (empty) { io.deq.bits := io.enq.bits }
} }
} }
count := count + io.enq.fire() - io.deq.fire() /* We want to put barriers between the fragments of a fragmented transfer and all other transfers.
switch (state) { * This lets us use very little state to reassemble the fragments (else we need one FIFO per ID).
is(PASS) { when (io.enq.valid && !io.enq.bits && empty) { state := FIND } } * Furthermore, because all the fragments share the same AXI ID, they come back contiguously.
is(FIND) { when (io.enq.valid && io.enq.bits && !full) { state := Mux(empty, PASS, WAIT) } } * This guarantees that no other R responses might get mixed between fragments, ensuring that the
is(WAIT) { when (last && io.deq.ready) { state := PASS } } * interleavedId for the slaves remains unaffected by the fragmentation transformation.
* Of course, if you need to fragment, this means there is a potentially hefty serialization cost.
* However, this design allows full concurrency in the common no-fragmentation-needed scenario.
*/
class AXI4FragmenterSideband(maxInFlight: Int, flow: Boolean = false) extends Module
{
val io = new QueueIO(Bool(), maxInFlight)
io.count := UInt(0)
val PASS = UInt(2, width = 2) // allow 'last=1' bits to enque, on 'last=0' if count>0 block else accept+FIND
val FIND = UInt(0, width = 2) // allow 'last=0' bits to enque, accept 'last=1' and switch to WAIT
val WAIT = UInt(1, width = 2) // block all access till count=0
val state = RegInit(PASS)
val count = RegInit(UInt(0, width = log2Up(maxInFlight)))
val full = count === UInt(maxInFlight-1)
val empty = count === UInt(0)
val last = count === UInt(1)
io.deq.bits := state(1) || (last && state(0)) // PASS || (last && WAIT)
io.deq.valid := !empty
io.enq.ready := !full && (empty || (state === FIND) || (state === PASS && io.enq.bits))
// WAIT => count > 0
assert (state =/= WAIT || count =/= UInt(0))
if (flow) {
when (io.enq.valid) {
io.deq.valid := Bool(true)
when (empty) { io.deq.bits := io.enq.bits }
}
}
count := count + io.enq.fire() - io.deq.fire()
switch (state) {
is(PASS) { when (io.enq.valid && !io.enq.bits && empty) { state := FIND } }
is(FIND) { when (io.enq.valid && io.enq.bits && !full) { state := Mux(empty, PASS, WAIT) } }
is(WAIT) { when (last && io.deq.ready) { state := PASS } }
}
} }
} }


@@ -31,16 +31,13 @@ object AXI4Imp extends NodeImp[AXI4MasterPortParameters, AXI4SlavePortParameters
 // Nodes implemented inside modules
 case class AXI4IdentityNode() extends IdentityNode(AXI4Imp)
-case class AXI4MasterNode(portParams: AXI4MasterPortParameters, numPorts: Range.Inclusive = 1 to 1)
-  extends SourceNode(AXI4Imp)(portParams, numPorts)
-case class AXI4SlaveNode(portParams: AXI4SlavePortParameters, numPorts: Range.Inclusive = 1 to 1)
-  extends SinkNode(AXI4Imp)(portParams, numPorts)
+case class AXI4MasterNode(portParams: Seq[AXI4MasterPortParameters]) extends SourceNode(AXI4Imp)(portParams)
+case class AXI4SlaveNode(portParams: Seq[AXI4SlavePortParameters]) extends SinkNode(AXI4Imp)(portParams)
 case class AXI4AdapterNode(
-  masterFn: Seq[AXI4MasterPortParameters] => AXI4MasterPortParameters,
-  slaveFn: Seq[AXI4SlavePortParameters] => AXI4SlavePortParameters,
-  numMasterPorts: Range.Inclusive = 1 to 1,
-  numSlavePorts: Range.Inclusive = 1 to 1)
-  extends InteriorNode(AXI4Imp)(masterFn, slaveFn, numMasterPorts, numSlavePorts)
+  masterFn: AXI4MasterPortParameters => AXI4MasterPortParameters,
+  slaveFn: AXI4SlavePortParameters => AXI4SlavePortParameters,
+  numPorts: Range.Inclusive = 0 to 999)
+  extends AdapterNode(AXI4Imp)(masterFn, slaveFn, numPorts)

 // Nodes passed from an inner module
 case class AXI4OutputNode() extends OutputNode(AXI4Imp)


@@ -9,7 +9,7 @@ import regmapper._
 import scala.math.{min,max}
 class AXI4RegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int = 4, undefZero: Boolean = true, executable: Boolean = false)
-  extends AXI4SlaveNode(AXI4SlavePortParameters(
+  extends AXI4SlaveNode(Seq(AXI4SlavePortParameters(
     Seq(AXI4SlaveParameters(
       address = Seq(address),
       executable = executable,
@@ -17,7 +17,7 @@ class AXI4RegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int
       supportsRead = TransferSizes(1, beatBytes),
       interleavedId = Some(0))),
     beatBytes = beatBytes,
-    minLatency = min(concurrency, 1))) // the Queue adds at most one cycle
+    minLatency = min(concurrency, 1)))) // the Queue adds at most one cycle
 {
   require (address.contiguous)


@@ -8,7 +8,7 @@ import diplomacy._
 class AXI4RAM(address: AddressSet, executable: Boolean = true, beatBytes: Int = 4)(implicit p: Parameters) extends LazyModule
 {
-  val node = AXI4SlaveNode(AXI4SlavePortParameters(
+  val node = AXI4SlaveNode(Seq(AXI4SlavePortParameters(
     Seq(AXI4SlaveParameters(
       address = List(address),
       regionType = RegionType.UNCACHED,
@@ -17,7 +17,7 @@ class AXI4RAM(address: AddressSet, executable: Boolean = true, beatBytes: Int =
       supportsWrite = TransferSizes(1, beatBytes),
       interleavedId = Some(0))),
     beatBytes = beatBytes,
-    minLatency = 0)) // B responds on same cycle
+    minLatency = 0))) // B responds on same cycle
   // We require the address range to include an entire beat (for the write mask)
   require ((address.mask & (beatBytes-1)) == beatBytes-1)


@@ -8,15 +8,15 @@ import config._
 import diplomacy._
 import uncore.tilelink2._
-case class AXI4ToTLNode() extends MixedNode(AXI4Imp, TLImp)(
-  dFn = { case (1, Seq(AXI4MasterPortParameters(masters))) =>
-    Seq(TLClientPortParameters(clients = masters.map { m =>
+case class AXI4ToTLNode() extends MixedAdapterNode(AXI4Imp, TLImp)(
+  dFn = { case AXI4MasterPortParameters(masters) =>
+    TLClientPortParameters(clients = masters.map { m =>
       TLClientParameters(
         sourceId = IdRange(m.id.start << 1, m.id.end << 1), // R+W ids are distinct
         nodePath = m.nodePath)
-    }))
+    })
   },
-  uFn = { case (1, Seq(mp)) => Seq(AXI4SlavePortParameters(
+  uFn = { mp => AXI4SlavePortParameters(
     slaves = mp.managers.map { m =>
       AXI4SlaveParameters(
         address = m.address,
@@ -27,10 +27,8 @@ case class AXI4ToTLNode() extends MixedNode(AXI4Imp, TLImp)(
         supportsRead = m.supportsGet,
         interleavedId = Some(0))}, // TL2 never interleaves D beats
     beatBytes = mp.beatBytes,
-    minLatency = mp.minLatency))
-  },
-  numPO = 1 to 1,
-  numPI = 1 to 1)
+    minLatency = mp.minLatency)
+  })

 class AXI4ToTL()(implicit p: Parameters) extends LazyModule
 {
@@ -42,131 +40,129 @@ class AXI4ToTL()(implicit p: Parameters) extends LazyModule
       val out = node.bundleOut
     }

-    val in = io.in(0)
-    val out = io.out(0)
-    val edgeIn = node.edgesIn(0)
-    val edgeOut = node.edgesOut(0)
-    val numIds = edgeIn.master.endId
-    val beatBytes = edgeOut.manager.beatBytes
-    val countBits = AXI4Parameters.lenBits + (1 << AXI4Parameters.sizeBits) - 1
+    ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
+      val numIds = edgeIn.master.endId
+      val beatBytes = edgeOut.manager.beatBytes
+      val countBits = AXI4Parameters.lenBits + (1 << AXI4Parameters.sizeBits) - 1

       require (edgeIn.master.masters(0).aligned)

       val r_out = Wire(out.a)
       val r_inflight = RegInit(UInt(0, width = numIds))
       val r_block = r_inflight(in.ar.bits.id)
       val r_size1 = in.ar.bits.bytes1()
       val r_size = OH1ToUInt(r_size1)
       val r_addr = in.ar.bits.addr
       val r_ok = edgeOut.manager.supportsGetSafe(r_addr, r_size)
       val r_err_in = Wire(Decoupled(new AXI4BundleRError(in.ar.bits.params)))
       val r_err_out = Queue(r_err_in, 2)
       val r_count = RegInit(UInt(0, width = in.ar.bits.params.lenBits))
       val r_last = r_count === in.ar.bits.len
       assert (!in.ar.valid || r_size1 === UIntToOH1(r_size, countBits)) // because aligned
       in.ar.ready := Mux(r_ok, r_out.ready, r_err_in.ready && r_last) && !r_block
       r_out.valid := in.ar.valid && !r_block && r_ok
       r_out.bits := edgeOut.Get(in.ar.bits.id << 1 | UInt(1), r_addr, r_size)._2
       r_err_in.valid := in.ar.valid && !r_block && !r_ok
       r_err_in.bits.last := r_last
       r_err_in.bits.id := in.ar.bits.id
       when (r_err_in.fire()) { r_count := Mux(r_last, UInt(0), r_count + UInt(1)) }

       val w_out = Wire(out.a)
       val w_inflight = RegInit(UInt(0, width = numIds))
       val w_block = w_inflight(in.aw.bits.id)
       val w_size1 = in.aw.bits.bytes1()
       val w_size = OH1ToUInt(w_size1)
       val w_addr = in.aw.bits.addr
       val w_ok = edgeOut.manager.supportsPutPartialSafe(w_addr, w_size)
       val w_err_in = Wire(Decoupled(in.aw.bits.id))
       val w_err_out = Queue(w_err_in, 2)
       assert (!in.aw.valid || w_size1 === UIntToOH1(w_size, countBits)) // because aligned
       assert (!in.aw.valid || in.aw.bits.len === UInt(0) || in.aw.bits.size === UInt(log2Ceil(beatBytes))) // because aligned
       in.aw.ready := Mux(w_ok, w_out.ready, w_err_in.ready) && in.w.valid && in.w.bits.last && !w_block
       in.w.ready := Mux(w_ok, w_out.ready, w_err_in.ready || !in.w.bits.last) && in.aw.valid && !w_block
       w_out.valid := in.aw.valid && in.w.valid && !w_block && w_ok
       w_out.bits := edgeOut.Put(in.aw.bits.id << 1, w_addr, w_size, in.w.bits.data, in.w.bits.strb)._2
       w_err_in.valid := in.aw.valid && in.w.valid && !w_block && !w_ok && in.w.bits.last
       w_err_in.bits := in.aw.bits.id

       TLArbiter(TLArbiter.lowestIndexFirst)(out.a, (UInt(0), r_out), (in.aw.bits.len, w_out))

       val ok_b = Wire(in.b)
       val err_b = Wire(in.b)
       val mux_b = Wire(in.b)
       val ok_r = Wire(in.r)
       val err_r = Wire(in.r)
       val mux_r = Wire(in.r)
       val d_resp = Mux(out.d.bits.error, AXI4Parameters.RESP_SLVERR, AXI4Parameters.RESP_OKAY)
       val d_hasData = edgeOut.hasData(out.d.bits)
       val d_last = edgeOut.last(out.d)
       out.d.ready := Mux(d_hasData, ok_r.ready, ok_b.ready)
       ok_r.valid := out.d.valid && d_hasData
       ok_b.valid := out.d.valid && !d_hasData
       ok_r.bits.id := out.d.bits.source >> 1
       ok_r.bits.data := out.d.bits.data
       ok_r.bits.resp := d_resp
       ok_r.bits.last := d_last
       r_err_out.ready := err_r.ready
       err_r.valid := r_err_out.valid
       err_r.bits.id := r_err_out.bits.id
       err_r.bits.data := out.d.bits.data // don't care
       err_r.bits.resp := AXI4Parameters.RESP_DECERR
       err_r.bits.last := r_err_out.bits.last
       // AXI4 must hold R to one source until last
       val mux_lock_ok = RegInit(Bool(false))
       val mux_lock_err = RegInit(Bool(false))
       when (ok_r .fire()) { mux_lock_ok := !ok_r .bits.last }
       when (err_r.fire()) { mux_lock_err := !err_r.bits.last }
       assert (!mux_lock_ok || !mux_lock_err)
       // Prioritize err over ok (b/c err_r.valid comes from a register)
       mux_r.valid := (!mux_lock_err && ok_r.valid) || (!mux_lock_ok && err_r.valid)
       mux_r.bits := Mux(!mux_lock_ok && err_r.valid, err_r.bits, ok_r.bits)
       ok_r.ready := mux_r.ready && (mux_lock_ok || !err_r.valid)
       err_r.ready := mux_r.ready && !mux_lock_ok
       // AXI4 needs irrevocable behaviour
       in.r <> Queue.irrevocable(mux_r, 1, flow=true)
       ok_b.bits.id := out.d.bits.source >> 1
       ok_b.bits.resp := d_resp
       w_err_out.ready := err_b.ready
       err_b.valid := w_err_out.valid
       err_b.bits.id := w_err_out.bits
       err_b.bits.resp := AXI4Parameters.RESP_DECERR
       // Prioritize err over ok (b/c err_b.valid comes from a register)
       mux_b.valid := ok_b.valid || err_b.valid
       mux_b.bits := Mux(err_b.valid, err_b.bits, ok_b.bits)
       ok_b.ready := mux_b.ready && !err_b.valid
       err_b.ready := mux_b.ready
       // AXI4 needs irrevocable behaviour
       in.b <> Queue.irrevocable(mux_b, 1, flow=true)
       // Update flight trackers
       val r_set = in.ar.fire().asUInt << in.ar.bits.id
       val r_clr = (in.r.fire() && in.r.bits.last).asUInt << in.r.bits.id
       r_inflight := (r_inflight | r_set) & ~r_clr
       val w_set = in.aw.fire().asUInt << in.aw.bits.id
       val w_clr = in.b.fire().asUInt << in.b.bits.id
       w_inflight := (w_inflight | w_set) & ~w_clr
       // Unused channels
       out.b.ready := Bool(true)
       out.c.valid := Bool(false)
       out.e.valid := Bool(false)
+    }
   }
 }


@@ -62,7 +62,7 @@ class TLPLIC(supervisor: Boolean, maxPriorities: Int, address: BigInt = 0xC00000
     beatBytes = p(rocket.XLen)/8,
     undefZero = false)
-  val intnode = IntAdapterNode(
+  val intnode = IntNexusNode(
     numSourcePorts = 0 to 1024,
     numSinkPorts = 0 to 1024,
     sourceFn = { _ => IntSourcePortParameters(Seq(IntSourceParameters(contextsPerHart))) },


@@ -6,6 +6,7 @@ import Chisel._
 import chisel3.internal.sourceinfo.SourceInfo
 import config._
 import diplomacy._
+import util.GenericParameterizedBundle
 import scala.math.{min,max}

 // Ensures that all downstream RW managers support Atomic operationss.
@@ -15,8 +16,8 @@ class TLAtomicAutomata(logical: Boolean = true, arithmetic: Boolean = true, conc
   require (concurrency >= 1)
   val node = TLAdapterNode(
-    clientFn = { case Seq(cp) => require (!cp.unsafeAtomics); cp.copy(unsafeAtomics = true) },
-    managerFn = { case Seq(mp) => mp.copy(managers = mp.managers.map { m =>
+    clientFn = { case cp => require (!cp.unsafeAtomics); cp.copy(unsafeAtomics = true) },
+    managerFn = { case mp => mp.copy(managers = mp.managers.map { m =>
       val ourSupport = TransferSizes(1, mp.beatBytes)
       def widen(x: TransferSizes) = if (passthrough && x.min <= 2*mp.beatBytes) TransferSizes(1, max(mp.beatBytes, x.max)) else ourSupport
       val canDoit = m.supportsPutFull.contains(ourSupport) && m.supportsGet.contains(ourSupport)
@@ -33,245 +34,232 @@ class TLAtomicAutomata(logical: Boolean = true, arithmetic: Boolean = true, conc
val out = node.bundleOut val out = node.bundleOut
} }
val in = io.in(0) ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
val out = io.out(0) val managers = edgeOut.manager.managers
val edgeIn = node.edgesIn(0) val beatBytes = edgeOut.manager.beatBytes
val edgeOut = node.edgesOut(0)
val managers = edgeOut.manager.managers
val beatBytes = edgeOut.manager.beatBytes
// To which managers are we adding atomic support? // To which managers are we adding atomic support?
val ourSupport = TransferSizes(1, edgeOut.manager.beatBytes) val ourSupport = TransferSizes(1, edgeOut.manager.beatBytes)
val managersNeedingHelp = managers.filter { m => val managersNeedingHelp = managers.filter { m =>
m.supportsPutFull.contains(ourSupport) && m.supportsPutFull.contains(ourSupport) &&
m.supportsGet.contains(ourSupport) && m.supportsGet.contains(ourSupport) &&
((logical && !m.supportsLogical .contains(ourSupport)) || ((logical && !m.supportsLogical .contains(ourSupport)) ||
(arithmetic && !m.supportsArithmetic.contains(ourSupport)) || (arithmetic && !m.supportsArithmetic.contains(ourSupport)) ||
!passthrough) // we will do atomics for everyone we can !passthrough) // we will do atomics for everyone we can
}
// We cannot add atomcis to a non-FIFO manager
managersNeedingHelp foreach { m => require (m.fifoId.isDefined) }
// We need to preserve FIFO semantics across FIFO domains, not managers
// Suppose you have Put(42) Atomic(+1) both inflight; valid results: 42 or 43
// If we allow Put(42) Get() Put(+1) concurrent; valid results: 42 43 OR undef
// Making non-FIFO work requires waiting for all Acks to come back (=> use FIFOFixer)
val domainsNeedingHelp = managersNeedingHelp.map(_.fifoId.get).distinct
// Don't overprovision the CAM
val camSize = min(domainsNeedingHelp.size, concurrency)
// Compact the fifoIds to only those we care about
val camFifoIds = managers.map(m => UInt(m.fifoId.map(id => max(0, domainsNeedingHelp.indexOf(id))).getOrElse(0)))
// CAM entry state machine
val FREE = UInt(0) // unused waiting on Atomic from A
val GET = UInt(3) // Get sent down A waiting on AccessDataAck from D
val AMO = UInt(2) // AccessDataAck sent up D waiting for A availability
val ACK = UInt(1) // Put sent down A waiting for PutAck from D
def helper(select: Seq[Bool], x: Seq[TransferSizes], lgSize: UInt) =
if (!passthrough) Bool(false) else
if (x.map(_ == x(0)).reduce(_ && _)) x(0).containsLg(lgSize) else
Mux1H(select, x.map(_.containsLg(lgSize)))
// Do we need to do anything at all?
if (camSize > 0) {
class CAM_S extends Bundle {
val state = UInt(width = 2)
}
class CAM_A extends Bundle {
val bits = new TLBundleA(out.a.bits.params)
val fifoId = UInt(width = log2Up(domainsNeedingHelp.size))
val lut = UInt(width = 4)
}
class CAM_D extends Bundle {
val data = UInt(width = out.a.bits.params.dataBits)
} }
// We cannot add atomcis to a non-FIFO manager
managersNeedingHelp foreach { m => require (m.fifoId.isDefined) }
// We need to preserve FIFO semantics across FIFO domains, not managers
// Suppose you have Put(42) Atomic(+1) both inflight; valid results: 42 or 43
// If we allow Put(42) Get() Put(+1) concurrent; valid results: 42 43 OR undef
// Making non-FIFO work requires waiting for all Acks to come back (=> use FIFOFixer)
val domainsNeedingHelp = managersNeedingHelp.map(_.fifoId.get).distinct
// Don't overprovision the CAM
val camSize = min(domainsNeedingHelp.size, concurrency)
// Compact the fifoIds to only those we care about
val camFifoIds = managers.map(m => UInt(m.fifoId.map(id => max(0, domainsNeedingHelp.indexOf(id))).getOrElse(0)))
val initval = Wire(new CAM_S) // CAM entry state machine
initval.state := FREE val FREE = UInt(0) // unused waiting on Atomic from A
val cam_s = RegInit(Vec.fill(camSize)(initval)) val GET = UInt(3) // Get sent down A waiting on AccessDataAck from D
val cam_a = Reg(Vec(camSize, new CAM_A)) val AMO = UInt(2) // AccessDataAck sent up D waiting for A availability
val cam_d = Reg(Vec(camSize, new CAM_D)) val ACK = UInt(1) // Put sent down A waiting for PutAck from D
val cam_free = cam_s.map(_.state === FREE) def helper(select: Seq[Bool], x: Seq[TransferSizes], lgSize: UInt) =
val cam_amo = cam_s.map(_.state === AMO) if (!passthrough) Bool(false) else
val cam_abusy = cam_s.map(e => e.state === GET || e.state === AMO) // A is blocked if (x.map(_ == x(0)).reduce(_ && _)) x(0).containsLg(lgSize) else
val cam_dmatch = cam_s.map(e => e.state =/= FREE) // D should inspect these entries Mux1H(select, x.map(_.containsLg(lgSize)))
// Can the manager already handle this message? val params = TLAtomicAutomata.CAMParams(out.a.bits.params, domainsNeedingHelp.size)
val a_size = edgeIn.size(in.a.bits) // Do we need to do anything at all?
val a_select = edgeOut.manager.findFast(edgeIn.address(in.a.bits)) if (camSize > 0) {
val a_canLogical = helper(a_select, managers.map(_.supportsLogical), a_size) val initval = Wire(new TLAtomicAutomata.CAM_S(params))
val a_canArithmetic = helper(a_select, managers.map(_.supportsArithmetic), a_size) initval.state := FREE
val a_isLogical = in.a.bits.opcode === TLMessages.LogicalData val cam_s = RegInit(Vec.fill(camSize)(initval))
val a_isArithmetic = in.a.bits.opcode === TLMessages.ArithmeticData val cam_a = Reg(Vec(camSize, new TLAtomicAutomata.CAM_A(params)))
val a_isSupported = Mux(a_isLogical, a_canLogical, Mux(a_isArithmetic, a_canArithmetic, Bool(true))) val cam_d = Reg(Vec(camSize, new TLAtomicAutomata.CAM_D(params)))
// Must we do a Put? val cam_free = cam_s.map(_.state === FREE)
val a_cam_any_put = cam_amo.reduce(_ || _) val cam_amo = cam_s.map(_.state === AMO)
val a_cam_por_put = cam_amo.scanLeft(Bool(false))(_||_).init val cam_abusy = cam_s.map(e => e.state === GET || e.state === AMO) // A is blocked
val a_cam_sel_put = (cam_amo zip a_cam_por_put) map { case (a, b) => a && !b } val cam_dmatch = cam_s.map(e => e.state =/= FREE) // D should inspect these entries
val a_cam_a = PriorityMux(cam_amo, cam_a)
val a_cam_d = PriorityMux(cam_amo, cam_d)
val a_a = a_cam_a.bits.data
val a_d = a_cam_d.data
// Does the A request conflict with an inflight AMO? // Can the manager already handle this message?
val a_fifoId = Mux1H(a_select, camFifoIds) val a_size = edgeIn.size(in.a.bits)
val a_cam_busy = (cam_abusy zip cam_a.map(_.fifoId === a_fifoId)) map { case (a,b) => a&&b } reduce (_||_) val a_select = edgeOut.manager.findFast(edgeIn.address(in.a.bits))
val a_canLogical = helper(a_select, managers.map(_.supportsLogical), a_size)
val a_canArithmetic = helper(a_select, managers.map(_.supportsArithmetic), a_size)
val a_isLogical = in.a.bits.opcode === TLMessages.LogicalData
val a_isArithmetic = in.a.bits.opcode === TLMessages.ArithmeticData
val a_isSupported = Mux(a_isLogical, a_canLogical, Mux(a_isArithmetic, a_canArithmetic, Bool(true)))
// (Where) are we are allocating in the CAM? // Must we do a Put?
val a_cam_any_free = cam_free.reduce(_ || _) val a_cam_any_put = cam_amo.reduce(_ || _)
val a_cam_por_free = cam_free.scanLeft(Bool(false))(_||_).init val a_cam_por_put = cam_amo.scanLeft(Bool(false))(_||_).init
val a_cam_sel_free = (cam_free zip a_cam_por_free) map { case (a,b) => a && !b } val a_cam_sel_put = (cam_amo zip a_cam_por_put) map { case (a, b) => a && !b }
val a_cam_a = PriorityMux(cam_amo, cam_a)
val a_cam_d = PriorityMux(cam_amo, cam_d)
val a_a = a_cam_a.bits.data
val a_d = a_cam_d.data
// Logical AMO // Does the A request conflict with an inflight AMO?
val indexes = Seq.tabulate(beatBytes*8) { i => Cat(a_a(i,i), a_d(i,i)) } val a_fifoId = Mux1H(a_select, camFifoIds)
val logic_out = Cat(indexes.map(x => a_cam_a.lut(x).asUInt).reverse) val a_cam_busy = (cam_abusy zip cam_a.map(_.fifoId === a_fifoId)) map { case (a,b) => a&&b } reduce (_||_)
// Arithmetic AMO // (Where) are we are allocating in the CAM?
val unsigned = a_cam_a.bits.param(1) val a_cam_any_free = cam_free.reduce(_ || _)
val take_max = a_cam_a.bits.param(0) val a_cam_por_free = cam_free.scanLeft(Bool(false))(_||_).init
val adder = a_cam_a.bits.param(2) val a_cam_sel_free = (cam_free zip a_cam_por_free) map { case (a,b) => a && !b }
val mask = a_cam_a.bits.mask
val signSel = ~(~mask | (mask >> 1))
val signbits_a = Cat(Seq.tabulate(beatBytes) { i => a_a(8*i+7,8*i+7) } .reverse)
val signbits_d = Cat(Seq.tabulate(beatBytes) { i => a_d(8*i+7,8*i+7) } .reverse)
// Move the selected sign bit into the first byte position it will extend
val signbit_a = ((signbits_a & signSel) << 1)(beatBytes-1, 0)
val signbit_d = ((signbits_d & signSel) << 1)(beatBytes-1, 0)
val signext_a = FillInterleaved(8, leftOR(signbit_a))
val signext_d = FillInterleaved(8, leftOR(signbit_d))
// NOTE: sign-extension does not change the relative ordering in EITHER unsigned or signed arithmetic
val wide_mask = FillInterleaved(8, mask)
val a_a_ext = (a_a & wide_mask) | signext_a
val a_d_ext = (a_d & wide_mask) | signext_d
val a_d_inv = Mux(adder, a_d_ext, ~a_d_ext)
val adder_out = a_a_ext + a_d_inv
val h = 8*beatBytes-1 // now sign-extended; use biggest bit
val a_bigger_uneq = unsigned === a_a_ext(h) // result if high bits are unequal
val a_bigger = Mux(a_a_ext(h) === a_d_ext(h), !adder_out(h), a_bigger_uneq)
val pick_a = take_max === a_bigger
val arith_out = Mux(adder, adder_out, Mux(pick_a, a_a, a_d))
// AMO result data // Logical AMO
val amo_data = val indexes = Seq.tabulate(beatBytes*8) { i => Cat(a_a(i,i), a_d(i,i)) }
if (!logical) arith_out else val logic_out = Cat(indexes.map(x => a_cam_a.lut(x).asUInt).reverse)
if (!arithmetic) logic_out else
Mux(a_cam_a.bits.opcode(0), logic_out, arith_out)
// Potentially mutate the message from inner // Arithmetic AMO
val source_i = Wire(in.a) val unsigned = a_cam_a.bits.param(1)
val a_allow = !a_cam_busy && (a_isSupported || a_cam_any_free) val take_max = a_cam_a.bits.param(0)
in.a.ready := source_i.ready && a_allow val adder = a_cam_a.bits.param(2)
source_i.valid := in.a.valid && a_allow val mask = a_cam_a.bits.mask
source_i.bits := in.a.bits val signSel = ~(~mask | (mask >> 1))
when (!a_isSupported) { // minimal mux difference val signbits_a = Cat(Seq.tabulate(beatBytes) { i => a_a(8*i+7,8*i+7) } .reverse)
source_i.bits.opcode := TLMessages.Get val signbits_d = Cat(Seq.tabulate(beatBytes) { i => a_d(8*i+7,8*i+7) } .reverse)
source_i.bits.param := UInt(0) // Move the selected sign bit into the first byte position it will extend
} val signbit_a = ((signbits_a & signSel) << 1)(beatBytes-1, 0)
val signbit_d = ((signbits_d & signSel) << 1)(beatBytes-1, 0)
val signext_a = FillInterleaved(8, leftOR(signbit_a))
val signext_d = FillInterleaved(8, leftOR(signbit_d))
// NOTE: sign-extension does not change the relative ordering in EITHER unsigned or signed arithmetic
val wide_mask = FillInterleaved(8, mask)
val a_a_ext = (a_a & wide_mask) | signext_a
val a_d_ext = (a_d & wide_mask) | signext_d
val a_d_inv = Mux(adder, a_d_ext, ~a_d_ext)
val adder_out = a_a_ext + a_d_inv
val h = 8*beatBytes-1 // now sign-extended; use biggest bit
val a_bigger_uneq = unsigned === a_a_ext(h) // result if high bits are unequal
val a_bigger = Mux(a_a_ext(h) === a_d_ext(h), !adder_out(h), a_bigger_uneq)
val pick_a = take_max === a_bigger
val arith_out = Mux(adder, adder_out, Mux(pick_a, a_a, a_d))
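As an aside on the arithmetic path above: masking both operands to the active bytes and replicating each operand's sign bit into the remaining bytes lets one subtractor decide MIN, MAX, MINU and MAXU, because sign extension preserves both the signed and the unsigned ordering. A plain-Scala sketch of that selection rule (hypothetical names, not part of the RTL):

  def aIsBigger(a: Int, d: Int, bits: Int, unsigned: Boolean): Boolean = {
    def ext(x: Int): Int = {                       // sign-extend a bits-wide field
      val field = x & ((1 << bits) - 1)
      if ((field >> (bits - 1)) == 1) field | (-1 << bits) else field
    }
    val (ae, de) = (ext(a), ext(d))
    if ((ae < 0) == (de < 0)) ae - de > 0          // same top bit: one subtraction decides
    else unsigned == (ae < 0)                      // top bits differ: the unsigned flag decides
  }
  assert( aIsBigger(0x80, 0x01, 8, unsigned = true ))  // MAXU: 0x80 beats 0x01
  assert(!aIsBigger(0x80, 0x01, 8, unsigned = false))  // signed MAX prefers 0x01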
// Potentially take the message from the CAM // AMO result data
val source_c = Wire(in.a) val amo_data =
source_c.valid := a_cam_any_put if (!logical) arith_out else
source_c.bits := edgeOut.Put(a_cam_a.bits.source, edgeIn.address(a_cam_a.bits), a_cam_a.bits.size, amo_data)._2 if (!arithmetic) logic_out else
Mux(a_cam_a.bits.opcode(0), logic_out, arith_out)
// Finishing an AMO from the CAM has highest priority // Potentially mutate the message from inner
TLArbiter(TLArbiter.lowestIndexFirst)(out.a, (UInt(0), source_c), (edgeOut.numBeats1(in.a.bits), source_i)) val source_i = Wire(in.a)
val a_allow = !a_cam_busy && (a_isSupported || a_cam_any_free)
in.a.ready := source_i.ready && a_allow
source_i.valid := in.a.valid && a_allow
source_i.bits := in.a.bits
when (!a_isSupported) { // minimal mux difference
source_i.bits.opcode := TLMessages.Get
source_i.bits.param := UInt(0)
}
// Capture the A state into the CAM // Potentially take the message from the CAM
when (source_i.fire() && !a_isSupported) { val source_c = Wire(in.a)
(a_cam_sel_free zip cam_a) foreach { case (en, r) => source_c.valid := a_cam_any_put
when (en) { source_c.bits := edgeOut.Put(a_cam_a.bits.source, edgeIn.address(a_cam_a.bits), a_cam_a.bits.size, amo_data)._2
r.fifoId := a_fifoId
r.bits := in.a.bits // Finishing an AMO from the CAM has highest priority
r.lut := MuxLookup(in.a.bits.param(1, 0), UInt(0, width = 4), Array( TLArbiter(TLArbiter.lowestIndexFirst)(out.a, (UInt(0), source_c), (edgeOut.numBeats1(in.a.bits), source_i))
TLAtomics.AND -> UInt(0x8),
TLAtomics.OR -> UInt(0xe), // Capture the A state into the CAM
TLAtomics.XOR -> UInt(0x6), when (source_i.fire() && !a_isSupported) {
TLAtomics.SWAP -> UInt(0xc))) (a_cam_sel_free zip cam_a) foreach { case (en, r) =>
when (en) {
r.fifoId := a_fifoId
r.bits := in.a.bits
r.lut := MuxLookup(in.a.bits.param(1, 0), UInt(0, width = 4), Array(
TLAtomics.AND -> UInt(0x8),
TLAtomics.OR -> UInt(0xe),
TLAtomics.XOR -> UInt(0x6),
TLAtomics.SWAP -> UInt(0xc)))
}
}
(a_cam_sel_free zip cam_s) foreach { case (en, r) =>
when (en) {
r.state := GET
}
} }
} }
(a_cam_sel_free zip cam_s) foreach { case (en, r) =>
when (en) { // Advance the put state
r.state := GET when (source_c.fire()) {
(a_cam_sel_put zip cam_s) foreach { case (en, r) =>
when (en) {
r.state := ACK
}
} }
} }
}
// Advance the put state // We need to deal with a potential D response in the same cycle as the A request
when (source_c.fire()) { val d_cam_sel_raw = cam_a.map(_.bits.source === in.d.bits.source)
(a_cam_sel_put zip cam_s) foreach { case (en, r) => val d_cam_sel_match = (d_cam_sel_raw zip cam_dmatch) map { case (a,b) => a&&b }
when (en) { val d_cam_data = Mux1H(d_cam_sel_match, cam_d.map(_.data))
r.state := ACK val d_cam_sel_bypass = if (edgeOut.manager.minLatency > 0) Bool(false) else
out.d.bits.source === in.a.bits.source && in.a.valid && !a_isSupported
val d_cam_sel = (a_cam_sel_free zip d_cam_sel_match) map { case (a,d) => Mux(d_cam_sel_bypass, a, d) }
val d_cam_sel_any = d_cam_sel_bypass || d_cam_sel_match.reduce(_ || _)
val d_ackd = out.d.bits.opcode === TLMessages.AccessAckData
val d_ack = out.d.bits.opcode === TLMessages.AccessAck
when (out.d.fire()) {
(d_cam_sel zip cam_d) foreach { case (en, r) =>
when (en && d_ackd) {
r.data := out.d.bits.data
}
}
(d_cam_sel zip cam_s) foreach { case (en, r) =>
when (en) {
// Note: it is important that this comes AFTER the := GET, so we can go FREE=>GET=>AMO in one cycle
r.state := Mux(d_ackd, AMO, FREE)
}
} }
} }
}
// We need to deal with a potential D response in the same cycle as the A request val d_drop = d_ackd && d_cam_sel_any
val d_cam_sel_raw = cam_a.map(_.bits.source === in.d.bits.source) val d_replace = d_ack && d_cam_sel_match.reduce(_ || _)
val d_cam_sel_match = (d_cam_sel_raw zip cam_dmatch) map { case (a,b) => a&&b }
val d_cam_data = Mux1H(d_cam_sel_match, cam_d.map(_.data))
val d_cam_sel_bypass = if (edgeOut.manager.minLatency > 0) Bool(false) else
out.d.bits.source === in.a.bits.source && in.a.valid && !a_isSupported
val d_cam_sel = (a_cam_sel_free zip d_cam_sel_match) map { case (a,d) => Mux(d_cam_sel_bypass, a, d) }
val d_cam_sel_any = d_cam_sel_bypass || d_cam_sel_match.reduce(_ || _)
val d_ackd = out.d.bits.opcode === TLMessages.AccessAckData
val d_ack = out.d.bits.opcode === TLMessages.AccessAck
when (out.d.fire()) { in.d.valid := out.d.valid && !d_drop
(d_cam_sel zip cam_d) foreach { case (en, r) => out.d.ready := in.d.ready || d_drop
when (en && d_ackd) {
r.data := out.d.bits.data in.d.bits := out.d.bits
} when (d_replace) { // minimal muxes
} in.d.bits.opcode := TLMessages.AccessAckData
(d_cam_sel zip cam_s) foreach { case (en, r) => in.d.bits.data := d_cam_data
when (en) {
// Note: it is important that this comes AFTER the := GET, so we can go FREE=>GET=>AMO in one cycle
r.state := Mux(d_ackd, AMO, FREE)
}
} }
} else {
out.a.valid := in.a.valid
in.a.ready := out.a.ready
out.a.bits := in.a.bits
in.d.valid := out.d.valid
out.d.ready := in.d.ready
in.d.bits := out.d.bits
} }
val d_drop = d_ackd && d_cam_sel_any if (edgeOut.manager.anySupportAcquireB && edgeIn.client.anySupportProbe) {
val d_replace = d_ack && d_cam_sel_match.reduce(_ || _) in.b.valid := out.b.valid
out.b.ready := in.b.ready
in.b.bits := out.b.bits
in.d.valid := out.d.valid && !d_drop out.c.valid := in.c.valid
out.d.ready := in.d.ready || d_drop in.c.ready := out.c.ready
out.c.bits := in.c.bits
in.d.bits := out.d.bits out.e.valid := in.e.valid
when (d_replace) { // minimal muxes in.e.ready := out.e.ready
in.d.bits.opcode := TLMessages.AccessAckData out.e.bits := in.e.bits
in.d.bits.data := d_cam_data } else {
in.b.valid := Bool(false)
in.c.ready := Bool(true)
in.e.ready := Bool(true)
out.b.ready := Bool(true)
out.c.valid := Bool(false)
out.e.valid := Bool(false)
} }
} else {
out.a.valid := in.a.valid
in.a.ready := out.a.ready
out.a.bits := in.a.bits
in.d.valid := out.d.valid
out.d.ready := in.d.ready
in.d.bits := out.d.bits
}
if (edgeOut.manager.anySupportAcquireB && edgeIn.client.anySupportProbe) {
in.b.valid := out.b.valid
out.b.ready := in.b.ready
in.b.bits := out.b.bits
out.c.valid := in.c.valid
in.c.ready := out.c.ready
out.c.bits := in.c.bits
out.e.valid := in.e.valid
in.e.ready := out.e.ready
out.e.bits := in.e.bits
} else {
in.b.valid := Bool(false)
in.c.ready := Bool(true)
in.e.ready := Bool(true)
out.b.ready := Bool(true)
out.c.valid := Bool(false)
out.e.valid := Bool(false)
} }
} }
} }
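The logical AMOs are evaluated one bit at a time through the 4-bit lut captured alongside each CAM entry: the pair (client operand bit, returned memory bit) indexes the table. A small plain-Scala illustration of the constants used in the MuxLookup above (illustrative helper, not the RTL):

  def lutOp(lut: Int, a: Boolean, d: Boolean): Boolean = {
    val idx = (if (a) 2 else 0) | (if (d) 1 else 0)  // Cat(a, d): the operand bit is the high index bit
    ((lut >> idx) & 1) == 1
  }
  assert( lutOp(0x8, a = true,  d = true ))   // AND: only 1 & 1 survives
  assert(!lutOp(0xe, a = false, d = false))   // OR: only 0 | 0 clears the bit
  assert( lutOp(0x6, a = true,  d = false))   // XOR
  assert( lutOp(0xc, a = true,  d = false) && !lutOp(0xc, a = false, d = true)) // SWAP keeps the operand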
@@ -284,6 +272,20 @@ object TLAtomicAutomata
     atomics.node := x
     atomics.node
   }
+
+  case class CAMParams(a: TLBundleParameters, domainsNeedingHelp: Int)
+
+  class CAM_S(params: CAMParams) extends GenericParameterizedBundle(params) {
+    val state = UInt(width = 2)
+  }
+  class CAM_A(params: CAMParams) extends GenericParameterizedBundle(params) {
+    val bits = new TLBundleA(params.a)
+    val fifoId = UInt(width = log2Up(params.domainsNeedingHelp))
+    val lut = UInt(width = 4)
+  }
+  class CAM_D(params: CAMParams) extends GenericParameterizedBundle(params) {
+    val data = UInt(width = params.a.dataBits)
+  }
 }
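Read together with the state updates in the module, a CAM entry appears to step FREE to GET (the unsupported atomic is forwarded as a Get), GET to AMO (the old data returned on D is latched), AMO to ACK (the computed Put has been issued from the CAM), and back to FREE when the Put is acknowledged. A hedged sketch of that lifecycle; the real state constants live elsewhere in this file:

  // Hypothetical software model of the CAM entry lifecycle (names assumed).
  sealed trait CamState
  case object FREE extends CamState  // entry available
  case object GET  extends CamState  // Get issued to fetch the old value
  case object AMO  extends CamState  // old value captured, Put pending
  case object ACK  extends CamState  // Put issued, awaiting AccessAck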
/** Synthesizeable unit tests */


@@ -13,11 +13,11 @@ class TLBroadcast(lineBytes: Int, numTrackers: Int = 4, bufferless: Boolean = fa
   require (numTrackers > 0)
   val node = TLAdapterNode(
-    clientFn = { case Seq(cp) =>
+    clientFn = { cp =>
       cp.copy(clients = Seq(TLClientParameters(
         sourceId = IdRange(0, 1 << log2Ceil(cp.endSourceId*4)))))
     },
-    managerFn = { case Seq(mp) =>
+    managerFn = { mp =>
       mp.copy(
         endSinkId = numTrackers,
         managers = mp.managers.map { m =>
@@ -56,154 +56,152 @@ class TLBroadcast(lineBytes: Int, numTrackers: Int = 4, bufferless: Boolean = fa
val out = node.bundleOut val out = node.bundleOut
} }
val in = io.in(0) ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
val out = io.out(0) val clients = edgeIn.client.clients
val edgeIn = node.edgesIn(0) val managers = edgeOut.manager.managers
val edgeOut = node.edgesOut(0) val lineShift = log2Ceil(lineBytes)
val clients = edgeIn.client.clients
val managers = edgeOut.manager.managers
val lineShift = log2Ceil(lineBytes)
import TLBroadcastConstants._ import TLBroadcastConstants._
require (lineBytes >= edgeOut.manager.beatBytes) require (lineBytes >= edgeOut.manager.beatBytes)
// For the probe walker, we need to identify all the caches // For the probe walker, we need to identify all the caches
val caches = clients.filter(_.supportsProbe).map(_.sourceId) val caches = clients.filter(_.supportsProbe).map(_.sourceId)
val cache_targets = caches.map(c => UInt(c.start)) val cache_targets = caches.map(c => UInt(c.start))
// Create the request tracker queues // Create the request tracker queues
val trackers = Seq.tabulate(numTrackers) { id => val trackers = Seq.tabulate(numTrackers) { id =>
Module(new TLBroadcastTracker(id, lineBytes, log2Up(caches.size+1), bufferless, edgeIn, edgeOut)).io Module(new TLBroadcastTracker(id, lineBytes, log2Up(caches.size+1), bufferless, edgeIn, edgeOut)).io
}
// We always accept E
in.e.ready := Bool(true)
(trackers zip UIntToOH(in.e.bits.sink).toBools) foreach { case (tracker, select) =>
tracker.e_last := select && in.e.fire()
}
// Depending on the high source bits, we might transform D
val d_high = log2Ceil(edgeIn.client.endSourceId)
val d_what = out.d.bits.source(d_high+1, d_high)
val d_drop = d_what === DROP
val d_hasData = edgeOut.hasData(out.d.bits)
val d_normal = Wire(in.d)
val d_trackerOH = Vec(trackers.map { t => !t.idle && t.source === d_normal.bits.source }).asUInt
assert (!out.d.valid || !d_drop || out.d.bits.opcode === TLMessages.AccessAck)
out.d.ready := d_normal.ready || d_drop
d_normal.valid := out.d.valid && !d_drop
d_normal.bits := out.d.bits // truncates source
when (d_what(1)) { // TRANSFORM_*
d_normal.bits.opcode := Mux(d_hasData, TLMessages.GrantData, TLMessages.ReleaseAck)
d_normal.bits.param := Mux(d_hasData, Mux(d_what(0), TLPermissions.toT, TLPermissions.toB), UInt(0))
}
d_normal.bits.sink := OHToUInt(d_trackerOH)
assert (!d_normal.valid || (d_trackerOH.orR() || d_normal.bits.opcode === TLMessages.ReleaseAck))
// A tracker response is anything neither dropped nor a ReleaseAck
val d_response = d_hasData || !d_what(1)
val d_last = edgeIn.last(d_normal)
(trackers zip d_trackerOH.toBools) foreach { case (tracker, select) =>
tracker.d_last := select && d_normal.fire() && d_response && d_last
tracker.probedack := select && out.d.fire() && d_drop
}
// Incoming C can be:
// ProbeAck => decrement tracker, drop
// ProbeAckData => decrement tracker, send out A as PutFull(DROP)
// ReleaseData => send out A as PutFull(TRANSFORM)
// Release => send out D as ReleaseAck
val c_probeack = in.c.bits.opcode === TLMessages.ProbeAck
val c_probeackdata = in.c.bits.opcode === TLMessages.ProbeAckData
val c_releasedata = in.c.bits.opcode === TLMessages.ReleaseData
val c_release = in.c.bits.opcode === TLMessages.Release
val c_trackerOH = trackers.map { t => t.line === (in.c.bits.address >> lineShift) }
val c_trackerSrc = Mux1H(c_trackerOH, trackers.map { _.source })
// Decrement the tracker's outstanding probe counter
(trackers zip c_trackerOH) foreach { case (tracker, select) =>
tracker.probenack := in.c.fire() && c_probeack && select
}
val releaseack = Wire(in.d)
val putfull = Wire(out.a)
in.c.ready := c_probeack || Mux(c_release, releaseack.ready, putfull.ready)
releaseack.valid := in.c.valid && c_release
releaseack.bits := edgeIn.ReleaseAck(in.c.bits.address, UInt(0), in.c.bits.source, in.c.bits.size)
val put_what = Mux(c_releasedata, TRANSFORM_B, DROP)
val put_who = Mux(c_releasedata, in.c.bits.source, c_trackerSrc)
putfull.valid := in.c.valid && (c_probeackdata || c_releasedata)
putfull.bits := edgeOut.Put(Cat(put_what, put_who), in.c.bits.address, in.c.bits.size, in.c.bits.data)._2
// Combine ReleaseAck or the modified D
TLArbiter.lowest(edgeOut, in.d, releaseack, d_normal)
// Combine the PutFull with the trackers
TLArbiter.lowestFromSeq(edgeOut, out.a, putfull +: trackers.map(_.out_a))
// The Probe FSM walks all caches and probes them
val probe_todo = RegInit(UInt(0, width = max(1, caches.size)))
val probe_line = Reg(UInt())
val probe_perms = Reg(UInt(width = 2))
val probe_next = probe_todo & ~(leftOR(probe_todo) << 1)
val probe_busy = probe_todo.orR()
val probe_target = if (caches.size == 0) UInt(0) else Mux1H(probe_next, cache_targets)
// Probe whatever the FSM wants to do next
in.b.valid := probe_busy
if (caches.size != 0) {
in.b.bits := edgeIn.Probe(probe_line << lineShift, probe_target, UInt(lineShift), probe_perms)._2
}
when (in.b.fire()) { probe_todo := probe_todo & ~probe_next }
// Which cache does a request come from?
val a_cache = if (caches.size == 0) UInt(1) else Vec(caches.map(_.contains(in.a.bits.source))).asUInt
val a_first = edgeIn.first(in.a)
// To accept a request from A, the probe FSM must be idle and there must be a matching tracker
val freeTrackers = Vec(trackers.map { t => t.idle }).asUInt
val freeTracker = freeTrackers.orR()
val matchTrackers = Vec(trackers.map { t => t.line === in.a.bits.address >> lineShift }).asUInt
val matchTracker = matchTrackers.orR()
val allocTracker = freeTrackers & ~(leftOR(freeTrackers) << 1)
val selectTracker = Mux(matchTracker, matchTrackers, allocTracker)
val trackerReady = Vec(trackers.map(_.in_a.ready)).asUInt
in.a.ready := (!a_first || !probe_busy) && (selectTracker & trackerReady).orR()
(trackers zip selectTracker.toBools) foreach { case (t, select) =>
t.in_a.valid := in.a.valid && select && (!a_first || !probe_busy)
t.in_a.bits := in.a.bits
t.in_a_first := a_first
t.probe := (if (caches.size == 0) UInt(0) else Mux(a_cache.orR(), UInt(caches.size-1), UInt(caches.size)))
}
when (in.a.fire() && a_first) {
probe_todo := ~a_cache // probe all but the cache who poked us
probe_line := in.a.bits.address >> lineShift
probe_perms := MuxLookup(in.a.bits.opcode, Wire(UInt(width = 2)), Array(
TLMessages.PutFullData -> TLPermissions.toN,
TLMessages.PutPartialData -> TLPermissions.toN,
TLMessages.ArithmeticData -> TLPermissions.toN,
TLMessages.LogicalData -> TLPermissions.toN,
TLMessages.Get -> TLPermissions.toB,
TLMessages.Hint -> MuxLookup(in.a.bits.param, Wire(UInt(width = 2)), Array(
TLHints.PREFETCH_READ -> TLPermissions.toB,
TLHints.PREFETCH_WRITE -> TLPermissions.toN)),
TLMessages.Acquire -> MuxLookup(in.a.bits.param, Wire(UInt(width = 2)), Array(
TLPermissions.NtoB -> TLPermissions.toB,
TLPermissions.NtoT -> TLPermissions.toN,
TLPermissions.BtoT -> TLPermissions.toN))))
}
// The outer TL connections may not be cached
out.b.ready := Bool(true)
out.c.valid := Bool(false)
out.e.valid := Bool(false)
} }
// We always accept E
in.e.ready := Bool(true)
(trackers zip UIntToOH(in.e.bits.sink).toBools) foreach { case (tracker, select) =>
tracker.e_last := select && in.e.fire()
}
// Depending on the high source bits, we might transform D
val d_high = log2Ceil(edgeIn.client.endSourceId)
val d_what = out.d.bits.source(d_high+1, d_high)
val d_drop = d_what === DROP
val d_hasData = edgeOut.hasData(out.d.bits)
val d_normal = Wire(in.d)
val d_trackerOH = Vec(trackers.map { t => !t.idle && t.source === d_normal.bits.source }).asUInt
assert (!out.d.valid || !d_drop || out.d.bits.opcode === TLMessages.AccessAck)
out.d.ready := d_normal.ready || d_drop
d_normal.valid := out.d.valid && !d_drop
d_normal.bits := out.d.bits // truncates source
when (d_what(1)) { // TRANSFORM_*
d_normal.bits.opcode := Mux(d_hasData, TLMessages.GrantData, TLMessages.ReleaseAck)
d_normal.bits.param := Mux(d_hasData, Mux(d_what(0), TLPermissions.toT, TLPermissions.toB), UInt(0))
}
d_normal.bits.sink := OHToUInt(d_trackerOH)
assert (!d_normal.valid || (d_trackerOH.orR() || d_normal.bits.opcode === TLMessages.ReleaseAck))
// A tracker response is anything neither dropped nor a ReleaseAck
val d_response = d_hasData || !d_what(1)
val d_last = edgeIn.last(d_normal)
(trackers zip d_trackerOH.toBools) foreach { case (tracker, select) =>
tracker.d_last := select && d_normal.fire() && d_response && d_last
tracker.probedack := select && out.d.fire() && d_drop
}
// Incoming C can be:
// ProbeAck => decrement tracker, drop
// ProbeAckData => decrement tracker, send out A as PutFull(DROP)
// ReleaseData => send out A as PutFull(TRANSFORM)
// Release => send out D as ReleaseAck
val c_probeack = in.c.bits.opcode === TLMessages.ProbeAck
val c_probeackdata = in.c.bits.opcode === TLMessages.ProbeAckData
val c_releasedata = in.c.bits.opcode === TLMessages.ReleaseData
val c_release = in.c.bits.opcode === TLMessages.Release
val c_trackerOH = trackers.map { t => t.line === (in.c.bits.address >> lineShift) }
val c_trackerSrc = Mux1H(c_trackerOH, trackers.map { _.source })
// Decrement the tracker's outstanding probe counter
(trackers zip c_trackerOH) foreach { case (tracker, select) =>
tracker.probenack := in.c.fire() && c_probeack && select
}
val releaseack = Wire(in.d)
val putfull = Wire(out.a)
in.c.ready := c_probeack || Mux(c_release, releaseack.ready, putfull.ready)
releaseack.valid := in.c.valid && c_release
releaseack.bits := edgeIn.ReleaseAck(in.c.bits.address, UInt(0), in.c.bits.source, in.c.bits.size)
val put_what = Mux(c_releasedata, TRANSFORM_B, DROP)
val put_who = Mux(c_releasedata, in.c.bits.source, c_trackerSrc)
putfull.valid := in.c.valid && (c_probeackdata || c_releasedata)
putfull.bits := edgeOut.Put(Cat(put_what, put_who), in.c.bits.address, in.c.bits.size, in.c.bits.data)._2
// Combine ReleaseAck or the modified D
TLArbiter.lowest(edgeOut, in.d, releaseack, d_normal)
// Combine the PutFull with the trackers
TLArbiter.lowestFromSeq(edgeOut, out.a, putfull +: trackers.map(_.out_a))
// The Probe FSM walks all caches and probes them
val probe_todo = RegInit(UInt(0, width = max(1, caches.size)))
val probe_line = Reg(UInt())
val probe_perms = Reg(UInt(width = 2))
val probe_next = probe_todo & ~(leftOR(probe_todo) << 1)
val probe_busy = probe_todo.orR()
val probe_target = if (caches.size == 0) UInt(0) else Mux1H(probe_next, cache_targets)
// Probe whatever the FSM wants to do next
in.b.valid := probe_busy
if (caches.size != 0) {
in.b.bits := edgeIn.Probe(probe_line << lineShift, probe_target, UInt(lineShift), probe_perms)._2
}
when (in.b.fire()) { probe_todo := probe_todo & ~probe_next }
// Which cache does a request come from?
val a_cache = if (caches.size == 0) UInt(1) else Vec(caches.map(_.contains(in.a.bits.source))).asUInt
val a_first = edgeIn.first(in.a)
// To accept a request from A, the probe FSM must be idle and there must be a matching tracker
val freeTrackers = Vec(trackers.map { t => t.idle }).asUInt
val freeTracker = freeTrackers.orR()
val matchTrackers = Vec(trackers.map { t => t.line === in.a.bits.address >> lineShift }).asUInt
val matchTracker = matchTrackers.orR()
val allocTracker = freeTrackers & ~(leftOR(freeTrackers) << 1)
val selectTracker = Mux(matchTracker, matchTrackers, allocTracker)
val trackerReady = Vec(trackers.map(_.in_a.ready)).asUInt
in.a.ready := (!a_first || !probe_busy) && (selectTracker & trackerReady).orR()
(trackers zip selectTracker.toBools) foreach { case (t, select) =>
t.in_a.valid := in.a.valid && select && (!a_first || !probe_busy)
t.in_a.bits := in.a.bits
t.in_a_first := a_first
t.probe := (if (caches.size == 0) UInt(0) else Mux(a_cache.orR(), UInt(caches.size-1), UInt(caches.size)))
}
when (in.a.fire() && a_first) {
probe_todo := ~a_cache // probe all but the cache who poked us
probe_line := in.a.bits.address >> lineShift
probe_perms := MuxLookup(in.a.bits.opcode, Wire(UInt(width = 2)), Array(
TLMessages.PutFullData -> TLPermissions.toN,
TLMessages.PutPartialData -> TLPermissions.toN,
TLMessages.ArithmeticData -> TLPermissions.toN,
TLMessages.LogicalData -> TLPermissions.toN,
TLMessages.Get -> TLPermissions.toB,
TLMessages.Hint -> MuxLookup(in.a.bits.param, Wire(UInt(width = 2)), Array(
TLHints.PREFETCH_READ -> TLPermissions.toB,
TLHints.PREFETCH_WRITE -> TLPermissions.toN)),
TLMessages.Acquire -> MuxLookup(in.a.bits.param, Wire(UInt(width = 2)), Array(
TLPermissions.NtoB -> TLPermissions.toB,
TLPermissions.NtoT -> TLPermissions.toN,
TLPermissions.BtoT -> TLPermissions.toN))))
}
// The outer TL connections may not be cached
out.b.ready := Bool(true)
out.c.valid := Bool(false)
out.e.valid := Bool(false)
} }
} }
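Two places above reduce a bit vector to its lowest set bit: the probe walker (probe_next) and the tracker allocator (allocTracker), both built from an OR-scan (leftOR). In ordinary software the same selection is the classic x & -x; a minimal plain-Scala sketch:

  def lowestSetBit(x: BigInt): BigInt = x & -x
  // probe_todo = 0b1010: probe cache 1 first, then cache 3
  assert(lowestSetBit(BigInt(10)) == BigInt(2))
  assert(lowestSetBit(BigInt(10) & ~BigInt(2)) == BigInt(8))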


@@ -18,8 +18,8 @@ class TLBuffer(a: Int = 2, b: Int = 2, c: Int = 2, d: Int = 2, e: Int = 2, pipe:
   require (e >= 0)
   val node = TLAdapterNode(
-    clientFn  = { case Seq(p) => p.copy(minLatency = p.minLatency + min(1,b) + min(1,c)) },
-    managerFn = { case Seq(p) => p.copy(minLatency = p.minLatency + min(1,a) + min(1,d)) })
+    clientFn  = { p => p.copy(minLatency = p.minLatency + min(1,b) + min(1,c)) },
+    managerFn = { p => p.copy(minLatency = p.minLatency + min(1,a) + min(1,d)) })
   lazy val module = new LazyModuleImp(this) {
     val io = new Bundle {


@@ -12,10 +12,10 @@ import TLMessages._
 class TLCacheCork(unsafe: Boolean = false)(implicit p: Parameters) extends LazyModule
 {
   val node = TLAdapterNode(
-    clientFn  = { case Seq(cp) =>
+    clientFn  = { case cp =>
       cp.copy(clients = cp.clients.map { c => c.copy(
         sourceId = IdRange(c.sourceId.start*2, c.sourceId.end*2))})},
-    managerFn = { case Seq(mp) =>
+    managerFn = { case mp =>
       mp.copy(managers = mp.managers.map { m => m.copy(
         regionType = if (m.regionType == RegionType.UNCACHED) RegionType.TRACKED else m.regionType,
         supportsAcquireB = m.supportsGet,
@@ -27,93 +27,89 @@ class TLCacheCork(unsafe: Boolean = false)(implicit p: Parameters) extends LazyM
val out = node.bundleOut val out = node.bundleOut
} }
val edgeIn = node.edgesIn(0) ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
val edgeOut = node.edgesOut(0) require (edgeIn.client.clients.size == 1 || unsafe, "Only one client can safely use a TLCacheCork")
require (edgeIn.client.clients.filter(_.supportsProbe).size == 1, "Only one caching client allowed")
edgeOut.manager.managers.foreach { case m =>
require (!m.supportsAcquireB, "Cannot support caches beyond the Cork")
}
require (edgeIn.client.clients.size == 1 || unsafe, "Only one client can safely use a TLCacheCork") // The Cork turns [Acquire=>Get] => [AccessAckData=>GrantData]
require (edgeIn.client.clients.filter(_.supportsProbe).size == 1, "Only one caching client allowed") // and [ReleaseData=>PutFullData] => [AccessAck=>ReleaseAck]
edgeOut.manager.managers.foreach { case m => // We need to encode information sufficient to reverse the transformation in output.
require (!m.supportsAcquireB, "Cannot support caches beyond the Cork") // A caveat is that we get Acquire+Release with the same source and must keep the
// source unique after transformation onto the A channel.
// The coding scheme is:
// Put: 1, Release: 0 => AccessAck
// *: 0, Acquire: 1 => AccessAckData
// Take requests from A to A
val isPut = in.a.bits.opcode === PutFullData || in.a.bits.opcode === PutPartialData
val a_a = Wire(out.a)
a_a <> in.a
a_a.bits.source := in.a.bits.source << 1 | Mux(isPut, UInt(1), UInt(0))
// Transform Acquire into Get
when (in.a.bits.opcode === Acquire) {
a_a.bits.opcode := Get
a_a.bits.param := UInt(0)
a_a.bits.source := in.a.bits.source << 1 | UInt(1)
}
// Take ReleaseData from C to A; Release from C to D
val c_a = Wire(out.a)
c_a.valid := in.c.valid && in.c.bits.opcode === ReleaseData
c_a.bits.opcode := PutFullData
c_a.bits.param := UInt(0)
c_a.bits.size := in.c.bits.size
c_a.bits.source := in.c.bits.source << 1
c_a.bits.address := in.c.bits.address
c_a.bits.mask := edgeOut.mask(in.c.bits.address, in.c.bits.size)
c_a.bits.data := in.c.bits.data
val c_d = Wire(in.d)
c_d.valid := in.c.valid && in.c.bits.opcode === Release
c_d.bits.opcode := ReleaseAck
c_d.bits.param := UInt(0)
c_d.bits.size := in.c.bits.size
c_d.bits.source := in.c.bits.source
c_d.bits.sink := UInt(0)
c_d.bits.addr_lo := in.c.bits.address
c_d.bits.data := UInt(0)
c_d.bits.error := Bool(false)
assert (!in.c.valid || in.c.bits.opcode === Release || in.c.bits.opcode === ReleaseData)
in.c.ready := Mux(in.c.bits.opcode === Release, c_d.ready, c_a.ready)
// Discard E
in.e.ready := Bool(true)
// Block B; should never happen
out.b.ready := Bool(false)
assert (!out.b.valid)
// Take responses from D and transform them
val d_d = Wire(in.d)
d_d <> out.d
d_d.bits.source := out.d.bits.source >> 1
when (out.d.bits.opcode === AccessAckData && out.d.bits.source(0)) {
d_d.bits.opcode := GrantData
d_d.bits.param := TLPermissions.toT
}
when (out.d.bits.opcode === AccessAck && !out.d.bits.source(0)) {
d_d.bits.opcode := ReleaseAck
}
// Combine the sources of messages into the channels
TLArbiter(TLArbiter.lowestIndexFirst)(out.a, (edgeOut.numBeats1(c_a.bits), c_a), (edgeOut.numBeats1(a_a.bits), a_a))
TLArbiter(TLArbiter.lowestIndexFirst)(in.d, (edgeIn .numBeats1(d_d.bits), d_d), (UInt(0), Queue(c_d, 2)))
// Tie off unused ports
in.b.valid := Bool(false)
out.c.valid := Bool(false)
out.e.valid := Bool(false)
} }
val out = io.out(0)
val in = io.in(0)
// The Cork turns [Acquire=>Get] => [AccessAckData=>GrantData]
// and [ReleaseData=>PutFullData] => [AccessAck=>ReleaseAck]
// We need to encode information sufficient to reverse the transformation in output.
// A caveat is that we get Acquire+Release with the same source and must keep the
// source unique after transformation onto the A channel.
// The coding scheme is:
// Put: 1, Release: 0 => AccessAck
// *: 0, Acquire: 1 => AccessAckData
// Take requests from A to A
val isPut = in.a.bits.opcode === PutFullData || in.a.bits.opcode === PutPartialData
val a_a = Wire(out.a)
a_a <> in.a
a_a.bits.source := in.a.bits.source << 1 | Mux(isPut, UInt(1), UInt(0))
// Transform Acquire into Get
when (in.a.bits.opcode === Acquire) {
a_a.bits.opcode := Get
a_a.bits.param := UInt(0)
a_a.bits.source := in.a.bits.source << 1 | UInt(1)
}
// Take ReleaseData from C to A; Release from C to D
val c_a = Wire(out.a)
c_a.valid := in.c.valid && in.c.bits.opcode === ReleaseData
c_a.bits.opcode := PutFullData
c_a.bits.param := UInt(0)
c_a.bits.size := in.c.bits.size
c_a.bits.source := in.c.bits.source << 1
c_a.bits.address := in.c.bits.address
c_a.bits.mask := edgeOut.mask(in.c.bits.address, in.c.bits.size)
c_a.bits.data := in.c.bits.data
val c_d = Wire(in.d)
c_d.valid := in.c.valid && in.c.bits.opcode === Release
c_d.bits.opcode := ReleaseAck
c_d.bits.param := UInt(0)
c_d.bits.size := in.c.bits.size
c_d.bits.source := in.c.bits.source
c_d.bits.sink := UInt(0)
c_d.bits.addr_lo := in.c.bits.address
c_d.bits.data := UInt(0)
c_d.bits.error := Bool(false)
assert (!in.c.valid || in.c.bits.opcode === Release || in.c.bits.opcode === ReleaseData)
in.c.ready := Mux(in.c.bits.opcode === Release, c_d.ready, c_a.ready)
// Discard E
in.e.ready := Bool(true)
// Block B; should never happen
out.b.ready := Bool(false)
assert (!out.b.valid)
// Take responses from D and transform them
val d_d = Wire(in.d)
d_d <> out.d
d_d.bits.source := out.d.bits.source >> 1
when (out.d.bits.opcode === AccessAckData && out.d.bits.source(0)) {
d_d.bits.opcode := GrantData
d_d.bits.param := TLPermissions.toT
}
when (out.d.bits.opcode === AccessAck && !out.d.bits.source(0)) {
d_d.bits.opcode := ReleaseAck
}
// Combine the sources of messages into the channels
TLArbiter(TLArbiter.lowestIndexFirst)(out.a, (edgeOut.numBeats1(c_a.bits), c_a), (edgeOut.numBeats1(a_a.bits), a_a))
TLArbiter(TLArbiter.lowestIndexFirst)(in.d, (edgeIn .numBeats1(d_d.bits), d_d), (UInt(0), Queue(c_d, 2)))
// Tie off unused ports
in.b.valid := Bool(false)
out.c.valid := Bool(false)
out.e.valid := Bool(false)
} }
} }
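The Cork's source tagging described in the comment block above can be restated in a few lines of plain Scala (hypothetical helper names): every outer request widens the inner source by one bit, and that bit plus the response opcode is enough to restore the inner message.

  // lowBit is 1 for Put and Acquire, 0 for Get and ReleaseData.
  def outerSource(innerSource: Int, isPutOrAcquire: Boolean): Int =
    (innerSource << 1) | (if (isPutOrAcquire) 1 else 0)

  // Decode an outer D response back to the inner source and opcode.
  def decode(source: Int, hasData: Boolean): (Int, String) = {
    val opcode =
      if (hasData) { if ((source & 1) == 1) "GrantData"  else "AccessAckData" } // Acquire vs Get
      else         { if ((source & 1) == 0) "ReleaseAck" else "AccessAck"     } // ReleaseData vs Put
    (source >> 1, opcode)
  }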


@@ -11,8 +11,8 @@ import scala.math.{min,max}
 class TLFilter(select: AddressSet)(implicit p: Parameters) extends LazyModule
 {
   val node = TLAdapterNode(
-    clientFn  = { case Seq(cp) => cp },
-    managerFn = { case Seq(mp) =>
+    clientFn  = { cp => cp },
+    managerFn = { mp =>
       mp.copy(managers = mp.managers.map { m =>
         val filtered = m.address.map(_.intersect(select)).flatten
         val alignment = select.alignment /* alignment 0 means 'select' selected everything */


@@ -41,8 +41,8 @@ class TLFragmenter(val minSize: Int, val maxSize: Int, val alwaysMin: Boolean =
     sourceId = IdRange(c.sourceId.start << fragmentBits, c.sourceId.end << fragmentBits))
   val node = TLAdapterNode(
-    clientFn  = { case Seq(c) => c.copy(clients = c.clients.map(mapClient)) },
-    managerFn = { case Seq(m) => m.copy(managers = m.managers.map(mapManager)) })
+    clientFn  = { c => c.copy(clients = c.clients.map(mapClient)) },
+    managerFn = { m => m.copy(managers = m.managers.map(mapManager)) })
   lazy val module = new LazyModuleImp(this) {
     val io = new Bundle {
@@ -50,204 +50,201 @@ class TLFragmenter(val minSize: Int, val maxSize: Int, val alwaysMin: Boolean =
val out = node.bundleOut val out = node.bundleOut
} }
// All managers must share a common FIFO domain (responses might end up interleaved) ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
val edgeOut = node.edgesOut(0) // All managers must share a common FIFO domain (responses might end up interleaved)
val edgeIn = node.edgesIn(0) val manager = edgeOut.manager
val manager = edgeOut.manager val managers = manager.managers
val managers = manager.managers val beatBytes = manager.beatBytes
val beatBytes = manager.beatBytes val fifoId = managers(0).fifoId
val fifoId = managers(0).fifoId require (fifoId.isDefined && managers.map(_.fifoId == fifoId).reduce(_ && _))
require (fifoId.isDefined && managers.map(_.fifoId == fifoId).reduce(_ && _))
// We don't support fragmenting to sub-beat accesses // We don't support fragmenting to sub-beat accesses
require (minSize >= beatBytes) require (minSize >= beatBytes)
// We can't support devices which are cached on both sides of us // We can't support devices which are cached on both sides of us
require (!edgeOut.manager.anySupportAcquireB || !edgeIn.client.anySupportProbe) require (!edgeOut.manager.anySupportAcquireB || !edgeIn.client.anySupportProbe)
/* The Fragmenter is a bit tricky, because there are 5 sizes in play: /* The Fragmenter is a bit tricky, because there are 5 sizes in play:
* max size -- the maximum transfer size possible * max size -- the maximum transfer size possible
* orig size -- the original pre-fragmenter size * orig size -- the original pre-fragmenter size
* frag size -- the modified post-fragmenter size * frag size -- the modified post-fragmenter size
* min size -- the threshold below which frag=orig * min size -- the threshold below which frag=orig
* beat size -- the amount transfered on any given beat * beat size -- the amount transfered on any given beat
* *
* The relationships are as follows: * The relationships are as follows:
* max >= orig >= frag * max >= orig >= frag
* max > min >= beat * max > min >= beat
* It IS possible that orig <= min (then frag=orig; ie: no fragmentation) * It IS possible that orig <= min (then frag=orig; ie: no fragmentation)
* *
* The fragment# (sent via TL.source) is measured in multiples of min size. * The fragment# (sent via TL.source) is measured in multiples of min size.
* Meanwhile, to track the progress, counters measure in multiples of beat size. * Meanwhile, to track the progress, counters measure in multiples of beat size.
* *
* Here is an example of a bus with max=256, min=8, beat=4 and a device supporting 16. * Here is an example of a bus with max=256, min=8, beat=4 and a device supporting 16.
* *
* in.A out.A (frag#) out.D (frag#) in.D gen# ack# * in.A out.A (frag#) out.D (frag#) in.D gen# ack#
* get64 get16 6 ackD16 6 ackD64 12 15 * get64 get16 6 ackD16 6 ackD64 12 15
* ackD16 6 ackD64 14 * ackD16 6 ackD64 14
* ackD16 6 ackD64 13 * ackD16 6 ackD64 13
* ackD16 6 ackD64 12 * ackD16 6 ackD64 12
* get16 4 ackD16 4 ackD64 8 11 * get16 4 ackD16 4 ackD64 8 11
* ackD16 4 ackD64 10 * ackD16 4 ackD64 10
* ackD16 4 ackD64 9 * ackD16 4 ackD64 9
* ackD16 4 ackD64 8 * ackD16 4 ackD64 8
* get16 2 ackD16 2 ackD64 4 7 * get16 2 ackD16 2 ackD64 4 7
* ackD16 2 ackD64 6 * ackD16 2 ackD64 6
* ackD16 2 ackD64 5 * ackD16 2 ackD64 5
* ackD16 2 ackD64 4 * ackD16 2 ackD64 4
* get16 0 ackD16 0 ackD64 0 3 * get16 0 ackD16 0 ackD64 0 3
* ackD16 0 ackD64 2 * ackD16 0 ackD64 2
* ackD16 0 ackD64 1 * ackD16 0 ackD64 1
* ackD16 0 ackD64 0 * ackD16 0 ackD64 0
* *
* get8 get8 0 ackD8 0 ackD8 0 1 * get8 get8 0 ackD8 0 ackD8 0 1
* ackD8 0 ackD8 0 * ackD8 0 ackD8 0
* *
* get4 get4 0 ackD4 0 ackD4 0 0 * get4 get4 0 ackD4 0 ackD4 0 0
* get1 get1 0 ackD1 0 ackD1 0 0 * get1 get1 0 ackD1 0 ackD1 0 0
* *
* put64 put16 6 15 * put64 put16 6 15
* put64 put16 6 14 * put64 put16 6 14
* put64 put16 6 13 * put64 put16 6 13
* put64 put16 6 ack16 6 12 12 * put64 put16 6 ack16 6 12 12
* put64 put16 4 11 * put64 put16 4 11
* put64 put16 4 10 * put64 put16 4 10
* put64 put16 4 9 * put64 put16 4 9
* put64 put16 4 ack16 4 8 8 * put64 put16 4 ack16 4 8 8
* put64 put16 2 7 * put64 put16 2 7
* put64 put16 2 6 * put64 put16 2 6
* put64 put16 2 5 * put64 put16 2 5
* put64 put16 2 ack16 2 4 4 * put64 put16 2 ack16 2 4 4
* put64 put16 0 3 * put64 put16 0 3
* put64 put16 0 2 * put64 put16 0 2
* put64 put16 0 1 * put64 put16 0 1
* put64 put16 0 ack16 0 ack64 0 0 * put64 put16 0 ack16 0 ack64 0 0
* *
* put8 put8 0 1 * put8 put8 0 1
* put8 put8 0 ack8 0 ack8 0 0 * put8 put8 0 ack8 0 ack8 0 0
* *
* put4 put4 0 ack4 0 ack4 0 0 * put4 put4 0 ack4 0 ack4 0 0
* put1 put1 0 ack1 0 ack1 0 0 * put1 put1 0 ack1 0 ack1 0 0
*/ */
val in = io.in(0) val counterBits = log2Up(maxSize/beatBytes)
val out = io.out(0) val maxDownSize = if (alwaysMin) minSize else manager.maxTransfer
val counterBits = log2Up(maxSize/beatBytes) // First, handle the return path
val maxDownSize = if (alwaysMin) minSize else manager.maxTransfer val acknum = RegInit(UInt(0, width = counterBits))
val dOrig = Reg(UInt())
val dFragnum = out.d.bits.source(fragmentBits-1, 0)
val dFirst = acknum === UInt(0)
val dsizeOH = UIntToOH (out.d.bits.size, log2Ceil(maxDownSize)+1)
val dsizeOH1 = UIntToOH1(out.d.bits.size, log2Up(maxDownSize))
val dHasData = edgeOut.hasData(out.d.bits)
// First, handle the return path // calculate new acknum
val acknum = RegInit(UInt(0, width = counterBits)) val acknum_fragment = dFragnum << log2Ceil(minSize/beatBytes)
val dOrig = Reg(UInt()) val acknum_size = dsizeOH1 >> log2Ceil(beatBytes)
val dFragnum = out.d.bits.source(fragmentBits-1, 0) assert (!out.d.valid || (acknum_fragment & acknum_size) === UInt(0))
val dFirst = acknum === UInt(0) val dFirst_acknum = acknum_fragment | Mux(dHasData, acknum_size, UInt(0))
val dsizeOH = UIntToOH (out.d.bits.size, log2Ceil(maxDownSize)+1) val ack_decrement = Mux(dHasData, UInt(1), dsizeOH >> log2Ceil(beatBytes))
val dsizeOH1 = UIntToOH1(out.d.bits.size, log2Up(maxDownSize)) // calculate the original size
val dHasData = edgeOut.hasData(out.d.bits) val dFirst_size = OH1ToUInt((dFragnum << log2Ceil(minSize)) | dsizeOH1)
// calculate new acknum when (out.d.fire()) {
val acknum_fragment = dFragnum << log2Ceil(minSize/beatBytes) acknum := Mux(dFirst, dFirst_acknum, acknum - ack_decrement)
val acknum_size = dsizeOH1 >> log2Ceil(beatBytes) when (dFirst) { dOrig := dFirst_size }
assert (!out.d.valid || (acknum_fragment & acknum_size) === UInt(0)) }
val dFirst_acknum = acknum_fragment | Mux(dHasData, acknum_size, UInt(0))
val ack_decrement = Mux(dHasData, UInt(1), dsizeOH >> log2Ceil(beatBytes))
// calculate the original size
val dFirst_size = OH1ToUInt((dFragnum << log2Ceil(minSize)) | dsizeOH1)
when (out.d.fire()) { // Swallow up non-data ack fragments
acknum := Mux(dFirst, dFirst_acknum, acknum - ack_decrement) val drop = !dHasData && (dFragnum =/= UInt(0))
when (dFirst) { dOrig := dFirst_size } out.d.ready := in.d.ready || drop
in.d.valid := out.d.valid && !drop
in.d.bits := out.d.bits // pass most stuff unchanged
in.d.bits.addr_lo := out.d.bits.addr_lo & ~dsizeOH1
in.d.bits.source := out.d.bits.source >> fragmentBits
in.d.bits.size := Mux(dFirst, dFirst_size, dOrig)
// Combine the error flag
val r_error = RegInit(Bool(false))
val d_error = r_error | out.d.bits.error
when (out.d.fire()) { r_error := Mux(drop, d_error, UInt(0)) }
in.d.bits.error := d_error
// What maximum transfer sizes do downstream devices support?
val maxArithmetics = managers.map(_.supportsArithmetic.max)
val maxLogicals = managers.map(_.supportsLogical.max)
val maxGets = managers.map(_.supportsGet.max)
val maxPutFulls = managers.map(_.supportsPutFull.max)
val maxPutPartials = managers.map(_.supportsPutPartial.max)
val maxHints = managers.map(m => if (m.supportsHint) maxDownSize else 0)
// We assume that the request is valid => size 0 is impossible
val lgMinSize = UInt(log2Ceil(minSize))
val maxLgArithmetics = maxArithmetics.map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
val maxLgLogicals = maxLogicals .map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
val maxLgGets = maxGets .map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
val maxLgPutFulls = maxPutFulls .map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
val maxLgPutPartials = maxPutPartials.map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
val maxLgHints = maxHints .map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
// Make the request repeatable
val repeater = Module(new Repeater(in.a.bits))
repeater.io.enq <> in.a
val in_a = repeater.io.deq
// If this is infront of a single manager, these become constants
val find = manager.findFast(edgeIn.address(in_a.bits))
val maxLgArithmetic = Mux1H(find, maxLgArithmetics)
val maxLgLogical = Mux1H(find, maxLgLogicals)
val maxLgGet = Mux1H(find, maxLgGets)
val maxLgPutFull = Mux1H(find, maxLgPutFulls)
val maxLgPutPartial = Mux1H(find, maxLgPutPartials)
val maxLgHint = Mux1H(find, maxLgHints)
val limit = if (alwaysMin) lgMinSize else
MuxLookup(in_a.bits.opcode, lgMinSize, Array(
TLMessages.PutFullData -> maxLgPutFull,
TLMessages.PutPartialData -> maxLgPutPartial,
TLMessages.ArithmeticData -> maxLgArithmetic,
TLMessages.LogicalData -> maxLgLogical,
TLMessages.Get -> maxLgGet,
TLMessages.Hint -> maxLgHint))
val aOrig = in_a.bits.size
val aFrag = Mux(aOrig > limit, limit, aOrig)
val aOrigOH1 = UIntToOH1(aOrig, log2Ceil(maxSize))
val aFragOH1 = UIntToOH1(aFrag, log2Up(maxDownSize))
val aHasData = node.edgesIn(0).hasData(in_a.bits)
val aMask = Mux(aHasData, UInt(0), aFragOH1)
val gennum = RegInit(UInt(0, width = counterBits))
val aFirst = gennum === UInt(0)
val old_gennum1 = Mux(aFirst, aOrigOH1 >> log2Ceil(beatBytes), gennum - UInt(1))
val new_gennum = ~(~old_gennum1 | (aMask >> log2Ceil(beatBytes))) // ~(~x|y) is width safe
val aFragnum = ~(~(old_gennum1 >> log2Ceil(minSize/beatBytes)) | (aFragOH1 >> log2Ceil(minSize)))
when (out.a.fire()) { gennum := new_gennum }
repeater.io.repeat := !aHasData && aFragnum =/= UInt(0)
out.a <> in_a
out.a.bits.address := in_a.bits.address | (~aFragnum << log2Ceil(minSize) & aOrigOH1)
out.a.bits.source := Cat(in_a.bits.source, aFragnum)
out.a.bits.size := aFrag
// Optimize away some of the Repeater's registers
assert (!repeater.io.full || !aHasData)
out.a.bits.data := in.a.bits.data
val fullMask = UInt((BigInt(1) << beatBytes) - 1)
assert (!repeater.io.full || in_a.bits.mask === fullMask)
out.a.bits.mask := Mux(repeater.io.full, fullMask, in.a.bits.mask)
// Tie off unused channels
in.b.valid := Bool(false)
in.c.ready := Bool(true)
in.e.ready := Bool(true)
out.b.ready := Bool(true)
out.c.valid := Bool(false)
out.e.valid := Bool(false)
} }
// Swallow up non-data ack fragments
val drop = !dHasData && (dFragnum =/= UInt(0))
out.d.ready := in.d.ready || drop
in.d.valid := out.d.valid && !drop
in.d.bits := out.d.bits // pass most stuff unchanged
in.d.bits.addr_lo := out.d.bits.addr_lo & ~dsizeOH1
in.d.bits.source := out.d.bits.source >> fragmentBits
in.d.bits.size := Mux(dFirst, dFirst_size, dOrig)
// Combine the error flag
val r_error = RegInit(Bool(false))
val d_error = r_error | out.d.bits.error
when (out.d.fire()) { r_error := Mux(drop, d_error, UInt(0)) }
in.d.bits.error := d_error
// What maximum transfer sizes do downstream devices support?
val maxArithmetics = managers.map(_.supportsArithmetic.max)
val maxLogicals = managers.map(_.supportsLogical.max)
val maxGets = managers.map(_.supportsGet.max)
val maxPutFulls = managers.map(_.supportsPutFull.max)
val maxPutPartials = managers.map(_.supportsPutPartial.max)
val maxHints = managers.map(m => if (m.supportsHint) maxDownSize else 0)
// We assume that the request is valid => size 0 is impossible
val lgMinSize = UInt(log2Ceil(minSize))
val maxLgArithmetics = maxArithmetics.map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
val maxLgLogicals = maxLogicals .map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
val maxLgGets = maxGets .map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
val maxLgPutFulls = maxPutFulls .map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
val maxLgPutPartials = maxPutPartials.map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
val maxLgHints = maxHints .map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m)))
// Make the request repeatable
val repeater = Module(new Repeater(in.a.bits))
repeater.io.enq <> in.a
val in_a = repeater.io.deq
// If this is infront of a single manager, these become constants
val find = manager.findFast(edgeIn.address(in_a.bits))
val maxLgArithmetic = Mux1H(find, maxLgArithmetics)
val maxLgLogical = Mux1H(find, maxLgLogicals)
val maxLgGet = Mux1H(find, maxLgGets)
val maxLgPutFull = Mux1H(find, maxLgPutFulls)
val maxLgPutPartial = Mux1H(find, maxLgPutPartials)
val maxLgHint = Mux1H(find, maxLgHints)
val limit = if (alwaysMin) lgMinSize else
MuxLookup(in_a.bits.opcode, lgMinSize, Array(
TLMessages.PutFullData -> maxLgPutFull,
TLMessages.PutPartialData -> maxLgPutPartial,
TLMessages.ArithmeticData -> maxLgArithmetic,
TLMessages.LogicalData -> maxLgLogical,
TLMessages.Get -> maxLgGet,
TLMessages.Hint -> maxLgHint))
val aOrig = in_a.bits.size
val aFrag = Mux(aOrig > limit, limit, aOrig)
val aOrigOH1 = UIntToOH1(aOrig, log2Ceil(maxSize))
val aFragOH1 = UIntToOH1(aFrag, log2Up(maxDownSize))
val aHasData = node.edgesIn(0).hasData(in_a.bits)
val aMask = Mux(aHasData, UInt(0), aFragOH1)
val gennum = RegInit(UInt(0, width = counterBits))
val aFirst = gennum === UInt(0)
val old_gennum1 = Mux(aFirst, aOrigOH1 >> log2Ceil(beatBytes), gennum - UInt(1))
val new_gennum = ~(~old_gennum1 | (aMask >> log2Ceil(beatBytes))) // ~(~x|y) is width safe
val aFragnum = ~(~(old_gennum1 >> log2Ceil(minSize/beatBytes)) | (aFragOH1 >> log2Ceil(minSize)))
when (out.a.fire()) { gennum := new_gennum }
repeater.io.repeat := !aHasData && aFragnum =/= UInt(0)
out.a <> in_a
out.a.bits.address := in_a.bits.address | (~aFragnum << log2Ceil(minSize) & aOrigOH1)
out.a.bits.source := Cat(in_a.bits.source, aFragnum)
out.a.bits.size := aFrag
// Optimize away some of the Repeater's registers
assert (!repeater.io.full || !aHasData)
out.a.bits.data := in.a.bits.data
val fullMask = UInt((BigInt(1) << beatBytes) - 1)
assert (!repeater.io.full || in_a.bits.mask === fullMask)
out.a.bits.mask := Mux(repeater.io.full, fullMask, in.a.bits.mask)
// Tie off unused channels
in.b.valid := Bool(false)
in.c.ready := Bool(true)
in.e.ready := Bool(true)
out.b.ready := Bool(true)
out.c.valid := Bool(false)
out.e.valid := Bool(false)
} }
} }
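Most of the Fragmenter's bookkeeping above is done on "one-hot minus one" masks: a TileLink size code k stands for 2^k bytes, and UIntToOH1 turns it into the byte-offset mask 2^k - 1, which then shifts cleanly into beat or min-size units. A small plain-Scala illustration (hypothetical helpers, not the RTL):

  def log2(x: Int): Int = 31 - Integer.numberOfLeadingZeros(x)
  def oh1(lgSize: Int): Int = (1 << lgSize) - 1     // size code -> byte-offset mask, as UIntToOH1 does

  val beatBytes   = 4
  val dsizeOH1    = oh1(4)                          // a 16-byte fragment covers offsets 0..15
  val beatsMinus1 = dsizeOH1 >> log2(beatBytes)     // the same span counted in beats: 4 beats -> 3
  assert(beatsMinus1 == 3)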


@@ -12,8 +12,8 @@ import diplomacy._
 class TLHintHandler(supportManagers: Boolean = true, supportClients: Boolean = false, passthrough: Boolean = true)(implicit p: Parameters) extends LazyModule
 {
   val node = TLAdapterNode(
-    clientFn  = { case Seq(c) => if (!supportClients) c else c.copy(minLatency = min(1, c.minLatency), clients = c.clients .map(_.copy(supportsHint = TransferSizes(1, c.maxTransfer)))) },
-    managerFn = { case Seq(m) => if (!supportManagers) m else m.copy(minLatency = min(1, m.minLatency), managers = m.managers.map(_.copy(supportsHint = TransferSizes(1, m.maxTransfer)))) })
+    clientFn  = { c => if (!supportClients) c else c.copy(minLatency = min(1, c.minLatency), clients = c.clients .map(_.copy(supportsHint = TransferSizes(1, c.maxTransfer)))) },
+    managerFn = { m => if (!supportManagers) m else m.copy(minLatency = min(1, m.minLatency), managers = m.managers.map(_.copy(supportsHint = TransferSizes(1, m.maxTransfer)))) })
   lazy val module = new LazyModuleImp(this) {
     val io = new Bundle {
@@ -21,79 +21,76 @@ class TLHintHandler(supportManagers: Boolean = true, supportClients: Boolean = f
val out = node.bundleOut val out = node.bundleOut
} }
val in = io.in(0) ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
val out = io.out(0) // Don't add support for clients if there is no BCE channel
val edgeIn = node.edgesIn(0) val bce = edgeOut.manager.anySupportAcquireB && edgeIn.client.anySupportProbe
val edgeOut = node.edgesOut(0) require (!supportClients || bce)
// Don't add support for clients if there is no BCE channel // Does it even make sense to add the HintHandler?
val bce = edgeOut.manager.anySupportAcquireB && edgeIn.client.anySupportProbe val smartClients = edgeIn.client.clients.map(_.supportsHint.max == edgeIn.client.maxTransfer).reduce(_&&_)
require (!supportClients || bce) val smartManagers = edgeOut.manager.managers.map(_.supportsHint.max == edgeOut.manager.maxTransfer).reduce(_&&_)
// Does it even make sense to add the HintHandler? if (supportManagers && !(passthrough && smartManagers)) {
val smartClients = edgeIn.client.clients.map(_.supportsHint.max == edgeIn.client.maxTransfer).reduce(_&&_) val address = edgeIn.address(in.a.bits)
val smartManagers = edgeOut.manager.managers.map(_.supportsHint.max == edgeOut.manager.maxTransfer).reduce(_&&_) val handleA = if (passthrough) !edgeOut.manager.supportsHintFast(address, edgeIn.size(in.a.bits)) else Bool(true)
val hintBitsAtA = handleA && in.a.bits.opcode === TLMessages.Hint
val hint = Wire(out.d)
if (supportManagers && !(passthrough && smartManagers)) { hint.valid := in.a.valid && hintBitsAtA
val address = edgeIn.address(in.a.bits) out.a.valid := in.a.valid && !hintBitsAtA
val handleA = if (passthrough) !edgeOut.manager.supportsHintFast(address, edgeIn.size(in.a.bits)) else Bool(true) in.a.ready := Mux(hintBitsAtA, hint.ready, out.a.ready)
val hintBitsAtA = handleA && in.a.bits.opcode === TLMessages.Hint
val hint = Wire(out.d)
hint.valid := in.a.valid && hintBitsAtA hint.bits := edgeIn.HintAck(in.a.bits, UInt(0))
out.a.valid := in.a.valid && !hintBitsAtA out.a.bits := in.a.bits
in.a.ready := Mux(hintBitsAtA, hint.ready, out.a.ready)
hint.bits := edgeIn.HintAck(in.a.bits, UInt(0)) TLArbiter(TLArbiter.lowestIndexFirst)(in.d, (edgeOut.numBeats1(out.d.bits), out.d), (UInt(0), Queue(hint, 1)))
out.a.bits := in.a.bits } else {
out.a.valid := in.a.valid
in.a.ready := out.a.ready
out.a.bits := in.a.bits
TLArbiter(TLArbiter.lowestIndexFirst)(in.d, (edgeOut.numBeats1(out.d.bits), out.d), (UInt(0), Queue(hint, 1))) in.d.valid := out.d.valid
} else { out.d.ready := in.d.ready
out.a.valid := in.a.valid in.d.bits := out.d.bits
in.a.ready := out.a.ready }
out.a.bits := in.a.bits
in.d.valid := out.d.valid if (supportClients && !(passthrough && smartClients)) {
out.d.ready := in.d.ready val handleB = if (passthrough) !edgeIn.client.supportsHint(out.b.bits.source, edgeOut.size(out.b.bits)) else Bool(true)
in.d.bits := out.d.bits val hintBitsAtB = handleB && out.b.bits.opcode === TLMessages.Hint
} val hint = Wire(in.c)
if (supportClients && !(passthrough && smartClients)) { hint.valid := out.b.valid && hintBitsAtB
val handleB = if (passthrough) !edgeIn.client.supportsHint(out.b.bits.source, edgeOut.size(out.b.bits)) else Bool(true) in.b.valid := out.b.valid && !hintBitsAtB
val hintBitsAtB = handleB && out.b.bits.opcode === TLMessages.Hint out.b.ready := Mux(hintBitsAtB, hint.ready, in.b.ready)
val hint = Wire(in.c)
hint.valid := out.b.valid && hintBitsAtB hint.bits := edgeOut.HintAck(out.b.bits)
in.b.valid := out.b.valid && !hintBitsAtB in.b.bits := out.b.bits
out.b.ready := Mux(hintBitsAtB, hint.ready, in.b.ready)
hint.bits := edgeOut.HintAck(out.b.bits) TLArbiter(TLArbiter.lowestIndexFirst)(out.c, (edgeIn.numBeats1(in.c.bits), in.c), (UInt(0), Queue(hint, 1)))
in.b.bits := out.b.bits } else if (bce) {
in.b.valid := out.b.valid
out.b.ready := in.b.ready
in.b.bits := out.b.bits
TLArbiter(TLArbiter.lowestIndexFirst)(out.c, (edgeIn.numBeats1(in.c.bits), in.c), (UInt(0), Queue(hint, 1))) out.c.valid := in.c.valid
} else if (bce) { in.c.ready := out.c.ready
in.b.valid := out.b.valid out.c.bits := in.c.bits
out.b.ready := in.b.ready } else {
in.b.bits := out.b.bits in.b.valid := Bool(false)
in.c.ready := Bool(true)
out.b.ready := Bool(true)
out.c.valid := Bool(false)
}
out.c.valid := in.c.valid if (bce) {
in.c.ready := out.c.ready // Pass E through unchanged
out.c.bits := in.c.bits out.e.valid := in.e.valid
} else { in.e.ready := out.e.ready
in.b.valid := Bool(false) out.e.bits := in.e.bits
in.c.ready := Bool(true) } else {
out.b.ready := Bool(true) in.e.ready := Bool(true)
out.c.valid := Bool(false) out.e.valid := Bool(false)
} }
if (bce) {
// Pass E through unchanged
out.e.valid := in.e.valid
in.e.ready := out.e.ready
out.e.bits := in.e.bits
} else {
in.e.ready := Bool(true)
out.e.valid := Bool(false)
} }
} }
} }
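The manager-side interception above reduces to a small predicate: a Hint is answered locally only when manager support was requested and passthrough cannot already prove the addressed manager handles it. A hypothetical plain-Scala restatement:

  def interceptHint(isHint: Boolean, passthrough: Boolean, managerHandlesHint: Boolean): Boolean =
    isHint && (!passthrough || !managerHandlesHint)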


@@ -81,16 +81,16 @@ object IntImp extends NodeImp[IntSourcePortParameters, IntSinkPortParameters, In
 case class IntIdentityNode() extends IdentityNode(IntImp)
 case class IntSourceNode(num: Int) extends SourceNode(IntImp)(
-  IntSourcePortParameters(Seq(IntSourceParameters(num))), (if (num == 0) 0 else 1) to 1)
+  if (num == 0) Seq() else Seq(IntSourcePortParameters(Seq(IntSourceParameters(num)))))
 case class IntSinkNode() extends SinkNode(IntImp)(
-  IntSinkPortParameters(Seq(IntSinkParameters())))
+  Seq(IntSinkPortParameters(Seq(IntSinkParameters()))))
-case class IntAdapterNode(
+case class IntNexusNode(
   sourceFn:       Seq[IntSourcePortParameters] => IntSourcePortParameters,
   sinkFn:         Seq[IntSinkPortParameters]   => IntSinkPortParameters,
-  numSourcePorts: Range.Inclusive = 1 to 1,
-  numSinkPorts:   Range.Inclusive = 1 to 1)
-  extends InteriorNode(IntImp)(sourceFn, sinkFn, numSourcePorts, numSinkPorts)
+  numSourcePorts: Range.Inclusive = 0 to 128,
+  numSinkPorts:   Range.Inclusive = 0 to 128)
+  extends NexusNode(IntImp)(sourceFn, sinkFn, numSourcePorts, numSinkPorts)

 case class IntOutputNode() extends OutputNode(IntImp)
 case class IntInputNode() extends InputNode(IntImp)
@@ -103,9 +103,7 @@ case class IntInternalInputNode(num: Int) extends InternalInputNode(IntImp)(Seq(
 class IntXbar()(implicit p: Parameters) extends LazyModule
 {
-  val intnode = IntAdapterNode(
-    numSourcePorts = 0 to 128,
-    numSinkPorts   = 0 to 128,
+  val intnode = IntNexusNode(
     sinkFn   = { _ => IntSinkPortParameters(Seq(IntSinkParameters())) },
     sourceFn = { seq =>
       IntSourcePortParameters((seq zip seq.map(_.num).scanLeft(0)(_+_).init).map {


@ -86,29 +86,33 @@ object TLImp extends NodeImp[TLClientPortParameters, TLManagerPortParameters, TL
// Nodes implemented inside modules // Nodes implemented inside modules
case class TLIdentityNode() extends IdentityNode(TLImp) case class TLIdentityNode() extends IdentityNode(TLImp)
case class TLClientNode(portParams: TLClientPortParameters, numPorts: Range.Inclusive = 1 to 1) case class TLClientNode(portParams: Seq[TLClientPortParameters]) extends SourceNode(TLImp)(portParams)
extends SourceNode(TLImp)(portParams, numPorts) case class TLManagerNode(portParams: Seq[TLManagerPortParameters]) extends SinkNode(TLImp)(portParams)
case class TLManagerNode(portParams: TLManagerPortParameters, numPorts: Range.Inclusive = 1 to 1)
extends SinkNode(TLImp)(portParams, numPorts)
object TLClientNode object TLClientNode
{ {
def apply(params: TLClientParameters) = def apply(params: TLClientParameters) =
new TLClientNode(TLClientPortParameters(Seq(params)), 1 to 1) new TLClientNode(Seq(TLClientPortParameters(Seq(params))))
} }
object TLManagerNode object TLManagerNode
{ {
def apply(beatBytes: Int, params: TLManagerParameters) = def apply(beatBytes: Int, params: TLManagerParameters) =
new TLManagerNode(TLManagerPortParameters(Seq(params), beatBytes, minLatency = 0), 1 to 1) new TLManagerNode(Seq(TLManagerPortParameters(Seq(params), beatBytes, minLatency = 0)))
} }
case class TLAdapterNode( case class TLAdapterNode(
clientFn: TLClientPortParameters => TLClientPortParameters,
managerFn: TLManagerPortParameters => TLManagerPortParameters,
num: Range.Inclusive = 0 to 999)
extends AdapterNode(TLImp)(clientFn, managerFn, num)
case class TLNexusNode(
clientFn: Seq[TLClientPortParameters] => TLClientPortParameters, clientFn: Seq[TLClientPortParameters] => TLClientPortParameters,
managerFn: Seq[TLManagerPortParameters] => TLManagerPortParameters, managerFn: Seq[TLManagerPortParameters] => TLManagerPortParameters,
numClientPorts: Range.Inclusive = 1 to 1, numClientPorts: Range.Inclusive = 1 to 999,
numManagerPorts: Range.Inclusive = 1 to 1) numManagerPorts: Range.Inclusive = 1 to 999)
extends InteriorNode(TLImp)(clientFn, managerFn, numClientPorts, numManagerPorts) extends NexusNode(TLImp)(clientFn, managerFn, numClientPorts, numManagerPorts)
// Nodes passed from an inner module // Nodes passed from an inner module
case class TLOutputNode() extends OutputNode(TLImp) case class TLOutputNode() extends OutputNode(TLImp)
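As a point of reference, a minimal sketch (hypothetical module name, address range and id widths, assuming the usual Chisel._, config._, diplomacy._ and uncore.tilelink2._ imports) of declaring endpoints with the new Seq-based constructors, both through the single-port companion-object helpers above and through an explicit multi-port Seq:

import Chisel._
import config._
import diplomacy._
import uncore.tilelink2._

class ExampleEndpoints(implicit p: Parameters) extends LazyModule {
  // single-port shorthands via the companion objects
  val client  = TLClientNode(TLClientParameters(sourceId = IdRange(0, 4)))
  val manager = TLManagerNode(8, TLManagerParameters(address = Seq(AddressSet(0x0, 0xfff)))) // beatBytes = 8
  // explicit multi-port form: one TLClientPortParameters entry per requested port
  val twoPortClient = TLClientNode(Seq.fill(2)(
    TLClientPortParameters(Seq(TLClientParameters(sourceId = IdRange(0, 1))))))
  lazy val module = new LazyModuleImp(this) {
    val io = new Bundle {
      val out  = client.bundleOut
      val out2 = twoPortClient.bundleOut
      val in   = manager.bundleIn
    }
  }
}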
@ -169,17 +173,15 @@ case class TLAsyncIdentityNode() extends IdentityNode(TLAsyncImp)
case class TLAsyncOutputNode() extends OutputNode(TLAsyncImp) case class TLAsyncOutputNode() extends OutputNode(TLAsyncImp)
case class TLAsyncInputNode() extends InputNode(TLAsyncImp) case class TLAsyncInputNode() extends InputNode(TLAsyncImp)
case class TLAsyncSourceNode(sync: Int) extends MixedNode(TLImp, TLAsyncImp)( case class TLAsyncSourceNode(sync: Int)
dFn = { case (1, Seq(p)) => Seq(TLAsyncClientPortParameters(p)) }, extends MixedAdapterNode(TLImp, TLAsyncImp)(
uFn = { case (1, Seq(p)) => Seq(p.base.copy(minLatency = sync+1)) }, // discard cycles in other clock domain dFn = { p => TLAsyncClientPortParameters(p) },
numPO = 1 to 1, uFn = { p => p.base.copy(minLatency = sync+1) }) // discard cycles in other clock domain
numPI = 1 to 1)
case class TLAsyncSinkNode(depth: Int, sync: Int) extends MixedNode(TLAsyncImp, TLImp)( case class TLAsyncSinkNode(depth: Int, sync: Int)
dFn = { case (1, Seq(p)) => Seq(p.base.copy(minLatency = sync+1)) }, extends MixedAdapterNode(TLAsyncImp, TLImp)(
uFn = { case (1, Seq(p)) => Seq(TLAsyncManagerPortParameters(depth, p)) }, dFn = { p => p.base.copy(minLatency = sync+1) },
numPO = 1 to 1, uFn = { p => TLAsyncManagerPortParameters(depth, p) })
numPI = 1 to 1)
object TLRationalImp extends NodeImp[TLClientPortParameters, TLManagerPortParameters, TLEdgeParameters, TLEdgeParameters, TLRationalBundle] object TLRationalImp extends NodeImp[TLClientPortParameters, TLManagerPortParameters, TLEdgeParameters, TLEdgeParameters, TLRationalBundle]
{ {
@ -205,14 +207,12 @@ case class TLRationalIdentityNode() extends IdentityNode(TLRationalImp)
case class TLRationalOutputNode() extends OutputNode(TLRationalImp) case class TLRationalOutputNode() extends OutputNode(TLRationalImp)
case class TLRationalInputNode() extends InputNode(TLRationalImp) case class TLRationalInputNode() extends InputNode(TLRationalImp)
case class TLRationalSourceNode() extends MixedNode(TLImp, TLRationalImp)( case class TLRationalSourceNode()
dFn = { case (_, s) => s }, extends MixedAdapterNode(TLImp, TLRationalImp)(
uFn = { case (_, s) => s.map(p => p.copy(minLatency = 1)) }, // discard cycles from other clock domain dFn = { p => p },
numPO = 0 to 999, uFn = { p => p.copy(minLatency = 1) }) // discard cycles from other clock domain
numPI = 0 to 999)
case class TLRationalSinkNode() extends MixedNode(TLRationalImp, TLImp)( case class TLRationalSinkNode()
dFn = { case (_, s) => s.map(p => p.copy(minLatency = 1)) }, extends MixedAdapterNode(TLRationalImp, TLImp)(
uFn = { case (_, s) => s }, dFn = { p => p.copy(minLatency = 1) },
numPO = 0 to 999, uFn = { p => p })
numPI = 0 to 999)
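A hedged sketch of the new per-port signature (the node name below is hypothetical): under MixedAdapterNode, dFn and uFn receive a single port's parameters and are applied by the library once per matched edge, instead of receiving a (port count, Seq) pair:

// TL-to-TL adapter that only bumps the advertised latency; all other fields pass through.
case class ExampleLatencyNode() extends MixedAdapterNode(TLImp, TLImp)(
  dFn = { p => p },                                      // client parameters flow downward unchanged
  uFn = { p => p.copy(minLatency = p.minLatency + 1) })  // managers advertise one extra cycle upward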


@ -5,6 +5,7 @@ package uncore.tilelink2
import Chisel._ import Chisel._
import config._ import config._
import diplomacy._ import diplomacy._
import util.GenericParameterizedBundle
// We detect concurrent puts that put memory into an undefined state. // We detect concurrent puts that put memory into an undefined state.
// put0, put0Ack, put1, put1Ack => ok: defined // put0, put0Ack, put1, put1Ack => ok: defined
@ -31,268 +32,271 @@ class TLRAMModel(log: String = "")(implicit p: Parameters) extends LazyModule
val out = node.bundleOut val out = node.bundleOut
} }
// !!! support multiple clients via clock division ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
require (io.out.size == 1) val edge = edgeIn
val endAddress = edge.manager.maxAddress + 1
val endSourceId = edge.client.endSourceId
val maxTransfer = edge.manager.maxTransfer
val beatBytes = edge.manager.beatBytes
val endAddressHi = (endAddress / beatBytes).intValue
val maxLgBeats = log2Up(maxTransfer/beatBytes)
val shift = log2Ceil(beatBytes)
val decTrees = log2Up(maxTransfer/beatBytes)
val addressBits = log2Up(endAddress)
val countBits = log2Up(endSourceId)
val sizeBits = edge.bundle.sizeBits
val in = io.in(0) // Reset control logic
val out = io.out(0) val wipeIndex = RegInit(UInt(0, width = log2Ceil(endAddressHi) + 1))
val wipe = !wipeIndex(log2Ceil(endAddressHi))
wipeIndex := wipeIndex + wipe.asUInt
val edge = node.edgesIn(0) // Block traffic while wiping Mems
val endAddress = edge.manager.maxAddress + 1 in.a.ready := out.a.ready && !wipe
val endSourceId = edge.client.endSourceId out.a.valid := in.a.valid && !wipe
val maxTransfer = edge.manager.maxTransfer out.a.bits := in.a.bits
val beatBytes = edge.manager.beatBytes out.d.ready := in.d.ready && !wipe
val endAddressHi = (endAddress / beatBytes).intValue in.d.valid := out.d.valid && !wipe
val maxLgBeats = log2Up(maxTransfer/beatBytes) in.d.bits := out.d.bits
val shift = log2Ceil(beatBytes)
val decTrees = log2Up(maxTransfer/beatBytes)
val addressBits = log2Up(endAddress)
val countBits = log2Up(endSourceId)
val sizeBits = edge.bundle.sizeBits
// Reset control logic // BCE unsupported
val wipeIndex = RegInit(UInt(0, width = log2Ceil(endAddressHi) + 1)) in.b.valid := Bool(false)
val wipe = !wipeIndex(log2Ceil(endAddressHi)) out.c.valid := Bool(false)
wipeIndex := wipeIndex + wipe.asUInt out.e.valid := Bool(false)
out.b.ready := Bool(true)
in.c.ready := Bool(true)
in.e.ready := Bool(true)
// Block traffic while wiping Mems val params = TLRAMModel.MonitorParameters(addressBits, sizeBits)
in.a.ready := out.a.ready && !wipe
out.a.valid := in.a.valid && !wipe
out.a.bits := in.a.bits
out.d.ready := in.d.ready && !wipe
in.d.valid := out.d.valid && !wipe
in.d.bits := out.d.bits
// BCE unsupported // Infer as simple dual port BRAM/M10k with write-first/new-data semantics (bypass needed)
in.b.valid := Bool(false) val shadow = Seq.fill(beatBytes) { Mem(endAddressHi, new TLRAMModel.ByteMonitor(params)) }
out.c.valid := Bool(false) val inc_bytes = Seq.fill(beatBytes) { Mem(endAddressHi, UInt(width = countBits)) }
out.e.valid := Bool(false) val dec_bytes = Seq.fill(beatBytes) { Mem(endAddressHi, UInt(width = countBits)) }
out.b.ready := Bool(true) val inc_trees = Seq.tabulate(decTrees) { i => Mem(endAddressHi >> (i+1), UInt(width = countBits)) }
in.c.ready := Bool(true) val dec_trees = Seq.tabulate(decTrees) { i => Mem(endAddressHi >> (i+1), UInt(width = countBits)) }
in.e.ready := Bool(true)
class ByteMonitor extends Bundle { val shadow_wen = Wire(init = Fill(beatBytes, wipe))
val valid = Bool() val inc_bytes_wen = Wire(init = Fill(beatBytes, wipe))
val value = UInt(width = 8) val dec_bytes_wen = Wire(init = Fill(beatBytes, wipe))
} val inc_trees_wen = Wire(init = Fill(decTrees, wipe))
class FlightMonitor extends Bundle { val dec_trees_wen = Wire(init = Fill(decTrees, wipe))
val base = UInt(width = addressBits)
val size = UInt(width = sizeBits)
val opcode = UInt(width = 3)
}
// Infer as simple dual port BRAM/M10k with write-first/new-data semantics (bypass needed) // This must be registers b/c we build a CAM from it
val shadow = Seq.fill(beatBytes) { Mem(endAddressHi, new ByteMonitor) } val flight = Reg(Vec(endSourceId, new TLRAMModel.FlightMonitor(params)))
val inc_bytes = Seq.fill(beatBytes) { Mem(endAddressHi, UInt(width = countBits)) } val valid = Reg(Vec(endSourceId, Bool()))
val dec_bytes = Seq.fill(beatBytes) { Mem(endAddressHi, UInt(width = countBits)) }
val inc_trees = Seq.tabulate(decTrees) { i => Mem(endAddressHi >> (i+1), UInt(width = countBits)) }
val dec_trees = Seq.tabulate(decTrees) { i => Mem(endAddressHi >> (i+1), UInt(width = countBits)) }
val shadow_wen = Wire(init = Fill(beatBytes, wipe)) // We want to cross flight data from A to D in the same cycle (for combinational TL2 devices)
val inc_bytes_wen = Wire(init = Fill(beatBytes, wipe)) val a_flight = Wire(new TLRAMModel.FlightMonitor(params))
val dec_bytes_wen = Wire(init = Fill(beatBytes, wipe)) a_flight.base := edge.address(in.a.bits)
val inc_trees_wen = Wire(init = Fill(decTrees, wipe)) a_flight.size := edge.size(in.a.bits)
val dec_trees_wen = Wire(init = Fill(decTrees, wipe)) a_flight.opcode := in.a.bits.opcode
// This must be registers b/c we build a CAM from it when (in.a.fire()) { flight(in.a.bits.source) := a_flight }
val flight = Reg(Vec(endSourceId, new FlightMonitor)) val bypass = if (edge.manager.minLatency > 0) Bool(false) else in.a.valid && in.a.bits.source === out.d.bits.source
val valid = Reg(Vec(endSourceId, Bool())) val d_flight = RegNext(Mux(bypass, a_flight, flight(out.d.bits.source)))
// We want to cross flight data from A to D in the same cycle (for combinational TL2 devices) // Process A access requests
val a_flight = Wire(new FlightMonitor) val a = Reg(next = in.a.bits)
a_flight.base := edge.address(in.a.bits) val a_fire = Reg(next = in.a.fire(), init = Bool(false))
a_flight.size := edge.size(in.a.bits) val (a_first, a_last, _, a_address_inc) = edge.addr_inc(a, a_fire)
a_flight.opcode := in.a.bits.opcode val a_size = edge.size(a)
val a_sizeOH = UIntToOH(a_size)
val a_address = a.address | a_address_inc
val a_addr_hi = edge.addr_hi(a_address)
val a_base = edge.address(a)
val a_mask = edge.mask(a_base, a_size)
val a_fifo = edge.manager.hasFifoIdFast(a_base)
when (in.a.fire()) { flight(in.a.bits.source) := a_flight } // Grab the concurrency state we need
val bypass = if (edge.manager.minLatency > 0) Bool(false) else in.a.valid && in.a.bits.source === out.d.bits.source val a_inc_bytes = inc_bytes.map(_.read(a_addr_hi))
val d_flight = RegNext(Mux(bypass, a_flight, flight(out.d.bits.source))) val a_dec_bytes = dec_bytes.map(_.read(a_addr_hi))
val a_inc_trees = inc_trees.zipWithIndex.map{ case (m, i) => m.read(a_addr_hi >> (i+1)) }
val a_dec_trees = dec_trees.zipWithIndex.map{ case (m, i) => m.read(a_addr_hi >> (i+1)) }
val a_inc_tree = a_inc_trees.fold(UInt(0))(_ + _)
val a_dec_tree = a_dec_trees.fold(UInt(0))(_ + _)
val a_inc = a_inc_bytes.map(_ + a_inc_tree)
val a_dec = a_dec_bytes.map(_ + a_dec_tree)
// Process A access requests when (a_fire) {
val a = Reg(next = in.a.bits) // Record the request so we can handle its response
val a_fire = Reg(next = in.a.fire(), init = Bool(false)) assert (a.opcode =/= TLMessages.Acquire)
val (a_first, a_last, _, a_address_inc) = edge.addr_inc(a, a_fire)
val a_size = edge.size(a)
val a_sizeOH = UIntToOH(a_size)
val a_address = a.address | a_address_inc
val a_addr_hi = edge.addr_hi(a_address)
val a_base = edge.address(a)
val a_mask = edge.mask(a_base, a_size)
val a_fifo = edge.manager.hasFifoIdFast(a_base)
// Grab the concurrency state we need // Mark the operation as valid
val a_inc_bytes = inc_bytes.map(_.read(a_addr_hi)) valid(a.source) := Bool(true)
val a_dec_bytes = dec_bytes.map(_.read(a_addr_hi))
val a_inc_trees = inc_trees.zipWithIndex.map{ case (m, i) => m.read(a_addr_hi >> (i+1)) }
val a_dec_trees = dec_trees.zipWithIndex.map{ case (m, i) => m.read(a_addr_hi >> (i+1)) }
val a_inc_tree = a_inc_trees.fold(UInt(0))(_ + _)
val a_dec_tree = a_dec_trees.fold(UInt(0))(_ + _)
val a_inc = a_inc_bytes.map(_ + a_inc_tree)
val a_dec = a_dec_bytes.map(_ + a_dec_tree)
when (a_fire) { // Increase the per-byte flight counter for the whole transaction
// Record the request so we can handle its response when (a_first && a.opcode =/= TLMessages.Hint && a.opcode =/= TLMessages.Get) {
assert (a.opcode =/= TLMessages.Acquire) when (a_size <= UInt(shift)) {
inc_bytes_wen := a_mask
// Mark the operation as valid }
valid(a.source) := Bool(true) inc_trees_wen := a_sizeOH >> (shift+1)
// Increase the per-byte flight counter for the whole transaction
when (a_first && a.opcode =/= TLMessages.Hint && a.opcode =/= TLMessages.Get) {
when (a_size <= UInt(shift)) {
inc_bytes_wen := a_mask
} }
inc_trees_wen := a_sizeOH >> (shift+1)
}
when (a.opcode === TLMessages.PutFullData || a.opcode === TLMessages.PutPartialData || when (a.opcode === TLMessages.PutFullData || a.opcode === TLMessages.PutPartialData ||
a.opcode === TLMessages.ArithmeticData || a.opcode === TLMessages.LogicalData) { a.opcode === TLMessages.ArithmeticData || a.opcode === TLMessages.LogicalData) {
shadow_wen := a.mask shadow_wen := a.mask
for (i <- 0 until beatBytes) { for (i <- 0 until beatBytes) {
val busy = a_inc(i) - a_dec(i) - (!a_first).asUInt val busy = a_inc(i) - a_dec(i) - (!a_first).asUInt
val byte = a.data(8*(i+1)-1, 8*i) val byte = a.data(8*(i+1)-1, 8*i)
when (a.mask(i)) { when (a.mask(i)) {
printf(log + " ") printf(log + " ")
when (a.opcode === TLMessages.PutFullData) { printf("PF") } when (a.opcode === TLMessages.PutFullData) { printf("PF") }
when (a.opcode === TLMessages.PutPartialData) { printf("PP") } when (a.opcode === TLMessages.PutPartialData) { printf("PP") }
when (a.opcode === TLMessages.ArithmeticData) { printf("A ") } when (a.opcode === TLMessages.ArithmeticData) { printf("A ") }
when (a.opcode === TLMessages.LogicalData) { printf("L ") } when (a.opcode === TLMessages.LogicalData) { printf("L ") }
printf(" 0x%x := 0x%x #%d %x\n", a_addr_hi << shift | UInt(i), byte, busy, a.param) printf(" 0x%x := 0x%x #%d %x\n", a_addr_hi << shift | UInt(i), byte, busy, a.param)
}
} }
} }
}
when (a.opcode === TLMessages.Get) { when (a.opcode === TLMessages.Get) {
printf(log + " G 0x%x - 0%x\n", a_base, a_base | UIntToOH1(a_size, addressBits)) printf(log + " G 0x%x - 0%x\n", a_base, a_base | UIntToOH1(a_size, addressBits))
}
}
val a_waddr = Mux(wipe, wipeIndex, a_addr_hi)
for (i <- 0 until beatBytes) {
val data = Wire(new ByteMonitor)
val busy = a_inc(i) =/= a_dec(i) + (!a_first).asUInt
val amo = a.opcode === TLMessages.ArithmeticData || a.opcode === TLMessages.LogicalData
data.valid := Mux(wipe, Bool(false), (!busy || a_fifo) && !amo)
// !!! calculate the AMO?
data.value := a.data(8*(i+1)-1, 8*i)
when (shadow_wen(i)) {
shadow(i).write(a_waddr, data)
}
}
for (i <- 0 until beatBytes) {
val data = Mux(wipe, UInt(0), a_inc_bytes(i) + UInt(1))
when (inc_bytes_wen(i)) {
inc_bytes(i).write(a_waddr, data)
}
}
for (i <- 0 until inc_trees.size) {
val data = Mux(wipe, UInt(0), a_inc_trees(i) + UInt(1))
when (inc_trees_wen(i)) {
inc_trees(i).write(a_waddr >> (i+1), data)
}
}
// Process D access responses
val d = RegNext(out.d.bits)
val d_fire = Reg(next = out.d.fire(), init = Bool(false))
val (d_first, d_last, _, d_address_inc) = edge.addr_inc(d, d_fire)
val d_size = edge.size(d)
val d_sizeOH = UIntToOH(d_size)
val d_base = d_flight.base
val d_address = d_base | d_address_inc
val d_addr_hi = edge.addr_hi(d_address)
val d_mask = edge.mask(d_base, d_size)
val d_fifo = edge.manager.hasFifoIdFast(d_flight.base)
// Grab the concurrency state we need
val d_inc_bytes = inc_bytes.map(_.read(d_addr_hi))
val d_dec_bytes = dec_bytes.map(_.read(d_addr_hi))
val d_inc_trees = inc_trees.zipWithIndex.map{ case (m, i) => m.read(d_addr_hi >> (i+1)) }
val d_dec_trees = dec_trees.zipWithIndex.map{ case (m, i) => m.read(d_addr_hi >> (i+1)) }
val d_inc_tree = d_inc_trees.fold(UInt(0))(_ + _)
val d_dec_tree = d_dec_trees.fold(UInt(0))(_ + _)
val d_inc = d_inc_bytes.map(_ + d_inc_tree)
val d_dec = d_dec_bytes.map(_ + d_dec_tree)
val d_shadow = shadow.map(_.read(d_addr_hi))
val d_valid = valid(d.source)
when (d_fire) {
// Check the response is correct
assert (d_size === d_flight.size)
// addr_lo is allowed to differ
when (d_flight.opcode === TLMessages.Hint) {
assert (d.opcode === TLMessages.HintAck)
}
// Decrease the per-byte flight counter for the whole transaction
when (d_last && d_flight.opcode =/= TLMessages.Hint && d_flight.opcode =/= TLMessages.Get) {
when (d_size <= UInt(shift)) {
dec_bytes_wen := d_mask
}
dec_trees_wen := d_sizeOH >> (shift+1)
// NOTE: D channel carries uninterrupted multibeat op, so updating on last is fine
for (i <- 0 until endSourceId) {
// Does this modification overlap a Get? => wipe its valid
val f_base = flight(i).base
val f_size = flight(i).size
val f_bits = UIntToOH1(f_size, addressBits)
val d_bits = UIntToOH1(d_size, addressBits)
val overlap = ~(~(f_base ^ d_base) | (f_bits | d_bits)) === UInt(0)
when (overlap) { valid(i) := Bool(false) }
} }
} }
when (d_flight.opcode === TLMessages.PutFullData || d_flight.opcode === TLMessages.PutPartialData) { val a_waddr = Mux(wipe, wipeIndex, a_addr_hi)
assert (d.opcode === TLMessages.AccessAck) for (i <- 0 until beatBytes) {
printf(log + " ") val data = Wire(new TLRAMModel.ByteMonitor(params))
when (d_flight.opcode === TLMessages.PutFullData) { printf("pf") } val busy = a_inc(i) =/= a_dec(i) + (!a_first).asUInt
when (d_flight.opcode === TLMessages.PutPartialData) { printf("pp") } val amo = a.opcode === TLMessages.ArithmeticData || a.opcode === TLMessages.LogicalData
printf(" 0x%x - 0x%x\n", d_base, d_base | UIntToOH1(d_size, addressBits)) data.valid := Mux(wipe, Bool(false), (!busy || a_fifo) && !amo)
// !!! calculate the AMO?
data.value := a.data(8*(i+1)-1, 8*i)
when (shadow_wen(i)) {
shadow(i).write(a_waddr, data)
}
} }
when (d_flight.opcode === TLMessages.Get || d_flight.opcode === TLMessages.ArithmeticData || d_flight.opcode === TLMessages.LogicalData) { for (i <- 0 until beatBytes) {
assert (d.opcode === TLMessages.AccessAckData) val data = Mux(wipe, UInt(0), a_inc_bytes(i) + UInt(1))
for (i <- 0 until beatBytes) { when (inc_bytes_wen(i)) {
val got = d.data(8*(i+1)-1, 8*i) inc_bytes(i).write(a_waddr, data)
val shadow = Wire(init = d_shadow(i)) }
when (d_mask(i)) { }
val d_addr = d_addr_hi << shift | UInt(i)
printf(log + " ") for (i <- 0 until inc_trees.size) {
when (d_flight.opcode === TLMessages.Get) { printf("g ") } val data = Mux(wipe, UInt(0), a_inc_trees(i) + UInt(1))
when (d_flight.opcode === TLMessages.ArithmeticData) { printf("a ") } when (inc_trees_wen(i)) {
when (d_flight.opcode === TLMessages.LogicalData) { printf("l ") } inc_trees(i).write(a_waddr >> (i+1), data)
printf(" 0x%x := 0x%x", d_addr, got) }
when (!shadow.valid) { }
printf(", undefined (uninitialized or prior overlapping puts)\n")
} .elsewhen (d_inc(i) =/= d_dec(i)) { // Process D access responses
printf(", undefined (concurrent incomplete puts #%d)\n", d_inc(i) - d_dec(i)) val d = RegNext(out.d.bits)
} .elsewhen (!d_fifo && !d_valid) { val d_fire = Reg(next = out.d.fire(), init = Bool(false))
printf(", undefined (concurrent completed put)\n") val (d_first, d_last, _, d_address_inc) = edge.addr_inc(d, d_fire)
} .otherwise { val d_size = edge.size(d)
printf("\n") val d_sizeOH = UIntToOH(d_size)
assert (shadow.value === got) val d_base = d_flight.base
val d_address = d_base | d_address_inc
val d_addr_hi = edge.addr_hi(d_address)
val d_mask = edge.mask(d_base, d_size)
val d_fifo = edge.manager.hasFifoIdFast(d_flight.base)
// Grab the concurrency state we need
val d_inc_bytes = inc_bytes.map(_.read(d_addr_hi))
val d_dec_bytes = dec_bytes.map(_.read(d_addr_hi))
val d_inc_trees = inc_trees.zipWithIndex.map{ case (m, i) => m.read(d_addr_hi >> (i+1)) }
val d_dec_trees = dec_trees.zipWithIndex.map{ case (m, i) => m.read(d_addr_hi >> (i+1)) }
val d_inc_tree = d_inc_trees.fold(UInt(0))(_ + _)
val d_dec_tree = d_dec_trees.fold(UInt(0))(_ + _)
val d_inc = d_inc_bytes.map(_ + d_inc_tree)
val d_dec = d_dec_bytes.map(_ + d_dec_tree)
val d_shadow = shadow.map(_.read(d_addr_hi))
val d_valid = valid(d.source)
when (d_fire) {
// Check the response is correct
assert (d_size === d_flight.size)
// addr_lo is allowed to differ
when (d_flight.opcode === TLMessages.Hint) {
assert (d.opcode === TLMessages.HintAck)
}
// Decrease the per-byte flight counter for the whole transaction
when (d_last && d_flight.opcode =/= TLMessages.Hint && d_flight.opcode =/= TLMessages.Get) {
when (d_size <= UInt(shift)) {
dec_bytes_wen := d_mask
}
dec_trees_wen := d_sizeOH >> (shift+1)
// NOTE: D channel carries uninterrupted multibeat op, so updating on last is fine
for (i <- 0 until endSourceId) {
// Does this modification overlap a Get? => wipe its valid
val f_base = flight(i).base
val f_size = flight(i).size
val f_bits = UIntToOH1(f_size, addressBits)
val d_bits = UIntToOH1(d_size, addressBits)
val overlap = ~(~(f_base ^ d_base) | (f_bits | d_bits)) === UInt(0)
when (overlap) { valid(i) := Bool(false) }
}
}
when (d_flight.opcode === TLMessages.PutFullData || d_flight.opcode === TLMessages.PutPartialData) {
assert (d.opcode === TLMessages.AccessAck)
printf(log + " ")
when (d_flight.opcode === TLMessages.PutFullData) { printf("pf") }
when (d_flight.opcode === TLMessages.PutPartialData) { printf("pp") }
printf(" 0x%x - 0x%x\n", d_base, d_base | UIntToOH1(d_size, addressBits))
}
when (d_flight.opcode === TLMessages.Get || d_flight.opcode === TLMessages.ArithmeticData || d_flight.opcode === TLMessages.LogicalData) {
assert (d.opcode === TLMessages.AccessAckData)
for (i <- 0 until beatBytes) {
val got = d.data(8*(i+1)-1, 8*i)
val shadow = Wire(init = d_shadow(i))
when (d_mask(i)) {
val d_addr = d_addr_hi << shift | UInt(i)
printf(log + " ")
when (d_flight.opcode === TLMessages.Get) { printf("g ") }
when (d_flight.opcode === TLMessages.ArithmeticData) { printf("a ") }
when (d_flight.opcode === TLMessages.LogicalData) { printf("l ") }
printf(" 0x%x := 0x%x", d_addr, got)
when (!shadow.valid) {
printf(", undefined (uninitialized or prior overlapping puts)\n")
} .elsewhen (d_inc(i) =/= d_dec(i)) {
printf(", undefined (concurrent incomplete puts #%d)\n", d_inc(i) - d_dec(i))
} .elsewhen (!d_fifo && !d_valid) {
printf(", undefined (concurrent completed put)\n")
} .otherwise {
printf("\n")
assert (shadow.value === got)
}
} }
} }
} }
} }
}
val d_waddr = Mux(wipe, wipeIndex, d_addr_hi) val d_waddr = Mux(wipe, wipeIndex, d_addr_hi)
for (i <- 0 until beatBytes) { for (i <- 0 until beatBytes) {
val data = Mux(wipe, UInt(0), d_dec_bytes(i) + UInt(1)) val data = Mux(wipe, UInt(0), d_dec_bytes(i) + UInt(1))
when (dec_bytes_wen(i)) { when (dec_bytes_wen(i)) {
dec_bytes(i).write(d_waddr, data) dec_bytes(i).write(d_waddr, data)
}
} }
}
for (i <- 0 until dec_trees.size) { for (i <- 0 until dec_trees.size) {
val data = Mux(wipe, UInt(0), d_dec_trees(i) + UInt(1)) val data = Mux(wipe, UInt(0), d_dec_trees(i) + UInt(1))
when (dec_trees_wen(i)) { when (dec_trees_wen(i)) {
dec_trees(i).write(d_waddr >> (i+1), data) dec_trees(i).write(d_waddr >> (i+1), data)
}
} }
} }
} }
} }
object TLRAMModel
{
case class MonitorParameters(addressBits: Int, sizeBits: Int)
class ByteMonitor(params: MonitorParameters) extends GenericParameterizedBundle(params) {
val valid = Bool()
val value = UInt(width = 8)
}
class FlightMonitor(params: MonitorParameters) extends GenericParameterizedBundle(params) {
val base = UInt(width = params.addressBits)
val size = UInt(width = params.sizeBits)
val opcode = UInt(width = 3)
}
}
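The change above is the pattern repeated throughout this patch: instead of assuming a single port (require (io.out.size == 1), io.in(0), node.edgesIn(0)), the module body is instantiated once for every connected port. A minimal hedged sketch of the shape, as a hypothetical pass-through adapter (names are illustrative; bundleIn/bundleOut and the per-channel wiring follow the code above):

import Chisel._
import config._
import diplomacy._
import uncore.tilelink2._

class ExamplePassthrough(implicit p: Parameters) extends LazyModule {
  // identity parameter functions: whatever attaches on one side is advertised on the other
  val node = TLAdapterNode(clientFn = { c => c }, managerFn = { m => m })
  lazy val module = new LazyModuleImp(this) {
    val io = new Bundle {
      val in  = node.bundleIn
      val out = node.bundleOut
    }
    // one copy of the body per connected (input, output) pair and its edge parameters
    ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
      out.a <> in.a   // requests flow outward
      in.d  <> out.d  // responses flow back inward
      in.b  <> out.b  // remaining channels forwarded unchanged
      out.c <> in.c
      out.e <> in.e
    }
  }
}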


@ -9,7 +9,7 @@ import regmapper._
import scala.math.{min,max} import scala.math.{min,max}
class TLRegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int = 4, undefZero: Boolean = true, executable: Boolean = false) class TLRegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int = 4, undefZero: Boolean = true, executable: Boolean = false)
extends TLManagerNode(TLManagerPortParameters( extends TLManagerNode(Seq(TLManagerPortParameters(
Seq(TLManagerParameters( Seq(TLManagerParameters(
address = Seq(address), address = Seq(address),
executable = executable, executable = executable,
@ -18,7 +18,7 @@ class TLRegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int =
supportsPutFull = TransferSizes(1, beatBytes), supportsPutFull = TransferSizes(1, beatBytes),
fifoId = Some(0))), // requests are handled in order fifoId = Some(0))), // requests are handled in order
beatBytes = beatBytes, beatBytes = beatBytes,
minLatency = min(concurrency, 1))) // the Queue adds at most one cycle minLatency = min(concurrency, 1)))) // the Queue adds at most one cycle
{ {
require (address.contiguous) require (address.contiguous)


@ -8,7 +8,7 @@ import diplomacy._
class TLRAM(address: AddressSet, executable: Boolean = true, beatBytes: Int = 4)(implicit p: Parameters) extends LazyModule class TLRAM(address: AddressSet, executable: Boolean = true, beatBytes: Int = 4)(implicit p: Parameters) extends LazyModule
{ {
val node = TLManagerNode(TLManagerPortParameters( val node = TLManagerNode(Seq(TLManagerPortParameters(
Seq(TLManagerParameters( Seq(TLManagerParameters(
address = List(address), address = List(address),
regionType = RegionType.UNCACHED, regionType = RegionType.UNCACHED,
@ -18,7 +18,7 @@ class TLRAM(address: AddressSet, executable: Boolean = true, beatBytes: Int = 4)
supportsPutFull = TransferSizes(1, beatBytes), supportsPutFull = TransferSizes(1, beatBytes),
fifoId = Some(0))), // requests are handled in order fifoId = Some(0))), // requests are handled in order
beatBytes = beatBytes, beatBytes = beatBytes,
minLatency = 1)) // no bypass needed for this device minLatency = 1))) // no bypass needed for this device
// We require the address range to include an entire beat (for the write mask) // We require the address range to include an entire beat (for the write mask)
require ((address.mask & (beatBytes-1)) == beatBytes-1) require ((address.mask & (beatBytes-1)) == beatBytes-1)
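A quick hedged check of that mask requirement in plain Scala (the regions are arbitrary examples): with beatBytes = 4, the low log2(beatBytes) bits of the AddressSet mask must all be set, so every aligned beat falls entirely inside the RAM:

object RamMaskCheck extends App {
  val beatBytes = 4
  def coversWholeBeats(mask: Long) = (mask & (beatBytes - 1)) == beatBytes - 1
  println(coversWholeBeats(0xfffL)) // true  -- e.g. AddressSet(0x0, 0xfff), a 4 KiB region
  println(coversWholeBeats(0x1L))   // false -- a 2-byte region cannot hold a full 4-byte write mask
}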


@ -15,8 +15,8 @@ class TLSourceShrinker(maxInFlight: Int)(implicit p: Parameters) extends LazyMod
private val client = TLClientParameters(sourceId = IdRange(0, maxInFlight)) private val client = TLClientParameters(sourceId = IdRange(0, maxInFlight))
val node = TLAdapterNode( val node = TLAdapterNode(
// We erase all client information since we crush the source Ids // We erase all client information since we crush the source Ids
clientFn = { case _ => TLClientPortParameters(clients = Seq(client)) }, clientFn = { _ => TLClientPortParameters(clients = Seq(client)) },
managerFn = { case Seq(mp) => mp }) managerFn = { mp => mp })
lazy val module = new LazyModuleImp(this) { lazy val module = new LazyModuleImp(this) {
val io = new Bundle { val io = new Bundle {
@ -24,54 +24,51 @@ class TLSourceShrinker(maxInFlight: Int)(implicit p: Parameters) extends LazyMod
val out = node.bundleOut val out = node.bundleOut
} }
val edgeIn = node.edgesIn(0) ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
val edgeOut = node.edgesOut(0) // Acquires cannot pass this adapter; it makes Probes impossible
val in = io.in(0) require (!edgeIn.client.anySupportProbe ||
val out = io.out(0) !edgeOut.manager.anySupportAcquireB)
// Acquires cannot pass this adapter; it makes Probes impossible out.b.ready := Bool(true)
require (!edgeIn.client.anySupportProbe || out.c.valid := Bool(false)
!edgeOut.manager.anySupportAcquireB) out.e.valid := Bool(false)
in.b.valid := Bool(false)
in.c.ready := Bool(true)
in.e.ready := Bool(true)
out.b.ready := Bool(true) if (maxInFlight >= edgeIn.client.endSourceId) {
out.c.valid := Bool(false) out.a <> in.a
out.e.valid := Bool(false) in.d <> out.d
in.b.valid := Bool(false) } else {
in.c.ready := Bool(true) // State tracking
in.e.ready := Bool(true) val sourceIdMap = Mem(maxInFlight, in.a.bits.source)
val allocated = RegInit(UInt(0, width = maxInFlight))
val nextFreeOH = ~(leftOR(~allocated) << 1) & ~allocated
val nextFree = OHToUInt(nextFreeOH)
val full = allocated.andR()
if (maxInFlight >= edgeIn.client.endSourceId) { val a_first = edgeIn.first(in.a)
out.a <> in.a val d_last = edgeIn.last(in.d)
in.d <> out.d
} else {
// State tracking
val sourceIdMap = Mem(maxInFlight, in.a.bits.source)
val allocated = RegInit(UInt(0, width = maxInFlight))
val nextFreeOH = ~(leftOR(~allocated) << 1) & ~allocated
val nextFree = OHToUInt(nextFreeOH)
val full = allocated.andR()
val a_first = edgeIn.first(in.a) val block = a_first && full
val d_last = edgeIn.last(in.d) in.a.ready := out.a.ready && !block
out.a.valid := in.a.valid && !block
out.a.bits := in.a.bits
out.a.bits.source := holdUnless(nextFree, a_first)
val block = a_first && full in.d <> out.d
in.a.ready := out.a.ready && !block in.d.bits.source := sourceIdMap(out.d.bits.source)
out.a.valid := in.a.valid && !block
out.a.bits := in.a.bits
out.a.bits.source := holdUnless(nextFree, a_first)
in.d <> out.d when (a_first && in.a.fire()) {
in.d.bits.source := sourceIdMap(out.d.bits.source) sourceIdMap(nextFree) := in.a.bits.source
}
when (a_first && in.a.fire()) { val alloc = a_first && in.a.fire()
sourceIdMap(nextFree) := in.a.bits.source val free = d_last && in.d.fire()
val alloc_id = Mux(alloc, nextFreeOH, UInt(0))
val free_id = Mux(free, UIntToOH(out.d.bits.source), UInt(0))
allocated := (allocated | alloc_id) & ~free_id
} }
val alloc = a_first && in.a.fire()
val free = d_last && in.d.fire()
val alloc_id = Mux(alloc, nextFreeOH, UInt(0))
val free_id = Mux(free, UIntToOH(out.d.bits.source), UInt(0))
allocated := (allocated | alloc_id) & ~free_id
} }
} }
} }
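A hedged plain-Scala rendering of the nextFreeOH expression above, to show what the leftOR trick computes (widths and allocation values are arbitrary): leftOR ORs each bit with all bits below it, so the expression isolates the lowest clear bit of allocated as a one-hot value:

object NextFreeExample extends App {
  // each result bit is the OR of that bit and all lower bits of x
  def leftOR(x: Int, width: Int): Int = {
    var acc = 0; var out = 0
    for (i <- 0 until width) { acc |= (x >> i) & 1; out |= acc << i }
    out
  }
  // lowest clear bit of `allocated`, returned one-hot
  def nextFreeOH(allocated: Int, width: Int): Int = {
    val mask = (1 << width) - 1
    val free = ~allocated & mask
    ~(leftOR(free, width) << 1) & free & mask
  }
  println(nextFreeOH(0x5, 4)) // allocated = 0101 -> 2 (one-hot 0010): source id 1 is allocated next
  println(nextFreeOH(0x7, 4)) // allocated = 0111 -> 8 (one-hot 1000): source id 3 is allocated next
}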


@ -10,12 +10,12 @@ import uncore.ahb._
import scala.math.{min, max} import scala.math.{min, max}
import AHBParameters._ import AHBParameters._
case class TLToAHBNode() extends MixedNode(TLImp, AHBImp)( case class TLToAHBNode() extends MixedAdapterNode(TLImp, AHBImp)(
dFn = { case (1, Seq(TLClientPortParameters(clients, unsafeAtomics, minLatency))) => dFn = { case TLClientPortParameters(clients, unsafeAtomics, minLatency) =>
val masters = clients.map { case c => AHBMasterParameters(nodePath = c.nodePath) } val masters = clients.map { case c => AHBMasterParameters(nodePath = c.nodePath) }
Seq(AHBMasterPortParameters(masters)) AHBMasterPortParameters(masters)
}, },
uFn = { case (1, Seq(AHBSlavePortParameters(slaves, beatBytes))) => uFn = { case AHBSlavePortParameters(slaves, beatBytes) =>
val managers = slaves.map { case s => val managers = slaves.map { case s =>
TLManagerParameters( TLManagerParameters(
address = s.address, address = s.address,
@ -26,10 +26,8 @@ case class TLToAHBNode() extends MixedNode(TLImp, AHBImp)(
supportsPutFull = s.supportsWrite, // but not PutPartial supportsPutFull = s.supportsWrite, // but not PutPartial
fifoId = Some(0)) // a common FIFO domain fifoId = Some(0)) // a common FIFO domain
} }
Seq(TLManagerPortParameters(managers, beatBytes, 1, 1)) TLManagerPortParameters(managers, beatBytes, 1, 1)
}, })
numPO = 1 to 1,
numPI = 1 to 1)
class TLToAHB(combinational: Boolean = true)(implicit p: Parameters) extends LazyModule class TLToAHB(combinational: Boolean = true)(implicit p: Parameters) extends LazyModule
{ {
@ -41,91 +39,89 @@ class TLToAHB(combinational: Boolean = true)(implicit p: Parameters) extends Laz
val out = node.bundleOut val out = node.bundleOut
} }
val in = io.in(0) ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
val out = io.out(0) val beatBytes = edgeOut.slave.beatBytes
val edgeIn = node.edgesIn(0) val maxTransfer = edgeOut.slave.maxTransfer
val edgeOut = node.edgesOut(0) val lgMax = log2Ceil(maxTransfer)
val beatBytes = edgeOut.slave.beatBytes val lgBytes = log2Ceil(beatBytes)
val maxTransfer = edgeOut.slave.maxTransfer
val lgMax = log2Ceil(maxTransfer)
val lgBytes = log2Ceil(beatBytes)
// AHB has no cache coherence // AHB has no cache coherence
in.b.valid := Bool(false) in.b.valid := Bool(false)
in.c.ready := Bool(true) in.c.ready := Bool(true)
in.e.ready := Bool(true) in.e.ready := Bool(true)
// We need a skidpad to capture D output: // We need a skidpad to capture D output:
// We cannot know if the D response will be accepted until we have // We cannot know if the D response will be accepted until we have
// presented it on D as valid. We also can't back-pressure AHB in the // presented it on D as valid. We also can't back-pressure AHB in the
// data phase. Therefore, we must have enough space to save the data // data phase. Therefore, we must have enough space to save the data
// phase result. Whenever we have a queued response, we can not allow // phase result. Whenever we have a queued response, we can not allow
// AHB to present new responses, so we must quash the address phase. // AHB to present new responses, so we must quash the address phase.
val d = Wire(in.d) val d = Wire(in.d)
in.d <> Queue(d, 1, flow = true) in.d <> Queue(d, 1, flow = true)
val a_quash = in.d.valid && !in.d.ready val a_quash = in.d.valid && !in.d.ready
// Record what is coming out in d_phase // Record what is coming out in d_phase
val d_valid = RegInit(Bool(false)) val d_valid = RegInit(Bool(false))
val d_hasData = Reg(Bool()) val d_hasData = Reg(Bool())
val d_error = Reg(Bool()) val d_error = Reg(Bool())
val d_addr_lo = Reg(UInt(width = lgBytes)) val d_addr_lo = Reg(UInt(width = lgBytes))
val d_source = Reg(UInt()) val d_source = Reg(UInt())
val d_size = Reg(UInt()) val d_size = Reg(UInt())
when (out.hreadyout) { d_error := d_error || out.hresp } when (out.hreadyout) { d_error := d_error || out.hresp }
when (d.fire()) { d_valid := Bool(false) } when (d.fire()) { d_valid := Bool(false) }
d.valid := d_valid && out.hreadyout d.valid := d_valid && out.hreadyout
d.bits := edgeIn.AccessAck(d_addr_lo, UInt(0), d_source, d_size, out.hrdata, out.hresp || d_error) d.bits := edgeIn.AccessAck(d_addr_lo, UInt(0), d_source, d_size, out.hrdata, out.hresp || d_error)
d.bits.opcode := Mux(d_hasData, TLMessages.AccessAckData, TLMessages.AccessAck) d.bits.opcode := Mux(d_hasData, TLMessages.AccessAckData, TLMessages.AccessAck)
// We need an irrevocable input for AHB to stall on read bursts // We need an irrevocable input for AHB to stall on read bursts
// We also need the values to NOT change when valid goes low => 1 entry only // We also need the values to NOT change when valid goes low => 1 entry only
val a = Queue(in.a, 1, flow = combinational, pipe = !combinational) val a = Queue(in.a, 1, flow = combinational, pipe = !combinational)
val a_valid = a.valid && !a_quash val a_valid = a.valid && !a_quash
// This is a lot like TLEdge.firstlast, but counts beats also for single-beat TL types val a_size = edgeIn.size(a.bits)
val a_size = edgeIn.size(a.bits) val a_size = edgeIn.size(a.bits)
val a_beats1 = UIntToOH1(a_size, lgMax) >> lgBytes val a_beats1 = UIntToOH1(a_size, lgMax) >> lgBytes
val a_counter = RegInit(UInt(0, width = log2Up(maxTransfer/beatBytes))) val a_counter = RegInit(UInt(0, width = log2Up(maxTransfer/beatBytes)))
val a_counter1 = a_counter - UInt(1) val a_counter1 = a_counter - UInt(1)
val a_first = a_counter === UInt(0) val a_first = a_counter === UInt(0)
val a_last = a_counter === UInt(1) || a_beats1 === UInt(0) val a_last = a_counter === UInt(1) || a_beats1 === UInt(0)
val a_offset = (a_beats1 & ~a_counter1) << lgBytes val a_offset = (a_beats1 & ~a_counter1) << lgBytes
val a_hasData = edgeIn.hasData(a.bits) val a_hasData = edgeIn.hasData(a.bits)
// Expand no-data A-channel requests into multiple beats // Expand no-data A-channel requests into multiple beats
a.ready := (a_hasData || a_last) && out.hreadyout && !a_quash a.ready := (a_hasData || a_last) && out.hreadyout && !a_quash
when (a_valid && out.hreadyout) { when (a_valid && out.hreadyout) {
a_counter := Mux(a_first, a_beats1, a_counter1) a_counter := Mux(a_first, a_beats1, a_counter1)
d_valid := !a_hasData || a_last d_valid := !a_hasData || a_last
// Record what will be in the data phase // Record what will be in the data phase
when (a_first) { when (a_first) {
d_hasData := !a_hasData d_hasData := !a_hasData
d_error := Bool(false) d_error := Bool(false)
d_addr_lo := a.bits.address d_addr_lo := a.bits.address
d_source := a.bits.source d_source := a.bits.source
d_size := a.bits.size d_size := a.bits.size
}
} }
// Transform TL size into AHB hsize+hburst
val a_size_bits = a_size.getWidth
val a_sizeDelta = Cat(UInt(0, width = 1), a_size) - UInt(lgBytes+1)
val a_singleBeat = a_sizeDelta(a_size_bits)
val a_logBeats1 = a_sizeDelta(a_size_bits-1, 0)
out.hmastlock := Bool(false) // for now
out.htrans := Mux(a_valid, Mux(a_first, TRANS_NONSEQ, TRANS_SEQ), Mux(a_first, TRANS_IDLE, TRANS_BUSY))
out.hsel := a_valid || !a_first
out.hready := out.hreadyout
out.hwrite := a_hasData
out.haddr := a.bits.address | a_offset
out.hsize := Mux(a_singleBeat, a.bits.size, UInt(lgBytes))
out.hburst := Mux(a_singleBeat, BURST_SINGLE, (a_logBeats1<<1) | UInt(1))
out.hprot := PROT_DEFAULT
out.hwdata := RegEnable(a.bits.data, a.fire())
} }
// Transform TL size into AHB hsize+hburst
val a_size_bits = a_size.getWidth
val a_sizeDelta = Cat(UInt(0, width = 1), a_size) - UInt(lgBytes+1)
val a_singleBeat = a_sizeDelta(a_size_bits)
val a_logBeats1 = a_sizeDelta(a_size_bits-1, 0)
out.hmastlock := Bool(false) // for now
out.htrans := Mux(a_valid, Mux(a_first, TRANS_NONSEQ, TRANS_SEQ), Mux(a_first, TRANS_IDLE, TRANS_BUSY))
out.hsel := a_valid || !a_first
out.hready := out.hreadyout
out.hwrite := a_hasData
out.haddr := a.bits.address | a_offset
out.hsize := Mux(a_singleBeat, a.bits.size, UInt(lgBytes))
out.hburst := Mux(a_singleBeat, BURST_SINGLE, (a_logBeats1<<1) | UInt(1))
out.hprot := PROT_DEFAULT
out.hwdata := RegEnable(a.bits.data, a.fire())
} }
} }
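A hedged plain-Scala restatement of the hsize/hburst arithmetic above, assuming beatBytes = 4 (lgBytes = 2) and the standard AHB burst encoding (SINGLE = 0, INCR4 = 3, INCR8 = 5, INCR16 = 7):

object AhbBurstExample extends App {
  val lgBytes = 2                        // log2Ceil(beatBytes) for beatBytes = 4
  def hsizeHburst(aSize: Int): (Int, Int) = {
    val singleBeat = aSize <= lgBytes    // the sign bit of a_sizeDelta in the code above
    val hsize  = if (singleBeat) aSize else lgBytes
    val hburst = if (singleBeat) 0 else ((aSize - lgBytes - 1) << 1) | 1
    (hsize, hburst)
  }
  println(hsizeHburst(2)) // 4-byte TL access  -> (2,0): one beat,      SINGLE
  println(hsizeHburst(4)) // 16-byte TL access -> (2,3): four beats,    INCR4
  println(hsizeHburst(6)) // 64-byte TL access -> (2,7): sixteen beats, INCR16
}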


@ -10,12 +10,12 @@ import uncore.apb._
import scala.math.{min, max} import scala.math.{min, max}
import APBParameters._ import APBParameters._
case class TLToAPBNode() extends MixedNode(TLImp, APBImp)( case class TLToAPBNode() extends MixedAdapterNode(TLImp, APBImp)(
dFn = { case (1, Seq(TLClientPortParameters(clients, unsafeAtomics, minLatency))) => dFn = { case TLClientPortParameters(clients, unsafeAtomics, minLatency) =>
val masters = clients.map { case c => APBMasterParameters(nodePath = c.nodePath) } val masters = clients.map { case c => APBMasterParameters(nodePath = c.nodePath) }
Seq(APBMasterPortParameters(masters)) APBMasterPortParameters(masters)
}, },
uFn = { case (1, Seq(APBSlavePortParameters(slaves, beatBytes))) => uFn = { case APBSlavePortParameters(slaves, beatBytes) =>
val managers = slaves.map { case s => val managers = slaves.map { case s =>
TLManagerParameters( TLManagerParameters(
address = s.address, address = s.address,
@ -27,10 +27,8 @@ case class TLToAPBNode() extends MixedNode(TLImp, APBImp)(
supportsPutFull = if (s.supportsWrite) TransferSizes(1, beatBytes) else TransferSizes.none, supportsPutFull = if (s.supportsWrite) TransferSizes(1, beatBytes) else TransferSizes.none,
fifoId = Some(0)) // a common FIFO domain fifoId = Some(0)) // a common FIFO domain
} }
Seq(TLManagerPortParameters(managers, beatBytes, 1, 0)) TLManagerPortParameters(managers, beatBytes, 1, 0)
}, })
numPO = 1 to 1,
numPI = 1 to 1)
class TLToAPB(combinational: Boolean = true)(implicit p: Parameters) extends LazyModule class TLToAPB(combinational: Boolean = true)(implicit p: Parameters) extends LazyModule
{ {
@ -42,51 +40,49 @@ class TLToAPB(combinational: Boolean = true)(implicit p: Parameters) extends Laz
val out = node.bundleOut val out = node.bundleOut
} }
val in = io.in(0) ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
val out = io.out(0) val beatBytes = edgeOut.slave.beatBytes
val edgeIn = node.edgesIn(0) val lgBytes = log2Ceil(beatBytes)
val edgeOut = node.edgesOut(0)
val beatBytes = edgeOut.slave.beatBytes
val lgBytes = log2Ceil(beatBytes)
// APB has no cache coherence // APB has no cache coherence
in.b.valid := Bool(false) in.b.valid := Bool(false)
in.c.ready := Bool(true) in.c.ready := Bool(true)
in.e.ready := Bool(true) in.e.ready := Bool(true)
// We need a skidpad to capture D output: // We need a skidpad to capture D output:
// We cannot know if the D response will be accepted until we have // We cannot know if the D response will be accepted until we have
// presented it on D as valid. We also can't back-pressure APB in the // presented it on D as valid. We also can't back-pressure APB in the
// data phase. Therefore, we must have enough space to save the data // data phase. Therefore, we must have enough space to save the data
// phase result. Whenever we have a queued response, we can not allow // phase result. Whenever we have a queued response, we can not allow
// APB to present new responses, so we must quash the address phase. // APB to present new responses, so we must quash the address phase.
val d = Wire(in.d) val d = Wire(in.d)
in.d <> Queue(d, 1, flow = true) in.d <> Queue(d, 1, flow = true)
// We need an irrevocable input for APB to stall // We need an irrevocable input for APB to stall
val a = Queue(in.a, 1, flow = combinational, pipe = !combinational) val a = Queue(in.a, 1, flow = combinational, pipe = !combinational)
val a_enable = RegInit(Bool(false)) val a_enable = RegInit(Bool(false))
val a_sel = a.valid && RegNext(!in.d.valid || in.d.ready) val a_sel = a.valid && RegNext(!in.d.valid || in.d.ready)
val a_write = edgeIn.hasData(a.bits) val a_write = edgeIn.hasData(a.bits)
when (a_sel) { a_enable := Bool(true) } when (a_sel) { a_enable := Bool(true) }
when (d.fire()) { a_enable := Bool(false) } when (d.fire()) { a_enable := Bool(false) }
out.psel := a_sel out.psel := a_sel
out.penable := a_enable out.penable := a_enable
out.pwrite := a_write out.pwrite := a_write
out.paddr := a.bits.address out.paddr := a.bits.address
out.pprot := PROT_DEFAULT out.pprot := PROT_DEFAULT
out.pwdata := a.bits.data out.pwdata := a.bits.data
out.pstrb := Mux(a_write, a.bits.mask, UInt(0)) out.pstrb := Mux(a_write, a.bits.mask, UInt(0))
a.ready := a_enable && out.pready a.ready := a_enable && out.pready
d.valid := a_enable && out.pready d.valid := a_enable && out.pready
assert (!d.valid || d.ready) assert (!d.valid || d.ready)
d.bits := edgeIn.AccessAck(a.bits, UInt(0), out.prdata, out.pslverr) d.bits := edgeIn.AccessAck(a.bits, UInt(0), out.prdata, out.pslverr)
d.bits.opcode := Mux(a_write, TLMessages.AccessAck, TLMessages.AccessAckData) d.bits.opcode := Mux(a_write, TLMessages.AccessAck, TLMessages.AccessAckData)
}
} }
} }


@ -10,16 +10,16 @@ import util.PositionalMultiQueue
import uncore.axi4._ import uncore.axi4._
import scala.math.{min, max} import scala.math.{min, max}
case class TLToAXI4Node(idBits: Int) extends MixedNode(TLImp, AXI4Imp)( case class TLToAXI4Node(idBits: Int) extends MixedAdapterNode(TLImp, AXI4Imp)(
dFn = { case (1, _) => dFn = { _ =>
// We must erase all client information, because we crush their source Ids // We must erase all client information, because we crush their source Ids
val masters = Seq( val masters = Seq(
AXI4MasterParameters( AXI4MasterParameters(
id = IdRange(0, 1 << idBits), id = IdRange(0, 1 << idBits),
aligned = true)) aligned = true))
Seq(AXI4MasterPortParameters(masters)) AXI4MasterPortParameters(masters)
}, },
uFn = { case (1, Seq(p)) => Seq(TLManagerPortParameters( uFn = { p => TLManagerPortParameters(
managers = p.slaves.map { case s => managers = p.slaves.map { case s =>
TLManagerParameters( TLManagerParameters(
address = s.address, address = s.address,
@ -31,10 +31,8 @@ case class TLToAXI4Node(idBits: Int) extends MixedNode(TLImp, AXI4Imp)(
supportsPutPartial = s.supportsWrite)}, supportsPutPartial = s.supportsWrite)},
// AXI4 is NEVER fifo in TL sense (R+W are independent) // AXI4 is NEVER fifo in TL sense (R+W are independent)
beatBytes = p.beatBytes, beatBytes = p.beatBytes,
minLatency = p.minLatency)) minLatency = p.minLatency)
}, })
numPO = 1 to 1,
numPI = 1 to 1)
class TLToAXI4(idBits: Int, combinational: Boolean = true)(implicit p: Parameters) extends LazyModule class TLToAXI4(idBits: Int, combinational: Boolean = true)(implicit p: Parameters) extends LazyModule
{ {
@ -46,185 +44,182 @@ class TLToAXI4(idBits: Int, combinational: Boolean = true)(implicit p: Parameter
val out = node.bundleOut val out = node.bundleOut
} }
val in = io.in(0) ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
val out = io.out(0) val slaves = edgeOut.slave.slaves
val edgeIn = node.edgesIn(0) // All pairs of slaves must promise that they will never interleave data
val edgeOut = node.edgesOut(0) require (slaves(0).interleavedId.isDefined)
val slaves = edgeOut.slave.slaves slaves.foreach { s => require (s.interleavedId == slaves(0).interleavedId) }
// All pairs of slaves must promise that they will never interleave data // We need to ensure that a slave does not stall trying to send B while we need to receive R
require (slaves(0).interleavedId.isDefined) // Since R&W have independent flow control, it is possible for a W to cut in-line and get into
slaves.foreach { s => require (s.interleavedId == slaves(0).interleavedId) } // a slave's buffers, preventing us from getting all the R responses we need to release D for B.
// This risk is compounded by an AXI fragmentation. Even a slave which responds completely to
// AR before working on AW might have an AW slipped between two AR fragments.
val out_b = Queue.irrevocable(out.b, entries=edgeIn.client.endSourceId, flow=combinational)
// We need to ensure that a slave does not stall trying to send B while we need to receive R // We need to keep the following state from A => D: (addr_lo, size, source)
// Since R&W have independent flow control, it is possible for a W to cut in-line and get into // All of those fields could potentially require 0 bits (argh. Chisel.)
// a slave's buffers, preventing us from getting all the R responses we need to release D for B. // We will pack as many of the lowest bits of state as fit into the AXI ID.
// This risk is compounded by an AXI fragmentation. Even a slave which responds completely to // Any bits left-over must be put into a bank of Queues.
// AR before working on AW might have an AW slipped between two AR fragments. // The Queues are indexed by as many of the source bits as fit into the AXI ID.
val out_b = Queue.irrevocable(out.b, entries=edgeIn.client.endSourceId, flow=combinational) // The Queues are deep enough that every source has guaranteed space in its Queue.
// We need to keep the following state from A => D: (addr_lo, size, source) val sourceBits = log2Ceil(edgeIn.client.endSourceId)
// All of those fields could potentially require 0 bits (argh. Chisel.) val sizeBits = log2Ceil(edgeIn.maxLgSize+1)
// We will pack as many of the lowest bits of state as fit into the AXI ID. val addrBits = log2Ceil(edgeIn.manager.beatBytes)
// Any bits left-over must be put into a bank of Queues. val stateBits = addrBits + sizeBits + sourceBits // could be 0
// The Queues are indexed by as many of the source bits as fit into the AXI ID.
// The Queues are deep enough that every source has guaranteed space in its Queue.
val sourceBits = log2Ceil(edgeIn.client.endSourceId) val a_address = edgeIn.address(in.a.bits)
val sizeBits = log2Ceil(edgeIn.maxLgSize+1) val a_addr_lo = edgeIn.addr_lo(a_address)
val addrBits = log2Ceil(edgeIn.manager.beatBytes) val a_source = in.a.bits.source
val stateBits = addrBits + sizeBits + sourceBits // could be 0 val a_size = edgeIn.size(in.a.bits)
val a_isPut = edgeIn.hasData(in.a.bits)
val a_last = edgeIn.last(in.a)
val a_address = edgeIn.address(in.a.bits) // Make sure the fields are within the bounds we assumed
val a_addr_lo = edgeIn.addr_lo(a_address) assert (a_source < UInt(BigInt(1) << sourceBits))
val a_source = in.a.bits.source assert (a_size < UInt(BigInt(1) << sizeBits))
val a_size = edgeIn.size(in.a.bits) assert (a_addr_lo < UInt(BigInt(1) << addrBits))
val a_isPut = edgeIn.hasData(in.a.bits)
val a_last = edgeIn.last(in.a)
// Make sure the fields are within the bounds we assumed // Carefully pack/unpack fields into the state we send
assert (a_source < UInt(BigInt(1) << sourceBits)) val baseEnd = 0
assert (a_size < UInt(BigInt(1) << sizeBits)) val (sourceEnd, sourceOff) = (sourceBits + baseEnd, baseEnd)
assert (a_addr_lo < UInt(BigInt(1) << addrBits)) val (sizeEnd, sizeOff) = (sizeBits + sourceEnd, sourceEnd)
val (addrEnd, addrOff) = (addrBits + sizeEnd, sizeEnd)
require (addrEnd == stateBits)
// Carefully pack/unpack fields into the state we send val a_state = (a_source << sourceOff) | (a_size << sizeOff) | (a_addr_lo << addrOff)
val baseEnd = 0 val a_id = if (idBits == 0) UInt(0) else a_state
val (sourceEnd, sourceOff) = (sourceBits + baseEnd, baseEnd)
val (sizeEnd, sizeOff) = (sizeBits + sourceEnd, sourceEnd)
val (addrEnd, addrOff) = (addrBits + sizeEnd, sizeEnd)
require (addrEnd == stateBits)
val a_state = (a_source << sourceOff) | (a_size << sizeOff) | (a_addr_lo << addrOff) val r_state = Wire(UInt(width = stateBits))
val a_id = if (idBits == 0) UInt(0) else a_state val r_source = if (sourceBits > 0) r_state(sourceEnd-1, sourceOff) else UInt(0)
val r_size = if (sizeBits > 0) r_state(sizeEnd -1, sizeOff) else UInt(0)
val r_addr_lo = if (addrBits > 0) r_state(addrEnd -1, addrOff) else UInt(0)
val r_state = Wire(UInt(width = stateBits)) val b_state = Wire(UInt(width = stateBits))
val r_source = if (sourceBits > 0) r_state(sourceEnd-1, sourceOff) else UInt(0) val b_source = if (sourceBits > 0) b_state(sourceEnd-1, sourceOff) else UInt(0)
val r_size = if (sizeBits > 0) r_state(sizeEnd -1, sizeOff) else UInt(0) val b_size = if (sizeBits > 0) b_state(sizeEnd -1, sizeOff) else UInt(0)
val r_addr_lo = if (addrBits > 0) r_state(addrEnd -1, addrOff) else UInt(0) val b_addr_lo = if (addrBits > 0) b_state(addrEnd -1, addrOff) else UInt(0)
val b_state = Wire(UInt(width = stateBits)) val r_last = out.r.bits.last
val b_source = if (sourceBits > 0) b_state(sourceEnd-1, sourceOff) else UInt(0) val r_id = out.r.bits.id
val b_size = if (sizeBits > 0) b_state(sizeEnd -1, sizeOff) else UInt(0) val b_id = out_b.bits.id
val b_addr_lo = if (addrBits > 0) b_state(addrEnd -1, addrOff) else UInt(0)
val r_last = out.r.bits.last if (stateBits <= idBits) { // No need for any state tracking
val r_id = out.r.bits.id r_state := r_id
val b_id = out_b.bits.id b_state := b_id
} else {
val bankIndexBits = min(sourceBits, idBits)
val posBits = max(0, sourceBits - idBits)
val implicitBits = max(idBits, sourceBits)
val bankBits = stateBits - implicitBits
val numBanks = min(1 << bankIndexBits, edgeIn.client.endSourceId)
def bankEntries(i: Int) = (edgeIn.client.endSourceId+numBanks-i-1) / numBanks
if (stateBits <= idBits) { // No need for any state tracking val banks = Seq.tabulate(numBanks) { i =>
r_state := r_id // We know there can only be as many outstanding requests as TL sources
b_state := b_id // However, AXI read and write queues are not mutually FIFO.
} else { // Therefore, we want to pop them individually, but share the storage.
val bankIndexBits = min(sourceBits, idBits) val bypass = combinational && edgeOut.slave.minLatency == 0
val posBits = max(0, sourceBits - idBits) PositionalMultiQueue(UInt(width=max(1,bankBits)), positions=bankEntries(i), ways=2, combinational=bypass)
val implicitBits = max(idBits, sourceBits) }
val bankBits = stateBits - implicitBits
val numBanks = min(1 << bankIndexBits, edgeIn.client.endSourceId)
def bankEntries(i: Int) = (edgeIn.client.endSourceId+numBanks-i-1) / numBanks
val banks = Seq.tabulate(numBanks) { i => val a_bankPosition = if (posBits == 0) UInt(0) else a_source(sourceBits-1, idBits)
// We know there can only be as many outstanding requests as TL sources val a_bankIndex = if (bankIndexBits == 0) UInt(0) else a_source(bankIndexBits-1, 0)
// However, AXI read and write queues are not mutually FIFO. val r_bankIndex = if (bankIndexBits == 0) UInt(0) else r_id(bankIndexBits-1, 0)
// Therefore, we want to pop them individually, but share the storage. val b_bankIndex = if (bankIndexBits == 0) UInt(0) else b_id(bankIndexBits-1, 0)
val bypass = combinational && edgeOut.slave.minLatency == 0 val a_bankSelect = UIntToOH(a_bankIndex, numBanks)
PositionalMultiQueue(UInt(width=max(1,bankBits)), positions=bankEntries(i), ways=2, combinational=bypass) val r_bankSelect = UIntToOH(r_bankIndex, numBanks)
val b_bankSelect = UIntToOH(b_bankIndex, numBanks)
banks.zipWithIndex.foreach { case (q, i) =>
// Push a_state into the banks
q.io.enq.valid := in.a.fire() && a_last && a_bankSelect(i)
q.io.enq.bits.pos := a_bankPosition
q.io.enq.bits.data := a_state >> implicitBits
q.io.enq.bits.way := Mux(a_isPut, UInt(0), UInt(1))
// Pop the bank's ways
q.io.deq(0).ready := out_b.fire() && b_bankSelect(i)
q.io.deq(1).ready := out.r.fire() && r_bankSelect(i) && r_last
// The FIFOs must be valid when we're ready to pop them...
assert (q.io.deq(0).valid || !q.io.deq(0).ready)
assert (q.io.deq(1).valid || !q.io.deq(1).ready)
}
val b_bankData = Vec(banks.map(_.io.deq(0).bits.data))(b_bankIndex)
val b_bankPos = Vec(banks.map(_.io.deq(0).bits.pos ))(b_bankIndex)
val r_bankData = Vec(banks.map(_.io.deq(1).bits.data))(r_bankIndex)
val r_bankPos = Vec(banks.map(_.io.deq(1).bits.pos ))(r_bankIndex)
def optCat(x: (Boolean, UInt)*) = { Cat(x.toList.filter(_._1).map(_._2)) }
b_state := optCat((bankBits > 0, b_bankData), (posBits > 0, b_bankPos), (idBits > 0, b_id))
r_state := optCat((bankBits > 0, r_bankData), (posBits > 0, r_bankPos), (idBits > 0, r_id))
} }
val a_bankPosition = if (posBits == 0) UInt(0) else a_source(sourceBits-1, idBits) // We need these Queues because AXI4 queues are irrevocable
val a_bankIndex = if (bankIndexBits == 0) UInt(0) else a_source(bankIndexBits-1, 0) val depth = if (combinational) 1 else 2
val r_bankIndex = if (bankIndexBits == 0) UInt(0) else r_id(bankIndexBits-1, 0)
val b_bankIndex = if (bankIndexBits == 0) UInt(0) else b_id(bankIndexBits-1, 0)
val a_bankSelect = UIntToOH(a_bankIndex, numBanks)
val r_bankSelect = UIntToOH(r_bankIndex, numBanks)
val b_bankSelect = UIntToOH(b_bankIndex, numBanks)

banks.zipWithIndex.foreach { case (q, i) =>
  // Push a_state into the banks
  q.io.enq.valid := in.a.fire() && a_last && a_bankSelect(i)
  q.io.enq.bits.pos  := a_bankPosition
  q.io.enq.bits.data := a_state >> implicitBits
  q.io.enq.bits.way  := Mux(a_isPut, UInt(0), UInt(1))
  // Pop the bank's ways
  q.io.deq(0).ready := out_b.fire() && b_bankSelect(i)
  q.io.deq(1).ready := out.r.fire() && r_bankSelect(i) && r_last
  // The FIFOs must be valid when we're ready to pop them...
  assert (q.io.deq(0).valid || !q.io.deq(0).ready)
  assert (q.io.deq(1).valid || !q.io.deq(1).ready)
}

val b_bankData = Vec(banks.map(_.io.deq(0).bits.data))(b_bankIndex)
val b_bankPos  = Vec(banks.map(_.io.deq(0).bits.pos ))(b_bankIndex)
val r_bankData = Vec(banks.map(_.io.deq(1).bits.data))(r_bankIndex)
val r_bankPos  = Vec(banks.map(_.io.deq(1).bits.pos ))(r_bankIndex)

def optCat(x: (Boolean, UInt)*) = { Cat(x.toList.filter(_._1).map(_._2)) }
b_state := optCat((bankBits > 0, b_bankData), (posBits > 0, b_bankPos), (idBits > 0, b_id))
r_state := optCat((bankBits > 0, r_bankData), (posBits > 0, r_bankPos), (idBits > 0, r_id))

// We need these Queues because AXI4 queues are irrevocable
val depth = if (combinational) 1 else 2
val out_arw = Wire(Decoupled(new AXI4BundleARW(out.params)))
val out_w = Wire(out.w)
out.w <> Queue.irrevocable(out_w, entries=depth, flow=combinational)
val queue_arw = Queue.irrevocable(out_arw, entries=depth, flow=combinational)
// Fan out the ARW channel to AR and AW
out.ar.bits := queue_arw.bits
out.aw.bits := queue_arw.bits
out.ar.valid := queue_arw.valid && !queue_arw.bits.wen
out.aw.valid := queue_arw.valid && queue_arw.bits.wen
queue_arw.ready := Mux(queue_arw.bits.wen, out.aw.ready, out.ar.ready)
val beatBytes = edgeIn.manager.beatBytes
val maxSize = UInt(log2Ceil(beatBytes))
val doneAW = RegInit(Bool(false))
when (in.a.fire()) { doneAW := !a_last }
val arw = out_arw.bits
arw.wen := a_isPut
arw.id := a_id // truncated
arw.addr := a_address
arw.len := UIntToOH1(a_size, AXI4Parameters.lenBits + log2Ceil(beatBytes)) >> log2Ceil(beatBytes)
arw.size := Mux(a_size >= maxSize, maxSize, a_size)
arw.burst := AXI4Parameters.BURST_INCR
arw.lock := UInt(0) // not exclusive (LR/SC unsupported b/c no forward progress guarantee)
arw.cache := UInt(0) // do not allow AXI to modify our transactions
arw.prot := AXI4Parameters.PROT_PRIVILEDGED
arw.qos := UInt(0) // no QoS
in.a.ready := Mux(a_isPut, (doneAW || out_arw.ready) && out_w.ready, out_arw.ready)
out_arw.valid := in.a.valid && Mux(a_isPut, !doneAW && out_w.ready, Bool(true))
out_w.valid := in.a.valid && a_isPut && (doneAW || out_arw.ready)
out_w.bits.data := in.a.bits.data
out_w.bits.strb := in.a.bits.mask
out_w.bits.last := a_last
// R and B => D arbitration
val r_holds_d = RegInit(Bool(false))
when (out.r.fire()) { r_holds_d := !out.r.bits.last }
// Give R higher priority than B
val r_wins = out.r.valid || r_holds_d
out.r.ready := in.d.ready
out_b.ready := in.d.ready && !r_wins
in.d.valid := Mux(r_wins, out.r.valid, out_b.valid)
val r_error = out.r.bits.resp =/= AXI4Parameters.RESP_OKAY
val b_error = out_b.bits.resp =/= AXI4Parameters.RESP_OKAY
val r_d = edgeIn.AccessAck(r_addr_lo, UInt(0), r_source, r_size, UInt(0), r_error)
val b_d = edgeIn.AccessAck(b_addr_lo, UInt(0), b_source, b_size, b_error)
in.d.bits := Mux(r_wins, r_d, b_d)
in.d.bits.data := out.r.bits.data // avoid a costly Mux
// Tie off unused channels
in.b.valid := Bool(false)
in.c.ready := Bool(true)
in.e.ready := Bool(true)
} }
} }
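
As an aside, the AR/AW fan-out above is an instance of a general irrevocable-stream steering pattern: both channels see the same bits, only the channel selected by wen raises valid, and ready is muxed back from the selected channel. A minimal, self-contained sketch of the same pattern follows; the FanOutBySelect module is hypothetical and not part of this commit, written in the Chisel 2 compatibility syntax used elsewhere in this tree.

import Chisel._

// Hypothetical illustration only: steer one irrevocable input to output `a`
// when sel is low, or to output `b` when sel is high; at most one output
// fires per beat, mirroring the queue_arw fan-out to AR and AW.
class FanOutBySelect(w: Int) extends Module {
  val io = new Bundle {
    val in  = Decoupled(UInt(width = w)).flip
    val sel = Bool(INPUT)
    val a   = Decoupled(UInt(width = w))
    val b   = Decoupled(UInt(width = w))
  }
  // Both outputs present the same payload
  io.a.bits := io.in.bits
  io.b.bits := io.in.bits
  // Only the selected output raises valid
  io.a.valid := io.in.valid && !io.sel
  io.b.valid := io.in.valid &&  io.sel
  // Consume the input only when the selected output accepts it
  io.in.ready := Mux(io.sel, io.b.ready, io.a.ready)
}

The pattern is safe only when the input is irrevocable and sel is derived from its bits, which is why the converter above inserts Queue.irrevocable before fanning out.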

View File

@ -12,8 +12,8 @@ import scala.math.{min,max}
class TLWidthWidget(innerBeatBytes: Int)(implicit p: Parameters) extends LazyModule class TLWidthWidget(innerBeatBytes: Int)(implicit p: Parameters) extends LazyModule
{ {
val node = TLAdapterNode( val node = TLAdapterNode(
clientFn = { case Seq(c) => c }, clientFn = { case c => c },
managerFn = { case Seq(m) => m.copy(beatBytes = innerBeatBytes) }) managerFn = { case m => m.copy(beatBytes = innerBeatBytes) })
lazy val module = new LazyModuleImp(this) { lazy val module = new LazyModuleImp(this) {
val io = new Bundle { val io = new Bundle {
@ -139,27 +139,24 @@ class TLWidthWidget(innerBeatBytes: Int)(implicit p: Parameters) extends LazyMod
} }
} }
val edgeOut = node.edgesOut(0)
val edgeIn = node.edgesIn(0)
val in = io.in(0)
val out = io.out(0)

splice(edgeIn, in.a, edgeOut, out.a)
splice(edgeOut, out.d, edgeIn, in.d)

if (edgeOut.manager.anySupportAcquireB && edgeIn.client.anySupportProbe) {
  splice(edgeOut, out.b, edgeIn, in.b)
  splice(edgeIn, in.c, edgeOut, out.c)
  in.e.ready := out.e.ready
  out.e.valid := in.e.valid
  out.e.bits := in.e.bits
} else {
  in.b.valid := Bool(false)
  in.c.ready := Bool(true)
  in.e.ready := Bool(true)
  out.b.ready := Bool(true)
  out.c.valid := Bool(false)
  out.e.valid := Bool(false)
}

((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
  splice(edgeIn, in.a, edgeOut, out.a)
  splice(edgeOut, out.d, edgeIn, in.d)

  if (edgeOut.manager.anySupportAcquireB && edgeIn.client.anySupportProbe) {
    splice(edgeOut, out.b, edgeIn, in.b)
    splice(edgeIn, in.c, edgeOut, out.c)
    in.e.ready := out.e.ready
    out.e.valid := in.e.valid
    out.e.bits := in.e.bits
  } else {
    in.b.valid := Bool(false)
    in.c.ready := Bool(true)
    in.e.ready := Bool(true)
    out.b.ready := Bool(true)
    out.c.valid := Bool(false)
    out.e.valid := Bool(false)
  }
}
} }
} }
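
The module body now iterates over every connected port pair instead of assuming exactly one, pairing each input with its output and with the edges negotiated on both sides. A plain-Scala sketch of that zip/foreach idiom is shown below; every type and name in it is a hypothetical stand-in, not the diplomacy API itself.

// Hypothetical stand-ins for the negotiated edge and port types,
// used only to show how the pairing is formed.
object ZipForeachSketch extends App {
  case class Edge(beatBytes: Int)
  case class Port(name: String)

  val ioIn     = Seq(Port("in0"),  Port("in1"))
  val ioOut    = Seq(Port("out0"), Port("out1"))
  val edgesIn  = Seq(Edge(4), Edge(8))
  val edgesOut = Seq(Edge(8), Edge(8))

  // Each input port is rewired to its corresponding output port using the
  // parameters negotiated on the inner and outer edges.
  ((ioIn zip ioOut) zip (edgesIn zip edgesOut)) foreach { case ((in, out), (eIn, eOut)) =>
    println(s"splice ${in.name} -> ${out.name}: ${eIn.beatBytes}B in, ${eOut.beatBytes}B out")
  }
}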

View File

@ -34,7 +34,7 @@ class TLXbar(policy: TLArbiter.Policy = TLArbiter.lowestIndexFirst)(implicit p:
} }
} }
val node = TLAdapterNode( val node = TLNexusNode(
numClientPorts = 1 to 32, numClientPorts = 1 to 32,
numManagerPorts = 1 to 32, numManagerPorts = 1 to 32,
clientFn = { seq => clientFn = { seq =>