1
0

tilelink2: ToAXI4 provide FIFO order semantics

This commit is contained in:
Wesley W. Terpstra 2017-04-25 17:55:06 -07:00
parent 61a6f94196
commit 9f08c484bd

View File

@ -10,14 +10,19 @@ import util.PositionalMultiQueue
import uncore.axi4._ import uncore.axi4._
import scala.math.{min, max} import scala.math.{min, max}
case class TLToAXI4Node(idBits: Int) extends MixedAdapterNode(TLImp, AXI4Imp)( case class TLToAXI4Node(beatBytes: Int) extends MixedAdapterNode(TLImp, AXI4Imp)(
dFn = { _ => dFn = { p =>
// We must erase all client information, because we crush their source Ids val idSize = p.clients.map { c => if (c.requestFifo) 1 else c.sourceId.size }
val masters = Seq( val idStart = idSize.scanLeft(0)(_+_).init
val masters = ((idStart zip idSize) zip p.clients) map { case ((start, size), c) =>
AXI4MasterParameters( AXI4MasterParameters(
id = IdRange(0, 1 << idBits), id = IdRange(start, start+size),
aligned = true)) aligned = true,
AXI4MasterPortParameters(masters) nodePath = c.nodePath)
}
AXI4MasterPortParameters(
masters = masters,
userBits = log2Ceil(p.endSourceId) + 4 + log2Ceil(beatBytes))
}, },
uFn = { p => TLManagerPortParameters( uFn = { p => TLManagerPortParameters(
managers = p.slaves.map { case s => managers = p.slaves.map { case s =>
@ -29,15 +34,15 @@ case class TLToAXI4Node(idBits: Int) extends MixedAdapterNode(TLImp, AXI4Imp)(
nodePath = s.nodePath, nodePath = s.nodePath,
supportsGet = s.supportsRead, supportsGet = s.supportsRead,
supportsPutFull = s.supportsWrite, supportsPutFull = s.supportsWrite,
supportsPutPartial = s.supportsWrite)}, supportsPutPartial = s.supportsWrite,
// AXI4 is NEVER fifo in TL sense (R+W are independent) fifoId = Some(0))},
beatBytes = p.beatBytes, beatBytes = p.beatBytes,
minLatency = p.minLatency) minLatency = p.minLatency)
}) })
class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: Parameters) extends LazyModule class TLToAXI4(beatBytes: Int, combinational: Boolean = true)(implicit p: Parameters) extends LazyModule
{ {
val node = TLToAXI4Node(idBits) val node = TLToAXI4Node(beatBytes)
lazy val module = new LazyModuleImp(this) { lazy val module = new LazyModuleImp(this) {
val io = new Bundle { val io = new Bundle {
@ -52,24 +57,23 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P
require (slaves(0).interleavedId.isDefined) require (slaves(0).interleavedId.isDefined)
slaves.foreach { s => require (s.interleavedId == slaves(0).interleavedId) } slaves.foreach { s => require (s.interleavedId == slaves(0).interleavedId) }
// We need to ensure that a slave does not stall trying to send B while we need to receive R // Construct the source=>ID mapping table
// Since R&W have independent flow control, it is possible for a W to cut in-line and get into val idTable = Wire(Vec(edgeIn.client.endSourceId, out.aw.bits.id))
// a slave's buffers, preventing us from getting all the R responses we need to release D for B. (edgeIn.client.clients zip edgeOut.master.masters) foreach { case (c, m) =>
// This risk is compounded by an AXI fragmentation. Even a slave which responds completely to for (i <- 0 until c.sourceId.size) {
// AR before working on AW might have an AW slipped between two AR fragments. idTable(c.sourceId.start + i) := UInt(m.id.start + (if (c.requestFifo) 0 else i))
val out_b = Queue.irrevocable(out.b, entries=edgeIn.client.endSourceId, flow=combinational) }
}
// We need to keep the following state from A => D: (addr_lo, size, source) // We need to keep the following state from A => D: (addr_lo, size, source)
// All of those fields could potentially require 0 bits (argh. Chisel.) // All of those fields could potentially require 0 bits (argh. Chisel.)
// We will pack as many of the lowest bits of state as fit into the AXI ID. // We will pack all of that extra information into the user bits.
// Any bits left-over must be put into a bank of Queues.
// The Queues are indexed by as many of the source bits as fit into the AXI ID.
// The Queues are deep enough that every source has guaranteed space in its Queue.
val sourceBits = log2Ceil(edgeIn.client.endSourceId) val sourceBits = log2Ceil(edgeIn.client.endSourceId)
val sizeBits = log2Ceil(edgeIn.maxLgSize+1) val sizeBits = log2Ceil(edgeIn.maxLgSize+1)
val addrBits = log2Ceil(edgeIn.manager.beatBytes) val addrBits = log2Ceil(edgeIn.manager.beatBytes)
val stateBits = addrBits + sizeBits + sourceBits // could be 0 val stateBits = addrBits + sizeBits + sourceBits // could be 0
require (stateBits <= out.aw.bits.params.userBits)
val a_address = edgeIn.address(in.a.bits) val a_address = edgeIn.address(in.a.bits)
val a_addr_lo = edgeIn.addr_lo(a_address) val a_addr_lo = edgeIn.addr_lo(a_address)
@ -91,73 +95,17 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P
require (addrEnd == stateBits) require (addrEnd == stateBits)
val a_state = (a_source << sourceOff) | (a_size << sizeOff) | (a_addr_lo << addrOff) val a_state = (a_source << sourceOff) | (a_size << sizeOff) | (a_addr_lo << addrOff)
val a_id = if (idBits == 0) UInt(0) else a_state
val r_state = Wire(UInt(width = stateBits)) val r_state = out.r.bits.user.getOrElse(UInt(0))
val r_source = if (sourceBits > 0) r_state(sourceEnd-1, sourceOff) else UInt(0) val r_source = if (sourceBits > 0) r_state(sourceEnd-1, sourceOff) else UInt(0)
val r_size = if (sizeBits > 0) r_state(sizeEnd -1, sizeOff) else UInt(0) val r_size = if (sizeBits > 0) r_state(sizeEnd -1, sizeOff) else UInt(0)
val r_addr_lo = if (addrBits > 0) r_state(addrEnd -1, addrOff) else UInt(0) val r_addr_lo = if (addrBits > 0) r_state(addrEnd -1, addrOff) else UInt(0)
val b_state = Wire(UInt(width = stateBits)) val b_state = out.b.bits.user.getOrElse(UInt(0))
val b_source = if (sourceBits > 0) b_state(sourceEnd-1, sourceOff) else UInt(0) val b_source = if (sourceBits > 0) b_state(sourceEnd-1, sourceOff) else UInt(0)
val b_size = if (sizeBits > 0) b_state(sizeEnd -1, sizeOff) else UInt(0) val b_size = if (sizeBits > 0) b_state(sizeEnd -1, sizeOff) else UInt(0)
val b_addr_lo = if (addrBits > 0) b_state(addrEnd -1, addrOff) else UInt(0) val b_addr_lo = if (addrBits > 0) b_state(addrEnd -1, addrOff) else UInt(0)
val r_last = out.r.bits.last
val r_id = out.r.bits.id
val b_id = out_b.bits.id
if (stateBits <= idBits) { // No need for any state tracking
r_state := r_id
b_state := b_id
} else {
val bankIndexBits = min(sourceBits, idBits)
val posBits = max(0, sourceBits - idBits)
val implicitBits = max(idBits, sourceBits)
val bankBits = stateBits - implicitBits
val numBanks = min(1 << bankIndexBits, edgeIn.client.endSourceId)
def bankEntries(i: Int) = (edgeIn.client.endSourceId+numBanks-i-1) / numBanks
val banks = Seq.tabulate(numBanks) { i =>
// We know there can only be as many outstanding requests as TL sources
// However, AXI read and write queues are not mutually FIFO.
// Therefore, we want to pop them individually, but share the storage.
val bypass = combinational && edgeOut.slave.minLatency == 0
PositionalMultiQueue(UInt(width=max(1,bankBits)), positions=bankEntries(i), ways=2, combinational=bypass)
}
val a_bankPosition = if (posBits == 0) UInt(0) else a_source(sourceBits-1, idBits)
val a_bankIndex = if (bankIndexBits == 0) UInt(0) else a_source(bankIndexBits-1, 0)
val r_bankIndex = if (bankIndexBits == 0) UInt(0) else r_id(bankIndexBits-1, 0)
val b_bankIndex = if (bankIndexBits == 0) UInt(0) else b_id(bankIndexBits-1, 0)
val a_bankSelect = UIntToOH(a_bankIndex, numBanks)
val r_bankSelect = UIntToOH(r_bankIndex, numBanks)
val b_bankSelect = UIntToOH(b_bankIndex, numBanks)
banks.zipWithIndex.foreach { case (q, i) =>
// Push a_state into the banks
q.io.enq.valid := in.a.fire() && a_last && a_bankSelect(i)
q.io.enq.bits.pos := a_bankPosition
q.io.enq.bits.data := a_state >> implicitBits
q.io.enq.bits.way := Mux(a_isPut, UInt(0), UInt(1))
// Pop the bank's ways
q.io.deq(0).ready := out_b.fire() && b_bankSelect(i)
q.io.deq(1).ready := out.r.fire() && r_bankSelect(i) && r_last
// The FIFOs must be valid when we're ready to pop them...
assert (q.io.deq(0).valid || !q.io.deq(0).ready)
assert (q.io.deq(1).valid || !q.io.deq(1).ready)
}
val b_bankData = Vec(banks.map(_.io.deq(0).bits.data))(b_bankIndex)
val b_bankPos = Vec(banks.map(_.io.deq(0).bits.pos ))(b_bankIndex)
val r_bankData = Vec(banks.map(_.io.deq(1).bits.data))(r_bankIndex)
val r_bankPos = Vec(banks.map(_.io.deq(1).bits.pos ))(r_bankIndex)
def optCat(x: (Boolean, UInt)*) = { Cat(x.toList.filter(_._1).map(_._2)) }
b_state := optCat((bankBits > 0, b_bankData), (posBits > 0, b_bankPos), (idBits > 0, b_id))
r_state := optCat((bankBits > 0, r_bankData), (posBits > 0, r_bankPos), (idBits > 0, r_id))
}
// We need these Queues because AXI4 queues are irrevocable // We need these Queues because AXI4 queues are irrevocable
val depth = if (combinational) 1 else 2 val depth = if (combinational) 1 else 2
val out_arw = Wire(Decoupled(new AXI4BundleARW(out.params))) val out_arw = Wire(Decoupled(new AXI4BundleARW(out.params)))
@ -179,7 +127,7 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P
val arw = out_arw.bits val arw = out_arw.bits
arw.wen := a_isPut arw.wen := a_isPut
arw.id := a_id // truncated arw.id := idTable(a_source)
arw.addr := a_address arw.addr := a_address
arw.len := UIntToOH1(a_size, AXI4Parameters.lenBits + log2Ceil(beatBytes)) >> log2Ceil(beatBytes) arw.len := UIntToOH1(a_size, AXI4Parameters.lenBits + log2Ceil(beatBytes)) >> log2Ceil(beatBytes)
arw.size := Mux(a_size >= maxSize, maxSize, a_size) arw.size := Mux(a_size >= maxSize, maxSize, a_size)
@ -188,7 +136,9 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P
arw.cache := UInt(0) // do not allow AXI to modify our transactions arw.cache := UInt(0) // do not allow AXI to modify our transactions
arw.prot := AXI4Parameters.PROT_PRIVILEDGED arw.prot := AXI4Parameters.PROT_PRIVILEDGED
arw.qos := UInt(0) // no QoS arw.qos := UInt(0) // no QoS
arw.user.foreach { _ := a_state }
// !!! Mix R-W stall here
in.a.ready := Mux(a_isPut, (doneAW || out_arw.ready) && out_w.ready, out_arw.ready) in.a.ready := Mux(a_isPut, (doneAW || out_arw.ready) && out_w.ready, out_arw.ready)
out_arw.valid := in.a.valid && Mux(a_isPut, !doneAW && out_w.ready, Bool(true)) out_arw.valid := in.a.valid && Mux(a_isPut, !doneAW && out_w.ready, Bool(true))
@ -204,11 +154,11 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P
val r_wins = out.r.valid || r_holds_d val r_wins = out.r.valid || r_holds_d
out.r.ready := in.d.ready out.r.ready := in.d.ready
out_b.ready := in.d.ready && !r_wins out.b.ready := in.d.ready && !r_wins
in.d.valid := Mux(r_wins, out.r.valid, out_b.valid) in.d.valid := Mux(r_wins, out.r.valid, out.b.valid)
val r_error = out.r.bits.resp =/= AXI4Parameters.RESP_OKAY val r_error = out.r.bits.resp =/= AXI4Parameters.RESP_OKAY
val b_error = out_b.bits.resp =/= AXI4Parameters.RESP_OKAY val b_error = out.b.bits.resp =/= AXI4Parameters.RESP_OKAY
val r_d = edgeIn.AccessAck(r_addr_lo, UInt(0), r_source, r_size, UInt(0), r_error) val r_d = edgeIn.AccessAck(r_addr_lo, UInt(0), r_source, r_size, UInt(0), r_error)
val b_d = edgeIn.AccessAck(b_addr_lo, UInt(0), b_source, b_size, b_error) val b_d = edgeIn.AccessAck(b_addr_lo, UInt(0), b_source, b_size, b_error)
@ -226,9 +176,9 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P
object TLToAXI4 object TLToAXI4
{ {
// applied to the TL source node; y.node := TLToAXI4(idBits)(x.node) // applied to the TL source node; y.node := TLToAXI4(beatBytes)(x.node)
def apply(idBits: Int, combinational: Boolean = true)(x: TLOutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): AXI4OutwardNode = { def apply(beatBytes: Int, combinational: Boolean = true)(x: TLOutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): AXI4OutwardNode = {
val axi4 = LazyModule(new TLToAXI4(idBits, combinational)) val axi4 = LazyModule(new TLToAXI4(beatBytes, combinational))
axi4.node := x axi4.node := x
axi4.node axi4.node
} }