Merge pull request #695 from ucb-bar/pipeline-mmio

Pipeline AXI4 MMIO
Henry Cook 2017-05-03 11:25:24 -07:00 committed by GitHub
commit cd547fabbd
24 changed files with 741 additions and 369 deletions
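In short, the external AXI4 MMIO port keeps the TLToAXI4 bridge but now surrounds it with ID-indexing, de-interleaving, and user-bit-stripping stages so the port can be pipelined. The wiring below is copied from PeripheryMasterAXI4MMIO in this diff and annotated for orientation; cacheBlockBytes, config.idBits, config.beatBytes and socBusConfig.beatBytes come from the surrounding trait:

mmio_axi4 :=
  AXI4Buffer()(                          // buffer each AXI channel to break long paths
  AXI4UserYanker()(                      // remove the internal user bits before the external port
  AXI4Deinterleaver(cacheBlockBytes)(    // per-ID buffering so R bursts are never interleaved
  AXI4IdIndexer(config.idBits)(          // compress AXI IDs down to the port's idBits
  TLToAXI4(config.beatBytes)(            // TileLink -> AXI4; request state travels in user bits
  TLWidthWidget(socBusConfig.beatBytes)( // convert width before attaching to socBus
    socBus.node))))))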

View File

@ -32,7 +32,9 @@ trait CoreplexNetwork extends HasCoreplexParameters {
// Allows a variable number of inputs from outside to the Xbar
private val l2in_buffer = LazyModule(new TLBuffer)
l1tol2.node :=* l2in_buffer.node
private val l2in_fifo = LazyModule(new TLFIFOFixer)
l1tol2.node :=* l2in_fifo.node
l2in_fifo.node :=* l2in_buffer.node
l2in_buffer.node :=* l2in
private val l2out_buffer = LazyModule(new TLBuffer(BufferParams.flow, BufferParams.none))

View File

@ -17,14 +17,16 @@ object RegionType {
}
// A non-empty half-open range; [start, end)
case class IdRange(start: Int, end: Int)
case class IdRange(start: Int, end: Int) extends Ordered[IdRange]
{
require (start >= 0, s"Ids cannot be negative, but got: $start.")
require (start < end, "Id ranges cannot be empty.")
// This is a strict partial ordering
def <(x: IdRange) = end <= x.start
def >(x: IdRange) = x < this
def compare(x: IdRange) = {
val primary = (this.start - x.start).signum
val secondary = (x.end - this.end).signum
if (primary != 0) primary else secondary
}
def overlaps(x: IdRange) = start < x.end && x.start < end
def contains(x: IdRange) = start <= x.start && x.end <= end
@ -43,6 +45,14 @@ case class IdRange(start: Int, end: Int)
def range = start until end
}
object IdRange
{
def overlaps(s: Seq[IdRange]) = if (s.isEmpty) None else {
val ranges = s.sorted
(ranges.tail zip ranges.init) find { case (a, b) => a overlaps b }
}
}
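As a side note, a small usage sketch (not part of the diff) of the new Ordered IdRange plus the IdRange.overlaps helper, mirroring how the port-parameter checks in this change use it; the ranges here are made up:
val ids = Seq(IdRange(0, 4), IdRange(8, 12), IdRange(2, 6))
IdRange.overlaps(ids).foreach { case (x, y) =>
  require (!x.overlaps(y), s"$x and $y overlap") // fails here: [2,6) overlaps [0,4)
}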
// A potentially empty inclusive range of 2-powers [min, max] (in bytes)
case class TransferSizes(min: Int, max: Int)
{

View File

@ -104,7 +104,7 @@ trait CanHaveScratchpad extends HasHellaCache with HasICacheFrontend with HasCor
val slaveNode = TLInputNode() // Up to two uses for this input node:
// 1) Frontend always exists, but may or may not have a scratchpad node
val fg = LazyModule(new TLFragmenter(fetchWidth*coreInstBytes, p(CacheBlockBytes), true))
val fg = LazyModule(new TLFragmenter(fetchWidth*coreInstBytes, p(CacheBlockBytes), earlyAck=true))
val ww = LazyModule(new TLWidthWidget(xLen/8))
frontend.slaveNode :*= fg.node
fg.node :*= ww.node
@ -113,7 +113,7 @@ trait CanHaveScratchpad extends HasHellaCache with HasICacheFrontend with HasCor
// 2) ScratchpadSlavePort always has a node, but only exists when the HellaCache has a scratchpad
val scratch = tileParams.dcache.flatMap(d => d.scratch.map(s =>
LazyModule(new ScratchpadSlavePort(AddressSet(s, d.dataScratchpadBytes-1)))))
scratch foreach { lm => lm.node := TLFragmenter(xLen/8, p(CacheBlockBytes))(slaveNode) }
scratch foreach { lm => lm.node := TLFragmenter(xLen/8, p(CacheBlockBytes), earlyAck=true)(slaveNode) }
def findScratchpadFromICache: Option[AddressSet] = scratch.map { s =>
val finalNode = frontend.masterNode.edgesOut.head.manager.managers.find(_.nodePath.last == s.node)

View File

@ -39,9 +39,10 @@ class BasePlatformConfig extends Config((site, here, up) => {
case IncludeJtagDTM => false
case JtagDTMKey => new JtagDTMKeyDefault()
case ZeroConfig => ZeroConfig(base=0xa000000L, size=0x2000000L, beatBytes=8)
case ErrorConfig => ErrorConfig(Seq(AddressSet(0x3000, 0xfff)))
case ExtMem => MasterConfig(base=0x80000000L, size=0x10000000L, beatBytes=8, idBits=4)
case ExtBus => MasterConfig(base=0x60000000L, size=0x20000000L, beatBytes=8, idBits=4)
case ExtIn => SlaveConfig(beatBytes=8, idBits=8, sourceBits=2)
case ExtIn => SlaveConfig(beatBytes=8, idBits=8, sourceBits=4)
})
/** Actual elaboratable target Configs */

View File

@ -10,6 +10,7 @@ import rocketchip._
/** Example Top with Periphery (w/o coreplex) */
abstract class ExampleTop(implicit p: Parameters) extends BaseTop
with PeripheryAsyncExtInterrupts
with PeripheryErrorSlave
with PeripheryMasterAXI4Mem
with PeripheryMasterAXI4MMIO
with PeripherySlaveAXI4 {
@ -18,12 +19,14 @@ abstract class ExampleTop(implicit p: Parameters) extends BaseTop
class ExampleTopBundle[+L <: ExampleTop](_outer: L) extends BaseTopBundle(_outer)
with PeripheryExtInterruptsBundle
with PeripheryErrorSlaveBundle
with PeripheryMasterAXI4MemBundle
with PeripheryMasterAXI4MMIOBundle
with PeripherySlaveAXI4Bundle
class ExampleTopModule[+L <: ExampleTop, +B <: ExampleTopBundle[L]](_outer: L, _io: () => B) extends BaseTopModule(_outer, _io)
with PeripheryExtInterruptsModule
with PeripheryErrorSlaveModule
with PeripheryMasterAXI4MemModule
with PeripheryMasterAXI4MMIOModule
with PeripherySlaveAXI4Module

View File

@ -13,7 +13,7 @@ import uncore.converters._
import uncore.devices._
import uncore.util._
import util._
import scala.math.max
import scala.math.{min,max}
/** Specifies the size of external memory */
case class MasterConfig(base: Long, size: Long, beatBytes: Int, idBits: Int)
@ -33,6 +33,9 @@ case object SOCBusConfig extends Field[TLBusConfig]
/* Specifies the location of the Zero device */
case class ZeroConfig(base: Long, size: Long, beatBytes: Int)
case object ZeroConfig extends Field[ZeroConfig]
/* Specifies the location of the Error device */
case class ErrorConfig(address: Seq[AddressSet])
case object ErrorConfig extends Field[ErrorConfig]
/** Utility trait for quick access to some relevant parameters */
trait HasPeripheryParameters {
@ -131,12 +134,16 @@ trait PeripheryMasterAXI4Mem {
beatBytes = config.beatBytes)
})
private val converter = LazyModule(new TLToAXI4(config.idBits))
private val converter = LazyModule(new TLToAXI4(config.beatBytes))
private val trim = LazyModule(new AXI4IdIndexer(config.idBits))
private val yank = LazyModule(new AXI4UserYanker)
private val buffer = LazyModule(new AXI4Buffer)
mem foreach { case xbar =>
converter.node := xbar.node
buffer.node := converter.node
trim.node := converter.node
yank.node := trim.node
buffer.node := yank.node
mem_axi4 := buffer.node
}
}
@ -199,16 +206,17 @@ trait PeripheryMasterAXI4MMIO {
resources = device.reg,
executable = true, // Can we run programs on this memory?
supportsWrite = TransferSizes(1, 256), // The slave supports 1-256 byte transfers
supportsRead = TransferSizes(1, 256),
interleavedId = Some(0))), // slave does not interleave read responses
supportsRead = TransferSizes(1, 256))),
beatBytes = config.beatBytes)))
mmio_axi4 :=
AXI4Buffer()(
// AXI4Fragmenter(lite=false, maxInFlight = 20)( // beef device up to support awlen = 0xff
TLToAXI4(idBits = config.idBits)( // use idBits = 0 for AXI4-Lite
AXI4UserYanker()(
AXI4Deinterleaver(cacheBlockBytes)(
AXI4IdIndexer(config.idBits)(
TLToAXI4(config.beatBytes)(
TLWidthWidget(socBusConfig.beatBytes)( // convert width before attaching to socBus
socBus.node)))
socBus.node))))))
}
trait PeripheryMasterAXI4MMIOBundle {
@ -235,12 +243,14 @@ trait PeripherySlaveAXI4 extends HasTopLevelNetworks {
masters = Seq(AXI4MasterParameters(
id = IdRange(0, 1 << config.idBits))))))
private val fifoBits = 1
fsb.node :=
TLSourceShrinker(1 << config.sourceBits)(
TLWidthWidget(config.beatBytes)(
AXI4ToTL()(
AXI4UserYanker(Some(1 << (config.sourceBits - fifoBits - 1)))(
AXI4Fragmenter()(
l2FrontendAXI4Node))))
AXI4IdIndexer(fifoBits)(
l2FrontendAXI4Node)))))
}
trait PeripherySlaveAXI4Bundle extends HasTopLevelNetworksBundle {
@ -388,3 +398,26 @@ trait PeripheryTestBusMasterModule {
val io: PeripheryTestBusMasterBundle
} =>
}
/////
trait PeripheryErrorSlave {
this: HasTopLevelNetworks =>
private val config = p(ErrorConfig)
private val maxXfer = min(config.address.map(_.alignment).max.toInt, 4096)
val error = LazyModule(new TLError(config.address, peripheryBusConfig.beatBytes))
error.node := TLFragmenter(peripheryBusConfig.beatBytes, maxXfer)(peripheryBus.node)
}
trait PeripheryErrorSlaveBundle {
this: HasTopLevelNetworksBundle {
val outer: PeripheryErrorSlave
} =>
}
trait PeripheryErrorSlaveModule {
this: HasTopLevelNetworksModule {
val outer: PeripheryErrorSlave
val io: PeripheryErrorSlaveBundle
} =>
}
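A hypothetical config fragment (not in the diff; WithErrorAt and its arguments are illustrative names) showing how a platform config could relocate the error device via the new ErrorConfig key:
class WithErrorAt(base: Long, sizeBytes: Long) extends Config((site, here, up) => {
  // AddressSet takes (base, mask), so sizeBytes must be a power of two
  case ErrorConfig => ErrorConfig(Seq(AddressSet(base, sizeBytes - 1)))
})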

View File

@ -53,7 +53,7 @@ class SimAXIMem(channels: Int, forceSize: BigInt = 0)(implicit p: Parameters) ex
for (i <- 0 until channels) {
val sram = LazyModule(new AXI4RAM(AddressSet(0, size-1), beatBytes = config.beatBytes))
sram.node := AXI4Buffer()(AXI4Fragmenter(maxInFlight = 4)(node))
sram.node := AXI4Buffer()(AXI4Fragmenter()(node))
}
lazy val module = new LazyModuleImp(this) {

View File

@ -19,6 +19,7 @@ abstract class AXI4BundleA(params: AXI4BundleParameters) extends AXI4BundleBase(
val cache = UInt(width = params.cacheBits)
val prot = UInt(width = params.protBits)
val qos = UInt(width = params.qosBits) // 0=no QoS, bigger = higher priority
val user = if (params.userBits > 0) Some(UInt(width = params.userBits)) else None
// val region = UInt(width = 4) // optional
// Number of bytes-1 in this operation
@ -51,6 +52,7 @@ class AXI4BundleR(params: AXI4BundleParameters) extends AXI4BundleBase(params)
val id = UInt(width = params.idBits)
val data = UInt(width = params.dataBits)
val resp = UInt(width = params.respBits)
val user = if (params.userBits > 0) Some(UInt(width = params.userBits)) else None
val last = Bool()
}
@ -58,6 +60,7 @@ class AXI4BundleB(params: AXI4BundleParameters) extends AXI4BundleBase(params)
{
val id = UInt(width = params.idBits)
val resp = UInt(width = params.respBits)
val user = if (params.userBits > 0) Some(UInt(width = params.userBits)) else None
}
class AXI4Bundle(params: AXI4BundleParameters) extends AXI4BundleBase(params)

View File

@ -0,0 +1,102 @@
// See LICENSE.SiFive for license details.
package uncore.axi4
import Chisel._
import chisel3.internal.sourceinfo.SourceInfo
import chisel3.util.IrrevocableIO
import config._
import diplomacy._
import scala.math.{min,max}
import uncore.tilelink2.{leftOR, rightOR, UIntToOH1, OH1ToOH}
class AXI4Deinterleaver(maxReadBytes: Int)(implicit p: Parameters) extends LazyModule
{
require (maxReadBytes >= 1 && isPow2(maxReadBytes))
val node = AXI4AdapterNode(
masterFn = { mp => mp },
slaveFn = { sp => sp.copy(slaves = sp.slaves.map(s => s.copy(
supportsRead = s.supportsRead.intersect(TransferSizes(1, maxReadBytes)),
interleavedId = Some(0))))
})
lazy val module = new LazyModuleImp(this) {
val io = new Bundle {
val in = node.bundleIn
val out = node.bundleOut
}
((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
val queues = edgeOut.master.endId
val beatBytes = edgeOut.slave.beatBytes
val beats = (maxReadBytes+beatBytes-1) / beatBytes
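// e.g. maxReadBytes = 64, beatBytes = 8 => beats = 8, so each per-ID queue below holds one full read burst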
// This adapter leaves the control + write paths completely untouched
out.ar <> in.ar
out.aw <> in.aw
out.w <> in.w
in.b <> out.b
if (queues == 1) {
// Gracefully do nothing
in.r <> out.r
} else {
// Buffer R response
val count = RegInit(Vec.fill(queues) { UInt(0, width=log2Ceil(beats+1)) })
val qs = Seq.fill(queues) { Module(new Queue(out.r.bits, beats)) }
// Which ID is being enqueued and dequeued?
val locked = RegInit(Bool(false))
val deq_id = Reg(UInt(width=log2Ceil(queues)))
val enq_id = out.r.bits.id
val deq_OH = UIntToOH(deq_id, queues)
val enq_OH = UIntToOH(enq_id, queues)
// Track the number of completely received bursts per FIFO id
val next_count = Wire(count)
((count zip next_count) zip (enq_OH.toBools zip deq_OH.toBools)) foreach { case ((p, n), (i, d)) =>
val inc = i && out.r.fire() && out.r.bits.last
val dec = d && in.r.fire() && in.r.bits.last
n := p + inc.asUInt - dec.asUInt
// Bounds checking
assert (!dec || p =/= UInt(0))
assert (!inc || p =/= UInt(beats))
}
count := next_count
// Select which Q we will start sending from next cycle
val pending = Cat(next_count.map(_ =/= UInt(0)).reverse)
val winner = pending & ~(leftOR(pending) << 1)
when (!locked || (in.r.fire() && in.r.bits.last)) {
locked := pending.orR
deq_id := OHToUInt(winner)
}
// Transmit the selected burst to inner
in.r.valid := locked
in.r.bits := Vec(qs.map(_.io.deq.bits))(deq_id)
(deq_OH.toBools zip qs) foreach { case (s, q) =>
q.io.deq.ready := s && in.r.fire()
}
// Feed response into matching Q
out.r.ready := Vec(qs.map(_.io.enq.ready))(enq_id)
(enq_OH.toBools zip qs) foreach { case (s, q) =>
q.io.enq.valid := s && out.r.valid
q.io.enq.bits := out.r.bits
}
}
}
}
}
object AXI4Deinterleaver
{
// applied to the AXI4 source node; y.node := AXI4Deinterleaver(maxReadBytes)(x.node)
def apply(maxReadBytes: Int)(x: AXI4OutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): AXI4OutwardNode = {
val deinterleaver = LazyModule(new AXI4Deinterleaver(maxReadBytes))
deinterleaver.node := x
deinterleaver.node
}
}

View File

@ -10,8 +10,7 @@ import diplomacy._
import scala.math.{min,max}
import uncore.tilelink2.{leftOR, rightOR, UIntToOH1, OH1ToOH}
// lite: masters all use only one ID => reads will not be interleaved
class AXI4Fragmenter(lite: Boolean = false, maxInFlight: => Int = 32, combinational: Boolean = true)(implicit p: Parameters) extends LazyModule
class AXI4Fragmenter()(implicit p: Parameters) extends LazyModule
{
val maxBeats = 1 << AXI4Parameters.lenBits
def expandTransfer(x: TransferSizes, beatBytes: Int, alignment: BigInt) =
@ -19,11 +18,11 @@ class AXI4Fragmenter(lite: Boolean = false, maxInFlight: => Int = 32, combinatio
def mapSlave(s: AXI4SlaveParameters, beatBytes: Int) = s.copy(
supportsWrite = expandTransfer(s.supportsWrite, beatBytes, s.minAlignment),
supportsRead = expandTransfer(s.supportsRead, beatBytes, s.minAlignment),
interleavedId = if (lite) Some(0) else s.interleavedId) // see AXI4FragmenterSideband for !lite case
interleavedId = None) // this breaks interleaving guarantees
def mapMaster(m: AXI4MasterParameters) = m.copy(aligned = true)
val node = AXI4AdapterNode(
masterFn = { mp => mp.copy(masters = mp.masters.map(m => mapMaster(m))) },
masterFn = { mp => mp.copy(masters = mp.masters.map(m => mapMaster(m)), userBits = mp.userBits + 1) },
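// the one extra user bit carries the 'last fragment' marker from AR/AW around to the matching R/B response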
slaveFn = { sp => sp.copy(slaves = sp.slaves .map(s => mapSlave(s, sp.beatBytes))) })
lazy val module = new LazyModuleImp(this) {
@ -40,9 +39,6 @@ class AXI4Fragmenter(lite: Boolean = false, maxInFlight: => Int = 32, combinatio
val master = edgeIn.master
val masters = master.masters
// If the user claimed this was a lite interface, then there must be only one Id
require (!lite || master.endId == 1)
// We don't support fragmenting to sub-beat accesses
slaves.foreach { s =>
require (!s.supportsRead || s.supportsRead.contains(beatBytes))
@ -139,154 +135,77 @@ class AXI4Fragmenter(lite: Boolean = false, maxInFlight: => Int = 32, combinatio
val readSizes1 = slaves.map(s => s.supportsRead .max/beatBytes-1)
val writeSizes1 = slaves.map(s => s.supportsWrite.max/beatBytes-1)
// Indirection variables for inputs and outputs; makes transformation application easier
// Irrevocable queues in front because we want to accept the request before responses come back
val (in_ar, ar_last, _) = fragment(Queue.irrevocable(in.ar, 1, flow=true), readSizes1)
val (in_aw, aw_last, w_beats) = fragment(Queue.irrevocable(in.aw, 1, flow=true), writeSizes1)
val in_w = in.w
val in_r = in.r
val in_b = in.b
val out_ar = Wire(out.ar)
val out_aw = out.aw
val out_w = out.w
val out_r = Wire(out.r)
val out_b = Wire(out.b)
val depth = if (combinational) 1 else 2
// In case a slave ties arready := rready, we need a queue to break the combinational loop
// between the two branches (in_ar => {out_ar => out_r, sideband} => in_r).
if (in.ar.bits.getWidth < in.r.bits.getWidth) {
out.ar <> Queue(out_ar, depth, flow=combinational)
out_r <> out.r
} else {
out.ar <> out_ar
out_r <> Queue(out.r, depth, flow=combinational)
}
// In case a slave ties awready := bready or wready := bready, we need this queue
out_b <> Queue(out.b, depth, flow=combinational)
// AXI ready may not depend on valid of other channels
// We cut wready here along with awready and arready before AXI4ToTL
val in_w = Queue.irrevocable(in.w, 1, flow=true)
// Sideband to track which transfers were the last fragment
def sideband() = if (lite) {
Module(new Queue(Bool(), maxInFlight, flow=combinational)).io
} else {
Module(new AXI4FragmenterSideband(maxInFlight, flow=combinational)).io
}
val sideband_ar_r = sideband()
val sideband_aw_b = sideband()
// AR flow control
out_ar.valid := in_ar.valid && sideband_ar_r.enq.ready
in_ar.ready := sideband_ar_r.enq.ready && out_ar.ready
sideband_ar_r.enq.valid := in_ar.valid && out_ar.ready
out_ar.bits := in_ar.bits
sideband_ar_r.enq.bits := ar_last
// AR flow control; super easy
out.ar <> in_ar
out.ar.bits.user.get := Cat(in_ar.bits.user.toList ++ Seq(ar_last))
// When does W channel start counting a new transfer
val wbeats_latched = RegInit(Bool(false))
val wbeats_ready = Wire(Bool())
val wbeats_valid = Wire(Bool())
when (wbeats_valid && wbeats_ready) { wbeats_latched := Bool(true) }
when (out_aw.fire()) { wbeats_latched := Bool(false) }
when (out.aw.fire()) { wbeats_latched := Bool(false) }
// AW flow control
out_aw.valid := in_aw.valid && sideband_aw_b.enq.ready && (wbeats_ready || wbeats_latched)
in_aw.ready := sideband_aw_b.enq.ready && out_aw.ready && (wbeats_ready || wbeats_latched)
sideband_aw_b.enq.valid := in_aw.valid && out_aw.ready && (wbeats_ready || wbeats_latched)
out.aw.valid := in_aw.valid && (wbeats_ready || wbeats_latched)
in_aw.ready := out.aw.ready && (wbeats_ready || wbeats_latched)
wbeats_valid := in_aw.valid && !wbeats_latched
out_aw.bits := in_aw.bits
sideband_aw_b.enq.bits := aw_last
out.aw.bits := in_aw.bits
out.aw.bits.user.get := Cat(in_aw.bits.user.toList ++ Seq(aw_last))
// We need to inject 'last' into the W channel fragments, count!
val w_counter = RegInit(UInt(0, width = AXI4Parameters.lenBits+1))
val w_idle = w_counter === UInt(0)
val w_todo = Mux(w_idle, Mux(wbeats_valid, w_beats, UInt(0)), w_counter)
val w_last = w_todo === UInt(1)
w_counter := w_todo - out_w.fire()
assert (!out_w.fire() || w_todo =/= UInt(0)) // underflow impossible
w_counter := w_todo - out.w.fire()
assert (!out.w.fire() || w_todo =/= UInt(0)) // underflow impossible
// W flow control
wbeats_ready := w_idle
out_w.valid := in_w.valid && (!wbeats_ready || wbeats_valid)
in_w.ready := out_w.ready && (!wbeats_ready || wbeats_valid)
out_w.bits := in_w.bits
out_w.bits.last := w_last
out.w.valid := in_w.valid && (!wbeats_ready || wbeats_valid)
in_w.ready := out.w.ready && (!wbeats_ready || wbeats_valid)
out.w.bits := in_w.bits
out.w.bits.last := w_last
// We should also recreate the last last
assert (!out_w.valid || !in_w.bits.last || w_last)
assert (!out.w.valid || !in_w.bits.last || w_last)
// R flow control
val r_last = out_r.bits.last
in_r.valid := out_r.valid && (!r_last || sideband_ar_r.deq.valid)
out_r.ready := in_r.ready && (!r_last || sideband_ar_r.deq.valid)
sideband_ar_r.deq.ready := r_last && out_r.valid && in_r.ready
in_r.bits := out_r.bits
in_r.bits.last := r_last && sideband_ar_r.deq.bits
val r_last = out.r.bits.user.get(0)
in.r <> out.r
in.r.bits.last := out.r.bits.last && r_last
in.r.bits.user.foreach { _ := out.r.bits.user.get >> 1 }
// B flow control
val b_last = sideband_aw_b.deq.bits
in_b.valid := out_b.valid && sideband_aw_b.deq.valid && b_last
out_b.ready := sideband_aw_b.deq.valid && (!b_last || in_b.ready)
sideband_aw_b.deq.ready := out_b.valid && (!b_last || in_b.ready)
in_b.bits := out_b.bits
val b_last = out.b.bits.user.get(0)
in.b <> out.b
in.b.valid := out.b.valid && b_last
out.b.ready := in.b.ready || !b_last
in.b.bits.user.foreach { _ := out.b.bits.user.get >> 1 }
// Merge errors from dropped B responses
val r_resp = RegInit(UInt(0, width = AXI4Parameters.respBits))
val resp = out_b.bits.resp | r_resp
when (out_b.fire()) { r_resp := Mux(b_last, UInt(0), resp) }
in_b.bits.resp := resp
}
}
/* We want to put barriers between the fragments of a fragmented transfer and all other transfers.
* This lets us use very little state to reassemble the fragments (else we need one FIFO per ID).
* Furthermore, because all the fragments share the same AXI ID, they come back contiguously.
* This guarantees that no other R responses might get mixed between fragments, ensuring that the
* interleavedId for the slaves remains unaffected by the fragmentation transformation.
* Of course, if you need to fragment, this means there is a potentially hefty serialization cost.
* However, this design allows full concurrency in the common no-fragmentation-needed scenario.
*/
class AXI4FragmenterSideband(maxInFlight: Int, flow: Boolean = false) extends Module
{
val io = new QueueIO(Bool(), maxInFlight)
io.count := UInt(0)
val PASS = UInt(2, width = 2) // allow 'last=1' bits to enque, on 'last=0' if count>0 block else accept+FIND
val FIND = UInt(0, width = 2) // allow 'last=0' bits to enque, accept 'last=1' and switch to WAIT
val WAIT = UInt(1, width = 2) // block all access till count=0
val state = RegInit(PASS)
val count = RegInit(UInt(0, width = log2Up(maxInFlight)))
val full = count === UInt(maxInFlight-1)
val empty = count === UInt(0)
val last = count === UInt(1)
io.deq.bits := state(1) || (last && state(0)) // PASS || (last && WAIT)
io.deq.valid := !empty
io.enq.ready := !full && (empty || (state === FIND) || (state === PASS && io.enq.bits))
// WAIT => count > 0
assert (state =/= WAIT || count =/= UInt(0))
if (flow) {
when (io.enq.valid) {
io.deq.valid := Bool(true)
when (empty) { io.deq.bits := io.enq.bits }
val error = RegInit(Vec.fill(edgeIn.master.endId) { UInt(0, width = AXI4Parameters.respBits)})
in.b.bits.resp := out.b.bits.resp | error(out.b.bits.id)
(error zip UIntToOH(out.b.bits.id, edgeIn.master.endId).toBools) foreach { case (reg, sel) =>
when (sel && out.b.fire()) { reg := Mux(b_last, UInt(0), reg | out.b.bits.resp) }
}
}
count := count + io.enq.fire() - io.deq.fire()
switch (state) {
is(PASS) { when (io.enq.valid && !io.enq.bits && empty) { state := FIND } }
is(FIND) { when (io.enq.valid && io.enq.bits && !full) { state := Mux(empty, PASS, WAIT) } }
is(WAIT) { when (last && io.deq.ready) { state := PASS } }
}
}
}
object AXI4Fragmenter
{
// applied to the AXI4 source node; y.node := AXI4Fragmenter()(x.node)
def apply(lite: Boolean = false, maxInFlight: => Int = 32, combinational: Boolean = true)(x: AXI4OutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): AXI4OutwardNode = {
val fragmenter = LazyModule(new AXI4Fragmenter(lite, maxInFlight, combinational))
def apply()(x: AXI4OutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): AXI4OutwardNode = {
val fragmenter = LazyModule(new AXI4Fragmenter)
fragmenter.node := x
fragmenter.node
}

View File

@ -0,0 +1,78 @@
// See LICENSE.SiFive for license details.
package uncore.axi4
import Chisel._
import chisel3.internal.sourceinfo.SourceInfo
import config._
import diplomacy._
import scala.math.{min,max}
class AXI4IdIndexer(idBits: Int)(implicit p: Parameters) extends LazyModule
{
require (idBits >= 0)
val node = AXI4AdapterNode(
masterFn = { mp =>
// Create one new "master" per ID
val masters = Array.tabulate(1 << idBits) { i => AXI4MasterParameters(
id = IdRange(i, i+1),
aligned = true,
maxFlight = Some(0))
}
// Squash the information from original masters into new ID masters
mp.masters.foreach { m =>
for (i <- m.id.start until m.id.end) {
val j = i % (1 << idBits)
val old = masters(j)
masters(j) = old.copy(
aligned = old.aligned && m.aligned,
maxFlight = old.maxFlight.flatMap { o => m.maxFlight.map { n => o+n } })
}
}
mp.copy(
userBits = mp.userBits + max(0, log2Ceil(mp.endId) - idBits),
masters = masters)
},
slaveFn = { sp => sp.copy(
slaves = sp.slaves.map(s => s.copy(
interleavedId = if (idBits == 0) Some(0) else s.interleavedId)))
})
lazy val module = new LazyModuleImp(this) {
val io = new Bundle {
val in = node.bundleIn
val out = node.bundleOut
}
((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
// Leave everything mostly untouched
out.ar <> in.ar
out.aw <> in.aw
out.w <> in.w
in.b <> out.b
in.r <> out.r
val bits = log2Ceil(edgeIn.master.endId) - idBits
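// ID bits that do not fit in idBits ride in the user field on AR/AW and are restored from it on R/B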
if (bits > 0) {
out.ar.bits.user.get := Cat(in.ar.bits.user.toList ++ Seq(in.ar.bits.id >> idBits))
out.aw.bits.user.get := Cat(in.aw.bits.user.toList ++ Seq(in.aw.bits.id >> idBits))
in.r.bits.user.foreach { _ := out.r.bits.user.get >> bits }
in.b.bits.user.foreach { _ := out.b.bits.user.get >> bits }
in.r.bits.id := Cat(out.r.bits.user.get, out.r.bits.id)
in.b.bits.id := Cat(out.b.bits.user.get, out.b.bits.id)
}
}
}
}
object AXI4IdIndexer
{
// applied to the AXI4 source node; y.node := AXI4IdIndexer(idBits)(x.node)
def apply(idBits: Int)(x: AXI4OutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): AXI4OutwardNode = {
val indexer = LazyModule(new AXI4IdIndexer(idBits))
indexer.node := x
indexer.node
}
}

View File

@ -62,26 +62,33 @@ case class AXI4SlavePortParameters(
}
case class AXI4MasterParameters(
id: IdRange = IdRange(0, 1),
aligned: Boolean = false,
nodePath: Seq[BaseNode] = Seq())
id: IdRange = IdRange(0, 1),
aligned: Boolean = false,
maxFlight: Option[Int] = None, // None = infinite, else is a per-ID cap
nodePath: Seq[BaseNode] = Seq())
{
val name = nodePath.lastOption.map(_.lazyModule.name).getOrElse("disconnected")
maxFlight.foreach { m => require (m >= 0) }
}
case class AXI4MasterPortParameters(
masters: Seq[AXI4MasterParameters])
masters: Seq[AXI4MasterParameters],
userBits: Int = 0)
{
val endId = masters.map(_.id.end).max
require (userBits >= 0)
// Require disjoint ranges for ids
masters.combinations(2).foreach { case Seq(x,y) => require (!x.id.overlaps(y.id), s"$x and $y overlap") }
IdRange.overlaps(masters.map(_.id)).foreach { case (x, y) =>
require (!x.overlaps(y), s"AXI4MasterParameters.id $x and $y overlap")
}
}
case class AXI4BundleParameters(
addrBits: Int,
dataBits: Int,
idBits: Int)
idBits: Int,
userBits: Int)
{
require (dataBits >= 8, s"AXI4 data bits must be >= 8 (got $dataBits)")
require (addrBits >= 1, s"AXI4 addr bits must be >= 1 (got $addrBits)")
@ -102,19 +109,21 @@ case class AXI4BundleParameters(
AXI4BundleParameters(
max(addrBits, x.addrBits),
max(dataBits, x.dataBits),
max(idBits, x.idBits))
max(idBits, x.idBits),
max(userBits, x.userBits))
}
object AXI4BundleParameters
{
val emptyBundleParams = AXI4BundleParameters(addrBits=1, dataBits=8, idBits=1)
val emptyBundleParams = AXI4BundleParameters(addrBits=1, dataBits=8, idBits=1, userBits=0)
def union(x: Seq[AXI4BundleParameters]) = x.foldLeft(emptyBundleParams)((x,y) => x.union(y))
def apply(master: AXI4MasterPortParameters, slave: AXI4SlavePortParameters) =
new AXI4BundleParameters(
addrBits = log2Up(slave.maxAddress+1),
dataBits = slave.beatBytes * 8,
idBits = log2Up(master.endId))
idBits = log2Up(master.endId),
userBits = master.userBits)
}
case class AXI4EdgeParameters(

View File

@ -17,7 +17,7 @@ class AXI4RegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int
supportsRead = TransferSizes(1, beatBytes),
interleavedId = Some(0))),
beatBytes = beatBytes,
minLatency = min(concurrency, 1)))) // the Queue adds at most one cycle
minLatency = 1)))
{
require (address.contiguous)
@ -30,7 +30,7 @@ class AXI4RegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int
val r = bundleIn(0).r
val b = bundleIn(0).b
val params = RegMapperParams(log2Up((address.mask+1)/beatBytes), beatBytes, ar.bits.params.idBits)
val params = RegMapperParams(log2Up((address.mask+1)/beatBytes), beatBytes, ar.bits.params.idBits + ar.bits.params.userBits)
val in = Wire(Decoupled(new RegMapperInput(params)))
// Prefer to execute reads first
@ -39,34 +39,39 @@ class AXI4RegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int
aw.ready := in.ready && !ar.valid && w .valid
w .ready := in.ready && !ar.valid && aw.valid
val addr = Mux(ar.valid, ar.bits.addr, aw.bits.addr)
val in_id = Mux(ar.valid, ar.bits.id, aw.bits.id)
val ar_extra = Cat(Seq(ar.bits.id) ++ ar.bits.user.toList)
val aw_extra = Cat(Seq(aw.bits.id) ++ aw.bits.user.toList)
val in_extra = Mux(ar.valid, ar_extra, aw_extra)
val addr = Mux(ar.valid, ar.bits.addr, aw.bits.addr)
val mask = uncore.tilelink2.maskGen(ar.bits.addr, ar.bits.size, beatBytes)
in.bits.read := ar.valid
in.bits.index := addr >> log2Ceil(beatBytes)
in.bits.data := w.bits.data
in.bits.mask := Mux(ar.valid, mask, w.bits.strb)
in.bits.extra := in_id
in.bits.extra := in_extra
// Invoke the register map builder and make it Irrevocable
val out = Queue.irrevocable(
RegMapper(beatBytes, concurrency, undefZero, in, mapping:_*),
entries = 1, flow = true)
entries = 2)
// No flow control needed
out.ready := Mux(out.bits.read, r.ready, b.ready)
r.valid := out.valid && out.bits.read
b.valid := out.valid && !out.bits.read
val out_id = if (r.bits.params.idBits == 0) UInt(0) else out.bits.extra
val out_id = if (r.bits.params.idBits == 0) UInt(0) else (out.bits.extra >> ar.bits.params.userBits)
r.bits.id := out_id
r.bits.data := out.bits.data
r.bits.last := Bool(true)
r.bits.resp := AXI4Parameters.RESP_OKAY
r.bits.user.foreach { _ := out.bits.extra }
b.bits.id := out_id
b.bits.resp := AXI4Parameters.RESP_OKAY
b.bits.user.foreach { _ := out.bits.extra }
}
}

View File

@ -18,7 +18,7 @@ class AXI4RAM(address: AddressSet, executable: Boolean = true, beatBytes: Int =
supportsWrite = TransferSizes(1, beatBytes),
interleavedId = Some(0))),
beatBytes = beatBytes,
minLatency = 0))) // B responds on same cycle
minLatency = 1)))
// We require the address range to include an entire beat (for the write mask)
require ((address.mask & (beatBytes-1)) == beatBytes-1)
@ -38,36 +38,53 @@ class AXI4RAM(address: AddressSet, executable: Boolean = true, beatBytes: Int =
val r_addr = Cat((mask zip (in.ar.bits.addr >> log2Ceil(beatBytes)).toBools).filter(_._1).map(_._2).reverse)
val w_addr = Cat((mask zip (in.aw.bits.addr >> log2Ceil(beatBytes)).toBools).filter(_._1).map(_._2).reverse)
in.aw.ready := in. w.valid && in.b.ready
in. w.ready := in.aw.valid && in.b.ready
in. b.valid := in.w.valid && in.aw.valid
val w_full = RegInit(Bool(false))
val w_id = Reg(UInt())
val w_user = Reg(UInt())
when (in. b.fire()) { w_full := Bool(false) }
when (in.aw.fire()) { w_full := Bool(true) }
when (in.aw.fire()) {
w_id := in.aw.bits.id
in.aw.bits.user.foreach { w_user := _ }
}
in.b.bits.id := in.aw.bits.id
in.b.bits.resp := AXI4Parameters.RESP_OKAY
val wdata = Vec.tabulate(beatBytes) { i => in.w.bits.data(8*(i+1)-1, 8*i) }
when (in.b.fire()) {
when (in.aw.fire()) {
mem.write(w_addr, wdata, in.w.bits.strb.toBools)
}
in. b.valid := w_full
in.aw.ready := in. w.valid && (in.b.ready || !w_full)
in. w.ready := in.aw.valid && (in.b.ready || !w_full)
in.b.bits.id := w_id
in.b.bits.resp := AXI4Parameters.RESP_OKAY
in.b.bits.user.foreach { _ := w_user }
val r_full = RegInit(Bool(false))
val r_id = Reg(UInt())
val r_user = Reg(UInt())
when (in. r.fire()) { r_full := Bool(false) }
when (in.ar.fire()) { r_full := Bool(true) }
in. r.valid := r_full
in.ar.ready := in.r.ready || !r_full
when (in.ar.fire()) {
r_id := in.ar.bits.id
in.ar.bits.user.foreach { r_user := _ }
}
val ren = in.ar.fire()
val rdata = mem.readAndHold(r_addr, ren)
in. r.valid := r_full
in.ar.ready := in.r.ready || !r_full
in.r.bits.id := r_id
in.r.bits.resp := AXI4Parameters.RESP_OKAY
in.r.bits.data := Cat(rdata.reverse)
in.r.bits.user.foreach { _ := r_user }
in.r.bits.last := Bool(true)
}
}

View File

@ -26,8 +26,8 @@ class AXI4LiteFuzzRAM()(implicit p: Parameters) extends LazyModule
model.node := fuzz.node
xbar.node := TLDelayer(0.1)(TLBuffer(BufferParams.flow)(TLDelayer(0.2)(model.node)))
ram.node := AXI4Fragmenter(lite=true)(TLToAXI4(0, true )(xbar.node))
gpio.node := AXI4Fragmenter(lite=true)(TLToAXI4(0, false)(xbar.node))
ram.node := AXI4Fragmenter()(AXI4Deinterleaver(16)(TLToAXI4(4, true )(xbar.node)))
gpio.node := AXI4Fragmenter()(AXI4Deinterleaver(16)(TLToAXI4(4, false)(xbar.node)))
lazy val module = new LazyModuleImp(this) with HasUnitTestIO {
io.finished := fuzz.module.io.finished
@ -49,8 +49,8 @@ class AXI4FullFuzzRAM()(implicit p: Parameters) extends LazyModule
model.node := fuzz.node
xbar.node := TLDelayer(0.1)(TLBuffer(BufferParams.flow)(TLDelayer(0.2)(model.node)))
ram.node := AXI4Fragmenter(lite=false, maxInFlight = 2)(TLToAXI4(4,false)(xbar.node))
gpio.node := AXI4Fragmenter(lite=false, maxInFlight = 5)(TLToAXI4(4,true )(xbar.node))
ram.node := AXI4Fragmenter()(AXI4Deinterleaver(16)(TLToAXI4(4,false)(xbar.node)))
gpio.node := AXI4Fragmenter()(AXI4Deinterleaver(16)(TLToAXI4(4,true )(xbar.node)))
lazy val module = new LazyModuleImp(this) with HasUnitTestIO {
io.finished := fuzz.module.io.finished
@ -70,11 +70,13 @@ class AXI4FuzzMaster()(implicit p: Parameters) extends LazyModule
model.node := fuzz.node
node :=
AXI4UserYanker()(
AXI4Deinterleaver(64)(
TLToAXI4(4)(
TLDelayer(0.1)(
TLBuffer(BufferParams.flow)(
TLDelayer(0.1)(
model.node))))
model.node))))))
lazy val module = new LazyModuleImp(this) {
val io = new Bundle {
@ -89,16 +91,23 @@ class AXI4FuzzMaster()(implicit p: Parameters) extends LazyModule
class AXI4FuzzSlave()(implicit p: Parameters) extends LazyModule
{
val node = AXI4InputNode()
val ram = LazyModule(new TLTestRAM(AddressSet(0x0, 0xfff)))
val xbar = LazyModule(new TLXbar)
val ram = LazyModule(new TLRAM(AddressSet(0x0, 0xfff)))
val error= LazyModule(new TLError(Seq(AddressSet(0x1800, 0xff))))
ram.node :=
TLFragmenter(4, 16)(
ram.node := TLFragmenter(4, 16)(xbar.node)
error.node := TLFragmenter(4, 16)(xbar.node)
xbar.node :=
TLFIFOFixer()(
TLDelayer(0.1)(
TLBuffer(BufferParams.flow)(
TLDelayer(0.1)(
AXI4ToTL()(
AXI4UserYanker(Some(4))(
AXI4Fragmenter()(
node))))))
AXI4IdIndexer(2)(
node))))))))
lazy val module = new LazyModuleImp(this) {
val io = new Bundle {

View File

@ -9,23 +9,29 @@ import diplomacy._
import uncore.tilelink2._
case class AXI4ToTLNode() extends MixedAdapterNode(AXI4Imp, TLImp)(
dFn = { case AXI4MasterPortParameters(masters) =>
TLClientPortParameters(clients = masters.map { m =>
TLClientParameters(
sourceId = IdRange(m.id.start << 1, m.id.end << 1), // R+W ids are distinct
nodePath = m.nodePath)
})
dFn = { case AXI4MasterPortParameters(masters, userBits) =>
masters.foreach { m => require (m.maxFlight.isDefined, "AXI4 must include a transaction maximum per ID to convert to TL") }
val maxFlight = masters.map(_.maxFlight.get).max
TLClientPortParameters(
clients = masters.flatMap { m =>
for (id <- m.id.start until m.id.end)
yield TLClientParameters(
sourceId = IdRange(id * maxFlight*2, (id+1) * maxFlight*2), // R+W ids are distinct
nodePath = m.nodePath,
requestFifo = true)
})
},
uFn = { mp => AXI4SlavePortParameters(
slaves = mp.managers.map { m =>
val maxXfer = TransferSizes(1, mp.beatBytes * (1 << AXI4Parameters.lenBits))
AXI4SlaveParameters(
address = m.address,
resources = m.resources,
regionType = m.regionType,
executable = m.executable,
nodePath = m.nodePath,
supportsWrite = m.supportsPutPartial,
supportsRead = m.supportsGet,
supportsWrite = m.supportsPutPartial.intersect(maxXfer),
supportsRead = m.supportsGet.intersect(maxXfer),
interleavedId = Some(0))}, // TL2 never interleaves D beats
beatBytes = mp.beatBytes,
minLatency = mp.minLatency)
@ -45,58 +51,64 @@ class AXI4ToTL()(implicit p: Parameters) extends LazyModule
val numIds = edgeIn.master.endId
val beatBytes = edgeOut.manager.beatBytes
val countBits = AXI4Parameters.lenBits + (1 << AXI4Parameters.sizeBits) - 1
val maxFlight = edgeIn.master.masters.map(_.maxFlight.get).max
val addedBits = log2Ceil(maxFlight) + 1
require (edgeIn.master.userBits == 0, "AXI4 user bits cannot be transported by TL")
require (edgeIn.master.masters(0).aligned)
edgeOut.manager.requireFifo()
// Look for an Error device to redirect bad requests
val errorDevs = edgeOut.manager.managers.filter(_.nodePath.last.lazyModule.className == "TLError")
require (!errorDevs.isEmpty, "There is no TLError reachable from AXI4ToTL. One must be instantiated.")
val error = errorDevs.head.address.head.base
require (errorDevs.head.supportsPutPartial.contains(edgeOut.manager.maxTransfer),
s"Error device supports ${errorDevs.head.supportsPutPartial} PutPartial but must support ${edgeOut.manager.maxTransfer}")
require (errorDevs.head.supportsGet.contains(edgeOut.manager.maxTransfer),
s"Error device supports ${errorDevs.head.supportsGet} Get but must support ${edgeOut.manager.maxTransfer}")
val r_out = Wire(out.a)
val r_inflight = RegInit(UInt(0, width = numIds))
val r_block = r_inflight(in.ar.bits.id)
val r_size1 = in.ar.bits.bytes1()
val r_size = OH1ToUInt(r_size1)
val r_addr = in.ar.bits.addr
val r_ok = edgeOut.manager.supportsGetSafe(r_addr, r_size)
val r_err_in = Wire(Decoupled(new AXI4BundleRError(in.ar.bits.params)))
val r_err_out = Queue(r_err_in, 2)
val r_count = RegInit(UInt(0, width = in.ar.bits.params.lenBits))
val r_last = r_count === in.ar.bits.len
val r_ok = edgeOut.manager.supportsGetSafe(in.ar.bits.addr, r_size)
val r_addr = Mux(r_ok, in.ar.bits.addr, UInt(error) | in.ar.bits.addr(log2Up(beatBytes)-1, 0))
val r_count = RegInit(Vec.fill(numIds) { UInt(0, width = log2Ceil(maxFlight)) })
val r_id = Cat(in.ar.bits.id, r_count(in.ar.bits.id), UInt(0, width=1))
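// TileLink source = { AXI id, per-id flight counter, 1 LSB: 0 for reads, 1 for writes }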
assert (!in.ar.valid || r_size1 === UIntToOH1(r_size, countBits)) // because aligned
in.ar.ready := Mux(r_ok, r_out.ready, r_err_in.ready && r_last) && !r_block
r_out.valid := in.ar.valid && !r_block && r_ok
r_out.bits := edgeOut.Get(in.ar.bits.id << 1 | UInt(1), r_addr, r_size)._2
r_err_in.valid := in.ar.valid && !r_block && !r_ok
r_err_in.bits.last := r_last
r_err_in.bits.id := in.ar.bits.id
in.ar.ready := r_out.ready
r_out.valid := in.ar.valid
r_out.bits := edgeOut.Get(r_id, r_addr, r_size)._2
when (r_err_in.fire()) { r_count := Mux(r_last, UInt(0), r_count + UInt(1)) }
val r_sel = UIntToOH(in.ar.bits.id, numIds)
(r_sel.toBools zip r_count) foreach { case (s, r) =>
when (in.ar.fire() && s) { r := r + UInt(1) }
}
val w_out = Wire(out.a)
val w_inflight = RegInit(UInt(0, width = numIds))
val w_block = w_inflight(in.aw.bits.id)
val w_size1 = in.aw.bits.bytes1()
val w_size = OH1ToUInt(w_size1)
val w_addr = in.aw.bits.addr
val w_ok = edgeOut.manager.supportsPutPartialSafe(w_addr, w_size)
val w_err_in = Wire(Decoupled(in.aw.bits.id))
val w_err_out = Queue(w_err_in, 2)
val w_ok = edgeOut.manager.supportsPutPartialSafe(in.aw.bits.addr, w_size)
val w_addr = Mux(w_ok, in.aw.bits.addr, UInt(error) | in.aw.bits.addr(log2Up(beatBytes)-1, 0))
val w_count = RegInit(Vec.fill(numIds) { UInt(0, width = log2Ceil(maxFlight)) })
val w_id = Cat(in.aw.bits.id, w_count(in.aw.bits.id), UInt(1, width=1))
assert (!in.aw.valid || w_size1 === UIntToOH1(w_size, countBits)) // because aligned
assert (!in.aw.valid || in.aw.bits.len === UInt(0) || in.aw.bits.size === UInt(log2Ceil(beatBytes))) // because aligned
in.aw.ready := Mux(w_ok, w_out.ready, w_err_in.ready) && in.w.valid && in.w.bits.last && !w_block
in.w.ready := Mux(w_ok, w_out.ready, w_err_in.ready || !in.w.bits.last) && in.aw.valid && !w_block
w_out.valid := in.aw.valid && in.w.valid && !w_block && w_ok
w_out.bits := edgeOut.Put(in.aw.bits.id << 1, w_addr, w_size, in.w.bits.data, in.w.bits.strb)._2
w_err_in.valid := in.aw.valid && in.w.valid && !w_block && !w_ok && in.w.bits.last
w_err_in.bits := in.aw.bits.id
in.aw.ready := w_out.ready && in.w.valid && in.w.bits.last
in.w.ready := w_out.ready && in.aw.valid
w_out.valid := in.aw.valid && in.w.valid
w_out.bits := edgeOut.Put(w_id, w_addr, w_size, in.w.bits.data, in.w.bits.strb)._2
TLArbiter(TLArbiter.lowestIndexFirst)(out.a, (UInt(0), r_out), (in.aw.bits.len, w_out))
val w_sel = UIntToOH(in.aw.bits.id, numIds)
(w_sel.toBools zip w_count) foreach { case (s, r) =>
when (in.aw.fire() && s) { r := r + UInt(1) }
}
TLArbiter(TLArbiter.roundRobin)(out.a, (UInt(0), r_out), (in.aw.bits.len, w_out))
val ok_b = Wire(in.b)
val err_b = Wire(in.b)
val mux_b = Wire(in.b)
val ok_r = Wire(in.r)
val err_r = Wire(in.r)
val mux_r = Wire(in.r)
val d_resp = Mux(out.d.bits.error, AXI4Parameters.RESP_SLVERR, AXI4Parameters.RESP_OKAY)
val d_hasData = edgeOut.hasData(out.d.bits)
@ -106,58 +118,33 @@ class AXI4ToTL()(implicit p: Parameters) extends LazyModule
ok_r.valid := out.d.valid && d_hasData
ok_b.valid := out.d.valid && !d_hasData
ok_r.bits.id := out.d.bits.source >> 1
ok_r.bits.id := out.d.bits.source >> addedBits
ok_r.bits.data := out.d.bits.data
ok_r.bits.resp := d_resp
ok_r.bits.last := d_last
r_err_out.ready := err_r.ready
err_r.valid := r_err_out.valid
err_r.bits.id := r_err_out.bits.id
err_r.bits.data := out.d.bits.data // don't care
err_r.bits.resp := AXI4Parameters.RESP_DECERR
err_r.bits.last := r_err_out.bits.last
// AXI4 must hold R to one source until last
val mux_lock_ok = RegInit(Bool(false))
val mux_lock_err = RegInit(Bool(false))
when (ok_r .fire()) { mux_lock_ok := !ok_r .bits.last }
when (err_r.fire()) { mux_lock_err := !err_r.bits.last }
assert (!mux_lock_ok || !mux_lock_err)
// Prioritize err over ok (b/c err_r.valid comes from a register)
mux_r.valid := (!mux_lock_err && ok_r.valid) || (!mux_lock_ok && err_r.valid)
mux_r.bits := Mux(!mux_lock_ok && err_r.valid, err_r.bits, ok_r.bits)
ok_r.ready := mux_r.ready && (mux_lock_ok || !err_r.valid)
err_r.ready := mux_r.ready && !mux_lock_ok
// AXI4 needs irrevocable behaviour
in.r <> Queue.irrevocable(mux_r, 1, flow=true)
in.r <> Queue.irrevocable(ok_r, 1, flow=true)
ok_b.bits.id := out.d.bits.source >> 1
ok_b.bits.id := out.d.bits.source >> addedBits
ok_b.bits.resp := d_resp
w_err_out.ready := err_b.ready
err_b.valid := w_err_out.valid
err_b.bits.id := w_err_out.bits
err_b.bits.resp := AXI4Parameters.RESP_DECERR
// Prioritize err over ok (b/c err_b.valid comes from a register)
mux_b.valid := ok_b.valid || err_b.valid
mux_b.bits := Mux(err_b.valid, err_b.bits, ok_b.bits)
ok_b.ready := mux_b.ready && !err_b.valid
err_b.ready := mux_b.ready
// AXI4 needs irrevocable behaviour
in.b <> Queue.irrevocable(mux_b, 1, flow=true)
val q_b = Queue.irrevocable(ok_b, 1, flow=true)
// Update flight trackers
val r_set = in.ar.fire().asUInt << in.ar.bits.id
val r_clr = (in.r.fire() && in.r.bits.last).asUInt << in.r.bits.id
r_inflight := (r_inflight | r_set) & ~r_clr
val w_set = in.aw.fire().asUInt << in.aw.bits.id
val w_clr = in.b.fire().asUInt << in.b.bits.id
w_inflight := (w_inflight | w_set) & ~w_clr
// We need to prevent sending B valid before the last W beat is accepted
// TileLink allows early acknowledgement of a write burst, but AXI does not.
val b_count = RegInit(Vec.fill(numIds) { UInt(0, width = log2Ceil(maxFlight)) })
val b_allow = b_count(in.b.bits.id) =/= w_count(in.b.bits.id)
val b_sel = UIntToOH(in.b.bits.id, numIds)
(b_sel.toBools zip b_count) foreach { case (s, r) =>
when (in.b.fire() && s) { r := r + UInt(1) }
}
in.b.bits := q_b.bits
in.b.valid := q_b.valid && b_allow
q_b.ready := in.b.ready && b_allow
// Unused channels
out.b.ready := Bool(true)

View File

@ -0,0 +1,106 @@
// See LICENSE.SiFive for license details.
package uncore.axi4
import Chisel._
import chisel3.internal.sourceinfo.SourceInfo
import config._
import diplomacy._
import uncore.tilelink2.UIntToOH1
class AXI4UserYanker(capMaxFlight: Option[Int] = None)(implicit p: Parameters) extends LazyModule
{
val node = AXI4AdapterNode(
masterFn = { mp => mp.copy(
userBits = 0,
masters = mp.masters.map { m => m.copy(
maxFlight = (m.maxFlight, capMaxFlight) match {
case (Some(x), Some(y)) => Some(x min y)
case (Some(x), None) => Some(x)
case (None, Some(y)) => Some(y)
case (None, None) => None })})},
slaveFn = { sp => sp })
lazy val module = new LazyModuleImp(this) {
val io = new Bundle {
val in = node.bundleIn
val out = node.bundleOut
}
((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
val bits = edgeIn.bundle.userBits
val need_bypass = edgeOut.slave.minLatency < 1
require (bits > 0) // useless UserYanker!
edgeOut.master.masters.foreach { m =>
require (m.maxFlight.isDefined, "UserYanker needs a flight cap on each ID")
}
def queue(id: Int) = {
val depth = edgeOut.master.masters.find(_.id.contains(id)).flatMap(_.maxFlight).getOrElse(0)
if (depth == 0) {
Wire(new QueueIO(UInt(width = bits), 1)) // unused ID => undefined value
} else {
Module(new Queue(UInt(width = bits), depth, flow=need_bypass)).io
}
}
val rqueues = Seq.tabulate(edgeIn.master.endId) { i => queue(i) }
val wqueues = Seq.tabulate(edgeIn.master.endId) { i => queue(i) }
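// one queue per AXI ID: user bits captured on AR/AW are re-attached to the matching R/B responses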
val arid = in.ar.bits.id
val ar_ready = Vec(rqueues.map(_.enq.ready))(arid)
in .ar.ready := out.ar.ready && ar_ready
out.ar.valid := in .ar.valid && ar_ready
out.ar.bits := in .ar.bits
val rid = out.r.bits.id
val r_valid = Vec(rqueues.map(_.deq.valid))(rid)
val r_bits = Vec(rqueues.map(_.deq.bits))(rid)
assert (!out.r.valid || r_valid) // Q must be ready faster than the response
in.r <> out.r
in.r.bits.user.get := r_bits
val arsel = UIntToOH(arid, edgeIn.master.endId).toBools
val rsel = UIntToOH(rid, edgeIn.master.endId).toBools
(rqueues zip (arsel zip rsel)) foreach { case (q, (ar, r)) =>
q.deq.ready := out.r .valid && in .r .ready && r && out.r.bits.last
q.enq.valid := in .ar.valid && out.ar.ready && ar
q.enq.bits := in.ar.bits.user.get
}
val awid = in.aw.bits.id
val aw_ready = Vec(wqueues.map(_.enq.ready))(awid)
in .aw.ready := out.aw.ready && aw_ready
out.aw.valid := in .aw.valid && aw_ready
out.aw.bits := in .aw.bits
val bid = out.b.bits.id
val b_valid = Vec(wqueues.map(_.deq.valid))(bid)
val b_bits = Vec(wqueues.map(_.deq.bits))(bid)
assert (!out.b.valid || b_valid) // Q must be ready faster than the response
in.b <> out.b
in.b.bits.user.get := b_bits
val awsel = UIntToOH(awid, edgeIn.master.endId).toBools
val bsel = UIntToOH(bid, edgeIn.master.endId).toBools
(wqueues zip (awsel zip bsel)) foreach { case (q, (aw, b)) =>
q.deq.ready := out.b .valid && in .b .ready && b
q.enq.valid := in .aw.valid && out.aw.ready && aw
q.enq.bits := in.aw.bits.user.get
}
out.w <> in.w
}
}
}
object AXI4UserYanker
{
// applied to the AXI4 source node; y.node := AXI4UserYanker(capMaxFlight)(x.node)
def apply(capMaxFlight: Option[Int] = None)(x: AXI4OutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): AXI4OutwardNode = {
val yanker = LazyModule(new AXI4UserYanker(capMaxFlight))
yanker.node := x
yanker.node
}
}

View File

@ -3,15 +3,28 @@
package uncore.tilelink2
import Chisel._
import config._
import diplomacy._
object TLArbiter
{
// (valids, granted) => readys
type Policy = (Seq[Bool], Bool) => Seq[Bool]
// (valids, select) => readys
type Policy = (Integer, UInt, Bool) => UInt
val lowestIndexFirst: Policy = (valids, granted) =>
valids.scanLeft(Bool(true))(_ && !_).init
val lowestIndexFirst: Policy = (width, valids, select) => ~(leftOR(valids) << 1)(width-1, 0)
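// roundRobin: grant the lowest-numbered valid input above the previous winner, wrapping to the lowest valid input when none is above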
val roundRobin: Policy = (width, valids, select) => {
val valid = valids(width-1, 0)
assert (valid === valids)
val mask = RegInit(~UInt(0, width=width))
val filter = Cat(valid & ~mask, valid)
val unready = (rightOR(filter, width*2) >> 1) | (mask << width) // last right shift unneeded
val readys = ~((unready >> width) & unready(width-1, 0))
when (select && valid.orR) {
mask := leftOR(readys & valid, width)
}
readys(width-1, 0)
}
def lowestFromSeq[T <: TLChannel](edge: TLEdge, sink: DecoupledIO[T], sources: Seq[DecoupledIO[T]]) {
apply(lowestIndexFirst)(sink, sources.map(s => (edge.numBeats1(s.bits), s)):_*)
@ -21,6 +34,10 @@ object TLArbiter
apply(lowestIndexFirst)(sink, sources.toList.map(s => (edge.numBeats1(s.bits), s)):_*)
}
def robin[T <: TLChannel](edge: TLEdge, sink: DecoupledIO[T], sources: DecoupledIO[T]*) {
apply(roundRobin)(sink, sources.toList.map(s => (edge.numBeats1(s.bits), s)):_*)
}
def apply[T <: Data](policy: Policy)(sink: DecoupledIO[T], sources: (UInt, DecoupledIO[T])*) {
if (sources.isEmpty) {
sink.valid := Bool(false)
@ -37,13 +54,13 @@ object TLArbiter
// Who wants access to the sink?
val valids = sourcesIn.map(_.valid)
// Arbitrate amongst the requests
val readys = Vec(policy(valids, latch))
val readys = Vec(policy(valids.size, Cat(valids.reverse), latch).toBools)
// Which request wins arbitration?
val winner = Vec((readys zip valids) map { case (r,v) => r&&v })
// Confirm the policy works properly
require (readys.size == valids.size)
// Never two winner
// Never two winners
val prefixOR = winner.scanLeft(Bool(false))(_||_).init
assert((prefixOR zip winner) map { case (p,w) => !p || !w } reduce {_ && _})
// If there was any request, there is a winner
@ -73,3 +90,32 @@ object TLArbiter
}
}
}
/** Synthesizeable unit tests */
import unittest._
class TestRobin()(implicit p: Parameters) extends UnitTest(timeout = 500000) {
val sources = Wire(Vec(6, DecoupledIO(UInt(width=3))))
val sink = Wire(DecoupledIO(UInt(width=3)))
val count = RegInit(UInt(0, width=8))
val lfsr = LFSR16(Bool(true))
val valid = lfsr(0)
val ready = lfsr(15)
sources.zipWithIndex.map { case (z, i) => z.bits := UInt(i) }
sources(0).valid := valid
sources(1).valid := Bool(false)
sources(2).valid := valid
sources(3).valid := valid
sources(4).valid := Bool(false)
sources(5).valid := valid
sink.ready := ready
TLArbiter(TLArbiter.roundRobin)(sink, sources.zipWithIndex.map { case (z, i) => (UInt(i), z) }:_*)
when (sink.fire()) { printf("TestRobin: %d\n", sink.bits) }
when (!sink.fire()) { printf("TestRobin: idle (%d %d)\n", valid, ready) }
count := count + UInt(1)
io.finished := count >= UInt(128)
}

View File

@ -0,0 +1,56 @@
// See LICENSE.SiFive for license details.
package uncore.tilelink2
import Chisel._
import config._
import diplomacy._
import util._
class TLError(address: Seq[AddressSet], beatBytes: Int = 4)(implicit p: Parameters) extends LazyModule
{
val device = new SimpleDevice("error-device", Seq("sifive,error0"))
val node = TLManagerNode(Seq(TLManagerPortParameters(
Seq(TLManagerParameters(
address = address,
resources = device.reg,
supportsGet = TransferSizes(1, beatBytes),
supportsPutPartial = TransferSizes(1, beatBytes),
supportsPutFull = TransferSizes(1, beatBytes),
supportsArithmetic = TransferSizes(1, beatBytes),
supportsLogical = TransferSizes(1, beatBytes),
supportsHint = TransferSizes(1, beatBytes),
fifoId = Some(0))), // requests are handled in order
beatBytes = beatBytes,
minLatency = 1))) // no bypass needed for this device
lazy val module = new LazyModuleImp(this) {
val io = new Bundle {
val in = node.bundleIn
}
import TLMessages._
val opcodes = Vec(AccessAck, AccessAck, AccessAckData, AccessAckData, AccessAckData, HintAck)
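// A-opcode -> D-opcode: PutFull/PutPartial -> AccessAck, Arithmetic/Logical/Get -> AccessAckData, Hint -> HintAck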
val in = io.in(0)
val a = Queue(in.a, 1)
val d = in.d
a.ready := d.ready
d.valid := a.valid
d.bits.opcode := opcodes(a.bits.opcode)
d.bits.param := UInt(0)
d.bits.size := a.bits.size
d.bits.source := a.bits.source
d.bits.sink := UInt(0)
d.bits.addr_lo := a.bits.address
d.bits.data := UInt(0)
d.bits.error := a.bits.opcode =/= Hint // Hints may not error
// Tie off unused channels
in.b.valid := Bool(false)
in.c.ready := Bool(true)
in.e.ready := Bool(true)
}
}

View File

@ -10,9 +10,8 @@ import scala.math.max
class TLFIFOFixer(implicit p: Parameters) extends LazyModule
{
// We request downstream FIFO so we can use the existing fifoId
val node = TLAdapterNode(
clientFn = { cp => cp.copy(clients = cp.clients .map(c => c.copy(requestFifo = !c.supportsProbe))) },
clientFn = { cp => cp },
managerFn = { mp => mp.copy(managers = mp.managers.map(m => m.copy(fifoId = Some(0)))) })
lazy val module = new LazyModuleImp(this) {

View File

@ -14,7 +14,7 @@ import scala.math.{min,max}
// Fragmenter modifies: PutFull, PutPartial, LogicalData, Get, Hint
// Fragmenter passes: ArithmeticData (truncated to minSize if alwaysMin)
// Fragmenter cannot modify acquire (could livelock); thus it is unsafe to put caches on both sides
class TLFragmenter(val minSize: Int, val maxSize: Int, val alwaysMin: Boolean = false)(implicit p: Parameters) extends LazyModule
class TLFragmenter(val minSize: Int, val maxSize: Int, val alwaysMin: Boolean = false, val earlyAck: Boolean = false)(implicit p: Parameters) extends LazyModule
{
require (isPow2 (maxSize))
require (isPow2 (minSize))
@ -137,6 +137,7 @@ class TLFragmenter(val minSize: Int, val maxSize: Int, val alwaysMin: Boolean =
val dOrig = Reg(UInt())
val dFragnum = out.d.bits.source(fragmentBits-1, 0)
val dFirst = acknum === UInt(0)
val dLast = dFragnum === UInt(0)
val dsizeOH = UIntToOH (out.d.bits.size, log2Ceil(maxDownSize)+1)
val dsizeOH1 = UIntToOH1(out.d.bits.size, log2Up(maxDownSize))
val dHasData = edgeOut.hasData(out.d.bits)
@ -156,7 +157,7 @@ class TLFragmenter(val minSize: Int, val maxSize: Int, val alwaysMin: Boolean =
}
// Swallow up non-data ack fragments
val drop = !dHasData && (dFragnum =/= UInt(0))
val drop = !dHasData && !(if (earlyAck) dFirst else dLast)
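// with earlyAck only the first fragment's ack is forwarded (the rest are dropped); otherwise only the last fragment's ack is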
out.d.ready := in.d.ready || drop
in.d.valid := out.d.valid && !drop
in.d.bits := out.d.bits // pass most stuff unchanged
@ -164,11 +165,18 @@ class TLFragmenter(val minSize: Int, val maxSize: Int, val alwaysMin: Boolean =
in.d.bits.source := out.d.bits.source >> fragmentBits
in.d.bits.size := Mux(dFirst, dFirst_size, dOrig)
// Combine the error flag
val r_error = RegInit(Bool(false))
val d_error = r_error | out.d.bits.error
when (out.d.fire()) { r_error := Mux(drop, d_error, UInt(0)) }
in.d.bits.error := d_error
if (earlyAck) {
// If you do early Ack, errors may not be dropped
// ... which roughly means: Puts may not fail
assert (!out.d.bits.error || !drop)
in.d.bits.error := out.d.bits.error
} else {
// Combine the error flag
val r_error = RegInit(Bool(false))
val d_error = r_error | out.d.bits.error
when (out.d.fire()) { r_error := Mux(drop, d_error, UInt(0)) }
in.d.bits.error := d_error
}
// What maximum transfer sizes do downstream devices support?
val maxArithmetics = managers.map(_.supportsArithmetic.max)
@ -252,8 +260,8 @@ class TLFragmenter(val minSize: Int, val maxSize: Int, val alwaysMin: Boolean =
object TLFragmenter
{
// applied to the TL source node; y.node := TLFragmenter(minSize, maxSize)(x.node)
def apply(minSize: Int, maxSize: Int, alwaysMin: Boolean = false)(x: TLOutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): TLOutwardNode = {
val fragmenter = LazyModule(new TLFragmenter(minSize, maxSize, alwaysMin))
def apply(minSize: Int, maxSize: Int, alwaysMin: Boolean = false, earlyAck: Boolean = false)(x: TLOutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): TLOutwardNode = {
val fragmenter = LazyModule(new TLFragmenter(minSize, maxSize, alwaysMin, earlyAck))
fragmenter.node := x
fragmenter.node
}

View File

@ -199,9 +199,9 @@ case class TLClientPortParameters(
require (minLatency >= 0)
// Require disjoint ranges for Ids
clients.combinations(2).foreach({ case Seq(x,y) =>
require (!x.sourceId.overlaps(y.sourceId))
})
IdRange.overlaps(clients.map(_.sourceId)).foreach { case (x, y) =>
require (!x.overlaps(y), s"TLClientParameters.sourceId ${x} overlaps ${y}")
}
// Bounds on required sizes
def endSourceId = clients.map(_.sourceId.end).max

View File

@ -10,14 +10,20 @@ import util.PositionalMultiQueue
import uncore.axi4._
import scala.math.{min, max}
case class TLToAXI4Node(idBits: Int) extends MixedAdapterNode(TLImp, AXI4Imp)(
dFn = { _ =>
// We must erase all client information, because we crush their source Ids
val masters = Seq(
case class TLToAXI4Node(beatBytes: Int) extends MixedAdapterNode(TLImp, AXI4Imp)(
dFn = { p =>
val idSize = p.clients.map { c => if (c.requestFifo) 1 else c.sourceId.size }
val idStart = idSize.scanLeft(0)(_+_).init
val masters = ((idStart zip idSize) zip p.clients) map { case ((start, size), c) =>
AXI4MasterParameters(
id = IdRange(0, 1 << idBits),
aligned = true))
AXI4MasterPortParameters(masters)
id = IdRange(start, start+size),
aligned = true,
maxFlight = Some(if (c.requestFifo) c.sourceId.size else 1),
nodePath = c.nodePath)
}
AXI4MasterPortParameters(
masters = masters,
userBits = log2Ceil(p.endSourceId) + 4 + log2Ceil(beatBytes))
},
uFn = { p => TLManagerPortParameters(
managers = p.slaves.map { case s =>
@ -29,15 +35,15 @@ case class TLToAXI4Node(idBits: Int) extends MixedAdapterNode(TLImp, AXI4Imp)(
nodePath = s.nodePath,
supportsGet = s.supportsRead,
supportsPutFull = s.supportsWrite,
supportsPutPartial = s.supportsWrite)},
// AXI4 is NEVER fifo in TL sense (R+W are independent)
supportsPutPartial = s.supportsWrite,
fifoId = Some(0))},
beatBytes = p.beatBytes,
minLatency = p.minLatency)
})
class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: Parameters) extends LazyModule
class TLToAXI4(beatBytes: Int, combinational: Boolean = true)(implicit p: Parameters) extends LazyModule
{
val node = TLToAXI4Node(idBits)
val node = TLToAXI4Node(beatBytes)
lazy val module = new LazyModuleImp(this) {
val io = new Bundle {
@ -52,24 +58,26 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P
require (slaves(0).interleavedId.isDefined)
slaves.foreach { s => require (s.interleavedId == slaves(0).interleavedId) }
// We need to ensure that a slave does not stall trying to send B while we need to receive R
// Since R&W have independent flow control, it is possible for a W to cut in-line and get into
// a slave's buffers, preventing us from getting all the R responses we need to release D for B.
// This risk is compounded by an AXI fragmentation. Even a slave which responds completely to
// AR before working on AW might have an AW slipped between two AR fragments.
val out_b = Queue.irrevocable(out.b, entries=edgeIn.client.endSourceId, flow=combinational)
// Construct the source=>ID mapping table
val idTable = Wire(Vec(edgeIn.client.endSourceId, out.aw.bits.id))
var idCount = Array.fill(edgeOut.master.endId) { 0 }
(edgeIn.client.clients zip edgeOut.master.masters) foreach { case (c, m) =>
for (i <- 0 until c.sourceId.size) {
val id = m.id.start + (if (c.requestFifo) 0 else i)
idTable(c.sourceId.start + i) := UInt(id)
idCount(id) = idCount(id) + 1
}
}
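Continuing the made-up allocation above, the per-source mapping that idTable encodes works out as follows; only IDs shared by more than one source (idCount > 1, here IDs 0 and 3) need the read/write stall tracking added later in this patch:

val perSourceId = (clients zip (idStart zip idSize)).flatMap { case ((fifo, n), (start, _)) =>
  (0 until n).map { i => if (fifo) start else start + i }
}
// perSourceId == Seq(0,0,0,0, 1,2, 3,3,3,3,3,3,3,3)
// idCount     == Array(4, 1, 1, 8)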
// We need to keep the following state from A => D: (addr_lo, size, source)
// All of those fields could potentially require 0 bits (argh. Chisel.)
// We will pack as many of the lowest bits of state as fit into the AXI ID.
// Any bits left-over must be put into a bank of Queues.
// The Queues are indexed by as many of the source bits as fit into the AXI ID.
// The Queues are deep enough that every source has guaranteed space in its Queue.
// We will pack all of that extra information into the user bits.
val sourceBits = log2Ceil(edgeIn.client.endSourceId)
val sizeBits = log2Ceil(edgeIn.maxLgSize+1)
val addrBits = log2Ceil(edgeIn.manager.beatBytes)
val stateBits = addrBits + sizeBits + sourceBits // could be 0
require (stateBits <= out.aw.bits.params.userBits)
val a_address = edgeIn.address(in.a.bits)
val a_addr_lo = edgeIn.addr_lo(a_address)
@@ -91,73 +99,17 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P
require (addrEnd == stateBits)
val a_state = (a_source << sourceOff) | (a_size << sizeOff) | (a_addr_lo << addrOff)
val a_id = if (idBits == 0) UInt(0) else a_state
val r_state = Wire(UInt(width = stateBits))
val r_state = out.r.bits.user.getOrElse(UInt(0))
val r_source = if (sourceBits > 0) r_state(sourceEnd-1, sourceOff) else UInt(0)
val r_size = if (sizeBits > 0) r_state(sizeEnd -1, sizeOff) else UInt(0)
val r_addr_lo = if (addrBits > 0) r_state(addrEnd -1, addrOff) else UInt(0)
val b_state = Wire(UInt(width = stateBits))
val b_state = out.b.bits.user.getOrElse(UInt(0))
val b_source = if (sourceBits > 0) b_state(sourceEnd-1, sourceOff) else UInt(0)
val b_size = if (sizeBits > 0) b_state(sizeEnd -1, sizeOff) else UInt(0)
val b_addr_lo = if (addrBits > 0) b_state(addrEnd -1, addrOff) else UInt(0)
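A worked layout under example parameters (beatBytes = 8, a 64-byte maximum transfer, 16 sources; these values are illustrative, not fixed by the patch):

// addrBits   = log2Ceil(8)     = 3 -> addr_lo in state(2,0)
// sizeBits   = log2Ceil(6 + 1) = 3 -> size    in state(5,3)
// sourceBits = log2Ceil(16)    = 4 -> source  in state(9,6)
// stateBits = 10, which fits in the userBits = log2Ceil(16) + 4 + log2Ceil(8) = 11
// advertised by TLToAXI4Node, so the require above holds.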
val r_last = out.r.bits.last
val r_id = out.r.bits.id
val b_id = out_b.bits.id
if (stateBits <= idBits) { // No need for any state tracking
r_state := r_id
b_state := b_id
} else {
val bankIndexBits = min(sourceBits, idBits)
val posBits = max(0, sourceBits - idBits)
val implicitBits = max(idBits, sourceBits)
val bankBits = stateBits - implicitBits
val numBanks = min(1 << bankIndexBits, edgeIn.client.endSourceId)
def bankEntries(i: Int) = (edgeIn.client.endSourceId+numBanks-i-1) / numBanks
val banks = Seq.tabulate(numBanks) { i =>
// We know there can only be as many outstanding requests as TL sources
// However, AXI read and write queues are not mutually FIFO.
// Therefore, we want to pop them individually, but share the storage.
val bypass = combinational && edgeOut.slave.minLatency == 0
PositionalMultiQueue(UInt(width=max(1,bankBits)), positions=bankEntries(i), ways=2, combinational=bypass)
}
val a_bankPosition = if (posBits == 0) UInt(0) else a_source(sourceBits-1, idBits)
val a_bankIndex = if (bankIndexBits == 0) UInt(0) else a_source(bankIndexBits-1, 0)
val r_bankIndex = if (bankIndexBits == 0) UInt(0) else r_id(bankIndexBits-1, 0)
val b_bankIndex = if (bankIndexBits == 0) UInt(0) else b_id(bankIndexBits-1, 0)
val a_bankSelect = UIntToOH(a_bankIndex, numBanks)
val r_bankSelect = UIntToOH(r_bankIndex, numBanks)
val b_bankSelect = UIntToOH(b_bankIndex, numBanks)
banks.zipWithIndex.foreach { case (q, i) =>
// Push a_state into the banks
q.io.enq.valid := in.a.fire() && a_last && a_bankSelect(i)
q.io.enq.bits.pos := a_bankPosition
q.io.enq.bits.data := a_state >> implicitBits
q.io.enq.bits.way := Mux(a_isPut, UInt(0), UInt(1))
// Pop the bank's ways
q.io.deq(0).ready := out_b.fire() && b_bankSelect(i)
q.io.deq(1).ready := out.r.fire() && r_bankSelect(i) && r_last
// The FIFOs must be valid when we're ready to pop them...
assert (q.io.deq(0).valid || !q.io.deq(0).ready)
assert (q.io.deq(1).valid || !q.io.deq(1).ready)
}
val b_bankData = Vec(banks.map(_.io.deq(0).bits.data))(b_bankIndex)
val b_bankPos = Vec(banks.map(_.io.deq(0).bits.pos ))(b_bankIndex)
val r_bankData = Vec(banks.map(_.io.deq(1).bits.data))(r_bankIndex)
val r_bankPos = Vec(banks.map(_.io.deq(1).bits.pos ))(r_bankIndex)
def optCat(x: (Boolean, UInt)*) = { Cat(x.toList.filter(_._1).map(_._2)) }
b_state := optCat((bankBits > 0, b_bankData), (posBits > 0, b_bankPos), (idBits > 0, b_id))
r_state := optCat((bankBits > 0, r_bankData), (posBits > 0, r_bankPos), (idBits > 0, r_id))
}
// We need these Queues because AXI4 queues are irrevocable
val depth = if (combinational) 1 else 2
val out_arw = Wire(Decoupled(new AXI4BundleARW(out.params)))
@@ -179,7 +131,7 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P
val arw = out_arw.bits
arw.wen := a_isPut
arw.id := a_id // truncated
arw.id := idTable(a_source)
arw.addr := a_address
arw.len := UIntToOH1(a_size, AXI4Parameters.lenBits + log2Ceil(beatBytes)) >> log2Ceil(beatBytes)
arw.size := Mux(a_size >= maxSize, maxSize, a_size)
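A sanity check of the len computation just above, with example numbers (beatBytes = 8, so log2Ceil(beatBytes) = 3, and AXI4Parameters.lenBits = 8):

// 64-byte request, a_size = 6: UIntToOH1(6, 11) = b00000111111; >> 3 gives len = 7, an 8-beat burst
// one beat or less, a_size <= 3: the shift drops every set bit, so len = 0, a single-beat transfer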
@@ -188,11 +140,13 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P
arw.cache := UInt(0) // do not allow AXI to modify our transactions
arw.prot := AXI4Parameters.PROT_PRIVILEDGED
arw.qos := UInt(0) // no QoS
arw.user.foreach { _ := a_state }
in.a.ready := Mux(a_isPut, (doneAW || out_arw.ready) && out_w.ready, out_arw.ready)
out_arw.valid := in.a.valid && Mux(a_isPut, !doneAW && out_w.ready, Bool(true))
val stall = Wire(Bool())
in.a.ready := !stall && Mux(a_isPut, (doneAW || out_arw.ready) && out_w.ready, out_arw.ready)
out_arw.valid := !stall && in.a.valid && Mux(a_isPut, !doneAW && out_w.ready, Bool(true))
out_w.valid := in.a.valid && a_isPut && (doneAW || out_arw.ready)
out_w.valid := !stall && in.a.valid && a_isPut && (doneAW || out_arw.ready)
out_w.bits.data := in.a.bits.data
out_w.bits.strb := in.a.bits.mask
out_w.bits.last := a_last
@@ -204,11 +158,11 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P
val r_wins = out.r.valid || r_holds_d
out.r.ready := in.d.ready
out_b.ready := in.d.ready && !r_wins
in.d.valid := Mux(r_wins, out.r.valid, out_b.valid)
out.b.ready := in.d.ready && !r_wins
in.d.valid := Mux(r_wins, out.r.valid, out.b.valid)
val r_error = out.r.bits.resp =/= AXI4Parameters.RESP_OKAY
val b_error = out_b.bits.resp =/= AXI4Parameters.RESP_OKAY
val b_error = out.b.bits.resp =/= AXI4Parameters.RESP_OKAY
val r_d = edgeIn.AccessAck(r_addr_lo, UInt(0), r_source, r_size, UInt(0), r_error)
val b_d = edgeIn.AccessAck(b_addr_lo, UInt(0), b_source, b_size, b_error)
@@ -216,6 +170,31 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P
in.d.bits := Mux(r_wins, r_d, b_d)
in.d.bits.data := out.r.bits.data // avoid a costly Mux
// We need to track if any reads or writes are inflight for a given ID.
// If the opposite type arrives, we must stall until it completes.
val a_sel = UIntToOH(arw.id, edgeOut.master.endId).toBools
val d_sel = UIntToOH(Mux(r_wins, out.r.bits.id, out.b.bits.id), edgeOut.master.endId).toBools
val d_last = Mux(r_wins, out.r.bits.last, Bool(true))
val d_first = RegInit(Bool(true))
when (in.d.fire()) { d_first := d_last }
val stalls = ((a_sel zip d_sel) zip idCount) filter { case (_, n) => n > 1 } map { case ((as, ds), n) =>
val count = RegInit(UInt(0, width = log2Ceil(n + 1)))
val write = Reg(Bool())
val idle = count === UInt(0)
// Once the response starts arriving, it is already safe to switch between R and W
val inc = as && out_arw.fire()
val dec = ds && d_first && in.d.fire()
count := count + inc.asUInt - dec.asUInt
assert (!dec || count =/= UInt(0)) // underflow
assert (!inc || count =/= UInt(n)) // overflow
when (inc) { write := arw.wen }
!idle && write =/= arw.wen
}
stall := stalls.foldLeft(Bool(false))(_||_)
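An illustrative trace for an AXI ID that is shared because its client is requestFifo (so idCount > 1); the cycle numbers are made up:

//   t0: read accepted on ID 0          -> count = 1, write = false
//   t1: a write to ID 0 is presented   -> write =/= arw.wen and count =/= 0, so stall holds it back
//   t5: first R beat for ID 0 returns  -> ds && d_first, count drops to 0
//   t6: the write is accepted          -> count = 1, write = true
// An ID with idCount == 1 carries at most one outstanding transaction, so it can never mix
// directions; such IDs get no counter and never stall.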
// Tie off unused channels
in.b.valid := Bool(false)
in.c.ready := Bool(true)
@@ -226,9 +205,9 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P
object TLToAXI4
{
// applied to the TL source node; y.node := TLToAXI4(idBits)(x.node)
def apply(idBits: Int, combinational: Boolean = true)(x: TLOutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): AXI4OutwardNode = {
val axi4 = LazyModule(new TLToAXI4(idBits, combinational))
// applied to the TL source node; y.node := TLToAXI4(beatBytes)(x.node)
def apply(beatBytes: Int, combinational: Boolean = true)(x: TLOutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): AXI4OutwardNode = {
val axi4 = LazyModule(new TLToAXI4(beatBytes, combinational))
axi4.node := x
axi4.node
}

View File

@@ -19,18 +19,18 @@ package object tilelink2
def UIntToOH1(x: UInt, width: Int) = ~(SInt(-1, width=width).asUInt << x)(width-1, 0)
def trailingZeros(x: Int) = if (x > 0) Some(log2Ceil(x & -x)) else None
// Fill 1s from low bits to high bits
def leftOR(x: UInt) = {
val w = x.getWidth
def leftOR(x: UInt): UInt = leftOR(x, x.getWidth)
def leftOR(x: UInt, w: Integer): UInt = {
def helper(s: Int, x: UInt): UInt =
if (s >= w) x else helper(s+s, x | (x << s)(w-1,0))
helper(1, x)
helper(1, x)(w-1, 0)
}
// Fill 1s from high bits to low bits
def rightOR(x: UInt) = {
val w = x.getWidth
def rightOR(x: UInt): UInt = rightOR(x, x.getWidth)
def rightOR(x: UInt, w: Integer): UInt = {
def helper(s: Int, x: UInt): UInt =
if (s >= w) x else helper(s+s, x | (x >> s))
helper(1, x)
helper(1, x)(w-1, 0)
}
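Worked examples of the two helpers (bit patterns are illustrative): leftOR smears each set bit toward the MSB, rightOR toward the LSB, and the new explicit-width overloads simply let the caller fix the result width instead of taking it from x.getWidth.

// leftOR (b00000100, width 8) = b11111100
// rightOR(b00000100, width 8) = b00000111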
// This gets used everywhere, so make the smallest circuit possible ...
// Given an address and size, create a mask of beatBytes size