
Merge pull request #395 from ucb-bar/axi4-fragmenter

AXI4 fragmenter
Wesley W. Terpstra 2016-10-13 17:01:53 -07:00 committed by GitHub
commit 980bb3fbfd
16 changed files with 494 additions and 61 deletions

View File

@@ -123,24 +123,15 @@ case class AddressSet(base: BigInt, mask: BigInt) extends Ordered[AddressSet]
 object AddressSet
 {
-  def misaligned(base: BigInt, size: BigInt): Seq[AddressSet] = {
-    val largestPow2 = BigInt(1) << log2Floor(size)
-    val mostZeros = (base + size - 1) & ~(largestPow2 - 1)
-    def splitLo(low: BigInt, high: BigInt, tail: Seq[AddressSet]): Seq[AddressSet] = {
-      if (low == high) tail else {
-        val toggleBits = low ^ high
-        val misalignment = toggleBits & (-toggleBits)
-        splitLo(low+misalignment, high, AddressSet(low, misalignment-1) +: tail)
-      }
-    }
-    def splitHi(low: BigInt, high: BigInt, tail: Seq[AddressSet]): Seq[AddressSet] = {
-      if (low == high) tail else {
-        val toggleBits = low ^ high
-        val misalignment = toggleBits & (-toggleBits)
-        splitHi(low, high-misalignment, AddressSet(high-misalignment, misalignment-1) +: tail)
-      }
-    }
-    splitLo(base, mostZeros, splitHi(mostZeros, base+size, Seq())).sorted
+  def misaligned(base: BigInt, size: BigInt, tail: Seq[AddressSet] = Seq()): Seq[AddressSet] = {
+    if (size == 0) tail.reverse else {
+      val maxBaseAlignment = base & (-base) // 0 for infinite (LSB)
+      val maxSizeAlignment = BigInt(1) << log2Floor(size) // MSB of size
+      val step =
+        if (maxBaseAlignment == 0 || maxBaseAlignment > maxSizeAlignment)
+          maxSizeAlignment else maxBaseAlignment
+      misaligned(base+step, size-step, AddressSet(base, step-1) +: tail)
+    }
   }
 }
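
The new misaligned is a single tail recursion: each step peels off the largest naturally aligned power-of-two block that fits both the current base and the remaining size. As a quick sanity check, here is a plain-Scala mirror of the same recursion over (base, mask) pairs; the helper name and the example values are illustrative, not taken from the commit.

  def misalignedSketch(base: BigInt, size: BigInt, tail: Seq[(BigInt, BigInt)] = Seq()): Seq[(BigInt, BigInt)] =
    if (size == 0) tail.reverse else {
      val maxBaseAlignment = base & (-base)                    // 0 for infinite (LSB)
      val maxSizeAlignment = BigInt(1) << (size.bitLength - 1) // MSB of size, i.e. 1 << log2Floor(size)
      val step = if (maxBaseAlignment == 0 || maxBaseAlignment > maxSizeAlignment) maxSizeAlignment
                 else maxBaseAlignment
      misalignedSketch(base + step, size - step, (base, step - 1) +: tail)
    }

  // misalignedSketch(0x4300, 0xc00) ==
  //   Seq((0x4300, 0xff), (0x4400, 0x3ff), (0x4800, 0x3ff), (0x4c00, 0x1ff), (0x4e00, 0xff))
  // i.e. a 3 KiB window starting at 0x4300 decomposes into 256 B + 1 KiB + 1 KiB + 512 B + 256 B blocks.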

View File

@@ -0,0 +1,50 @@
// See LICENSE for license details.

package uncore.axi4

import Chisel._
import chisel3.internal.sourceinfo.SourceInfo
import diplomacy._
import scala.math.max

// pipe is only used if a queue has depth = 1
class AXI4Buffer(aw: Int = 2, w: Int = 2, b: Int = 2, ar: Int = 2, r: Int = 2, pipe: Boolean = true) extends LazyModule
{
  require (aw >= 0)
  require (w  >= 0)
  require (b  >= 0)
  require (ar >= 0)
  require (r  >= 0)

  val node = AXI4IdentityNode()

  lazy val module = new LazyModuleImp(this) {
    val io = new Bundle {
      val in  = node.bundleIn
      val out = node.bundleOut
    }

    ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) =>
      if (aw>0) { out.aw <> Queue(in .aw, aw, pipe && aw<2) } else { out.aw <> in .aw }
      if (w >0) { out.w  <> Queue(in .w,  w,  pipe && w <2) } else { out.w  <> in .w  }
      if (b >0) { in .b  <> Queue(out.b,  b,  pipe && b <2) } else { in .b  <> out.b  }
      if (ar>0) { out.ar <> Queue(in .ar, ar, pipe && ar<2) } else { out.ar <> in .ar }
      if (r >0) { in .r  <> Queue(out.r,  r,  pipe && r <2) } else { in .r  <> out.r  }
    }
  }
}

object AXI4Buffer
{
  // applied to the AXI4 source node; y.node := AXI4Buffer(x.node)
  def apply()                               (x: AXI4OutwardNode)(implicit sourceInfo: SourceInfo): AXI4OutwardNode = apply(2)(x)
  def apply(entries: Int)                   (x: AXI4OutwardNode)(implicit sourceInfo: SourceInfo): AXI4OutwardNode = apply(entries, true)(x)
  def apply(entries: Int, pipe: Boolean)    (x: AXI4OutwardNode)(implicit sourceInfo: SourceInfo): AXI4OutwardNode = apply(entries, entries, pipe)(x)
  def apply(aw: Int, br: Int)               (x: AXI4OutwardNode)(implicit sourceInfo: SourceInfo): AXI4OutwardNode = apply(aw, br, true)(x)
  def apply(aw: Int, br: Int, pipe: Boolean)(x: AXI4OutwardNode)(implicit sourceInfo: SourceInfo): AXI4OutwardNode = apply(aw, aw, br, aw, br, pipe)(x)
  def apply(aw: Int, w: Int, b: Int, ar: Int, r: Int, pipe: Boolean = true)(x: AXI4OutwardNode)(implicit sourceInfo: SourceInfo): AXI4OutwardNode = {
    val buffer = LazyModule(new AXI4Buffer(aw, w, b, ar, r, pipe))
    buffer.node := x
    buffer.node
  }
}
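
A usage sketch, mirroring the := chains in the test harnesses later in this commit; ram and xbar here are stand-in LazyModules, not names defined by this file.

  // Insert depth-2 queues on all five AXI channels between a TL-to-AXI4 converter and a RAM.
  ram.node := AXI4Buffer(2)(TLToAXI4(0, true)(xbar.node))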

View File

@@ -0,0 +1,295 @@
// See LICENSE for license details.

package uncore.axi4

import Chisel._
import chisel3.internal.sourceinfo.SourceInfo
import chisel3.util.IrrevocableIO
import diplomacy._
import scala.math.{min,max}
import uncore.tilelink2.{leftOR, rightOR, UIntToOH1}

// lite: masters all use only one ID => reads will not be interleaved
class AXI4Fragmenter(lite: Boolean = false, maxInFlight: Int = 32, combinational: Boolean = true) extends LazyModule
{
  val maxBeats = 1 << AXI4Parameters.lenBits
  def expandTransfer(x: TransferSizes, beatBytes: Int, alignment: BigInt) =
    if (!x) x else TransferSizes(x.min, alignment.min(maxBeats*beatBytes).intValue)
  def mapSlave(s: AXI4SlaveParameters, beatBytes: Int) = s.copy(
    supportsWrite = expandTransfer(s.supportsWrite, beatBytes, s.minAlignment),
    supportsRead  = expandTransfer(s.supportsRead,  beatBytes, s.minAlignment),
    interleavedId = if (lite) Some(0) else s.interleavedId) // see AXI4FragmenterSideband for !lite case
  def mapMaster(m: AXI4MasterParameters) = m.copy(aligned = true)

  val node = AXI4AdapterNode(
    masterFn = { case Seq(mp) => mp.copy(masters = mp.masters.map(m => mapMaster(m))) },
    slaveFn  = { case Seq(sp) => sp.copy(slaves  = sp.slaves .map(s => mapSlave(s, sp.beatBytes))) })

  lazy val module = new LazyModuleImp(this) {
    val io = new Bundle {
      val in  = node.bundleIn
      val out = node.bundleOut
    }

    val edgeOut   = node.edgesOut(0)
    val edgeIn    = node.edgesIn(0)
    val slave     = edgeOut.slave
    val slaves    = slave.slaves
    val beatBytes = slave.beatBytes
    val lgBytes   = log2Ceil(beatBytes)
    val master    = edgeIn.master
    val masters   = master.masters

    // If the user claimed this was a lite interface, then there must be only one Id
    require (!lite || master.endId == 1)

    // We don't support fragmenting to sub-beat accesses
    slaves.foreach { s =>
      require (!s.supportsRead  || s.supportsRead.contains(beatBytes))
      require (!s.supportsWrite || s.supportsWrite.contains(beatBytes))
    }

    /* We need to decompose a request into
     *   FIXED => each beat is a new request
     *   WRAP/INCR => take xfr up to next power of two, capped by max size of target
     *
     * On AR and AW, we fragment one request into many
     * On W  we set 'last' on beats which are fragment boundaries
     * On R  we clear 'last' on the fragments being reassembled
     * On B  we clear 'valid' on the responses for the injected fragments
     *
     * AR=>R and AW+W=>B are completely independent state machines.
     */

    /* Returns the number of beats to execute and the new address */
    def fragment(a: IrrevocableIO[AXI4BundleA], supportedSizes1: Seq[Int]): (IrrevocableIO[AXI4BundleA], Bool, UInt) = {
      val out = Wire(a)

      val busy   = RegInit(Bool(false))
      val r_addr = Reg(UInt(width = a.bits.params.addrBits))
      val r_len  = Reg(UInt(width = AXI4Parameters.lenBits))

      val len  = Mux(busy, r_len,  a.bits.len)
      val addr = Mux(busy, r_addr, a.bits.addr)

      val lo = if (lgBytes == 0) UInt(0) else addr(lgBytes-1, 0)
      val hi = addr >> lgBytes
      val alignment = hi(AXI4Parameters.lenBits-1,0)

      val allSame  = supportedSizes1.filter(_ >= 0).distinct.size <= 1
      val dynamic1 = Mux1H(slave.findFast(addr), supportedSizes1.map(s => UInt(max(0, s))))
      val fixed1   = UInt(supportedSizes1.filter(_ >= 0).headOption.getOrElse(0))

      /* We need to compute the largest transfer allowed by the AXI len.
       * len+1 is the number of beats to execute.
       * We want the MSB(len+1)-1; one less than the largest power of two we could execute.
       * There are two cases; either len is 2^n-1 in which case we leave it unchanged, ELSE
       * fill the bits from highest to lowest, and shift right by one bit.
       */
      val fillLow  = rightOR(len) >> 1   // set   all bits in positions <  a set     bit
      val wipeHigh = ~leftOR(~len)       // clear all bits in position  >= a cleared bit
      val remain1  = fillLow | wipeHigh  // MSB(a.len+1)-1
      val align1   = ~leftOR(alignment)  // transfer size limited by address alignment
      val support1 = if (allSame) fixed1 else dynamic1 // maximum supported size-1 based on target address
      val maxSupported1 = remain1 & align1 & support1  // Take the minimum of all the limits
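
      // Worked example (illustrative, not part of the original source), showing only the low bits:
      // len = 5 (0b101) means len+1 = 6 beats, so the largest executable power of two is 4 beats
      // and MSB(len+1)-1 = 3 (0b011).
      //   fillLow  = rightOR(0b101) >> 1 = 0b011
      //   wipeHigh = ~leftOR(~0b101)     = 0b001
      //   remain1  = 0b011, as expected; align1 and support1 then shrink this further if the
      //   current address alignment or the selected slave demand it.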

      // Things that cause us to degenerate to a single beat
      val fixed  = a.bits.burst === AXI4Parameters.BURST_FIXED
      val narrow = a.bits.size =/= UInt(lgBytes)
      val bad    = fixed || narrow

      // The number of beats-1 to execute
      val beats1 = Mux(bad, UInt(0), maxSupported1)
      val beats = ~(~(beats1 << 1 | UInt(1)) | beats1) // beats1 + 1

      val inc_addr = addr + (beats << a.bits.size) // address after adding transfer
      val wrapMask = ~(~a.bits.len << a.bits.size) // only these bits may change, if wrapping

      val mux_addr = Wire(init = inc_addr)
      when (a.bits.burst === AXI4Parameters.BURST_WRAP) {
        mux_addr := (inc_addr & wrapMask) | ~(~a.bits.addr | wrapMask)
      }
      when (a.bits.burst === AXI4Parameters.BURST_FIXED) {
        mux_addr := a.bits.addr
      }

      val last = beats1 === len
      a.ready := out.ready && last
      out.valid := a.valid

      out.bits := a.bits
      out.bits.len := beats1

      // We forcibly align every access. If the first beat was misaligned, the strb bits
      // for the lower addresses must not have been set. Therefore, rounding the address
      // down is harmless. We can do this after the address update algorithm, because the
      // incremented values will be rounded down the same way. Furthermore, a subword
      // offset cannot cause a premature wrap-around.
      out.bits.addr := ~(~addr | UIntToOH1(a.bits.size, lgBytes))

      when (out.fire()) {
        busy := !last
        r_addr := mux_addr
        r_len  := len - beats
      }

      (out, last, beats)
    }

    val in  = io.in(0)
    val out = io.out(0)

    // The size to which we will fragment the access
    val readSizes1  = slaves.map(s => s.supportsRead .max/beatBytes-1)
    val writeSizes1 = slaves.map(s => s.supportsWrite.max/beatBytes-1)

    // Indirection variables for inputs and outputs; makes transformation application easier
    val (in_ar, ar_last, _)       = fragment(in.ar, readSizes1)
    val (in_aw, aw_last, w_beats) = fragment(in.aw, writeSizes1)
    val in_w = in.w
    val in_r = in.r
    val in_b = in.b
    val out_ar = Wire(out.ar)
    val out_aw = out.aw
    val out_w = out.w
    val out_r = Wire(out.r)
    val out_b = Wire(out.b)

    val depth = if (combinational) 1 else 2
    // In case a slave ties arready := rready, we need a queue to break the combinational loop
    // between the two branches (in_ar => {out_ar => out_r, sideband} => in_r).
    if (in.ar.bits.getWidth < in.r.bits.getWidth) {
      out.ar <> Queue(out_ar, depth, flow=combinational)
      out_r <> out.r
    } else {
      out.ar <> out_ar
      out_r <> Queue(out.r, depth, flow=combinational)
    }
    // In case a slave ties awready := bready or wready := bready, we need this queue
    out_b <> Queue(out.b, depth, flow=combinational)

    // Sideband to track which transfers were the last fragment
    def sideband() = if (lite) {
      Module(new Queue(Bool(), maxInFlight, flow=combinational)).io
    } else {
      Module(new AXI4FragmenterSideband(maxInFlight, flow=combinational)).io
    }
    val sideband_ar_r = sideband()
    val sideband_aw_b = sideband()

    // AR flow control
    out_ar.valid := in_ar.valid && sideband_ar_r.enq.ready
    in_ar.ready := sideband_ar_r.enq.ready && out_ar.ready
    sideband_ar_r.enq.valid := in_ar.valid && out_ar.ready
    out_ar.bits := in_ar.bits
    sideband_ar_r.enq.bits := ar_last

    // When does W channel start counting a new transfer
    val wbeats_latched = RegInit(Bool(false))
    val wbeats_ready = Wire(Bool())
    val wbeats_valid = Wire(Bool())
    when (wbeats_valid && wbeats_ready) { wbeats_latched := Bool(true) }
    when (out_aw.fire()) { wbeats_latched := Bool(false) }

    // AW flow control
    out_aw.valid := in_aw.valid && sideband_aw_b.enq.ready && (wbeats_ready || wbeats_latched)
    in_aw.ready := sideband_aw_b.enq.ready && out_aw.ready && (wbeats_ready || wbeats_latched)
    sideband_aw_b.enq.valid := in_aw.valid && out_aw.ready && (wbeats_ready || wbeats_latched)
    wbeats_valid := in_aw.valid && !wbeats_latched
    out_aw.bits := in_aw.bits
    sideband_aw_b.enq.bits := aw_last

    // We need to inject 'last' into the W channel fragments, count!
    val w_counter = RegInit(UInt(0, width = AXI4Parameters.lenBits+1))
    val w_idle = w_counter === UInt(0)
    val w_todo = Mux(w_idle, Mux(wbeats_valid, w_beats, UInt(0)), w_counter)
    val w_last = w_todo === UInt(1)
    w_counter := w_todo - out_w.fire()
    assert (!out_w.fire() || w_todo =/= UInt(0)) // underflow impossible
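
    // Illustrative trace (not part of the original source): a 4-beat write fragment loads
    // w_todo with 4; as out_w fires it steps 4,3,2,1, and w_last is asserted on the beat
    // where w_todo === 1, which becomes the injected fragment boundary below.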

    // W flow control
    wbeats_ready := w_idle
    out_w.valid := in_w.valid && (!wbeats_ready || wbeats_valid)
    in_w.ready := out_w.ready && (!wbeats_ready || wbeats_valid)
    out_w.bits := in_w.bits
    out_w.bits.last := w_last
    // The original burst's final 'last' beat must also be the final beat of its last fragment
    assert (!out_w.valid || !in_w.bits.last || w_last)

    // R flow control
    val r_last = out_r.bits.last
    in_r.valid := out_r.valid && (!r_last || sideband_ar_r.deq.valid)
    out_r.ready := in_r.ready && (!r_last || sideband_ar_r.deq.valid)
    sideband_ar_r.deq.ready := r_last && out_r.valid && in_r.ready
    in_r.bits := out_r.bits
    in_r.bits.last := r_last && sideband_ar_r.deq.bits

    // B flow control
    val b_last = sideband_aw_b.deq.bits
    in_b.valid := out_b.valid && sideband_aw_b.deq.valid && b_last
    out_b.ready := sideband_aw_b.deq.valid && (!b_last || in_b.ready)
    sideband_aw_b.deq.ready := out_b.valid && (!b_last || in_b.ready)
    in_b.bits := out_b.bits

    // Merge errors from dropped B responses
    val r_resp = RegInit(UInt(0, width = AXI4Parameters.respBits))
    val resp = out_b.bits.resp | r_resp
    when (out_b.fire()) { r_resp := Mux(b_last, UInt(0), resp) }
    in_b.bits.resp := resp
  }
}

/* We want to put barriers between the fragments of a fragmented transfer and all other transfers.
 * This lets us use very little state to reassemble the fragments (else we need one FIFO per ID).
 * Furthermore, because all the fragments share the same AXI ID, they come back contiguously.
 * This guarantees that no other R responses might get mixed between fragments, ensuring that the
 * interleavedId for the slaves remains unaffected by the fragmentation transformation.
 * Of course, if you need to fragment, this means there is a potentially hefty serialization cost.
 * However, this design allows full concurrency in the common no-fragmentation-needed scenario.
 */
class AXI4FragmenterSideband(maxInFlight: Int, flow: Boolean = false) extends Module
{
  val io = new QueueIO(Bool(), maxInFlight)
  io.count := UInt(0)

  val PASS = UInt(2, width = 2) // allow 'last=1' bits to enqueue, on 'last=0' if count>0 block else accept+FIND
  val FIND = UInt(0, width = 2) // allow 'last=0' bits to enqueue, accept 'last=1' and switch to WAIT
  val WAIT = UInt(1, width = 2) // block all access till count=0

  val state = RegInit(PASS)
  val count = RegInit(UInt(0, width = log2Up(maxInFlight)))
  val full  = count === UInt(maxInFlight-1)
  val empty = count === UInt(0)
  val last  = count === UInt(1)

  io.deq.bits := state(1) || (last && state(0)) // PASS || (last && WAIT)
  io.deq.valid := !empty

  io.enq.ready := !full && (empty || (state === FIND) || (state === PASS && io.enq.bits))

  // WAIT => count > 0
  assert (state =/= WAIT || count =/= UInt(0))

  if (flow) {
    when (io.enq.valid) {
      io.deq.valid := Bool(true)
      when (empty) { io.deq.bits := io.enq.bits }
    }
  }

  count := count + io.enq.fire() - io.deq.fire()
  switch (state) {
    is(PASS) { when (io.enq.valid && !io.enq.bits && empty) { state := FIND } }
    is(FIND) { when (io.enq.valid &&  io.enq.bits && !full) { state := Mux(empty, PASS, WAIT) } }
    is(WAIT) { when (last && io.deq.ready)                  { state := PASS } }
  }
}
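
// Illustrative trace (not part of the original source): suppose a fragmented read enqueues the
// 'last' bits 0,0,1 while an unrelated read token is still in flight. PASS refuses the first 0
// until the queue is empty, FIND then accepts 0,0 and the closing 1 (moving to WAIT unless the
// queue is already empty again), and deq.bits stays 0 until only that final token remains
// (count === 1), so the inner R channel sees 'last' exactly once per original burst.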

object AXI4Fragmenter
{
  // applied to the AXI4 source node; y.node := AXI4Fragmenter()(x.node)
  def apply(lite: Boolean = false, maxInFlight: Int = 32, combinational: Boolean = true)(x: AXI4OutwardNode)(implicit sourceInfo: SourceInfo): AXI4OutwardNode = {
    val fragmenter = LazyModule(new AXI4Fragmenter(lite, maxInFlight, combinational))
    fragmenter.node := x
    fragmenter.node
  }
}
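
To make the AR/AW fragmentation above concrete, here is a software-only sketch (plain Scala, no Chisel; the helper name and the INCR-only simplification are mine, not part of the commit). Working in units of beats, it applies the same three limits as remain1, align1 and support1 to split one burst into the fragments the hardware would issue.

  object FragmentSketch {
    // Split an INCR burst of 'beats' beats whose first beat lands at beat-address 'addr'
    // into power-of-two, naturally aligned fragments of at most 'maxBeats' beats each.
    def fragments(addr: Int, beats: Int, maxBeats: Int): Seq[Int] =
      if (beats == 0) Seq() else {
        val byLen   = Integer.highestOneBit(beats)              // largest power of two <= remaining beats
        val byAlign = if (addr == 0) byLen else addr & (-addr)  // limit from current address alignment
        val step    = Seq(byLen, byAlign, maxBeats).min         // remain1 & align1 & support1, in effect
        step +: fragments(addr + step, beats - step, maxBeats)
      }
  }

  // FragmentSketch.fragments(addr = 1, beats = 13, maxBeats = 8) == Seq(1, 2, 4, 4, 2)
  // i.e. a misaligned 13-beat burst against an 8-beat-capable slave becomes five fragments.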

View File

@@ -40,8 +40,8 @@ case class AXI4SlaveNode(portParams: AXI4SlavePortParameters, numPorts: Range.In
   extends SinkNode(AXI4Imp)(portParams, numPorts)

 case class AXI4AdapterNode(
-  clientFn:       Seq[AXI4MasterPortParameters] => AXI4MasterPortParameters,
-  managerFn:      Seq[AXI4SlavePortParameters]  => AXI4SlavePortParameters,
+  masterFn:       Seq[AXI4MasterPortParameters] => AXI4MasterPortParameters,
+  slaveFn:        Seq[AXI4SlavePortParameters]  => AXI4SlavePortParameters,
   numMasterPorts: Range.Inclusive = 1 to 1,
   numSlavePorts:  Range.Inclusive = 1 to 1)
-  extends InteriorNode(AXI4Imp)(clientFn, managerFn, numMasterPorts, numSlavePorts)
+  extends InteriorNode(AXI4Imp)(masterFn, slaveFn, numMasterPorts, numSlavePorts)

View File

@@ -21,9 +21,10 @@ case class AXI4SlaveParameters(
   val name = nodePath.lastOption.map(_.lazyModule.name).getOrElse("disconnected")
   val maxTransfer = max(supportsWrite.max, supportsRead.max)
   val maxAddress = address.map(_.max).max
+  val minAlignment = address.map(_.alignment).min

   // The device had better not support a transfer larger than its alignment
-  address.foreach { case a => require (a.alignment >= maxTransfer) }
+  require (minAlignment >= maxTransfer)
 }

 case class AXI4SlavePortParameters(
@@ -41,6 +42,10 @@ case class AXI4SlavePortParameters(
   // Check that the link can be implemented in AXI4
   require (maxTransfer <= beatBytes * (1 << AXI4Parameters.lenBits))

+  lazy val routingMask = AddressDecoder(slaves.map(_.address))
+  def findSafe(address: UInt) = Vec(slaves.map(_.address.map(_.contains(address)).reduce(_ || _)))
+  def findFast(address: UInt) = Vec(slaves.map(_.address.map(_.widen(~routingMask)).distinct.map(_.contains(address)).reduce(_ || _)))
+
   // Require disjoint ranges for addresses
   slaves.combinations(2).foreach { case Seq(x,y) =>
     x.address.foreach { a => y.address.foreach { b =>
@@ -51,6 +56,7 @@ case class AXI4SlavePortParameters(
 case class AXI4MasterParameters(
   id:       IdRange       = IdRange(0, 1),
+  aligned:  Boolean       = false,
   nodePath: Seq[BaseNode] = Seq())
 {
   val name = nodePath.lastOption.map(_.lazyModule.name).getOrElse("disconnected")
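
For intuition about the new minAlignment check, here is a worked example (the numbers are illustrative, assuming AddressSet(0x400, 0x3ff).alignment is the natural 1 KiB alignment of the range 0x400-0x7ff):

  // minAlignment = 0x400, so require(minAlignment >= maxTransfer) caps this slave's advertised
  // supportsRead/supportsWrite at 1024 bytes, and AXI4Fragmenter.expandTransfer above will
  // never widen a supported size past that alignment.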

View File

@@ -49,7 +49,7 @@ class AXI4RegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int
     // Invoke the register map builder and make it Irrevocable
     val out = Queue.irrevocable(
       RegMapper(beatBytes, concurrency, undefZero, in, mapping:_*),
-      entries = 1, pipe = true, flow = true)
+      entries = 1, flow = true)

     // No flow control needed
     out.ready := Mux(out.bits.read, r.ready, b.ready)

View File

@@ -0,0 +1,61 @@
// See LICENSE for license details.

package uncore.axi4

import Chisel._
import diplomacy._
import uncore.tilelink2._
import unittest._

class RRTest0(address: BigInt) extends AXI4RegisterRouter(address, 0, 32, 0, 4)(
  new AXI4RegBundle((), _)    with RRTest0Bundle)(
  new AXI4RegModule((), _, _) with RRTest0Module)

class RRTest1(address: BigInt) extends AXI4RegisterRouter(address, 0, 32, 6, 4, false)(
  new AXI4RegBundle((), _)    with RRTest1Bundle)(
  new AXI4RegModule((), _, _) with RRTest1Module)

class AXI4LiteFuzzRAM extends LazyModule
{
  val fuzz  = LazyModule(new TLFuzzer(5000))
  val model = LazyModule(new TLRAMModel("AXI4LiteFuzzRAM"))
  val xbar  = LazyModule(new TLXbar)
  val gpio  = LazyModule(new RRTest1(0x400))
  val ram   = LazyModule(new AXI4RAM(AddressSet(0x0, 0x3ff)))

  model.node := fuzz.node
  xbar.node  := model.node
  ram.node   := AXI4Fragmenter(lite=true)(TLToAXI4(0, true )(xbar.node))
  gpio.node  := AXI4Fragmenter(lite=true)(TLToAXI4(0, false)(xbar.node))

  lazy val module = new LazyModuleImp(this) with HasUnitTestIO {
    io.finished := fuzz.module.io.finished
  }
}

class AXI4LiteFuzzRAMTest extends UnitTest(500000) {
  val dut = Module(LazyModule(new AXI4LiteFuzzRAM).module)
  io.finished := dut.io.finished
}

class AXI4FullFuzzRAM extends LazyModule
{
  val fuzz  = LazyModule(new TLFuzzer(5000))
  val model = LazyModule(new TLRAMModel("AXI4FullFuzzRAM"))
  val xbar  = LazyModule(new TLXbar)
  val gpio  = LazyModule(new RRTest0(0x400))
  val ram   = LazyModule(new AXI4RAM(AddressSet(0x0, 0x3ff)))

  model.node := fuzz.node
  xbar.node  := model.node
  ram.node   := AXI4Fragmenter(lite=false, maxInFlight = 2)(TLToAXI4(4,false)(xbar.node))
  gpio.node  := AXI4Fragmenter(lite=false, maxInFlight = 5)(TLToAXI4(4,true )(xbar.node))

  lazy val module = new LazyModuleImp(this) with HasUnitTestIO {
    io.finished := fuzz.module.io.finished
  }
}

class AXI4FullFuzzRAMTest extends UnitTest(500000) {
  val dut = Module(LazyModule(new AXI4FullFuzzRAM).module)
  io.finished := dut.io.finished
}

View File

@@ -138,8 +138,8 @@ class TLAtomicAutomata(logical: Boolean = true, arithmetic: Boolean = true, conc
       // Move the selected sign bit into the first byte position it will extend
       val signbit_a = ((signbits_a & signSel) << 1)(beatBytes-1, 0)
       val signbit_d = ((signbits_d & signSel) << 1)(beatBytes-1, 0)
-      val signext_a = FillInterleaved(8, highOR(signbit_a))
-      val signext_d = FillInterleaved(8, highOR(signbit_d))
+      val signext_a = FillInterleaved(8, leftOR(signbit_a))
+      val signext_d = FillInterleaved(8, leftOR(signbit_d))
       // NOTE: sign-extension does not change the relative ordering in EITHER unsigned or signed arithmetic
       val wide_mask = FillInterleaved(8, mask)
       val a_a_ext = (a_a & wide_mask) | signext_a

View File

@@ -12,7 +12,7 @@ import scala.math.{min,max}
 // alwaysMin: fragment all requests down to minSize (else fragment to maximum supported by manager)
 // Fragmenter modifies: PutFull, PutPartial, LogicalData, Get, Hint
 // Fragmenter passes: ArithmeticData (truncated to minSize if alwaysMin)
-// Fragmenter breaks: Acquire (and thus cuts BCE channels)
+// Fragmenter cannot modify acquire (could livelock); thus it is unsafe to put caches on both sides
 class TLFragmenter(minSize: Int, maxSize: Int, alwaysMin: Boolean = false) extends LazyModule
 {
   require (isPow2 (maxSize))
@@ -30,7 +30,6 @@ class TLFragmenter(minSize: Int, maxSize: Int, alwaysMin: Boolean = false) exten
     if (x.min <= minSize) TransferSizes(x.min, min(minSize, x.max)) else
     TransferSizes.none
   def mapManager(m: TLManagerParameters) = m.copy(
-    supportsAcquire    = TransferSizes.none, // this adapter breaks acquires
     supportsArithmetic = shrinkTransfer(m.supportsArithmetic),
     supportsLogical    = expandTransfer(m.supportsLogical),
     supportsGet        = expandTransfer(m.supportsGet),
@@ -38,15 +37,7 @@ class TLFragmenter(minSize: Int, maxSize: Int, alwaysMin: Boolean = false) exten
     supportsPutPartial = expandTransfer(m.supportsPutPartial),
     supportsHint       = expandTransfer(m.supportsHint))
   def mapClient(c: TLClientParameters) = c.copy(
-    sourceId = IdRange(c.sourceId.start << fragmentBits, c.sourceId.end << fragmentBits),
-    // since we break Acquires, none of these work either:
-    supportsProbe      = TransferSizes.none,
-    supportsArithmetic = TransferSizes.none,
-    supportsLogical    = TransferSizes.none,
-    supportsGet        = TransferSizes.none,
-    supportsPutFull    = TransferSizes.none,
-    supportsPutPartial = TransferSizes.none,
-    supportsHint       = TransferSizes.none)
+    sourceId = IdRange(c.sourceId.start << fragmentBits, c.sourceId.end << fragmentBits))

   // Because the Fragmenter stalls inner A while serving outer, it can wipe away inner latency
   val node = TLAdapterNode(
@@ -70,6 +61,8 @@ class TLFragmenter(minSize: Int, maxSize: Int, alwaysMin: Boolean = false) exten
     // We don't support fragmenting to sub-beat accesses
     require (minSize >= beatBytes)
+    // We can't support devices which are cached on both sides of us
+    require (!edgeOut.manager.anySupportAcquire || !edgeIn.client.anySupportProbe)

     /* The Fragmenter is a bit tricky, because there are 5 sizes in play:
      *   max size -- the maximum transfer size possible
@@ -174,6 +167,12 @@
     in.d.bits.source := out.d.bits.source >> fragmentBits
     in.d.bits.size   := Mux(dFirst, dFirst_size, dOrig)

+    // Combine the error flag
+    val r_error = RegInit(Bool(false))
+    val d_error = r_error | out.d.bits.error
+    when (out.d.fire()) { r_error := Mux(drop, d_error, UInt(0)) }
+    in.d.bits.error := d_error
+
     // What maximum transfer sizes do downstream devices support?
     val maxArithmetics = managers.map(_.supportsArithmetic.max)
     val maxLogicals    = managers.map(_.supportsLogical.max)
@@ -271,4 +270,3 @@ class TLRAMFragmenter(ramBeatBytes: Int, maxSize: Int) extends LazyModule {
 class TLRAMFragmenterTest(ramBeatBytes: Int, maxSize: Int) extends UnitTest(timeout = 500000) {
   io.finished := Module(LazyModule(new TLRAMFragmenter(ramBeatBytes,maxSize)).module).io.finished
 }
-

View File

@@ -17,7 +17,7 @@ class IDMapGenerator(numIds: Int) extends Module {
   io.free.ready := Bool(true)
   assert (!io.free.valid || !bitmap(io.free.bits)) // No double freeing

-  val select = ~(highOR(bitmap) << 1) & bitmap
+  val select = ~(leftOR(bitmap) << 1) & bitmap
   io.alloc.bits := OHToUInt(select)
   io.alloc.valid := bitmap.orR()

@@ -206,7 +206,7 @@ import unittest._
 class TLFuzzRAM extends LazyModule
 {
-  val model = LazyModule(new TLRAMModel)
+  val model = LazyModule(new TLRAMModel("TLFuzzRAM"))
   val ram  = LazyModule(new TLRAM(AddressSet(0x800, 0x7ff)))
   val ram2 = LazyModule(new TLRAM(AddressSet(0, 0x3ff), beatBytes = 16))
   val gpio = LazyModule(new RRTest1(0x400))

View File

@@ -25,9 +25,7 @@ case class TLManagerParameters(
   customDTS: Option[String]= None)
 {
   address.foreach { a => require (a.finite) }
-  address.combinations(2).foreach({ case Seq(x,y) =>
-    require (!x.overlaps(y))
-  })
+  address.combinations(2).foreach { case Seq(x,y) => require (!x.overlaps(y)) }
   require (supportsPutFull.contains(supportsPutPartial))

   // Largest support transfer of all types
@@ -38,6 +36,7 @@ case class TLManagerParameters(
     supportsGet.max,
     supportsPutFull.max,
     supportsPutPartial.max).max
+  val maxAddress = address.map(_.max).max

   val name = nodePath.lastOption.map(_.lazyModule.name).getOrElse("disconnected")
@@ -53,9 +52,8 @@ case class TLManagerParameters(
   }

   // The device had better not support a transfer larger than its alignment
-  address.foreach({ case a =>
-    require (a.alignment >= maxTransfer)
-  })
+  val minAlignment = address.map(_.alignment).min
+  require (minAlignment >= maxTransfer)
 }

 case class TLManagerPortParameters(
@@ -77,7 +75,7 @@ case class TLManagerPortParameters(
   // Bounds on required sizes
   def endSinkId   = managers.map(_.sinkId.end).max
-  def maxAddress  = managers.map(_.address.map(_.max).max).max
+  def maxAddress  = managers.map(_.maxAddress).max
   def maxTransfer = managers.map(_.maxTransfer).max

   // Operation sizes supported by all outward Managers
@@ -166,6 +164,13 @@ case class TLClientParameters(
   supportsHint: TransferSizes = TransferSizes.none)
 {
   require (supportsPutFull.contains(supportsPutPartial))
+  // We only support these operations if we support Probe (ie: we're a cache)
+  require (supportsProbe.contains(supportsArithmetic))
+  require (supportsProbe.contains(supportsLogical))
+  require (supportsProbe.contains(supportsGet))
+  require (supportsProbe.contains(supportsPutFull))
+  require (supportsProbe.contains(supportsPutPartial))
+  require (supportsProbe.contains(supportsHint))

   val maxTransfer = List(
     supportsProbe.max,

View File

@@ -20,7 +20,7 @@ import diplomacy._
 // put, get, getAck, putAck => ok: detected by getAck (it sees busy>0) impossible for FIFO
 // If FIFO, the getAck should check data even if its validity was wiped

-class TLRAMModel extends LazyModule
+class TLRAMModel(log: String = "") extends LazyModule
 {
   val node = TLIdentityNode()

@@ -150,6 +150,7 @@ class TLRAMModel extends LazyModule
         val busy = a_inc(i) - a_dec(i) - (!a_first).asUInt
         val byte = a.data(8*(i+1)-1, 8*i)
         when (a.mask(i)) {
+          printf(log + " ")
           when (a.opcode === TLMessages.PutFullData)    { printf("PF") }
           when (a.opcode === TLMessages.PutPartialData) { printf("PP") }
           when (a.opcode === TLMessages.ArithmeticData) { printf("A ") }
@@ -160,7 +161,7 @@ class TLRAMModel extends LazyModule
       }

       when (a.opcode === TLMessages.Get) {
-        printf("G 0x%x - 0%x\n", a_base, a_base | UIntToOH1(a_size, addressBits))
+        printf(log + " G 0x%x - 0%x\n", a_base, a_base | UIntToOH1(a_size, addressBits))
       }
     }

@@ -245,6 +246,7 @@
       when (d_flight.opcode === TLMessages.PutFullData || d_flight.opcode === TLMessages.PutPartialData) {
         assert (d.opcode === TLMessages.AccessAck)
+        printf(log + " ")
         when (d_flight.opcode === TLMessages.PutFullData)    { printf("pf") }
         when (d_flight.opcode === TLMessages.PutPartialData) { printf("pp") }
         printf(" 0x%x - 0x%x\n", d_base, d_base | UIntToOH1(d_size, addressBits))
@@ -257,6 +259,7 @@
         val shadow = Wire(init = d_shadow(i))
         when (d_mask(i)) {
           val d_addr = d_addr_hi << shift | UInt(i)
+          printf(log + " ")
           when (d_flight.opcode === TLMessages.Get)            { printf("g ") }
           when (d_flight.opcode === TLMessages.ArithmeticData) { printf("a ") }
           when (d_flight.opcode === TLMessages.LogicalData)    { printf("l ") }

View File

@@ -44,7 +44,7 @@ class TLRegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int =
     // Invoke the register map builder and make it Irrevocable
     val out = Queue.irrevocable(
       RegMapper(beatBytes, concurrency, undefZero, in, mapping:_*),
-      entries = 1, pipe = true, flow = true)
+      entries = 1, flow = true)

     // No flow control needed
     in.valid := a.valid

View File

@@ -12,7 +12,11 @@ import scala.math.{min, max}
 case class TLToAXI4Node(idBits: Int) extends MixedNode(TLImp, AXI4Imp)(
   dFn = { case (1, _) =>
     // We must erase all client information, because we crush their source Ids
-    Seq(AXI4MasterPortParameters(Seq(AXI4MasterParameters(id = IdRange(0, 1 << idBits)))))
+    val masters = Seq(
+      AXI4MasterParameters(
+        id      = IdRange(0, 1 << idBits),
+        aligned = true))
+    Seq(AXI4MasterPortParameters(masters))
   },
   uFn = { case (1, Seq(AXI4SlavePortParameters(slaves, beatBytes))) =>
     val managers = slaves.zipWithIndex.map { case (s, id) =>
@@ -53,6 +57,13 @@ class TLToAXI4(idBits: Int, combinational: Boolean = true) extends LazyModule
     require (slaves(0).interleavedId.isDefined)
     slaves.foreach { s => require (s.interleavedId == slaves(0).interleavedId) }

+    // We need to ensure that a slave does not stall trying to send B while we need to receive R
+    // Since R&W have independent flow control, it is possible for a W to cut in-line and get into
+    // a slave's buffers, preventing us from getting all the R responses we need to release D for B.
+    // This risk is compounded by an AXI fragmentation. Even a slave which responds completely to
+    // AR before working on AW might have an AW slipped between two AR fragments.
+    val out_b = Queue.irrevocable(out.b, entries=edgeIn.client.endSourceId, flow=combinational)
+
     // We need to keep the following state from A => D: (addr_lo, size, sink, source)
     // All of those fields could potentially require 0 bits (argh. Chisel.)
     // We will pack as many of the lowest bits of state as fit into the AXI ID.
@@ -113,7 +124,7 @@
     val r_last = out.r.bits.last
     val r_id   = out.r.bits.id
-    val b_id   = out.b.bits.id
+    val b_id   = out_b.bits.id

     if (stateBits <= idBits) { // No need for any state tracking
       r_state := r_id
@@ -148,7 +159,7 @@
       q.io.enq.bits.data := a_state >> implicitBits
       q.io.enq.bits.way  := Mux(a_isPut, UInt(0), UInt(1))
       // Pop the bank's ways
-      q.io.deq(0).ready := out.b.fire() && b_bankSelect(i)
+      q.io.deq(0).ready := out_b.fire() && b_bankSelect(i)
       q.io.deq(1).ready := out.r.fire() && r_bankSelect(i) && r_last
       // The FIFOs must be valid when we're ready to pop them...
       assert (q.io.deq(0).valid || !q.io.deq(0).ready)
@@ -169,8 +180,8 @@
     val depth = if (combinational) 1 else 2
     val out_arw = Wire(Decoupled(new AXI4BundleARW(out.params)))
     val out_w = Wire(out.w)
-    out.w <> Queue.irrevocable(out_w, entries=depth, pipe=combinational, flow=combinational)
-    val queue_arw = Queue.irrevocable(out_arw, entries=depth, pipe=combinational, flow=combinational)
+    out.w <> Queue.irrevocable(out_w, entries=depth, flow=combinational)
+    val queue_arw = Queue.irrevocable(out_arw, entries=depth, flow=combinational)

     // Fan out the ARW channel to AR and AW
     out.ar.bits := queue_arw.bits
@@ -210,18 +221,21 @@
     // Give R higher priority than B
     val r_wins = out.r.valid || r_holds_d

-    out.r.ready := in.d.ready
-    out.b.ready := in.d.ready && !r_wins
-    in.d.valid := Mux(r_wins, out.r.valid, out.b.valid)
+    val in_d = Wire(in.d)
+    in.d <> Queue.irrevocable(in_d, entries=1, flow=combinational)
+
+    out.r.ready := in_d.ready
+    out_b.ready := in_d.ready && !r_wins
+    in_d.valid := Mux(r_wins, out.r.valid, out_b.valid)

     val r_error = out.r.bits.resp =/= AXI4Parameters.RESP_OKAY
-    val b_error = out.b.bits.resp =/= AXI4Parameters.RESP_OKAY
+    val b_error = out_b.bits.resp =/= AXI4Parameters.RESP_OKAY

     val r_d = edgeIn.AccessAck(r_addr_lo, r_sink, r_source, r_size, UInt(0), r_error)
     val b_d = edgeIn.AccessAck(b_addr_lo, b_sink, b_source, b_size, b_error)
-    in.d.bits := Mux(r_wins, r_d, b_d)
-    in.d.bits.data := out.r.bits.data // avoid a costly Mux
+    in_d.bits := Mux(r_wins, r_d, b_d)
+    in_d.bits.data := out.r.bits.data // avoid a costly Mux

     // Tie off unused channels
     in.b.valid := Bool(false)

View File

@@ -11,12 +11,20 @@ package object tilelink2
   def OH1ToUInt(x: UInt) = OHToUInt((x << 1 | UInt(1)) ^ x)
   def UIntToOH1(x: UInt, width: Int) = ~(SInt(-1, width=width).asUInt << x)(width-1, 0)
   def trailingZeros(x: Int) = if (x > 0) Some(log2Ceil(x & -x)) else None
-  def highOR(x: UInt) = {
+  // Fill 1s from low bits to high bits
+  def leftOR(x: UInt) = {
     val w = x.getWidth
     def helper(s: Int, x: UInt): UInt =
       if (s >= w) x else helper(s+s, x | (x << s)(w-1,0))
     helper(1, x)
   }
+  // Fill 1s from high bits to low bits
+  def rightOR(x: UInt) = {
+    val w = x.getWidth
+    def helper(s: Int, x: UInt): UInt =
+      if (s >= w) x else helper(s+s, x | (x >> s))
+    helper(1, x)
+  }

   // This gets used everywhere, so make the smallest circuit possible ...
   def maskGen(addr_lo: UInt, lgSize: UInt, beatBytes: Int): UInt = {
     val lgBytes = log2Ceil(beatBytes)
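
A concrete picture of what the two new helpers compute (the bit pattern is illustrative, not from the commit):

  // For an 8-bit x = b01001000:
  //   leftOR(x)  = b11111000   // bit i is set if any bit <= i of x is set
  //   rightOR(x) = b01111111   // bit i is set if any bit >= i of x is set
  // The AXI4Fragmenter relies on rightOR(len) >> 1 and ~leftOR(~len) to isolate MSB(len+1)-1.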

View File

@@ -25,7 +25,9 @@ class WithUncoreUnitTests extends Config(
   case UnitTests => (p: Parameters) => Seq(
     Module(new uncore.devices.ROMSlaveTest()(p)),
     Module(new uncore.devices.TileLinkRAMTest()(p)),
-    Module(new uncore.tilelink2.TLFuzzRAMTest))
+    Module(new uncore.tilelink2.TLFuzzRAMTest),
+    Module(new uncore.axi4.AXI4LiteFuzzRAMTest),
+    Module(new uncore.axi4.AXI4FullFuzzRAMTest))
   case _ => throw new CDEMatchError
   }
 )