From 396ecacda4105b77b57fb222b1f0611bcff87ccf Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Fri, 17 Mar 2017 17:06:04 -0700 Subject: [PATCH 01/29] AXI4: add an optional user bundle field --- src/main/scala/uncore/axi4/Bundles.scala | 3 +++ src/main/scala/uncore/axi4/Parameters.scala | 17 ++++++++++++----- src/main/scala/uncore/axi4/ToTL.scala | 6 ++++-- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/main/scala/uncore/axi4/Bundles.scala b/src/main/scala/uncore/axi4/Bundles.scala index f8c9ddc6..75f43c66 100644 --- a/src/main/scala/uncore/axi4/Bundles.scala +++ b/src/main/scala/uncore/axi4/Bundles.scala @@ -19,6 +19,7 @@ abstract class AXI4BundleA(params: AXI4BundleParameters) extends AXI4BundleBase( val cache = UInt(width = params.cacheBits) val prot = UInt(width = params.protBits) val qos = UInt(width = params.qosBits) // 0=no QoS, bigger = higher priority + val user = if (params.userBits > 0) Some(UInt(width = params.userBits)) else None // val region = UInt(width = 4) // optional // Number of bytes-1 in this operation @@ -51,6 +52,7 @@ class AXI4BundleR(params: AXI4BundleParameters) extends AXI4BundleBase(params) val id = UInt(width = params.idBits) val data = UInt(width = params.dataBits) val resp = UInt(width = params.respBits) + val user = if (params.userBits > 0) Some(UInt(width = params.userBits)) else None val last = Bool() } @@ -58,6 +60,7 @@ class AXI4BundleB(params: AXI4BundleParameters) extends AXI4BundleBase(params) { val id = UInt(width = params.idBits) val resp = UInt(width = params.respBits) + val user = if (params.userBits > 0) Some(UInt(width = params.userBits)) else None } class AXI4Bundle(params: AXI4BundleParameters) extends AXI4BundleBase(params) diff --git a/src/main/scala/uncore/axi4/Parameters.scala b/src/main/scala/uncore/axi4/Parameters.scala index 017bc001..a3a8f472 100644 --- a/src/main/scala/uncore/axi4/Parameters.scala +++ b/src/main/scala/uncore/axi4/Parameters.scala @@ -70,9 +70,13 @@ case class 
AXI4MasterParameters( } case class AXI4MasterPortParameters( - masters: Seq[AXI4MasterParameters]) + masters: Seq[AXI4MasterParameters], + userBits: Int = 0, + maxFlight: Int = 0) // at most X transactions per ID (0 = unlimited) { val endId = masters.map(_.id.end).max + require (userBits >= 0) + require (maxFlight >= 0) // Require disjoint ranges for ids masters.combinations(2).foreach { case Seq(x,y) => require (!x.id.overlaps(y.id), s"$x and $y overlap") } @@ -81,7 +85,8 @@ case class AXI4MasterPortParameters( case class AXI4BundleParameters( addrBits: Int, dataBits: Int, - idBits: Int) + idBits: Int, + userBits: Int) { require (dataBits >= 8, s"AXI4 data bits must be >= 8 (got $dataBits)") require (addrBits >= 1, s"AXI4 addr bits must be >= 1 (got $addrBits)") @@ -102,19 +107,21 @@ case class AXI4BundleParameters( AXI4BundleParameters( max(addrBits, x.addrBits), max(dataBits, x.dataBits), - max(idBits, x.idBits)) + max(idBits, x.idBits), + max(userBits, x.userBits)) } object AXI4BundleParameters { - val emptyBundleParams = AXI4BundleParameters(addrBits=1, dataBits=8, idBits=1) + val emptyBundleParams = AXI4BundleParameters(addrBits=1, dataBits=8, idBits=1, userBits=0) def union(x: Seq[AXI4BundleParameters]) = x.foldLeft(emptyBundleParams)((x,y) => x.union(y)) def apply(master: AXI4MasterPortParameters, slave: AXI4SlavePortParameters) = new AXI4BundleParameters( addrBits = log2Up(slave.maxAddress+1), dataBits = slave.beatBytes * 8, - idBits = log2Up(master.endId)) + idBits = log2Up(master.endId), + userBits = master.userBits) } case class AXI4EdgeParameters( diff --git a/src/main/scala/uncore/axi4/ToTL.scala b/src/main/scala/uncore/axi4/ToTL.scala index a7612b36..16542763 100644 --- a/src/main/scala/uncore/axi4/ToTL.scala +++ b/src/main/scala/uncore/axi4/ToTL.scala @@ -9,10 +9,12 @@ import diplomacy._ import uncore.tilelink2._ case class AXI4ToTLNode() extends MixedAdapterNode(AXI4Imp, TLImp)( - dFn = { case AXI4MasterPortParameters(masters) => + dFn = { case 
AXI4MasterPortParameters(masters, userBits, maxFlight) => + require (userBits == 0, "AXI4 user bits cannot be transported by TL") + require (maxFlight > 0, "AXI4 must include a maximum transactions per ID to convert to TL") TLClientPortParameters(clients = masters.map { m => TLClientParameters( - sourceId = IdRange(m.id.start << 1, m.id.end << 1), // R+W ids are distinct + sourceId = IdRange((maxFlight * m.id.start) << 1, (maxFlight * m.id.end) << 1), // R+W ids are distinct nodePath = m.nodePath) }) }, From de6ea9b44249a82f5ce3d820f7d8cf6f12009d3a Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Thu, 20 Apr 2017 17:43:41 -0700 Subject: [PATCH 02/29] axi4: support user bits in SRAM --- src/main/scala/uncore/axi4/SRAM.scala | 37 +++++++++++++++++++-------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/src/main/scala/uncore/axi4/SRAM.scala b/src/main/scala/uncore/axi4/SRAM.scala index 38251361..e0ae971b 100644 --- a/src/main/scala/uncore/axi4/SRAM.scala +++ b/src/main/scala/uncore/axi4/SRAM.scala @@ -18,7 +18,7 @@ class AXI4RAM(address: AddressSet, executable: Boolean = true, beatBytes: Int = supportsWrite = TransferSizes(1, beatBytes), interleavedId = Some(0))), beatBytes = beatBytes, - minLatency = 0))) // B responds on same cycle + minLatency = 1))) // We require the address range to include an entire beat (for the write mask) require ((address.mask & (beatBytes-1)) == beatBytes-1) @@ -38,36 +38,53 @@ class AXI4RAM(address: AddressSet, executable: Boolean = true, beatBytes: Int = val r_addr = Cat((mask zip (in.ar.bits.addr >> log2Ceil(beatBytes)).toBools).filter(_._1).map(_._2).reverse) val w_addr = Cat((mask zip (in.aw.bits.addr >> log2Ceil(beatBytes)).toBools).filter(_._1).map(_._2).reverse) - in.aw.ready := in. w.valid && in.b.ready - in. w.ready := in.aw.valid && in.b.ready - in. b.valid := in.w.valid && in.aw.valid + val w_full = RegInit(Bool(false)) + val w_id = Reg(UInt()) + val w_user = Reg(UInt()) + + when (in. 
b.fire()) { w_full := Bool(false) } + when (in.aw.fire()) { w_full := Bool(true) } + + when (in.aw.fire()) { + w_id := in.aw.bits.id + in.aw.bits.user.foreach { w_user := _ } + } - in.b.bits.id := in.aw.bits.id - in.b.bits.resp := AXI4Parameters.RESP_OKAY val wdata = Vec.tabulate(beatBytes) { i => in.w.bits.data(8*(i+1)-1, 8*i) } - when (in.b.fire()) { + when (in.aw.fire()) { mem.write(w_addr, wdata, in.w.bits.strb.toBools) } + in. b.valid := w_full + in.aw.ready := in. w.valid && (in.b.ready || !w_full) + in. w.ready := in.aw.valid && (in.b.ready || !w_full) + + in.b.bits.id := w_id + in.b.bits.resp := AXI4Parameters.RESP_OKAY + in.b.bits.user.foreach { _ := w_user } + val r_full = RegInit(Bool(false)) val r_id = Reg(UInt()) + val r_user = Reg(UInt()) when (in. r.fire()) { r_full := Bool(false) } when (in.ar.fire()) { r_full := Bool(true) } - in. r.valid := r_full - in.ar.ready := in.r.ready || !r_full - when (in.ar.fire()) { r_id := in.ar.bits.id + in.ar.bits.user.foreach { r_user := _ } } val ren = in.ar.fire() val rdata = mem.readAndHold(r_addr, ren) + in. r.valid := r_full + in.ar.ready := in.r.ready || !r_full + in.r.bits.id := r_id in.r.bits.resp := AXI4Parameters.RESP_OKAY in.r.bits.data := Cat(rdata.reverse) + in.r.bits.user.foreach { _ := r_user } in.r.bits.last := Bool(true) } } From 5163ccd11f4b84fb4c16b6df2c8299811f6c22ae Mon Sep 17 00:00:00 2001 From: "Wesley W. 
Terpstra" Date: Thu, 20 Apr 2017 17:58:07 -0700 Subject: [PATCH 03/29] axi4: RegisterRouter supports user bits --- src/main/scala/uncore/axi4/RegisterRouter.scala | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/main/scala/uncore/axi4/RegisterRouter.scala b/src/main/scala/uncore/axi4/RegisterRouter.scala index b7d51611..54cb7a38 100644 --- a/src/main/scala/uncore/axi4/RegisterRouter.scala +++ b/src/main/scala/uncore/axi4/RegisterRouter.scala @@ -30,7 +30,7 @@ class AXI4RegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int val r = bundleIn(0).r val b = bundleIn(0).b - val params = RegMapperParams(log2Up((address.mask+1)/beatBytes), beatBytes, ar.bits.params.idBits) + val params = RegMapperParams(log2Up((address.mask+1)/beatBytes), beatBytes, ar.bits.params.idBits + ar.bits.params.userBits) val in = Wire(Decoupled(new RegMapperInput(params))) // Prefer to execute reads first @@ -39,15 +39,17 @@ class AXI4RegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int aw.ready := in.ready && !ar.valid && w .valid w .ready := in.ready && !ar.valid && aw.valid - val addr = Mux(ar.valid, ar.bits.addr, aw.bits.addr) - val in_id = Mux(ar.valid, ar.bits.id, aw.bits.id) + val ar_extra = Cat(Seq(ar.bits.id) ++ ar.bits.user.toList) + val aw_extra = Cat(Seq(aw.bits.id) ++ aw.bits.user.toList) + val in_extra = Mux(ar.valid, ar_extra, aw_extra) + val addr = Mux(ar.valid, ar.bits.addr, aw.bits.addr) val mask = uncore.tilelink2.maskGen(ar.bits.addr, ar.bits.size, beatBytes) in.bits.read := ar.valid in.bits.index := addr >> log2Ceil(beatBytes) in.bits.data := w.bits.data in.bits.mask := Mux(ar.valid, mask, w.bits.strb) - in.bits.extra := in_id + in.bits.extra := in_extra // Invoke the register map builder and make it Irrevocable val out = Queue.irrevocable( @@ -59,14 +61,17 @@ class AXI4RegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int r.valid := out.valid && out.bits.read b.valid := out.valid && 
!out.bits.read - val out_id = if (r.bits.params.idBits == 0) UInt(0) else out.bits.extra + val out_id = if (r.bits.params.idBits == 0) UInt(0) else (out.bits.extra >> ar.bits.params.userBits) r.bits.id := out_id r.bits.data := out.bits.data r.bits.last := Bool(true) r.bits.resp := AXI4Parameters.RESP_OKAY + r.bits.user.foreach { _ := out.bits.extra } + b.bits.id := out_id b.bits.resp := AXI4Parameters.RESP_OKAY + b.bits.user.foreach { _ := out.bits.extra } } } From f1217519f109da8de1dd0cd9f9a2aa7e3af4572e Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Fri, 21 Apr 2017 16:59:59 -0700 Subject: [PATCH 04/29] axi4: RegisterRouter; concurrent response illegal in AXI --- src/main/scala/uncore/axi4/RegisterRouter.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/scala/uncore/axi4/RegisterRouter.scala b/src/main/scala/uncore/axi4/RegisterRouter.scala index 54cb7a38..7ac32fae 100644 --- a/src/main/scala/uncore/axi4/RegisterRouter.scala +++ b/src/main/scala/uncore/axi4/RegisterRouter.scala @@ -17,7 +17,7 @@ class AXI4RegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int supportsRead = TransferSizes(1, beatBytes), interleavedId = Some(0))), beatBytes = beatBytes, - minLatency = min(concurrency, 1)))) // the Queue adds at most one cycle + minLatency = 1))) { require (address.contiguous) @@ -54,7 +54,7 @@ class AXI4RegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int // Invoke the register map builder and make it Irrevocable val out = Queue.irrevocable( RegMapper(beatBytes, concurrency, undefZero, in, mapping:_*), - entries = 1, flow = true) + entries = 2) // No flow control needed out.ready := Mux(out.bits.read, r.ready, b.ready) From 06efc01d96e40c425c79cdfc92dd7de3b9a1e001 Mon Sep 17 00:00:00 2001 From: "Wesley W. 
Terpstra" Date: Thu, 20 Apr 2017 17:32:49 -0700 Subject: [PATCH 05/29] axi4: an adapter to remove user bits --- src/main/scala/uncore/axi4/UserYanker.scala | 88 +++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 src/main/scala/uncore/axi4/UserYanker.scala diff --git a/src/main/scala/uncore/axi4/UserYanker.scala b/src/main/scala/uncore/axi4/UserYanker.scala new file mode 100644 index 00000000..286e4a24 --- /dev/null +++ b/src/main/scala/uncore/axi4/UserYanker.scala @@ -0,0 +1,88 @@ +// See LICENSE.SiFive for license details. + +package uncore.axi4 + +import Chisel._ +import chisel3.internal.sourceinfo.SourceInfo +import config._ +import diplomacy._ +import uncore.tilelink2.UIntToOH1 + +class AXI4UserYanker(maxFlightPerId: Int)(implicit p: Parameters) extends LazyModule +{ + require (maxFlightPerId >= 1) + + val node = AXI4AdapterNode( + masterFn = { mp => mp.copy(maxFlight = maxFlightPerId, userBits = 0) }, + slaveFn = { sp => sp }) + + lazy val module = new LazyModuleImp(this) { + val io = new Bundle { + val in = node.bundleIn + val out = node.bundleOut + } + + ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) => + val bits = edgeIn.bundle.userBits + val need_bypass = edgeOut.slave.minLatency < 1 + require (bits > 0) // useless UserYanker! 
+ + val rqueues = Seq.fill(edgeIn.master.endId) { Module(new Queue(UInt(width = bits), maxFlightPerId, flow=need_bypass)) } + val wqueues = Seq.fill(edgeIn.master.endId) { Module(new Queue(UInt(width = bits), maxFlightPerId, flow=need_bypass)) } + + val arid = in.ar.bits.id + val ar_ready = Vec(rqueues.map(_.io.enq.ready))(arid) + in .ar.ready := out.ar.ready && ar_ready + out.ar.valid := in .ar.valid && ar_ready + out.ar.bits := in .ar.bits + + val rid = out.r.bits.id + val r_valid = Vec(rqueues.map(_.io.deq.valid))(rid) + val r_bits = Vec(rqueues.map(_.io.deq.bits))(rid) + assert (!out.r.valid || r_valid) // Q must be ready faster than the response + in.r <> out.r + in.r.bits.user.get := r_bits + + val arsel = UIntToOH(arid, edgeIn.master.endId).toBools + val rsel = UIntToOH(rid, edgeIn.master.endId).toBools + (rqueues zip (arsel zip rsel)) foreach { case (q, (ar, r)) => + q.io.deq.ready := out.r .valid && in .r .ready && r && out.r.bits.last + q.io.enq.valid := in .ar.valid && out.ar.ready && ar + q.io.enq.bits := in.ar.bits.user.get + } + + val awid = in.aw.bits.id + val aw_ready = Vec(wqueues.map(_.io.enq.ready))(awid) + in .aw.ready := out.aw.ready && aw_ready + out.aw.valid := in .aw.valid && aw_ready + out.aw.bits := in .aw.bits + + val bid = out.b.bits.id + val b_valid = Vec(wqueues.map(_.io.deq.valid))(bid) + val b_bits = Vec(wqueues.map(_.io.deq.bits))(bid) + assert (!out.b.valid || b_valid) // Q must be ready faster than the response + in.b <> out.b + in.b.bits.user.get := b_bits + + val awsel = UIntToOH(awid, edgeIn.master.endId).toBools + val bsel = UIntToOH(bid, edgeIn.master.endId).toBools + (wqueues zip (awsel zip bsel)) foreach { case (q, (aw, b)) => + q.io.deq.ready := out.b .valid && in .b .ready && b + q.io.enq.valid := in .aw.valid && out.aw.ready && aw + q.io.enq.bits := in.aw.bits.user.get + } + + out.w <> in.w + } + } +} + +object AXI4UserYanker +{ + // applied to the AXI4 source node; y.node := AXI4UserYanker(idBits, maxFlight)(x.node) + 
def apply(maxFlight: Int)(x: AXI4OutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): AXI4OutwardNode = { + val yanker = LazyModule(new AXI4UserYanker(maxFlight)) + yanker.node := x + yanker.node + } +} From a580b17ece03f786a815e581c333ac4bde9d21a1 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Thu, 20 Apr 2017 11:20:16 -0700 Subject: [PATCH 06/29] axi4: IdIndexer => reduce number of needed ids --- src/main/scala/uncore/axi4/IdIndexer.scala | 63 ++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 src/main/scala/uncore/axi4/IdIndexer.scala diff --git a/src/main/scala/uncore/axi4/IdIndexer.scala b/src/main/scala/uncore/axi4/IdIndexer.scala new file mode 100644 index 00000000..fa3916b1 --- /dev/null +++ b/src/main/scala/uncore/axi4/IdIndexer.scala @@ -0,0 +1,63 @@ +// See LICENSE.SiFive for license details. + +package uncore.axi4 + +import Chisel._ +import chisel3.internal.sourceinfo.SourceInfo +import config._ +import diplomacy._ +import scala.math.{min,max} + +class AXI4IdIndexer(idBits: Int)(implicit p: Parameters) extends LazyModule +{ + require (idBits >= 0) + + val node = AXI4AdapterNode( + masterFn = { mp => mp.copy( + userBits = mp.userBits + max(0, log2Ceil(mp.endId) - idBits), + masters = Seq(AXI4MasterParameters( + id = IdRange(0, min(mp.endId, 1 << idBits)), + aligned = mp.masters.map(_.aligned).reduce(_ && _)))) + }, + slaveFn = { sp => sp.copy( + slaves = sp.slaves.map(s => s.copy( + interleavedId = if (idBits == 0) Some(0) else s.interleavedId))) + }) + + lazy val module = new LazyModuleImp(this) { + val io = new Bundle { + val in = node.bundleIn + val out = node.bundleOut + } + + ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) => + + // Leave everything mostly untouched + out.ar <> in.ar + out.aw <> in.aw + out.w <> in.w + in.b <> out.b + in.r <> out.r + + val bits = log2Ceil(edgeIn.master.endId) - idBits + if (bits > 0) { + out.ar.bits.user.get := 
Cat(in.ar.bits.user.toList ++ Seq(in.ar.bits.id >> idBits)) + out.aw.bits.user.get := Cat(in.aw.bits.user.toList ++ Seq(in.aw.bits.id >> idBits)) + in.r.bits.user.foreach { _ := out.r.bits.user.get >> bits } + in.b.bits.user.foreach { _ := out.b.bits.user.get >> bits } + in.r.bits.id := Cat(out.r.bits.user.get, out.r.bits.id) + in.b.bits.id := Cat(out.b.bits.user.get, out.b.bits.id) + } + } + } +} + +object AXI4IdIndexer +{ + // applied to the AXI4 source node; y.node := AXI4IdIndexer(idBits)(x.node) + def apply(idBits: Int)(x: AXI4OutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): AXI4OutwardNode = { + val indexer = LazyModule(new AXI4IdIndexer(idBits)) + indexer.node := x + indexer.node + } +} From 641a4d577af5b8c0bcaf88d7f446316851dc71ee Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Thu, 20 Apr 2017 11:49:07 -0700 Subject: [PATCH 07/29] tilelink2: Error device for returning errors on demand --- src/main/scala/uncore/tilelink2/Error.scala | 56 +++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 src/main/scala/uncore/tilelink2/Error.scala diff --git a/src/main/scala/uncore/tilelink2/Error.scala b/src/main/scala/uncore/tilelink2/Error.scala new file mode 100644 index 00000000..38ebf646 --- /dev/null +++ b/src/main/scala/uncore/tilelink2/Error.scala @@ -0,0 +1,56 @@ +// See LICENSE.SiFive for license details. 
+ +package uncore.tilelink2 + +import Chisel._ +import config._ +import diplomacy._ +import util._ + +class TLError(address: Seq[AddressSet], beatBytes: Int = 4)(implicit p: Parameters) extends LazyModule +{ + val device = new SimpleDevice("error-device", Seq("sifive,error0")) + + val node = TLManagerNode(Seq(TLManagerPortParameters( + Seq(TLManagerParameters( + address = address, + resources = device.reg, + supportsGet = TransferSizes(1, beatBytes), + supportsPutPartial = TransferSizes(1, beatBytes), + supportsPutFull = TransferSizes(1, beatBytes), + supportsArithmetic = TransferSizes(1, beatBytes), + supportsLogical = TransferSizes(1, beatBytes), + supportsHint = TransferSizes(1, beatBytes), + fifoId = Some(0))), // requests are handled in order + beatBytes = beatBytes, + minLatency = 1))) // no bypass needed for this device + + lazy val module = new LazyModuleImp(this) { + val io = new Bundle { + val in = node.bundleIn + } + + import TLMessages._ + val opcodes = Vec(AccessAck, AccessAck, AccessAckData, AccessAckData, AccessAckData, HintAck) + + val in = io.in(0) + val a = Queue(in.a, 1) + val d = in.d + + a.ready := d.ready + d.valid := a.valid + d.bits.opcode := opcodes(a.bits.opcode) + d.bits.param := UInt(0) + d.bits.size := a.bits.size + d.bits.source := a.bits.source + d.bits.sink := UInt(0) + d.bits.addr_lo := a.bits.address + d.bits.data := UInt(0) + d.bits.error := a.bits.opcode =/= Hint // Hints may not error + + // Tie off unused channels + in.b.valid := Bool(false) + in.c.ready := Bool(true) + in.e.ready := Bool(true) + } +} From 7a1d107c9ec88993a58b4e925f277c7e326b88e4 Mon Sep 17 00:00:00 2001 From: "Wesley W. 
Terpstra" Date: Thu, 20 Apr 2017 12:09:02 -0700 Subject: [PATCH 08/29] rocketchip: include an ErrorSlave by default --- src/main/scala/rocketchip/Configs.scala | 1 + src/main/scala/rocketchip/ExampleTop.scala | 3 +++ src/main/scala/rocketchip/Periphery.scala | 28 +++++++++++++++++++++- 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/main/scala/rocketchip/Configs.scala b/src/main/scala/rocketchip/Configs.scala index c5c883ff..05a4b08e 100644 --- a/src/main/scala/rocketchip/Configs.scala +++ b/src/main/scala/rocketchip/Configs.scala @@ -39,6 +39,7 @@ class BasePlatformConfig extends Config((site, here, up) => { case IncludeJtagDTM => false case JtagDTMKey => new JtagDTMKeyDefault() case ZeroConfig => ZeroConfig(base=0xa000000L, size=0x2000000L, beatBytes=8) + case ErrorConfig => ErrorConfig(Seq(AddressSet(0x1000, 0xfff))) case ExtMem => MasterConfig(base=0x80000000L, size=0x10000000L, beatBytes=8, idBits=4) case ExtBus => MasterConfig(base=0x60000000L, size=0x20000000L, beatBytes=8, idBits=4) case ExtIn => SlaveConfig(beatBytes=8, idBits=8, sourceBits=2) diff --git a/src/main/scala/rocketchip/ExampleTop.scala b/src/main/scala/rocketchip/ExampleTop.scala index 96eabc2c..cdf49871 100644 --- a/src/main/scala/rocketchip/ExampleTop.scala +++ b/src/main/scala/rocketchip/ExampleTop.scala @@ -10,6 +10,7 @@ import rocketchip._ /** Example Top with Periphery (w/o coreplex) */ abstract class ExampleTop(implicit p: Parameters) extends BaseTop with PeripheryAsyncExtInterrupts + with PeripheryErrorSlave with PeripheryMasterAXI4Mem with PeripheryMasterAXI4MMIO with PeripherySlaveAXI4 { @@ -18,12 +19,14 @@ abstract class ExampleTop(implicit p: Parameters) extends BaseTop class ExampleTopBundle[+L <: ExampleTop](_outer: L) extends BaseTopBundle(_outer) with PeripheryExtInterruptsBundle + with PeripheryErrorSlaveBundle with PeripheryMasterAXI4MemBundle with PeripheryMasterAXI4MMIOBundle with PeripherySlaveAXI4Bundle class ExampleTopModule[+L <: ExampleTop, +B <: 
ExampleTopBundle[L]](_outer: L, _io: () => B) extends BaseTopModule(_outer, _io) with PeripheryExtInterruptsModule + with PeripheryErrorSlaveModule with PeripheryMasterAXI4MemModule with PeripheryMasterAXI4MMIOModule with PeripherySlaveAXI4Module diff --git a/src/main/scala/rocketchip/Periphery.scala b/src/main/scala/rocketchip/Periphery.scala index 75071cf0..dff9c598 100644 --- a/src/main/scala/rocketchip/Periphery.scala +++ b/src/main/scala/rocketchip/Periphery.scala @@ -13,7 +13,7 @@ import uncore.converters._ import uncore.devices._ import uncore.util._ import util._ -import scala.math.max +import scala.math.{min,max} /** Specifies the size of external memory */ case class MasterConfig(base: Long, size: Long, beatBytes: Int, idBits: Int) @@ -33,6 +33,9 @@ case object SOCBusConfig extends Field[TLBusConfig] /* Specifies the location of the Zero device */ case class ZeroConfig(base: Long, size: Long, beatBytes: Int) case object ZeroConfig extends Field[ZeroConfig] +/* Specifies the location of the Error device */ +case class ErrorConfig(address: Seq[AddressSet]) +case object ErrorConfig extends Field[ErrorConfig] /** Utility trait for quick access to some relevant parameters */ trait HasPeripheryParameters { @@ -388,3 +391,26 @@ trait PeripheryTestBusMasterModule { val io: PeripheryTestBusMasterBundle } => } + +///// + +trait PeripheryErrorSlave { + this: HasTopLevelNetworks => + private val config = p(ErrorConfig) + private val maxXfer = min(config.address.map(_.alignment).max.toInt, 4096) + val error = LazyModule(new TLError(config.address, peripheryBusConfig.beatBytes)) + error.node := TLFragmenter(peripheryBusConfig.beatBytes, maxXfer)(peripheryBus.node) +} + +trait PeripheryErrorSlaveBundle { + this: HasTopLevelNetworksBundle { + val outer: PeripheryErrorSlave + } => +} + +trait PeripheryErrorSlaveModule { + this: HasTopLevelNetworksModule { + val outer: PeripheryErrorSlave + val io: PeripheryErrorSlaveBundle + } => +} From 
e100a943eabe32851067ffc780b0d85fbe0fb724 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Thu, 20 Apr 2017 15:20:20 -0700 Subject: [PATCH 09/29] axi4: simplify Fragmenter by using user bits --- src/main/scala/uncore/axi4/Fragmenter.scala | 153 +++++--------------- 1 file changed, 34 insertions(+), 119 deletions(-) diff --git a/src/main/scala/uncore/axi4/Fragmenter.scala b/src/main/scala/uncore/axi4/Fragmenter.scala index 42cf8fd7..dcc2ddf5 100644 --- a/src/main/scala/uncore/axi4/Fragmenter.scala +++ b/src/main/scala/uncore/axi4/Fragmenter.scala @@ -10,8 +10,7 @@ import diplomacy._ import scala.math.{min,max} import uncore.tilelink2.{leftOR, rightOR, UIntToOH1, OH1ToOH} -// lite: masters all use only one ID => reads will not be interleaved -class AXI4Fragmenter(lite: Boolean = false, maxInFlight: => Int = 32, combinational: Boolean = true)(implicit p: Parameters) extends LazyModule +class AXI4Fragmenter()(implicit p: Parameters) extends LazyModule { val maxBeats = 1 << AXI4Parameters.lenBits def expandTransfer(x: TransferSizes, beatBytes: Int, alignment: BigInt) = @@ -19,11 +18,11 @@ class AXI4Fragmenter(lite: Boolean = false, maxInFlight: => Int = 32, combinatio def mapSlave(s: AXI4SlaveParameters, beatBytes: Int) = s.copy( supportsWrite = expandTransfer(s.supportsWrite, beatBytes, s.minAlignment), supportsRead = expandTransfer(s.supportsRead, beatBytes, s.minAlignment), - interleavedId = if (lite) Some(0) else s.interleavedId) // see AXI4FragmenterSideband for !lite case + interleavedId = None) // this breaks interleaving guarantees def mapMaster(m: AXI4MasterParameters) = m.copy(aligned = true) val node = AXI4AdapterNode( - masterFn = { mp => mp.copy(masters = mp.masters.map(m => mapMaster(m))) }, + masterFn = { mp => mp.copy(masters = mp.masters.map(m => mapMaster(m)), userBits = mp.userBits + 1) }, slaveFn = { sp => sp.copy(slaves = sp.slaves .map(s => mapSlave(s, sp.beatBytes))) }) lazy val module = new LazyModuleImp(this) { @@ -40,9 +39,6 @@ class 
AXI4Fragmenter(lite: Boolean = false, maxInFlight: => Int = 32, combinatio val master = edgeIn.master val masters = master.masters - // If the user claimed this was a lite interface, then there must be only one Id - require (!lite || master.endId == 1) - // We don't support fragmenting to sub-beat accesses slaves.foreach { s => require (!s.supportsRead || s.supportsRead.contains(beatBytes)) @@ -139,154 +135,73 @@ class AXI4Fragmenter(lite: Boolean = false, maxInFlight: => Int = 32, combinatio val readSizes1 = slaves.map(s => s.supportsRead .max/beatBytes-1) val writeSizes1 = slaves.map(s => s.supportsWrite.max/beatBytes-1) - // Indirection variables for inputs and outputs; makes transformation application easier + // Irrevocable queues in front because we want to accept the request before responses come back val (in_ar, ar_last, _) = fragment(Queue.irrevocable(in.ar, 1, flow=true), readSizes1) val (in_aw, aw_last, w_beats) = fragment(Queue.irrevocable(in.aw, 1, flow=true), writeSizes1) - val in_w = in.w - val in_r = in.r - val in_b = in.b - val out_ar = Wire(out.ar) - val out_aw = out.aw - val out_w = out.w - val out_r = Wire(out.r) - val out_b = Wire(out.b) - val depth = if (combinational) 1 else 2 - // In case a slave ties arready := rready, we need a queue to break the combinational loop - // between the two branches (in_ar => {out_ar => out_r, sideband} => in_r). 
- if (in.ar.bits.getWidth < in.r.bits.getWidth) { - out.ar <> Queue(out_ar, depth, flow=combinational) - out_r <> out.r - } else { - out.ar <> out_ar - out_r <> Queue(out.r, depth, flow=combinational) - } - // In case a slave ties awready := bready or wready := bready, we need this queue - out_b <> Queue(out.b, depth, flow=combinational) - - // Sideband to track which transfers were the last fragment - def sideband() = if (lite) { - Module(new Queue(Bool(), maxInFlight, flow=combinational)).io - } else { - Module(new AXI4FragmenterSideband(maxInFlight, flow=combinational)).io - } - val sideband_ar_r = sideband() - val sideband_aw_b = sideband() - - // AR flow control - out_ar.valid := in_ar.valid && sideband_ar_r.enq.ready - in_ar.ready := sideband_ar_r.enq.ready && out_ar.ready - sideband_ar_r.enq.valid := in_ar.valid && out_ar.ready - out_ar.bits := in_ar.bits - sideband_ar_r.enq.bits := ar_last + // AR flow control; super easy + out.ar <> in_ar + out.ar.bits.user.get := Cat(in_ar.bits.user.toList ++ Seq(ar_last)) // When does W channel start counting a new transfer val wbeats_latched = RegInit(Bool(false)) val wbeats_ready = Wire(Bool()) val wbeats_valid = Wire(Bool()) when (wbeats_valid && wbeats_ready) { wbeats_latched := Bool(true) } - when (out_aw.fire()) { wbeats_latched := Bool(false) } + when (out.aw.fire()) { wbeats_latched := Bool(false) } // AW flow control - out_aw.valid := in_aw.valid && sideband_aw_b.enq.ready && (wbeats_ready || wbeats_latched) - in_aw.ready := sideband_aw_b.enq.ready && out_aw.ready && (wbeats_ready || wbeats_latched) - sideband_aw_b.enq.valid := in_aw.valid && out_aw.ready && (wbeats_ready || wbeats_latched) + out.aw.valid := in_aw.valid && (wbeats_ready || wbeats_latched) + in_aw.ready := out.aw.ready && (wbeats_ready || wbeats_latched) wbeats_valid := in_aw.valid && !wbeats_latched - out_aw.bits := in_aw.bits - sideband_aw_b.enq.bits := aw_last + out.aw.bits := in_aw.bits + out.aw.bits.user.get := Cat(in_aw.bits.user.toList ++ 
Seq(aw_last)) // We need to inject 'last' into the W channel fragments, count! val w_counter = RegInit(UInt(0, width = AXI4Parameters.lenBits+1)) val w_idle = w_counter === UInt(0) val w_todo = Mux(w_idle, Mux(wbeats_valid, w_beats, UInt(0)), w_counter) val w_last = w_todo === UInt(1) - w_counter := w_todo - out_w.fire() - assert (!out_w.fire() || w_todo =/= UInt(0)) // underflow impossible + w_counter := w_todo - out.w.fire() + assert (!out.w.fire() || w_todo =/= UInt(0)) // underflow impossible // W flow control wbeats_ready := w_idle - out_w.valid := in_w.valid && (!wbeats_ready || wbeats_valid) - in_w.ready := out_w.ready && (!wbeats_ready || wbeats_valid) - out_w.bits := in_w.bits - out_w.bits.last := w_last + out.w.valid := in.w.valid && (!wbeats_ready || wbeats_valid) + in.w.ready := out.w.ready && (!wbeats_ready || wbeats_valid) + out.w.bits := in.w.bits + out.w.bits.last := w_last // We should also recreate the last last - assert (!out_w.valid || !in_w.bits.last || w_last) + assert (!out.w.valid || !in.w.bits.last || w_last) // R flow control - val r_last = out_r.bits.last - in_r.valid := out_r.valid && (!r_last || sideband_ar_r.deq.valid) - out_r.ready := in_r.ready && (!r_last || sideband_ar_r.deq.valid) - sideband_ar_r.deq.ready := r_last && out_r.valid && in_r.ready - in_r.bits := out_r.bits - in_r.bits.last := r_last && sideband_ar_r.deq.bits + val r_last = out.r.bits.user.get(0) + in.r <> out.r + in.r.bits.last := out.r.bits.last && r_last + in.r.bits.user.foreach { _ := out.r.bits.user.get >> 1 } // B flow control - val b_last = sideband_aw_b.deq.bits - in_b.valid := out_b.valid && sideband_aw_b.deq.valid && b_last - out_b.ready := sideband_aw_b.deq.valid && (!b_last || in_b.ready) - sideband_aw_b.deq.ready := out_b.valid && (!b_last || in_b.ready) - in_b.bits := out_b.bits + val b_last = out.b.bits.user.get(0) + in.b <> out.b + in.b.valid := out.b.valid && b_last + out.b.ready := in.b.ready || !b_last + in.b.bits.user.foreach { _ := 
out.b.bits.user.get >> 1 } // Merge errors from dropped B responses - val r_resp = RegInit(UInt(0, width = AXI4Parameters.respBits)) - val resp = out_b.bits.resp | r_resp - when (out_b.fire()) { r_resp := Mux(b_last, UInt(0), resp) } - in_b.bits.resp := resp - } - } - - /* We want to put barriers between the fragments of a fragmented transfer and all other transfers. - * This lets us use very little state to reassemble the fragments (else we need one FIFO per ID). - * Furthermore, because all the fragments share the same AXI ID, they come back contiguously. - * This guarantees that no other R responses might get mixed between fragments, ensuring that the - * interleavedId for the slaves remains unaffected by the fragmentation transformation. - * Of course, if you need to fragment, this means there is a potentially hefty serialization cost. - * However, this design allows full concurrency in the common no-fragmentation-needed scenario. - */ - class AXI4FragmenterSideband(maxInFlight: Int, flow: Boolean = false) extends Module - { - val io = new QueueIO(Bool(), maxInFlight) - io.count := UInt(0) - - val PASS = UInt(2, width = 2) // allow 'last=1' bits to enque, on 'last=0' if count>0 block else accept+FIND - val FIND = UInt(0, width = 2) // allow 'last=0' bits to enque, accept 'last=1' and switch to WAIT - val WAIT = UInt(1, width = 2) // block all access till count=0 - - val state = RegInit(PASS) - val count = RegInit(UInt(0, width = log2Up(maxInFlight))) - val full = count === UInt(maxInFlight-1) - val empty = count === UInt(0) - val last = count === UInt(1) - - io.deq.bits := state(1) || (last && state(0)) // PASS || (last && WAIT) - io.deq.valid := !empty - - io.enq.ready := !full && (empty || (state === FIND) || (state === PASS && io.enq.bits)) - - // WAIT => count > 0 - assert (state =/= WAIT || count =/= UInt(0)) - - if (flow) { - when (io.enq.valid) { - io.deq.valid := Bool(true) - when (empty) { io.deq.bits := io.enq.bits } + val error = 
RegInit(Vec.fill(edgeIn.master.endId) { UInt(0, width = AXI4Parameters.respBits)}) + in.b.bits.resp := out.b.bits.resp | error(out.b.bits.id) + (error zip UIntToOH(out.b.bits.id, edgeIn.master.endId).toBools) foreach { case (reg, sel) => + when (sel && out.b.fire()) { reg := Mux(b_last, UInt(0), reg | out.b.bits.resp) } } } - - count := count + io.enq.fire() - io.deq.fire() - switch (state) { - is(PASS) { when (io.enq.valid && !io.enq.bits && empty) { state := FIND } } - is(FIND) { when (io.enq.valid && io.enq.bits && !full) { state := Mux(empty, PASS, WAIT) } } - is(WAIT) { when (last && io.deq.ready) { state := PASS } } - } } } object AXI4Fragmenter { // applied to the AXI4 source node; y.node := AXI4Fragmenter()(x.node) - def apply(lite: Boolean = false, maxInFlight: => Int = 32, combinational: Boolean = true)(x: AXI4OutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): AXI4OutwardNode = { - val fragmenter = LazyModule(new AXI4Fragmenter(lite, maxInFlight, combinational)) + def apply()(x: AXI4OutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): AXI4OutwardNode = { + val fragmenter = LazyModule(new AXI4Fragmenter) fragmenter.node := x fragmenter.node } From ca2cb033cdb6ffae09290f5c4ebfd8ad207ca198 Mon Sep 17 00:00:00 2001 From: "Wesley W. 
Terpstra" Date: Thu, 20 Apr 2017 18:54:50 -0700 Subject: [PATCH 10/29] rocketchip: fix uses of AXI4 Fragmenter --- src/main/scala/rocketchip/Configs.scala | 2 +- src/main/scala/rocketchip/Periphery.scala | 6 ++++-- src/main/scala/rocketchip/TestHarness.scala | 2 +- src/main/scala/uncore/axi4/Test.scala | 12 +++++++----- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/main/scala/rocketchip/Configs.scala b/src/main/scala/rocketchip/Configs.scala index 05a4b08e..7196507b 100644 --- a/src/main/scala/rocketchip/Configs.scala +++ b/src/main/scala/rocketchip/Configs.scala @@ -42,7 +42,7 @@ class BasePlatformConfig extends Config((site, here, up) => { case ErrorConfig => ErrorConfig(Seq(AddressSet(0x1000, 0xfff))) case ExtMem => MasterConfig(base=0x80000000L, size=0x10000000L, beatBytes=8, idBits=4) case ExtBus => MasterConfig(base=0x60000000L, size=0x20000000L, beatBytes=8, idBits=4) - case ExtIn => SlaveConfig(beatBytes=8, idBits=8, sourceBits=2) + case ExtIn => SlaveConfig(beatBytes=8, idBits=8, sourceBits=4) }) /** Actual elaboratable target Configs */ diff --git a/src/main/scala/rocketchip/Periphery.scala b/src/main/scala/rocketchip/Periphery.scala index dff9c598..7d29c8cc 100644 --- a/src/main/scala/rocketchip/Periphery.scala +++ b/src/main/scala/rocketchip/Periphery.scala @@ -238,12 +238,14 @@ trait PeripherySlaveAXI4 extends HasTopLevelNetworks { masters = Seq(AXI4MasterParameters( id = IdRange(0, 1 << config.idBits)))))) + private val fifoBits = 1 fsb.node := - TLSourceShrinker(1 << config.sourceBits)( TLWidthWidget(config.beatBytes)( AXI4ToTL()( + AXI4UserYanker(1 << (config.sourceBits - fifoBits - 1))( AXI4Fragmenter()( - l2FrontendAXI4Node)))) + AXI4IdIndexer(fifoBits)( + l2FrontendAXI4Node))))) } trait PeripherySlaveAXI4Bundle extends HasTopLevelNetworksBundle { diff --git a/src/main/scala/rocketchip/TestHarness.scala b/src/main/scala/rocketchip/TestHarness.scala index 5c1c360e..12db2e00 100644 --- 
a/src/main/scala/rocketchip/TestHarness.scala +++ b/src/main/scala/rocketchip/TestHarness.scala @@ -53,7 +53,7 @@ class SimAXIMem(channels: Int, forceSize: BigInt = 0)(implicit p: Parameters) ex for (i <- 0 until channels) { val sram = LazyModule(new AXI4RAM(AddressSet(0, size-1), beatBytes = config.beatBytes)) - sram.node := AXI4Buffer()(AXI4Fragmenter(maxInFlight = 4)(node)) + sram.node := AXI4Buffer()(AXI4Fragmenter()(node)) } lazy val module = new LazyModuleImp(this) { diff --git a/src/main/scala/uncore/axi4/Test.scala b/src/main/scala/uncore/axi4/Test.scala index 11bf11a8..76a6c7bf 100644 --- a/src/main/scala/uncore/axi4/Test.scala +++ b/src/main/scala/uncore/axi4/Test.scala @@ -26,8 +26,8 @@ class AXI4LiteFuzzRAM()(implicit p: Parameters) extends LazyModule model.node := fuzz.node xbar.node := TLDelayer(0.1)(TLBuffer(BufferParams.flow)(TLDelayer(0.2)(model.node))) - ram.node := AXI4Fragmenter(lite=true)(TLToAXI4(0, true )(xbar.node)) - gpio.node := AXI4Fragmenter(lite=true)(TLToAXI4(0, false)(xbar.node)) + ram.node := AXI4Fragmenter()(TLToAXI4(0, true )(xbar.node)) + gpio.node := AXI4Fragmenter()(TLToAXI4(0, false)(xbar.node)) lazy val module = new LazyModuleImp(this) with HasUnitTestIO { io.finished := fuzz.module.io.finished @@ -49,8 +49,8 @@ class AXI4FullFuzzRAM()(implicit p: Parameters) extends LazyModule model.node := fuzz.node xbar.node := TLDelayer(0.1)(TLBuffer(BufferParams.flow)(TLDelayer(0.2)(model.node))) - ram.node := AXI4Fragmenter(lite=false, maxInFlight = 2)(TLToAXI4(4,false)(xbar.node)) - gpio.node := AXI4Fragmenter(lite=false, maxInFlight = 5)(TLToAXI4(4,true )(xbar.node)) + ram.node := AXI4Fragmenter()(TLToAXI4(4,false)(xbar.node)) + gpio.node := AXI4Fragmenter()(TLToAXI4(4,true )(xbar.node)) lazy val module = new LazyModuleImp(this) with HasUnitTestIO { io.finished := fuzz.module.io.finished @@ -97,8 +97,10 @@ class AXI4FuzzSlave()(implicit p: Parameters) extends LazyModule TLBuffer(BufferParams.flow)( TLDelayer(0.1)( AXI4ToTL()( + 
AXI4UserYanker(4)( AXI4Fragmenter()( - node)))))) + AXI4IdIndexer(4)( + node)))))))) lazy val module = new LazyModuleImp(this) { val io = new Bundle { From b4188ee62599e570142960f9f9b014f1910e89b7 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Fri, 21 Apr 2017 12:30:41 -0700 Subject: [PATCH 11/29] axi4: ToTL supporting pipelined MMIO --- src/main/scala/coreplex/CoreplexNetwork.scala | 4 +- src/main/scala/uncore/axi4/ToTL.scala | 146 ++++++++---------- 2 files changed, 68 insertions(+), 82 deletions(-) diff --git a/src/main/scala/coreplex/CoreplexNetwork.scala b/src/main/scala/coreplex/CoreplexNetwork.scala index 5368d2cf..8c99e189 100644 --- a/src/main/scala/coreplex/CoreplexNetwork.scala +++ b/src/main/scala/coreplex/CoreplexNetwork.scala @@ -32,7 +32,9 @@ trait CoreplexNetwork extends HasCoreplexParameters { // Allows a variable number of inputs from outside to the Xbar private val l2in_buffer = LazyModule(new TLBuffer) - l1tol2.node :=* l2in_buffer.node + private val l2in_fifo = LazyModule(new TLFIFOFixer) + l1tol2.node :=* l2in_fifo.node + l2in_fifo.node :=* l2in_buffer.node l2in_buffer.node :=* l2in private val l2out_buffer = LazyModule(new TLBuffer(BufferParams.flow, BufferParams.none)) diff --git a/src/main/scala/uncore/axi4/ToTL.scala b/src/main/scala/uncore/axi4/ToTL.scala index 16542763..8e2dd849 100644 --- a/src/main/scala/uncore/axi4/ToTL.scala +++ b/src/main/scala/uncore/axi4/ToTL.scala @@ -10,24 +10,27 @@ import uncore.tilelink2._ case class AXI4ToTLNode() extends MixedAdapterNode(AXI4Imp, TLImp)( dFn = { case AXI4MasterPortParameters(masters, userBits, maxFlight) => - require (userBits == 0, "AXI4 user bits cannot be transported by TL") require (maxFlight > 0, "AXI4 must include a maximum transactions per ID to convert to TL") - TLClientPortParameters(clients = masters.map { m => - TLClientParameters( - sourceId = IdRange((maxFlight * m.id.start) << 1, (maxFlight * m.id.end) << 1), // R+W ids are distinct - nodePath = m.nodePath) - }) + 
TLClientPortParameters( + clients = masters.flatMap { m => + for (id <- m.id.start until m.id.end) + yield TLClientParameters( + sourceId = IdRange(id * maxFlight*2, (id+1) * maxFlight*2), // R+W ids are distinct + nodePath = m.nodePath, + requestFifo = true) + }) }, uFn = { mp => AXI4SlavePortParameters( slaves = mp.managers.map { m => + val maxXfer = TransferSizes(1, mp.beatBytes * (1 << AXI4Parameters.lenBits)) AXI4SlaveParameters( address = m.address, resources = m.resources, regionType = m.regionType, executable = m.executable, nodePath = m.nodePath, - supportsWrite = m.supportsPutPartial, - supportsRead = m.supportsGet, + supportsWrite = m.supportsPutPartial.intersect(maxXfer), + supportsRead = m.supportsGet.intersect(maxXfer), interleavedId = Some(0))}, // TL2 never interleaves D beats beatBytes = mp.beatBytes, minLatency = mp.minLatency) @@ -47,58 +50,64 @@ class AXI4ToTL()(implicit p: Parameters) extends LazyModule val numIds = edgeIn.master.endId val beatBytes = edgeOut.manager.beatBytes val countBits = AXI4Parameters.lenBits + (1 << AXI4Parameters.sizeBits) - 1 + val maxFlight = edgeIn.master.maxFlight + val addedBits = log2Ceil(maxFlight) + 1 + require (edgeIn.master.userBits == 0, "AXI4 user bits cannot be transported by TL") require (edgeIn.master.masters(0).aligned) + edgeOut.manager.requireFifo() + + // Look for an Error device to redirect bad requests + val errorDevs = edgeOut.manager.managers.filter(_.nodePath.last.lazyModule.className == "TLError") + require (!errorDevs.isEmpty, "There is no TLError reachable from AXI4ToTL. 
One must be instantiated.") + val error = errorDevs.head.address.head.base + require (errorDevs.head.supportsPutPartial.contains(edgeOut.manager.maxTransfer), + s"Error device supports ${errorDevs.head.supportsPutPartial} PutPartial but must support ${edgeOut.manager.maxTransfer}") + require (errorDevs.head.supportsGet.contains(edgeOut.manager.maxTransfer), + s"Error device supports ${errorDevs.head.supportsGet} Get but must support ${edgeOut.manager.maxTransfer}") val r_out = Wire(out.a) - val r_inflight = RegInit(UInt(0, width = numIds)) - val r_block = r_inflight(in.ar.bits.id) val r_size1 = in.ar.bits.bytes1() val r_size = OH1ToUInt(r_size1) - val r_addr = in.ar.bits.addr - val r_ok = edgeOut.manager.supportsGetSafe(r_addr, r_size) - val r_err_in = Wire(Decoupled(new AXI4BundleRError(in.ar.bits.params))) - val r_err_out = Queue(r_err_in, 2) - val r_count = RegInit(UInt(0, width = in.ar.bits.params.lenBits)) - val r_last = r_count === in.ar.bits.len + val r_ok = edgeOut.manager.supportsGetSafe(in.ar.bits.addr, r_size) + val r_addr = Mux(r_ok, in.ar.bits.addr, UInt(error)) + val r_count = RegInit(Vec.fill(numIds) { UInt(0, width = log2Ceil(maxFlight)) }) + val r_id = Cat(in.ar.bits.id, r_count(in.ar.bits.id), UInt(0, width=1)) assert (!in.ar.valid || r_size1 === UIntToOH1(r_size, countBits)) // because aligned - in.ar.ready := Mux(r_ok, r_out.ready, r_err_in.ready && r_last) && !r_block - r_out.valid := in.ar.valid && !r_block && r_ok - r_out.bits := edgeOut.Get(in.ar.bits.id << 1 | UInt(1), r_addr, r_size)._2 - r_err_in.valid := in.ar.valid && !r_block && !r_ok - r_err_in.bits.last := r_last - r_err_in.bits.id := in.ar.bits.id + in.ar.ready := r_out.ready + r_out.valid := in.ar.valid + r_out.bits := edgeOut.Get(r_id, r_addr, r_size)._2 - when (r_err_in.fire()) { r_count := Mux(r_last, UInt(0), r_count + UInt(1)) } + val r_sel = UIntToOH(in.ar.bits.id, numIds) + (r_sel.toBools zip r_count) foreach { case (s, r) => + when (in.ar.fire() && s) { r := r + UInt(1) } + 
} val w_out = Wire(out.a) - val w_inflight = RegInit(UInt(0, width = numIds)) - val w_block = w_inflight(in.aw.bits.id) val w_size1 = in.aw.bits.bytes1() val w_size = OH1ToUInt(w_size1) - val w_addr = in.aw.bits.addr - val w_ok = edgeOut.manager.supportsPutPartialSafe(w_addr, w_size) - val w_err_in = Wire(Decoupled(in.aw.bits.id)) - val w_err_out = Queue(w_err_in, 2) + val w_ok = edgeOut.manager.supportsPutPartialSafe(in.aw.bits.addr, w_size) + val w_addr = Mux(w_ok, in.aw.bits.addr, UInt(error)) + val w_count = RegInit(Vec.fill(numIds) { UInt(0, width = log2Ceil(maxFlight)) }) + val w_id = Cat(in.aw.bits.id, w_count(in.aw.bits.id), UInt(1, width=1)) assert (!in.aw.valid || w_size1 === UIntToOH1(w_size, countBits)) // because aligned assert (!in.aw.valid || in.aw.bits.len === UInt(0) || in.aw.bits.size === UInt(log2Ceil(beatBytes))) // because aligned - in.aw.ready := Mux(w_ok, w_out.ready, w_err_in.ready) && in.w.valid && in.w.bits.last && !w_block - in.w.ready := Mux(w_ok, w_out.ready, w_err_in.ready || !in.w.bits.last) && in.aw.valid && !w_block - w_out.valid := in.aw.valid && in.w.valid && !w_block && w_ok - w_out.bits := edgeOut.Put(in.aw.bits.id << 1, w_addr, w_size, in.w.bits.data, in.w.bits.strb)._2 - w_err_in.valid := in.aw.valid && in.w.valid && !w_block && !w_ok && in.w.bits.last - w_err_in.bits := in.aw.bits.id + in.aw.ready := w_out.ready && in.w.valid && in.w.bits.last + in.w.ready := w_out.ready && in.aw.valid + w_out.valid := in.aw.valid && in.w.valid + w_out.bits := edgeOut.Put(w_id, w_addr, w_size, in.w.bits.data, in.w.bits.strb)._2 + + val w_sel = UIntToOH(in.aw.bits.id, numIds) + (w_sel.toBools zip w_count) foreach { case (s, r) => + when (in.aw.fire() && s) { r := r + UInt(1) } + } TLArbiter(TLArbiter.lowestIndexFirst)(out.a, (UInt(0), r_out), (in.aw.bits.len, w_out)) val ok_b = Wire(in.b) - val err_b = Wire(in.b) - val mux_b = Wire(in.b) val ok_r = Wire(in.r) - val err_r = Wire(in.r) - val mux_r = Wire(in.r) val d_resp = Mux(out.d.bits.error, 
AXI4Parameters.RESP_SLVERR, AXI4Parameters.RESP_OKAY) val d_hasData = edgeOut.hasData(out.d.bits) @@ -108,58 +117,33 @@ class AXI4ToTL()(implicit p: Parameters) extends LazyModule ok_r.valid := out.d.valid && d_hasData ok_b.valid := out.d.valid && !d_hasData - ok_r.bits.id := out.d.bits.source >> 1 + ok_r.bits.id := out.d.bits.source >> addedBits ok_r.bits.data := out.d.bits.data ok_r.bits.resp := d_resp ok_r.bits.last := d_last - r_err_out.ready := err_r.ready - err_r.valid := r_err_out.valid - err_r.bits.id := r_err_out.bits.id - err_r.bits.data := out.d.bits.data // don't care - err_r.bits.resp := AXI4Parameters.RESP_DECERR - err_r.bits.last := r_err_out.bits.last - - // AXI4 must hold R to one source until last - val mux_lock_ok = RegInit(Bool(false)) - val mux_lock_err = RegInit(Bool(false)) - when (ok_r .fire()) { mux_lock_ok := !ok_r .bits.last } - when (err_r.fire()) { mux_lock_err := !err_r.bits.last } - assert (!mux_lock_ok || !mux_lock_err) - - // Prioritize err over ok (b/c err_r.valid comes from a register) - mux_r.valid := (!mux_lock_err && ok_r.valid) || (!mux_lock_ok && err_r.valid) - mux_r.bits := Mux(!mux_lock_ok && err_r.valid, err_r.bits, ok_r.bits) - ok_r.ready := mux_r.ready && (mux_lock_ok || !err_r.valid) - err_r.ready := mux_r.ready && !mux_lock_ok - // AXI4 needs irrevocable behaviour - in.r <> Queue.irrevocable(mux_r, 1, flow=true) + in.r <> Queue.irrevocable(ok_r, 1, flow=true) - ok_b.bits.id := out.d.bits.source >> 1 + ok_b.bits.id := out.d.bits.source >> addedBits ok_b.bits.resp := d_resp - w_err_out.ready := err_b.ready - err_b.valid := w_err_out.valid - err_b.bits.id := w_err_out.bits - err_b.bits.resp := AXI4Parameters.RESP_DECERR - - // Prioritize err over ok (b/c err_b.valid comes from a register) - mux_b.valid := ok_b.valid || err_b.valid - mux_b.bits := Mux(err_b.valid, err_b.bits, ok_b.bits) - ok_b.ready := mux_b.ready && !err_b.valid - err_b.ready := mux_b.ready - // AXI4 needs irrevocable behaviour - in.b <> 
Queue.irrevocable(mux_b, 1, flow=true) + val q_b = Queue.irrevocable(ok_b, 1, flow=true) - // Update flight trackers - val r_set = in.ar.fire().asUInt << in.ar.bits.id - val r_clr = (in.r.fire() && in.r.bits.last).asUInt << in.r.bits.id - r_inflight := (r_inflight | r_set) & ~r_clr - val w_set = in.aw.fire().asUInt << in.aw.bits.id - val w_clr = in.b.fire().asUInt << in.b.bits.id - w_inflight := (w_inflight | w_set) & ~w_clr + // We need to prevent sending B valid before the last W beat is accepted + // TileLink allows early acknowledgement of a write burst, but AXI does not. + val b_count = RegInit(Vec.fill(numIds) { UInt(0, width = log2Ceil(maxFlight)) }) + val b_allow = b_count(in.b.bits.id) =/= w_count(in.b.bits.id) + val b_sel = UIntToOH(in.b.bits.id, numIds) + + (b_sel.toBools zip b_count) foreach { case (s, r) => + when (in.b.fire() && s) { r := r + UInt(1) } + } + + in.b.bits := q_b.bits + in.b.valid := q_b.valid && b_allow + q_b.ready := in.b.ready && b_allow // Unused channels out.b.ready := Bool(true) From 24f577c156f1fa0bb3f5c76319cb4827b279a8e4 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Fri, 21 Apr 2017 17:12:35 -0700 Subject: [PATCH 12/29] axi4: Deinterleaver ensures R channel ID does not change till last --- .../scala/uncore/axi4/Deinterleaver.scala | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 src/main/scala/uncore/axi4/Deinterleaver.scala diff --git a/src/main/scala/uncore/axi4/Deinterleaver.scala b/src/main/scala/uncore/axi4/Deinterleaver.scala new file mode 100644 index 00000000..bc926c69 --- /dev/null +++ b/src/main/scala/uncore/axi4/Deinterleaver.scala @@ -0,0 +1,102 @@ +// See LICENSE.SiFive for license details. 
+ +package uncore.axi4 + +import Chisel._ +import chisel3.internal.sourceinfo.SourceInfo +import chisel3.util.IrrevocableIO +import config._ +import diplomacy._ +import scala.math.{min,max} +import uncore.tilelink2.{leftOR, rightOR, UIntToOH1, OH1ToOH} + +class AXI4Deinterleaver(maxReadBytes: Int)(implicit p: Parameters) extends LazyModule +{ + require (maxReadBytes >= 1 && isPow2(maxReadBytes)) + + val node = AXI4AdapterNode( + masterFn = { mp => mp }, + slaveFn = { sp => sp.copy(slaves = sp.slaves.map(s => s.copy( + supportsRead = s.supportsRead.intersect(TransferSizes(1, maxReadBytes)), + interleavedId = Some(0)))) + }) + + lazy val module = new LazyModuleImp(this) { + val io = new Bundle { + val in = node.bundleIn + val out = node.bundleOut + } + + ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) => + val queues = edgeOut.master.endId + val beatBytes = edgeOut.slave.beatBytes + val beats = (maxReadBytes+beatBytes-1) / beatBytes + + // This adapter leaves the control + write paths completely untouched + out.ar <> in.ar + out.aw <> in.aw + out.w <> in.w + in.b <> out.b + + if (queues == 1) { + // Gracefully do nothing + in.r <> out.r + } else { + // Buffer R response + val count = RegInit(Vec.fill(queues) { UInt(0, width=log2Ceil(beats+1)) }) + val qs = Seq.fill(queues) { Module(new Queue(out.r.bits, beats)) } + + // Which ID is being enqueued and dequeued? 
+ val locked = RegInit(Bool(false)) + val deq_id = Reg(UInt(width=log2Ceil(queues))) + val enq_id = out.r.bits.id + val deq_OH = UIntToOH(deq_id, queues) + val enq_OH = UIntToOH(enq_id, queues) + + // Track the number of completely received bursts per FIFO id + val next_count = Wire(count) + ((count zip next_count) zip (enq_OH.toBools zip deq_OH.toBools)) foreach { case ((p, n), (i, d)) => + val inc = i && out.r.fire() && out.r.bits.last + val dec = d && in.r.fire() && in.r.bits.last + n := p + inc.asUInt - dec.asUInt + // Bounds checking + assert (!dec || p =/= UInt(0)) + assert (!inc || p =/= UInt(beats)) + } + count := next_count + + // Select which Q will we start sending next cycle + val pending = Cat(next_count.map(_ =/= UInt(0)).reverse) + val winner = pending & ~(leftOR(pending) << 1) + when (!locked || (in.r.fire() && in.r.bits.last)) { + locked := pending.orR + deq_id := OHToUInt(winner) + } + + // Transmit the selected burst to inner + in.r.valid := locked + in.r.bits := Vec(qs.map(_.io.deq.bits))(deq_id) + (deq_OH.toBools zip qs) foreach { case (s, q) => + q.io.deq.ready := s && in.r.fire() + } + + // Feed response into matching Q + out.r.ready := Vec(qs.map(_.io.enq.ready))(enq_id) + (enq_OH.toBools zip qs) foreach { case (s, q) => + q.io.enq.valid := s && out.r.valid + q.io.enq.bits := out.r.bits + } + } + } + } +} + +object AXI4Deinterleaver +{ + // applied to the AXI4 source node; y.node := AXI4Deinterleaver()(x.node) + def apply(maxReadBytes: Int)(x: AXI4OutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): AXI4OutwardNode = { + val deinterleaver = LazyModule(new AXI4Deinterleaver(maxReadBytes)) + deinterleaver.node := x + deinterleaver.node + } +} From bf5cb396b956a93d9f6f49171544c46cb8282e65 Mon Sep 17 00:00:00 2001 From: "Wesley W. 
Terpstra" Date: Fri, 21 Apr 2017 17:13:09 -0700 Subject: [PATCH 13/29] rocketchip: relax mmio no-interleaving requirement --- src/main/scala/rocketchip/Periphery.scala | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/scala/rocketchip/Periphery.scala b/src/main/scala/rocketchip/Periphery.scala index 7d29c8cc..92bbbd76 100644 --- a/src/main/scala/rocketchip/Periphery.scala +++ b/src/main/scala/rocketchip/Periphery.scala @@ -202,16 +202,15 @@ trait PeripheryMasterAXI4MMIO { resources = device.reg, executable = true, // Can we run programs on this memory? supportsWrite = TransferSizes(1, 256), // The slave supports 1-256 byte transfers - supportsRead = TransferSizes(1, 256), - interleavedId = Some(0))), // slave does not interleave read responses + supportsRead = TransferSizes(1, 256))), beatBytes = config.beatBytes))) mmio_axi4 := AXI4Buffer()( - // AXI4Fragmenter(lite=false, maxInFlight = 20)( // beef device up to support awlen = 0xff + AXI4Deinterleaver(cacheBlockBytes)( TLToAXI4(idBits = config.idBits)( // use idBits = 0 for AXI4-Lite TLWidthWidget(socBusConfig.beatBytes)( // convert width before attaching to socBus - socBus.node))) + socBus.node)))) } trait PeripheryMasterAXI4MMIOBundle { From 61a6f94196b8a4aee3457d42852992d963f927f5 Mon Sep 17 00:00:00 2001 From: "Wesley W. 
Terpstra" Date: Fri, 21 Apr 2017 17:13:51 -0700 Subject: [PATCH 14/29] axi4: get unit tests legal again --- src/main/scala/uncore/axi4/Test.scala | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/main/scala/uncore/axi4/Test.scala b/src/main/scala/uncore/axi4/Test.scala index 76a6c7bf..0d2a3777 100644 --- a/src/main/scala/uncore/axi4/Test.scala +++ b/src/main/scala/uncore/axi4/Test.scala @@ -26,8 +26,8 @@ class AXI4LiteFuzzRAM()(implicit p: Parameters) extends LazyModule model.node := fuzz.node xbar.node := TLDelayer(0.1)(TLBuffer(BufferParams.flow)(TLDelayer(0.2)(model.node))) - ram.node := AXI4Fragmenter()(TLToAXI4(0, true )(xbar.node)) - gpio.node := AXI4Fragmenter()(TLToAXI4(0, false)(xbar.node)) + ram.node := AXI4Fragmenter()(AXI4Deinterleaver(16)(TLToAXI4(0, true )(xbar.node))) + gpio.node := AXI4Fragmenter()(AXI4Deinterleaver(16)(TLToAXI4(0, false)(xbar.node))) lazy val module = new LazyModuleImp(this) with HasUnitTestIO { io.finished := fuzz.module.io.finished @@ -49,8 +49,8 @@ class AXI4FullFuzzRAM()(implicit p: Parameters) extends LazyModule model.node := fuzz.node xbar.node := TLDelayer(0.1)(TLBuffer(BufferParams.flow)(TLDelayer(0.2)(model.node))) - ram.node := AXI4Fragmenter()(TLToAXI4(4,false)(xbar.node)) - gpio.node := AXI4Fragmenter()(TLToAXI4(4,true )(xbar.node)) + ram.node := AXI4Fragmenter()(AXI4Deinterleaver(16)(TLToAXI4(4,false)(xbar.node))) + gpio.node := AXI4Fragmenter()(AXI4Deinterleaver(16)(TLToAXI4(4,true )(xbar.node))) lazy val module = new LazyModuleImp(this) with HasUnitTestIO { io.finished := fuzz.module.io.finished @@ -70,11 +70,13 @@ class AXI4FuzzMaster()(implicit p: Parameters) extends LazyModule model.node := fuzz.node node := + // AXI4UserYanker()( ... 
once TLToAXI is updated + AXI4Deinterleaver(64)( TLToAXI4(4)( TLDelayer(0.1)( TLBuffer(BufferParams.flow)( TLDelayer(0.1)( - model.node)))) + model.node))))) lazy val module = new LazyModuleImp(this) { val io = new Bundle { @@ -89,17 +91,22 @@ class AXI4FuzzMaster()(implicit p: Parameters) extends LazyModule class AXI4FuzzSlave()(implicit p: Parameters) extends LazyModule { val node = AXI4InputNode() + val xbar = LazyModule(new TLXbar) val ram = LazyModule(new TLTestRAM(AddressSet(0x0, 0xfff))) + val error= LazyModule(new TLError(Seq(AddressSet(0x1800, 0xff)))) - ram.node := - TLFragmenter(4, 16)( + ram.node := TLFragmenter(4, 16)(xbar.node) + error.node := TLFragmenter(4, 16)(xbar.node) + + xbar.node := + TLFIFOFixer()( TLDelayer(0.1)( TLBuffer(BufferParams.flow)( TLDelayer(0.1)( AXI4ToTL()( AXI4UserYanker(4)( AXI4Fragmenter()( - AXI4IdIndexer(4)( + AXI4IdIndexer(2)( node)))))))) lazy val module = new LazyModuleImp(this) { From 9f08c484bdc868f16138bc188cd79261620960cc Mon Sep 17 00:00:00 2001 From: "Wesley W. 
Terpstra" Date: Tue, 25 Apr 2017 17:55:06 -0700 Subject: [PATCH 15/29] tilelink2: ToAXI4 provide FIFO order semantics --- src/main/scala/uncore/tilelink2/ToAXI4.scala | 122 ++++++------------- 1 file changed, 36 insertions(+), 86 deletions(-) diff --git a/src/main/scala/uncore/tilelink2/ToAXI4.scala b/src/main/scala/uncore/tilelink2/ToAXI4.scala index 8cfb311e..0229bc0f 100644 --- a/src/main/scala/uncore/tilelink2/ToAXI4.scala +++ b/src/main/scala/uncore/tilelink2/ToAXI4.scala @@ -10,14 +10,19 @@ import util.PositionalMultiQueue import uncore.axi4._ import scala.math.{min, max} -case class TLToAXI4Node(idBits: Int) extends MixedAdapterNode(TLImp, AXI4Imp)( - dFn = { _ => - // We must erase all client information, because we crush their source Ids - val masters = Seq( +case class TLToAXI4Node(beatBytes: Int) extends MixedAdapterNode(TLImp, AXI4Imp)( + dFn = { p => + val idSize = p.clients.map { c => if (c.requestFifo) 1 else c.sourceId.size } + val idStart = idSize.scanLeft(0)(_+_).init + val masters = ((idStart zip idSize) zip p.clients) map { case ((start, size), c) => AXI4MasterParameters( - id = IdRange(0, 1 << idBits), - aligned = true)) - AXI4MasterPortParameters(masters) + id = IdRange(start, start+size), + aligned = true, + nodePath = c.nodePath) + } + AXI4MasterPortParameters( + masters = masters, + userBits = log2Ceil(p.endSourceId) + 4 + log2Ceil(beatBytes)) }, uFn = { p => TLManagerPortParameters( managers = p.slaves.map { case s => @@ -29,15 +34,15 @@ case class TLToAXI4Node(idBits: Int) extends MixedAdapterNode(TLImp, AXI4Imp)( nodePath = s.nodePath, supportsGet = s.supportsRead, supportsPutFull = s.supportsWrite, - supportsPutPartial = s.supportsWrite)}, - // AXI4 is NEVER fifo in TL sense (R+W are independent) + supportsPutPartial = s.supportsWrite, + fifoId = Some(0))}, beatBytes = p.beatBytes, minLatency = p.minLatency) }) -class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: Parameters) extends LazyModule +class 
TLToAXI4(beatBytes: Int, combinational: Boolean = true)(implicit p: Parameters) extends LazyModule { - val node = TLToAXI4Node(idBits) + val node = TLToAXI4Node(beatBytes) lazy val module = new LazyModuleImp(this) { val io = new Bundle { @@ -52,24 +57,23 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P require (slaves(0).interleavedId.isDefined) slaves.foreach { s => require (s.interleavedId == slaves(0).interleavedId) } - // We need to ensure that a slave does not stall trying to send B while we need to receive R - // Since R&W have independent flow control, it is possible for a W to cut in-line and get into - // a slave's buffers, preventing us from getting all the R responses we need to release D for B. - // This risk is compounded by an AXI fragmentation. Even a slave which responds completely to - // AR before working on AW might have an AW slipped between two AR fragments. - val out_b = Queue.irrevocable(out.b, entries=edgeIn.client.endSourceId, flow=combinational) + // Construct the source=>ID mapping table + val idTable = Wire(Vec(edgeIn.client.endSourceId, out.aw.bits.id)) + (edgeIn.client.clients zip edgeOut.master.masters) foreach { case (c, m) => + for (i <- 0 until c.sourceId.size) { + idTable(c.sourceId.start + i) := UInt(m.id.start + (if (c.requestFifo) 0 else i)) + } + } // We need to keep the following state from A => D: (addr_lo, size, source) // All of those fields could potentially require 0 bits (argh. Chisel.) - // We will pack as many of the lowest bits of state as fit into the AXI ID. - // Any bits left-over must be put into a bank of Queues. - // The Queues are indexed by as many of the source bits as fit into the AXI ID. - // The Queues are deep enough that every source has guaranteed space in its Queue. + // We will pack all of that extra information into the user bits. 
val sourceBits = log2Ceil(edgeIn.client.endSourceId) val sizeBits = log2Ceil(edgeIn.maxLgSize+1) val addrBits = log2Ceil(edgeIn.manager.beatBytes) val stateBits = addrBits + sizeBits + sourceBits // could be 0 + require (stateBits <= out.aw.bits.params.userBits) val a_address = edgeIn.address(in.a.bits) val a_addr_lo = edgeIn.addr_lo(a_address) @@ -91,73 +95,17 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P require (addrEnd == stateBits) val a_state = (a_source << sourceOff) | (a_size << sizeOff) | (a_addr_lo << addrOff) - val a_id = if (idBits == 0) UInt(0) else a_state - val r_state = Wire(UInt(width = stateBits)) + val r_state = out.r.bits.user.getOrElse(UInt(0)) val r_source = if (sourceBits > 0) r_state(sourceEnd-1, sourceOff) else UInt(0) val r_size = if (sizeBits > 0) r_state(sizeEnd -1, sizeOff) else UInt(0) val r_addr_lo = if (addrBits > 0) r_state(addrEnd -1, addrOff) else UInt(0) - val b_state = Wire(UInt(width = stateBits)) + val b_state = out.b.bits.user.getOrElse(UInt(0)) val b_source = if (sourceBits > 0) b_state(sourceEnd-1, sourceOff) else UInt(0) val b_size = if (sizeBits > 0) b_state(sizeEnd -1, sizeOff) else UInt(0) val b_addr_lo = if (addrBits > 0) b_state(addrEnd -1, addrOff) else UInt(0) - val r_last = out.r.bits.last - val r_id = out.r.bits.id - val b_id = out_b.bits.id - - if (stateBits <= idBits) { // No need for any state tracking - r_state := r_id - b_state := b_id - } else { - val bankIndexBits = min(sourceBits, idBits) - val posBits = max(0, sourceBits - idBits) - val implicitBits = max(idBits, sourceBits) - val bankBits = stateBits - implicitBits - val numBanks = min(1 << bankIndexBits, edgeIn.client.endSourceId) - def bankEntries(i: Int) = (edgeIn.client.endSourceId+numBanks-i-1) / numBanks - - val banks = Seq.tabulate(numBanks) { i => - // We know there can only be as many outstanding requests as TL sources - // However, AXI read and write queues are not mutually FIFO. 
- // Therefore, we want to pop them individually, but share the storage. - val bypass = combinational && edgeOut.slave.minLatency == 0 - PositionalMultiQueue(UInt(width=max(1,bankBits)), positions=bankEntries(i), ways=2, combinational=bypass) - } - - val a_bankPosition = if (posBits == 0) UInt(0) else a_source(sourceBits-1, idBits) - val a_bankIndex = if (bankIndexBits == 0) UInt(0) else a_source(bankIndexBits-1, 0) - val r_bankIndex = if (bankIndexBits == 0) UInt(0) else r_id(bankIndexBits-1, 0) - val b_bankIndex = if (bankIndexBits == 0) UInt(0) else b_id(bankIndexBits-1, 0) - val a_bankSelect = UIntToOH(a_bankIndex, numBanks) - val r_bankSelect = UIntToOH(r_bankIndex, numBanks) - val b_bankSelect = UIntToOH(b_bankIndex, numBanks) - - banks.zipWithIndex.foreach { case (q, i) => - // Push a_state into the banks - q.io.enq.valid := in.a.fire() && a_last && a_bankSelect(i) - q.io.enq.bits.pos := a_bankPosition - q.io.enq.bits.data := a_state >> implicitBits - q.io.enq.bits.way := Mux(a_isPut, UInt(0), UInt(1)) - // Pop the bank's ways - q.io.deq(0).ready := out_b.fire() && b_bankSelect(i) - q.io.deq(1).ready := out.r.fire() && r_bankSelect(i) && r_last - // The FIFOs must be valid when we're ready to pop them... 
- assert (q.io.deq(0).valid || !q.io.deq(0).ready) - assert (q.io.deq(1).valid || !q.io.deq(1).ready) - } - - val b_bankData = Vec(banks.map(_.io.deq(0).bits.data))(b_bankIndex) - val b_bankPos = Vec(banks.map(_.io.deq(0).bits.pos ))(b_bankIndex) - val r_bankData = Vec(banks.map(_.io.deq(1).bits.data))(r_bankIndex) - val r_bankPos = Vec(banks.map(_.io.deq(1).bits.pos ))(r_bankIndex) - - def optCat(x: (Boolean, UInt)*) = { Cat(x.toList.filter(_._1).map(_._2)) } - b_state := optCat((bankBits > 0, b_bankData), (posBits > 0, b_bankPos), (idBits > 0, b_id)) - r_state := optCat((bankBits > 0, r_bankData), (posBits > 0, r_bankPos), (idBits > 0, r_id)) - } - // We need these Queues because AXI4 queues are irrevocable val depth = if (combinational) 1 else 2 val out_arw = Wire(Decoupled(new AXI4BundleARW(out.params))) @@ -179,7 +127,7 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P val arw = out_arw.bits arw.wen := a_isPut - arw.id := a_id // truncated + arw.id := idTable(a_source) arw.addr := a_address arw.len := UIntToOH1(a_size, AXI4Parameters.lenBits + log2Ceil(beatBytes)) >> log2Ceil(beatBytes) arw.size := Mux(a_size >= maxSize, maxSize, a_size) @@ -188,7 +136,9 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P arw.cache := UInt(0) // do not allow AXI to modify our transactions arw.prot := AXI4Parameters.PROT_PRIVILEDGED arw.qos := UInt(0) // no QoS + arw.user.foreach { _ := a_state } + // !!! 
Mix R-W stall here in.a.ready := Mux(a_isPut, (doneAW || out_arw.ready) && out_w.ready, out_arw.ready) out_arw.valid := in.a.valid && Mux(a_isPut, !doneAW && out_w.ready, Bool(true)) @@ -204,11 +154,11 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P val r_wins = out.r.valid || r_holds_d out.r.ready := in.d.ready - out_b.ready := in.d.ready && !r_wins - in.d.valid := Mux(r_wins, out.r.valid, out_b.valid) + out.b.ready := in.d.ready && !r_wins + in.d.valid := Mux(r_wins, out.r.valid, out.b.valid) val r_error = out.r.bits.resp =/= AXI4Parameters.RESP_OKAY - val b_error = out_b.bits.resp =/= AXI4Parameters.RESP_OKAY + val b_error = out.b.bits.resp =/= AXI4Parameters.RESP_OKAY val r_d = edgeIn.AccessAck(r_addr_lo, UInt(0), r_source, r_size, UInt(0), r_error) val b_d = edgeIn.AccessAck(b_addr_lo, UInt(0), b_source, b_size, b_error) @@ -226,9 +176,9 @@ class TLToAXI4(val idBits: Int, val combinational: Boolean = true)(implicit p: P object TLToAXI4 { - // applied to the TL source node; y.node := TLToAXI4(idBits)(x.node) - def apply(idBits: Int, combinational: Boolean = true)(x: TLOutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): AXI4OutwardNode = { - val axi4 = LazyModule(new TLToAXI4(idBits, combinational)) + // applied to the TL source node; y.node := TLToAXI4(beatBytes)(x.node) + def apply(beatBytes: Int, combinational: Boolean = true)(x: TLOutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): AXI4OutwardNode = { + val axi4 = LazyModule(new TLToAXI4(beatBytes, combinational)) axi4.node := x axi4.node } From e1a072a644d4fb93200fcaddcbaeec941f4908af Mon Sep 17 00:00:00 2001 From: "Wesley W. 
Terpstra" Date: Tue, 25 Apr 2017 17:56:06 -0700 Subject: [PATCH 16/29] axi4: massage test cases into shape again --- src/main/scala/rocketchip/Periphery.scala | 16 +++++++++++----- src/main/scala/uncore/axi4/Test.scala | 6 +++--- src/main/scala/uncore/axi4/UserYanker.scala | 8 +++++--- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/src/main/scala/rocketchip/Periphery.scala b/src/main/scala/rocketchip/Periphery.scala index 92bbbd76..2fac8023 100644 --- a/src/main/scala/rocketchip/Periphery.scala +++ b/src/main/scala/rocketchip/Periphery.scala @@ -134,12 +134,16 @@ trait PeripheryMasterAXI4Mem { beatBytes = config.beatBytes) }) - private val converter = LazyModule(new TLToAXI4(config.idBits)) + private val converter = LazyModule(new TLToAXI4(config.beatBytes)) + private val trim = LazyModule(new AXI4IdIndexer(config.idBits)) + private val yank = LazyModule(new AXI4UserYanker) private val buffer = LazyModule(new AXI4Buffer) mem foreach { case xbar => converter.node := xbar.node - buffer.node := converter.node + trim.node := converter.node + yank.node := trim.node + buffer.node := yank.node mem_axi4 := buffer.node } } @@ -207,10 +211,12 @@ trait PeripheryMasterAXI4MMIO { mmio_axi4 := AXI4Buffer()( + AXI4UserYanker()( AXI4Deinterleaver(cacheBlockBytes)( - TLToAXI4(idBits = config.idBits)( // use idBits = 0 for AXI4-Lite + AXI4IdIndexer(config.idBits)( + TLToAXI4(config.beatBytes)( TLWidthWidget(socBusConfig.beatBytes)( // convert width before attaching to socBus - socBus.node)))) + socBus.node)))))) } trait PeripheryMasterAXI4MMIOBundle { @@ -241,7 +247,7 @@ trait PeripherySlaveAXI4 extends HasTopLevelNetworks { fsb.node := TLWidthWidget(config.beatBytes)( AXI4ToTL()( - AXI4UserYanker(1 << (config.sourceBits - fifoBits - 1))( + AXI4UserYanker(Some(1 << (config.sourceBits - fifoBits - 1)))( AXI4Fragmenter()( AXI4IdIndexer(fifoBits)( l2FrontendAXI4Node))))) diff --git a/src/main/scala/uncore/axi4/Test.scala b/src/main/scala/uncore/axi4/Test.scala index 
0d2a3777..2e75d370 100644 --- a/src/main/scala/uncore/axi4/Test.scala +++ b/src/main/scala/uncore/axi4/Test.scala @@ -26,8 +26,8 @@ class AXI4LiteFuzzRAM()(implicit p: Parameters) extends LazyModule model.node := fuzz.node xbar.node := TLDelayer(0.1)(TLBuffer(BufferParams.flow)(TLDelayer(0.2)(model.node))) - ram.node := AXI4Fragmenter()(AXI4Deinterleaver(16)(TLToAXI4(0, true )(xbar.node))) - gpio.node := AXI4Fragmenter()(AXI4Deinterleaver(16)(TLToAXI4(0, false)(xbar.node))) + ram.node := AXI4Fragmenter()(AXI4Deinterleaver(16)(TLToAXI4(4, true )(xbar.node))) + gpio.node := AXI4Fragmenter()(AXI4Deinterleaver(16)(TLToAXI4(4, false)(xbar.node))) lazy val module = new LazyModuleImp(this) with HasUnitTestIO { io.finished := fuzz.module.io.finished @@ -104,7 +104,7 @@ class AXI4FuzzSlave()(implicit p: Parameters) extends LazyModule TLBuffer(BufferParams.flow)( TLDelayer(0.1)( AXI4ToTL()( - AXI4UserYanker(4)( + AXI4UserYanker(Some(4))( AXI4Fragmenter()( AXI4IdIndexer(2)( node)))))))) diff --git a/src/main/scala/uncore/axi4/UserYanker.scala b/src/main/scala/uncore/axi4/UserYanker.scala index 286e4a24..521b04a2 100644 --- a/src/main/scala/uncore/axi4/UserYanker.scala +++ b/src/main/scala/uncore/axi4/UserYanker.scala @@ -8,8 +8,10 @@ import config._ import diplomacy._ import uncore.tilelink2.UIntToOH1 -class AXI4UserYanker(maxFlightPerId: Int)(implicit p: Parameters) extends LazyModule +class AXI4UserYanker(capMaxFlight: Option[Int] = None)(implicit p: Parameters) extends LazyModule { + // !!! 
make maxFlightPerId a cap and maxFlight a per AXI4 Master parameter + val maxFlightPerId = capMaxFlight.getOrElse(8) require (maxFlightPerId >= 1) val node = AXI4AdapterNode( @@ -80,8 +82,8 @@ class AXI4UserYanker(maxFlightPerId: Int)(implicit p: Parameters) extends LazyMo object AXI4UserYanker { // applied to the AXI4 source node; y.node := AXI4UserYanker(idBits, maxFlight)(x.node) - def apply(maxFlight: Int)(x: AXI4OutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): AXI4OutwardNode = { - val yanker = LazyModule(new AXI4UserYanker(maxFlight)) + def apply(capMaxFlight: Option[Int] = None)(x: AXI4OutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): AXI4OutwardNode = { + val yanker = LazyModule(new AXI4UserYanker(capMaxFlight)) yanker.node := x yanker.node } From d27e1928dd86fd0244390885b936c470d29207f0 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Tue, 25 Apr 2017 18:49:33 -0700 Subject: [PATCH 17/29] axi4: make maxFlight a per-master parameter --- src/main/scala/uncore/axi4/IdIndexer.scala | 25 +++++++-- src/main/scala/uncore/axi4/Parameters.scala | 12 ++--- src/main/scala/uncore/axi4/ToTL.scala | 7 +-- src/main/scala/uncore/axi4/UserYanker.scala | 54 +++++++++++++------- src/main/scala/uncore/tilelink2/ToAXI4.scala | 7 +-- 5 files changed, 69 insertions(+), 36 deletions(-) diff --git a/src/main/scala/uncore/axi4/IdIndexer.scala b/src/main/scala/uncore/axi4/IdIndexer.scala index fa3916b1..b7283417 100644 --- a/src/main/scala/uncore/axi4/IdIndexer.scala +++ b/src/main/scala/uncore/axi4/IdIndexer.scala @@ -13,11 +13,26 @@ class AXI4IdIndexer(idBits: Int)(implicit p: Parameters) extends LazyModule require (idBits >= 0) val node = AXI4AdapterNode( - masterFn = { mp => mp.copy( - userBits = mp.userBits + max(0, log2Ceil(mp.endId) - idBits), - masters = Seq(AXI4MasterParameters( - id = IdRange(0, min(mp.endId, 1 << idBits)), - aligned = mp.masters.map(_.aligned).reduce(_ && _)))) + masterFn = { mp => + // Create one new "master" per ID + 
val masters = Array.tabulate(1 << idBits) { i => AXI4MasterParameters( + id = IdRange(i, i+1), + aligned = true, + maxFlight = Some(0)) + } + // Squash the information from original masters into new ID masters + mp.masters.foreach { m => + for (i <- m.id.start until m.id.end) { + val j = i % (1 << idBits) + val old = masters(j) + masters(j) = old.copy( + aligned = old.aligned && m.aligned, + maxFlight = old.maxFlight.flatMap { o => m.maxFlight.map { n => o+n } }) + } + } + mp.copy( + userBits = mp.userBits + max(0, log2Ceil(mp.endId) - idBits), + masters = masters) }, slaveFn = { sp => sp.copy( slaves = sp.slaves.map(s => s.copy( diff --git a/src/main/scala/uncore/axi4/Parameters.scala b/src/main/scala/uncore/axi4/Parameters.scala index a3a8f472..9ef05c77 100644 --- a/src/main/scala/uncore/axi4/Parameters.scala +++ b/src/main/scala/uncore/axi4/Parameters.scala @@ -62,21 +62,21 @@ case class AXI4SlavePortParameters( } case class AXI4MasterParameters( - id: IdRange = IdRange(0, 1), - aligned: Boolean = false, - nodePath: Seq[BaseNode] = Seq()) + id: IdRange = IdRange(0, 1), + aligned: Boolean = false, + maxFlight: Option[Int] = None, // None = infinite, else is a per-ID cap + nodePath: Seq[BaseNode] = Seq()) { val name = nodePath.lastOption.map(_.lazyModule.name).getOrElse("disconnected") + maxFlight.foreach { m => require (m >= 0) } } case class AXI4MasterPortParameters( masters: Seq[AXI4MasterParameters], - userBits: Int = 0, - maxFlight: Int = 0) // at most X transactions per ID (0 = unlimited) + userBits: Int = 0) { val endId = masters.map(_.id.end).max require (userBits >= 0) - require (maxFlight >= 0) // Require disjoint ranges for ids masters.combinations(2).foreach { case Seq(x,y) => require (!x.id.overlaps(y.id), s"$x and $y overlap") } diff --git a/src/main/scala/uncore/axi4/ToTL.scala b/src/main/scala/uncore/axi4/ToTL.scala index 8e2dd849..d1edd5b6 100644 --- a/src/main/scala/uncore/axi4/ToTL.scala +++ b/src/main/scala/uncore/axi4/ToTL.scala @@ -9,8 +9,9 
@@ import diplomacy._ import uncore.tilelink2._ case class AXI4ToTLNode() extends MixedAdapterNode(AXI4Imp, TLImp)( - dFn = { case AXI4MasterPortParameters(masters, userBits, maxFlight) => - require (maxFlight > 0, "AXI4 must include a maximum transactions per ID to convert to TL") + dFn = { case AXI4MasterPortParameters(masters, userBits) => + masters.foreach { m => require (m.maxFlight.isDefined, "AXI4 must include a transaction maximum per ID to convert to TL") } + val maxFlight = masters.map(_.maxFlight.get).max TLClientPortParameters( clients = masters.flatMap { m => for (id <- m.id.start until m.id.end) @@ -50,7 +51,7 @@ class AXI4ToTL()(implicit p: Parameters) extends LazyModule val numIds = edgeIn.master.endId val beatBytes = edgeOut.manager.beatBytes val countBits = AXI4Parameters.lenBits + (1 << AXI4Parameters.sizeBits) - 1 - val maxFlight = edgeIn.master.maxFlight + val maxFlight = edgeIn.master.masters.map(_.maxFlight.get).max val addedBits = log2Ceil(maxFlight) + 1 require (edgeIn.master.userBits == 0, "AXI4 user bits cannot be transported by TL") diff --git a/src/main/scala/uncore/axi4/UserYanker.scala b/src/main/scala/uncore/axi4/UserYanker.scala index 521b04a2..72bd7bdc 100644 --- a/src/main/scala/uncore/axi4/UserYanker.scala +++ b/src/main/scala/uncore/axi4/UserYanker.scala @@ -10,12 +10,15 @@ import uncore.tilelink2.UIntToOH1 class AXI4UserYanker(capMaxFlight: Option[Int] = None)(implicit p: Parameters) extends LazyModule { - // !!! 
make maxFlightPerId a cap and maxFlight a per AXI4 Master parameter - val maxFlightPerId = capMaxFlight.getOrElse(8) - require (maxFlightPerId >= 1) - val node = AXI4AdapterNode( - masterFn = { mp => mp.copy(maxFlight = maxFlightPerId, userBits = 0) }, + masterFn = { mp => mp.copy( + userBits = 0, + masters = mp.masters.map { m => m.copy( + maxFlight = (m.maxFlight, capMaxFlight) match { + case (Some(x), Some(y)) => Some(x min y) + case (Some(x), None) => Some(x) + case (None, Some(y)) => Some(y) + case (None, None) => None })})}, slaveFn = { sp => sp }) lazy val module = new LazyModuleImp(this) { @@ -29,18 +32,31 @@ class AXI4UserYanker(capMaxFlight: Option[Int] = None)(implicit p: Parameters) e val need_bypass = edgeOut.slave.minLatency < 1 require (bits > 0) // useless UserYanker! - val rqueues = Seq.fill(edgeIn.master.endId) { Module(new Queue(UInt(width = bits), maxFlightPerId, flow=need_bypass)) } - val wqueues = Seq.fill(edgeIn.master.endId) { Module(new Queue(UInt(width = bits), maxFlightPerId, flow=need_bypass)) } + edgeOut.master.masters.foreach { m => + require (m.maxFlight.isDefined, "UserYanker needs a flight cap on each ID") + } + + def queue(id: Int) = { + val depth = edgeOut.master.masters.find(_.id.contains(id)).flatMap(_.maxFlight).getOrElse(0) + if (depth == 0) { + Wire(new QueueIO(UInt(width = bits), 1)) // unused ID => undefined value + } else { + Module(new Queue(UInt(width = bits), depth, flow=need_bypass)).io + } + } + + val rqueues = Seq.tabulate(edgeIn.master.endId) { i => queue(i) } + val wqueues = Seq.tabulate(edgeIn.master.endId) { i => queue(i) } val arid = in.ar.bits.id - val ar_ready = Vec(rqueues.map(_.io.enq.ready))(arid) + val ar_ready = Vec(rqueues.map(_.enq.ready))(arid) in .ar.ready := out.ar.ready && ar_ready out.ar.valid := in .ar.valid && ar_ready out.ar.bits := in .ar.bits val rid = out.r.bits.id - val r_valid = Vec(rqueues.map(_.io.deq.valid))(rid) - val r_bits = Vec(rqueues.map(_.io.deq.bits))(rid) + val r_valid = 
Vec(rqueues.map(_.deq.valid))(rid) + val r_bits = Vec(rqueues.map(_.deq.bits))(rid) assert (!out.r.valid || r_valid) // Q must be ready faster than the response in.r <> out.r in.r.bits.user.get := r_bits @@ -48,20 +64,20 @@ class AXI4UserYanker(capMaxFlight: Option[Int] = None)(implicit p: Parameters) e val arsel = UIntToOH(arid, edgeIn.master.endId).toBools val rsel = UIntToOH(rid, edgeIn.master.endId).toBools (rqueues zip (arsel zip rsel)) foreach { case (q, (ar, r)) => - q.io.deq.ready := out.r .valid && in .r .ready && r && out.r.bits.last - q.io.enq.valid := in .ar.valid && out.ar.ready && ar - q.io.enq.bits := in.ar.bits.user.get + q.deq.ready := out.r .valid && in .r .ready && r && out.r.bits.last + q.enq.valid := in .ar.valid && out.ar.ready && ar + q.enq.bits := in.ar.bits.user.get } val awid = in.aw.bits.id - val aw_ready = Vec(wqueues.map(_.io.enq.ready))(awid) + val aw_ready = Vec(wqueues.map(_.enq.ready))(awid) in .aw.ready := out.aw.ready && aw_ready out.aw.valid := in .aw.valid && aw_ready out.aw.bits := in .aw.bits val bid = out.b.bits.id - val b_valid = Vec(wqueues.map(_.io.deq.valid))(bid) - val b_bits = Vec(wqueues.map(_.io.deq.bits))(bid) + val b_valid = Vec(wqueues.map(_.deq.valid))(bid) + val b_bits = Vec(wqueues.map(_.deq.bits))(bid) assert (!out.b.valid || b_valid) // Q must be ready faster than the response in.b <> out.b in.b.bits.user.get := b_bits @@ -69,9 +85,9 @@ class AXI4UserYanker(capMaxFlight: Option[Int] = None)(implicit p: Parameters) e val awsel = UIntToOH(awid, edgeIn.master.endId).toBools val bsel = UIntToOH(bid, edgeIn.master.endId).toBools (wqueues zip (awsel zip bsel)) foreach { case (q, (aw, b)) => - q.io.deq.ready := out.b .valid && in .b .ready && b - q.io.enq.valid := in .aw.valid && out.aw.ready && aw - q.io.enq.bits := in.aw.bits.user.get + q.deq.ready := out.b .valid && in .b .ready && b + q.enq.valid := in .aw.valid && out.aw.ready && aw + q.enq.bits := in.aw.bits.user.get } out.w <> in.w diff --git 
a/src/main/scala/uncore/tilelink2/ToAXI4.scala b/src/main/scala/uncore/tilelink2/ToAXI4.scala index 0229bc0f..5d26a547 100644 --- a/src/main/scala/uncore/tilelink2/ToAXI4.scala +++ b/src/main/scala/uncore/tilelink2/ToAXI4.scala @@ -16,9 +16,10 @@ case class TLToAXI4Node(beatBytes: Int) extends MixedAdapterNode(TLImp, AXI4Imp) val idStart = idSize.scanLeft(0)(_+_).init val masters = ((idStart zip idSize) zip p.clients) map { case ((start, size), c) => AXI4MasterParameters( - id = IdRange(start, start+size), - aligned = true, - nodePath = c.nodePath) + id = IdRange(start, start+size), + aligned = true, + maxFlight = Some(if (c.requestFifo) c.sourceId.size else 1), + nodePath = c.nodePath) } AXI4MasterPortParameters( masters = masters, From a71f708dc79b5d8be158d9c48d73b30c5daa1711 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Wed, 26 Apr 2017 13:18:55 -0700 Subject: [PATCH 18/29] rocketchip: move the Error device to 0x3000 --- src/main/scala/rocketchip/Configs.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/rocketchip/Configs.scala b/src/main/scala/rocketchip/Configs.scala index 7196507b..9068d1b7 100644 --- a/src/main/scala/rocketchip/Configs.scala +++ b/src/main/scala/rocketchip/Configs.scala @@ -39,7 +39,7 @@ class BasePlatformConfig extends Config((site, here, up) => { case IncludeJtagDTM => false case JtagDTMKey => new JtagDTMKeyDefault() case ZeroConfig => ZeroConfig(base=0xa000000L, size=0x2000000L, beatBytes=8) - case ErrorConfig => ErrorConfig(Seq(AddressSet(0x1000, 0xfff))) + case ErrorConfig => ErrorConfig(Seq(AddressSet(0x3000, 0xfff))) case ExtMem => MasterConfig(base=0x80000000L, size=0x10000000L, beatBytes=8, idBits=4) case ExtBus => MasterConfig(base=0x60000000L, size=0x20000000L, beatBytes=8, idBits=4) case ExtIn => SlaveConfig(beatBytes=8, idBits=8, sourceBits=4) From b040a462c955cc18084df0cc0d2b8d07bb985594 Mon Sep 17 00:00:00 2001 From: Scott Johnson Date: Wed, 26 Apr 2017 16:46:57 -0700 Subject: 
[PATCH 19/29] Wes's change to remove user bits from external AXI interface, and add 1 cycle latency to make sure external AXI is compliant --- src/main/scala/uncore/axi4/Test.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/scala/uncore/axi4/Test.scala b/src/main/scala/uncore/axi4/Test.scala index 2e75d370..69c97580 100644 --- a/src/main/scala/uncore/axi4/Test.scala +++ b/src/main/scala/uncore/axi4/Test.scala @@ -70,13 +70,13 @@ class AXI4FuzzMaster()(implicit p: Parameters) extends LazyModule model.node := fuzz.node node := - // AXI4UserYanker()( ... once TLToAXI is updated + AXI4UserYanker()( AXI4Deinterleaver(64)( TLToAXI4(4)( TLDelayer(0.1)( TLBuffer(BufferParams.flow)( TLDelayer(0.1)( - model.node))))) + model.node)))))) lazy val module = new LazyModuleImp(this) { val io = new Bundle { @@ -92,7 +92,7 @@ class AXI4FuzzSlave()(implicit p: Parameters) extends LazyModule { val node = AXI4InputNode() val xbar = LazyModule(new TLXbar) - val ram = LazyModule(new TLTestRAM(AddressSet(0x0, 0xfff))) + val ram = LazyModule(new TLRAM(AddressSet(0x0, 0xfff))) val error= LazyModule(new TLError(Seq(AddressSet(0x1800, 0xff)))) ram.node := TLFragmenter(4, 16)(xbar.node) From e09fa866b7a3941f51a5c3e877c33a1c6e1a7290 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Wed, 26 Apr 2017 16:48:35 -0700 Subject: [PATCH 20/29] tilelink2: FIFOFixer should NOT change client request status Just because some clients are not FIFO does not matter. Downstream FIFOFixers will still present a legitimate single domain to those client who care. 
--- src/main/scala/uncore/tilelink2/FIFOFixer.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/scala/uncore/tilelink2/FIFOFixer.scala b/src/main/scala/uncore/tilelink2/FIFOFixer.scala index 32b2fa4f..8b3943ea 100644 --- a/src/main/scala/uncore/tilelink2/FIFOFixer.scala +++ b/src/main/scala/uncore/tilelink2/FIFOFixer.scala @@ -10,9 +10,8 @@ import scala.math.max class TLFIFOFixer(implicit p: Parameters) extends LazyModule { - // We request downstream FIFO so we can use the existing fifoId val node = TLAdapterNode( - clientFn = { cp => cp.copy(clients = cp.clients .map(c => c.copy(requestFifo = !c.supportsProbe))) }, + clientFn = { cp => cp }, managerFn = { mp => mp.copy(managers = mp.managers.map(m => m.copy(fifoId = Some(0)))) }) lazy val module = new LazyModuleImp(this) { From 6ee69454c3d964ccd3b5990066f12d2dbf78b9d6 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Wed, 26 Apr 2017 17:39:57 -0700 Subject: [PATCH 21/29] tilelink2: Fragmenter now supports early Ack --- .../scala/uncore/tilelink2/Fragmenter.scala | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/main/scala/uncore/tilelink2/Fragmenter.scala b/src/main/scala/uncore/tilelink2/Fragmenter.scala index 7676aa2e..c726d7ff 100644 --- a/src/main/scala/uncore/tilelink2/Fragmenter.scala +++ b/src/main/scala/uncore/tilelink2/Fragmenter.scala @@ -14,7 +14,7 @@ import scala.math.{min,max} // Fragmenter modifies: PutFull, PutPartial, LogicalData, Get, Hint // Fragmenter passes: ArithmeticData (truncated to minSize if alwaysMin) // Fragmenter cannot modify acquire (could livelock); thus it is unsafe to put caches on both sides -class TLFragmenter(val minSize: Int, val maxSize: Int, val alwaysMin: Boolean = false)(implicit p: Parameters) extends LazyModule +class TLFragmenter(val minSize: Int, val maxSize: Int, val alwaysMin: Boolean = false, val earlyAck: Boolean = false)(implicit p: Parameters) extends LazyModule { require (isPow2 
(maxSize)) require (isPow2 (minSize)) @@ -137,6 +137,7 @@ class TLFragmenter(val minSize: Int, val maxSize: Int, val alwaysMin: Boolean = val dOrig = Reg(UInt()) val dFragnum = out.d.bits.source(fragmentBits-1, 0) val dFirst = acknum === UInt(0) + val dLast = dFragnum === UInt(0) val dsizeOH = UIntToOH (out.d.bits.size, log2Ceil(maxDownSize)+1) val dsizeOH1 = UIntToOH1(out.d.bits.size, log2Up(maxDownSize)) val dHasData = edgeOut.hasData(out.d.bits) @@ -156,7 +157,7 @@ class TLFragmenter(val minSize: Int, val maxSize: Int, val alwaysMin: Boolean = } // Swallow up non-data ack fragments - val drop = !dHasData && (dFragnum =/= UInt(0)) + val drop = !dHasData && !(if (earlyAck) dFirst else dLast) out.d.ready := in.d.ready || drop in.d.valid := out.d.valid && !drop in.d.bits := out.d.bits // pass most stuff unchanged @@ -164,11 +165,18 @@ class TLFragmenter(val minSize: Int, val maxSize: Int, val alwaysMin: Boolean = in.d.bits.source := out.d.bits.source >> fragmentBits in.d.bits.size := Mux(dFirst, dFirst_size, dOrig) - // Combine the error flag - val r_error = RegInit(Bool(false)) - val d_error = r_error | out.d.bits.error - when (out.d.fire()) { r_error := Mux(drop, d_error, UInt(0)) } - in.d.bits.error := d_error + if (earlyAck) { + // If you do early Ack, errors may not be dropped + // ... which roughly means: Puts may not fail + assert (!out.d.bits.error || !drop) + in.d.bits.error := out.d.bits.error + } else { + // Combine the error flag + val r_error = RegInit(Bool(false)) + val d_error = r_error | out.d.bits.error + when (out.d.fire()) { r_error := Mux(drop, d_error, UInt(0)) } + in.d.bits.error := d_error + } // What maximum transfer sizes do downstream devices support? 
val maxArithmetics = managers.map(_.supportsArithmetic.max) @@ -252,8 +260,8 @@ class TLFragmenter(val minSize: Int, val maxSize: Int, val alwaysMin: Boolean = object TLFragmenter { // applied to the TL source node; y.node := TLFragmenter(x.node, 256, 4) - def apply(minSize: Int, maxSize: Int, alwaysMin: Boolean = false)(x: TLOutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): TLOutwardNode = { - val fragmenter = LazyModule(new TLFragmenter(minSize, maxSize, alwaysMin)) + def apply(minSize: Int, maxSize: Int, alwaysMin: Boolean = false, earlyAck: Boolean = false)(x: TLOutwardNode)(implicit p: Parameters, sourceInfo: SourceInfo): TLOutwardNode = { + val fragmenter = LazyModule(new TLFragmenter(minSize, maxSize, alwaysMin, earlyAck)) fragmenter.node := x fragmenter.node } From 30f1f1e7c7b72e97fc57b08b0adf75a353370bca Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Wed, 26 Apr 2017 17:42:04 -0700 Subject: [PATCH 22/29] rocket: turn on early ack for DTIM --- src/main/scala/rocket/ScratchpadSlavePort.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/rocket/ScratchpadSlavePort.scala b/src/main/scala/rocket/ScratchpadSlavePort.scala index a96fd248..231af8c6 100644 --- a/src/main/scala/rocket/ScratchpadSlavePort.scala +++ b/src/main/scala/rocket/ScratchpadSlavePort.scala @@ -120,7 +120,7 @@ trait CanHaveScratchpad extends HasHellaCache with HasICacheFrontend with HasCor // 2) ScratchpadSlavePort always has a node, but only exists when the HellaCache has a scratchpad val scratch = tileParams.dcache.flatMap(d => d.scratch.map(s => LazyModule(new ScratchpadSlavePort(AddressSet(s, d.dataScratchpadBytes-1))))) - scratch foreach { lm => lm.node := TLFragmenter(xLen/8, p(CacheBlockBytes))(slaveNode) } + scratch foreach { lm => lm.node := TLFragmenter(xLen/8, p(CacheBlockBytes), earlyAck=true)(slaveNode) } def findScratchpadFromICache: Option[AddressSet] = scratch.map { s => val finalNode = 
frontend.masterNode.edgesOut.head.manager.managers.find(_.nodePath.last == s.node) From 40f18e6e430efd942339e2f8576778c542543a22 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Wed, 26 Apr 2017 20:49:18 -0700 Subject: [PATCH 23/29] diplomacy: optimize IdRange overlap detection --- src/main/scala/diplomacy/Parameters.scala | 18 ++++++++++++++---- src/main/scala/uncore/axi4/Parameters.scala | 4 +++- .../scala/uncore/tilelink2/Parameters.scala | 6 +++--- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/main/scala/diplomacy/Parameters.scala b/src/main/scala/diplomacy/Parameters.scala index 795f98a9..4e8ef4c1 100644 --- a/src/main/scala/diplomacy/Parameters.scala +++ b/src/main/scala/diplomacy/Parameters.scala @@ -17,14 +17,16 @@ object RegionType { } // A non-empty half-open range; [start, end) -case class IdRange(start: Int, end: Int) +case class IdRange(start: Int, end: Int) extends Ordered[IdRange] { require (start >= 0, s"Ids cannot be negative, but got: $start.") require (start < end, "Id ranges cannot be empty.") - // This is a strict partial ordering - def <(x: IdRange) = end <= x.start - def >(x: IdRange) = x < this + def compare(x: IdRange) = { + val primary = (this.start - x.start).signum + val secondary = (x.end - this.end).signum + if (primary != 0) primary else secondary + } def overlaps(x: IdRange) = start < x.end && x.start < end def contains(x: IdRange) = start <= x.start && x.end <= end @@ -43,6 +45,14 @@ case class IdRange(start: Int, end: Int) def range = start until end } +object IdRange +{ + def overlaps(s: Seq[IdRange]) = if (s.isEmpty) None else { + val ranges = s.sorted + (ranges.tail zip ranges.init) find { case (a, b) => a overlaps b } + } +} + // An potentially empty inclusive range of 2-powers [min, max] (in bytes) case class TransferSizes(min: Int, max: Int) { diff --git a/src/main/scala/uncore/axi4/Parameters.scala b/src/main/scala/uncore/axi4/Parameters.scala index 9ef05c77..8642613e 100644 --- 
a/src/main/scala/uncore/axi4/Parameters.scala +++ b/src/main/scala/uncore/axi4/Parameters.scala @@ -79,7 +79,9 @@ case class AXI4MasterPortParameters( require (userBits >= 0) // Require disjoint ranges for ids - masters.combinations(2).foreach { case Seq(x,y) => require (!x.id.overlaps(y.id), s"$x and $y overlap") } + IdRange.overlaps(masters.map(_.id)).foreach { case (x, y) => + require (!x.overlaps(y), s"AXI4MasterParameters.id $x and $y overlap") + } } case class AXI4BundleParameters( diff --git a/src/main/scala/uncore/tilelink2/Parameters.scala b/src/main/scala/uncore/tilelink2/Parameters.scala index 3665b15a..aec47951 100644 --- a/src/main/scala/uncore/tilelink2/Parameters.scala +++ b/src/main/scala/uncore/tilelink2/Parameters.scala @@ -199,9 +199,9 @@ case class TLClientPortParameters( require (minLatency >= 0) // Require disjoint ranges for Ids - clients.combinations(2).foreach({ case Seq(x,y) => - require (!x.sourceId.overlaps(y.sourceId)) - }) + IdRange.overlaps(clients.map(_.sourceId)).foreach { case (x, y) => + require (!x.overlaps(y), s"TLClientParameters.sourceId ${x} overlaps ${y}") + } // Bounds on required sizes def endSourceId = clients.map(_.sourceId.end).max From 976af7a8c7f69251328624be09a322d0197d0f76 Mon Sep 17 00:00:00 2001 From: "Wesley W. 
Terpstra" Date: Thu, 27 Apr 2017 13:21:25 -0700 Subject: [PATCH 24/29] tilelink2: better width inference for {left,right}OR --- src/main/scala/uncore/tilelink2/package.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/scala/uncore/tilelink2/package.scala b/src/main/scala/uncore/tilelink2/package.scala index 677e7248..7370b463 100644 --- a/src/main/scala/uncore/tilelink2/package.scala +++ b/src/main/scala/uncore/tilelink2/package.scala @@ -19,18 +19,18 @@ package object tilelink2 def UIntToOH1(x: UInt, width: Int) = ~(SInt(-1, width=width).asUInt << x)(width-1, 0) def trailingZeros(x: Int) = if (x > 0) Some(log2Ceil(x & -x)) else None // Fill 1s from low bits to high bits - def leftOR(x: UInt) = { - val w = x.getWidth + def leftOR(x: UInt): UInt = leftOR(x, x.getWidth) + def leftOR(x: UInt, w: Integer): UInt = { def helper(s: Int, x: UInt): UInt = if (s >= w) x else helper(s+s, x | (x << s)(w-1,0)) - helper(1, x) + helper(1, x)(w-1, 0) } // Fill 1s form high bits to low bits - def rightOR(x: UInt) = { - val w = x.getWidth + def rightOR(x: UInt): UInt = rightOR(x, x.getWidth) + def rightOR(x: UInt, w: Integer): UInt = { def helper(s: Int, x: UInt): UInt = if (s >= w) x else helper(s+s, x | (x >> s)) - helper(1, x) + helper(1, x)(w-1, 0) } // This gets used everywhere, so make the smallest circuit possible ... // Given an address and size, create a mask of beatBytes size From 661015a78d6cefada1c07e126faec6f49dd7c20a Mon Sep 17 00:00:00 2001 From: "Wesley W. 
Terpstra" Date: Thu, 27 Apr 2017 14:40:49 -0700 Subject: [PATCH 25/29] axi4: switch arbiter to round robin --- src/main/scala/uncore/axi4/ToTL.scala | 2 +- src/main/scala/uncore/tilelink2/Arbiter.scala | 58 +++++++++++++++++-- 2 files changed, 53 insertions(+), 7 deletions(-) diff --git a/src/main/scala/uncore/axi4/ToTL.scala b/src/main/scala/uncore/axi4/ToTL.scala index d1edd5b6..088b24e7 100644 --- a/src/main/scala/uncore/axi4/ToTL.scala +++ b/src/main/scala/uncore/axi4/ToTL.scala @@ -105,7 +105,7 @@ class AXI4ToTL()(implicit p: Parameters) extends LazyModule when (in.aw.fire() && s) { r := r + UInt(1) } } - TLArbiter(TLArbiter.lowestIndexFirst)(out.a, (UInt(0), r_out), (in.aw.bits.len, w_out)) + TLArbiter(TLArbiter.roundRobin)(out.a, (UInt(0), r_out), (in.aw.bits.len, w_out)) val ok_b = Wire(in.b) val ok_r = Wire(in.r) diff --git a/src/main/scala/uncore/tilelink2/Arbiter.scala b/src/main/scala/uncore/tilelink2/Arbiter.scala index f468c26a..2a99f843 100644 --- a/src/main/scala/uncore/tilelink2/Arbiter.scala +++ b/src/main/scala/uncore/tilelink2/Arbiter.scala @@ -3,15 +3,28 @@ package uncore.tilelink2 import Chisel._ +import config._ import diplomacy._ object TLArbiter { - // (valids, granted) => readys - type Policy = (Seq[Bool], Bool) => Seq[Bool] + // (valids, select) => readys + type Policy = (Integer, UInt, Bool) => UInt - val lowestIndexFirst: Policy = (valids, granted) => - valids.scanLeft(Bool(true))(_ && !_).init + val lowestIndexFirst: Policy = (width, valids, select) => ~(leftOR(valids) << 1)(width-1, 0) + + val roundRobin: Policy = (width, valids, select) => { + val valid = valids(width-1, 0) + assert (valid === valids) + val mask = RegInit(~UInt(0, width=width)) + val filter = Cat(valid & ~mask, valid) + val unready = (rightOR(filter, width*2) >> 1) | (mask << width) // last right shift unneeded + val readys = ~((unready >> width) & unready(width-1, 0)) + when (select && valid.orR) { + mask := leftOR(readys & valid, width) + } + readys(width-1, 0) + } 
def lowestFromSeq[T <: TLChannel](edge: TLEdge, sink: DecoupledIO[T], sources: Seq[DecoupledIO[T]]) { apply(lowestIndexFirst)(sink, sources.map(s => (edge.numBeats1(s.bits), s)):_*) @@ -21,6 +34,10 @@ object TLArbiter apply(lowestIndexFirst)(sink, sources.toList.map(s => (edge.numBeats1(s.bits), s)):_*) } + def robin[T <: TLChannel](edge: TLEdge, sink: DecoupledIO[T], sources: DecoupledIO[T]*) { + apply(roundRobin)(sink, sources.toList.map(s => (edge.numBeats1(s.bits), s)):_*) + } + def apply[T <: Data](policy: Policy)(sink: DecoupledIO[T], sources: (UInt, DecoupledIO[T])*) { if (sources.isEmpty) { sink.valid := Bool(false) @@ -37,13 +54,13 @@ object TLArbiter // Who wants access to the sink? val valids = sourcesIn.map(_.valid) // Arbitrate amongst the requests - val readys = Vec(policy(valids, latch)) + val readys = Vec(policy(valids.size, Cat(valids.reverse), latch).toBools) // Which request wins arbitration? val winner = Vec((readys zip valids) map { case (r,v) => r&&v }) // Confirm the policy works properly require (readys.size == valids.size) - // Never two winner + // Never two winners val prefixOR = winner.scanLeft(Bool(false))(_||_).init assert((prefixOR zip winner) map { case (p,w) => !p || !w } reduce {_ && _}) // If there was any request, there is a winner @@ -73,3 +90,32 @@ object TLArbiter } } } + +/** Synthesizeable unit tests */ +import unittest._ + +class TestRobin()(implicit p: Parameters) extends UnitTest(timeout = 500000) { + val sources = Wire(Vec(6, DecoupledIO(UInt(width=3)))) + val sink = Wire(DecoupledIO(UInt(width=3))) + val count = RegInit(UInt(0, width=8)) + + val lfsr = LFSR16(Bool(true)) + val valid = lfsr(0) + val ready = lfsr(15) + + sources.zipWithIndex.map { case (z, i) => z.bits := UInt(i) } + sources(0).valid := valid + sources(1).valid := Bool(false) + sources(2).valid := valid + sources(3).valid := valid + sources(4).valid := Bool(false) + sources(5).valid := valid + sink.ready := ready + + TLArbiter(TLArbiter.roundRobin)(sink, 
sources.zipWithIndex.map { case (z, i) => (UInt(i), z) }:_*) + when (sink.fire()) { printf("TestRobin: %d\n", sink.bits) } + when (!sink.fire()) { printf("TestRobin: idle (%d %d)\n", valid, ready) } + + count := count + UInt(1) + io.finished := count >= UInt(128) +} From b0b5601e8d8cc4aa9bb7ff02a49a4a27b961794a Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Thu, 27 Apr 2017 16:21:01 -0700 Subject: [PATCH 26/29] axi4: ToTL correct error handling If there is an illegal AWADDR = 0x2 on a 32-bit bus, remapping it to an aligned address on the error device may make the mask inconsistent with the address + size. --- src/main/scala/uncore/axi4/ToTL.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/scala/uncore/axi4/ToTL.scala b/src/main/scala/uncore/axi4/ToTL.scala index 088b24e7..bce1c60b 100644 --- a/src/main/scala/uncore/axi4/ToTL.scala +++ b/src/main/scala/uncore/axi4/ToTL.scala @@ -71,7 +71,7 @@ class AXI4ToTL()(implicit p: Parameters) extends LazyModule val r_size1 = in.ar.bits.bytes1() val r_size = OH1ToUInt(r_size1) val r_ok = edgeOut.manager.supportsGetSafe(in.ar.bits.addr, r_size) - val r_addr = Mux(r_ok, in.ar.bits.addr, UInt(error)) + val r_addr = Mux(r_ok, in.ar.bits.addr, UInt(error) | in.ar.bits.addr(log2Up(beatBytes)-1, 0)) val r_count = RegInit(Vec.fill(numIds) { UInt(0, width = log2Ceil(maxFlight)) }) val r_id = Cat(in.ar.bits.id, r_count(in.ar.bits.id), UInt(0, width=1)) @@ -89,7 +89,7 @@ class AXI4ToTL()(implicit p: Parameters) extends LazyModule val w_size1 = in.aw.bits.bytes1() val w_size = OH1ToUInt(w_size1) val w_ok = edgeOut.manager.supportsPutPartialSafe(in.aw.bits.addr, w_size) - val w_addr = Mux(w_ok, in.aw.bits.addr, UInt(error)) + val w_addr = Mux(w_ok, in.aw.bits.addr, UInt(error) | in.aw.bits.addr(log2Up(beatBytes)-1, 0)) val w_count = RegInit(Vec.fill(numIds) { UInt(0, width = log2Ceil(maxFlight)) }) val w_id = Cat(in.aw.bits.id, w_count(in.aw.bits.id), UInt(1, width=1)) From 
58a4529cc52aec640e85a75f3b9c7e4839f73db3 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Thu, 27 Apr 2017 17:55:51 -0700 Subject: [PATCH 27/29] axi4: the last missing piece for safe FIFO ordering --- src/main/scala/uncore/tilelink2/ToAXI4.scala | 38 +++++++++++++++++--- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/src/main/scala/uncore/tilelink2/ToAXI4.scala b/src/main/scala/uncore/tilelink2/ToAXI4.scala index 5d26a547..75003eda 100644 --- a/src/main/scala/uncore/tilelink2/ToAXI4.scala +++ b/src/main/scala/uncore/tilelink2/ToAXI4.scala @@ -60,9 +60,12 @@ class TLToAXI4(beatBytes: Int, combinational: Boolean = true)(implicit p: Parame // Construct the source=>ID mapping table val idTable = Wire(Vec(edgeIn.client.endSourceId, out.aw.bits.id)) + var idCount = Array.fill(edgeOut.master.endId) { 0 } (edgeIn.client.clients zip edgeOut.master.masters) foreach { case (c, m) => for (i <- 0 until c.sourceId.size) { - idTable(c.sourceId.start + i) := UInt(m.id.start + (if (c.requestFifo) 0 else i)) + val id = m.id.start + (if (c.requestFifo) 0 else i) + idTable(c.sourceId.start + i) := UInt(id) + idCount(id) = idCount(id) + 1 } } @@ -139,11 +142,11 @@ class TLToAXI4(beatBytes: Int, combinational: Boolean = true)(implicit p: Parame arw.qos := UInt(0) // no QoS arw.user.foreach { _ := a_state } - // !!! 
Mix R-W stall here - in.a.ready := Mux(a_isPut, (doneAW || out_arw.ready) && out_w.ready, out_arw.ready) - out_arw.valid := in.a.valid && Mux(a_isPut, !doneAW && out_w.ready, Bool(true)) + val stall = Wire(Bool()) + in.a.ready := !stall && Mux(a_isPut, (doneAW || out_arw.ready) && out_w.ready, out_arw.ready) + out_arw.valid := !stall && in.a.valid && Mux(a_isPut, !doneAW && out_w.ready, Bool(true)) - out_w.valid := in.a.valid && a_isPut && (doneAW || out_arw.ready) + out_w.valid := !stall && in.a.valid && a_isPut && (doneAW || out_arw.ready) out_w.bits.data := in.a.bits.data out_w.bits.strb := in.a.bits.mask out_w.bits.last := a_last @@ -167,6 +170,31 @@ class TLToAXI4(beatBytes: Int, combinational: Boolean = true)(implicit p: Parame in.d.bits := Mux(r_wins, r_d, b_d) in.d.bits.data := out.r.bits.data // avoid a costly Mux + // We need to track if any reads or writes are inflight for a given ID. + // If the opposite type arrives, we must stall until it completes. + val a_sel = UIntToOH(arw.id, edgeOut.master.endId).toBools + val d_sel = UIntToOH(Mux(r_wins, out.r.bits.id, out.b.bits.id), edgeOut.master.endId).toBools + val d_last = Mux(r_wins, out.r.bits.last, Bool(true)) + val d_first = RegInit(Bool(true)) + when (in.d.fire()) { d_first := d_last } + val stalls = ((a_sel zip d_sel) zip idCount) filter { case (_, n) => n > 1 } map { case ((as, ds), n) => + val count = RegInit(UInt(0, width = log2Ceil(n + 1))) + val write = Reg(Bool()) + val idle = count === UInt(0) + + // Once we start getting the response, it's safe to already switch R/W + val inc = as && out_arw.fire() + val dec = ds && d_first && in.d.fire() + count := count + inc.asUInt - dec.asUInt + + assert (!dec || count =/= UInt(0)) // underflow + assert (!inc || count =/= UInt(n)) // overflow + + when (inc) { write := arw.wen } + !idle && write =/= arw.wen + } + stall := stalls.foldLeft(Bool(false))(_||_) + // Tie off unused channels in.b.valid := Bool(false) in.c.ready := Bool(true) From 
3d06f01a2cea7bdab4e36ef3af735f4cbb7aae61 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Fri, 28 Apr 2017 08:41:31 -0700 Subject: [PATCH 28/29] rocket: turn on early ack for ITIM --- src/main/scala/rocket/ScratchpadSlavePort.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/rocket/ScratchpadSlavePort.scala b/src/main/scala/rocket/ScratchpadSlavePort.scala index 231af8c6..c89d7de9 100644 --- a/src/main/scala/rocket/ScratchpadSlavePort.scala +++ b/src/main/scala/rocket/ScratchpadSlavePort.scala @@ -111,7 +111,7 @@ trait CanHaveScratchpad extends HasHellaCache with HasICacheFrontend with HasCor val slaveNode = TLInputNode() // Up to two uses for this input node: // 1) Frontend always exists, but may or may not have a scratchpad node - val fg = LazyModule(new TLFragmenter(fetchWidth*coreInstBytes, p(CacheBlockBytes), true)) + val fg = LazyModule(new TLFragmenter(fetchWidth*coreInstBytes, p(CacheBlockBytes), earlyAck=true)) val ww = LazyModule(new TLWidthWidget(xLen/8)) frontend.slaveNode :*= fg.node fg.node :*= ww.node From fe280187a1fe645222a560351c909bb157be893e Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Mon, 1 May 2017 22:52:33 -0700 Subject: [PATCH 29/29] axi4: Fragmenter cuts all input channel readys AXI4 forbids any input to lead combinationally to an output. For the AXI4ToTL direction, front-load the cuts for {AW, AR, W}.ready AXI4ToTL makes the R and B channels irrevocable. 
--- src/main/scala/uncore/axi4/Fragmenter.scala | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/main/scala/uncore/axi4/Fragmenter.scala b/src/main/scala/uncore/axi4/Fragmenter.scala index dcc2ddf5..d29f553d 100644 --- a/src/main/scala/uncore/axi4/Fragmenter.scala +++ b/src/main/scala/uncore/axi4/Fragmenter.scala @@ -139,6 +139,10 @@ class AXI4Fragmenter()(implicit p: Parameters) extends LazyModule val (in_ar, ar_last, _) = fragment(Queue.irrevocable(in.ar, 1, flow=true), readSizes1) val (in_aw, aw_last, w_beats) = fragment(Queue.irrevocable(in.aw, 1, flow=true), writeSizes1) + // AXI ready may not depend on valid of other channels + // We cut wready here along with awready and arready before AXI4ToTL + val in_w = Queue.irrevocable(in.w, 1, flow=true) + // AR flow control; super easy out.ar <> in_ar out.ar.bits.user.get := Cat(in_ar.bits.user.toList ++ Seq(ar_last)) @@ -167,12 +171,12 @@ class AXI4Fragmenter()(implicit p: Parameters) extends LazyModule // W flow control wbeats_ready := w_idle - out.w.valid := in.w.valid && (!wbeats_ready || wbeats_valid) - in.w.ready := out.w.ready && (!wbeats_ready || wbeats_valid) - out.w.bits := in.w.bits + out.w.valid := in_w.valid && (!wbeats_ready || wbeats_valid) + in_w.ready := out.w.ready && (!wbeats_ready || wbeats_valid) + out.w.bits := in_w.bits out.w.bits.last := w_last // We should also recreate the last last - assert (!out.w.valid || !in.w.bits.last || w_last) + assert (!out.w.valid || !in_w.bits.last || w_last) // R flow control val r_last = out.r.bits.user.get(0)