From 9bfd8c1cf5372ca49ee24cea527d325c61a63677 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Thu, 8 Sep 2016 10:38:38 -0700 Subject: [PATCH] TL2 WidthWidget (#258) * tilelink2 Narrower: support widening and narrowing on all channels Be extra careful with the mask transformations We need to make sure that narrowing or widening do not cause a loss of information about the operation. The addr_hi+(mask|addr_lo) conversions are now 1-1, except on D, which should not matter. * tilelink2 SRAM: work around firrtl SeqMem bug * tilelink2 WidthWidget: renamed from Narrower (it now converts both ways) * tilelink2 mask: fix an issue with width=1 data buses --- src/main/scala/uncore/tilelink2/Bundles.scala | 2 +- src/main/scala/uncore/tilelink2/Edges.scala | 2 +- .../scala/uncore/tilelink2/Narrower.scala | 138 -------------- src/main/scala/uncore/tilelink2/SRAM.scala | 11 +- .../scala/uncore/tilelink2/WidthWidget.scala | 180 ++++++++++++++++++ src/main/scala/uncore/tilelink2/package.scala | 1 + 6 files changed, 189 insertions(+), 145 deletions(-) delete mode 100644 src/main/scala/uncore/tilelink2/Narrower.scala create mode 100644 src/main/scala/uncore/tilelink2/WidthWidget.scala diff --git a/src/main/scala/uncore/tilelink2/Bundles.scala b/src/main/scala/uncore/tilelink2/Bundles.scala index e071c466..1548a4f3 100644 --- a/src/main/scala/uncore/tilelink2/Bundles.scala +++ b/src/main/scala/uncore/tilelink2/Bundles.scala @@ -100,7 +100,7 @@ object TLAtomics def isLogical(x: UInt) = x <= SWAP } -sealed trait TLChannel +sealed trait TLChannel extends TLBundleBase sealed trait TLDataChannel extends TLChannel sealed trait TLAddrChannel extends TLDataChannel diff --git a/src/main/scala/uncore/tilelink2/Edges.scala b/src/main/scala/uncore/tilelink2/Edges.scala index 75053b88..558af74b 100644 --- a/src/main/scala/uncore/tilelink2/Edges.scala +++ b/src/main/scala/uncore/tilelink2/Edges.scala @@ -27,7 +27,7 @@ class TLEdge( // This gets used everywhere, so make the smallest 
circuit possible ... def mask(addr_lo: UInt, lgSize: UInt): UInt = { val lgBytes = log2Ceil(manager.beatBytes) - val sizeOH = UIntToOH(lgSize, lgBytes) + val sizeOH = UIntToOH(lgSize, log2Up(manager.beatBytes)) def helper(i: Int): Seq[(Bool, Bool)] = { if (i == 0) { Seq((lgSize >= UInt(lgBytes), Bool(true))) diff --git a/src/main/scala/uncore/tilelink2/Narrower.scala b/src/main/scala/uncore/tilelink2/Narrower.scala deleted file mode 100644 index 599386d9..00000000 --- a/src/main/scala/uncore/tilelink2/Narrower.scala +++ /dev/null @@ -1,138 +0,0 @@ -// See LICENSE for license details. - -package uncore.tilelink2 - -import Chisel._ -import chisel3.internal.sourceinfo.SourceInfo -import scala.math.{min,max} - -// innBeatBytes => the bus width after the adapter -class TLNarrower(innerBeatBytes: Int) extends LazyModule -{ - val node = TLAdapterNode( - clientFn = { case Seq(c) => c }, - managerFn = { case Seq(m) => m.copy(beatBytes = innerBeatBytes) }) - - lazy val module = new LazyModuleImp(this) { - val io = new Bundle { - val in = node.bundleIn - val out = node.bundleOut - } - - val edgeOut = node.edgesOut(0) - val edgeIn = node.edgesIn(0) - val outerBeatBytes = edgeOut.manager.beatBytes - require (outerBeatBytes < innerBeatBytes) - - val ratio = innerBeatBytes / outerBeatBytes - val bce = edgeOut.manager.anySupportAcquire && edgeIn.client.anySupportProbe - - def trailingZeros(x: Int) = if (x > 0) Some(log2Ceil(x & -x)) else None - - def split(edge: TLEdge, in: TLDataChannel, fire: Bool): (Bool, UInt, UInt) = { - val dataSlices = Vec.tabulate (ratio) { i => edge.data(in)((i+1)*outerBeatBytes*8-1, i*outerBeatBytes*8) } - val maskSlices = Vec.tabulate (ratio) { i => edge.mask(in)((i+1)*outerBeatBytes -1, i*outerBeatBytes) } - val filter = Reg(UInt(width = ratio), init = SInt(-1, width = ratio).asUInt) - val mask = maskSlices.map(_.orR) - val hasData = edge.hasData(in) - - // decoded_size = 1111 (for smallest), 0101, 0001 (for largest) - val sizeOH1 = 
UIntToOH1(edge.size(in), log2Ceil(innerBeatBytes)) >> log2Ceil(outerBeatBytes) - val decoded_size = Seq.tabulate(ratio) { i => trailingZeros(i).map(!sizeOH1(_)).getOrElse(Bool(true)) } - - val first = filter(ratio-1) - val new_filter = Mux(first, Cat(decoded_size.reverse), filter << 1) - val last = new_filter(ratio-1) || !hasData - when (fire) { - filter := new_filter - when (!hasData) { filter := SInt(-1, width = ratio).asUInt } - } - - if (edge.staticHasData(in) == Some(false)) { - (Bool(true), UInt(0), UInt(0)) - } else { - val select = Cat(mask.reverse) & new_filter - (last, Mux1H(select, dataSlices), Mux1H(select, maskSlices)) - } - } - - def merge(edge: TLEdge, in: TLDataChannel, fire: Bool): (Bool, UInt) = { - val count = RegInit(UInt(0, width = log2Ceil(ratio))) - val rdata = Reg(UInt(width = (ratio-1)*outerBeatBytes*8)) - val data = Cat(edge.data(in), rdata) - val first = count === UInt(0) - val limit = UIntToOH1(edge.size(in), log2Ceil(innerBeatBytes)) >> log2Ceil(outerBeatBytes) - val last = count === limit || !edge.hasData(in) - - when (fire) { - rdata := data >> outerBeatBytes*8 - count := count + UInt(1) - when (last) { count := UInt(0) } - } - - val cases = Seq.tabulate(log2Ceil(ratio)+1) { i => - val high = innerBeatBytes*8 - val take = (1 << i)*outerBeatBytes*8 - Fill(1 << (log2Ceil(ratio)-i), data(high-1, high-take)) - } - val mux = Vec.tabulate(log2Ceil(edge.maxTransfer)+1) { lgSize => - cases(min(max(lgSize - log2Ceil(outerBeatBytes), 0), log2Ceil(ratio))) - } - - if (edge.staticHasData(in) == Some(false)) { - (Bool(true), UInt(0)) - } else { - (last, mux(edge.size(in))) - } - } - - val in = io.in(0) - val out = io.out(0) - - val (alast, adata, amask) = split(edgeIn, in.a.bits, out.a.fire()) - in.a.ready := out.a.ready && alast - out.a.valid := in.a.valid - out.a.bits := in.a.bits - out.a.bits.addr_hi := Cat(in.a.bits.addr_hi, edgeIn.addr_lo(in.a.bits) >> log2Ceil(outerBeatBytes)) - out.a.bits.data := adata - out.a.bits.mask := amask - - val 
(dlast, ddata) = merge(edgeOut, out.d.bits, out.d.fire()) - out.d.ready := in.d.ready || !dlast - in.d.valid := out.d.valid && dlast - in.d.bits := out.d.bits - in.d.bits.data := ddata - - if (bce) { - require (false) - // C has no wmask !!! -// val (clast, cdata, cmask) = split(in.c.bits, out.c.fire()) -// in.c.ready := out.c.ready && clast -// out.c.valid := in.c.valid -// out.c.bits := in.c.bits -// out.c.bits.data := cdata -// out.c.bits.mask := cmask - - in.e.ready := out.e.ready - out.e.valid := in.e.valid - out.e.bits := in.e.bits - } else { - in.b.valid := Bool(false) - in.c.ready := Bool(true) - in.e.ready := Bool(true) - out.b.ready := Bool(true) - out.c.valid := Bool(false) - out.e.valid := Bool(false) - } - } -} - -object TLNarrower -{ - // applied to the TL source node; connect (Narrower(x.node, 16) -> y.node) - def apply(x: TLBaseNode, innerBeatBytes: Int)(implicit lazyModule: LazyModule, sourceInfo: SourceInfo): TLBaseNode = { - val narrower = LazyModule(new TLNarrower(innerBeatBytes)) - lazyModule.connect(x -> narrower.node) - narrower.node - } -} diff --git a/src/main/scala/uncore/tilelink2/SRAM.scala b/src/main/scala/uncore/tilelink2/SRAM.scala index 07d107df..03e7385d 100644 --- a/src/main/scala/uncore/tilelink2/SRAM.scala +++ b/src/main/scala/uncore/tilelink2/SRAM.scala @@ -59,13 +59,14 @@ class TLRAM(address: AddressSet, beatBytes: Int = 4) extends LazyModule d_size := in.a.bits.size d_source := in.a.bits.source d_addr := edge.addr_lo(in.a.bits) - when (read) { - rdata := mem.read(memAddress) - } .otherwise { - mem.write(memAddress, wdata, in.a.bits.mask.toBools) - } } + // exactly this pattern is required to get a RWM memory + when (in.a.fire() && !read) { + mem.write(memAddress, wdata, in.a.bits.mask.toBools) + } + rdata := mem.read(memAddress, in.a.fire() && read) + // Tie off unused channels in.b.valid := Bool(false) in.c.ready := Bool(true) diff --git a/src/main/scala/uncore/tilelink2/WidthWidget.scala 
b/src/main/scala/uncore/tilelink2/WidthWidget.scala new file mode 100644 index 00000000..76529564 --- /dev/null +++ b/src/main/scala/uncore/tilelink2/WidthWidget.scala @@ -0,0 +1,180 @@ +// See LICENSE for license details. + +package uncore.tilelink2 + +import Chisel._ +import chisel3.internal.sourceinfo.SourceInfo +import scala.math.{min,max} + +// innerBeatBytes => the new client-facing bus width +class TLWidthWidget(innerBeatBytes: Int) extends LazyModule +{ + val node = TLAdapterNode( + clientFn = { case Seq(c) => c }, + managerFn = { case Seq(m) => m.copy(beatBytes = innerBeatBytes) }) + + lazy val module = new LazyModuleImp(this) { + val io = new Bundle { + val in = node.bundleIn + val out = node.bundleOut + } + + def merge[T <: TLDataChannel](edgeIn: TLEdge, in: DecoupledIO[T], edgeOut: TLEdge, out: DecoupledIO[T]) = { + val inBytes = edgeIn.manager.beatBytes + val outBytes = edgeOut.manager.beatBytes + val ratio = outBytes / inBytes + + val rdata = Reg(UInt(width = (ratio-1)*inBytes*8)) + val rmask = Reg(UInt(width = (ratio-1)*inBytes)) + val data = Cat(edgeIn.data(in.bits), rdata) + val mask = Cat(edgeIn.mask(in.bits), rmask) + val size = edgeIn.size(in.bits) + val hasData = edgeIn.hasData(in.bits) + val addr_lo = in.bits match { + case x: TLAddrChannel => edgeIn.address(x) + case _ => UInt(0) + } + val addr = addr_lo >> log2Ceil(outBytes) + + val count = RegInit(UInt(0, width = log2Ceil(ratio))) + val first = count === UInt(0) + val limit = UIntToOH1(size, log2Ceil(outBytes)) >> log2Ceil(inBytes) + val last = count === limit || !hasData + + when (in.fire()) { + rdata := data >> inBytes*8 + rmask := mask >> inBytes + count := count + UInt(1) + when (last) { count := UInt(0) } + } + + val cases = Seq.tabulate(log2Ceil(ratio)+1) { i => + val high = outBytes + val take = (1 << i)*inBytes + (Fill(1 << (log2Ceil(ratio)-i), data(high*8-1, (high-take)*8)), + Fill(1 << (log2Ceil(ratio)-i), mask(high -1, (high-take)))) + } + val dataMux = 
Vec.tabulate(log2Ceil(edgeIn.maxTransfer)+1) { lgSize => + cases(min(max(lgSize - log2Ceil(inBytes), 0), log2Ceil(ratio)))._1 + } + val maskMux = Vec.tabulate(log2Ceil(edgeIn.maxTransfer)+1) { lgSize => + cases(min(max(lgSize - log2Ceil(inBytes), 0), log2Ceil(ratio)))._2 + } + + val dataOut = if (edgeIn.staticHasData(in.bits) == Some(false)) UInt(0) else dataMux(size) + val maskOut = maskMux(size) & edgeOut.mask(addr_lo, size) + + in.ready := out.ready || !last + out.valid := in.valid && last + out.bits := in.bits + edgeOut.data(out.bits) := dataOut + + out.bits match { + case a: TLBundleA => a.addr_hi := addr; a.mask := maskOut + case b: TLBundleB => b.addr_hi := addr; b.mask := maskOut + case c: TLBundleC => c.addr_hi := addr; c.addr_lo := addr_lo + case d: TLBundleD => () + // addr_lo gets padded with 0s on D channel, the only lossy transform in this core + // this should be safe, because we only care about addr_lo on D to determine which + // piece of data to extract when the D data bus is narrowed. Since we duplicated the + // data to all locations, addr_lo still points at a valid copy. 
+ } + } + + def split[T <: TLDataChannel](edgeIn: TLEdge, in: DecoupledIO[T], edgeOut: TLEdge, out: DecoupledIO[T]) = { + val inBytes = edgeIn.manager.beatBytes + val outBytes = edgeOut.manager.beatBytes + val ratio = inBytes / outBytes + + val hasData = edgeIn.hasData(in.bits) + val size = edgeIn.size(in.bits) + val data = edgeIn.data(in.bits) + val mask = edgeIn.mask(in.bits) + val addr = in.bits match { + case x: TLAddrChannel => edgeIn.address(x) >> log2Ceil(outBytes) + case _ => UInt(0) + } + + val dataSlices = Vec.tabulate(ratio) { i => data((i+1)*outBytes*8-1, i*outBytes*8) } + val maskSlices = Vec.tabulate(ratio) { i => mask((i+1)*outBytes -1, i*outBytes) } + val filter = Reg(UInt(width = ratio), init = SInt(-1, width = ratio).asUInt) + val maskR = maskSlices.map(_.orR) + + // decoded_size = 1111 (for smallest), 0101, 0001 (for largest) + val sizeOH1 = UIntToOH1(size, log2Ceil(inBytes)) >> log2Ceil(outBytes) + val decoded_size = Seq.tabulate(ratio) { i => trailingZeros(i).map(!sizeOH1(_)).getOrElse(Bool(true)) } + + val first = filter(ratio-1) + val new_filter = Mux(first, Cat(decoded_size.reverse), filter << 1) + val last = new_filter(ratio-1) || !hasData + when (out.fire()) { + filter := new_filter + when (!hasData) { filter := SInt(-1, width = ratio).asUInt } + } + + val select = Cat(maskR.reverse) & new_filter + val dataOut = if (edgeIn.staticHasData(in.bits) == Some(false)) UInt(0) else Mux1H(select, dataSlices) + val maskOut = Mux1H(select, maskSlices) + + in.ready := out.ready && last + out.valid := in.valid + out.bits := in.bits + edgeOut.data(out.bits) := dataOut + + out.bits match { + case a: TLBundleA => a.addr_hi := addr; a.mask := maskOut + case b: TLBundleB => b.addr_hi := addr; b.mask := maskOut + case c: TLBundleC => c.addr_hi := addr + case d: TLBundleD => () + } + + // addr_lo gets truncated automagically + } + + def splice[T <: TLDataChannel](edgeIn: TLEdge, in: DecoupledIO[T], edgeOut: TLEdge, out: DecoupledIO[T]) = { + if 
(edgeIn.manager.beatBytes == edgeOut.manager.beatBytes) { + // nothing to do; pass it through + out <> in + } else if (edgeIn.manager.beatBytes > edgeOut.manager.beatBytes) { + // split input to output + split(edgeIn, in, edgeOut, out) + } else { + // merge input to output + merge(edgeIn, in, edgeOut, out) + } + } + + val edgeOut = node.edgesOut(0) + val edgeIn = node.edgesIn(0) + val in = io.in(0) + val out = io.out(0) + + splice(edgeIn, in.a, edgeOut, out.a) + splice(edgeOut, out.d, edgeIn, in.d) + + if (edgeOut.manager.anySupportAcquire && edgeIn.client.anySupportProbe) { + splice(edgeOut, out.b, edgeIn, in.b) + splice(edgeIn, in.c, edgeOut, out.c) + in.e.ready := out.e.ready + out.e.valid := in.e.valid + out.e.bits := in.e.bits + } else { + in.b.valid := Bool(false) + in.c.ready := Bool(true) + in.e.ready := Bool(true) + out.b.ready := Bool(true) + out.c.valid := Bool(false) + out.e.valid := Bool(false) + } + } +} + +object TLWidthWidget +{ + // applied to the TL source node; connect (WidthWidget(x.node, 16) -> y.node) + def apply(x: TLBaseNode, innerBeatBytes: Int)(implicit lazyModule: LazyModule, sourceInfo: SourceInfo): TLBaseNode = { + val widget = LazyModule(new TLWidthWidget(innerBeatBytes)) + lazyModule.connect(x -> widget.node) + widget.node + } +} diff --git a/src/main/scala/uncore/tilelink2/package.scala b/src/main/scala/uncore/tilelink2/package.scala index 621fc288..b1e3dda2 100644 --- a/src/main/scala/uncore/tilelink2/package.scala +++ b/src/main/scala/uncore/tilelink2/package.scala @@ -7,4 +7,5 @@ package object tilelink2 type TLBaseNode = BaseNode[TLClientPortParameters, TLManagerPortParameters, TLEdgeOut, TLEdgeIn, TLBundle] def OH1ToUInt(x: UInt) = OHToUInt((x << 1 | UInt(1)) ^ x) def UIntToOH1(x: UInt, width: Int) = ~(SInt(-1, width=width).asUInt << x)(width-1, 0) + def trailingZeros(x: Int) = if (x > 0) Some(log2Ceil(x & -x)) else None }