tilelink: WidthWidget rewrite beat merging
- errors are properly OR-reduced
- registers are latched only as needed (previously a shift register)
- beats are combined without inspecting the address (removes the addr_lo dependency)
This commit is contained in:
parent
f0ffb7e31e
commit
0f5065fbf3
@ -26,58 +26,55 @@ class TLWidthWidget(innerBeatBytes: Int)(implicit p: Parameters) extends LazyMod
|
|||||||
val inBytes = edgeIn.manager.beatBytes
|
val inBytes = edgeIn.manager.beatBytes
|
||||||
val outBytes = edgeOut.manager.beatBytes
|
val outBytes = edgeOut.manager.beatBytes
|
||||||
val ratio = outBytes / inBytes
|
val ratio = outBytes / inBytes
|
||||||
|
val keepBits = log2Ceil(outBytes)
|
||||||
|
val dropBits = log2Ceil(inBytes)
|
||||||
|
val countBits = log2Ceil(ratio)
|
||||||
|
|
||||||
val rdata = Reg(UInt(width = (ratio-1)*inBytes*8))
|
|
||||||
val rmask = Reg(UInt(width = (ratio-1)*inBytes))
|
|
||||||
val data = Cat(edgeIn.data(in.bits), rdata)
|
|
||||||
val mask = Cat(edgeIn.mask(in.bits), rmask)
|
|
||||||
val address = edgeIn.address(in.bits)
|
|
||||||
val size = edgeIn.size(in.bits)
|
val size = edgeIn.size(in.bits)
|
||||||
val hasData = edgeIn.hasData(in.bits)
|
val hasData = edgeIn.hasData(in.bits)
|
||||||
|
val limit = UIntToOH1(size, keepBits) >> dropBits
|
||||||
|
|
||||||
val count = RegInit(UInt(0, width = log2Ceil(ratio)))
|
val count = RegInit(UInt(0, width = countBits))
|
||||||
val first = count === UInt(0)
|
val first = count === UInt(0)
|
||||||
val limit = UIntToOH1(size, log2Ceil(outBytes)) >> log2Ceil(inBytes)
|
|
||||||
val last = count === limit || !hasData
|
val last = count === limit || !hasData
|
||||||
|
val enable = Seq.tabulate(ratio) { i => !((count ^ UInt(i)) & limit).orR }
|
||||||
|
|
||||||
when (in.fire()) {
|
when (in.fire()) {
|
||||||
rdata := data >> inBytes*8
|
|
||||||
rmask := mask >> inBytes
|
|
||||||
count := count + UInt(1)
|
count := count + UInt(1)
|
||||||
when (last) { count := UInt(0) }
|
when (last) { count := UInt(0) }
|
||||||
}
|
}
|
||||||
|
|
||||||
val cases = Seq.tabulate(log2Ceil(ratio)+1) { i =>
|
def helper(idata: UInt): UInt = {
|
||||||
val high = outBytes
|
val odata = Seq.fill(ratio) { idata }
|
||||||
val take = (1 << i)*inBytes
|
val rdata = Reg(Vec(ratio-1, idata))
|
||||||
(Fill(1 << (log2Ceil(ratio)-i), data(high*8-1, (high-take)*8)),
|
val pdata = rdata :+ idata
|
||||||
Fill(1 << (log2Ceil(ratio)-i), mask(high -1, (high-take))))
|
val mdata = (enable zip (odata zip pdata)) map { case (e, (o, p)) => Mux(e, o, p) }
|
||||||
|
when (in.fire() && !last) {
|
||||||
|
(rdata zip mdata) foreach { case (r, m) => r := m }
|
||||||
}
|
}
|
||||||
val dataMux = Vec.tabulate(log2Ceil(edgeIn.maxTransfer)+1) { lgSize =>
|
Cat(mdata.reverse)
|
||||||
cases(min(max(lgSize - log2Ceil(inBytes), 0), log2Ceil(ratio)))._1
|
|
||||||
}
|
|
||||||
val maskMux = Vec.tabulate(log2Ceil(edgeIn.maxTransfer)+1) { lgSize =>
|
|
||||||
cases(min(max(lgSize - log2Ceil(inBytes), 0), log2Ceil(ratio)))._2
|
|
||||||
}
|
}
|
||||||
|
|
||||||
val dataOut = if (edgeIn.staticHasData(in.bits) == Some(false)) UInt(0) else dataMux(size)
|
def reduce(i: Bool): Bool = {
|
||||||
lazy val maskFull = edgeOut.mask(address, size)
|
val state = Reg(Bool())
|
||||||
lazy val maskOut = Mux(hasData, maskMux(size) & maskFull, maskFull)
|
val next = i || (!first && state)
|
||||||
|
when (in.fire()) { state := next }
|
||||||
|
next
|
||||||
|
}
|
||||||
|
|
||||||
in.ready := out.ready || !last
|
in.ready := out.ready || !last
|
||||||
out.valid := in.valid && last
|
out.valid := in.valid && last
|
||||||
out.bits := in.bits
|
out.bits := in.bits
|
||||||
edgeOut.data(out.bits) := dataOut
|
|
||||||
|
|
||||||
out.bits match {
|
// Don't put down hardware if we never carry data
|
||||||
case a: TLBundleA => a.mask := maskOut
|
edgeOut.data(out.bits) := (if (edgeIn.staticHasData(in.bits) == Some(false)) UInt(0) else helper(edgeIn.data(in.bits)))
|
||||||
case b: TLBundleB => b.mask := maskOut
|
|
||||||
case c: TLBundleC => ()
|
(out.bits, in.bits) match {
|
||||||
case d: TLBundleD => ()
|
case (o: TLBundleA, i: TLBundleA) => o.mask := edgeOut.mask(o.address, o.size) & Mux(hasData, helper(i.mask), ~UInt(0, width=outBytes))
|
||||||
// addr_lo gets padded with 0s on D channel, the only lossy transform in this core
|
case (o: TLBundleB, i: TLBundleB) => o.mask := edgeOut.mask(o.address, o.size) & Mux(hasData, helper(i.mask), ~UInt(0, width=outBytes))
|
||||||
// this should be safe, because we only care about addr_lo on D to determine which
|
case (o: TLBundleC, i: TLBundleC) => o.error := reduce(i.error)
|
||||||
// piece of data to extract when the D data bus is narrowed. Since we duplicated the
|
case (o: TLBundleD, i: TLBundleD) => o.error := reduce(i.error)
|
||||||
// data to all locations, addr_lo still points at a valid copy.
|
case _ => require(false, "Impossible bundle combination in WidthWidget")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user