// See LICENSE.SiFive for license details. package freechips.rocketchip.tilelink import Chisel._ import freechips.rocketchip.config.Parameters import freechips.rocketchip.diplomacy._ import freechips.rocketchip.util._ import scala.math.{min,max} object EarlyAck { sealed trait T case object AllPuts extends T case object PutFulls extends T case object None extends T } // minSize: minimum size of transfers supported by all outward managers // maxSize: maximum size of transfers supported after the Fragmenter is applied // alwaysMin: fragment all requests down to minSize (else fragment to maximum supported by manager) // Fragmenter modifies: PutFull, PutPartial, LogicalData, Get, Hint // Fragmenter passes: ArithmeticData (truncated to minSize if alwaysMin) // Fragmenter cannot modify acquire (could livelock); thus it is unsafe to put caches on both sides class TLFragmenter(val minSize: Int, val maxSize: Int, val alwaysMin: Boolean = false, val earlyAck: EarlyAck.T = EarlyAck.None)(implicit p: Parameters) extends LazyModule { require (isPow2 (maxSize)) require (isPow2 (minSize)) require (minSize < maxSize) val fragmentBits = log2Ceil(maxSize / minSize) val fullBits = if (earlyAck == EarlyAck.PutFulls) 1 else 0 val toggleBits = 1 val addedBits = fragmentBits + toggleBits + fullBits def expandTransfer(x: TransferSizes) = if (!x) x else { // validate that we can apply the fragmenter correctly require (x.max >= minSize, s"max transfer size (${x.max}) must be >= min transfer size (${minSize})") TransferSizes(x.min, maxSize) } def shrinkTransfer(x: TransferSizes) = if (!alwaysMin) x else if (x.min <= minSize) TransferSizes(x.min, min(minSize, x.max)) else TransferSizes.none def mapManager(m: TLManagerParameters) = m.copy( supportsArithmetic = shrinkTransfer(m.supportsArithmetic), supportsLogical = shrinkTransfer(m.supportsLogical), supportsGet = expandTransfer(m.supportsGet), supportsPutFull = expandTransfer(m.supportsPutFull), supportsPutPartial = expandTransfer(m.supportsPutPartial), supportsHint = expandTransfer(m.supportsHint)) val node = TLAdapterNode( // We require that all the responses are mutually FIFO // Thus we need to compact all of the masters into one big master clientFn = { c => c.copy(clients = Seq(TLClientParameters( name = "TLFragmenter", sourceId = IdRange(0, c.endSourceId << addedBits), requestFifo = true))) }, managerFn = { m => m.copy(managers = m.managers.map(mapManager)) }) lazy val module = new LazyModuleImp(this) { (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) => // All managers must share a common FIFO domain (responses might end up interleaved) val manager = edgeOut.manager val managers = manager.managers val beatBytes = manager.beatBytes val fifoId = managers(0).fifoId require (fifoId.isDefined && managers.map(_.fifoId == fifoId).reduce(_ && _)) require (manager.endSinkId <= 1) require (minSize >= beatBytes, s"We don't support fragmenting ($minSize) to sub-beat ($beatBytes) accesses") // We can't support devices which are cached on both sides of us require (!edgeOut.manager.anySupportAcquireB || !edgeIn.client.anySupportProbe) /* The Fragmenter is a bit tricky, because there are 5 sizes in play: * max size -- the maximum transfer size possible * orig size -- the original pre-fragmenter size * frag size -- the modified post-fragmenter size * min size -- the threshold below which frag=orig * beat size -- the amount transfered on any given beat * * The relationships are as follows: * max >= orig >= frag * max > min >= beat * It IS possible that orig <= min (then frag=orig; ie: no fragmentation) * * The fragment# (sent via TL.source) is measured in multiples of min size. * Meanwhile, to track the progress, counters measure in multiples of beat size. * * Here is an example of a bus with max=256, min=8, beat=4 and a device supporting 16. * * in.A out.A (frag#) out.D (frag#) in.D gen# ack# * get64 get16 6 ackD16 6 ackD64 12 15 * ackD16 6 ackD64 14 * ackD16 6 ackD64 13 * ackD16 6 ackD64 12 * get16 4 ackD16 4 ackD64 8 11 * ackD16 4 ackD64 10 * ackD16 4 ackD64 9 * ackD16 4 ackD64 8 * get16 2 ackD16 2 ackD64 4 7 * ackD16 2 ackD64 6 * ackD16 2 ackD64 5 * ackD16 2 ackD64 4 * get16 0 ackD16 0 ackD64 0 3 * ackD16 0 ackD64 2 * ackD16 0 ackD64 1 * ackD16 0 ackD64 0 * * get8 get8 0 ackD8 0 ackD8 0 1 * ackD8 0 ackD8 0 * * get4 get4 0 ackD4 0 ackD4 0 0 * get1 get1 0 ackD1 0 ackD1 0 0 * * put64 put16 6 15 * put64 put16 6 14 * put64 put16 6 13 * put64 put16 6 ack16 6 12 12 * put64 put16 4 11 * put64 put16 4 10 * put64 put16 4 9 * put64 put16 4 ack16 4 8 8 * put64 put16 2 7 * put64 put16 2 6 * put64 put16 2 5 * put64 put16 2 ack16 2 4 4 * put64 put16 0 3 * put64 put16 0 2 * put64 put16 0 1 * put64 put16 0 ack16 0 ack64 0 0 * * put8 put8 0 1 * put8 put8 0 ack8 0 ack8 0 0 * * put4 put4 0 ack4 0 ack4 0 0 * put1 put1 0 ack1 0 ack1 0 0 */ val counterBits = log2Up(maxSize/beatBytes) val maxDownSize = if (alwaysMin) minSize else min(manager.maxTransfer, maxSize) // Consider the following waveform for two 4-beat bursts: // ---A----A------------ // -------D-----DDD-DDDD // Under TL rules, the second A can use the same source as the first A, // because the source is released for reuse on the first response beat. // // However, if we fragment the requests, it looks like this: // ---3210-3210--------- // -------3-----210-3210 // ... now we've broken the rules because 210 are twice inflight. // // This phenomenon means we can have essentially 2*maxSize/minSize-1 // fragmented transactions in flight per original transaction source. // // To keep the source unique, we encode the beat counter in the low // bits of the source. To solve the overlap, we use a toggle bit. // Whatever toggle bit the D is reassembling, A will use the opposite. // First, handle the return path val acknum = RegInit(UInt(0, width = counterBits)) val dOrig = Reg(UInt()) val dToggle = RegInit(Bool(false)) val dFragnum = out.d.bits.source(fragmentBits-1, 0) val dFirst = acknum === UInt(0) val dLast = dFragnum === UInt(0) // only for AccessAck (!Data) val dsizeOH = UIntToOH (out.d.bits.size, log2Ceil(maxDownSize)+1) val dsizeOH1 = UIntToOH1(out.d.bits.size, log2Up(maxDownSize)) val dHasData = edgeOut.hasData(out.d.bits) // calculate new acknum val acknum_fragment = dFragnum << log2Ceil(minSize/beatBytes) val acknum_size = dsizeOH1 >> log2Ceil(beatBytes) assert (!out.d.valid || (acknum_fragment & acknum_size) === UInt(0)) val dFirst_acknum = acknum_fragment | Mux(dHasData, acknum_size, UInt(0)) val ack_decrement = Mux(dHasData, UInt(1), dsizeOH >> log2Ceil(beatBytes)) // calculate the original size val dFirst_size = OH1ToUInt((dFragnum << log2Ceil(minSize)) | dsizeOH1) when (out.d.fire()) { acknum := Mux(dFirst, dFirst_acknum, acknum - ack_decrement) when (dFirst) { dOrig := dFirst_size dToggle := out.d.bits.source(fragmentBits) } } // Swallow up non-data ack fragments val doEarlyAck = earlyAck match { case EarlyAck.AllPuts => Bool(true) case EarlyAck.PutFulls => out.d.bits.source(fragmentBits+1) case EarlyAck.None => Bool(false) } val drop = !dHasData && !Mux(doEarlyAck, dFirst, dLast) out.d.ready := in.d.ready || drop in.d.valid := out.d.valid && !drop in.d.bits := out.d.bits // pass most stuff unchanged in.d.bits.source := out.d.bits.source >> addedBits in.d.bits.size := Mux(dFirst, dFirst_size, dOrig) // The specification requires that error transition LOW=>HIGH only once per burst. // Since we fragmented a big burst into mulitple little bursts, we need to OR them. val r_error = Reg(Bool()) val d_error = (!dFirst && r_error) || out.d.bits.error when (out.d.fire()) { r_error := d_error } in.d.bits.error := d_error // If you do early Ack, errors may not be dropped assert (!out.d.valid || !doEarlyAck || !drop || out.d.bits.error === r_error, "Slave device error behaviour unsuitable for earlyAck setting") // What maximum transfer sizes do downstream devices support? val maxArithmetics = managers.map(_.supportsArithmetic.max) val maxLogicals = managers.map(_.supportsLogical.max) val maxGets = managers.map(_.supportsGet.max) val maxPutFulls = managers.map(_.supportsPutFull.max) val maxPutPartials = managers.map(_.supportsPutPartial.max) val maxHints = managers.map(m => if (m.supportsHint) maxDownSize else 0) // We assume that the request is valid => size 0 is impossible val lgMinSize = UInt(log2Ceil(minSize)) val maxLgArithmetics = maxArithmetics.map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m))) val maxLgLogicals = maxLogicals .map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m))) val maxLgGets = maxGets .map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m))) val maxLgPutFulls = maxPutFulls .map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m))) val maxLgPutPartials = maxPutPartials.map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m))) val maxLgHints = maxHints .map(m => if (m == 0) lgMinSize else UInt(log2Ceil(m))) // Make the request repeatable val repeater = Module(new Repeater(in.a.bits)) repeater.io.enq <> in.a val in_a = repeater.io.deq // If this is infront of a single manager, these become constants val find = manager.findFast(edgeIn.address(in_a.bits)) val maxLgArithmetic = Mux1H(find, maxLgArithmetics) val maxLgLogical = Mux1H(find, maxLgLogicals) val maxLgGet = Mux1H(find, maxLgGets) val maxLgPutFull = Mux1H(find, maxLgPutFulls) val maxLgPutPartial = Mux1H(find, maxLgPutPartials) val maxLgHint = Mux1H(find, maxLgHints) val limit = if (alwaysMin) lgMinSize else MuxLookup(in_a.bits.opcode, lgMinSize, Array( TLMessages.PutFullData -> maxLgPutFull, TLMessages.PutPartialData -> maxLgPutPartial, TLMessages.ArithmeticData -> maxLgArithmetic, TLMessages.LogicalData -> maxLgLogical, TLMessages.Get -> maxLgGet, TLMessages.Hint -> maxLgHint)) val aOrig = in_a.bits.size val aFrag = Mux(aOrig > limit, limit, aOrig) val aOrigOH1 = UIntToOH1(aOrig, log2Ceil(maxSize)) val aFragOH1 = UIntToOH1(aFrag, log2Up(maxDownSize)) val aHasData = edgeIn.hasData(in_a.bits) val aMask = Mux(aHasData, UInt(0), aFragOH1) val gennum = RegInit(UInt(0, width = counterBits)) val aFirst = gennum === UInt(0) val old_gennum1 = Mux(aFirst, aOrigOH1 >> log2Ceil(beatBytes), gennum - UInt(1)) val new_gennum = ~(~old_gennum1 | (aMask >> log2Ceil(beatBytes))) // ~(~x|y) is width safe val aFragnum = ~(~(old_gennum1 >> log2Ceil(minSize/beatBytes)) | (aFragOH1 >> log2Ceil(minSize))) val aLast = aFragnum === UInt(0) val aToggle = !Mux(aFirst, dToggle, RegEnable(dToggle, aFirst)) val aFull = if (earlyAck == EarlyAck.PutFulls) Some(in_a.bits.opcode === TLMessages.PutFullData) else None when (out.a.fire()) { gennum := new_gennum } repeater.io.repeat := !aHasData && aFragnum =/= UInt(0) out.a <> in_a out.a.bits.address := in_a.bits.address | ~(old_gennum1 << log2Ceil(beatBytes) | ~aOrigOH1 | aFragOH1 | UInt(minSize-1)) out.a.bits.source := Cat(Seq(in_a.bits.source) ++ aFull ++ Seq(aToggle.asUInt, aFragnum)) out.a.bits.size := aFrag // Optimize away some of the Repeater's registers assert (!repeater.io.full || !aHasData) out.a.bits.data := in.a.bits.data val fullMask = UInt((BigInt(1) << beatBytes) - 1) assert (!repeater.io.full || in_a.bits.mask === fullMask) out.a.bits.mask := Mux(repeater.io.full, fullMask, in.a.bits.mask) // Tie off unused channels in.b.valid := Bool(false) in.c.ready := Bool(true) in.e.ready := Bool(true) out.b.ready := Bool(true) out.c.valid := Bool(false) out.e.valid := Bool(false) } } } object TLFragmenter { def apply(minSize: Int, maxSize: Int, alwaysMin: Boolean = false, earlyAck: EarlyAck.T = EarlyAck.None)(implicit p: Parameters): TLNode = { val fragmenter = LazyModule(new TLFragmenter(minSize, maxSize, alwaysMin, earlyAck)) fragmenter.node } } /** Synthesizeable unit tests */ import freechips.rocketchip.unittest._ class TLRAMFragmenter(ramBeatBytes: Int, maxSize: Int, txns: Int)(implicit p: Parameters) extends LazyModule { val fuzz = LazyModule(new TLFuzzer(txns)) val model = LazyModule(new TLRAMModel("Fragmenter")) val ram = LazyModule(new TLRAM(AddressSet(0x0, 0x3ff), beatBytes = ramBeatBytes)) (ram.node := TLDelayer(0.1) := TLBuffer(BufferParams.flow) := TLDelayer(0.1) := TLFragmenter(ramBeatBytes, maxSize, earlyAck = EarlyAck.AllPuts) := TLDelayer(0.1) := TLBuffer(BufferParams.flow) := TLFragmenter(ramBeatBytes, maxSize/2) := TLDelayer(0.1) := TLBuffer(BufferParams.flow) := model.node := fuzz.node) lazy val module = new LazyModuleImp(this) with UnitTestModule { io.finished := fuzz.module.io.finished } } class TLRAMFragmenterTest(ramBeatBytes: Int, maxSize: Int, txns: Int = 5000, timeout: Int = 500000)(implicit p: Parameters) extends UnitTest(timeout) { val dut = Module(LazyModule(new TLRAMFragmenter(ramBeatBytes,maxSize,txns)).module) io.finished := dut.io.finished }