diff --git a/src/main/scala/groundtest/Comparator.scala b/src/main/scala/groundtest/Comparator.scala index 8c5c8213..27f4ce3f 100644 --- a/src/main/scala/groundtest/Comparator.scala +++ b/src/main/scala/groundtest/Comparator.scala @@ -336,10 +336,17 @@ class ComparatorSink(implicit val p: Parameters) extends Module base.addr_beat === g.addr_beat || !g.hasData(), base.data === g.data || !g.hasData()) + // TL1 likes to duplicate 32-bits into both halves of a 64-bit value + // TL2 doesn't do this, so they compare differently when they are the same + def isDupd(x: UInt) = if (tlDataBytes != 8) Bool(false) else x(31, 0) === x(63, 32) + def safeCompare(x: UInt, y: UInt) = + Mux(!isDupd(x) && !isDupd(y), x === y, x(63,32) === y(63,32) || x(31,0) === y(31,0)) + assert (g.is_builtin_type, "grant not builtin") assert (base.g_type === g.g_type, "g_type mismatch") assert (base.addr_beat === g.addr_beat || !g.hasMultibeatData(), "addr_beat mismatch") - assert (base.data === g.data || !g.hasData(), "data mismatch") + assert (base.data === g.data || !g.hasMultibeatData(), "multibeat data mismatch") + assert (safeCompare(base.data, g.data) || !g.hasData(), "singlebeat data mismatch") assert_conds.zipWithIndex.foreach { case (cond, i) => when (!cond) { diff --git a/src/main/scala/groundtest/Configs.scala b/src/main/scala/groundtest/Configs.scala index e6472354..1e12e6ef 100644 --- a/src/main/scala/groundtest/Configs.scala +++ b/src/main/scala/groundtest/Configs.scala @@ -132,7 +132,7 @@ class WithComparator extends Config( site(GlobalAddrMap)(name).start.longValue), width = 8, operations = 1000, - atomics = false, // !!! re-enable soon: site(UseAtomics), + atomics = site(UseAtomics), prefetches = site("COMPARATOR_PREFETCHES")) case FPUConfig => None case UseAtomics => false @@ -296,7 +296,7 @@ class WithDirectComparator extends Config( targets = Seq(0L, 0x100L), width = 8, operations = 1000, - atomics = false, // !!! 
re-enable soon: site(UseAtomics), + atomics = site(UseAtomics), prefetches = site("COMPARATOR_PREFETCHES")) case FPUConfig => None case UseAtomics => false diff --git a/src/main/scala/rocketchip/BaseTop.scala b/src/main/scala/rocketchip/BaseTop.scala index d3f54b54..32b925d2 100644 --- a/src/main/scala/rocketchip/BaseTop.scala +++ b/src/main/scala/rocketchip/BaseTop.scala @@ -49,7 +49,7 @@ abstract class BaseTop(q: Parameters) extends LazyModule { val legacy = LazyModule(new TLLegacy()(p.alterPartial({ case TLId => "L2toMMIO" }))) - peripheryBus.node := TLBuffer(TLWidthWidget(TLHintHandler(legacy.node), legacy.tlDataBytes)) + peripheryBus.node := TLWidthWidget(TLBuffer(TLAtomicAutomata()(TLHintHandler(legacy.node))), legacy.tlDataBytes) } abstract class BaseTopBundle(val p: Parameters) extends Bundle { diff --git a/src/main/scala/uncore/devices/Prci.scala b/src/main/scala/uncore/devices/Prci.scala index 1024aae2..ce32e8c2 100644 --- a/src/main/scala/uncore/devices/Prci.scala +++ b/src/main/scala/uncore/devices/Prci.scala @@ -89,7 +89,7 @@ trait CoreplexLocalInterrupterModule extends Module with HasRegMap with MixCorep /** Power, Reset, Clock, Interrupt */ // Magic TL2 Incantation to create a TL2 Slave class CoreplexLocalInterrupter(c: CoreplexLocalInterrupterConfig)(implicit val p: Parameters) - extends TLRegisterRouter(c.address, 0, c.size, None, c.beatBytes, false)( + extends TLRegisterRouter(c.address, 0, c.size, 0, c.beatBytes, false)( new TLRegBundle((c, p), _) with CoreplexLocalInterrupterBundle)( new TLRegModule((c, p), _, _) with CoreplexLocalInterrupterModule) { diff --git a/src/main/scala/uncore/tilelink2/Arbiter.scala b/src/main/scala/uncore/tilelink2/Arbiter.scala new file mode 100644 index 00000000..1981a818 --- /dev/null +++ b/src/main/scala/uncore/tilelink2/Arbiter.scala @@ -0,0 +1,69 @@ +// See LICENSE for license details. 
+ +package uncore.tilelink2 + +import Chisel._ +import chisel3.util.IrrevocableIO + +object TLArbiter +{ + // (valids, idle) => readys + type Policy = (Seq[Bool], Bool) => Seq[Bool] + + val lowestIndexFirst: Policy = (valids, idle) => + valids.scanLeft(Bool(true))(_ && !_).init + + def apply[T <: Data](policy: Policy)(sink: IrrevocableIO[T], sources: (UInt, IrrevocableIO[T])*) { + require (sources.size >= 1) + + val pairs = sources.toList + val beatsIn = pairs.map(_._1) + val sourcesIn = pairs.map(_._2) + + // The number of beats which remain to be sent + val beatsLeft = RegInit(UInt(0)) + val idle = beatsLeft === UInt(0) + + // Who wants access to the sink? + val valids = sourcesIn.map(_.valid) + // Arbitrate amongst the requests + val readys = Vec(policy(valids, idle)) + // Which request wins arbitration? + val winners = Vec((readys zip valids) map { case (r,v) => r&&v }) + + // Confirm the policy works properly + require (readys.size == valids.size) + // Never two winners + val prefixOR = winners.scanLeft(Bool(false))(_||_).init + assert((prefixOR zip winners) map { case (p,w) => !p || !w } reduce {_ && _}) + // If there was any request, there is a winner + assert (!valids.reduce(_||_) || winners.reduce(_||_)) + + // Track remaining beats + val maskedBeats = (winners zip beatsIn) map { case (w,b) => Mux(w, b, UInt(0)) } + val initBeats = maskedBeats.reduce(_ | _) // no winner => 0 beats + val todoBeats = Mux(idle, initBeats, beatsLeft) + beatsLeft := todoBeats - sink.fire() + assert (!sink.fire() || todoBeats =/= UInt(0)) // underflow is impossible + + // The one-hot source granted access in the previous cycle + val state = RegInit(Vec.fill(sources.size)(Bool(false))) + val muxState = Mux(idle, winners, state) + state := muxState + + val ones = Vec.fill(sources.size)(Bool(true)) + val picked = Mux(idle, ones, state) + sink.valid := Mux1H(picked, valids) + + if (sources.size > 1) { + val allowed = Mux(idle, readys, state) + (sourcesIn zip allowed) foreach { case 
(s, r) => + s.ready := sink.ready && r + } + } else { + sourcesIn(0).ready := sink.ready + } + + sink.bits := Mux1H(muxState, sourcesIn.map(_.bits)) + } +} diff --git a/src/main/scala/uncore/tilelink2/AtomicAutomata.scala b/src/main/scala/uncore/tilelink2/AtomicAutomata.scala new file mode 100644 index 00000000..985f1079 --- /dev/null +++ b/src/main/scala/uncore/tilelink2/AtomicAutomata.scala @@ -0,0 +1,285 @@ +// See LICENSE for license details. + +package uncore.tilelink2 + +import Chisel._ +import chisel3.internal.sourceinfo.SourceInfo +import scala.math.{min,max} + +// Ensures that all downstream RW managers support Atomic operations. +// If !passthrough, intercept all Atomics. Otherwise, only intercept those unsupported downstream. +class TLAtomicAutomata(logical: Boolean = true, arithmetic: Boolean = true, concurrency: Int = 1, passthrough: Boolean = true) extends LazyModule +{ + require (concurrency >= 1) + + val node = TLAdapterNode( + clientFn = { case Seq(cp) => require (!cp.unsafeAtomics); cp.copy(unsafeAtomics = true) }, + managerFn = { case Seq(mp) => mp.copy(managers = mp.managers.map { m => + val ourSupport = TransferSizes(1, mp.beatBytes) + def widen(x: TransferSizes) = if (passthrough && x.min <= 2*mp.beatBytes) TransferSizes(1, max(mp.beatBytes, x.max)) else ourSupport + val canDoit = m.supportsPutFull.contains(ourSupport) && m.supportsGet.contains(ourSupport) + // Blow up if there are devices to which we cannot add Atomics, because their R|W are too inflexible + require (!m.supportsPutFull || !m.supportsGet || canDoit) + m.copy( + supportsArithmetic = if (!arithmetic || !canDoit) m.supportsArithmetic else widen(m.supportsArithmetic), + supportsLogical = if (!logical || !canDoit) m.supportsLogical else widen(m.supportsLogical)) + })}) + + lazy val module = new LazyModuleImp(this) { + val io = new Bundle { + val in = node.bundleIn + val out = node.bundleOut + } + + val in = io.in(0) + val out = io.out(0) + val edgeIn = node.edgesIn(0) + val 
edgeOut = node.edgesOut(0) + val managers = edgeOut.manager.managers + val beatBytes = edgeOut.manager.beatBytes + + // To which managers are we adding atomic support? + val ourSupport = TransferSizes(1, edgeOut.manager.beatBytes) + val managersNeedingHelp = managers.filter { m => + m.supportsPutFull.contains(ourSupport) && + m.supportsGet.contains(ourSupport) && + ((logical && !m.supportsLogical .contains(ourSupport)) || + (arithmetic && !m.supportsArithmetic.contains(ourSupport)) || + !passthrough) // we will do atomics for everyone we can + } + // We cannot add atomics to a non-FIFO manager + managersNeedingHelp foreach { m => require (m.fifoId.isDefined) } + // We need to preserve FIFO semantics across FIFO domains, not managers + // Suppose you have Put(42) Atomic(+1) both inflight; valid results: 42 or 43 + // If we allow Put(42) Get() Put(+1) concurrent; valid results: 42 43 OR undef + // Making non-FIFO work requires waiting for all Acks to come back (=> use FIFOFixer) + val domainsNeedingHelp = managersNeedingHelp.map(_.fifoId.get).distinct + // Don't overprovision the CAM + val camSize = min(domainsNeedingHelp.size, concurrency) + // Compact the fifoIds to only those we care about + val camFifoIds = managers.map(m => UInt(m.fifoId.map(id => max(0, domainsNeedingHelp.indexOf(id))).getOrElse(0))) + + // CAM entry state machine + val FREE = UInt(0) // unused waiting on Atomic from A + val GET = UInt(3) // Get sent down A waiting on AccessDataAck from D + val AMO = UInt(2) // AccessDataAck sent up D waiting for A availability + val ACK = UInt(1) // Put sent down A waiting for PutAck from D + + def helper(select: Seq[Bool], x: Seq[TransferSizes], lgSize: UInt) = + if (!passthrough) Bool(false) else + if (x.map(_ == x(0)).reduce(_ && _)) x(0).containsLg(lgSize) else + Mux1H(select, x.map(_.containsLg(lgSize))) + + // Do we need to do anything at all? 
+ if (camSize > 0) { + class CAM_S extends Bundle { + val state = UInt(width = 2) + } + class CAM_A extends Bundle { + val bits = new TLBundleA(out.a.bits.params) + val fifoId = UInt(width = log2Up(domainsNeedingHelp.size)) + val lut = UInt(width = 4) + } + class CAM_D extends Bundle { + val data = UInt(width = out.a.bits.params.dataBits) + } + + val initval = Wire(new CAM_S) + initval.state := FREE + val cam_s = RegInit(Vec.fill(camSize)(initval)) + val cam_a = Reg(Vec(camSize, new CAM_A)) + val cam_d = Reg(Vec(camSize, new CAM_D)) + + val cam_free = cam_s.map(_.state === FREE) + val cam_amo = cam_s.map(_.state === AMO) + val cam_abusy = cam_s.map(e => e.state === GET || e.state === AMO) // A is blocked + val cam_dmatch = cam_s.map(e => e.state === GET || e.state === ACK) // D should inspect these entries + + // Can the manager already handle this message? + val a_size = edgeIn.size(in.a.bits) + val a_select = edgeOut.manager.findFast(edgeIn.address(in.a.bits)) + val a_canLogical = helper(a_select, managers.map(_.supportsLogical), a_size) + val a_canArithmetic = helper(a_select, managers.map(_.supportsArithmetic), a_size) + val a_isLogical = in.a.bits.opcode === TLMessages.LogicalData + val a_isArithmetic = in.a.bits.opcode === TLMessages.ArithmeticData + val a_isSupported = Mux(a_isLogical, a_canLogical, Mux(a_isArithmetic, a_canArithmetic, Bool(true))) + + // Must we do a Put? + val a_cam_any_put = cam_amo.reduce(_ || _) + val a_cam_por_put = cam_amo.scanLeft(Bool(false))(_||_).init + val a_cam_sel_put = (cam_amo zip a_cam_por_put) map { case (a, b) => a && !b } + val a_cam_a = PriorityMux(cam_amo, cam_a) + val a_cam_d = PriorityMux(cam_amo, cam_d) + val a_a = a_cam_a.bits.data + val a_d = a_cam_d.data + + // Does the A request conflict with an inflight AMO? + val a_fifoId = Mux1H(a_select, camFifoIds) + val a_cam_busy = (cam_abusy zip cam_a.map(_.fifoId === a_fifoId)) map { case (a,b) => a&&b } reduce (_||_) + + // (Where) are we allocating in the CAM? 
+ val a_cam_any_free = cam_free.reduce(_ || _) + val a_cam_por_free = cam_free.scanLeft(Bool(false))(_||_).init + val a_cam_sel_free = (cam_free zip a_cam_por_free) map { case (a,b) => a && !b } + + // Logical AMO + val indexes = Seq.tabulate(beatBytes*8) { i => Cat(a_a(i,i), a_d(i,i)) } + val logic_out = Cat(indexes.map(x => a_cam_a.lut(x).asUInt).reverse) + + // Arithmetic AMO + val unsigned = a_cam_a.bits.param(1) + val take_max = a_cam_a.bits.param(0) + val adder = a_cam_a.bits.param(2) + val mask = a_cam_a.bits.mask + val signSel = ~(~mask | (mask >> 1)) + val signbits_a = Cat(Seq.tabulate(beatBytes) { i => a_a(8*i+7,8*i+7) } .reverse) + val signbits_d = Cat(Seq.tabulate(beatBytes) { i => a_d(8*i+7,8*i+7) } .reverse) + // Move the selected sign bit into the first byte position it will extend + val signbit_a = ((signbits_a & signSel) << 1)(beatBytes-1, 0) + val signbit_d = ((signbits_d & signSel) << 1)(beatBytes-1, 0) + val signext_a = FillInterleaved(8, highOR(signbit_a)) + val signext_d = FillInterleaved(8, highOR(signbit_d)) + // NOTE: sign-extension does not change the relative ordering in EITHER unsigned or signed arithmetic + val wide_mask = FillInterleaved(8, mask) + val a_a_ext = (a_a & wide_mask) | signext_a + val a_d_ext = (a_d & wide_mask) | signext_d + val a_d_inv = Mux(adder, a_d_ext, ~a_d_ext) + val adder_out = a_a_ext + a_d_inv + val h = 8*beatBytes-1 // now sign-extended; use biggest bit + val a_bigger_uneq = unsigned === a_a_ext(h) // result if high bits are unequal + val a_bigger = Mux(a_a_ext(h) === a_d_ext(h), !adder_out(h), a_bigger_uneq) + val pick_a = take_max === a_bigger + val arith_out = Mux(adder, adder_out, Mux(pick_a, a_a, a_d)) + + // AMO result data + val amo_data = + if (!logical) arith_out else + if (!arithmetic) logic_out else + Mux(a_cam_a.bits.opcode(0), logic_out, arith_out) + + // Potentially mutate the message from inner + val source_i = Wire(in.a) + val a_allow = !a_cam_busy && (a_isSupported || a_cam_any_free) + 
in.a.ready := source_i.ready && a_allow + source_i.valid := in.a.valid && a_allow + source_i.bits := in.a.bits + when (!a_isSupported) { // minimal mux difference + source_i.bits.opcode := TLMessages.Get + source_i.bits.param := UInt(0) + } + + // Potentially take the message from the CAM + val source_c = Wire(in.a) + source_c.valid := a_cam_any_put + source_c.bits := edgeOut.Put(a_cam_a.bits.source, edgeIn.address(a_cam_a.bits), a_cam_a.bits.size, amo_data)._2 + + // Finishing an AMO from the CAM has highest priority + TLArbiter(TLArbiter.lowestIndexFirst)(out.a, (UInt(1), source_c), (edgeOut.numBeats(in.a.bits), source_i)) + + // Capture the A state into the CAM + when (source_i.fire() && !a_isSupported) { + (a_cam_sel_free zip cam_a) foreach { case (en, r) => + when (en) { + r.fifoId := a_fifoId + r.bits := in.a.bits + r.lut := MuxLookup(in.a.bits.param(1, 0), UInt(0, width = 4), Array( + TLAtomics.AND -> UInt(0x8), + TLAtomics.OR -> UInt(0xe), + TLAtomics.XOR -> UInt(0x6), + TLAtomics.SWAP -> UInt(0xc))) + } + } + (a_cam_sel_free zip cam_s) foreach { case (en, r) => + when (en) { + r.state := GET + } + } + } + + // Advance the put state + when (source_c.fire()) { + (a_cam_sel_put zip cam_s) foreach { case (en, r) => + when (en) { + r.state := ACK + } + } + } + + // We need to deal with a potential D response in the same cycle as the A request + val d_cam_sel_raw = cam_a.map(_.bits.source === in.d.bits.source) + val d_cam_sel_match = (d_cam_sel_raw zip cam_dmatch) map { case (a,b) => a&&b } + val d_cam_data = Mux1H(d_cam_sel_match, cam_d.map(_.data)) + val d_cam_sel_bypass = if (edgeOut.manager.minLatency > 0) Bool(false) else + out.d.bits.source === in.a.bits.source && in.a.valid && out.d.valid && !a_isSupported + val d_cam_sel = (a_cam_sel_free zip d_cam_sel_match) map { case (a,d) => Mux(d_cam_sel_bypass, a, d) } + val d_cam_sel_any = d_cam_sel_bypass || d_cam_sel_match.reduce(_ || _) + val d_ackd = out.d.bits.opcode === TLMessages.AccessAckData + val d_ack = 
out.d.bits.opcode === TLMessages.AccessAck + + when (out.d.fire()) { + (d_cam_sel zip cam_d) foreach { case (en, r) => + when (en && d_ackd) { + r.data := out.d.bits.data + } + } + (d_cam_sel zip cam_s) foreach { case (en, r) => + when (en) { + // Note: it is important that this comes AFTER the := GET, so we can go FREE=>GET=>AMO in one cycle + r.state := Mux(d_ackd, AMO, FREE) + } + } + } + + val d_drop = d_ackd && d_cam_sel_any + val d_replace = d_ack && d_cam_sel_match.reduce(_ || _) + + in.d.valid := out.d.valid && !d_drop + out.d.ready := in.d.ready || d_drop + + in.d.bits := out.d.bits + when (d_replace) { // minimal muxes + in.d.bits.opcode := TLMessages.AccessAckData + in.d.bits.data := d_cam_data + } + } else { + out.a.valid := in.a.valid + in.a.ready := out.a.ready + out.a.bits := in.a.bits + + in.d.valid := out.d.valid + out.d.ready := in.d.ready + in.d.bits := out.d.bits + } + + if (edgeOut.manager.anySupportAcquire && edgeIn.client.anySupportProbe) { + in.b.valid := out.b.valid + out.b.ready := in.b.ready + in.b.bits := out.b.bits + + out.c.valid := in.c.valid + in.c.ready := out.c.ready + out.c.bits := in.c.bits + + out.e.valid := in.e.valid + in.e.ready := out.e.ready + out.e.bits := in.e.bits + } else { + in.b.valid := Bool(false) + in.c.ready := Bool(true) + in.e.ready := Bool(true) + out.b.ready := Bool(true) + out.c.valid := Bool(false) + out.e.valid := Bool(false) + } + } +} + +object TLAtomicAutomata +{ + // applied to the TL source node; y.node := TLAtomicAutomata(x.node) + def apply(logical: Boolean = true, arithmetic: Boolean = true, concurrency: Int = 1, passthrough: Boolean = true)(x: TLBaseNode)(implicit sourceInfo: SourceInfo): TLBaseNode = { + val atomics = LazyModule(new TLAtomicAutomata(logical, arithmetic, concurrency, passthrough)) + atomics.node := x + atomics.node + } +} diff --git a/src/main/scala/uncore/tilelink2/Buffer.scala b/src/main/scala/uncore/tilelink2/Buffer.scala index 558a524a..e4ba752b 100644 --- 
a/src/main/scala/uncore/tilelink2/Buffer.scala +++ b/src/main/scala/uncore/tilelink2/Buffer.scala @@ -4,10 +4,20 @@ package uncore.tilelink2 import Chisel._ import chisel3.internal.sourceinfo.SourceInfo +import scala.math.max -class TLBuffer(entries: Int = 2, pipe: Boolean = false) extends LazyModule +// pipe is only used if a queue has depth = 1 +class TLBuffer(a: Int = 2, b: Int = 2, c: Int = 2, d: Int = 2, e: Int = 2, pipe: Boolean = true) extends LazyModule { - val node = TLIdentityNode() + require (a >= 0) + require (b >= 0) + require (c >= 0) + require (d >= 0) + require (e >= 0) + + val node = TLAdapterNode( + clientFn = { seq => seq(0).copy(minLatency = seq(0).minLatency + max(1,b) + max(1,c)) }, + managerFn = { seq => seq(0).copy(minLatency = seq(0).minLatency + max(1,a) + max(1,d)) }) lazy val module = new LazyModuleImp(this) { val io = new Bundle { @@ -16,13 +26,13 @@ class TLBuffer(entries: Int = 2, pipe: Boolean = false) extends LazyModule } ((io.in zip io.out) zip (node.edgesIn zip node.edgesOut)) foreach { case ((in, out), (edgeIn, edgeOut)) => - out.a <> Queue(in .a, entries, pipe) - in .d <> Queue(out.d, entries, pipe) + if (a>0) { out.a <> Queue(in .a, a, pipe && a<2) } else { out.a <> in.a } + if (d>0) { in .d <> Queue(out.d, d, pipe && d<2) } else { in.d <> out.d } if (edgeOut.manager.anySupportAcquire && edgeOut.client.anySupportProbe) { - in .b <> Queue(out.b, entries, pipe) - out.c <> Queue(in .c, entries, pipe) - out.e <> Queue(in .e, entries, pipe) + if (b>0) { in .b <> Queue(out.b, b, pipe && b<2) } else { in.b <> out.b } + if (c>0) { out.c <> Queue(in .c, c, pipe && c<2) } else { out.c <> in.c } + if (e>0) { out.e <> Queue(in .e, e, pipe && e<2) } else { out.e <> in.e } } else { in.b.valid := Bool(false) in.c.ready := Bool(true) @@ -38,8 +48,13 @@ class TLBuffer(entries: Int = 2, pipe: Boolean = false) extends LazyModule object TLBuffer { // applied to the TL source node; y.node := TLBuffer(x.node) - def apply(x: TLBaseNode, entries: Int 
= 2, pipe: Boolean = false)(implicit sourceInfo: SourceInfo): TLBaseNode = { - val buffer = LazyModule(new TLBuffer(entries, pipe)) + def apply(x: TLBaseNode) (implicit sourceInfo: SourceInfo): TLBaseNode = apply(x, 2) + def apply(x: TLBaseNode, entries: Int) (implicit sourceInfo: SourceInfo): TLBaseNode = apply(x, entries, true) + def apply(x: TLBaseNode, entries: Int, pipe: Boolean) (implicit sourceInfo: SourceInfo): TLBaseNode = apply(x, entries, entries, pipe) + def apply(x: TLBaseNode, ace: Int, bd: Int) (implicit sourceInfo: SourceInfo): TLBaseNode = apply(x, ace, bd, true) + def apply(x: TLBaseNode, ace: Int, bd: Int, pipe: Boolean)(implicit sourceInfo: SourceInfo): TLBaseNode = apply(x, ace, bd, ace, bd, ace, pipe) + def apply(x: TLBaseNode, a: Int, b: Int, c: Int, d: Int, e: Int, pipe: Boolean = true)(implicit sourceInfo: SourceInfo): TLBaseNode = { + val buffer = LazyModule(new TLBuffer(a, b, c, d, e, pipe)) buffer.node := x buffer.node } diff --git a/src/main/scala/uncore/tilelink2/Bundles.scala b/src/main/scala/uncore/tilelink2/Bundles.scala index 128c5070..19fb48a8 100644 --- a/src/main/scala/uncore/tilelink2/Bundles.scala +++ b/src/main/scala/uncore/tilelink2/Bundles.scala @@ -2,8 +2,8 @@ package uncore.tilelink2 -import Chisel._ -import chisel3.util.Irrevocable +import chisel3._ +import chisel3.util._ abstract class GenericParameterizedBundle[T <: Object](val params: T) extends Bundle { @@ -12,7 +12,7 @@ abstract class GenericParameterizedBundle[T <: Object](val params: T) extends Bu this.getClass.getConstructors.head.newInstance(params).asInstanceOf[this.type] } catch { case e: java.lang.IllegalArgumentException => - throwException("Unable to use GenericParameterizedBundle.cloneType on " + + throw new Exception("Unable to use GenericParameterizedBundle.cloneType on " + this.getClass + ", probably because " + this.getClass + "() takes more than one argument. 
Consider overriding " + "cloneType() on " + this.getClass, e) @@ -189,3 +189,46 @@ object TLBundle { def apply(params: TLBundleParameters) = new TLBundle(params) } + +class IrrevocableSnoop[+T <: Data](gen: T) extends Bundle +{ + val ready = Bool() + val valid = Bool() + val bits = gen.asOutput + + def fire(dummy: Int = 0) = ready && valid + override def cloneType: this.type = new IrrevocableSnoop(gen).asInstanceOf[this.type] +} + +object IrrevocableSnoop +{ + def apply[T <: Data](i: IrrevocableIO[T]) = { + val out = Wire(new IrrevocableSnoop(i.bits)) + out.ready := i.ready + out.valid := i.valid + out.bits := i.bits + out + } +} + +class TLBundleSnoop(params: TLBundleParameters) extends TLBundleBase(params) +{ + val a = new IrrevocableSnoop(new TLBundleA(params)) + val b = new IrrevocableSnoop(new TLBundleB(params)) + val c = new IrrevocableSnoop(new TLBundleC(params)) + val d = new IrrevocableSnoop(new TLBundleD(params)) + val e = new IrrevocableSnoop(new TLBundleE(params)) +} + +object TLBundleSnoop +{ + def apply(x: TLBundle) = { + val out = Wire(new TLBundleSnoop(x.params)) + out.a := IrrevocableSnoop(x.a) + out.b := IrrevocableSnoop(x.b) + out.c := IrrevocableSnoop(x.c) + out.d := IrrevocableSnoop(x.d) + out.e := IrrevocableSnoop(x.e) + out + } +} diff --git a/src/main/scala/uncore/tilelink2/Fragmenter.scala b/src/main/scala/uncore/tilelink2/Fragmenter.scala index 43d73ab0..05e663eb 100644 --- a/src/main/scala/uncore/tilelink2/Fragmenter.scala +++ b/src/main/scala/uncore/tilelink2/Fragmenter.scala @@ -47,9 +47,10 @@ class TLFragmenter(minSize: Int, maxSize: Int, alwaysMin: Boolean = false) exten supportsPutPartial = TransferSizes.none, supportsHint = TransferSizes.none) + // Because the Fragmenter stalls inner A while serving outer, it can wipe away inner latency val node = TLAdapterNode( clientFn = { case Seq(c) => c.copy(clients = c.clients.map(mapClient)) }, - managerFn = { case Seq(m) => m.copy(managers = m.managers.map(mapManager)) }) + managerFn = { 
case Seq(m) => m.copy(managers = m.managers.map(mapManager), minLatency = 0) }) lazy val module = new LazyModuleImp(this) { val io = new Bundle { diff --git a/src/main/scala/uncore/tilelink2/Fuzzer.scala b/src/main/scala/uncore/tilelink2/Fuzzer.scala index 630f5521..aa7e0d2a 100644 --- a/src/main/scala/uncore/tilelink2/Fuzzer.scala +++ b/src/main/scala/uncore/tilelink2/Fuzzer.scala @@ -14,23 +14,22 @@ class IDMapGenerator(numIds: Int) extends Module { } // True indicates that the id is available - val bitmap = RegInit(Vec.fill(numIds){Bool(true)}) + val bitmap = RegInit(UInt((BigInt(1) << numIds) - 1, width = numIds)) io.free.ready := Bool(true) - assert(!io.free.valid || !bitmap(io.free.bits)) // No double freeing + assert (!io.free.valid || !bitmap(io.free.bits)) // No double freeing - val mask = bitmap.scanLeft(Bool(false))(_||_).init - val select = mask zip bitmap map { case(m,b) => !m && b } + val select = ~(highOR(bitmap) << 1) & bitmap io.alloc.bits := OHToUInt(select) - io.alloc.valid := bitmap.reduce(_||_) + io.alloc.valid := bitmap.orR() - when (io.alloc.fire()) { - bitmap(io.alloc.bits) := Bool(false) - } + val clr = Wire(init = UInt(0, width = numIds)) + when (io.alloc.fire()) { clr := UIntToOH(io.alloc.bits) } - when (io.free.fire()) { - bitmap(io.free.bits) := Bool(true) - } + val set = Wire(init = UInt(0, width = numIds)) + when (io.free.fire()) { set := UIntToOH(io.free.bits) } + + bitmap := (bitmap & ~clr) | set } object LFSR64 @@ -138,7 +137,8 @@ class TLFuzzer( // Increment random number generation for the following subfields val inc = Wire(Bool()) val inc_beat = Wire(Bool()) - val arth_op = noiseMaker(3, inc) + val arth_op_3 = noiseMaker(3, inc) + val arth_op = Mux(arth_op_3 > UInt(4), UInt(4), arth_op_3) val log_op = noiseMaker(2, inc) val amo_size = UInt(2) + noiseMaker(1, inc) // word or dword val size = noiseMaker(sizeBits, inc) @@ -221,11 +221,11 @@ class TLFuzzRAM extends LazyModule val cross = LazyModule(new TLAsyncCrossing) model.node := 
fuzz.node - xbar2.node := model.node + xbar2.node := TLAtomicAutomata()(model.node) ram2.node := TLFragmenter(xbar2.node, 16, 256) xbar.node := TLWidthWidget(TLHintHandler(xbar2.node), 16) cross.node := TLFragmenter(TLBuffer(xbar.node), 4, 256) - ram.node := cross.node + val monitor = (ram.node := cross.node) gpio.node := TLFragmenter(TLBuffer(xbar.node), 4, 32) lazy val module = new LazyModuleImp(this) with HasUnitTestIO { @@ -240,6 +240,12 @@ class TLFuzzRAM extends LazyModule cross.module.io.in_reset := reset cross.module.io.out_clock := clocks.io.clock_out cross.module.io.out_reset := reset + + // Push the Monitor into the right clock domain + monitor.foreach { m => + m.module.clock := clocks.io.clock_out + m.module.reset := reset + } } } diff --git a/src/main/scala/uncore/tilelink2/HintHandler.scala b/src/main/scala/uncore/tilelink2/HintHandler.scala index 376f8e90..6ebef935 100644 --- a/src/main/scala/uncore/tilelink2/HintHandler.scala +++ b/src/main/scala/uncore/tilelink2/HintHandler.scala @@ -8,9 +8,10 @@ import chisel3.internal.sourceinfo.SourceInfo // Acks Hints for managers that don't support them or Acks all Hints if !passthrough class TLHintHandler(supportManagers: Boolean = true, supportClients: Boolean = false, passthrough: Boolean = true) extends LazyModule { + // HintAcks can come back combinationally => minLatency=0 val node = TLAdapterNode( - clientFn = { case Seq(c) => if (!supportClients) c else c.copy(clients = c.clients .map(_.copy(supportsHint = TransferSizes(1, c.maxTransfer)))) }, - managerFn = { case Seq(m) => if (!supportManagers) m else m.copy(managers = m.managers.map(_.copy(supportsHint = TransferSizes(1, m.maxTransfer)))) }) + clientFn = { case Seq(c) => if (!supportClients) c else c.copy(minLatency = 0, clients = c.clients .map(_.copy(supportsHint = TransferSizes(1, c.maxTransfer)))) }, + managerFn = { case Seq(m) => if (!supportManagers) m else m.copy(minLatency = 0, managers = m.managers.map(_.copy(supportsHint = TransferSizes(1, 
m.maxTransfer)))) }) lazy val module = new LazyModuleImp(this) { val io = new Bundle { diff --git a/src/main/scala/uncore/tilelink2/IntNodes.scala b/src/main/scala/uncore/tilelink2/IntNodes.scala index a4acc443..5ddfcd5b 100644 --- a/src/main/scala/uncore/tilelink2/IntNodes.scala +++ b/src/main/scala/uncore/tilelink2/IntNodes.scala @@ -60,10 +60,12 @@ object IntImp extends NodeImp[IntSourcePortParameters, IntSinkPortParameters, In Vec(ei.size, Vec(ei.map(_.source.num).max, Bool())).flip } - def connect(bo: Vec[Bool], eo: IntEdge, bi: Vec[Bool], ei: IntEdge)(implicit sourceInfo: SourceInfo): Unit = { - require (eo == ei) - // Cannot use bulk connect, because the widths could differ - (bo zip bi) foreach { case (o, i) => i := o } + def connect(bo: => Vec[Bool], eo: => IntEdge, bi: => Vec[Bool], ei: => IntEdge)(implicit sourceInfo: SourceInfo): (Option[LazyModule], () => Unit) = { + (None, () => { + require (eo == ei) + // Cannot use bulk connect, because the widths could differ + (bo zip bi) foreach { case (o, i) => i := o } + }) } override def mixO(po: IntSourcePortParameters, node: IntBaseNode): IntSourcePortParameters = diff --git a/src/main/scala/uncore/tilelink2/Legacy.scala b/src/main/scala/uncore/tilelink2/Legacy.scala index e55f8c05..e4338ad1 100644 --- a/src/main/scala/uncore/tilelink2/Legacy.scala +++ b/src/main/scala/uncore/tilelink2/Legacy.scala @@ -67,16 +67,17 @@ class TLLegacy(implicit val p: Parameters) extends LazyModule with HasTileLinkPa // Only create atomic messages if TL2 managers support them val atomics = if (edge.manager.anySupportLogical) { + val size = io.legacy.acquire.bits.op_size() MuxLookup(io.legacy.acquire.bits.op_code(), Wire(new TLBundleA(edge.bundle)), Array( - MemoryOpConstants.M_XA_SWAP -> edge.Logical(source, address, beat, data, TLAtomics.SWAP)._2, - MemoryOpConstants.M_XA_XOR -> edge.Logical(source, address, beat, data, TLAtomics.XOR) ._2, - MemoryOpConstants.M_XA_OR -> edge.Logical(source, address, beat, data, TLAtomics.OR) 
._2, - MemoryOpConstants.M_XA_AND -> edge.Logical(source, address, beat, data, TLAtomics.AND) ._2, - MemoryOpConstants.M_XA_ADD -> edge.Arithmetic(source, address, beat, data, TLAtomics.ADD)._2, - MemoryOpConstants.M_XA_MIN -> edge.Arithmetic(source, address, beat, data, TLAtomics.MIN)._2, - MemoryOpConstants.M_XA_MAX -> edge.Arithmetic(source, address, beat, data, TLAtomics.MAX)._2, - MemoryOpConstants.M_XA_MINU -> edge.Arithmetic(source, address, beat, data, TLAtomics.MINU)._2, - MemoryOpConstants.M_XA_MAXU -> edge.Arithmetic(source, address, beat, data, TLAtomics.MAXU)._2)) + MemoryOpConstants.M_XA_SWAP -> edge.Logical(source, address, size, data, TLAtomics.SWAP)._2, + MemoryOpConstants.M_XA_XOR -> edge.Logical(source, address, size, data, TLAtomics.XOR) ._2, + MemoryOpConstants.M_XA_OR -> edge.Logical(source, address, size, data, TLAtomics.OR) ._2, + MemoryOpConstants.M_XA_AND -> edge.Logical(source, address, size, data, TLAtomics.AND) ._2, + MemoryOpConstants.M_XA_ADD -> edge.Arithmetic(source, address, size, data, TLAtomics.ADD)._2, + MemoryOpConstants.M_XA_MIN -> edge.Arithmetic(source, address, size, data, TLAtomics.MIN)._2, + MemoryOpConstants.M_XA_MAX -> edge.Arithmetic(source, address, size, data, TLAtomics.MAX)._2, + MemoryOpConstants.M_XA_MINU -> edge.Arithmetic(source, address, size, data, TLAtomics.MINU)._2, + MemoryOpConstants.M_XA_MAXU -> edge.Arithmetic(source, address, size, data, TLAtomics.MAXU)._2)) } else { // If no managers support atomics, assert fail if TL1 asks for them assert (!io.legacy.acquire.valid || io.legacy.acquire.bits.a_type =/= Acquire.putAtomicType) @@ -120,7 +121,7 @@ class TLLegacy(implicit val p: Parameters) extends LazyModule with HasTileLinkPa val grant = io.legacy.grant.bits grant.g_type := MuxLookup(out.d.bits.opcode, Grant.prefetchAckType, Array( TLMessages.AccessAck -> Grant.putAckType, - TLMessages.AccessAckData -> Mux(out.d.bits.size === beat, Grant.getDataBeatType, Grant.getDataBlockType), + TLMessages.AccessAckData 
-> Mux(out.d.bits.size === block, Grant.getDataBlockType, Grant.getDataBeatType), TLMessages.HintAck -> Grant.prefetchAckType)) grant.is_builtin_type := Bool(true) grant.client_xact_id := out.d.bits.source diff --git a/src/main/scala/uncore/tilelink2/Monitor.scala b/src/main/scala/uncore/tilelink2/Monitor.scala index bd07931b..ae686e08 100644 --- a/src/main/scala/uncore/tilelink2/Monitor.scala +++ b/src/main/scala/uncore/tilelink2/Monitor.scala @@ -4,9 +4,8 @@ package uncore.tilelink2 import Chisel._ import chisel3.internal.sourceinfo.{SourceInfo, SourceLine} -import chisel3.util.{Irrevocable, IrrevocableIO} -object TLMonitor +class TLMonitor(gen: () => TLBundleSnoop, edge: () => TLEdge, sourceInfo: SourceInfo) extends LazyModule { def extra(implicit sourceInfo: SourceInfo) = { sourceInfo match { @@ -273,7 +272,7 @@ object TLMonitor assert (edge.manager.containsById(bundle.sink), "'E' channels carries invalid sink ID" + extra) } - def legalizeFormat(bundle: TLBundle, edge: TLEdge)(implicit sourceInfo: SourceInfo) = { + def legalizeFormat(bundle: TLBundleSnoop, edge: TLEdge)(implicit sourceInfo: SourceInfo) = { when (bundle.a.valid) { legalizeFormatA(bundle.a.bits, edge) } when (bundle.b.valid) { legalizeFormatB(bundle.b.bits, edge) } when (bundle.c.valid) { legalizeFormatC(bundle.c.bits, edge) } @@ -281,7 +280,7 @@ object TLMonitor when (bundle.e.valid) { legalizeFormatE(bundle.e.bits, edge) } } - def legalizeMultibeatA(a: IrrevocableIO[TLBundleA], edge: TLEdge)(implicit sourceInfo: SourceInfo) { + def legalizeMultibeatA(a: IrrevocableSnoop[TLBundleA], edge: TLEdge)(implicit sourceInfo: SourceInfo) { val counter = RegInit(UInt(0, width = log2Up(edge.maxTransfer))) val opcode = Reg(UInt()) val param = Reg(UInt()) @@ -308,7 +307,7 @@ object TLMonitor } } - def legalizeMultibeatB(b: IrrevocableIO[TLBundleB], edge: TLEdge)(implicit sourceInfo: SourceInfo) { + def legalizeMultibeatB(b: IrrevocableSnoop[TLBundleB], edge: TLEdge)(implicit sourceInfo: SourceInfo) { val 
counter = RegInit(UInt(0, width = log2Up(edge.maxTransfer))) val opcode = Reg(UInt()) val param = Reg(UInt()) @@ -335,7 +334,7 @@ object TLMonitor } } - def legalizeMultibeatC(c: IrrevocableIO[TLBundleC], edge: TLEdge)(implicit sourceInfo: SourceInfo) { + def legalizeMultibeatC(c: IrrevocableSnoop[TLBundleC], edge: TLEdge)(implicit sourceInfo: SourceInfo) { val counter = RegInit(UInt(0, width = log2Up(edge.maxTransfer))) val opcode = Reg(UInt()) val param = Reg(UInt()) @@ -365,7 +364,7 @@ object TLMonitor } } - def legalizeMultibeatD(d: IrrevocableIO[TLBundleD], edge: TLEdge)(implicit sourceInfo: SourceInfo) { + def legalizeMultibeatD(d: IrrevocableSnoop[TLBundleD], edge: TLEdge)(implicit sourceInfo: SourceInfo) { val counter = RegInit(UInt(0, width = log2Up(edge.maxTransfer))) val opcode = Reg(UInt()) val param = Reg(UInt()) @@ -395,14 +394,14 @@ object TLMonitor } } - def legalizeMultibeat(bundle: TLBundle, edge: TLEdge)(implicit sourceInfo: SourceInfo) { + def legalizeMultibeat(bundle: TLBundleSnoop, edge: TLEdge)(implicit sourceInfo: SourceInfo) { legalizeMultibeatA(bundle.a, edge) legalizeMultibeatB(bundle.b, edge) legalizeMultibeatC(bundle.c, edge) legalizeMultibeatD(bundle.d, edge) } - def legalizeIrrevocable(irr: IrrevocableIO[TLChannel], edge: TLEdge)(implicit sourceInfo: SourceInfo) { + def legalizeIrrevocable(irr: IrrevocableSnoop[TLChannel], edge: TLEdge)(implicit sourceInfo: SourceInfo) { val last_v = RegNext(irr.valid, Bool(false)) val last_r = RegNext(irr.ready, Bool(false)) val last_b = RegNext(irr.bits) @@ -414,7 +413,7 @@ object TLMonitor } } - def legalizeIrrevocable(bundle: TLBundle, edge: TLEdge)(implicit sourceInfo: SourceInfo) { + def legalizeIrrevocable(bundle: TLBundleSnoop, edge: TLEdge)(implicit sourceInfo: SourceInfo) { legalizeIrrevocable(bundle.a, edge) legalizeIrrevocable(bundle.b, edge) legalizeIrrevocable(bundle.c, edge) @@ -422,10 +421,56 @@ object TLMonitor legalizeIrrevocable(bundle.e, edge) } - def legalize(bundle: TLBundle, 
edge: TLEdge)(implicit sourceInfo: SourceInfo) { + def legalizeSourceUnique(bundle: TLBundleSnoop, edge: TLEdge)(implicit sourceInfo: SourceInfo) { + val inflight = RegInit(UInt(0, width = edge.client.endSourceId)) + + val a_counter = RegInit(UInt(0, width = log2Up(edge.maxTransfer))) + val a_beats1 = edge.numBeats1(bundle.a.bits) + val a_first = a_counter === UInt(0) + val a_last = a_counter === UInt(1) || a_beats1 === UInt(0) + + val d_counter = RegInit(UInt(0, width = log2Up(edge.maxTransfer))) + val d_beats1 = edge.numBeats1(bundle.d.bits) + val d_first = d_counter === UInt(0) + val d_last = d_counter === UInt(1) || d_beats1 === UInt(0) + + val bypass = bundle.a.bits.source === bundle.d.bits.source + val a_bypass = bypass && bundle.d.valid && d_last + val d_bypass = bypass && bundle.a.valid && a_last + + if (edge.manager.minLatency > 0) { + assert(!bypass || !bundle.a.valid || !bundle.d.valid, s"'A' and 'D' concurrent, despite minlatency ${edge.manager.minLatency}" + extra) + } + + val a_set = Wire(init = UInt(0, width = edge.client.endSourceId)) + when (bundle.a.fire()) { + a_counter := Mux(a_first, a_beats1, a_counter - UInt(1)) + when (a_last) { a_set := UIntToOH(bundle.a.bits.source) } + assert(a_bypass || !inflight(bundle.a.bits.source), "'A' channel re-used a source ID" + extra) + } + + val d_clr = Wire(init = UInt(0, width = edge.client.endSourceId)) + when (bundle.d.fire() && bundle.d.bits.opcode =/= TLMessages.ReleaseAck) { + d_counter := Mux(d_first, d_beats1, d_counter - UInt(1)) + when (d_last) { d_clr := UIntToOH(bundle.d.bits.source) } + assert(d_bypass || inflight(bundle.d.bits.source), "'D' channel acknowledged for nothing inflight" + extra) + } + + inflight := (inflight | a_set) & ~d_clr + } + + def legalize(bundle: TLBundleSnoop, edge: TLEdge)(implicit sourceInfo: SourceInfo) { legalizeFormat (bundle, edge) legalizeMultibeat (bundle, edge) legalizeIrrevocable(bundle, edge) - // !!! 
validate source uniqueness + legalizeSourceUnique(bundle, edge) + } + + lazy val module = new LazyModuleImp(this) { + val io = new Bundle { + val in = gen().asInput + } + + legalize(io.in, edge())(sourceInfo) } } diff --git a/src/main/scala/uncore/tilelink2/Nodes.scala b/src/main/scala/uncore/tilelink2/Nodes.scala index c9482e69..40819fd3 100644 --- a/src/main/scala/uncore/tilelink2/Nodes.scala +++ b/src/main/scala/uncore/tilelink2/Nodes.scala @@ -16,7 +16,7 @@ abstract class NodeImp[PO, PI, EO, EI, B <: Data] def edgeI(po: PO, pi: PI): EI def bundleO(eo: Seq[EO]): Vec[B] def bundleI(ei: Seq[EI]): Vec[B] - def connect(bo: B, eo: EO, bi: B, ei: EI)(implicit sourceInfo: SourceInfo): Unit + def connect(bo: => B, eo: => EO, bi: => B, ei: => EI)(implicit sourceInfo: SourceInfo): (Option[LazyModule], () => Unit) // If you want to track parameters as they flow through nodes, overload these: def mixO(po: PO, node: BaseNode[PO, PI, EO, EI, B]): PO = po def mixI(pi: PI, node: BaseNode[PO, PI, EO, EI, B]): PI = pi @@ -79,7 +79,7 @@ class BaseNode[PO, PI, EO, EI, B <: Data](imp: NodeImp[PO, PI, EO, EI, B])( def connectOut = bundleOut def connectIn = bundleIn - def := (y: BaseNode[PO, PI, EO, EI, B])(implicit sourceInfo: SourceInfo) = { + def := (y: BaseNode[PO, PI, EO, EI, B])(implicit sourceInfo: SourceInfo): Option[LazyModule] = { val x = this // x := y val info = sourceLine(sourceInfo, " at ", "") require (!LazyModule.stack.isEmpty, s"${y.name} cannot be connected to ${x.name} outside of LazyModule scope" + info) @@ -91,9 +91,9 @@ class BaseNode[PO, PI, EO, EI, B <: Data](imp: NodeImp[PO, PI, EO, EI, B])( val o = y.accPO.size y.accPO += ((i, x)) x.accPI += ((o, y)) - LazyModule.stack.head.bindings = (() => { - imp.connect(y.connectOut(o), y.edgesOut(o), x.connectIn(i), x.edgesIn(i)) - }) :: LazyModule.stack.head.bindings + val (out, binding) = imp.connect(y.connectOut(o), y.edgesOut(o), x.connectIn(i), x.edgesIn(i)) + LazyModule.stack.head.bindings = binding :: 
LazyModule.stack.head.bindings + out } } diff --git a/src/main/scala/uncore/tilelink2/Parameters.scala b/src/main/scala/uncore/tilelink2/Parameters.scala index ca5fe0e7..0f41042b 100644 --- a/src/main/scala/uncore/tilelink2/Parameters.scala +++ b/src/main/scala/uncore/tilelink2/Parameters.scala @@ -173,10 +173,14 @@ case class TLManagerParameters( }) } -case class TLManagerPortParameters(managers: Seq[TLManagerParameters], beatBytes: Int) +case class TLManagerPortParameters( + managers: Seq[TLManagerParameters], + beatBytes: Int, + minLatency: Int = 0) { require (!managers.isEmpty) require (isPow2(beatBytes)) + require (minLatency >= 0) // Require disjoint ranges for Ids and addresses managers.combinations(2).foreach({ case Seq(x,y) => @@ -289,8 +293,13 @@ case class TLClientParameters( val name = nodePath.lastOption.map(_.lazyModule.name).getOrElse("disconnected") } -case class TLClientPortParameters(clients: Seq[TLClientParameters]) { +case class TLClientPortParameters( + clients: Seq[TLClientParameters], + unsafeAtomics: Boolean = false, + minLatency: Int = 0) // Atomics are executed as get+put +{ require (!clients.isEmpty) + require (minLatency >= 0) // Require disjoint ranges for Ids clients.combinations(2).foreach({ case Seq(x,y) => diff --git a/src/main/scala/uncore/tilelink2/RAMModel.scala b/src/main/scala/uncore/tilelink2/RAMModel.scala index 6d75475a..bb8f983a 100644 --- a/src/main/scala/uncore/tilelink2/RAMModel.scala +++ b/src/main/scala/uncore/tilelink2/RAMModel.scala @@ -104,7 +104,7 @@ class TLRAMModel extends LazyModule a_flight.opcode := in.a.bits.opcode flight(in.a.bits.source) := a_flight - val bypass = in.a.valid && in.a.bits.source === out.d.bits.source + val bypass = if (edge.manager.minLatency > 0) Bool(false) else in.a.valid && in.a.bits.source === out.d.bits.source val d_flight = RegNext(Mux(bypass, a_flight, flight(out.d.bits.source))) // Process A access requests @@ -135,7 +135,6 @@ class TLRAMModel extends LazyModule // Record the 
request so we can handle it's response a_counter := Mux(a_first, a_beats1, a_counter1) - // !!! atomics assert (a.opcode =/= TLMessages.Acquire) // Mark the operation as valid @@ -149,19 +148,24 @@ class TLRAMModel extends LazyModule inc_trees_wen := a_sizeOH >> (shift+1) } - when (a.opcode === TLMessages.PutFullData || a.opcode === TLMessages.PutPartialData) { + when (a.opcode === TLMessages.PutFullData || a.opcode === TLMessages.PutPartialData || + a.opcode === TLMessages.ArithmeticData || a.opcode === TLMessages.LogicalData) { shadow_wen := a.mask for (i <- 0 until beatBytes) { val busy = a_inc(i) - a_dec(i) - (!a_first).asUInt val byte = a.data(8*(i+1)-1, 8*i) when (a.mask(i)) { - printf("P 0x%x := 0x%x #%d\n", a_addr_hi << shift | UInt(i), byte, busy) + when (a.opcode === TLMessages.PutFullData) { printf("PF") } + when (a.opcode === TLMessages.PutPartialData) { printf("PP") } + when (a.opcode === TLMessages.ArithmeticData) { printf("A ") } + when (a.opcode === TLMessages.LogicalData) { printf("L ") } + printf(" 0x%x := 0x%x #%d %x\n", a_addr_hi << shift | UInt(i), byte, busy, a.param) } } } when (a.opcode === TLMessages.Get) { - printf("G 0x%x - 0%x\n", a_base, a_base | UIntToOH1(a_size, addressBits)) + printf("G 0x%x - 0%x\n", a_base, a_base | UIntToOH1(a_size, addressBits)) } } @@ -169,7 +173,9 @@ class TLRAMModel extends LazyModule for (i <- 0 until beatBytes) { val data = Wire(new ByteMonitor) val busy = a_inc(i) =/= a_dec(i) + (!a_first).asUInt - data.valid := Mux(wipe, Bool(false), !busy || a_fifo) + val amo = a.opcode === TLMessages.ArithmeticData || a.opcode === TLMessages.LogicalData + data.valid := Mux(wipe, Bool(false), (!busy || a_fifo) && !amo) + // !!! calculate the AMO? 
data.value := a.data(8*(i+1)-1, 8*i) when (shadow_wen(i)) { shadow(i).write(a_waddr, data) @@ -250,26 +256,30 @@ class TLRAMModel extends LazyModule when (d_flight.opcode === TLMessages.PutFullData || d_flight.opcode === TLMessages.PutPartialData) { assert (d.opcode === TLMessages.AccessAck) - printf("p 0x%x - 0x%x\n", d_base, d_base | UIntToOH1(d_size, addressBits)) + when (d_flight.opcode === TLMessages.PutFullData) { printf("pf") } + when (d_flight.opcode === TLMessages.PutPartialData) { printf("pp") } + printf(" 0x%x - 0x%x\n", d_base, d_base | UIntToOH1(d_size, addressBits)) } - // !!! atomics - - when (d_flight.opcode === TLMessages.Get) { + when (d_flight.opcode === TLMessages.Get || d_flight.opcode === TLMessages.ArithmeticData || d_flight.opcode === TLMessages.LogicalData) { assert (d.opcode === TLMessages.AccessAckData) for (i <- 0 until beatBytes) { val got = d.data(8*(i+1)-1, 8*i) val shadow = Wire(init = d_shadow(i)) when (d_mask(i)) { val d_addr = d_addr_hi << shift | UInt(i) + when (d_flight.opcode === TLMessages.Get) { printf("g ") } + when (d_flight.opcode === TLMessages.ArithmeticData) { printf("a ") } + when (d_flight.opcode === TLMessages.LogicalData) { printf("l ") } + printf(" 0x%x := 0x%x", d_addr, got) when (!shadow.valid) { - printf("g 0x%x := undefined (uninitialized or prior overlapping puts)\n", d_addr) + printf(", undefined (uninitialized or prior overlapping puts)\n") } .elsewhen (d_inc(i) =/= d_dec(i)) { - printf("g 0x%x := undefined (concurrent incomplete puts #%d)\n", d_addr, d_inc(i) - d_dec(i)) + printf(", undefined (concurrent incomplete puts #%d)\n", d_inc(i) - d_dec(i)) } .elsewhen (!d_fifo && !d_valid) { - printf("g 0x%x := undefined (concurrent completed put)\n", d_addr) + printf(", undefined (concurrent completed put)\n") } .otherwise { - printf("g 0x%x := 0x%x\n", d_addr, got) + printf("\n") assert (shadow.value === got) } } diff --git a/src/main/scala/uncore/tilelink2/RegMapper.scala 
b/src/main/scala/uncore/tilelink2/RegMapper.scala index 6678e500..5a661af8 100644 --- a/src/main/scala/uncore/tilelink2/RegMapper.scala +++ b/src/main/scala/uncore/tilelink2/RegMapper.scala @@ -28,7 +28,7 @@ class RegMapperOutput(params: RegMapperParams) extends GenericParameterizedBundl object RegMapper { // Create a generic register-based device - def apply(bytes: Int, concurrency: Option[Int], undefZero: Boolean, in: DecoupledIO[RegMapperInput], mapping: RegField.Map*) = { + def apply(bytes: Int, concurrency: Int, undefZero: Boolean, in: DecoupledIO[RegMapperInput], mapping: RegField.Map*) = { val regmap = mapping.toList.filter(!_._2.isEmpty) require (!regmap.isEmpty) @@ -49,9 +49,9 @@ object RegMapper // Must this device pipeline the control channel? val pipelined = regmap.map(_._2.map(_.pipelined)).flatten.reduce(_ || _) - val depth = concurrency.getOrElse(if (pipelined) 1 else 0) + val depth = concurrency require (depth >= 0) - require (!pipelined || depth > 0) + require (!pipelined || depth > 0, "Register-based device with request/response handshaking needs concurrency > 0") val back = if (depth > 0) Queue(front, depth, pipe = depth == 1) else front // Convert to and from Bits diff --git a/src/main/scala/uncore/tilelink2/RegisterRouter.scala b/src/main/scala/uncore/tilelink2/RegisterRouter.scala index 7b6bdc21..f7b77685 100644 --- a/src/main/scala/uncore/tilelink2/RegisterRouter.scala +++ b/src/main/scala/uncore/tilelink2/RegisterRouter.scala @@ -3,14 +3,16 @@ package uncore.tilelink2 import Chisel._ +import scala.math.{min,max} -class TLRegisterNode(address: AddressSet, concurrency: Option[Int] = None, beatBytes: Int = 4, undefZero: Boolean = true) +class TLRegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int = 4, undefZero: Boolean = true) extends TLManagerNode(beatBytes, TLManagerParameters( address = Seq(address), supportsGet = TransferSizes(1, beatBytes), supportsPutPartial = TransferSizes(1, beatBytes), supportsPutFull = 
TransferSizes(1, beatBytes), - fifoId = Some(0))) // requests are handled in order + fifoId = Some(0)), // requests are handled in order + minLatency = min(concurrency, 1)) // the Queue adds at least one cycle { require (address.contiguous) @@ -64,7 +66,7 @@ class TLRegisterNode(address: AddressSet, concurrency: Option[Int] = None, beatB object TLRegisterNode { - def apply(address: AddressSet, concurrency: Option[Int] = None, beatBytes: Int = 4, undefZero: Boolean = true) = + def apply(address: AddressSet, concurrency: Int = 0, beatBytes: Int = 4, undefZero: Boolean = true) = new TLRegisterNode(address, concurrency, beatBytes, undefZero) } @@ -72,7 +74,7 @@ object TLRegisterNode // register mapped device from a totally abstract register mapped device. // See GPIO.scala in this directory for an example -abstract class TLRegisterRouterBase(address: AddressSet, interrupts: Int, concurrency: Option[Int], beatBytes: Int, undefZero: Boolean) extends LazyModule +abstract class TLRegisterRouterBase(address: AddressSet, interrupts: Int, concurrency: Int, beatBytes: Int, undefZero: Boolean) extends LazyModule { val node = TLRegisterNode(address, concurrency, beatBytes, undefZero) val intnode = IntSourceNode(interrupts) @@ -97,7 +99,7 @@ class TLRegModule[P, B <: TLRegBundleBase](val params: P, bundleBuilder: => B, r } class TLRegisterRouter[B <: TLRegBundleBase, M <: LazyModuleImp] - (val base: BigInt, val interrupts: Int = 0, val size: BigInt = 4096, val concurrency: Option[Int] = None, val beatBytes: Int = 4, undefZero: Boolean = true) + (val base: BigInt, val interrupts: Int = 0, val size: BigInt = 4096, val concurrency: Int = 0, val beatBytes: Int = 4, undefZero: Boolean = true) (bundleBuilder: TLRegBundleArg => B) (moduleBuilder: (=> B, TLRegisterRouterBase) => M) extends TLRegisterRouterBase(AddressSet(base, size-1), interrupts, concurrency, beatBytes, undefZero) diff --git a/src/main/scala/uncore/tilelink2/RegisterRouterTest.scala 
b/src/main/scala/uncore/tilelink2/RegisterRouterTest.scala index eb6a1944..395bae81 100644 --- a/src/main/scala/uncore/tilelink2/RegisterRouterTest.scala +++ b/src/main/scala/uncore/tilelink2/RegisterRouterTest.scala @@ -216,7 +216,7 @@ trait RRTest0Module extends HasRegMap regmap(RRTest0Map.map:_*) } -class RRTest0(address: BigInt) extends TLRegisterRouter(address, 0, 32, Some(0), 4)( +class RRTest0(address: BigInt) extends TLRegisterRouter(address, 0, 32, 0, 4)( new TLRegBundle((), _) with RRTest0Bundle)( new TLRegModule((), _, _) with RRTest0Module) @@ -255,6 +255,6 @@ trait RRTest1Module extends Module with HasRegMap regmap(map:_*) } -class RRTest1(address: BigInt) extends TLRegisterRouter(address, 0, 32, Some(6), 4)( +class RRTest1(address: BigInt) extends TLRegisterRouter(address, 0, 32, 6, 4)( new TLRegBundle((), _) with RRTest1Bundle)( new TLRegModule((), _, _) with RRTest1Module) diff --git a/src/main/scala/uncore/tilelink2/SRAM.scala b/src/main/scala/uncore/tilelink2/SRAM.scala index 84a775f2..9b5b3d8a 100644 --- a/src/main/scala/uncore/tilelink2/SRAM.scala +++ b/src/main/scala/uncore/tilelink2/SRAM.scala @@ -13,7 +13,8 @@ class TLRAM(address: AddressSet, executable: Boolean = true, beatBytes: Int = 4) supportsGet = TransferSizes(1, beatBytes), supportsPutPartial = TransferSizes(1, beatBytes), supportsPutFull = TransferSizes(1, beatBytes), - fifoId = Some(0))) // requests are handled in order + fifoId = Some(0)), // requests are handled in order + minLatency = 1) // no bypass needed for this device // We require the address range to include an entire beat (for the write mask) require ((address.mask & (beatBytes-1)) == beatBytes-1) diff --git a/src/main/scala/uncore/tilelink2/TLNodes.scala b/src/main/scala/uncore/tilelink2/TLNodes.scala index 7e1c79b5..9bbe91fe 100644 --- a/src/main/scala/uncore/tilelink2/TLNodes.scala +++ b/src/main/scala/uncore/tilelink2/TLNodes.scala @@ -19,10 +19,13 @@ object TLImp extends NodeImp[TLClientPortParameters, 
TLManagerPortParameters, TL Vec(ei.size, TLBundle(ei.map(_.bundle).reduce(_.union(_)))).flip } - def connect(bo: TLBundle, eo: TLEdgeOut, bi: TLBundle, ei: TLEdgeIn)(implicit sourceInfo: SourceInfo): Unit = { - require (eo.asInstanceOf[TLEdgeParameters] == ei.asInstanceOf[TLEdgeParameters]) - TLMonitor.legalize(bo, eo) - bi <> bo + def connect(bo: => TLBundle, eo: => TLEdgeOut, bi: => TLBundle, ei: => TLEdgeIn)(implicit sourceInfo: SourceInfo): (Option[LazyModule], () => Unit) = { + val monitor = LazyModule(new TLMonitor(() => new TLBundleSnoop(bo.params), () => eo, sourceInfo)) + (Some(monitor), () => { + require (eo.asInstanceOf[TLEdgeParameters] == ei.asInstanceOf[TLEdgeParameters]) + bi <> bo + monitor.module.io.in := TLBundleSnoop(bo) + }) } override def mixO(po: TLClientPortParameters, node: TLBaseNode): TLClientPortParameters = @@ -38,8 +41,8 @@ case class TLInputNode() extends InputNode(TLImp) case class TLClientNode(params: TLClientParameters, numPorts: Range.Inclusive = 1 to 1) extends SourceNode(TLImp)(TLClientPortParameters(Seq(params)), numPorts) -case class TLManagerNode(beatBytes: Int, params: TLManagerParameters, numPorts: Range.Inclusive = 1 to 1) - extends SinkNode(TLImp)(TLManagerPortParameters(Seq(params), beatBytes), numPorts) +case class TLManagerNode(beatBytes: Int, params: TLManagerParameters, numPorts: Range.Inclusive = 1 to 1, minLatency: Int = 0) + extends SinkNode(TLImp)(TLManagerPortParameters(Seq(params), beatBytes, minLatency), numPorts) case class TLAdapterNode( clientFn: Seq[TLClientPortParameters] => TLClientPortParameters, diff --git a/src/main/scala/uncore/tilelink2/WidthWidget.scala b/src/main/scala/uncore/tilelink2/WidthWidget.scala index cd2f141a..87e66903 100644 --- a/src/main/scala/uncore/tilelink2/WidthWidget.scala +++ b/src/main/scala/uncore/tilelink2/WidthWidget.scala @@ -10,9 +10,10 @@ import scala.math.{min,max} // innBeatBytes => the new client-facing bus width class TLWidthWidget(innerBeatBytes: Int) extends 
LazyModule { + // Because we stall the request while sending beats, atomics can overlap => minLatency=0 val node = TLAdapterNode( - clientFn = { case Seq(c) => c }, - managerFn = { case Seq(m) => m.copy(beatBytes = innerBeatBytes) }) + clientFn = { case Seq(c) => c.copy(minLatency = 0) }, + managerFn = { case Seq(m) => m.copy(minLatency = 0, beatBytes = innerBeatBytes) }) lazy val module = new LazyModuleImp(this) { val io = new Bundle { diff --git a/src/main/scala/uncore/tilelink2/Xbar.scala b/src/main/scala/uncore/tilelink2/Xbar.scala index 4a5bbf40..1bcbb1bb 100644 --- a/src/main/scala/uncore/tilelink2/Xbar.scala +++ b/src/main/scala/uncore/tilelink2/Xbar.scala @@ -45,24 +45,30 @@ class TLXbar(policy: (Vec[Bool], Bool) => Seq[Bool] = TLXbar.lowestIndex) extend numClientPorts = 1 to 32, numManagerPorts = 1 to 32, clientFn = { seq => - val clients = (mapInputIds(seq) zip seq) flatMap { case (range, port) => - port.clients map { client => client.copy( - sourceId = client.sourceId.shift(range.start) - )} - } - TLClientPortParameters(clients) + // An unsafe atomic port can not be combined with any other! 
+ require (!seq.exists(_.unsafeAtomics) || seq.size == 1) + seq(0).copy( + minLatency = seq.map(_.minLatency).min, + clients = (mapInputIds(seq) zip seq) flatMap { case (range, port) => + port.clients map { client => client.copy( + sourceId = client.sourceId.shift(range.start) + )} + } + ) }, managerFn = { seq => val fifoIdFactory = relabeler() - val managers = (mapOutputIds(seq) zip seq) flatMap { case (range, port) => - require (port.beatBytes == seq(0).beatBytes) - val fifoIdMapper = fifoIdFactory() - port.managers map { manager => manager.copy( - sinkId = manager.sinkId.shift(range.start), - fifoId = manager.fifoId.map(fifoIdMapper(_)) - )} - } - TLManagerPortParameters(managers, seq(0).beatBytes) + seq(0).copy( + minLatency = seq.map(_.minLatency).min, + managers = (mapOutputIds(seq) zip seq) flatMap { case (range, port) => + require (port.beatBytes == seq(0).beatBytes) + val fifoIdMapper = fifoIdFactory() + port.managers map { manager => manager.copy( + sinkId = manager.sinkId.shift(range.start), + fifoId = manager.fifoId.map(fifoIdMapper(_)) + )} + } + ) }) lazy val module = new LazyModuleImp(this) { diff --git a/src/main/scala/uncore/tilelink2/package.scala b/src/main/scala/uncore/tilelink2/package.scala index 050a4b6d..232e491d 100644 --- a/src/main/scala/uncore/tilelink2/package.scala +++ b/src/main/scala/uncore/tilelink2/package.scala @@ -10,6 +10,12 @@ package object tilelink2 def OH1ToUInt(x: UInt) = OHToUInt((x << 1 | UInt(1)) ^ x) def UIntToOH1(x: UInt, width: Int) = ~(SInt(-1, width=width).asUInt << x)(width-1, 0) def trailingZeros(x: Int) = if (x > 0) Some(log2Ceil(x & -x)) else None + def highOR(x: UInt) = { + val w = x.getWidth + def helper(s: Int, x: UInt): UInt = + if (s >= w) x else helper(s+s, x | (x << s)(w-1,0)) + helper(1, x) + } def sourceLine(sourceInfo: SourceInfo, prefix: String = " (", suffix: String = ")") = sourceInfo match { case SourceLine(filename, line, col) => s"$prefix$filename:$line:$col$suffix"