// See LICENSE.SiFive for license details.

package freechips.rocketchip.tilelink

import Chisel._
import freechips.rocketchip.config.Parameters
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.util._
import scala.math.{min,max}

/** TileLink adapter that re-advertises UNCACHED managers as TRACKED and, for each
  * request it accepts, probes every other client that supports Probe before the
  * request is forwarded to the outer port.
  *
  * Toward the managers this node appears as a single client named "TLBroadcast"
  * whose source range is four times the inner range (rounded up to a power of
  * two). The two extra high source bits carry a TLBroadcastConstants tag that
  * tells the D-channel logic how to treat the matching response.
  *
  * @param lineBytes   line size in bytes; required to be a positive power of two
  *                    and at least the outer beatBytes
  * @param numTrackers number of TLBroadcastTracker instances, also advertised as
  *                    endSinkId; required to be > 0
  * @param bufferless  forwarded to every tracker, where it selects a 1-entry
  *                    pipe queue instead of a queue sized for a whole line
  */
class TLBroadcast(lineBytes: Int, numTrackers: Int = 4, bufferless: Boolean = false)(implicit p: Parameters) extends LazyModule {
  require (lineBytes > 0 && isPow2(lineBytes))
  require (numTrackers > 0)

  val node = TLAdapterNode(
    // Collapse all inner clients into one; the source space is widened 4x to
    // make room for the 2-bit tag that the trackers/putfull prepend with Cat.
    clientFn = { cp =>
      cp.copy(clients = Seq(TLClientParameters(
        name = "TLBroadcast",
        sourceId = IdRange(0, 1 << log2Ceil(cp.endSourceId*4)))))
    },
    managerFn = { mp =>
      mp.copy(
        // One sink ID per tracker (D.sink is driven from the tracker one-hot below)
        endSinkId = numTrackers,
        managers = mp.managers.map { m =>
          // We are the last level manager
          require (!m.supportsAcquireB)
          // We only manage addresses which are uncached
          if (m.regionType == RegionType.UNCACHED) {
            // The device had better support line transfers
            val lowerBound = max(m.supportsPutFull.min, m.supportsGet.min)
            require (!m.supportsPutFull || m.supportsPutFull.contains(lineBytes), s"${m.name} only supports PutFull(${m.supportsPutFull}), which does not include $lineBytes")
            require (!m.supportsGet || m.supportsGet .contains(lineBytes), s"${m.name} only supports Get(${m.supportsGet}), which does not include $lineBytes")
            m.copy(
              regionType = RegionType.TRACKED,
              supportsAcquireB = TransferSizes(lowerBound, lineBytes),
              // AcquireT is only offered when the device can take the PutFull write-back
              supportsAcquireT = if (m.supportsPutFull) TransferSizes(lowerBound, lineBytes) else TransferSizes.none,
              // truncate supported accesses to lineBytes (we only ever probe for one line)
              supportsPutFull = TransferSizes(m.supportsPutFull .min, min(m.supportsPutFull .max, lineBytes)),
              supportsPutPartial = TransferSizes(m.supportsPutPartial.min, min(m.supportsPutPartial.max, lineBytes)),
              supportsGet = TransferSizes(m.supportsGet .min, min(m.supportsGet .max, lineBytes)),
              supportsHint = TransferSizes(m.supportsHint .min, min(m.supportsHint .max, lineBytes)),
              supportsArithmetic = TransferSizes(m.supportsArithmetic.min, min(m.supportsArithmetic.max, lineBytes)),
              supportsLogical = TransferSizes(m.supportsLogical .min, min(m.supportsLogical .max, lineBytes)),
              fifoId = None // trackers do not respond in FIFO order!
            )
          } else {
            // Managers that are not UNCACHED are passed through unchanged
            m
          }
        }
      )
    }
  )

  lazy val module = new LazyModuleImp(this) {
    // One independent copy of the logic per (inner, outer) port pair
    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
      val clients = edgeIn.client.clients
      val managers = edgeOut.manager.managers
      val lineShift = log2Ceil(lineBytes)

      import TLBroadcastConstants._

      require (lineBytes >= edgeOut.manager.beatBytes)
      // For the probe walker, we need to identify all the caches
      val caches = clients.filter(_.supportsProbe).map(_.sourceId)
      val cache_targets = caches.map(c => UInt(c.start))

      // Create the request tracker queues
      // (the probe counter is sized to hold values up to caches.size)
      val trackers = Seq.tabulate(numTrackers) { id =>
        Module(new TLBroadcastTracker(id, lineBytes, log2Up(caches.size+1), bufferless, edgeIn, edgeOut)).io
      }

      // We always accept E
      in.e.ready := Bool(true)
      // E.sink selects which tracker sees the acknowledgement
      (trackers zip UIntToOH(in.e.bits.sink).toBools) foreach { case (tracker, select) =>
        tracker.e_last := select && in.e.fire()
      }

      // Depending on the high source bits, we might transform D
      val d_high = log2Ceil(edgeIn.client.endSourceId)
      // The 2-bit tag sits directly above the inner source bits
      val d_what = out.d.bits.source(d_high+1, d_high)
      val d_drop = d_what === DROP
      val d_hasData = edgeOut.hasData(out.d.bits)
      val d_normal = Wire(in.d)
      // A busy tracker owns the response whose (truncated) source matches its own
      val d_trackerOH = Vec(trackers.map { t => !t.idle && t.source === d_normal.bits.source }).asUInt

      // Only the ack of a DROP-tagged put is expected to be dropped
      assert (!out.d.valid || !d_drop || out.d.bits.opcode === TLMessages.AccessAck)

      // Dropped responses are consumed here and never reach the inner D channel
      out.d.ready := d_normal.ready || d_drop
      d_normal.valid := out.d.valid && !d_drop
      d_normal.bits := out.d.bits // truncates source
      // The assignments below override fields of the bulk copy above
      when (d_what(1)) { // TRANSFORM_*
        d_normal.bits.opcode := Mux(d_hasData, TLMessages.GrantData, TLMessages.ReleaseAck)
        // Tag bit 0 distinguishes TRANSFORM_T (toT) from TRANSFORM_B (toB)
        d_normal.bits.param := Mux(d_hasData, Mux(d_what(0), TLPermissions.toT, TLPermissions.toB), UInt(0))
      }
      d_normal.bits.sink := OHToUInt(d_trackerOH)
      assert (!d_normal.valid || (d_trackerOH.orR() || d_normal.bits.opcode === TLMessages.ReleaseAck))

      // A tracker response is anything neither dropped nor a ReleaseAck
      val d_response = d_hasData || !d_what(1)
      val d_last = edgeIn.last(d_normal)
      (trackers zip d_trackerOH.toBools) foreach { case (tracker, select) =>
        tracker.d_last := select && d_normal.fire() && d_response && d_last
        // A dropped D retires one outstanding probe of the matching tracker
        tracker.probedack := select && out.d.fire() && d_drop
      }

      // Incoming C can be:
      // ProbeAck => decrement tracker, drop
      // ProbeAckData => decrement tracker, send out A as PutFull(DROP)
      // ReleaseData => send out A as PutFull(TRANSFORM)
      // Release => send out D as ReleaseAck
      val c_probeack = in.c.bits.opcode === TLMessages.ProbeAck
      val c_probeackdata = in.c.bits.opcode === TLMessages.ProbeAckData
      val c_releasedata = in.c.bits.opcode === TLMessages.ReleaseData
      val c_release = in.c.bits.opcode === TLMessages.Release
      // Trackers are matched to C messages by line address (idle state is not checked here)
      val c_trackerOH = trackers.map { t => t.line === (in.c.bits.address >> lineShift) }
      val c_trackerSrc = Mux1H(c_trackerOH, trackers.map { _.source })

      // Decrement the tracker's outstanding probe counter
      (trackers zip c_trackerOH) foreach { case (tracker, select) =>
        tracker.probenack := in.c.fire() && c_probeack && select
      }

      val releaseack = Wire(in.d)
      val putfull = Wire(out.a)

      // ProbeAck is always consumed; Release waits on the D path, everything
      // else (the data-carrying messages) waits on the outgoing A path
      in.c.ready := c_probeack || Mux(c_release, releaseack.ready, putfull.ready)

      releaseack.valid := in.c.valid && c_release
      releaseack.bits := edgeIn.ReleaseAck(in.c.bits)

      // ReleaseData keeps the releasing client's source and is tagged TRANSFORM_B;
      // ProbeAckData borrows the matching tracker's source and is tagged DROP
      val put_what = Mux(c_releasedata, TRANSFORM_B, DROP)
      val put_who = Mux(c_releasedata, in.c.bits.source, c_trackerSrc)
      putfull.valid := in.c.valid && (c_probeackdata || c_releasedata)
      putfull.bits := edgeOut.Put(Cat(put_what, put_who), in.c.bits.address, in.c.bits.size, in.c.bits.data)._2

      // Combine ReleaseAck or the modified D
      TLArbiter.lowest(edgeOut, in.d, releaseack, d_normal)
      // Combine the PutFull with the trackers
      TLArbiter.lowestFromSeq(edgeOut, out.a, putfull +: trackers.map(_.out_a))

      // The Probe FSM walks all caches and probes them
      // probe_todo holds one bit per cache still to be probed (at least 1 bit wide)
      val probe_todo = RegInit(UInt(0, width = max(1, caches.size)))
      val probe_line = Reg(UInt())
      val probe_perms = Reg(UInt(width = 2))
      // Picks a single pending bit of probe_todo
      // (presumably the lowest set bit, given leftOR's usual meaning -- confirm against util)
      val probe_next = probe_todo & ~(leftOR(probe_todo) << 1)
      val probe_busy = probe_todo.orR()
      val probe_target = if (caches.size == 0) UInt(0) else Mux1H(probe_next, cache_targets)

      // Probe whatever the FSM wants to do next
      in.b.valid := probe_busy
      if (caches.size != 0) {
        in.b.bits := edgeIn.Probe(probe_line << lineShift, probe_target, UInt(lineShift), probe_perms)._2
      }
      // Clear the bit of the cache that was just probed
      when (in.b.fire()) { probe_todo := probe_todo & ~probe_next }

      // Which cache does a request come from?
      val a_cache = if (caches.size == 0) UInt(1) else Vec(caches.map(_.contains(in.a.bits.source))).asUInt
      val a_first = edgeIn.first(in.a)

      // To accept a request from A, the probe FSM must be idle and there must be a matching tracker
      val freeTrackers = Vec(trackers.map { t => t.idle }).asUInt
      val freeTracker = freeTrackers.orR()
      val matchTrackers = Vec(trackers.map { t => t.line === in.a.bits.address >> lineShift }).asUInt
      val matchTracker = matchTrackers.orR()
      // Same single-bit selection as probe_next, applied to the idle trackers
      val allocTracker = freeTrackers & ~(leftOR(freeTrackers) << 1)
      // A tracker already holding this line takes priority over allocating a free one
      val selectTracker = Mux(matchTracker, matchTrackers, allocTracker)

      val trackerReady = Vec(trackers.map(_.in_a.ready)).asUInt
      // Only the first beat of a message is held back while the probe FSM is busy
      in.a.ready := (!a_first || !probe_busy) && (selectTracker & trackerReady).orR()
      (trackers zip selectTracker.toBools) foreach { case (t, select) =>
        t.in_a.valid := in.a.valid && select && (!a_first || !probe_busy)
        t.in_a.bits := in.a.bits
        t.in_a_first := a_first
        // Probe count: every cache, minus the requester when it is itself a cache
        t.probe := (if (caches.size == 0) UInt(0) else Mux(a_cache.orR(), UInt(caches.size-1), UInt(caches.size)))
      }

      // Permission the other caches are probed down to for an Acquire; the
      // MuxLookup default is a fresh 2-bit wire with no assignment
      val acq_perms = MuxLookup(in.a.bits.param, Wire(UInt(width = 2)), Array(
        TLPermissions.NtoB -> TLPermissions.toB,
        TLPermissions.NtoT -> TLPermissions.toN,
        TLPermissions.BtoT -> TLPermissions.toN))

      // Accepting the first beat of a request starts a new probe walk
      when (in.a.fire() && a_first) {
        probe_todo := ~a_cache // probe all but the cache who poked us
        probe_line := in.a.bits.address >> lineShift
        // Writes and atomics probe to N; Get probes to B; Hints depend on their param
        probe_perms := MuxLookup(in.a.bits.opcode, Wire(UInt(width = 2)), Array(
          TLMessages.PutFullData -> TLPermissions.toN,
          TLMessages.PutPartialData -> TLPermissions.toN,
          TLMessages.ArithmeticData -> TLPermissions.toN,
          TLMessages.LogicalData -> TLPermissions.toN,
          TLMessages.Get -> TLPermissions.toB,
          TLMessages.Hint -> MuxLookup(in.a.bits.param, Wire(UInt(width = 2)), Array(
            TLHints.PREFETCH_READ -> TLPermissions.toB,
            TLHints.PREFETCH_WRITE -> TLPermissions.toN)),
          TLMessages.AcquireBlock -> acq_perms,
          TLMessages.AcquirePerm -> acq_perms))
      }

      // The outer TL connections may not be cached
      out.b.ready := Bool(true)
      out.c.valid := Bool(false)
      out.e.valid := Bool(false)
    }
  }
}

/** Factory for [[TLBroadcast]]. */
object TLBroadcast {
  /** Wraps a new TLBroadcast in a LazyModule and returns its diplomatic node.
    *
    * The parameters are passed straight through to the TLBroadcast constructor.
    */
  def apply(lineBytes: Int, numTrackers: Int = 4, bufferless: Boolean = false)(implicit p: Parameters): TLNode = {
    val broadcast = LazyModule(new TLBroadcast(lineBytes, numTrackers, bufferless))
    broadcast.node
  }
}

/** Holds one in-flight A-channel request until all of its probes have been
  * acknowledged, then replays it on out_a.
  *
  * Acquires are replayed as Get, with a TRANSFORM_* tag in the high source bits;
  * every other opcode is replayed unchanged with the PASS tag.
  *
  * @param id             tracker index; only used for the reset value of the address register
  * @param lineBytes      line size in bytes (sets lineShift and the queue depth)
  * @param probeCountBits width of the outstanding-probe counter and of io.probe
  * @param bufferless     when true, use a 1-entry pipe queue for the request beats
  * @param edgeIn         inner edge (bundle parameters of in_a, beatBytes)
  * @param edgeOut        outer edge (bundle parameters of out_a)
  */
class TLBroadcastTracker(id: Int, lineBytes: Int, probeCountBits: Int, bufferless: Boolean, edgeIn: TLEdgeIn, edgeOut: TLEdgeOut) extends Module {
  val io = new Bundle {
    // High while in_a carries the first beat of a message
    val in_a_first = Bool(INPUT)
    val in_a = Decoupled(new TLBundleA(edgeIn.bundle)).flip
    val out_a = Decoupled(new TLBundleA(edgeOut.bundle))
    // Number of probe responses to wait for; latched when a request is accepted
    val probe = UInt(INPUT, width = probeCountBits)
    // Each of these retires one outstanding probe (both may be high in one cycle)
    val probenack = Bool(INPUT)
    val probedack = Bool(INPUT)
    // Marks the request's D response as sent
    val d_last = Bool(INPUT)
    // Marks the E acknowledgement as received
    val e_last = Bool(INPUT)
    val source = UInt(OUTPUT) // the source awaiting D response
    val line = UInt(OUTPUT) // the line waiting for probes
    val idle = Bool(OUTPUT)
  }

  val lineShift = log2Ceil(lineBytes)
  import TLBroadcastConstants._

  // Only one operation can be inflight per line, because we need to be sure
  // we send the request after all the probes we sent and before all the next probes
  val got_e = RegInit(Bool(true))
  val sent_d = RegInit(Bool(true))
  val opcode = Reg(io.in_a.bits.opcode)
  val param = Reg(io.in_a.bits.param)
  val size = Reg(io.in_a.bits.size)
  val source = Reg(io.in_a.bits.source)
  // Resets to line number `id`, so each tracker starts out on a different line
  val address = RegInit(UInt(id << lineShift, width = io.in_a.bits.address.getWidth))
  val count = Reg(UInt(width = probeCountBits))
  // Idle once both the D response and (for Acquires) the E ack are done
  val idle = got_e && sent_d

  // Latch the request on its first beat
  when (io.in_a.fire() && io.in_a_first) {
    assert (idle)
    sent_d := Bool(false)
    // Only Acquires need to wait for an E acknowledgement
    got_e := io.in_a.bits.opcode =/= TLMessages.AcquireBlock && io.in_a.bits.opcode =/= TLMessages.AcquirePerm
    opcode := io.in_a.bits.opcode
    param := io.in_a.bits.param
    size := io.in_a.bits.size
    source := io.in_a.bits.source
    address := io.in_a.bits.address
    count := io.probe
  }
  when (io.d_last) {
    assert (!sent_d)
    sent_d := Bool(true)
  }
  when (io.e_last) {
    assert (!got_e)
    got_e := Bool(true)
  }

  when (io.probenack || io.probedack) {
    assert (count > UInt(0))
    // Two probe responses can retire in the same cycle, hence the decrement by 2
    count := count - Mux(io.probenack && io.probedack, UInt(2), UInt(1))
  }

  io.idle := idle
  io.source := source
  io.line := address >> lineShift

  // Per-beat mask/data is buffered; the header fields live in the registers above
  val i_data = Wire(Decoupled(new TLBroadcastData(edgeIn.bundle)))
  val o_data = Queue(i_data, if (bufferless) 1 else (lineBytes / edgeIn.manager.beatBytes), pipe=bufferless)

  // A first beat is only accepted while idle; later beats are always let through
  io.in_a.ready := (idle || !io.in_a_first) && i_data.ready
  i_data.valid := (idle || !io.in_a_first) && io.in_a.valid
  i_data.bits.mask := io.in_a.bits.mask
  i_data.bits.data := io.in_a.bits.data

  val probe_done = count === UInt(0)
  val acquire = opcode === TLMessages.AcquireBlock || opcode === TLMessages.AcquirePerm

  // Tag for an Acquire's replayed Get; the MuxLookup default is a fresh 2-bit
  // wire with no assignment
  val transform = MuxLookup(param, Wire(UInt(width = 2)), Array(
    TLPermissions.NtoB -> TRANSFORM_B,
    TLPermissions.NtoT -> TRANSFORM_T,
    TLPermissions.BtoT -> TRANSFORM_T))

  // Nothing leaves the tracker until every probe has been acknowledged
  o_data.ready := io.out_a.ready && probe_done
  io.out_a.valid := o_data.valid && probe_done

  io.out_a.bits.opcode := Mux(acquire, TLMessages.Get, opcode)
  io.out_a.bits.param := Mux(acquire, UInt(0), param)
  io.out_a.bits.size := size
  // The tag is prepended above the inner source bits
  io.out_a.bits.source := Cat(Mux(acquire, transform, PASS), source)
  io.out_a.bits.address := address
  io.out_a.bits.mask := o_data.bits.mask
  io.out_a.bits.data := o_data.bits.data
}

/** Tags that TLBroadcast and its trackers prepend to the source ID of outgoing A
  * messages and decode again from the returning D message.
  *
  * In the D-channel logic bit 1 marks a TRANSFORM_* tag and bit 0 then selects
  * toT over toB.
  */
object TLBroadcastConstants {
  def TRANSFORM_T = UInt(3) // response becomes GrantData(toT) or ReleaseAck
  def TRANSFORM_B = UInt(2) // response becomes GrantData(toB) or ReleaseAck
  def DROP = UInt(1) // response is consumed and counted as a probe ack
  def PASS = UInt(0) // response is forwarded with its opcode and param unchanged
}

/** Per-beat payload buffered inside a TLBroadcastTracker: byte mask plus data. */
class TLBroadcastData(params: TLBundleParameters) extends TLBundleBase(params) {
  val mask = UInt(width = params.dataBits/8)
  val data = UInt(width = params.dataBits)
}