diff --git a/uncore/src/main/scala/broadcast.scala b/uncore/src/main/scala/broadcast.scala index 98c8f0af..340db34d 100644 --- a/uncore/src/main/scala/broadcast.scala +++ b/uncore/src/main/scala/broadcast.scala @@ -187,7 +187,7 @@ class BroadcastVoluntaryReleaseTracker(trackerId: Int, bankId: Int) extends Broa is(s_outer) { io.outer.acquire.valid := !collect_irel_data || irel_data_valid(oacq_data_cnt) when(oacq_data_done) { - state := Mux(xact.requiresAck(), s_grant, s_idle) + state := s_grant // converted irel to oacq, so expect grant TODO: Mux(xact.requiresAck(), s_grant, s_idle) ? } } is(s_grant) { // Forward the Grant.voluntaryAck @@ -221,13 +221,13 @@ class BroadcastAcquireTracker(trackerId: Int, bankId: Int) extends BroadcastXact Acquire.prefetchType).contains(xact.a_type)), "Broadcast Hub does not support PutAtomics, subblock Gets/Puts, or prefetches") // TODO - val release_count = Reg(init=UInt(0, width = log2Up(nCoherentClients+1))) - val pending_probes = Reg(init=Bits(0, width = nCoherentClients)) + val release_count = Reg(init=UInt(0, width = log2Up(io.inner.tlNCoherentClients+1))) + val pending_probes = Reg(init=Bits(0, width = io.inner.tlNCoherentClients)) val curr_p_id = PriorityEncoder(pending_probes) val full_sharers = coh.full() val probe_self = io.inner.acquire.bits.payload.requiresSelfProbe() - val mask_self_true = UInt(UInt(1) << io.inner.acquire.bits.header.src, width = nCoherentClients) - val mask_self_false = ~UInt(UInt(1) << io.inner.acquire.bits.header.src, width = nCoherentClients) + val mask_self_true = UInt(UInt(1) << io.inner.acquire.bits.header.src, width = io.inner.tlNCoherentClients) + val mask_self_false = ~UInt(UInt(1) << io.inner.acquire.bits.header.src, width = io.inner.tlNCoherentClients) val mask_self = Mux(probe_self, full_sharers | mask_self_true, full_sharers & mask_self_false) val mask_incoherent = mask_self & ~io.incoherent.toBits diff --git a/uncore/src/main/scala/cache.scala b/uncore/src/main/scala/cache.scala index e399eb98..b193c5a7 100644 --- a/uncore/src/main/scala/cache.scala +++ b/uncore/src/main/scala/cache.scala @@ -182,7 +182,21 @@ abstract trait L2HellaCacheParameters extends CacheParameters with CoherenceAgen } abstract class L2HellaCacheBundle extends Bundle with L2HellaCacheParameters -abstract class L2HellaCacheModule extends Module with L2HellaCacheParameters +abstract class L2HellaCacheModule extends Module with L2HellaCacheParameters { + def doInternalOutputArbitration[T <: Data : ClassTag]( + out: DecoupledIO[T], + ins: Seq[DecoupledIO[T]]) { + val arb = Module(new RRArbiter(out.bits.clone, ins.size)) + out <> arb.io.out + arb.io.in zip ins map { case (a, in) => a <> in } + } + + def doInternalInputRouting[T <: HasL2Id](in: ValidIO[T], outs: Seq[ValidIO[T]]) { + val idx = in.bits.id + outs.map(_.bits := in.bits) + outs.zipWithIndex.map { case (o,i) => o.valid := in.valid && idx === UInt(i) } + } +} trait HasL2Id extends Bundle with CoherenceAgentParameters { val id = UInt(width = log2Up(nTransactors + 1)) @@ -361,8 +375,8 @@ class TSHRFile(bankId: Int) extends L2HellaCacheModule // WritebackUnit evicts data from L2, including invalidating L1s val wb = Module(new L2WritebackUnit(nTransactors, bankId)) - doOutputArbitration(wb.io.wb.req, trackerList.map(_.io.wb.req)) - doInputRouting(wb.io.wb.resp, trackerList.map(_.io.wb.resp)) + doInternalOutputArbitration(wb.io.wb.req, trackerList.map(_.io.wb.req)) + doInternalInputRouting(wb.io.wb.resp, trackerList.map(_.io.wb.resp)) // Propagate incoherence flags (trackerList.map(_.io.incoherent) :+ wb.io.incoherent).map( _ := io.incoherent.toBits) @@ -409,13 +423,13 @@ class TSHRFile(bankId: Int) extends L2HellaCacheModule outerList zip outer_arb.io.in map { case(out, arb) => out <> arb } io.outer <> outer_arb.io.out - // Wire local memories - doOutputArbitration(io.meta.read, trackerList.map(_.io.meta.read)) - doOutputArbitration(io.meta.write, trackerList.map(_.io.meta.write)) - doOutputArbitration(io.data.read, trackerList.map(_.io.data.read) :+ wb.io.data.read) - doOutputArbitration(io.data.write, trackerList.map(_.io.data.write)) - doInputRouting(io.meta.resp, trackerList.map(_.io.meta.resp)) - doInputRouting(io.data.resp, trackerList.map(_.io.data.resp) :+ wb.io.data.resp) + // Wire local memory arrays + doInternalOutputArbitration(io.meta.read, trackerList.map(_.io.meta.read)) + doInternalOutputArbitration(io.meta.write, trackerList.map(_.io.meta.write)) + doInternalOutputArbitration(io.data.read, trackerList.map(_.io.data.read) :+ wb.io.data.read) + doInternalOutputArbitration(io.data.write, trackerList.map(_.io.data.write)) + doInternalInputRouting(io.meta.resp, trackerList.map(_.io.meta.resp)) + doInternalInputRouting(io.data.resp, trackerList.map(_.io.data.resp) :+ wb.io.data.resp) } @@ -620,19 +634,19 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { val iacq_data_done = connectIncomingDataBeatCounter(io.inner.acquire) val pending_irels = - connectTwoWayBeatCounter(nCoherentClients, io.inner.probe, io.inner.release)._1 + connectTwoWayBeatCounter(io.inner.tlNCoherentClients, io.inner.probe, io.inner.release)._1 val (pending_ognts, oacq_data_idx, oacq_data_done, ognt_data_idx, ognt_data_done) = connectHeaderlessTwoWayBeatCounter(1, io.outer.acquire, io.outer.grant, xact.addr_beat) val (ignt_data_idx, ignt_data_done) = connectOutgoingDataBeatCounter(io.inner.grant, ignt_q.io.deq.bits.addr_beat) val pending_ifins = connectTwoWayBeatCounter(nSecondaryMisses, io.inner.grant, io.inner.finish, (g: Grant) => g.requiresAck())._1 - val pending_puts = Reg(init=Bits(0, width = innerDataBeats)) - val pending_iprbs = Reg(init = Bits(0, width = nCoherentClients)) - val pending_reads = Reg(init=Bits(0, width = innerDataBeats)) - val pending_writes = Reg(init=Bits(0, width = innerDataBeats)) - val pending_resps = Reg(init=Bits(0, width = innerDataBeats)) - val pending_ignt_data = Reg(init=Bits(0, width = innerDataBeats)) + val pending_puts = Reg(init=Bits(0, width = io.inner.tlDataBeats)) + val pending_iprbs = Reg(init = Bits(0, width = io.inner.tlNCoherentClients)) + val pending_reads = Reg(init=Bits(0, width = io.inner.tlDataBeats)) + val pending_writes = Reg(init=Bits(0, width = io.inner.tlDataBeats)) + val pending_resps = Reg(init=Bits(0, width = io.inner.tlDataBeats)) + val pending_ignt_data = Reg(init=Bits(0, width = io.inner.tlDataBeats)) val pending_meta_write = Reg{ Bool() } val all_pending_done = @@ -1006,8 +1020,8 @@ class L2WritebackUnit(trackerId: Int, bankId: Int) extends L2XactTracker { val xact_id = Reg{ UInt() } val irel_had_data = Reg(init = Bool(false)) - val irel_cnt = Reg(init = UInt(0, width = log2Up(nCoherentClients+1))) - val pending_probes = Reg(init = Bits(0, width = nCoherentClients)) + val irel_cnt = Reg(init = UInt(0, width = log2Up(io.inner.tlNCoherentClients+1))) + val pending_probes = Reg(init = Bits(0, width = io.inner.tlNCoherentClients)) val curr_probe_dst = PriorityEncoder(pending_probes) val full_sharers = io.wb.req.bits.coh.inner.full() val mask_incoherent = full_sharers & ~io.incoherent.toBits diff --git a/uncore/src/main/scala/memserdes.scala b/uncore/src/main/scala/memserdes.scala index 878ce19a..942874cf 100644 --- a/uncore/src/main/scala/memserdes.scala +++ b/uncore/src/main/scala/memserdes.scala @@ -281,7 +281,7 @@ class MemIOTileLinkIOConverter(qDepth: Int) extends TLModule { addr_out := io.tl.release.bits.payload.addr_block has_data := rel_has_data data_from_rel := Bool(true) - make_grant_ack := Bool(true) + make_grant_ack := io.tl.release.bits.payload.requiresAck() tl_done_out := tl_wrap_out tl_buf_out(tl_cnt_out) := io.tl.release.bits.payload.data } .elsewhen(io.tl.acquire.valid) { @@ -352,7 +352,7 @@ class MemIOTileLinkIOConverter(qDepth: Int) extends TLModule { tl_done_out := tl_wrap_out when(io.tl.release.valid) { data_from_rel := Bool(true) - make_grant_ack := Bool(true) + make_grant_ack := io.tl.release.bits.payload.requiresAck() io.mem.req_data.bits.data := io.tl.release.bits.payload.data val tag = Cat(io.tl.release.bits.header.src, io.tl.release.bits.payload.client_xact_id, @@ -366,7 +366,7 @@ class MemIOTileLinkIOConverter(qDepth: Int) extends TLModule { has_data := rel_has_data } .elsewhen(io.tl.acquire.valid) { data_from_rel := Bool(false) - make_grant_ack := acq_has_data + make_grant_ack := acq_has_data // i.e. is it a Put io.mem.req_data.bits.data := io.tl.acquire.bits.payload.data io.mem.req_cmd.bits.rw := acq_has_data val tag = Cat(io.tl.acquire.bits.header.src, diff --git a/uncore/src/main/scala/network.scala b/uncore/src/main/scala/network.scala index 7fdc8e09..f61cec6c 100644 --- a/uncore/src/main/scala/network.scala +++ b/uncore/src/main/scala/network.scala @@ -3,8 +3,6 @@ package uncore import Chisel._ -case object LNManagers extends Field[Int] -case object LNClients extends Field[Int] case object LNEndpoints extends Field[Int] case object LNHeaderBits extends Field[Int] diff --git a/uncore/src/main/scala/tilelink.scala b/uncore/src/main/scala/tilelink.scala index d639f791..1ed95df6 100644 --- a/uncore/src/main/scala/tilelink.scala +++ b/uncore/src/main/scala/tilelink.scala @@ -11,25 +11,33 @@ import scala.reflect.runtime.universe._ // case object TLId extends Field[String] // Unique name per network case object TLCoherencePolicy extends Field[CoherencePolicy] -case object TLBlockAddrBits extends Field[Int] +case object TLNManagers extends Field[Int] +case object TLNClients extends Field[Int] +case object TLNCoherentClients extends Field[Int] +case object TLNIncoherentClients extends Field[Int] case object TLMaxClientXacts extends Field[Int] case object TLMaxClientPorts extends Field[Int] case object TLMaxManagerXacts extends Field[Int] +case object TLBlockAddrBits extends Field[Int] case object TLDataBits extends Field[Int] case object TLDataBeats extends Field[Int] case object TLNetworkIsOrderedP2P extends Field[Boolean] abstract trait TileLinkParameters extends UsesParameters { - val tlBlockAddrBits = params(TLBlockAddrBits) + val tlCoh = params(TLCoherencePolicy) + val tlNManagers = params(TLNManagers) + val tlNClients = params(TLNClients) + val tlNCoherentClients = params(TLNCoherentClients) + val tlNIncoherentClients = params(TLNIncoherentClients) val tlMaxClientXacts = params(TLMaxClientXacts) val tlMaxClientPorts = params(TLMaxClientPorts) val tlMaxManagerXacts = params(TLMaxManagerXacts) val tlClientXactIdBits = log2Up(tlMaxClientXacts*tlMaxClientPorts) val tlManagerXactIdBits = log2Up(tlMaxManagerXacts) + val tlBlockAddrBits = params(TLBlockAddrBits) val tlDataBits = params(TLDataBits) val tlDataBytes = tlDataBits/8 val tlDataBeats = params(TLDataBeats) - val tlCoh = params(TLCoherencePolicy) val tlWriteMaskBits = if(tlDataBits/8 < 1) 1 else tlDataBits/8 val tlBeatAddrBits = log2Up(tlDataBeats) val tlByteAddrBits = log2Up(tlWriteMaskBits) @@ -44,11 +52,11 @@ abstract trait TileLinkParameters extends UsesParameters { val tlGrantTypeBits = max(log2Up(Grant.nBuiltInTypes), tlCoh.grantTypeWidth) + 1 val tlNetworkPreservesPointToPointOrdering = params(TLNetworkIsOrderedP2P) + val tlNetworkDoesNotInterleaveBeats = true val amoAluOperandBits = params(AmoAluOperandBits) } -abstract class TLBundle extends Bundle with TileLinkParameters { -} +abstract class TLBundle extends Bundle with TileLinkParameters abstract class TLModule extends Module with TileLinkParameters // Directionality of message channel @@ -62,7 +70,7 @@ trait ManagerToClientChannel extends TileLinkChannel trait ClientToClientChannel extends TileLinkChannel // Unused for now // Common signals that are used in multiple channels. -// These traits are useful for type parameterization. +// These traits are useful for type parameterizing bundle wiring functions. // trait HasCacheBlockAddress extends TLBundle { val addr_block = UInt(width = tlBlockAddrBits) @@ -554,10 +562,10 @@ trait HasDataBeatCounters { def connectIncomingDataBeatCounters[T <: HasClientId : ClassTag]( in: DecoupledIO[LogicalNetworkIO[T]], - entries: Int): Vec[Bool] = { - val id = in.bits.payload.client_xact_id + entries: Int, + getId: LogicalNetworkIO[T] => UInt): Vec[Bool] = { Vec((0 until entries).map { i => - connectDataBeatCounter(in.fire() && id === UInt(i), in.bits.payload, UInt(0))._2 + connectDataBeatCounter(in.fire() && getId(in.bits) === UInt(i), in.bits.payload, UInt(0))._2 }) } @@ -615,41 +623,51 @@ class FinishQueueEntry extends TLBundle { class FinishQueue(entries: Int) extends Queue(new FinishQueueEntry, entries) -class FinishUnit(srcId: Int = 0) extends TLModule +class FinishUnit(srcId: Int = 0, outstanding: Int = 2) extends TLModule with HasDataBeatCounters { val io = new Bundle { val grant = Decoupled(new LogicalNetworkIO(new Grant)).flip val refill = Decoupled(new Grant) val finish = Decoupled(new LogicalNetworkIO(new Finish)) val ready = Bool(OUTPUT) - val grant_done = Bool(OUTPUT) - val pending_finish = Bool(OUTPUT) } - val entries = 1 << tlClientXactIdBits val g = io.grant.bits.payload - assert(g.client_xact_id <= UInt(entries), "No grant beat counter provisioned, only " + entries) - val done = connectIncomingDataBeatCounters(io.grant, entries).reduce(_||_) - val q = Module(new FinishQueue(entries)) + if(tlNetworkPreservesPointToPointOrdering) { + io.finish.valid := Bool(false) + io.refill.valid := io.grant.valid + io.refill.bits := g + io.grant.ready := io.refill.ready + io.ready := Bool(true) + } else { + // We only want to send Finishes after we have collected all beats of + // a multibeat Grant. But Grants from multiple managers or transactions may + // get interleaved, so we could need a counter for each. + val done = if(tlNetworkDoesNotInterleaveBeats) { + connectIncomingDataBeatCounter(io.grant) + } else { + val entries = 1 << tlClientXactIdBits + def getId(g: LogicalNetworkIO[Grant]) = g.payload.client_xact_id + assert(getId(io.grant.bits) <= UInt(entries), "Not enough grant beat counters, only " + entries + " entries.") + connectIncomingDataBeatCounters(io.grant, entries, getId).reduce(_||_) + } + val q = Module(new FinishQueue(outstanding)) + q.io.enq.valid := io.grant.fire() && g.requiresAck() && (!g.hasMultibeatData() || done) + q.io.enq.bits.fin := g.makeFinish() + q.io.enq.bits.dst := io.grant.bits.header.src - q.io.enq.valid := io.grant.fire() && g.requiresAck() && (!g.hasMultibeatData() || done) - q.io.enq.bits.fin := g.makeFinish() - q.io.enq.bits.dst := io.grant.bits.header.src + io.finish.bits.header.src := UInt(srcId) + io.finish.bits.header.dst := q.io.deq.bits.dst + io.finish.bits.payload := q.io.deq.bits.fin + io.finish.valid := q.io.deq.valid + q.io.deq.ready := io.finish.ready - io.finish.bits.header.src := UInt(srcId) - io.finish.bits.header.dst := q.io.deq.bits.dst - io.finish.bits.payload := q.io.deq.bits.fin - io.finish.valid := q.io.deq.valid - q.io.deq.ready := io.finish.ready - - io.refill.valid := io.grant.valid - io.refill.bits := io.grant.bits.payload - io.grant.ready := (q.io.enq.ready || !g.requiresAck()) && (io.refill.ready || !g.hasData()) - - io.ready := q.io.enq.ready - io.grant_done := done - io.pending_finish := q.io.deq.valid + io.refill.valid := io.grant.valid + io.refill.bits := g + io.grant.ready := (q.io.enq.ready || !g.requiresAck()) && io.refill.ready + io.ready := q.io.enq.ready + } } object TileLinkHeaderOverwriter { diff --git a/uncore/src/main/scala/uncore.scala b/uncore/src/main/scala/uncore.scala index 62a53129..9d1c93b7 100644 --- a/uncore/src/main/scala/uncore.scala +++ b/uncore/src/main/scala/uncore.scala @@ -16,9 +16,6 @@ trait CoherenceAgentParameters extends UsesParameters { val nReleaseTransactors = 1 val nAcquireTransactors = params(NAcquireTransactors) val nTransactors = nReleaseTransactors + nAcquireTransactors - val nCoherentClients = params(NCoherentClients) - val nIncoherentClients = params(NIncoherentClients) - val nClients = nCoherentClients + nIncoherentClients def outerTLParams = params.alterPartial({ case TLId => params(OuterTLId)}) val outerDataBeats = outerTLParams(TLDataBeats) val outerDataBits = outerTLParams(TLDataBits) @@ -35,33 +32,15 @@ abstract class CoherenceAgentBundle extends Bundle with CoherenceAgentParameters abstract class CoherenceAgentModule extends Module with CoherenceAgentParameters trait HasCoherenceAgentWiringHelpers { - def doOutputArbitration[T <: Data : ClassTag]( - out: DecoupledIO[T], - ins: Seq[DecoupledIO[T]]) { - val arb = Module(new RRArbiter(out.bits.clone, ins.size)) - out <> arb.io.out - arb.io.in zip ins map { case (a, in) => a <> in } - } - - def doOutputArbitration[T <: HasTileLinkData : ClassTag, S <: LogicalNetworkIO[T] : ClassTag]( - out: DecoupledIO[S], - ins: Seq[DecoupledIO[S]]) { + def doOutputArbitration[T <: TileLinkChannel : ClassTag]( + out: DecoupledIO[LogicalNetworkIO[T]], + ins: Seq[DecoupledIO[LogicalNetworkIO[T]]]) { def lock(o: LogicalNetworkIO[T]) = o.payload.hasMultibeatData() - val arb = Module(new LockingRRArbiter( - out.bits.clone, - ins.size, - out.bits.payload.tlDataBeats, - lock _)) + val arb = Module(new LockingRRArbiter( out.bits.clone, ins.size, out.bits.payload.tlDataBeats, lock _)) out <> arb.io.out arb.io.in zip ins map { case (a, in) => a <> in } } - def doInputRouting[T <: HasL2Id](in: ValidIO[T], outs: Seq[ValidIO[T]]) { - val idx = in.bits.id - outs.map(_.bits := in.bits) - outs.zipWithIndex.map { case (o,i) => o.valid := in.valid && idx === UInt(i) } - } - def doInputRouting[T <: HasManagerTransactionId]( in: DecoupledIO[LogicalNetworkIO[T]], outs: Seq[DecoupledIO[LogicalNetworkIO[T]]]) { @@ -74,7 +53,7 @@ trait HasCoherenceAgentWiringHelpers { trait HasInnerTLIO extends CoherenceAgentBundle { val inner = Bundle(new TileLinkIO)(innerTLParams).flip - val incoherent = Vec.fill(nCoherentClients){Bool()}.asInput + val incoherent = Vec.fill(inner.tlNCoherentClients){Bool()}.asInput def iacq(dummy: Int = 0) = inner.acquire.bits.payload def iprb(dummy: Int = 0) = inner.probe.bits.payload def irel(dummy: Int = 0) = inner.release.bits.payload @@ -173,6 +152,6 @@ abstract class XactTracker extends CoherenceAgentModule } def dropPendingBitAtDest(in: DecoupledIO[LogicalNetworkIO[Probe]]): UInt = { - ~Fill(nCoherentClients, in.fire()) | ~UIntToOH(in.bits.header.dst) + ~Fill(in.bits.payload.tlNCoherentClients, in.fire()) | ~UIntToOH(in.bits.header.dst) } }