From 9708d25dfff7b4a0a6381be398f00dad608a9436 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 24 Mar 2015 02:06:53 -0700 Subject: [PATCH] Restructure L2 state machine and utilize HeaderlessTileLinkIO --- uncore/src/main/scala/broadcast.scala | 43 +- uncore/src/main/scala/cache.scala | 709 ++++++++++++-------------- uncore/src/main/scala/htif.scala | 44 +- uncore/src/main/scala/memserdes.scala | 28 +- uncore/src/main/scala/metadata.scala | 4 +- uncore/src/main/scala/network.scala | 5 +- uncore/src/main/scala/tilelink.scala | 393 +++++++++++--- uncore/src/main/scala/uncore.scala | 93 ++-- uncore/src/main/scala/util.scala | 26 +- 9 files changed, 780 insertions(+), 565 deletions(-) diff --git a/uncore/src/main/scala/broadcast.scala b/uncore/src/main/scala/broadcast.scala index ce72f625..98c8f0af 100644 --- a/uncore/src/main/scala/broadcast.scala +++ b/uncore/src/main/scala/broadcast.scala @@ -52,20 +52,17 @@ class L2BroadcastHub(bankId: Int) extends ManagerCoherenceAgent val acquireConflicts = Vec(trackerList.map(_.io.has_acquire_conflict)).toBits val acquireMatches = Vec(trackerList.map(_.io.has_acquire_match)).toBits val acquireReadys = Vec(trackerAcquireIOs.map(_.ready)).toBits - val acquire_idx = Mux(acquireMatches.orR, + val acquire_idx = Mux(acquireMatches.orR, PriorityEncoder(acquireMatches), PriorityEncoder(acquireReadys)) val block_acquires = acquireConflicts.orR || !sdq_rdy - io.inner.acquire.ready := acquireReadys.orR && !block_acquires + io.inner.acquire.ready := acquireReadys.orR && !block_acquires trackerAcquireIOs.zipWithIndex.foreach { case(tracker, i) => tracker.bits := io.inner.acquire.bits - tracker.bits.payload.data := - DataQueueLocation(sdq_alloc_id, inStoreQueue).toBits - tracker.valid := io.inner.acquire.valid && - !block_acquires && - (acquire_idx === UInt(i)) + tracker.bits.payload.data := DataQueueLocation(sdq_alloc_id, inStoreQueue).toBits + tracker.valid := io.inner.acquire.valid && !block_acquires && (acquire_idx === UInt(i)) } // Queue to store impending Voluntary Release data @@ -94,24 +91,24 @@ class L2BroadcastHub(bankId: Int) extends ManagerCoherenceAgent // Wire probe requests and grant reply to clients, finish acks from clients // Note that we bypass the Grant data subbundles - io.inner.grant.bits.payload.data := io.outer.grant.bits.payload.data - io.inner.grant.bits.payload.addr_beat := io.outer.grant.bits.payload.addr_beat + io.inner.grant.bits.payload.data := io.outer.grant.bits.data + io.inner.grant.bits.payload.addr_beat := io.outer.grant.bits.addr_beat doOutputArbitration(io.inner.grant, trackerList.map(_.io.inner.grant)) doOutputArbitration(io.inner.probe, trackerList.map(_.io.inner.probe)) doInputRouting(io.inner.finish, trackerList.map(_.io.inner.finish)) // Create an arbiter for the one memory port - val outer_arb = Module(new UncachedTileLinkIOArbiterThatPassesId(trackerList.size), + val outer_arb = Module(new HeaderlessUncachedTileLinkIOArbiter(trackerList.size), { case TLId => params(OuterTLId) case TLDataBits => internalDataBits }) outer_arb.io.in zip trackerList map { case(arb, t) => arb <> t.io.outer } // Get the pending data out of the store data queue - val outer_data_ptr = new DataQueueLocation().fromBits(outer_arb.io.out.acquire.bits.payload.data) + val outer_data_ptr = new DataQueueLocation().fromBits(outer_arb.io.out.acquire.bits.data) val is_in_sdq = outer_data_ptr.loc === inStoreQueue val free_sdq = io.outer.acquire.fire() && - io.outer.acquire.bits.payload.hasData() && + io.outer.acquire.bits.hasData() && outer_data_ptr.loc 
=== inStoreQueue - io.outer.acquire.bits.payload.data := MuxLookup(outer_data_ptr.loc, io.irel().data, Array( + io.outer.acquire.bits.data := MuxLookup(outer_data_ptr.loc, io.irel().data, Array( inStoreQueue -> sdq(outer_data_ptr.idx), inVolWBQueue -> vwbdq(outer_data_ptr.idx))) io.outer <> outer_arb.io.out @@ -147,7 +144,6 @@ class BroadcastVoluntaryReleaseTracker(trackerId: Int, bankId: Int) extends Broa io.outer.acquire.valid := Bool(false) io.outer.grant.ready := Bool(false) - io.outer.finish.valid := Bool(false) io.inner.acquire.ready := Bool(false) io.inner.probe.valid := Bool(false) io.inner.release.ready := Bool(false) @@ -159,11 +155,12 @@ class BroadcastVoluntaryReleaseTracker(trackerId: Int, bankId: Int) extends Broa io.inner.grant.bits.payload := coh.makeGrant(xact, UInt(trackerId)) //TODO: Use io.outer.release instead? - io.outer.acquire.bits.payload := Bundle(PutBlock( - client_xact_id = UInt(trackerId), - addr_block = xact.addr_block, - addr_beat = oacq_data_cnt, - data = data_buffer(oacq_data_cnt)))(outerTLParams) + io.outer.acquire.bits := Bundle( + PutBlock( + client_xact_id = UInt(trackerId), + addr_block = xact.addr_block, + addr_beat = oacq_data_cnt, + data = data_buffer(oacq_data_cnt)))(outerTLParams) when(collect_irel_data) { io.inner.release.ready := Bool(true) @@ -271,7 +268,7 @@ class BroadcastAcquireTracker(trackerId: Int, bankId: Int) extends BroadcastXact addr_block = xact.addr_block))(outerTLParams) io.outer.acquire.valid := Bool(false) - io.outer.acquire.bits.payload := outer_read //default + io.outer.acquire.bits := outer_read //default io.outer.grant.ready := Bool(false) io.inner.probe.valid := Bool(false) @@ -346,7 +343,7 @@ class BroadcastAcquireTracker(trackerId: Int, bankId: Int) extends BroadcastXact when(io.inner.release.valid) { when(io.irel().hasData()) { io.outer.acquire.valid := Bool(true) - io.outer.acquire.bits.payload := outer_write_rel + io.outer.acquire.bits := outer_write_rel when(io.outer.acquire.ready) { when(oacq_data_done) { pending_ognt_ack := Bool(true) @@ -368,7 +365,7 @@ class BroadcastAcquireTracker(trackerId: Int, bankId: Int) extends BroadcastXact } is(s_mem_write) { // Write data to outer memory io.outer.acquire.valid := !pending_ognt_ack || !collect_iacq_data || iacq_data_valid(oacq_data_cnt) - io.outer.acquire.bits.payload := outer_write_acq + io.outer.acquire.bits := outer_write_acq when(oacq_data_done) { pending_ognt_ack := Bool(true) state := Mux(pending_outer_read, s_mem_read, s_mem_resp) @@ -376,7 +373,7 @@ class BroadcastAcquireTracker(trackerId: Int, bankId: Int) extends BroadcastXact } is(s_mem_read) { // Read data from outer memory (possibly what was just written) io.outer.acquire.valid := !pending_ognt_ack - io.outer.acquire.bits.payload := outer_read + io.outer.acquire.bits := outer_read when(io.outer.acquire.fire()) { state := s_mem_resp } } is(s_mem_resp) { // Wait to forward grants from outer memory diff --git a/uncore/src/main/scala/cache.scala b/uncore/src/main/scala/cache.scala index a705f618..05588203 100644 --- a/uncore/src/main/scala/cache.scala +++ b/uncore/src/main/scala/cache.scala @@ -2,6 +2,7 @@ package uncore import Chisel._ +import scala.reflect.ClassTag case object CacheName extends Field[String] case object NSets extends Field[Int] @@ -329,22 +330,13 @@ class L2HellaCacheBank(bankId: Int) extends HierarchicalCoherenceAgent require(isPow2(nSets)) require(isPow2(nWays)) + val meta = Module(new L2MetadataArray) // TODO: add delay knob + val data = Module(new L2DataArray(1)) val tshrfile = Module(new 
TSHRFile(bankId)) - - //TODO: Expose queue depths and data array pipeline cycles as parameters? - tshrfile.io.inner.acquire <> io.inner.acquire - tshrfile.io.inner.probe <> io.inner.probe - tshrfile.io.inner.release <> Queue(io.inner.release) - tshrfile.io.inner.grant <> io.inner.grant - tshrfile.io.inner.finish <> io.inner.finish - + tshrfile.io.inner <> io.inner io.outer <> tshrfile.io.outer io.incoherent <> tshrfile.io.incoherent - - val meta = Module(new L2MetadataArray) tshrfile.io.meta <> meta.io - - val data = Module(new L2DataArray(1)) tshrfile.io.data <> data.io } @@ -364,6 +356,7 @@ class TSHRFile(bankId: Int) extends L2HellaCacheModule Module(new L2AcquireTracker(id, bankId)) } + // WritebackUnit evicts data from L2, including invalidating L1s val wb = Module(new L2WritebackUnit(nTransactors, bankId)) doOutputArbitration(wb.io.wb.req, trackerList.map(_.io.wb.req)) doInputRouting(wb.io.wb.resp, trackerList.map(_.io.wb.resp)) @@ -373,21 +366,15 @@ class TSHRFile(bankId: Int) extends L2HellaCacheModule // Handle acquire transaction initiation val trackerAcquireIOs = trackerList.map(_.io.inner.acquire) + val acquireConflicts = Vec(trackerList.map(_.io.has_acquire_conflict)).toBits + val acquireMatches = Vec(trackerList.map(_.io.has_acquire_match)).toBits + val acquireReadys = Vec(trackerAcquireIOs.map(_.ready)).toBits + val acquire_idx = Mux(acquireMatches.orR, + PriorityEncoder(acquireMatches), + PriorityEncoder(acquireReadys)) - val alloc_arb = Module(new Arbiter(Bool(), trackerList.size)) - alloc_arb.io.out.ready := Bool(true) - trackerAcquireIOs.zip(alloc_arb.io.in).foreach { - case(tracker, arb) => arb.valid := tracker.ready - } - val alloc_idx = Vec(alloc_arb.io.in.map(_.ready)).lastIndexWhere{b: Bool => b} - - val acquireMatchList = trackerList.map(_.io.has_acquire_match) - val any_acquire_matches = acquireMatchList.reduce(_||_) - val match_idx = Vec(acquireMatchList).indexWhere{b: Bool => b} - - val acquire_idx = Mux(any_acquire_matches, match_idx, alloc_idx) - val block_acquires = trackerList.map(_.io.has_acquire_conflict).reduce(_||_) - io.inner.acquire.ready := trackerAcquireIOs.map(_.ready).reduce(_||_) && !block_acquires + val block_acquires = acquireConflicts.orR + io.inner.acquire.ready := acquireReadys.orR && !block_acquires trackerAcquireIOs.zipWithIndex.foreach { case(tracker, i) => tracker.bits := io.inner.acquire.bits @@ -395,15 +382,18 @@ class TSHRFile(bankId: Int) extends L2HellaCacheModule } // Wire releases from clients - val release_idx = Vec(trackerList.map(_.io.has_release_match) :+ - wb.io.has_release_match).indexWhere{b: Bool => b} val trackerReleaseIOs = trackerList.map(_.io.inner.release) :+ wb.io.inner.release - trackerReleaseIOs.zipWithIndex.foreach { + val releaseReadys = Vec(trackerReleaseIOs.map(_.ready)).toBits + val releaseMatches = Vec(trackerList.map(_.io.has_release_match) :+ wb.io.has_release_match).toBits + val release_idx = PriorityEncoder(releaseMatches) + io.inner.release.ready := releaseReadys(release_idx) + trackerReleaseIOs.zipWithIndex.foreach { case(tracker, i) => tracker.bits := io.inner.release.bits tracker.valid := io.inner.release.valid && (release_idx === UInt(i)) } - io.inner.release.ready := Vec(trackerReleaseIOs.map(_.ready)).read(release_idx) + assert(!(io.inner.release.valid && !releaseMatches.orR), + "Non-voluntary release should always have a Tracker waiting for it.") // Wire probe requests and grant reply to clients, finish acks from clients doOutputArbitration(io.inner.probe, trackerList.map(_.io.inner.probe) :+ 
wb.io.inner.probe) @@ -412,7 +402,7 @@ class TSHRFile(bankId: Int) extends L2HellaCacheModule // Create an arbiter for the one memory port val outerList = trackerList.map(_.io.outer) :+ wb.io.outer - val outer_arb = Module(new TileLinkIOArbiterThatPassesId(outerList.size))(outerTLParams) + val outer_arb = Module(new HeaderlessTileLinkIOArbiter(outerList.size))(outerTLParams) outerList zip outer_arb.io.in map { case(out, arb) => out <> arb } io.outer <> outer_arb.io.out @@ -483,10 +473,10 @@ class L2VoluntaryReleaseTracker(trackerId: Int, bankId: Int) extends L2XactTrack val xact_src = Reg(io.inner.release.bits.header.src.clone) val xact = Reg(Bundle(new Release, { case TLId => params(InnerTLId); case TLDataBits => 0 })) val xact_tag_match = Reg{ Bool() } - val xact_meta = Reg{ new L2Metadata } + val xact_old_meta = Reg{ new L2Metadata } val xact_way_en = Reg{ Bits(width = nWays) } val data_buffer = Vec.fill(innerDataBeats){ Reg(io.irel().data.clone) } - val coh = xact_meta.coh + val coh = xact_old_meta.coh val collect_irel_data = Reg(init=Bool(false)) val irel_data_valid = Reg(init=Bits(0, width = innerDataBeats)) @@ -501,7 +491,6 @@ class L2VoluntaryReleaseTracker(trackerId: Int, bankId: Int) extends L2XactTrack io.outer.probe.ready := Bool(false) io.outer.release.valid := Bool(false) io.outer.grant.ready := Bool(false) - io.outer.finish.valid := Bool(false) io.inner.acquire.ready := Bool(false) io.inner.probe.valid := Bool(false) io.inner.release.ready := Bool(false) @@ -529,8 +518,8 @@ class L2VoluntaryReleaseTracker(trackerId: Int, bankId: Int) extends L2XactTrack io.meta.write.bits.idx := xact.addr_block(idxMSB,idxLSB) io.meta.write.bits.way_en := xact_way_en io.meta.write.bits.data.tag := xact.addr_block >> UInt(idxBits) - io.meta.write.bits.data.coh.inner := xact_meta.coh.inner.onRelease(xact, xact_src) - io.meta.write.bits.data.coh.outer := xact_meta.coh.outer.onHit(M_XWR) // WB is a write + io.meta.write.bits.data.coh.inner := xact_old_meta.coh.inner.onRelease(xact, xact_src) + io.meta.write.bits.data.coh.outer := xact_old_meta.coh.outer.onHit(M_XWR) // WB is a write io.wb.req.valid := Bool(false) when(collect_irel_data) { @@ -561,7 +550,7 @@ class L2VoluntaryReleaseTracker(trackerId: Int, bankId: Int) extends L2XactTrack is(s_meta_resp) { when(io.meta.resp.valid) { xact_tag_match := io.meta.resp.bits.tag_match - xact_meta := io.meta.resp.bits.meta + xact_old_meta := io.meta.resp.bits.meta xact_way_en := io.meta.resp.bits.way_en state := Mux(io.meta.resp.bits.tag_match, Mux(xact.hasData(), s_data_write, s_meta_write), @@ -595,101 +584,73 @@ class L2VoluntaryReleaseTracker(trackerId: Int, bankId: Int) extends L2XactTrack class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { val io = new L2XactTrackerIO - val s_idle :: s_meta_read :: s_meta_resp :: s_wb_req :: s_wb_resp :: s_inner_probe :: s_outer_acquire :: s_outer_grant :: s_outer_finish :: s_data_read :: s_data_resp :: s_wait_puts :: s_data_write :: s_inner_grant :: s_meta_write :: s_inner_finish :: Nil = Enum(UInt(), 16) + val s_idle :: s_meta_read :: s_meta_resp :: s_wb_req :: s_wb_resp :: s_inner_probe :: s_outer_acquire :: s_busy :: s_meta_write :: Nil = Enum(UInt(), 9) val state = Reg(init=s_idle) + // State holding transaction metadata val xact_src = Reg(io.inner.acquire.bits.header.src.clone) val xact = Reg(Bundle(new Acquire, { case TLId => params(InnerTLId) })) - val data_buffer = Vec.fill(innerDataBeats){ Reg(UInt(width = innerDataBits)) } - val wmask_buffer = Vec.fill(innerDataBeats){ 
Reg(Bits(width = innerDataBits/8)) } + val data_buffer = Vec.fill(innerDataBeats){ Reg(init=UInt(0, width = innerDataBits)) } + val wmask_buffer = Vec.fill(innerDataBeats){ Reg(init=UInt(0,width = innerDataBits/8)) } val xact_tag_match = Reg{ Bool() } - val xact_meta = Reg{ new L2Metadata } val xact_way_en = Reg{ Bits(width = nWays) } - val pending_coh = Reg{ xact_meta.coh.clone } - val pending_puts = Reg(init=Bits(0, width = innerDataBeats)) - pending_puts := (pending_puts & dropPendingBitWhenHasData(io.inner.acquire)) - val do_allocate = xact.allocate() + val xact_old_meta = Reg{ new L2Metadata } + val pending_coh = Reg{ xact_old_meta.coh.clone } - val release_count = Reg(init = UInt(0, width = log2Up(nCoherentClients+1))) - val pending_probes = Reg(init = Bits(0, width = nCoherentClients)) - val curr_probe_dst = PriorityEncoder(pending_probes) - val full_sharers = io.meta.resp.bits.meta.coh.inner.full() - val probe_self = xact.requiresSelfProbe() - val mask_self = Mux(probe_self, - full_sharers | UInt(UInt(1) << xact_src, width = nCoherentClients), - full_sharers & ~UInt(UInt(1) << xact_src, width = nCoherentClients)) - val mask_incoherent = mask_self & ~io.incoherent.toBits - - val irel_data_done = connectIncomingDataBeatCounter(io.inner.release) - val (oacq_data_cnt, oacq_data_done) = connectOutgoingDataBeatCounter(io.outer.acquire, xact.addr_beat) - val ognt_data_done = connectIncomingDataBeatCounter(io.outer.grant) - val pending_ofin = Reg{ io.outer.finish.bits.clone } - - val pending_ignt_data = Reg(init=Bits(0, width = innerDataBeats)) - pending_ignt_data := pending_ignt_data | - addPendingBitInternal(io.data.resp) | - addPendingBitWhenHasData(io.inner.release) | - addPendingBitWhenHasData(io.outer.grant) | - addPendingBitWhenHasData(io.inner.acquire) + // Secondary miss queue val ignt_q = Module(new Queue(new L2SecondaryMissInfo, nSecondaryMisses))(innerTLParams) - val (ignt_data_idx, ignt_data_done) = connectOutgoingDataBeatCounter(io.inner.grant, ignt_q.io.deq.bits.addr_beat) - ignt_q.io.enq.valid := Bool(false) ignt_q.io.enq.bits.client_xact_id := io.iacq().client_xact_id ignt_q.io.enq.bits.addr_beat := io.iacq().addr_beat - ignt_q.io.deq.ready := ignt_data_done - - val ifin_cnt = Reg(init = UInt(0, width = log2Up(nSecondaryMisses+1))) - when(ignt_data_done) { - ifin_cnt := Mux(io.inner.finish.fire(), - Mux(io.ignt().requiresAck(), ifin_cnt, ifin_cnt - UInt(1)), - Mux(io.ignt().requiresAck(), ifin_cnt + UInt(1), ifin_cnt)) - } .elsewhen(io.inner.finish.fire()) { ifin_cnt := ifin_cnt - UInt(1) } + // TODO add ignt.dst <- iacq.src + // State holding progress made on processing this transaction + val iacq_data_done = + connectIncomingDataBeatCounter(io.inner.acquire) + val pending_irels = + connectTwoWayBeatCounter(nCoherentClients, io.inner.probe, io.inner.release)._1 + val (pending_ognts, oacq_data_idx, oacq_data_done, ognt_data_idx, ognt_data_done) = + connectHeaderlessTwoWayBeatCounter(1, io.outer.acquire, io.outer.grant, xact.addr_beat) + val (ignt_data_idx, ignt_data_done) = + connectOutgoingDataBeatCounter(io.inner.grant, ignt_q.io.deq.bits.addr_beat) + val pending_ifins = + connectTwoWayBeatCounter(nSecondaryMisses, io.inner.grant, io.inner.finish, (g: Grant) => g.requiresAck())._1 + val pending_puts = Reg(init=Bits(0, width = innerDataBeats)) + val pending_iprbs = Reg(init = Bits(0, width = nCoherentClients)) val pending_reads = Reg(init=Bits(0, width = innerDataBeats)) - pending_reads := (pending_reads | - addPendingBitWhenGetOrAtomic(io.inner.acquire)) & - 
(dropPendingBit(io.data.read) & - dropPendingBitWhenHasData(io.inner.release) & - dropPendingBitWhenHasData(io.outer.grant)) - val curr_read_beat = PriorityEncoder(pending_reads) - val pending_writes = Reg(init=Bits(0, width = innerDataBeats)) - pending_writes := (pending_writes | - addPendingBitWhenHasData(io.inner.acquire) | - addPendingBitWhenHasData(io.inner.release) | - addPendingBitWhenHasData(io.outer.grant)) & - dropPendingBit(io.data.write) - val curr_write_beat = PriorityEncoder(pending_writes) - val pending_resps = Reg(init=Bits(0, width = innerDataBeats)) - pending_resps := (pending_resps | - addPendingBitInternal(io.data.read)) & - dropPendingBitInternal(io.data.resp) + val pending_ignt_data = Reg(init=Bits(0, width = innerDataBeats)) + val pending_meta_write = Reg{ Bool() } - val pending_coh_on_hit = HierarchicalMetadata( - io.meta.resp.bits.meta.coh.inner, - io.meta.resp.bits.meta.coh.outer.onHit(xact.op_code())) - val pending_icoh_on_irel = pending_coh.inner.onRelease( - incoming = io.irel(), - src = io.inner.release.bits.header.src) - val pending_ocoh_on_irel = pending_coh.outer.onHit(M_XWR) // WB is a write - val pending_coh_on_ognt = HierarchicalMetadata( - ManagerMetadata.onReset, - pending_coh.outer.onGrant(io.ognt(), xact.op_code())) - val pending_coh_on_ignt = HierarchicalMetadata( - pending_coh.inner.onGrant( - outgoing = io.ignt(), - dst = io.inner.grant.bits.header.dst), - pending_coh.outer) - val pending_ofin_on_ognt = io.ognt().makeFinish() + val all_pending_done = + !(pending_reads.orR || + pending_writes.orR || + pending_resps.orR || + pending_puts.orR || + pending_ognts || + ignt_q.io.count > UInt(0) || + //pending_meta_write || // Has own state: s_meta_write + pending_ifins) - val amo_result = xact.data + // Provide a single ALU per tracker to merge Puts and AMOs with data being + // refilled, written back, or extant in the cache val amoalu = Module(new AMOALU) amoalu.io.addr := xact.addr() amoalu.io.cmd := xact.op_code() amoalu.io.typ := xact.op_size() - amoalu.io.lhs := io.data.resp.bits.data //default - amoalu.io.rhs := data_buffer.head // default + amoalu.io.lhs := io.data.resp.bits.data // default, overwritten by calls to mergeData + amoalu.io.rhs := data_buffer.head // default, overwritten by calls to mergeData + val amo_result = xact.data // Reuse xact buffer space to store AMO result + + // Utility functions for updating the data and metadata that will be kept in + // the cache or granted to the original requestor after this transaction: + + def updatePendingCohWhen(flag: Bool, next: HierarchicalMetadata) { + when(flag && pending_coh != next) { + pending_meta_write := Bool(true) + pending_coh := next + } + } def mergeData(dataBits: Int)(beat: UInt, incoming: UInt) { val old_data = incoming // Refilled, written back, or de-cached data @@ -704,104 +665,182 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { wmask_buffer(beat) := SInt(-1) when(xact.is(Acquire.putAtomicType) && xact.addr_beat === beat) { amo_result := old_data } } - val mergeDataInternal = mergeData(rowBits) _ - val mergeDataInner = mergeData(innerDataBits) _ - val mergeDataOuter = mergeData(outerDataBits) _ + def mergeDataInternal[T <: HasL2Data with HasL2BeatAddr](in: ValidIO[T]) { + when(in.valid) { mergeData(rowBits)(in.bits.addr_beat, in.bits.data) } + } + def mergeDataInner[T <: HasTileLinkData with HasTileLinkBeatId](in: DecoupledIO[LogicalNetworkIO[T]]) { + when(in.fire() && in.bits.payload.hasData()) { + 
mergeData(innerDataBits)(in.bits.payload.addr_beat, in.bits.payload.data)
+    }
+  }
+  def mergeDataOuter[T <: HasTileLinkData with HasTileLinkBeatId](in: DecoupledIO[T]) {
+    when(in.fire() && in.bits.hasData()) {
+      mergeData(outerDataBits)(in.bits.addr_beat, in.bits.data)
+    }
+  }
+  // Actual transaction processing logic begins here:
+  //
+  // First, take care of accepting new acquires or secondary misses
+  // For now, the only allowed secondary miss types are Gets-under-Get
+  // and Puts-under-Put from the same client
   val can_merge_iacq_get = (xact.isBuiltInType(Acquire.getType) &&
                              io.iacq().isBuiltInType(Acquire.getType)) &&
-                           (xact_src === io.inner.acquire.bits.header.src) &&
-                           xact.conflicts(io.iacq()) &&
-                           Vec(s_meta_read, s_meta_resp, s_wb_req, s_wb_resp,
-                             s_inner_probe, s_outer_acquire, s_outer_grant,
-                             s_outer_finish).contains(state) &&
-                           do_allocate &&
-                           ignt_q.io.enq.ready
+                           xact_src === io.inner.acquire.bits.header.src && //TODO
+                           xact.conflicts(io.iacq()) &&
+                           state != s_idle && state != s_meta_write &&
+                           !all_pending_done &&
+                           xact.allocate() &&
+                           !io.inner.release.fire() &&
+                           !io.outer.grant.fire() &&
+                           !io.data.resp.valid &&
+                           ignt_q.io.enq.ready
+  // This logic also allows the tail beats of a PutBlock to be merged in
   val can_merge_iacq_put = ((xact.isBuiltInType(Acquire.putType) &&
                               io.iacq().isBuiltInType(Acquire.putType)) ||
                             (xact.isBuiltInType(Acquire.putBlockType) &&
                               io.iacq().isBuiltInType(Acquire.putBlockType))) &&
-                           (xact_src === io.inner.acquire.bits.header.src) &&
-                           (xact.client_xact_id === io.iacq().client_xact_id) &&
-                           xact.conflicts(io.iacq()) &&
-                           Vec(s_meta_read, s_meta_resp, s_wb_req, s_wb_resp,
-                             s_inner_probe, s_outer_acquire, s_outer_grant,
-                             s_outer_finish, s_data_read,
-                             s_data_resp).contains(state) &&
-                           do_allocate &&
-                           ignt_q.io.enq.ready
-
-  val in_same_set = xact.addr_block(idxMSB,idxLSB) ===
-                    io.iacq().addr_block(idxMSB,idxLSB)
-  io.has_release_match := xact.conflicts(io.irel()) &&
-                          !io.irel().isVoluntary() &&
-                          (state === s_inner_probe)
-  io.has_acquire_match := can_merge_iacq_put || can_merge_iacq_get
-  io.has_acquire_conflict := (xact.conflicts(io.iacq()) || in_same_set) &&
-                             (state != s_idle) &&
-                             !io.has_acquire_match
-
-  // If we're allocating in this cache, we can use the current metadata
-  // to make an appropriate custom Acquire, otherwise we copy over the
-  // built-in Acquire from the inner TL to the outer TL
-  io.outer.acquire.valid := state === s_outer_acquire
-  io.outer.acquire.bits.payload := Mux(do_allocate,
-    xact_meta.coh.outer.makeAcquire(
-      client_xact_id = UInt(trackerId),
-      addr_block = xact.addr_block,
-      op_code = xact.op_code()),
-    Bundle(Acquire(xact))(outerTLParams))
-  io.outer.acquire.bits.header.src := UInt(bankId)
-  io.outer.probe.ready := Bool(false)
-  io.outer.release.valid := Bool(false)
-  io.outer.grant.ready := state === s_outer_grant
-  io.outer.finish.valid := state === s_outer_finish
-  io.outer.finish.bits := pending_ofin
-
-  io.inner.probe.valid := state === s_inner_probe && pending_probes.orR
-  io.inner.probe.bits.header.src := UInt(bankId)
-  io.inner.probe.bits.header.dst := curr_probe_dst
-  io.inner.probe.bits.payload := pending_coh.inner.makeProbe(xact)
-
-  io.inner.grant.valid := state === s_inner_grant &&
-                          ignt_q.io.deq.valid &&
-                          (!io.ignt().hasData() ||
-                            pending_ignt_data(ignt_data_idx))
-  io.inner.grant.bits.header.src := UInt(bankId)
-  io.inner.grant.bits.header.dst := xact_src
-  io.inner.grant.bits.payload := pending_coh.inner.makeGrant(
-    acq = xact,
-    manager_xact_id = UInt(trackerId),
-    addr_beat = ignt_data_idx,
-    data = Mux(xact.is(Acquire.putAtomicType),
-             amo_result,
-             data_buffer(ignt_data_idx)))
-  io.ignt().client_xact_id := ignt_q.io.deq.bits.client_xact_id
+                           xact_src === io.inner.acquire.bits.header.src && //TODO
+                           xact.conflicts(io.iacq()) &&
+                           state != s_idle && state != s_meta_write &&
+                           !all_pending_done &&
+                           xact.allocate() &&
+                           !io.inner.release.fire() &&
+                           !io.outer.grant.fire() &&
+                           !io.data.resp.valid &&
+                           ignt_q.io.enq.ready

   io.inner.acquire.ready := state === s_idle ||
                             can_merge_iacq_put ||
                             can_merge_iacq_get
-  io.inner.release.ready := state === s_inner_probe
-  io.inner.finish.ready := Vec(s_inner_finish, s_meta_write, s_inner_grant,
-                                s_data_write, s_wait_puts, s_data_resp).contains(state)
-  io.data.read.valid := state === s_data_read && pending_reads.orR
+  // Track whether any beats are missing from a PutBlock
+  pending_puts := (pending_puts & dropPendingBitWhenBeatHasData(io.inner.acquire))
+
+  // Track which clients yet need to be probed and make Probe message
+  pending_iprbs := pending_iprbs & dropPendingBitAtDest(io.inner.probe)
+  val curr_probe_dst = PriorityEncoder(pending_iprbs)
+  io.inner.probe.valid := state === s_inner_probe && pending_iprbs.orR
+  io.inner.probe.bits.header.src := UInt(bankId)
+  io.inner.probe.bits.header.dst := curr_probe_dst
+  io.inner.probe.bits.payload := pending_coh.inner.makeProbe(xact)
+
+  // Handle incoming releases from clients, which may reduce sharer counts
+  // and/or write back dirty data
+  io.inner.release.ready := state === s_inner_probe
+  val pending_coh_on_irel = HierarchicalMetadata(
+                              pending_coh.inner.onRelease( // Drop sharer
+                                incoming = io.irel(),
+                                src = io.inner.release.bits.header.src),
+                              Mux(io.irel().hasData(), // Dirty writeback
+                                pending_coh.outer.onHit(M_XWR),
+                                pending_coh.outer))
+  updatePendingCohWhen(io.inner.release.fire(), pending_coh_on_irel)
+  mergeDataInner(io.inner.release)
+
+  // The following subtransaction handles misses or coherence permission
+  // upgrades by initiating a new transaction in the outer memory:
+  //
+  // If we're allocating in this cache, we can use the current metadata
+  // to make an appropriate custom Acquire, otherwise we copy over the
+  // built-in Acquire from the inner TL to the outer TL
+  io.outer.acquire.valid := state === s_outer_acquire
+  io.outer.acquire.bits := Mux(
+                             xact.allocate(),
+                             xact_old_meta.coh.outer.makeAcquire(
+                               client_xact_id = UInt(0),
+                               addr_block = xact.addr_block,
+                               op_code = xact.op_code()),
+                             Bundle(Acquire(xact))(outerTLParams))
+
+  // Probes from the outer memory system are handled in the WritebackUnit
+  io.outer.probe.ready := Bool(false)
+  io.outer.release.valid := Bool(false)
+
+  io.outer.grant.ready := state === s_busy
+  val pending_coh_on_ognt = HierarchicalMetadata(
+                              ManagerMetadata.onReset,
+                              pending_coh.outer.onGrant(io.outer.grant.bits, xact.op_code()))
+  updatePendingCohWhen(ognt_data_done, pending_coh_on_ognt)
+  mergeDataOuter(io.outer.grant)
+
+  // Going back to the original inner transaction, we can issue a Grant as
+  // soon as the data is released, granted, put, or read from the cache
+  pending_ignt_data := pending_ignt_data |
+                       addPendingBitWhenBeatHasData(io.inner.release) |
+                       addPendingBitWhenBeatHasData(io.outer.grant) |
+                       addPendingBitInternal(io.data.resp)
+  ignt_q.io.enq.valid := iacq_data_done
+  ignt_q.io.deq.ready := ignt_data_done
+  // Make the Grant message using the data stored in the secondary miss queue
+  io.inner.grant.valid := state === s_busy &&
+                          ignt_q.io.deq.valid &&
+                          (!io.ignt().hasData() ||
+                            pending_ignt_data(ignt_data_idx))
+  io.inner.grant.bits.header.src := UInt(bankId)
+  io.inner.grant.bits.header.dst := xact_src // TODO: ignt_q.io.deq.bits.src
+  io.inner.grant.bits.payload := pending_coh.inner.makeGrant(
+                                   acq = xact,
+                                   manager_xact_id = UInt(trackerId),
+                                   addr_beat = ignt_data_idx,
+                                   data = Mux(xact.is(Acquire.putAtomicType),
+                                            amo_result,
+                                            data_buffer(ignt_data_idx)))
+  // TODO: improve the ManagerMetadata.makeGrant to deal with possibility of
+  // multiple client transaction ids from merged secondary misses
+  io.ignt().client_xact_id := ignt_q.io.deq.bits.client_xact_id
+
+  val pending_coh_on_ignt = HierarchicalMetadata(
+                              pending_coh.inner.onGrant(
+                                outgoing = io.ignt(),
+                                dst = io.inner.grant.bits.header.dst),
+                              pending_coh.outer)
+  updatePendingCohWhen(io.inner.grant.fire(), pending_coh_on_ignt)
+
+  // We must wait for as many Finishes as we sent Grants
+  io.inner.finish.ready := state === s_busy
+
+  pending_reads := (pending_reads &
+                     dropPendingBit(io.data.read) &
+                     dropPendingBitWhenBeatHasData(io.inner.release) &
+                     dropPendingBitWhenBeatHasData(io.outer.grant)) |
+                   addPendingBitWhenBeatIsGetOrAtomic(io.inner.acquire)
+  val curr_read_beat = PriorityEncoder(pending_reads)
+  io.data.read.valid := state === s_busy &&
+                          pending_reads.orR &&
+                          !pending_ognts
   io.data.read.bits.id := UInt(trackerId)
   io.data.read.bits.way_en := xact_way_en
   io.data.read.bits.addr_idx := xact.addr_block(idxMSB,idxLSB)
   io.data.read.bits.addr_beat := curr_read_beat
-  io.data.write.valid := state === s_data_write && pending_writes.orR
+
+  pending_resps := (pending_resps & dropPendingBitInternal(io.data.resp)) |
+                   addPendingBitInternal(io.data.read)
+  mergeDataInternal(io.data.resp)
+
+  pending_writes := (pending_writes & dropPendingBit(io.data.write)) |
+                    addPendingBitWhenBeatHasData(io.inner.acquire) |
+                    addPendingBitWhenBeatHasData(io.inner.release) |
+                    addPendingBitWhenBeatHasData(io.outer.grant)
+  val curr_write_beat = PriorityEncoder(pending_writes)
+  io.data.write.valid := state === s_busy &&
+                           pending_writes.orR &&
+                           !pending_ognts &&
+                           !pending_reads(curr_write_beat) &&
+                           !pending_resps(curr_write_beat)
   io.data.write.bits.id := UInt(trackerId)
   io.data.write.bits.way_en := xact_way_en
   io.data.write.bits.addr_idx := xact.addr_block(idxMSB,idxLSB)
   io.data.write.bits.addr_beat := curr_write_beat
   io.data.write.bits.wmask := wmask_buffer(curr_write_beat)
   io.data.write.bits.data := data_buffer(curr_write_beat)
+
   io.meta.read.valid := state === s_meta_read
   io.meta.read.bits.id := UInt(trackerId)
   io.meta.read.bits.idx := xact.addr_block(idxMSB,idxLSB)
   io.meta.read.bits.tag := xact.addr_block >> UInt(idxBits)
+
   io.meta.write.valid := state === s_meta_write
   io.meta.write.bits.id := UInt(trackerId)
   io.meta.write.bits.idx := xact.addr_block(idxMSB,idxLSB)
@@ -810,188 +849,106 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker {
   io.meta.write.bits.data.coh := pending_coh

   io.wb.req.valid := state === s_wb_req
-  io.wb.req.bits.addr_block := Cat(xact_meta.tag, xact.addr_block(idxMSB,idxLSB))
-  io.wb.req.bits.coh := xact_meta.coh
+  io.wb.req.bits.addr_block := Cat(xact_old_meta.tag, xact.addr_block(idxMSB,idxLSB))
+  io.wb.req.bits.coh := xact_old_meta.coh
   io.wb.req.bits.way_en := xact_way_en
   io.wb.req.bits.id := UInt(trackerId)

-  when(io.data.resp.valid) {
-    mergeDataInternal(io.data.resp.bits.addr_beat, io.data.resp.bits.data)
+  // Handling of secondary misses (Gets and Puts only for now)
+  when(io.inner.acquire.fire() && io.iacq().hasData()) { // state <= s_meta_write
+
val beat = io.iacq().addr_beat + val wmask = io.iacq().wmask() + val full = FillInterleaved(8, wmask) + data_buffer(beat) := (~full & data_buffer(beat)) | (full & io.iacq().data) + wmask_buffer(beat) := wmask | Mux(state === s_idle, Bits(0), wmask_buffer(beat)) } - def doneNextCycleHot(rdy: Bool, pending: UInt) = !pending.orR || rdy && PopCount(pending) === UInt(1) - def doneNextCycleCounter(rdy: Bool, pending: UInt) = pending === UInt(0) || rdy && pending === UInt(1) - switch (state) { - is(s_idle) { - when(io.inner.acquire.valid) { - xact_src := io.inner.acquire.bits.header.src - xact := io.iacq() - xact.data := UInt(0) - wmask_buffer.foreach { w => w := UInt(0) } - pending_puts := Mux(io.iacq().isBuiltInType(Acquire.putBlockType), - dropPendingBitWhenHasData(io.inner.acquire), - UInt(0)) - pending_reads := Mux(io.iacq().isBuiltInType(Acquire.getBlockType) || - !io.iacq().isBuiltInType(), - SInt(-1, width = innerDataBeats), - addPendingBitWhenGetOrAtomic(io.inner.acquire)).toUInt - pending_writes := addPendingBitWhenHasData(io.inner.acquire) - pending_resps := UInt(0) - pending_ignt_data := UInt(0) - ifin_cnt := UInt(0) - ignt_q.io.enq.valid := Bool(true) - state := s_meta_read - } - } - is(s_meta_read) { when(io.meta.read.ready) { state := s_meta_resp } } - is(s_meta_resp) { - when(io.meta.resp.valid) { - xact_tag_match := io.meta.resp.bits.tag_match - xact_meta := io.meta.resp.bits.meta - xact_way_en := io.meta.resp.bits.way_en - pending_coh := io.meta.resp.bits.meta.coh - val _coh = io.meta.resp.bits.meta.coh - val _tag_match = io.meta.resp.bits.tag_match - val _is_hit = (if(isLastLevelCache) - (xact.isBuiltInType(Acquire.putBlockType) || - _tag_match && _coh.outer.isValid()) - else (_tag_match && _coh.outer.isHit(xact.op_code()))) - val _needs_writeback = !_tag_match && do_allocate && - (_coh.outer.requiresVoluntaryWriteback() || - _coh.inner.requiresProbesOnVoluntaryWriteback()) - val _needs_inner_probes = _tag_match && _coh.inner.requiresProbes(xact) - when(_is_hit) { pending_coh := pending_coh_on_hit } - when(_needs_inner_probes) { - pending_probes := mask_incoherent - release_count := PopCount(mask_incoherent) - } - state := Mux(_needs_writeback, s_wb_req, - Mux(_needs_inner_probes, s_inner_probe, - Mux(!_is_hit, s_outer_acquire, - Mux(pending_reads.orR, s_data_read, - Mux(!pending_writes.orR, s_inner_grant, - Mux(pending_puts.orR, s_wait_puts, s_data_write)))))) - } - } - is(s_wb_req) { when(io.wb.req.ready) { state := s_wb_resp } } - is(s_wb_resp) { - when(io.wb.resp.valid) { - val _skip_outer_acquire = Bool(isLastLevelCache) && xact.isBuiltInType(Acquire.putBlockType) - state := Mux(!_skip_outer_acquire, s_outer_acquire, - Mux(pending_puts.orR, s_wait_puts, s_data_write)) - } - } - is(s_inner_probe) { - // Send probes - when(io.inner.probe.ready) { - pending_probes := pending_probes & ~UIntToOH(curr_probe_dst) - } - // Handle releases, which may have data being written back - when(io.inner.release.valid) { - pending_coh.inner := pending_icoh_on_irel - // Handle released dirty data - when(io.irel().hasData()) { - pending_coh.outer := pending_ocoh_on_irel - mergeDataInner(io.irel().addr_beat, io.irel().data) - } - // We don't decrement release_count until we've received all the data beats. 
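For reference, the Put-merge just above (and the shared mergeData helper) hinges on FillInterleaved expanding each write-mask bit into a full byte before combining old and new data. A standalone sketch of that idiom is below; the module and port names are hypothetical and do not appear in this patch, and a 64-bit beat with one mask bit per byte is assumed.

    import Chisel._

    // Hypothetical sketch: merge one beat of incoming Put data into
    // previously buffered data under a byte-granularity write mask,
    // i.e. the (~full & old) | (full & new) pattern used above.
    class PutMergeSketch extends Module {
      val io = new Bundle {
        val old_data = UInt(INPUT, width = 64)
        val new_data = UInt(INPUT, width = 64)
        val wmask    = UInt(INPUT, width = 8) // one bit per byte of the beat
        val merged   = UInt(OUTPUT, width = 64)
      }
      // FillInterleaved(8, wmask) replicates each mask bit eight times,
      // yielding a full-width bit mask that selects bytes from new_data.
      val full = FillInterleaved(8, io.wmask)
      io.merged := (~full & io.old_data) | (full & io.new_data)
    }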
-        when(!io.irel().hasMultibeatData() || irel_data_done) {
-          release_count := release_count - UInt(1)
-        }
-      }
-      when(release_count === UInt(0)) {
-        val _skip_outer_acquire = (if(isLastLevelCache)
-                                     (xact.isBuiltInType(Acquire.putBlockType) ||
-                                       xact_meta.coh.outer.isValid())
-                                   else xact_meta.coh.outer.isHit(xact.op_code()))
-        state := Mux(!_skip_outer_acquire, s_outer_acquire,
-                   Mux(pending_reads.orR, s_data_read,
-                     Mux(!pending_writes.orR, s_inner_grant,
-                       Mux(pending_puts.orR, s_wait_puts, s_data_write))))
-      }
-    }
-    is(s_outer_acquire) { when(oacq_data_done) { state := s_outer_grant } }
-    is(s_outer_grant) {
-      when(io.outer.grant.valid) {
-        when(io.ognt().hasData()) {
-          mergeDataOuter(io.ognt().addr_beat, io.ognt().data)
-        }
-        when(ognt_data_done) {
-          pending_coh := pending_coh_on_ognt
-          when(io.ognt().requiresAck()) {
-            pending_ofin.payload := pending_ofin_on_ognt
-            pending_ofin.header.dst := io.outer.grant.bits.header.src
-            pending_ofin.header.src := UInt(bankId)
-            state := s_outer_finish
-          }.otherwise {
-            state := Mux(pending_reads.orR, s_data_read,
-                       Mux(!pending_writes.orR, s_inner_grant,
-                         Mux(pending_puts.orR, s_wait_puts, s_data_write)))
-          }
-        }
-      }
-    }
-    is(s_outer_finish) {
-      when(io.outer.finish.ready) {
-        state := Mux(pending_reads.orR, s_data_read,
-                   Mux(!pending_writes.orR, s_inner_grant,
-                     Mux(pending_puts.orR, s_wait_puts, s_data_write)))
-      }
-    }
-    is(s_data_read) {
-      when(doneNextCycleHot(io.data.read.ready, pending_reads)) {
-        state := s_data_resp
-      }
-    }
-    is(s_data_resp) {
-      when(doneNextCycleHot(io.data.resp.valid, pending_resps)) {
-        state := Mux(!pending_writes.orR, s_inner_grant,
-                   Mux(pending_puts.orR, s_wait_puts, s_data_write))
-      }
-    }
-    is(s_wait_puts) {
-      when(doneNextCycleHot(io.inner.acquire.fire(), pending_puts)) {
-        state := s_data_write
-      }
-    }
-    is(s_data_write) {
-      when(io.data.write.ready) {
-        when(PopCount(pending_writes) <= UInt(1)) { state := s_inner_grant }
-      }
-    }
-    is(s_inner_grant) {
-      when(doneNextCycleCounter(ignt_data_done, ignt_q.io.count)) {
-        val meta_dirty = !xact_tag_match || pending_coh_on_ignt != xact_meta.coh
-        when(meta_dirty) { pending_coh := pending_coh_on_ignt }
-        state := Mux(meta_dirty, s_meta_write,
-                   Mux(ifin_cnt > UInt(0) || io.ignt().requiresAck(),
-                     s_inner_finish, s_idle))
-      }
-    }
-    is(s_meta_write) {
-      when(io.meta.write.ready) {
-        state := Mux(ifin_cnt > UInt(0), s_inner_finish, s_idle)
-      }
-    }
-    is(s_inner_finish) {
-      when(doneNextCycleCounter(io.inner.finish.valid, ifin_cnt)) {
-        state := s_idle
-      }
-    }
+  // Defined here because of Chisel default wire demands, used in s_meta_resp
+  val pending_coh_on_hit = HierarchicalMetadata(
+    io.meta.resp.bits.meta.coh.inner,
+    io.meta.resp.bits.meta.coh.outer.onHit(xact.op_code()))
+
+  // State machine updates and transaction handler metadata initialization
+  when(state === s_idle && io.inner.acquire.valid) {
+    xact_src := io.inner.acquire.bits.header.src
+    xact := io.iacq()
+    xact.data := UInt(0)
+    pending_puts := Mux(
+      io.iacq().isBuiltInType(Acquire.putBlockType),
+      dropPendingBitWhenBeatHasData(io.inner.acquire),
+      UInt(0))
+    pending_reads := Mux(
+      io.iacq().isBuiltInType(Acquire.getBlockType) || !io.iacq().isBuiltInType(),
+      SInt(-1, width = innerDataBeats),
+      addPendingBitWhenBeatIsGetOrAtomic(io.inner.acquire)).toUInt
+    pending_writes := addPendingBitWhenBeatHasData(io.inner.acquire)
+    pending_resps := UInt(0)
+    pending_ignt_data := UInt(0)
+    pending_meta_write := UInt(0)
+    state := s_meta_read
+  }
+  when(state === s_meta_read && io.meta.read.ready) { state := s_meta_resp
} + when(state === s_meta_resp && io.meta.resp.valid) { + xact_tag_match := io.meta.resp.bits.tag_match + xact_old_meta := io.meta.resp.bits.meta + xact_way_en := io.meta.resp.bits.way_en + val coh = io.meta.resp.bits.meta.coh + val tag_match = io.meta.resp.bits.tag_match + val is_hit = (if(!isLastLevelCache) tag_match && coh.outer.isHit(xact.op_code()) + else xact.isBuiltInType(Acquire.putBlockType) || + tag_match && coh.outer.isValid()) + val needs_writeback = !tag_match && + xact.allocate() && + (coh.outer.requiresVoluntaryWriteback() || + coh.inner.requiresProbesOnVoluntaryWriteback()) + val needs_inner_probes = tag_match && coh.inner.requiresProbes(xact) + when(!tag_match || is_hit && pending_coh_on_hit != coh) { pending_meta_write := Bool(true) } + pending_coh := Mux(is_hit, pending_coh_on_hit, coh) + when(needs_inner_probes) { + val full_sharers = coh.inner.full() + val mask_self = Mux( + xact.requiresSelfProbe(), + coh.inner.full() | UIntToOH(xact_src), + coh.inner.full() & ~UIntToOH(xact_src)) + val mask_incoherent = mask_self & ~io.incoherent.toBits + pending_iprbs := mask_incoherent + } + state := Mux(needs_writeback, s_wb_req, + Mux(needs_inner_probes, s_inner_probe, + Mux(!is_hit, s_outer_acquire, s_busy))) + } + when(state === s_wb_req && io.wb.req.ready) { state := s_wb_resp } + when(state === s_wb_resp && io.wb.resp.valid) { + // If we're overwriting the whole block in a last level cache we can + // just do it without fetching any data from memory + val skip_outer_acquire = Bool(isLastLevelCache) && + xact.isBuiltInType(Acquire.putBlockType) + state := Mux(!skip_outer_acquire, s_outer_acquire, s_busy) + } + when(state === s_inner_probe && !(pending_iprbs.orR || pending_irels)) { + // Tag matches, so if this is the last level cache we can use the data without upgrading permissions + val skip_outer_acquire = + (if(!isLastLevelCache) xact_old_meta.coh.outer.isHit(xact.op_code()) + else xact.isBuiltInType(Acquire.putBlockType) || xact_old_meta.coh.outer.isValid()) + state := Mux(!skip_outer_acquire, s_outer_acquire, s_busy) + } + when(state === s_outer_acquire && oacq_data_done) { state := s_busy } + when(state === s_busy && all_pending_done) { state := s_meta_write } + when(state === s_meta_write && (io.meta.write.ready || !pending_meta_write)) { + wmask_buffer.foreach { w => w := UInt(0) } + state := s_idle } - // Handle Get and Put merging - when(io.inner.acquire.fire()) { - when (io.iacq().hasData()) { - val beat = io.iacq().addr_beat - val wmask = io.iacq().wmask() - val full = FillInterleaved(8, wmask) - data_buffer(beat) := (~full & data_buffer(beat)) | (full & io.iacq().data) - wmask_buffer(beat) := wmask | Mux(state === s_idle, Bits(0), wmask_buffer(beat)) - } - when(!io.iacq().hasMultibeatData()) { ignt_q.io.enq.valid := Bool(true) } - } + // These IOs are used for routing in the parent + val in_same_set = xact.addr_block(idxMSB,idxLSB) === io.iacq().addr_block(idxMSB,idxLSB) + io.has_release_match := xact.conflicts(io.irel()) && + !io.irel().isVoluntary() && + (state === s_inner_probe) + io.has_acquire_match := can_merge_iacq_put || can_merge_iacq_get + io.has_acquire_conflict := in_same_set && (state != s_idle) && !io.has_acquire_match + //TODO: relax from in_same_set to xact.conflicts(io.iacq())? 
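The has_acquire_match/has_acquire_conflict signals above are consumed by the allocation logic in TSHRFile (and L2BroadcastHub) shown earlier in this patch, which steers each incoming Acquire to a matching tracker if one exists, else to any ready tracker. A standalone sketch of that selection idiom follows; the module and port names are invented for illustration and are not part of this patch.

    import Chisel._

    // Hypothetical sketch of the tracker-selection idiom: prefer the
    // tracker that already matches the incoming Acquire, otherwise take
    // the lowest-numbered tracker that is ready to allocate.
    class TrackerSelectSketch(n: Int) extends Module {
      val io = new Bundle {
        val matches   = Bits(INPUT, width = n) // per-tracker has_acquire_match
        val readys    = Bits(INPUT, width = n) // per-tracker acquire.ready
        val conflicts = Bits(INPUT, width = n) // per-tracker has_acquire_conflict
        val idx       = UInt(OUTPUT, width = log2Up(n))
        val accept    = Bool(OUTPUT)
      }
      io.idx := Mux(io.matches.orR,
                    PriorityEncoder(io.matches),
                    PriorityEncoder(io.readys))
      // Stall the port while any tracker reports a conflict for this block
      io.accept := io.readys.orR && !io.conflicts.orR
    }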
+ // Checks for illegal behavior assert(!(state != s_idle && io.inner.acquire.fire() && io.inner.acquire.bits.header.src != xact_src), "AcquireTracker accepted data beat from different network source than initial request.") @@ -1019,7 +976,7 @@ class L2WritebackUnitIO extends HierarchicalXactTrackerIO { class L2WritebackUnit(trackerId: Int, bankId: Int) extends L2XactTracker { val io = new L2WritebackUnitIO - val s_idle :: s_inner_probe :: s_data_read :: s_data_resp :: s_outer_release :: s_outer_grant :: s_outer_finish :: s_wb_resp :: Nil = Enum(UInt(), 8) + val s_idle :: s_inner_probe :: s_data_read :: s_data_resp :: s_outer_release :: s_outer_grant :: s_wb_resp :: Nil = Enum(UInt(), 7) val state = Reg(init=s_idle) val xact_addr_block = Reg(io.wb.req.bits.addr_block.clone) @@ -1027,10 +984,9 @@ class L2WritebackUnit(trackerId: Int, bankId: Int) extends L2XactTracker { val xact_way_en = Reg{ Bits(width = nWays) } val data_buffer = Vec.fill(innerDataBeats){ Reg(io.irel().data.clone) } val xact_id = Reg{ UInt() } - val pending_ofin = Reg{ io.outer.finish.bits.clone } val irel_had_data = Reg(init = Bool(false)) - val release_count = Reg(init = UInt(0, width = log2Up(nCoherentClients+1))) + val irel_cnt = Reg(init = UInt(0, width = log2Up(nCoherentClients+1))) val pending_probes = Reg(init = Bits(0, width = nCoherentClients)) val curr_probe_dst = PriorityEncoder(pending_probes) val full_sharers = io.wb.req.bits.coh.inner.full() @@ -1056,15 +1012,12 @@ class L2WritebackUnit(trackerId: Int, bankId: Int) extends L2XactTracker { io.outer.acquire.valid := Bool(false) io.outer.probe.ready := Bool(false) io.outer.release.valid := Bool(false) // default - io.outer.release.bits.payload := xact_coh.outer.makeVoluntaryWriteback( - client_xact_id = UInt(trackerId), - addr_block = xact_addr_block, - addr_beat = orel_data_cnt, - data = data_buffer(orel_data_cnt)) - io.outer.release.bits.header.src := UInt(bankId) + io.outer.release.bits := xact_coh.outer.makeVoluntaryWriteback( + client_xact_id = UInt(trackerId), + addr_block = xact_addr_block, + addr_beat = orel_data_cnt, + data = data_buffer(orel_data_cnt)) io.outer.grant.ready := Bool(false) // default - io.outer.finish.valid := Bool(false) // default - io.outer.finish.bits := pending_ofin io.inner.probe.valid := Bool(false) io.inner.probe.bits.header.src := UInt(bankId) @@ -1100,7 +1053,7 @@ class L2WritebackUnit(trackerId: Int, bankId: Int) extends L2XactTracker { val needs_inner_probes = io.wb.req.bits.coh.inner.requiresProbesOnVoluntaryWriteback() when(needs_inner_probes) { pending_probes := mask_incoherent - release_count := PopCount(mask_incoherent) + irel_cnt := PopCount(mask_incoherent) } state := Mux(needs_inner_probes, s_inner_probe, s_data_read) } @@ -1121,12 +1074,12 @@ class L2WritebackUnit(trackerId: Int, bankId: Int) extends L2XactTracker { xact_coh.outer := pending_ocoh_on_irel data_buffer(io.irel().addr_beat) := io.irel().data } - // We don't decrement release_count until we've received all the data beats. + // We don't decrement irel_cnt until we've received all the data beats. 
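As an aside, the probe/release accounting used here by the writeback unit (and by the s_inner_probe state of the acquire tracker) follows one pattern: a bitmask of clients still to be probed is drained one-hot, while a separate count of expected releases drains as they arrive. A hypothetical standalone sketch, with invented port names:

    import Chisel._

    // Hypothetical sketch of the pending-probe/release pattern. Assumes
    // start is not asserted in the same cycle as probe_fire.
    class ProbeBookkeepingSketch(nClients: Int) extends Module {
      val io = new Bundle {
        val start        = Bool(INPUT)                   // latch a new sharer set
        val sharers      = Bits(INPUT, width = nClients)
        val probe_fire   = Bool(INPUT)                   // a Probe was accepted
        val release_done = Bool(INPUT)                   // last beat of a Release
        val probe_dst    = UInt(OUTPUT, width = log2Up(nClients))
        val done         = Bool(OUTPUT)
      }
      val pending_probes = Reg(init = Bits(0, width = nClients))
      val irel_cnt = Reg(init = UInt(0, width = log2Up(nClients+1)))
      val curr_probe_dst = PriorityEncoder(pending_probes)
      when(io.start) {
        pending_probes := io.sharers
        irel_cnt := PopCount(io.sharers)
      }
      when(io.probe_fire)   { pending_probes := pending_probes & ~UIntToOH(curr_probe_dst) }
      when(io.release_done) { irel_cnt := irel_cnt - UInt(1) }
      io.probe_dst := curr_probe_dst
      io.done := !pending_probes.orR && irel_cnt === UInt(0)
    }

Returning to the hunk: as the comment above notes, irel_cnt below is decremented only once per Release, on its final data beat.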
when(!io.irel().hasData() || irel_data_done) { - release_count := release_count - UInt(1) + irel_cnt := irel_cnt - UInt(1) } } - when(release_count === UInt(0)) { + when(irel_cnt === UInt(0)) { state := Mux(irel_had_data, // If someone released a dirty block s_outer_release, // write that block back, otherwise Mux(xact_coh.outer.requiresVoluntaryWriteback(), @@ -1152,19 +1105,9 @@ class L2WritebackUnit(trackerId: Int, bankId: Int) extends L2XactTracker { is(s_outer_grant) { io.outer.grant.ready := Bool(true) when(io.outer.grant.valid) { - when(io.ognt().requiresAck()) { - pending_ofin.payload := pending_ofin_on_ognt - pending_ofin.header.dst := io.outer.grant.bits.header.src - state := s_outer_finish - }.otherwise { - state := s_wb_resp - } + state := s_wb_resp } } - is(s_outer_finish) { - io.outer.finish.valid := Bool(true) - when(io.outer.finish.ready) { state := s_wb_resp } - } is(s_wb_resp) { io.wb.resp.valid := Bool(true) state := s_idle diff --git a/uncore/src/main/scala/htif.scala b/uncore/src/main/scala/htif.scala index 81d95453..787847b9 100644 --- a/uncore/src/main/scala/htif.scala +++ b/uncore/src/main/scala/htif.scala @@ -127,21 +127,11 @@ class HTIF(pcr_RESET: Int) extends Module with HTIFParameters { val tx_size = Mux(!nack && (cmd === cmd_readmem || cmd === cmd_readcr || cmd === cmd_writecr), size, UInt(0)) val tx_done = io.host.out.ready && tx_subword_count.andR && (tx_word_count === tx_size || tx_word_count > UInt(0) && packet_ram_raddr.andR) - val mem_acked = Reg(init=Bool(false)) - val mem_gxid = Reg(Bits()) - val mem_gsrc = Reg(UInt()) - val mem_needs_ack = Reg(Bool()) - when (io.mem.grant.valid) { - mem_acked := Bool(true) - mem_gxid := io.mem.grant.bits.payload.manager_xact_id - mem_gsrc := io.mem.grant.bits.header.src - mem_needs_ack := io.mem.grant.bits.payload.requiresAck() - } - io.mem.grant.ready := Bool(true) - - val state_rx :: state_pcr_req :: state_pcr_resp :: state_mem_rreq :: state_mem_wreq :: state_mem_rresp :: state_mem_wresp :: state_mem_finish :: state_tx :: Nil = Enum(UInt(), 9) + val state_rx :: state_pcr_req :: state_pcr_resp :: state_mem_rreq :: state_mem_wreq :: state_mem_rresp :: state_mem_wresp :: state_tx :: Nil = Enum(UInt(), 8) val state = Reg(init=state_rx) + val (cnt, cnt_done) = Counter((state === state_mem_wreq && io.mem.acquire.ready) || + (state === state_mem_rresp && io.mem.grant.valid), dataBeats) val rx_cmd = Mux(rx_word_count === UInt(0), next_cmd, cmd) when (state === state_rx && rx_done) { state := Mux(rx_cmd === cmd_readmem, state_mem_rreq, @@ -149,28 +139,18 @@ class HTIF(pcr_RESET: Int) extends Module with HTIFParameters { Mux(rx_cmd === cmd_readcr || rx_cmd === cmd_writecr, state_pcr_req, state_tx))) } - - val (cnt, cnt_done) = Counter((state === state_mem_wreq && io.mem.acquire.ready) || - (state === state_mem_rresp && io.mem.grant.valid), dataBeats) when (state === state_mem_wreq) { when (cnt_done) { state := state_mem_wresp } } when (state === state_mem_rreq) { when(io.mem.acquire.ready) { state := state_mem_rresp } } - when (state === state_mem_wresp) { - when (mem_acked) { - state := state_mem_finish - mem_acked := Bool(false) - } + when (state === state_mem_wresp && io.mem.grant.valid) { + state := Mux(cmd === cmd_readmem || pos === UInt(1), state_tx, state_rx) + pos := pos - UInt(1) + addr := addr + UInt(1 << offsetBits-3) } - when (state === state_mem_rresp) { - when (cnt_done) { - state := state_mem_finish - mem_acked := Bool(false) - } - } - when (state === state_mem_finish && io.mem.finish.ready) { + when (state === 
state_mem_rresp && cnt_done) {
     state := Mux(cmd === cmd_readmem || pos === UInt(1), state_tx, state_rx)
     pos := pos - UInt(1)
     addr := addr + UInt(1 << offsetBits-3)
@@ -187,8 +167,8 @@ class HTIF(pcr_RESET: Int) extends Module with HTIFParameters {
   val mem_req_data = (0 until n).map { i =>
     val ui = UInt(i, log2Up(n))
     when (state === state_mem_rresp && io.mem.grant.valid) {
-      packet_ram(Cat(io.mem.grant.bits.payload.addr_beat, ui)) :=
-        io.mem.grant.bits.payload.data((i+1)*short_request_bits-1, i*short_request_bits)
+      packet_ram(Cat(io.mem.grant.bits.addr_beat, ui)) :=
+        io.mem.grant.bits.data((i+1)*short_request_bits-1, i*short_request_bits)
     }
     packet_ram(Cat(cnt, ui))
   }.reverse.reduce(_##_)
@@ -202,9 +182,7 @@ class HTIF(pcr_RESET: Int) extends Module with HTIFParameters {
       client_xact_id = UInt(0),
       data = mem_req_data),
     GetBlock(addr_block = init_addr))
-  io.mem.finish.valid := (state === state_mem_finish) && mem_needs_ack
-  io.mem.finish.bits.payload.manager_xact_id := mem_gxid
-  io.mem.finish.bits.header.dst := mem_gsrc
+  io.mem.grant.ready := Bool(true)

   val pcrReadData = Reg(Bits(width = io.cpu(0).pcr_rep.bits.getWidth))
   for (i <- 0 until nCores) {
diff --git a/uncore/src/main/scala/memserdes.scala b/uncore/src/main/scala/memserdes.scala
index 059fbdcf..878ce19a 100644
--- a/uncore/src/main/scala/memserdes.scala
+++ b/uncore/src/main/scala/memserdes.scala
@@ -207,7 +207,7 @@ class MemDesser(w: Int) extends Module // test rig side

 //Adapter between an UncachedTileLinkIO and a mem controller MemIO
-class MemIOTileLinkIOConverter(qDepth: Int) extends Module {
+class MemIOTileLinkIOConverter(qDepth: Int) extends TLModule {
   val io = new Bundle {
     val tl = new TileLinkIO().flip
     val mem = new MemIO
   }

   val mifAddrBits = params(MIFAddrBits)
   val mifDataBits = params(MIFDataBits)
   val mifDataBeats = params(MIFDataBeats)
-  val tlDataBits = params(TLDataBits)
-  val tlDataBeats = params(TLDataBeats)
   val dataBits = tlDataBits*tlDataBeats
-  require(tlDataBits*tlDataBeats == mifDataBits*mifDataBeats)
-  require(params(TLClientXactIdBits) <= params(MIFTagBits))
+  val dstIdBits = params(LNHeaderBits)
+  require(tlDataBits*tlDataBeats == mifDataBits*mifDataBeats, "Data sizes between LLC and MC don't agree")
+  require(dstIdBits + tlClientXactIdBits < mifTagBits, "MemIO converter is going to truncate tags: " + dstIdBits + " + " + tlClientXactIdBits + " >= " + mifTagBits)

   io.tl.acquire.ready := Bool(false)
   io.tl.probe.valid := Bool(false)
@@ -231,6 +230,7 @@ class MemIOTileLinkIOConverter(qDepth: Int) extends Module {
   val gnt_arb = Module(new Arbiter(new LogicalNetworkIO(new Grant), 2))
   io.tl.grant <> gnt_arb.io.out
+  val dst_off = dstIdBits + tlClientXactIdBits

   val acq_has_data = io.tl.acquire.bits.payload.hasData()
   val rel_has_data = io.tl.release.bits.payload.hasData()
@@ -253,6 +253,8 @@ class MemIOTileLinkIOConverter(qDepth: Int) extends Module {
     g_type = Mux(data_from_rel, Grant.voluntaryAckType, Grant.putAckType),
     client_xact_id = tag_out >> UInt(1),
     manager_xact_id = UInt(0))
+  gnt_arb.io.in(1).bits.header.dst := (if(dstIdBits > 0) tag_out(dst_off, tlClientXactIdBits + 1) else UInt(0))
+  gnt_arb.io.in(1).bits.header.src := UInt(0)

   if(tlDataBits != mifDataBits || tlDataBeats != mifDataBeats) {
     val mem_cmd_q = Module(new Queue(new MemReqCmd, qDepth))
@@ -273,7 +275,8 @@ class MemIOTileLinkIOConverter(qDepth: Int) extends Module {
       when(io.tl.release.valid) {
         active_out := Bool(true)
         cmd_sent_out := Bool(false)
-        tag_out :=
Cat(io.tl.release.bits.payload.client_xact_id,
+        tag_out := Cat(io.tl.release.bits.header.src,
+                       io.tl.release.bits.payload.client_xact_id,
                        io.tl.release.bits.payload.isVoluntary())
         addr_out := io.tl.release.bits.payload.addr_block
         has_data := rel_has_data
@@ -284,7 +287,8 @@ class MemIOTileLinkIOConverter(qDepth: Int) extends Module {
       } .elsewhen(io.tl.acquire.valid) {
         active_out := Bool(true)
         cmd_sent_out := Bool(false)
-        tag_out := Cat(io.tl.acquire.bits.payload.client_xact_id,
+        tag_out := Cat(io.tl.acquire.bits.header.src,
+                       io.tl.acquire.bits.payload.client_xact_id,
                        io.tl.acquire.bits.payload.isBuiltInType())
         addr_out := io.tl.acquire.bits.payload.addr_block
         has_data := acq_has_data
@@ -350,7 +354,8 @@ class MemIOTileLinkIOConverter(qDepth: Int) extends Module {
         data_from_rel := Bool(true)
         make_grant_ack := Bool(true)
         io.mem.req_data.bits.data := io.tl.release.bits.payload.data
-        val tag = Cat(io.tl.release.bits.payload.client_xact_id,
+        val tag = Cat(io.tl.release.bits.header.src,
+                      io.tl.release.bits.payload.client_xact_id,
                       io.tl.release.bits.payload.isVoluntary())
         val addr = io.tl.release.bits.payload.addr_block
         io.mem.req_cmd.bits.tag := tag
@@ -364,7 +369,8 @@ class MemIOTileLinkIOConverter(qDepth: Int) extends Module {
         make_grant_ack := acq_has_data
         io.mem.req_data.bits.data := io.tl.acquire.bits.payload.data
         io.mem.req_cmd.bits.rw := acq_has_data
-        val tag = Cat(io.tl.acquire.bits.payload.client_xact_id,
+        val tag = Cat(io.tl.acquire.bits.header.src,
+                      io.tl.acquire.bits.payload.client_xact_id,
                       io.tl.acquire.bits.payload.isBuiltInType())
         val addr = io.tl.acquire.bits.payload.addr_block
         io.mem.req_cmd.bits.tag := tag
@@ -421,6 +427,8 @@ class MemIOTileLinkIOConverter(qDepth: Int) extends Module {
         manager_xact_id = UInt(0),
         addr_beat = tl_cnt_in,
         data = tl_buf_in(tl_cnt_in))
+      gnt_arb.io.in(0).bits.header.dst := (if(dstIdBits > 0) tag_in(dst_off, tlClientXactIdBits + 1) else UInt(0))
+      gnt_arb.io.in(0).bits.header.src := UInt(0)

       when(!active_in) {
         io.mem.resp.ready := Bool(true)
@@ -454,6 +462,8 @@ class MemIOTileLinkIOConverter(qDepth: Int) extends Module {
       manager_xact_id = UInt(0),
       addr_beat = tl_cnt_in,
       data = io.mem.resp.bits.data)
+    gnt_arb.io.in(0).bits.header.dst := (if(dstIdBits > 0) io.mem.resp.bits.tag(dst_off, tlClientXactIdBits + 1) else UInt(0))
+    gnt_arb.io.in(0).bits.header.src := UInt(0)
   }
 }

diff --git a/uncore/src/main/scala/metadata.scala b/uncore/src/main/scala/metadata.scala
index 8f5d7e90..af5504c8 100644
--- a/uncore/src/main/scala/metadata.scala
+++ b/uncore/src/main/scala/metadata.scala
@@ -18,6 +18,7 @@ class ClientMetadata extends CoherenceMetadata {
   val state = UInt(width = co.clientStateWidth)

   def ===(rhs: ClientMetadata): Bool = this.state === rhs.state
+  def !=(rhs: ClientMetadata): Bool = !this.===(rhs)

   def isValid(dummy: Int = 0): Bool = co.isValid(this)
   def isHit(cmd: UInt): Bool = co.isHit(cmd, this)
@@ -99,6 +100,7 @@ class ManagerMetadata extends CoherenceMetadata {
   def ===(rhs: ManagerMetadata): Bool = //this.state === rhs.state && TODO: Fix 0-width wires in Chisel
                                         this.sharers === rhs.sharers
+  def !=(rhs: ManagerMetadata): Bool = !this.===(rhs)

   def full(dummy: Int = 0) = co.dir.full(this.sharers)
   def requiresProbes(acq: Acquire): Bool = co.requiresProbes(acq, this)
@@ -171,7 +173,7 @@ class HierarchicalMetadata extends CoherenceMetadata {
   val outer: ClientMetadata = Bundle(new ClientMetadata, {case TLId => params(OuterTLId)})
   def ===(rhs: HierarchicalMetadata): Bool = 
     this.inner === rhs.inner && this.outer === rhs.outer
-  def !=(rhs:
HierarchicalMetadata): Bool = !(this === rhs) + def !=(rhs: HierarchicalMetadata): Bool = !this.===(rhs) } object HierarchicalMetadata { diff --git a/uncore/src/main/scala/network.scala b/uncore/src/main/scala/network.scala index 161a9a0c..7fdc8e09 100644 --- a/uncore/src/main/scala/network.scala +++ b/uncore/src/main/scala/network.scala @@ -6,6 +6,7 @@ import Chisel._ case object LNManagers extends Field[Int] case object LNClients extends Field[Int] case object LNEndpoints extends Field[Int] +case object LNHeaderBits extends Field[Int] class PhysicalHeader(n: Int) extends Bundle { val src = UInt(width = log2Up(n)) @@ -47,8 +48,8 @@ class BasicCrossbar[T <: Data](n: Int, dType: T, count: Int = 1, needsLock: Opti abstract class LogicalNetwork extends Module class LogicalHeader extends Bundle { - val src = UInt(width = log2Up(params(LNEndpoints))) - val dst = UInt(width = log2Up(params(LNEndpoints))) + val src = UInt(width = params(LNHeaderBits)) + val dst = UInt(width = params(LNHeaderBits)) } class LogicalNetworkIO[T <: Data](dType: T) extends Bundle { diff --git a/uncore/src/main/scala/tilelink.scala b/uncore/src/main/scala/tilelink.scala index 0aff9aab..c2bc2d30 100644 --- a/uncore/src/main/scala/tilelink.scala +++ b/uncore/src/main/scala/tilelink.scala @@ -3,7 +3,8 @@ package uncore import Chisel._ import scala.math.max -import scala.reflect.ClassTag +import scala.reflect._ +import scala.reflect.runtime.universe._ // Parameters exposed to the top-level design, set based on // external requirements or design space exploration @@ -11,17 +12,22 @@ import scala.reflect.ClassTag case object TLId extends Field[String] // Unique name per network case object TLCoherencePolicy extends Field[CoherencePolicy] case object TLBlockAddrBits extends Field[Int] -case object TLManagerXactIdBits extends Field[Int] -case object TLClientXactIdBits extends Field[Int] +case object TLMaxClientXacts extends Field[Int] +case object TLMaxClientPorts extends Field[Int] +case object TLMaxManagerXacts extends Field[Int] case object TLDataBits extends Field[Int] case object TLDataBeats extends Field[Int] case object TLNetworkIsOrderedP2P extends Field[Boolean] abstract trait TileLinkParameters extends UsesParameters { val tlBlockAddrBits = params(TLBlockAddrBits) - val tlClientXactIdBits = params(TLClientXactIdBits) - val tlManagerXactIdBits = params(TLManagerXactIdBits) + val tlMaxClientXacts = params(TLMaxClientXacts) + val tlMaxClientPorts = params(TLMaxClientPorts) + val tlMaxManagerXacts = params(TLMaxManagerXacts) + val tlClientXactIdBits = log2Up(tlMaxClientXacts*tlMaxClientPorts) + val tlManagerXactIdBits = log2Up(tlMaxManagerXacts) val tlDataBits = params(TLDataBits) + val tlDataBytes = tlDataBits/8 val tlDataBeats = params(TLDataBeats) val tlCoh = params(TLCoherencePolicy) val tlWriteMaskBits = if(tlDataBits/8 < 1) 1 else tlDataBits/8 @@ -41,12 +47,16 @@ abstract trait TileLinkParameters extends UsesParameters { val amoAluOperandBits = params(AmoAluOperandBits) } -abstract class TLBundle extends Bundle with TileLinkParameters +abstract class TLBundle extends Bundle with TileLinkParameters { +} abstract class TLModule extends Module with TileLinkParameters // Directionality of message channel // Used to hook up logical network ports to physical network ports -trait TileLinkChannel extends TLBundle +trait TileLinkChannel extends TLBundle { + def hasData(dummy: Int = 0): Bool + def hasMultibeatData(dummy: Int = 0): Bool +} trait ClientToManagerChannel extends TileLinkChannel trait ManagerToClientChannel 
@@ -41,12 +47,16 @@ abstract trait TileLinkParameters extends UsesParameters {
   val amoAluOperandBits = params(AmoAluOperandBits)
 }

-abstract class TLBundle extends Bundle with TileLinkParameters
+abstract class TLBundle extends Bundle with TileLinkParameters {
+}
 abstract class TLModule extends Module with TileLinkParameters

 // Directionality of message channel
 // Used to hook up logical network ports to physical network ports
-trait TileLinkChannel extends TLBundle
+trait TileLinkChannel extends TLBundle {
+  def hasData(dummy: Int = 0): Bool
+  def hasMultibeatData(dummy: Int = 0): Bool
+}
 trait ClientToManagerChannel extends TileLinkChannel
 trait ManagerToClientChannel extends TileLinkChannel
 trait ClientToClientChannel extends TileLinkChannel // Unused for now
@@ -97,8 +107,7 @@ class Acquire extends ClientToManagerChannel
   val addrByteOff = tlMemoryOperandSizeBits + opSizeOff
   val addrByteMSB = tlByteAddrBits + addrByteOff
   def allocate(dummy: Int = 0) = union(0)
-  def op_code(dummy: Int = 0) = Mux(isBuiltInType() &&
-        (a_type === Acquire.putType || a_type === Acquire.putBlockType),
+  def op_code(dummy: Int = 0) = Mux(isBuiltInType(Acquire.putType) || isBuiltInType(Acquire.putBlockType),
     M_XWR, union(opSizeOff-1, opCodeOff))
   def op_size(dummy: Int = 0) = union(addrByteOff-1, opSizeOff)
   def addr_byte(dummy: Int = 0) = union(addrByteMSB-1, addrByteOff)
@@ -328,6 +337,8 @@ class Probe extends ManagerToClientChannel
   val p_type = UInt(width = tlCoh.probeTypeWidth)

   def is(t: UInt) = p_type === t
+  def hasData(dummy: Int = 0) = Bool(false)
+  def hasMultibeatData(dummy: Int = 0) = Bool(false)
 }

 object Probe {
@@ -383,6 +394,7 @@ class Grant extends ManagerToClientChannel

   // Helper funcs
   def isBuiltInType(dummy: Int = 0): Bool = is_builtin_type
+  def isBuiltInType(t: UInt): Bool = is_builtin_type && g_type === t
   def is(t: UInt):Bool = g_type === t
   def hasData(dummy: Int = 0): Bool = Mux(isBuiltInType(),
                                         Grant.typesWithData.contains(g_type),
@@ -394,7 +406,7 @@ class Grant extends ManagerToClientChannel
   def isVoluntary(dummy: Int = 0): Bool = isBuiltInType() && (g_type === Grant.voluntaryAckType)
   def requiresAck(dummy: Int = 0): Bool = !Bool(tlNetworkPreservesPointToPointOrdering) && !isVoluntary()
   def makeFinish(dummy: Int = 0): Finish = {
-    val f = Bundle(new Finish, { case TLManagerXactIdBits => tlManagerXactIdBits })
+    val f = Bundle(new Finish, { case TLMaxManagerXacts => tlMaxManagerXacts })
     f.manager_xact_id := this.manager_xact_id
     f
   }
@@ -428,7 +440,10 @@ object Grant {
   }
 }

-class Finish extends ClientToManagerChannel with HasManagerTransactionId
+class Finish extends ClientToManagerChannel with HasManagerTransactionId {
+  def hasData(dummy: Int = 0) = Bool(false)
+  def hasMultibeatData(dummy: Int = 0) = Bool(false)
+}

 // Complete IO definitions for two types of TileLink clients
 class UncachedTileLinkIO extends TLBundle {
@@ -455,33 +470,22 @@ class TileLinkIOWrapper extends TLModule {
   io.out.probe.ready := Bool(true)
   io.out.release.valid := Bool(false)
 }
-object TileLinkIOWrapper {
-  def apply(utl: UncachedTileLinkIO, p: Parameters): TileLinkIO = {
-    val conv = Module(new TileLinkIOWrapper)(p)
-    conv.io.in <> utl
-    conv.io.out
-  }
-  def apply(utl: UncachedTileLinkIO): TileLinkIO = {
-    val conv = Module(new TileLinkIOWrapper)
-    conv.io.in <> utl
-    conv.io.out
-  }
-  def apply(tl: TileLinkIO) = tl
-}

 // This version of TileLinkIO does not contain network headers for packets
 // that originate in the Clients (i.e. Acquire and Release). These headers
 // are provided in the top-level that instantiates the clients and network.
 // By eliding the header subbundles within the clients we can enable
 // hierarchical P&R while minimizing unconnected port errors in GDS.
+// Secondly, this version of the interface elides Finish messages, with the
+// assumption that a FinishUnit has been coupled to the TileLinkIO port
+// to deal with acking received Grants.
 class HeaderlessUncachedTileLinkIO extends TLBundle {
   val acquire = new DecoupledIO(new Acquire)
-  val grant = new DecoupledIO(new LogicalNetworkIO(new Grant)).flip
-  val finish = new DecoupledIO(new LogicalNetworkIO(new Finish))
+  val grant = new DecoupledIO(new Grant).flip
 }

 class HeaderlessTileLinkIO extends HeaderlessUncachedTileLinkIO {
-  val probe = new DecoupledIO(new LogicalNetworkIO(new Probe)).flip
+  val probe = new DecoupledIO(new Probe).flip
   val release = new DecoupledIO(new Release)
 }

@@ -492,17 +496,231 @@ class HeaderlessTileLinkIOWrapper extends TLModule {
   }
   io.out.acquire <> io.in.acquire
   io.out.grant <> io.in.grant
-  io.out.finish <> io.in.finish
   io.out.probe.ready := Bool(true)
   io.out.release.valid := Bool(false)
 }

-object HeaderlessTileLinkIOWrapper {
+object TileLinkIOWrapper {
+  def apply(utl: HeaderlessUncachedTileLinkIO, p: Parameters): HeaderlessTileLinkIO = {
+    val conv = Module(new HeaderlessTileLinkIOWrapper)(p)
+    conv.io.in <> utl
+    conv.io.out
+  }
   def apply(utl: HeaderlessUncachedTileLinkIO): HeaderlessTileLinkIO = {
     val conv = Module(new HeaderlessTileLinkIOWrapper)
     conv.io.in <> utl
     conv.io.out
   }
+  def apply(tl: HeaderlessTileLinkIO): HeaderlessTileLinkIO = tl
+  def apply(utl: UncachedTileLinkIO, p: Parameters): TileLinkIO = {
+    val conv = Module(new TileLinkIOWrapper)(p)
+    conv.io.in <> utl
+    conv.io.out
+  }
+  def apply(utl: UncachedTileLinkIO): TileLinkIO = {
+    val conv = Module(new TileLinkIOWrapper)
+    conv.io.in <> utl
+    conv.io.out
+  }
+  def apply(tl: TileLinkIO): TileLinkIO = tl
+}
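The intended top-level pattern is that a client exposes a HeaderlessTileLinkIO and the enclosing design attaches headers (and Finish handling) with a TileLinkNetworkPort, defined further below. A sketch under assumed names, routing blocks across two banks by the low address bit (requires an implicit Parameters in scope):

    // `cacheSide` is some client's HeaderlessTileLinkIO (hypothetical name).
    val bankRoute = (addrBlock: UInt) => addrBlock(0)  // addr_block -> manager id
    val netSide = TileLinkNetworkPort(cacheSide, clientId = 3, addrConvert = bankRoute)
    // netSide is a full TileLinkIO: headers stamped, Grants acked by a FinishUnit.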
+
+trait HasDataBeatCounters {
+  type HasBeat = TileLinkChannel with HasTileLinkBeatId
+  type HasClientId = TileLinkChannel with HasClientTransactionId
+  type LNAcquire = LogicalNetworkIO[Acquire]
+  type LNRelease = LogicalNetworkIO[Release]
+  type LNGrant = LogicalNetworkIO[Grant]
+
+  def connectDataBeatCounter[S <: TileLinkChannel : ClassTag](inc: Bool, data: S, beat: UInt) = {
+    val multi = data.hasMultibeatData()
+    val (multi_cnt, multi_done) = Counter(inc && multi, data.tlDataBeats)
+    val cnt = Mux(multi, multi_cnt, beat)
+    val done = Mux(multi, multi_done, inc)
+    (cnt, done)
+  }
+
+  def connectOutgoingDataBeatCounter[T <: Data : TypeTag](
+      in: DecoupledIO[T],
+      beat: UInt = UInt(0)): (UInt, Bool) = {
+    in.bits match {
+      case p: TileLinkChannel if typeTag[T].tpe <:< typeTag[TileLinkChannel].tpe =>
+        connectDataBeatCounter(in.fire(), p, beat)
+      case ln: LNGrant if typeTag[T].tpe <:< typeTag[LNGrant].tpe =>
+        connectDataBeatCounter(in.fire(), ln.payload, beat)
+      case _ => { require(false, "Don't know how to connect a beat counter to " + typeTag[T].tpe); (UInt(0), Bool(false))}
+    }
+  }
+
+  def connectIncomingDataBeatCounters[T <: HasClientId : ClassTag](
+      in: DecoupledIO[LogicalNetworkIO[T]],
+      entries: Int): Vec[Bool] = {
+    val id = in.bits.payload.client_xact_id
+    Vec((0 until entries).map { i =>
+      connectDataBeatCounter(in.fire() && id === UInt(i), in.bits.payload, UInt(0))._2
+    })
+  }
+
+  def connectIncomingDataBeatCounter[T <: Data : TypeTag](in: DecoupledIO[T]): Bool = {
+    in.bits match {
+      case p: TileLinkChannel if typeTag[T].tpe <:< typeTag[TileLinkChannel].tpe =>
+        connectDataBeatCounter(in.fire(), p, UInt(0))._2
+      case ln: LNAcquire if typeTag[T].tpe =:= typeTag[LNAcquire].tpe =>
+        connectDataBeatCounter(in.fire(), ln.payload, UInt(0))._2
+      case ln: LNRelease if typeTag[T].tpe =:= typeTag[LNRelease].tpe =>
+        connectDataBeatCounter(in.fire(), ln.payload, UInt(0))._2
+      case ln: LNGrant if typeTag[T].tpe =:= typeTag[LNGrant].tpe =>
+        connectDataBeatCounter(in.fire(), ln.payload, UInt(0))._2
+      case _ => { require(false, "Don't know how to connect a beat counter to " + typeTag[T].tpe); Bool(false)}
+    }
+  }
+
+  def connectHeaderlessTwoWayBeatCounter[T <: TileLinkChannel : ClassTag, S <: TileLinkChannel : ClassTag](
+      max: Int,
+      up: DecoupledIO[T],
+      down: DecoupledIO[S],
+      beat: UInt): (Bool, UInt, Bool, UInt, Bool) = {
+    val cnt = Reg(init = UInt(0, width = log2Up(max+1)))
+    val (up_idx, do_inc) = connectDataBeatCounter(up.fire(), up.bits, beat)
+    val (down_idx, do_dec) = connectDataBeatCounter(down.fire(), down.bits, beat)
+    cnt := Mux(do_dec,
+             Mux(do_inc, cnt, cnt - UInt(1)),
+             Mux(do_inc, cnt + UInt(1), cnt))
+    (cnt > UInt(0), up_idx, do_inc, down_idx, do_dec)
+  }
+
+  def connectTwoWayBeatCounter[T <: TileLinkChannel : ClassTag, S <: TileLinkChannel : ClassTag](
+      max: Int,
+      up: DecoupledIO[LogicalNetworkIO[T]],
+      down: DecoupledIO[LogicalNetworkIO[S]],
+      inc: T => Bool = (t: T) => Bool(true),
+      dec: S => Bool = (s: S) => Bool(true)): (Bool, UInt, Bool, UInt, Bool) = {
+    val cnt = Reg(init = UInt(0, width = log2Up(max+1)))
+    val (up_idx, up_done) = connectDataBeatCounter(up.fire(), up.bits.payload, UInt(0))
+    val (down_idx, down_done) = connectDataBeatCounter(down.fire(), down.bits.payload, UInt(0))
+    val do_inc = up_done && inc(up.bits.payload)
+    val do_dec = down_done && dec(down.bits.payload)
+    cnt := Mux(do_dec,
+             Mux(do_inc, cnt, cnt - UInt(1)),
+             Mux(do_inc, cnt + UInt(1), cnt))
+    (cnt > UInt(0), up_idx, up_done, down_idx, down_done)
+  }
+}
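These helpers are factored out so trackers and ports share one idiom: the two-way counters increment when the last beat of an outgoing message fires and decrement when the matching response completes, yielding a "still pending" bit plus per-side beat indices. A sketch of typical use from inside a module mixing in HasDataBeatCounters (signal names are illustrative):

    // Track an outstanding voluntary release until its Grant arrives:
    val (pending_orel, orel_beat, orel_done, ognt_beat, ognt_done) =
      connectHeaderlessTwoWayBeatCounter(
        max  = 1,                 // at most one release in flight
        up   = io.outer.release,  // counts up when the final beat fires
        down = io.outer.grant,    // counts down when the response completes
        beat = UInt(0))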
+
+class FinishQueueEntry extends TLBundle {
+  val fin = new Finish
+  val dst = UInt(width = log2Up(params(LNEndpoints)))
+}
+
+class FinishQueue(entries: Int) extends Queue(new FinishQueueEntry, entries)
+
+class FinishUnit(srcId: Int = 0) extends TLModule
+    with HasDataBeatCounters {
+  val io = new Bundle {
+    val grant = Decoupled(new LogicalNetworkIO(new Grant)).flip
+    val refill = Decoupled(new Grant)
+    val finish = Decoupled(new LogicalNetworkIO(new Finish))
+    val ready = Bool(OUTPUT)
+    val grant_done = Bool(OUTPUT)
+    val pending_finish = Bool(OUTPUT)
+  }
+
+  val entries = 1 << tlClientXactIdBits
+  val g = io.grant.bits.payload
+  assert(g.client_xact_id <= UInt(entries), "No grant beat counter provisioned, only " + entries)
+
+  val done = connectIncomingDataBeatCounters(io.grant, entries).reduce(_||_)
+  val q = Module(new FinishQueue(entries))
+
+  q.io.enq.valid := io.grant.valid && g.requiresAck() && (!g.hasMultibeatData() || done)
+  q.io.enq.bits.fin := g.makeFinish()
+  q.io.enq.bits.dst := io.grant.bits.header.src
+
+  io.finish.bits.header.src := UInt(srcId)
+  io.finish.bits.header.dst := q.io.deq.bits.dst
+  io.finish.bits.payload := q.io.deq.bits.fin
+  io.finish.valid := q.io.deq.valid
+  q.io.deq.ready := io.finish.ready
+
+  io.refill.valid := io.grant.valid
+  io.refill.bits := io.grant.bits.payload
+  io.grant.ready := (q.io.enq.ready || !g.requiresAck()) && (io.refill.ready || !g.hasData())
+
+  io.ready := q.io.enq.ready
+  io.grant_done := done
+  io.pending_finish := q.io.deq.valid
+}
+
+object TileLinkHeaderOverwriter {
+  def apply[T <: ClientToManagerChannel](
+      in: DecoupledIO[LogicalNetworkIO[T]],
+      clientId: Int,
+      passThrough: Boolean): DecoupledIO[LogicalNetworkIO[T]] = {
+    val out = in.clone.asDirectionless
+    out.bits.payload := in.bits.payload
+    out.bits.header.src := UInt(clientId)
+    out.bits.header.dst := (if(passThrough) in.bits.header.dst else UInt(0))
+    out.valid := in.valid
+    in.ready := out.ready
+    out
+  }
+
+  def apply[T <: ClientToManagerChannel with HasCacheBlockAddress](
+      in: DecoupledIO[LogicalNetworkIO[T]],
+      clientId: Int,
+      nBanks: Int,
+      addrConvert: UInt => UInt): DecoupledIO[LogicalNetworkIO[T]] = {
+    val out: DecoupledIO[LogicalNetworkIO[T]] = apply(in, clientId, false)
+    out.bits.header.dst := addrConvert(in.bits.payload.addr_block)
+    out
+  }
+
+  def apply[T <: ClientToManagerChannel with HasCacheBlockAddress : ClassTag](
+      in: DecoupledIO[T],
+      clientId: Int,
+      addrConvert: UInt => UInt): DecoupledIO[LogicalNetworkIO[T]] = {
+    val out = new DecoupledIO(new LogicalNetworkIO(in.bits.clone)).asDirectionless
+    out.bits.payload := in.bits
+    out.bits.header.src := UInt(clientId)
+    out.bits.header.dst := addrConvert(in.bits.addr_block)
+    out.valid := in.valid
+    in.ready := out.ready
+    out
+  }
+}
+
+class TileLinkNetworkPort(clientId: Int, addrConvert: UInt => UInt) extends TLModule {
+  val io = new Bundle {
+    val client = new HeaderlessTileLinkIO().flip
+    val network = new TileLinkIO
+  }
+
+  val finisher = Module(new FinishUnit(clientId))
+  finisher.io.grant <> io.network.grant
+  io.network.finish <> finisher.io.finish
+
+  val acq_with_header = TileLinkHeaderOverwriter(io.client.acquire, clientId, addrConvert)
+  val rel_with_header = TileLinkHeaderOverwriter(io.client.release, clientId, addrConvert)
+  val prb_without_header = DecoupledLogicalNetworkIOUnwrapper(io.network.probe)
+  val gnt_without_header = finisher.io.refill
+
+  io.network.acquire.bits := acq_with_header.bits
+  io.network.acquire.valid := acq_with_header.valid && finisher.io.ready
+  acq_with_header.ready := io.network.acquire.ready && finisher.io.ready
+  io.network.release <> rel_with_header
+  io.client.probe <> prb_without_header
+  io.client.grant <> gnt_without_header
+}
+
+object TileLinkNetworkPort {
+  def apply[T <: Data](
+      client: HeaderlessTileLinkIO,
+      clientId: Int = 0,
+      addrConvert: UInt => UInt = u => UInt(0))(implicit p: Parameters): TileLinkIO = {
+    val port = Module(new TileLinkNetworkPort(clientId, addrConvert))(p)
+    port.io.client <> client
+    port.io.network
+  }
+}
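FinishUnit only consumes a Grant when all of its side effects can occur in that cycle: the Finish bookkeeping entry must have queue space (unless no ack is required) and the payload must be accepted by the refill path (unless it carries no data). The same predicate, restated over plain Booleans as a sketch:

    def grantReady(requiresAck: Boolean, enqReady: Boolean,
                   hasData: Boolean, refillReady: Boolean): Boolean =
      (enqReady || !requiresAck) && (refillReady || !hasData)
    // A data-less Grant that needs an ack stalls only on the Finish queue:
    assert(!grantReady(requiresAck = true, enqReady = false,
                       hasData = false, refillReady = true))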

 class TileLinkEnqueuer(depths: (Int, Int, Int, Int, Int)) extends Module {
@@ -529,7 +747,6 @@ object TileLinkEnqueuer {
 }

 abstract trait HasArbiterTypes {
-  val arbN: Int
   type ManagerSourcedWithId = ManagerToClientChannel with HasClientTransactionId
   type ClientSourcedWithId = ClientToManagerChannel with HasClientTransactionId
   type ClientSourcedWithIdAndData = ClientToManagerChannel with
@@ -538,20 +755,19 @@ abstract trait HasArbiterTypes {
 }

 // Utility functions for constructing TileLinkIO arbiters
-abstract class TileLinkArbiterLike(val arbN: Int) extends TLModule
-  with HasArbiterTypes {
-
+trait TileLinkArbiterLike extends HasArbiterTypes with TileLinkParameters {
+  val arbN: Int
   // These are filled in depending on whether the arbiter mucks with the
   // client ids and then needs to revert them on the way back
   def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int): Bits
   def managerSourcedClientXactId(in: ManagerSourcedWithId): Bits
   def arbIdx(in: ManagerSourcedWithId): UInt

-  def hookupClientSource[M <: ClientSourcedWithIdAndData : ClassTag]
-      (clts: Seq[DecoupledIO[LogicalNetworkIO[M]]],
-       mngr: DecoupledIO[LogicalNetworkIO[M]]) {
+  def hookupClientSource[M <: ClientSourcedWithIdAndData : ClassTag](
+      clts: Seq[DecoupledIO[LogicalNetworkIO[M]]],
+      mngr: DecoupledIO[LogicalNetworkIO[M]]) {
     def hasData(m: LogicalNetworkIO[M]) = m.payload.hasMultibeatData()
-    val arb = Module(new LockingRRArbiter(mngr.bits.clone, arbN, params(TLDataBeats), Some(hasData _)))
+    val arb = Module(new LockingRRArbiter(mngr.bits.clone, arbN, tlDataBeats, Some(hasData _)))
     clts.zipWithIndex.zip(arb.io.in).map{ case ((req, id), arb) => {
       arb.valid := req.valid
       arb.bits := req.bits
@@ -561,11 +777,11 @@ abstract class TileLinkArbiterLike(val arbN: Int) extends TLModule
     arb.io.out <> mngr
   }

-  def hookupClientSourceHeaderless[M <: ClientSourcedWithIdAndData : ClassTag]
-      (clts: Seq[DecoupledIO[M]],
-       mngr: DecoupledIO[M]) {
+  def hookupClientSourceHeaderless[M <: ClientSourcedWithIdAndData : ClassTag](
+      clts: Seq[DecoupledIO[M]],
+      mngr: DecoupledIO[M]) {
     def hasData(m: M) = m.hasMultibeatData()
-    val arb = Module(new LockingRRArbiter(mngr.bits.clone, arbN, params(TLDataBeats), Some(hasData _)))
+    val arb = Module(new LockingRRArbiter(mngr.bits.clone, arbN, tlDataBeats, Some(hasData _)))
     clts.zipWithIndex.zip(arb.io.in).map{ case ((req, id), arb) => {
       arb.valid := req.valid
       arb.bits := req.bits
@@ -575,17 +791,23 @@ abstract class TileLinkArbiterLike(val arbN: Int) extends TLModule
     arb.io.out <> mngr
   }

-  def hookupFinish[M <: LogicalNetworkIO[Finish] : ClassTag]
-      (clts: Seq[DecoupledIO[M]],
-       mngr: DecoupledIO[M]) {
-    val arb = Module(new RRArbiter(mngr.bits.clone, arbN))
-    arb.io.in zip clts map { case (arb, req) => arb <> req }
-    arb.io.out <> mngr
+  def hookupManagerSourceWithHeader[M <: ManagerToClientChannel](
+      clts: Seq[DecoupledIO[LogicalNetworkIO[M]]],
+      mngr: DecoupledIO[LogicalNetworkIO[M]]) {
+    mngr.ready := Bool(false)
+    for (i <- 0 until arbN) {
+      clts(i).valid := Bool(false)
+      when (mngr.bits.header.dst === UInt(i)) {
+        clts(i).valid := mngr.valid
+        mngr.ready := clts(i).ready
+      }
+      clts(i).bits := mngr.bits
+    }
   }

-  def hookupManagerSourceWithId[M <: ManagerSourcedWithId]
-      (clts: Seq[DecoupledIO[LogicalNetworkIO[M]]],
-       mngr: DecoupledIO[LogicalNetworkIO[M]]) {
+  def hookupManagerSourceWithId[M <: ManagerSourcedWithId](
+      clts: Seq[DecoupledIO[LogicalNetworkIO[M]]],
+      mngr: DecoupledIO[LogicalNetworkIO[M]]) {
     mngr.ready := Bool(false)
     for (i <- 0 until arbN) {
       clts(i).valid := Bool(false)
@@ -594,24 +816,45 @@ abstract class TileLinkArbiterLike(val arbN: Int) extends TLModule
         mngr.ready := clts(i).ready
       }
       clts(i).bits := mngr.bits
-      clts(i).bits.payload.client_xact_id :=
-        managerSourcedClientXactId(mngr.bits.payload)
+      clts(i).bits.payload.client_xact_id := managerSourcedClientXactId(mngr.bits.payload)
     }
   }

-  def hookupManagerSourceBroadcast[M <: ManagerToClientChannel]
-      (clts: Seq[DecoupledIO[LogicalNetworkIO[M]]],
-       mngr: DecoupledIO[LogicalNetworkIO[M]]) {
+  def hookupManagerSourceHeaderlessWithId[M <: ManagerSourcedWithId](
+      clts: Seq[DecoupledIO[M]],
+      mngr: DecoupledIO[M]) {
+    mngr.ready := Bool(false)
+    for (i <- 0 until arbN) {
+      clts(i).valid := Bool(false)
+      when (arbIdx(mngr.bits) === UInt(i)) {
+        clts(i).valid := mngr.valid
+        mngr.ready := clts(i).ready
+      }
+      clts(i).bits := mngr.bits
+      clts(i).bits.client_xact_id := managerSourcedClientXactId(mngr.bits)
+    }
+  }
+
+  def hookupManagerSourceBroadcast[M <: Data](
+      clts: Seq[DecoupledIO[M]],
+      mngr: DecoupledIO[M]) {
     clts.map{ _.valid := mngr.valid }
     clts.map{ _.bits := mngr.bits }
     mngr.ready := clts.map(_.ready).reduce(_&&_)
   }
+
+  def hookupFinish[M <: LogicalNetworkIO[Finish] : ClassTag](
+      clts: Seq[DecoupledIO[M]],
+      mngr: DecoupledIO[M]) {
+    val arb = Module(new RRArbiter(mngr.bits.clone, arbN))
+    arb.io.in zip clts map { case (arb, req) => arb <> req }
+    arb.io.out <> mngr
+  }
 }

-abstract class UncachedTileLinkIOArbiter(n: Int)
-  extends TileLinkArbiterLike(n) {
+abstract class UncachedTileLinkIOArbiter(val arbN: Int) extends Module with TileLinkArbiterLike {
   val io = new Bundle {
-    val in = Vec.fill(n){new UncachedTileLinkIO}.flip
+    val in = Vec.fill(arbN){new UncachedTileLinkIO}.flip
     val out = new UncachedTileLinkIO
   }
   hookupClientSource(io.in.map(_.acquire), io.out.acquire)
@@ -619,9 +862,9 @@ abstract class UncachedTileLinkIOArbiter(n: Int)
   hookupManagerSourceWithId(io.in.map(_.grant), io.out.grant)
 }

-abstract class TileLinkIOArbiter(n: Int) extends TileLinkArbiterLike(n) {
+abstract class TileLinkIOArbiter(val arbN: Int) extends Module with TileLinkArbiterLike {
   val io = new Bundle {
-    val in = Vec.fill(n){new TileLinkIO}.flip
+    val in = Vec.fill(arbN){new TileLinkIO}.flip
     val out = new TileLinkIO
   }
   hookupClientSource(io.in.map(_.acquire), io.out.acquire)
@@ -631,8 +874,32 @@ abstract class TileLinkIOArbiter(n: Int) extends TileLinkArbiterLike(n) {
   hookupManagerSourceWithId(io.in.map(_.grant), io.out.grant)
 }

+class HeaderlessUncachedTileLinkIOArbiter(val arbN: Int) extends Module
+    with TileLinkArbiterLike
+    with AppendsArbiterId {
+  val io = new Bundle {
+    val in = Vec.fill(arbN){new HeaderlessUncachedTileLinkIO}.flip
+    val out = new HeaderlessUncachedTileLinkIO
+  }
+  hookupClientSourceHeaderless(io.in.map(_.acquire), io.out.acquire)
+  hookupManagerSourceHeaderlessWithId(io.in.map(_.grant), io.out.grant)
+}
+
+class HeaderlessTileLinkIOArbiter(val arbN: Int) extends Module
+    with TileLinkArbiterLike
+    with AppendsArbiterId {
+  val io = new Bundle {
+    val in = Vec.fill(arbN){new HeaderlessTileLinkIO}.flip
+    val out = new HeaderlessTileLinkIO
+  }
+  hookupClientSourceHeaderless(io.in.map(_.acquire), io.out.acquire)
+  hookupClientSourceHeaderless(io.in.map(_.release), io.out.release)
+  hookupManagerSourceBroadcast(io.in.map(_.probe), io.out.probe)
+  hookupManagerSourceHeaderlessWithId(io.in.map(_.grant), io.out.grant)
+}
+
 // Appends the port index of the arbiter to the client_xact_id
-abstract trait AppendsArbiterId extends HasArbiterTypes {
+trait AppendsArbiterId extends TileLinkArbiterLike {
   def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int) =
     Cat(in.client_xact_id, UInt(id, log2Up(arbN)))
   def managerSourcedClientXactId(in: ManagerSourcedWithId) =
@@ -641,14 +908,14 @@ abstract trait AppendsArbiterId extends HasArbiterTypes {
 }

 // Uses the client_xact_id as is (assumes it has been set to port index)
-abstract trait PassesId extends HasArbiterTypes {
+trait PassesId extends TileLinkArbiterLike {
   def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int) = in.client_xact_id
   def managerSourcedClientXactId(in: ManagerSourcedWithId) = in.client_xact_id
   def arbIdx(in: ManagerSourcedWithId) = in.client_xact_id
 }

 // Overwrites some default client_xact_id with the port idx
-abstract trait UsesNewId extends HasArbiterTypes {
+trait UsesNewId extends TileLinkArbiterLike {
   def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int) = UInt(id, log2Up(arbN))
   def managerSourcedClientXactId(in: ManagerSourcedWithId) = UInt(0)
   def arbIdx(in: ManagerSourcedWithId) = in.client_xact_id
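AppendsArbiterId widens every client_xact_id seen by the manager by log2Up(arbN) bits, packing the winning port's index into the low bits so responses can be demultiplexed without extra state. The bit arithmetic, as a plain-Scala sketch with illustrative widths:

    val arbN = 4                                 // illustrative port count
    val portBits = 2                             // log2Up(arbN)
    def toManagerId(clientXactId: Int, port: Int): Int =
      (clientXactId << portBits) | port          // Cat(client_xact_id, port)
    def arbIdx(managerId: Int): Int = managerId & ((1 << portBits) - 1)
    def clientId(managerId: Int): Int = managerId >> portBits
    assert(arbIdx(toManagerId(5, 3)) == 3 && clientId(toManagerId(5, 3)) == 5)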
diff --git a/uncore/src/main/scala/uncore.scala b/uncore/src/main/scala/uncore.scala
index de55c0e2..44981459 100644
--- a/uncore/src/main/scala/uncore.scala
+++ b/uncore/src/main/scala/uncore.scala
@@ -2,7 +2,8 @@
 package uncore
 import Chisel._
-import scala.reflect.ClassTag
+import scala.reflect._
+import scala.reflect.runtime.universe._

 case object NReleaseTransactors extends Field[Int]
 case object NProbeTransactors extends Field[Int]
@@ -82,26 +83,24 @@ trait HasInnerTLIO extends CoherenceAgentBundle {
 }

 trait HasUncachedOuterTLIO extends CoherenceAgentBundle {
-  val outer = Bundle(new UncachedTileLinkIO)(outerTLParams)
-  def oacq(dummy: Int = 0) = outer.acquire.bits.payload
-  def ognt(dummy: Int = 0) = outer.grant.bits.payload
-  def ofin(dummy: Int = 0) = outer.finish.bits.payload
+  val outer = Bundle(new HeaderlessUncachedTileLinkIO)(outerTLParams)
+  def oacq(dummy: Int = 0) = outer.acquire.bits
+  def ognt(dummy: Int = 0) = outer.grant.bits
 }

 trait HasCachedOuterTLIO extends CoherenceAgentBundle {
-  val outer = Bundle(new TileLinkIO)(outerTLParams)
-  def oacq(dummy: Int = 0) = outer.acquire.bits.payload
-  def oprb(dummy: Int = 0) = outer.probe.bits.payload
-  def orel(dummy: Int = 0) = outer.release.bits.payload
-  def ognt(dummy: Int = 0) = outer.grant.bits.payload
-  def ofin(dummy: Int = 0) = outer.finish.bits.payload
+  val outer = Bundle(new HeaderlessTileLinkIO)(outerTLParams)
+  def oacq(dummy: Int = 0) = outer.acquire.bits
+  def oprb(dummy: Int = 0) = outer.probe.bits
+  def orel(dummy: Int = 0) = outer.release.bits
+  def ognt(dummy: Int = 0) = outer.grant.bits
 }

 class ManagerTLIO extends HasInnerTLIO with HasUncachedOuterTLIO

 abstract class CoherenceAgent extends CoherenceAgentModule {
   def innerTL: TileLinkIO
-  def outerTL: TileLinkIO
+  def outerTL: HeaderlessTileLinkIO
   def incoherent: Vec[Bool]
 }

@@ -131,39 +130,47 @@ trait HasTrackerConflictIO extends Bundle {
 class ManagerXactTrackerIO extends ManagerTLIO with HasTrackerConflictIO
 class HierarchicalXactTrackerIO extends HierarchicalTLIO with HasTrackerConflictIO

-abstract class XactTracker extends CoherenceAgentModule {
-  def connectDataBeatCounter[S <: HasTileLinkData : ClassTag](inc: Bool, data: S, beat: UInt) = {
-    val multi = data.hasMultibeatData()
-    val (multi_cnt, multi_done) = Counter(inc && multi, data.tlDataBeats)
-    val cnt = Mux(multi, multi_cnt, beat)
-    val done = Mux(multi, multi_done, inc)
-    (cnt, done)
+abstract class XactTracker extends CoherenceAgentModule
+    with HasDataBeatCounters {
+  def addPendingBitWhenBeat[T <: HasBeat](inc: Bool, in: T): UInt = Fill(in.tlDataBeats, inc) & UIntToOH(in.addr_beat)
+  def dropPendingBitWhenBeat[T <: HasBeat](dec: Bool, in: T): UInt = ~Fill(in.tlDataBeats, dec) | ~UIntToOH(in.addr_beat)
+
+  def addPendingBitWhenBeatHasData[T <: Data : TypeTag](in: DecoupledIO[T]): UInt = {
+    in.bits match {
+      case p: HasBeat if typeTag[T].tpe <:< typeTag[HasBeat].tpe =>
+        addPendingBitWhenBeat(in.fire() && p.hasData(), p)
+      case ln: LNAcquire if typeTag[T].tpe <:< typeTag[LNAcquire].tpe =>
+        addPendingBitWhenBeat(in.fire() && ln.payload.hasData(), ln.payload)
+      case ln: LNRelease if typeTag[T].tpe <:< typeTag[LNRelease].tpe =>
+        addPendingBitWhenBeat(in.fire() && ln.payload.hasData(), ln.payload)
+      case ln: LNGrant if typeTag[T].tpe <:< typeTag[LNGrant].tpe =>
+        addPendingBitWhenBeat(in.fire() && ln.payload.hasData(), ln.payload)
+      case _ => { require(false, "Don't know how track beats of " + typeTag[T].tpe); UInt(0) }
+    }
   }

-  def connectOutgoingDataBeatCounter[T <: HasTileLinkData : ClassTag](
-      in: DecoupledIO[LogicalNetworkIO[T]],
-      beat: UInt = UInt(0)) = {
-    connectDataBeatCounter(in.fire(), in.bits.payload, beat)
-  }
-
-  def connectIncomingDataBeatCounter[T <: HasTileLinkData : ClassTag](in: DecoupledIO[LogicalNetworkIO[T]]) = {
-    connectDataBeatCounter(in.fire(), in.bits.payload, UInt(0))._2
-  }
-
-  def addPendingBitWhenHasData[T <: HasTileLinkData with HasTileLinkBeatId](in: DecoupledIO[LogicalNetworkIO[T]]) = {
-    Fill(in.bits.payload.tlDataBeats, in.fire() && in.bits.payload.hasData()) &
-      UIntToOH(in.bits.payload.addr_beat)
-  }
-
-  def dropPendingBitWhenHasData[T <: HasTileLinkData with HasTileLinkBeatId](in: DecoupledIO[LogicalNetworkIO[T]]) = {
-    ~Fill(in.bits.payload.tlDataBeats, in.fire() && in.bits.payload.hasData()) |
-      ~UIntToOH(in.bits.payload.addr_beat)
-  }
-
-  def addPendingBitWhenGetOrAtomic(in: DecoupledIO[LogicalNetworkIO[Acquire]]) = {
+  def addPendingBitWhenBeatIsGetOrAtomic(in: DecoupledIO[LogicalNetworkIO[Acquire]]): UInt = {
     val a = in.bits.payload
-    Fill(a.tlDataBeats, in.fire() && a.isBuiltInType() &&
-      (a.is(Acquire.getType) || a.is(Acquire.getBlockType) || a.is(Acquire.putAtomicType))) &
-        UIntToOH(a.addr_beat)
+    val isGetOrAtomic = a.isBuiltInType() &&
+      (Vec(Acquire.getType, Acquire.getBlockType, Acquire.putAtomicType).contains(a.a_type))
+    addPendingBitWhenBeat(in.fire() && isGetOrAtomic, in.bits.payload)
+  }
+
+  def dropPendingBitWhenBeatHasData[T <: Data : TypeTag](in: DecoupledIO[T]): UInt = {
+    in.bits match {
+      case p: HasBeat if typeTag[T].tpe <:< typeTag[HasBeat].tpe =>
+        dropPendingBitWhenBeat(in.fire() && p.hasData(), p)
+      case ln: LNAcquire if typeTag[T].tpe <:< typeTag[LNAcquire].tpe =>
+        dropPendingBitWhenBeat(in.fire() && ln.payload.hasData(), ln.payload)
+      case ln: LNRelease if typeTag[T].tpe <:< typeTag[LNRelease].tpe =>
+        dropPendingBitWhenBeat(in.fire() && ln.payload.hasData(), ln.payload)
+      case ln: LNGrant if typeTag[T].tpe <:< typeTag[LNGrant].tpe =>
+        dropPendingBitWhenBeat(in.fire() && ln.payload.hasData(), ln.payload)
+      case _ => { require(false, "Don't know how track beats of " + typeTag[T].tpe); UInt(0) }
+    }
+  }
+
+  def dropPendingBitAtDest(in: DecoupledIO[LogicalNetworkIO[Probe]]): UInt = {
+    ~Fill(nCoherentClients, in.fire()) | ~UIntToOH(in.bits.header.dst)
   }
 }
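The pending-beat helpers reduce to one-hot mask arithmetic: the add helper yields a mask with only bit addr_beat set when the condition fires, and the drop helper yields an all-ones mask with that bit cleared, so a tracker can keep its state with `pending := (pending & dropMask) | addMask`. Restated over plain integers (a sketch, assuming tlDataBeats = 4):

    val beats = 4
    def addMask(inc: Boolean, beat: Int): Int =  // Fill(beats, inc) & UIntToOH(beat)
      if (inc) 1 << beat else 0
    def dropMask(dec: Boolean, beat: Int): Int = // ~Fill(beats, dec) | ~UIntToOH(beat)
      if (dec) ~(1 << beat) & 0xF else 0xF
    var pending = 0
    pending = (pending & dropMask(false, 0)) | addMask(true, 2) // == 0b0100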
diff --git a/uncore/src/main/scala/util.scala b/uncore/src/main/scala/util.scala
index 14575aa5..f02cf2c3 100644
--- a/uncore/src/main/scala/util.scala
+++ b/uncore/src/main/scala/util.scala
@@ -34,15 +34,15 @@ object ZCounter {
   }
 }

-class FlowThroughSerializer[T <: HasTileLinkData](gen: LogicalNetworkIO[T], n: Int) extends Module {
+class FlowThroughSerializer[T <: HasTileLinkData](gen: T, n: Int) extends Module {
   val io = new Bundle {
     val in = Decoupled(gen.clone).flip
     val out = Decoupled(gen.clone)
     val cnt = UInt(OUTPUT, log2Up(n))
     val done = Bool(OUTPUT)
   }
-  val narrowWidth = io.in.bits.payload.data.getWidth / n
-  require(io.in.bits.payload.data.getWidth % narrowWidth == 0)
+  val narrowWidth = io.in.bits.data.getWidth / n
+  require(io.in.bits.data.getWidth % narrowWidth == 0)

   if(n == 1) {
     io.in <> io.out
@@ -51,12 +51,12 @@ class FlowThroughSerializer[T <: HasTileLinkData](gen: LogicalNetworkIO[T], n: I
   } else {
     val cnt = Reg(init=UInt(0, width = log2Up(n)))
     val wrap = cnt === UInt(n-1)
-    val rbits = Reg(init=io.in.bits)
+    val rbits = Reg(io.in.bits.clone)
    val active = Reg(init=Bool(false))

     val shifter = Vec.fill(n){Bits(width = narrowWidth)}
     (0 until n).foreach {
-      i => shifter(i) := rbits.payload.data((i+1)*narrowWidth-1,i*narrowWidth)
+      i => shifter(i) := rbits.data((i+1)*narrowWidth-1,i*narrowWidth)
     }

     io.done := Bool(false)
@@ -65,16 +65,16 @@ class FlowThroughSerializer[T <: HasTileLinkData](gen: LogicalNetworkIO[T], n: I
     io.out.valid := active || io.in.valid
     io.out.bits := io.in.bits
     when(!active && io.in.valid) {
-      when(io.in.bits.payload.hasData()) {
+      when(io.in.bits.hasData()) {
         cnt := Mux(io.out.ready, UInt(1), UInt(0))
         rbits := io.in.bits
         active := Bool(true)
       }
-      io.done := !io.in.bits.payload.hasData()
+      io.done := !io.in.bits.hasData()
     }
     when(active) {
       io.out.bits := rbits
-      io.out.bits.payload.data := shifter(cnt)
+      io.out.bits.data := shifter(cnt)
       when(io.out.ready) {
         cnt := cnt + UInt(1)
         when(wrap) {
@@ -86,3 +86,13 @@ class FlowThroughSerializer[T <: HasTileLinkData](gen: LogicalNetworkIO[T], n: I
     }
   }
 }
+
+object FlowThroughSerializer {
+  def apply[T <: HasTileLinkData](in: DecoupledIO[T], n: Int): DecoupledIO[T] = {
+    val fs = Module(new FlowThroughSerializer(in.bits, n))
+    fs.io.in.valid := in.valid
+    fs.io.in.bits := in.bits
+    in.ready := fs.io.in.ready
+    fs.io.out
+  }
+}
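The new companion object makes the serializer drop-in on any DecoupledIO whose payload has TileLink data: beats without data pass straight through in one cycle, while a data beat is held in rbits and shifted out narrowWidth bits at a time. A usage sketch with assumed names:

    // Serialize each outgoing Acquire data beat into 4 narrower sub-beats
    // (`wideAcq` is a hypothetical DecoupledIO[Acquire]):
    val narrowAcq = FlowThroughSerializer(wideAcq, 4)
    // narrowAcq.bits.data presents one narrowWidth slice per handshake;
    // all other fields pass through unchanged from the buffered beat.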