From 719fffff4054921d90006546710172918efbe826 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Fri, 17 Jun 2016 16:07:47 -0700 Subject: [PATCH] make sure updates from irel and iacq are gated by tracker allocation --- uncore/src/main/scala/broadcast.scala | 12 +++---- uncore/src/main/scala/bufferless.scala | 8 ++--- uncore/src/main/scala/cache.scala | 38 +++++++++++++------- uncore/src/main/scala/sdq.scala | 2 +- uncore/src/main/scala/trackers.scala | 49 +++++++++++++++----------- 5 files changed, 65 insertions(+), 44 deletions(-) diff --git a/uncore/src/main/scala/broadcast.scala b/uncore/src/main/scala/broadcast.scala index a0ed179f..3665ceaa 100644 --- a/uncore/src/main/scala/broadcast.scala +++ b/uncore/src/main/scala/broadcast.scala @@ -34,14 +34,14 @@ class L2BroadcastHub(implicit p: Parameters) extends HierarchicalCoherenceAgent( doInputRoutingWithAllocation( in = io.inner.acquire, outs = trackerList.map(_.io.inner.acquire), - allocs = trackerList.map(_.io.alloc_iacq), + allocs = trackerList.map(_.io.alloc.iacq), allocOverride = !irel_vs_iacq_conflict) // Handle releases, which might be voluntary and might have data doInputRoutingWithAllocation( in = io.inner.release, outs = trackerList.map(_.io.inner.release), - allocs = trackerList.map(_.io.alloc_irel)) + allocs = trackerList.map(_.io.alloc.irel)) // Wire probe requests and grant reply to clients, finish acks from clients doOutputArbitration(io.inner.probe, trackerList.map(_.io.inner.probe)) @@ -71,7 +71,7 @@ abstract class BroadcastVoluntaryReleaseTracker(trackerId: Int)(implicit p: Para pinAllReadyValidLow(io) // Checks for illegal behavior - assert(!(state === s_idle && io.inner.release.fire() && io.alloc_irel.should && !io.irel().isVoluntary()), + assert(!(state === s_idle && io.inner.release.fire() && io.alloc.irel.should && !io.irel().isVoluntary()), "VoluntaryReleaseTracker accepted Release that wasn't voluntary!") } @@ -88,7 +88,7 @@ abstract class BroadcastAcquireTracker(trackerId: Int)(implicit p: 
Parameters) // Checks for illegal behavior // TODO: this could be allowed, but is a useful check against allocation gone wild - assert(!(state === s_idle && io.inner.acquire.fire() && io.alloc_iacq.should && + assert(!(state === s_idle && io.inner.acquire.fire() && io.alloc.iacq.should && io.iacq().hasMultibeatData() && !io.iacq().first()), "AcquireTracker initialized with a tail data beat.") @@ -105,7 +105,7 @@ class BufferedBroadcastVoluntaryReleaseTracker(trackerId: Int)(implicit p: Param // Tell the parent if any incoming messages conflict with the ongoing transaction routeInParent() - io.alloc_iacq.can := Bool(false) + io.alloc.iacq.can := Bool(false) // Start transaction by accepting inner release innerRelease(block_vol_ignt = pending_orel || vol_ognt_counter.pending) @@ -130,7 +130,7 @@ class BufferedBroadcastAcquireTracker(trackerId: Int)(implicit p: Parameters) // Setup IOs used for routing in the parent routeInParent() - io.alloc_irel.can := Bool(false) + io.alloc.irel.can := Bool(false) // First, take care of accpeting new acquires or secondary misses // Handling of primary and secondary misses' data and write mask merging diff --git a/uncore/src/main/scala/bufferless.scala b/uncore/src/main/scala/bufferless.scala index 2927c928..eca709e4 100644 --- a/uncore/src/main/scala/bufferless.scala +++ b/uncore/src/main/scala/bufferless.scala @@ -35,7 +35,7 @@ class BufferlessBroadcastHub(implicit p: Parameters) extends HierarchicalCoheren doInputRoutingWithAllocation( in = io.inner.acquire, outs = trackerList.map(_.io.inner.acquire), - allocs = trackerList.map(_.io.alloc_iacq), + allocs = trackerList.map(_.io.alloc.iacq), allocOverride = !irel_vs_iacq_conflict) io.outer.acquire.bits.data := io.inner.acquire.bits.data io.outer.acquire.bits.addr_beat := io.inner.acquire.bits.addr_beat @@ -44,7 +44,7 @@ class BufferlessBroadcastHub(implicit p: Parameters) extends HierarchicalCoheren doInputRoutingWithAllocation( in = io.inner.release, outs = 
trackerList.map(_.io.inner.release), - allocs = trackerList.map(_.io.alloc_irel)) + allocs = trackerList.map(_.io.alloc.irel)) io.outer.release.bits.data := io.inner.release.bits.data io.outer.release.bits.addr_beat := io.inner.release.bits.addr_beat @@ -63,7 +63,7 @@ class BufferlessBroadcastVoluntaryReleaseTracker(trackerId: Int)(implicit p: Par // Tell the parent if any incoming messages conflict with the ongoing transaction routeInParent() - io.alloc_iacq.can := Bool(false) + io.alloc.iacq.can := Bool(false) // Start transaction by accepting inner release innerRelease(block_vol_ignt = pending_orel || vol_ognt_counter.pending) @@ -86,7 +86,7 @@ class BufferlessBroadcastAcquireTracker(trackerId: Int)(implicit p: Parameters) // Setup IOs used for routing in the parent routeInParent() - io.alloc_irel.can := Bool(false) + io.alloc.irel.can := Bool(false) // First, take care of accpeting new acquires or secondary misses // Handling of primary and secondary misses' data and write mask merging diff --git a/uncore/src/main/scala/cache.scala b/uncore/src/main/scala/cache.scala index ecaa3dc7..5464dd1c 100644 --- a/uncore/src/main/scala/cache.scala +++ b/uncore/src/main/scala/cache.scala @@ -476,19 +476,19 @@ class TSHRFile(implicit p: Parameters) extends L2HellaCacheModule()(p) doInputRoutingWithAllocation( in = io.inner.acquire, outs = trackerList.map(_.io.inner.acquire), - allocs = trackerList.map(_.io.alloc_iacq), + allocs = trackerList.map(_.io.alloc.iacq), allocOverride = !irel_vs_iacq_conflict) - assert(PopCount(trackerList.map(_.io.alloc_iacq.should)) <= UInt(1), + assert(PopCount(trackerList.map(_.io.alloc.iacq.should)) <= UInt(1), "At most a single tracker should now be allocated for any given Acquire") // Wire releases from clients doInputRoutingWithAllocation( in = io.inner.release, outs = trackerAndWbIOs.map(_.inner.release), - allocs = trackerAndWbIOs.map(_.alloc_irel)) + allocs = trackerAndWbIOs.map(_.alloc.irel)) - 
assert(PopCount(trackerAndWbIOs.map(_.alloc_irel.should)) <= UInt(1), + assert(PopCount(trackerAndWbIOs.map(_.alloc.irel.should)) <= UInt(1), "At most a single tracker should now be allocated for any given Release") // Wire probe requests and grant reply to clients, finish acks from clients @@ -570,9 +570,14 @@ trait ReadsFromOuterCacheDataArray extends HasCoherenceMetadataBuffer def readDataArray(drop_pending_bit: UInt, add_pending_bit: UInt = UInt(0), - block_pending_read: Bool = Bool(false)) { + block_pending_read: Bool = Bool(false), + can_update_pending: Bool = Bool(true)) { val port = io.data - pending_reads := (pending_reads & dropPendingBit(port.read) & drop_pending_bit) | add_pending_bit + when (can_update_pending) { + pending_reads := (pending_reads & + dropPendingBit(port.read) & drop_pending_bit) | + add_pending_bit + } port.read.valid := state === s_busy && pending_reads.orR && !block_pending_read port.read.bits := L2DataReadReq( id = UInt(trackerId), @@ -598,9 +603,13 @@ trait WritesToOuterCacheDataArray extends HasCoherenceMetadataBuffer val curr_write_beat = PriorityEncoder(pending_writes) def writeDataArray(add_pending_bit: UInt = UInt(0), - block_pending_write: Bool = Bool(false)) { + block_pending_write: Bool = Bool(false), + can_update_pending: Bool = Bool(true)) { val port = io.data - pending_writes := (pending_writes & dropPendingBit(port.write)) | add_pending_bit + when (can_update_pending) { + pending_writes := (pending_writes & dropPendingBit(port.write)) | + add_pending_bit + } port.write.valid := state === s_busy && pending_writes.orR && !block_pending_write port.write.bits := L2DataWriteReq( id = UInt(trackerId), @@ -720,7 +729,7 @@ class CacheVoluntaryReleaseTracker(trackerId: Int)(implicit p: Parameters) // Avoid metatdata races with writebacks routeInParent(iacqMatches = inSameSet(_, xact_addr_block)) - io.alloc_iacq.can := Bool(false) + io.alloc.iacq.can := Bool(false) // Initialize and accept pending Release beats innerRelease( @@ 
-733,7 +742,8 @@ class CacheVoluntaryReleaseTracker(trackerId: Int)(implicit p: Parameters) metaRead(io.meta, s_busy) // Write the voluntarily written back data to this cache - writeDataArray(add_pending_bit = addPendingBitWhenBeatHasData(io.inner.release)) + writeDataArray(add_pending_bit = addPendingBitWhenBeatHasData(io.inner.release), + can_update_pending = state =/= s_idle || io.alloc.irel.should) // End a transaction by updating the block metadata metaWrite( @@ -801,7 +811,7 @@ class CacheAcquireTracker(trackerId: Int)(implicit p: Parameters) iacqMatches = inSameSet(_, xact_addr_block), irelMatches = (irel: HasCacheBlockAddress) => Mux(before_wb_alloc, inSameSet(irel, xact_addr_block), exactAddrMatch(irel))) - io.alloc_irel.can := Bool(false) + io.alloc.irel.can := Bool(false) // TileLink allows for Gets-under-Get // and Puts-under-Put, and either may also merge with a preceding prefetch @@ -919,7 +929,8 @@ class CacheAcquireTracker(trackerId: Int)(implicit p: Parameters) drop_pending_bit = (dropPendingBitWhenBeatHasData(io.inner.release) & dropPendingBitWhenBeatHasData(io.outer.grant)), add_pending_bit = addPendingBitWhenBeatNeedsRead(io.inner.acquire, Bool(alwaysWriteFullBeat)), - block_pending_read = ognt_counter.pending) + block_pending_read = ognt_counter.pending, + can_update_pending = state =/= s_idle || io.alloc.irel.should) // No override for first accepted acquire val alloc_override = xact_allocate && (state =/= s_idle) @@ -934,7 +945,8 @@ class CacheAcquireTracker(trackerId: Int)(implicit p: Parameters) block_pending_write = (ognt_counter.pending || pending_put_data.orR || pending_reads(curr_write_beat) || - pending_resps(curr_write_beat))) + pending_resps(curr_write_beat)), + can_update_pending = state =/= s_idle || io.alloc.iacq.should || io.alloc.irel.should) // Acknowledge or respond with data innerGrant( diff --git a/uncore/src/main/scala/sdq.scala b/uncore/src/main/scala/sdq.scala index 00790d56..6b2cc092 100644 --- 
a/uncore/src/main/scala/sdq.scala +++ b/uncore/src/main/scala/sdq.scala @@ -45,7 +45,7 @@ trait HasStoreDataQueue extends HasStoreDataQueueParameters { lazy val sdq_alloc_id = PriorityEncoder(~sdq_val) lazy val sdq_rdy = !sdq_val.andR lazy val sdq_enq = trackerIOsList.map( t => - (t.alloc_iacq.should || t.alloc_iacq.matches) && + (t.alloc.iacq.should || t.alloc.iacq.matches) && t.inner.acquire.fire() && t.iacq().hasData() ).reduce(_||_) diff --git a/uncore/src/main/scala/trackers.scala b/uncore/src/main/scala/trackers.scala index 25f2f57e..cdf5b6d4 100644 --- a/uncore/src/main/scala/trackers.scala +++ b/uncore/src/main/scala/trackers.scala @@ -11,9 +11,11 @@ class TrackerAllocation extends Bundle { } trait HasTrackerAllocationIO extends Bundle { - val alloc_iacq = new TrackerAllocation - val alloc_irel = new TrackerAllocation - val alloc_oprb = new TrackerAllocation + val alloc = new Bundle { + val iacq = new TrackerAllocation + val irel = new TrackerAllocation + val oprb = new TrackerAllocation + } } class ManagerXactTrackerIO(implicit p: Parameters) extends ManagerTLIO()(p) @@ -221,7 +223,7 @@ trait AcceptsVoluntaryReleases extends HasVoluntaryReleaseMetadataBuffer { def irel_can_merge: Bool def irel_same_xact: Bool - def irel_is_allocating: Bool = state === s_idle && io.alloc_irel.should && io.inner.release.valid + def irel_is_allocating: Bool = state === s_idle && io.alloc.irel.should && io.inner.release.valid def irel_is_merging: Bool = (irel_can_merge || irel_same_xact) && io.inner.release.valid def innerRelease(block_vol_ignt: Bool = Bool(false), next: UInt = s_busy) { @@ -230,11 +232,10 @@ trait AcceptsVoluntaryReleases extends HasVoluntaryReleaseMetadataBuffer { up = io.inner.release, down = io.inner.grant, trackUp = (r: Release) => { - Mux(state === s_idle, io.alloc_irel.should, io.alloc_irel.matches) && r.isVoluntary() && r.requiresAck() + Mux(state === s_idle, io.alloc.irel.should, io.alloc.irel.matches) && r.isVoluntary() && r.requiresAck() }, 
trackDown = (g: Grant) => (state =/= s_idle) && g.isVoluntary()) - pending_irel_data := (pending_irel_data & dropPendingBitWhenBeatHasData(io.inner.release)) when(irel_is_allocating) { xact_addr_block := io.irel().addr_block @@ -242,7 +243,7 @@ trait AcceptsVoluntaryReleases extends HasVoluntaryReleaseMetadataBuffer { } when(io.inner.release.fire()) { - when(io.alloc_irel.should || (irel_can_merge && io.irel().first())) { + when(io.alloc.irel.should || (irel_can_merge && io.irel().first())) { xact_vol_ir_r_type := io.irel().r_type xact_vol_ir_src := io.irel().client_id xact_vol_ir_client_xact_id := io.irel().client_xact_id @@ -252,6 +253,10 @@ trait AcceptsVoluntaryReleases extends HasVoluntaryReleaseMetadataBuffer { } } + when (irel_is_merging) { + pending_irel_data := (pending_irel_data & dropPendingBitWhenBeatHasData(io.inner.release)) + } + io.inner.grant.valid := Vec(s_wb_req, s_wb_resp, s_inner_probe, s_busy).contains(state) && vol_ignt_counter.pending && !(pending_irel_data.orR || block_vol_ignt) @@ -276,9 +281,11 @@ trait EmitsVoluntaryReleases extends HasVoluntaryReleaseMetadataBuffer { add_pending_data_bits: UInt = UInt(0), add_pending_send_bit: Bool = Bool(false)) { - pending_orel_data := (pending_orel_data & dropPendingBitWhenBeatHasData(io.outer.release)) | - addPendingBitWhenBeatHasData(io.inner.release) | - add_pending_data_bits + when (state =/= s_idle || io.alloc.irel.should) { + pending_orel_data := (pending_orel_data & dropPendingBitWhenBeatHasData(io.outer.release)) | + addPendingBitWhenBeatHasData(io.inner.release) | + add_pending_data_bits + } when (add_pending_send_bit) { pending_orel_send := Bool(true) } when (io.outer.release.fire()) { pending_orel_send := Bool(false) } @@ -352,12 +359,12 @@ trait RoutesInParent extends HasBlockAddressBuffer def routeInParent(iacqMatches: AddrComparison = exactAddrMatch, irelMatches: AddrComparison = exactAddrMatch, oprbMatches: AddrComparison = exactAddrMatch) { - io.alloc_iacq.matches := (state =/= 
s_idle) && iacqMatches(io.iacq()) - io.alloc_irel.matches := (state =/= s_idle) && irelMatches(io.irel()) - io.alloc_oprb.matches := (state =/= s_idle) && oprbMatches(io.oprb()) - io.alloc_iacq.can := state === s_idle - io.alloc_irel.can := state === s_idle - io.alloc_oprb.can := Bool(false) + io.alloc.iacq.matches := (state =/= s_idle) && iacqMatches(io.iacq()) + io.alloc.irel.matches := (state =/= s_idle) && irelMatches(io.irel()) + io.alloc.oprb.matches := (state =/= s_idle) && oprbMatches(io.oprb()) + io.alloc.iacq.can := state === s_idle + io.alloc.irel.can := state === s_idle + io.alloc.oprb.can := Bool(false) } } @@ -390,7 +397,7 @@ trait AcceptsInnerAcquires extends HasAcquireMetadataBuffer pending_put_data(io.iacq().addr_beat) } def iacq_can_merge: Bool - def iacq_is_allocating: Bool = state === s_idle && io.alloc_iacq.should && io.inner.acquire.valid + def iacq_is_allocating: Bool = state === s_idle && io.alloc.iacq.should && io.inner.acquire.valid def iacq_is_merging: Bool = (iacq_can_merge || iacq_same_xact) && io.inner.acquire.valid def innerAcquire(can_alloc: Bool, next: UInt) { @@ -405,9 +412,11 @@ trait AcceptsInnerAcquires extends HasAcquireMetadataBuffer pending_ignt := ignt_q.io.count > UInt(0) // Track whether any beats are missing from a PutBlock - pending_put_data := (pending_put_data & - dropPendingBitWhenBeatHasData(io.inner.acquire)) | - addPendingBitsOnFirstBeat(io.inner.acquire) + when (state =/= s_idle || io.alloc.iacq.should) { + pending_put_data := (pending_put_data & + dropPendingBitWhenBeatHasData(io.inner.acquire)) | + addPendingBitsOnFirstBeat(io.inner.acquire) + } // Intialize transaction metadata for accepted Acquire when(iacq_is_allocating) {