From 93773a449664d1ba79853cf946a257e94c4e1d0c Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sun, 6 Mar 2016 23:12:16 -0800 Subject: [PATCH] Refactor L2 transaction trackers to each be capable of processing Voluntary Writebacks. To elide several races between reading and writing the metadata array for different types of transactions, all L2XactTrackers can now sink Voluntary Releases (writebacks from the L1 in the current implementation). These writebacks are merged with the ongoing transaction, and the merging tracker supplies an acknowledgment of the writeback in addition to its ongoing activities. This change involved another refactoring of the control logic for allocating new trackers and routing incoming Acquires and Releases. BroadcastHub uses the new routing logic, but still processes all voluntary releases through the VoluntaryReleaseTracker (not a problem because there are no metadata update races). Closes #18 Closes #20 --- uncore/src/main/scala/broadcast.scala | 73 +++---- uncore/src/main/scala/cache.scala | 278 +++++++++++++++++-------- uncore/src/main/scala/converters.scala | 8 +- uncore/src/main/scala/metadata.scala | 5 +- uncore/src/main/scala/tilelink.scala | 32 ++- uncore/src/main/scala/uncore.scala | 70 ++++++- 6 files changed, 319 insertions(+), 147 deletions(-) diff --git a/uncore/src/main/scala/broadcast.scala b/uncore/src/main/scala/broadcast.scala index c3eb7038..ee338e89 100644 --- a/uncore/src/main/scala/broadcast.scala +++ b/uncore/src/main/scala/broadcast.scala @@ -52,26 +52,22 @@ class L2BroadcastHub(implicit p: Parameters) extends ManagerCoherenceAgent()(p) val sdq_val = Reg(init=Bits(0, sdqDepth)) val sdq_alloc_id = PriorityEncoder(~sdq_val) val sdq_rdy = !sdq_val.andR - val sdq_enq = io.inner.acquire.fire() && io.iacq().hasData() + val sdq_enq = trackerList.map(_.io.alloc.iacq).reduce(_||_) && + io.inner.acquire.fire() && + io.iacq().hasData() when (sdq_enq) { sdq(sdq_alloc_id) := io.iacq().data } // Handle acquire transaction 
initiation - val trackerAcquireIOs = trackerList.map(_.io.inner.acquire) - val acquireConflicts = Vec(trackerList.map(_.io.has_acquire_conflict)).toBits - val acquireMatches = Vec(trackerList.map(_.io.has_acquire_match)).toBits - val acquireReadys = Vec(trackerAcquireIOs.map(_.ready)).toBits - val acquire_idx = Mux(acquireMatches.orR, - PriorityEncoder(acquireMatches), - PriorityEncoder(acquireReadys)) - - val block_acquires = acquireConflicts.orR || !sdq_rdy - io.inner.acquire.ready := acquireReadys.orR && !block_acquires - trackerAcquireIOs.zipWithIndex.foreach { - case(tracker, i) => - tracker.bits := io.inner.acquire.bits - tracker.bits.data := DataQueueLocation(sdq_alloc_id, inStoreQueue).toBits - tracker.valid := io.inner.acquire.valid && !block_acquires && (acquire_idx === UInt(i)) + val sdqLoc = List.fill(nTransactors) { + DataQueueLocation(sdq_alloc_id, inStoreQueue).toBits } + doInputRoutingWithAllocation( + io.inner.acquire, + trackerList.map(_.io.inner.acquire), + trackerList.map(_.io.matches.iacq), + trackerList.map(_.io.alloc.iacq), + Some(sdqLoc), + Some(sdq_rdy)) // Queue to store impending Voluntary Release data val voluntary = io.irel().isVoluntary() @@ -81,17 +77,16 @@ class L2BroadcastHub(implicit p: Parameters) extends ManagerCoherenceAgent()(p) when(vwbdq_enq) { vwbdq(rel_data_cnt) := io.irel().data } // Handle releases, which might be voluntary and might have data - val trackerReleaseIOs = trackerList.map(_.io.inner.release) - val releaseReadys = Vec(trackerReleaseIOs.map(_.ready)).toBits - io.inner.release.ready := releaseReadys.orR - trackerReleaseIOs.zipWithIndex.foreach { - case(tracker, i) => - tracker.valid := io.inner.release.valid - tracker.bits := io.inner.release.bits - tracker.bits.data := DataQueueLocation(rel_data_cnt, - (if(i < nReleaseTransactors) inVolWBQueue - else inClientReleaseQueue)).toBits - } + val vwbqLoc = (0 until nTransactors).map(i => + (DataQueueLocation(rel_data_cnt, + (if(i < nReleaseTransactors) inVolWBQueue + 
else inClientReleaseQueue)).toBits)) + doInputRoutingWithAllocation( + io.inner.release, + trackerList.map(_.io.inner.release), + trackerList.map(_.io.matches.irel), + trackerList.map(_.io.alloc.irel), + Some(vwbqLoc)) // Wire probe requests and grant reply to clients, finish acks from clients // Note that we bypass the Grant data subbundles @@ -162,12 +157,12 @@ class BroadcastVoluntaryReleaseTracker(trackerId: Int) // Send an acknowledgement io.inner.grant.valid := state === s_busy && pending_ignt && !pending_irels && io.outer.grant.valid - io.inner.grant.bits := coh.makeGrant(xact, UInt(trackerId)) + io.inner.grant.bits := coh.makeGrant(xact) when(io.inner.grant.fire()) { pending_ignt := Bool(false) } io.outer.grant.ready := state === s_busy && io.inner.grant.ready // State machine updates and transaction handler metadata intialization - when(state === s_idle && io.inner.release.fire()) { + when(state === s_idle && io.inner.release.valid && io.alloc.irel) { xact := io.irel() when(io.irel().hasMultibeatData()) { pending_irels := dropPendingBitWhenBeatHasData(io.inner.release) @@ -181,8 +176,9 @@ class BroadcastVoluntaryReleaseTracker(trackerId: Int) when(state === s_busy && all_pending_done) { state := s_idle } // These IOs are used for routing in the parent - io.has_acquire_match := Bool(false) - io.has_acquire_conflict := Bool(false) + io.matches.iacq := (state =/= s_idle) && xact.conflicts(io.iacq()) + io.matches.irel := (state =/= s_idle) && xact.conflicts(io.irel()) && io.irel().isVoluntary() + io.matches.oprb := Bool(false) // Checks for illegal behavior assert(!(state === s_idle && io.inner.release.fire() && !io.irel().isVoluntary()), @@ -221,11 +217,10 @@ class BroadcastAcquireTracker(trackerId: Int) val pending_outer_read_ = coh.makeGrant(io.iacq(), UInt(trackerId)).hasData() val subblock_type = xact.isSubBlockType() - io.has_acquire_conflict := xact.conflicts(io.iacq()) && - (state =/= s_idle) && - !collect_iacq_data - io.has_acquire_match := 
xact.conflicts(io.iacq()) && - collect_iacq_data + // These IOs are used for routing in the parent + io.matches.iacq := (state =/= s_idle) && xact.conflicts(io.iacq()) + io.matches.irel := (state =/= s_idle) && xact.conflicts(io.irel()) && !io.irel().isVoluntary() + io.matches.oprb := Bool(false) val outerParams = p.alterPartial({ case TLId => outerTLId }) @@ -286,7 +281,7 @@ class BroadcastAcquireTracker(trackerId: Int) io.iacq().client_xact_id =/= xact.client_xact_id), "AcquireTracker accepted data beat from different client transaction than initial request.") - assert(!(state === s_idle && io.inner.acquire.fire() && + assert(!(state === s_idle && io.inner.acquire.fire() && io.alloc.iacq && io.iacq().hasMultibeatData() && io.iacq().addr_beat =/= UInt(0)), "AcquireTracker initialized with a tail data beat.") @@ -309,7 +304,7 @@ class BroadcastAcquireTracker(trackerId: Int) switch (state) { is(s_idle) { io.inner.acquire.ready := Bool(true) - when(io.inner.acquire.valid) { + when(io.inner.acquire.valid && io.alloc.iacq) { xact := io.iacq() xact.data_buffer(UInt(0)) := io.iacq().data xact.wmask_buffer(UInt(0)) := io.iacq().wmask() @@ -333,7 +328,7 @@ class BroadcastAcquireTracker(trackerId: Int) } // Handle releases, which may have data to be written back - val matches = xact.conflicts(io.irel()) && !io.irel().isVoluntary() + val matches = io.matches.irel io.inner.release.ready := (!io.irel().hasData() || io.outer.acquire.ready) && matches when(io.inner.release.valid && matches) { when(io.irel().hasData()) { diff --git a/uncore/src/main/scala/cache.scala b/uncore/src/main/scala/cache.scala index fda44abc..d3c704c0 100644 --- a/uncore/src/main/scala/cache.scala +++ b/uncore/src/main/scala/cache.scala @@ -186,6 +186,8 @@ trait HasL2HellaCacheParameters extends HasCacheParameters with HasCoherenceAgen val idxLSB = cacheIdBits val idxMSB = idxLSB + idxBits - 1 val tagLSB = idxLSB + idxBits + def inSameSet(addr1: UInt, addr2: UInt): Bool = addr1(idxMSB,idxLSB) === 
addr2(idxMSB,idxLSB) + def haveSameTag(addr1: UInt, addr2: UInt): Bool = addr1 >> UInt(tagLSB) === addr2 >> UInt(tagLSB) //val blockAddrBits = p(TLBlockAddrBits) val refillCyclesPerBeat = outerDataBits/rowBits val refillCycles = refillCyclesPerBeat*outerDataBeats @@ -393,8 +395,11 @@ class TSHRFile(implicit p: Parameters) extends L2HellaCacheModule()(p) val io = new TSHRFileIO // Create TSHRs for outstanding transactions - val trackerList = (0 until nReleaseTransactors).map(id => Module(new L2VoluntaryReleaseTracker(id))) ++ - (nReleaseTransactors until nTransactors).map(id => Module(new L2AcquireTracker(id))) + val trackerList = + (0 until nReleaseTransactors).map(id => + Module(new L2VoluntaryReleaseTracker(id))) ++ + (nReleaseTransactors until nTransactors).map(id => + Module(new L2AcquireTracker(id))) // WritebackUnit evicts data from L2, including invalidating L1s val wb = Module(new L2WritebackUnit(nTransactors)) @@ -406,38 +411,33 @@ class TSHRFile(implicit p: Parameters) extends L2HellaCacheModule()(p) (trackerList.map(_.io.incoherent) :+ wb.io.incoherent) foreach { _ := io.incoherent } // Handle acquire transaction initiation - val trackerAcquireIOs = trackerList.map(_.io.inner.acquire) - val acquireConflicts = Vec(trackerList.map(_.io.has_acquire_conflict)).toBits - val acquireMatches = Vec(trackerList.map(_.io.has_acquire_match)).toBits - val acquireReadys = Vec(trackerAcquireIOs.map(_.ready)).toBits - val acquire_idx = Mux(acquireMatches.orR, - OHToUInt(acquireMatches), - PriorityEncoder(acquireReadys)) - val block_acquires = acquireConflicts.orR - io.inner.acquire.ready := acquireReadys.orR && !block_acquires - trackerAcquireIOs.zipWithIndex.foreach { - case(tracker, i) => - tracker.bits := io.inner.acquire.bits - tracker.valid := io.inner.acquire.valid && !block_acquires && (acquire_idx === UInt(i)) - } - assert(PopCount(acquireMatches) <= UInt(1), - "At most a single tracker should match for any given Acquire") + val irel_vs_iacq_conflict = + 
io.inner.acquire.valid && + io.inner.release.valid && + inSameSet(io.inner.acquire.bits.addr_block, io.inner.release.bits.addr_block) + doInputRoutingWithAllocation( + io.inner.acquire, + trackerList.map(_.io.inner.acquire), + trackerList.map(_.io.matches.iacq), + trackerList.map(_.io.alloc.iacq), + allocOverride = !irel_vs_iacq_conflict) + + assert(PopCount(trackerList.map(_.io.alloc.iacq)) <= UInt(1), + "At most a single tracker should now be allocated for any given Acquire") // Wire releases from clients - val releaseReadys = Vec(trackerAndWbIOs.map(_.inner.release.ready)).toBits - io.inner.release.ready := releaseReadys.orR - trackerAndWbIOs foreach { tracker => - tracker.inner.release.bits := io.inner.release.bits - tracker.inner.release.valid := io.inner.release.valid - } - assert(!io.inner.release.valid || PopCount(releaseReadys) <= UInt(1), - "At most a single tracker should match for any given Release") - assert(!io.inner.release.valid || io.irel().isVoluntary() || releaseReadys.orR, - "Non-voluntary release should always have a Tracker waiting for it.") + doInputRoutingWithAllocation( + io.inner.release, + trackerAndWbIOs.map(_.inner.release), + trackerAndWbIOs.map(_.matches.irel), + trackerAndWbIOs.map(_.alloc.irel)) + + assert(PopCount(trackerAndWbIOs.map(_.alloc.irel)) <= UInt(1), + "At most a single tracker should now be allocated for any given Release") // Wire probe requests and grant reply to clients, finish acks from clients doOutputArbitration(io.inner.probe, trackerList.map(_.io.inner.probe) :+ wb.io.inner.probe) - doOutputArbitration(io.inner.grant, trackerList.map(_.io.inner.grant)) + doOutputArbitration(io.inner.grant, trackerList.map(_.io.inner.grant) :+ wb.io.inner.grant) doInputRouting(io.inner.finish, trackerList.map(_.io.inner.finish)) // Create an arbiter for the one memory port @@ -532,6 +532,11 @@ class L2VoluntaryReleaseTracker(trackerId: Int)(implicit p: Parameters) extends !(pending_writes.orR || pending_ignt) + // These IOs are 
used for routing in the parent + io.matches.iacq := (state =/= s_idle) && inSameSet(io.iacq().addr_block, xact.addr_block) + io.matches.irel := (state =/= s_idle) && io.irel().conflicts(xact) + io.matches.oprb := (state =/= s_idle) && io.oprb().conflicts(xact) + // Accept a voluntary Release (and any further beats of data) pending_irels := (pending_irels & dropPendingBitWhenBeatHasData(io.inner.release)) io.inner.release.ready := ((state === s_idle) && io.irel().isVoluntary()) || pending_irels.orR @@ -557,7 +562,7 @@ class L2VoluntaryReleaseTracker(trackerId: Int)(implicit p: Parameters) extends // Send an acknowledgement io.inner.grant.valid := state === s_busy && pending_ignt && !pending_irels - io.inner.grant.bits := coh.inner.makeGrant(xact, UInt(trackerId)) + io.inner.grant.bits := coh.inner.makeGrant(xact) when(io.inner.grant.fire()) { pending_ignt := Bool(false) } // End a transaction by updating the block metadata @@ -572,7 +577,7 @@ class L2VoluntaryReleaseTracker(trackerId: Int)(implicit p: Parameters) extends xact_old_meta.coh.outer) // State machine updates and transaction handler metadata intialization - when(state === s_idle && io.inner.release.fire()) { + when(state === s_idle && io.inner.release.valid && io.alloc.irel) { xact := io.irel() when(io.irel().hasMultibeatData()) { pending_irels := dropPendingBitWhenBeatHasData(io.inner.release) @@ -592,10 +597,6 @@ class L2VoluntaryReleaseTracker(trackerId: Int)(implicit p: Parameters) extends when(state === s_busy && all_pending_done) { state := s_meta_write } when(state === s_meta_write && io.meta.write.ready) { state := s_idle } - // These IOs are used for routing in the parent - io.has_acquire_match := Bool(false) - io.has_acquire_conflict := Bool(false) - // Checks for illegal behavior assert(!(state === s_meta_resp && io.meta.resp.valid && !io.meta.resp.bits.tag_match), "VoluntaryReleaseTracker accepted Release for a block not resident in this cache!") @@ -624,6 +625,9 @@ class 
L2AcquireTracker(trackerId: Int)(implicit p: Parameters) extends L2XactTra val xact_op_code = Reg{ UInt() } val xact_addr_byte = Reg{ UInt() } val xact_op_size = Reg{ UInt() } + val xact_vol_irel_r_type = Reg{ io.irel().r_type } + val xact_vol_irel_src = Reg{ io.irel().client_id } + val xact_vol_irel_client_xact_id = Reg{ io.irel().client_xact_id } // Miss queue holds transaction metadata used to make grants val ignt_q = Module(new Queue( @@ -634,12 +638,27 @@ class L2AcquireTracker(trackerId: Int)(implicit p: Parameters) extends L2XactTra val xact = ignt_q.io.deq.bits val xact_addr_idx = xact_addr_block(idxMSB,idxLSB) val xact_addr_tag = xact_addr_block >> UInt(tagLSB) - + val xact_vol_irel = Release( + src = xact_vol_irel_src, + voluntary = Bool(true), + r_type = xact_vol_irel_r_type, + client_xact_id = xact_vol_irel_client_xact_id, + addr_block = xact_addr_block) + (p.alterPartial({ case TLId => p(InnerTLId) })) + // Counters and scoreboard tracking progress made on processing this transaction val pending_irels = connectTwoWayBeatCounter( max = io.inner.tlNCachingClients, up = io.inner.probe, - down = io.inner.release)._1 + down = io.inner.release, + trackDown = (r: Release) => !r.isVoluntary())._1 + + val pending_vol_ignt = connectTwoWayBeatCounter( + max = 1, + up = io.inner.release, + down = io.inner.grant, + trackUp = (r: Release) => r.isVoluntary(), + trackDown = (g: Grant) => g.isVoluntary())._1 val (pending_ognt, oacq_data_idx, oacq_data_done, ognt_data_idx, ognt_data_done) = connectTwoWayBeatCounter( @@ -656,7 +675,7 @@ class L2AcquireTracker(trackerId: Int)(implicit p: Parameters) extends L2XactTra max = nSecondaryMisses, up = io.inner.grant, down = io.inner.finish, - track = (g: Grant) => g.requiresAck())._1 + trackUp = (g: Grant) => g.requiresAck())._1 val pending_puts = Reg(init=Bits(0, width = io.inner.tlDataBeats)) val pending_iprbs = Reg(init = Bits(0, width = io.inner.tlNCachingClients)) @@ -674,6 +693,7 @@ class L2AcquireTracker(trackerId: 
Int)(implicit p: Parameters) extends L2XactTra pending_puts.orR || pending_ognt || ignt_q.io.count > UInt(0) || + pending_vol_ignt || //pending_meta_write || // Has own state: s_meta_write pending_ifins) @@ -726,7 +746,6 @@ class L2AcquireTracker(trackerId: Int)(implicit p: Parameters) extends L2XactTra wmask & Mux(xact.isBuiltInType(Acquire.putAtomicType), amoalu.io.out << (xact_amo_shift_bytes << 3), new_data) - wmask_buffer(beat) := ~UInt(0, wmask_buffer.head.getWidth) when(xact.is(Acquire.putAtomicType) && xact.addr_beat === beat) { amo_result := old_data } } @@ -764,6 +783,15 @@ class L2AcquireTracker(trackerId: Int)(implicit p: Parameters) extends L2XactTra xact_allocate } + // These IOs are used for routing in the parent + val iacq_in_same_set = inSameSet(xact_addr_idx, io.iacq().addr_block) + val irel_in_same_set = inSameSet(xact_addr_idx,io.irel().addr_block) + val before_wb_alloc = Vec(s_meta_read, s_meta_resp, s_wb_req).contains(state) + io.matches.iacq := (state =/= s_idle) && iacq_in_same_set + io.matches.irel := (state =/= s_idle) && + Mux(before_wb_alloc, irel_in_same_set, io.irel().conflicts(xact_addr_block)) + io.matches.oprb := Bool(false) //TODO + // Actual transaction processing logic begins here: // // First, take care of accpeting new acquires or secondary misses @@ -774,14 +802,19 @@ class L2AcquireTracker(trackerId: Int)(implicit p: Parameters) extends L2XactTra !io.outer.grant.fire() && !io.data.resp.valid && ignt_q.io.enq.ready && ignt_q.io.deq.valid - val iacq_same_xact = xact.client_xact_id === io.iacq().client_xact_id && - xact.hasMultibeatData() && ignt_q.io.deq.valid && - pending_puts(io.iacq().addr_beat) - io.inner.acquire.ready := state === s_idle || iacq_can_merge || iacq_same_xact + val iacq_same_xact = xact.client_xact_id === io.iacq().client_xact_id && + xact.hasMultibeatData() && + ignt_q.io.deq.valid && // i.e. 
state =/= s_idle + pending_puts(io.iacq().addr_beat) + + val iacq_accepted = io.inner.acquire.fire() && + (io.alloc.iacq || iacq_can_merge || iacq_same_xact) + + io.inner.acquire.ready := state === s_idle || iacq_can_merge || iacq_same_xact // Handling of primary and secondary misses' data and write mask merging - when(io.inner.acquire.fire() && io.iacq().hasData()) { + when(iacq_accepted && io.iacq().hasData()) { val beat = io.iacq().addr_beat val full = FillInterleaved(8, io.iacq().wmask()) data_buffer(beat) := (~full & data_buffer(beat)) | (full & io.iacq().data) @@ -789,7 +822,7 @@ class L2AcquireTracker(trackerId: Int)(implicit p: Parameters) extends L2XactTra } // Enqueue some metadata information that we'll use to make coherence updates with later - ignt_q.io.enq.valid := io.inner.acquire.fire() && io.iacq().first() + ignt_q.io.enq.valid := iacq_accepted && io.iacq().first() ignt_q.io.enq.bits := io.iacq() // Track whether any beats are missing from a PutBlock @@ -818,17 +851,35 @@ class L2AcquireTracker(trackerId: Int)(implicit p: Parameters) extends L2XactTra io.inner.probe.bits := pending_coh.inner.makeProbe(curr_probe_dst, xact, xact_addr_block) // Handle incoming releases from clients, which may reduce sharer counts - // and/or write back dirty data - io.inner.release.ready := state === s_inner_probe && - io.irel().conflicts(xact_addr_block) && - !io.irel().isVoluntary() + // and/or write back dirty data, and may be unexpected voluntary releases + val irel_can_merge = io.irel().conflicts(xact_addr_block) && + io.irel().isVoluntary() && + !Vec(s_idle, s_meta_read, s_meta_resp, s_meta_write).contains(state) && + !all_pending_done && + !io.outer.grant.fire() && + !io.inner.grant.fire() && + !pending_vol_ignt + + val irel_same_xact = io.irel().conflicts(xact_addr_block) && + !io.irel().isVoluntary() && + state === s_inner_probe + + val irel_accepted = io.inner.release.fire() && + (io.alloc.irel || irel_can_merge || irel_same_xact) + + io.inner.release.ready 
:= irel_can_merge || irel_same_xact val pending_coh_on_irel = HierarchicalMetadata( pending_coh.inner.onRelease(io.irel()), // Drop sharer - Mux(io.irel().hasData(), // Dirty writeback + Mux(io.irel().hasData(), // Dirty writeback pending_coh.outer.onHit(M_XWR), pending_coh.outer)) updatePendingCohWhen(io.inner.release.fire(), pending_coh_on_irel) mergeDataInner(io.inner.release) + when(io.inner.release.fire() && irel_can_merge) { + xact_vol_irel_r_type := io.irel().r_type + xact_vol_irel_src := io.irel().client_id + xact_vol_irel_client_xact_id := io.irel().client_xact_id + } // Handle misses or coherence permission upgrades by initiating a new transaction in the outer memory: // @@ -902,7 +953,7 @@ class L2AcquireTracker(trackerId: Int)(implicit p: Parameters) extends L2XactTra io.data.write.bits.way_en := xact_way_en io.data.write.bits.addr_idx := xact_addr_idx io.data.write.bits.addr_beat := curr_write_beat - io.data.write.bits.wmask := wmask_buffer(curr_write_beat) + io.data.write.bits.wmask := SInt(-1) // Always writes a full beat io.data.write.bits.data := data_buffer(curr_write_beat) // soon as the data is released, granted, put, or read from the cache @@ -912,22 +963,28 @@ class L2AcquireTracker(trackerId: Int)(implicit p: Parameters) extends L2XactTra addPendingBitInternal(io.data.resp) // We can issue a grant for a pending write once all data is // received and committed to the data array or outer memory - val ignt_ack_ready = !(state === s_idle || state === s_meta_read || - pending_puts.orR || pending_writes.orR || pending_ognt) + val ignt_ack_ready = !(state === s_idle || + state === s_meta_read || + pending_puts.orR || + pending_writes.orR || + pending_ognt) - ignt_q.io.deq.ready := ignt_data_done - io.inner.grant.valid := state === s_busy && - ignt_q.io.deq.valid && - Mux(io.ignt().hasData(), - ignt_data_ready(ignt_data_idx), - ignt_ack_ready) + ignt_q.io.deq.ready := !pending_vol_ignt && ignt_data_done + io.inner.grant.valid := pending_vol_ignt || + 
(state === s_busy && + ignt_q.io.deq.valid && + Mux(io.ignt().hasData(), + ignt_data_ready(ignt_data_idx), + ignt_ack_ready)) // Make the Grant message using the data stored in the secondary miss queue - io.inner.grant.bits := pending_coh.inner.makeGrant( - sec = ignt_q.io.deq.bits, - manager_xact_id = UInt(trackerId), - data = Mux(xact.is(Acquire.putAtomicType), - amo_result, - data_buffer(ignt_data_idx))) + val grant_from_acquire = pending_coh.inner.makeGrant( + sec = ignt_q.io.deq.bits, + manager_xact_id = UInt(trackerId), + data = Mux(xact.is(Acquire.putAtomicType), + amo_result, + data_buffer(ignt_data_idx))) + val grant_from_release = pending_coh.inner.makeGrant(xact_vol_irel) + io.inner.grant.bits := Mux(pending_vol_ignt, grant_from_release, grant_from_acquire) io.inner.grant.bits.addr_beat := ignt_data_idx // override based on outgoing counter val pending_coh_on_ignt = HierarchicalMetadata( @@ -949,7 +1006,7 @@ class L2AcquireTracker(trackerId: Int)(implicit p: Parameters) extends L2XactTra io.meta.write.bits.data.coh := pending_coh // State machine updates and transaction handler metadata intialization - when(state === s_idle && io.inner.acquire.valid) { + when(state === s_idle && io.inner.acquire.valid && io.alloc.iacq) { xact_addr_block := io.iacq().addr_block xact_allocate := io.iacq().allocate() xact_amo_shift_bytes := io.iacq().amo_shift_bytes() @@ -1027,12 +1084,6 @@ class L2AcquireTracker(trackerId: Int)(implicit p: Parameters) extends L2XactTra state := Mux(pending_meta_write, s_meta_write, s_idle) } when(state === s_meta_write && io.meta.write.ready) { state := s_idle } - - // These IOs are used for routing in the parent - val in_same_set = xact_addr_idx === io.iacq().addr_block(idxMSB,idxLSB) - io.has_acquire_match := iacq_can_merge || iacq_same_xact - io.has_acquire_conflict := in_same_set && (state =/= s_idle) && !io.has_acquire_match - //TODO: relax from in_same_set to xact.conflicts(io.iacq())? 
} class L2WritebackReq(implicit p: Parameters) extends L2Metadata()(p) with HasL2Id { @@ -1061,17 +1112,47 @@ class L2WritebackUnit(trackerId: Int)(implicit p: Parameters) extends L2XactTrac val xact = Reg(new L2WritebackReq) val data_buffer = Reg(init=Vec.fill(innerDataBeats)(UInt(0, width = innerDataBits))) - val xact_addr_block = Cat(xact.tag, xact.idx, UInt(cacheId, cacheIdBits)) + val xact_vol_irel_r_type = Reg{ io.irel().r_type } + val xact_vol_irel_src = Reg{ io.irel().client_id } + val xact_vol_irel_client_xact_id = Reg{ io.irel().client_xact_id } + + val xact_addr_block = Cat(xact.tag, xact.idx, UInt(cacheId, cacheIdBits)) + val xact_vol_irel = Release( + src = xact_vol_irel_src, + voluntary = Bool(true), + r_type = xact_vol_irel_r_type, + client_xact_id = xact_vol_irel_client_xact_id, + addr_block = xact_addr_block) + + val pending_irels = connectTwoWayBeatCounter( + max = io.inner.tlNCachingClients, + up = io.inner.probe, + down = io.inner.release, + trackDown = (r: Release) => !r.isVoluntary())._1 + + val pending_vol_ignt = connectTwoWayBeatCounter( + max = 1, + up = io.inner.release, + down = io.inner.grant, + trackUp = (r: Release) => r.isVoluntary(), + trackDown = (g: Grant) => g.isVoluntary())._1 - val pending_irels = - connectTwoWayBeatCounter(max = io.inner.tlNCachingClients, up = io.inner.probe, down = io.inner.release)._1 val (pending_ognt, orel_data_idx, orel_data_done, ognt_data_idx, ognt_data_done) = - connectTwoWayBeatCounter(max = 1, up = io.outer.release, down = io.outer.grant) + connectTwoWayBeatCounter( + max = 1, + up = io.outer.release, + down = io.outer.grant) + val pending_iprbs = Reg(init = Bits(0, width = io.inner.tlNCachingClients)) val pending_reads = Reg(init=Bits(0, width = io.inner.tlDataBeats)) val pending_resps = Reg(init=Bits(0, width = io.inner.tlDataBeats)) val pending_orel_data = Reg(init=Bits(0, width = io.inner.tlDataBeats)) + // These IOs are used for routing in the parent + io.matches.iacq := (state =/= s_idle) && 
io.iacq().conflicts(xact_addr_block) + io.matches.irel := (state =/= s_idle) && io.irel().conflicts(xact_addr_block) + io.matches.oprb := (state =/= s_idle) && io.oprb().conflicts(xact_addr_block) + // Start the writeback sub-transaction io.wb.req.ready := state === s_idle @@ -1083,17 +1164,32 @@ class L2WritebackUnit(trackerId: Int)(implicit p: Parameters) extends L2XactTrac // Handle incoming releases from clients, which may reduce sharer counts // and/or write back dirty data - val inner_coh_on_irel = xact.coh.inner.onRelease(io.irel()) - val outer_coh_on_irel = xact.coh.outer.onHit(M_XWR) - io.inner.release.ready := (state === s_inner_probe || state === s_busy) && - io.irel().conflicts(xact_addr_block) && - !io.irel().isVoluntary() + val irel_can_merge = io.irel().conflicts(xact_addr_block) && + io.irel().isVoluntary() && + state === s_inner_probe && + !pending_vol_ignt + + val irel_same_xact = io.irel().conflicts(xact_addr_block) && + !io.irel().isVoluntary() && + state === s_inner_probe + + val irel_accepted = io.inner.release.fire() && + (io.alloc.irel || irel_can_merge || irel_same_xact) + + io.inner.release.ready := irel_can_merge || irel_same_xact + val pending_coh_on_irel = HierarchicalMetadata( + xact.coh.inner.onRelease(io.irel()), // Drop sharer + Mux(io.irel().hasData(), // Dirty writeback + xact.coh.outer.onHit(M_XWR), + xact.coh.outer)) when(io.inner.release.fire()) { - xact.coh.inner := inner_coh_on_irel - data_buffer(io.inner.release.bits.addr_beat) := io.inner.release.bits.data - } - when(io.inner.release.valid && io.irel().conflicts(xact_addr_block) && io.irel().hasData()) { - xact.coh.outer := outer_coh_on_irel // must writeback dirty data supplied by any matching release, even voluntary ones + xact.coh := pending_coh_on_irel + when(io.irel().hasData()) { data_buffer(io.irel().addr_beat) := io.irel().data } + when(irel_can_merge) { + xact_vol_irel_r_type := io.irel().r_type + xact_vol_irel_src := io.irel().client_id + 
xact_vol_irel_client_xact_id := io.irel().client_xact_id + } } // If a release didn't write back data, have to read it from data array @@ -1126,6 +1222,10 @@ class L2WritebackUnit(trackerId: Int)(implicit p: Parameters) extends L2XactTrac addr_beat = orel_data_idx, data = data_buffer(orel_data_idx)) + // Ack a voluntary release if we got one + io.inner.grant.valid := pending_vol_ignt + io.inner.grant.bits := xact.coh.inner.makeGrant(xact_vol_irel) + // Wait for an acknowledgement io.outer.grant.ready := state === s_outer_grant @@ -1144,7 +1244,7 @@ class L2WritebackUnit(trackerId: Int)(implicit p: Parameters) extends L2XactTrac pending_orel_data := UInt(0) state := Mux(needs_inner_probes, s_inner_probe, s_busy) } - when(state === s_inner_probe && !(pending_iprbs.orR || pending_irels)) { + when(state === s_inner_probe && !(pending_iprbs.orR || pending_irels || pending_vol_ignt)) { state := Mux(xact.coh.outer.requiresVoluntaryWriteback(), s_busy, s_wb_resp) } when(state === s_busy && orel_data_done) { @@ -1152,8 +1252,4 @@ class L2WritebackUnit(trackerId: Int)(implicit p: Parameters) extends L2XactTrac } when(state === s_outer_grant && ognt_data_done) { state := s_wb_resp } when(state === s_wb_resp ) { state := s_idle } - - // These IOs are used for routing in the parent - io.has_acquire_match := Bool(false) - io.has_acquire_conflict := Bool(false) } diff --git a/uncore/src/main/scala/converters.scala b/uncore/src/main/scala/converters.scala index 4900a253..5b510360 100644 --- a/uncore/src/main/scala/converters.scala +++ b/uncore/src/main/scala/converters.scala @@ -199,6 +199,7 @@ object ManagerTileLinkHeaderCreator { */ trait HasDataBeatCounters { type HasBeat = TileLinkChannel with HasTileLinkBeatId + type HasId = TileLinkChannel with HasClientId /** Returns the current count on this channel and when a message is done * @param inc increment the counter (usually .valid or .fire()) @@ -259,11 +260,12 @@ trait HasDataBeatCounters { up: DecoupledIO[T], down: 
DecoupledIO[S], beat: UInt = UInt(0), - track: T => Bool = (t: T) => Bool(true)): (Bool, UInt, Bool, UInt, Bool) = { + trackUp: T => Bool = (t: T) => Bool(true), + trackDown: S => Bool = (s: S) => Bool(true)): (Bool, UInt, Bool, UInt, Bool) = { val (up_idx, up_done) = connectDataBeatCounter(up.fire(), up.bits, beat) val (down_idx, down_done) = connectDataBeatCounter(down.fire(), down.bits, beat) - val do_inc = up_done && track(up.bits) - val do_dec = down_done + val do_inc = up_done && trackUp(up.bits) + val do_dec = down_done && trackDown(down.bits) val cnt = TwoWayCounter(do_inc, do_dec, max) (cnt > UInt(0), up_idx, up_done, down_idx, down_done) } diff --git a/uncore/src/main/scala/metadata.scala b/uncore/src/main/scala/metadata.scala index 2819745c..ede9e397 100644 --- a/uncore/src/main/scala/metadata.scala +++ b/uncore/src/main/scala/metadata.scala @@ -218,15 +218,14 @@ class ManagerMetadata(implicit p: Parameters) extends CoherenceMetadata()(p) { /** Construct an appropriate [[uncore.GrantToDst]] to acknowledge an [[uncore.Release]] * * @param rel Release message being acknowledged by this Grant - * @param manager_xact_id manager's transaction id */ - def makeGrant(rel: ReleaseMetadata with HasClientId, manager_xact_id: UInt): GrantToDst = + def makeGrant(rel: ReleaseMetadata with HasClientId): GrantToDst = Grant( dst = rel.client_id, is_builtin_type = Bool(true), g_type = Grant.voluntaryAckType, client_xact_id = rel.client_xact_id, - manager_xact_id = manager_xact_id)(p) + manager_xact_id = UInt(0))(p) /** Construct an appropriate [[uncore.GrantToDst]] to respond to an [[uncore.Acquire]] * diff --git a/uncore/src/main/scala/tilelink.scala b/uncore/src/main/scala/tilelink.scala index 1969f259..b8f1512c 100644 --- a/uncore/src/main/scala/tilelink.scala +++ b/uncore/src/main/scala/tilelink.scala @@ -227,7 +227,11 @@ trait HasProbeType extends HasTileLinkParameters { def hasMultibeatData(dummy: Int = 0) = Bool(false) } -trait HasReleaseType extends 
HasTileLinkParameters { +trait MightBeVoluntary { + def isVoluntary(dummy: Int = 0): Bool +} + +trait HasReleaseType extends HasTileLinkParameters with MightBeVoluntary { val voluntary = Bool() val r_type = UInt(width = tlCoh.releaseTypeWidth) @@ -239,7 +243,7 @@ trait HasReleaseType extends HasTileLinkParameters { def requiresAck(dummy: Int = 0) = !Bool(tlNetworkPreservesPointToPointOrdering) } -trait HasGrantType extends HasTileLinkParameters { +trait HasGrantType extends HasTileLinkParameters with MightBeVoluntary { val is_builtin_type = Bool() val g_type = UInt(width = tlGrantTypeBits) @@ -720,8 +724,8 @@ object Release { r_type: UInt, client_xact_id: UInt, addr_block: UInt, - addr_beat: UInt = UInt(0), - data: UInt = UInt(0)) + addr_beat: UInt, + data: UInt) (implicit p: Parameters): Release = { val rel = Wire(new Release) rel.r_type := r_type @@ -732,6 +736,26 @@ object Release { rel.voluntary := voluntary rel } + + def apply( + src: UInt, + voluntary: Bool, + r_type: UInt, + client_xact_id: UInt, + addr_block: UInt, + addr_beat: UInt = UInt(0), + data: UInt = UInt(0)) + (implicit p: Parameters): ReleaseFromSrc = { + val rel = Wire(new ReleaseFromSrc) + rel.client_id := src + rel.voluntary := voluntary + rel.r_type := r_type + rel.client_xact_id := client_xact_id + rel.addr_block := addr_block + rel.addr_beat := addr_beat + rel.data := data + rel + } } /** The Grant channel is used to refill data or grant permissions requested of the diff --git a/uncore/src/main/scala/uncore.scala b/uncore/src/main/scala/uncore.scala index c90adeeb..6444f54f 100644 --- a/uncore/src/main/scala/uncore.scala +++ b/uncore/src/main/scala/uncore.scala @@ -59,6 +59,38 @@ trait HasCoherenceAgentWiringHelpers { outs.zipWithIndex.map { case (o,i) => o.valid := in.valid && idx === UInt(i) } in.ready := Vec(outs.map(_.ready)).read(idx) } + + /** Broadcasts valid messages on this channel to all trackers, + * but includes logic to allocate a new tracker in the case where + * no previously 
allocated tracker matches the new req's addr. + * + * When a match is reported, if ready is high the new transaction + * is merged; when ready is low the transaction is being blocked. + * When no match is reported, any high readys are presumed to be + * from trackers that are available for allocation, and one is + * assigned via alloc based on priority; if no readys are high then + * all trackers are busy with other transactions. + */ + def doInputRoutingWithAllocation[T <: TileLinkChannel with HasTileLinkData]( + in: DecoupledIO[T], + outs: Seq[DecoupledIO[T]], + matches: Seq[Bool], + allocs: Seq[Bool], + dataOverrides: Option[Seq[UInt]] = None, + allocOverride: Option[Bool] = None) { + val ready_bits = Vec(outs.map(_.ready)).toBits + val alloc_bits = PriorityEncoderOH(ready_bits) + val match_bits = Vec(matches).toBits + val no_matches = !match_bits.orR + val do_alloc = allocOverride.getOrElse(Bool(true)) + in.ready := Mux(no_matches, ready_bits.orR, (match_bits & ready_bits).orR) && do_alloc + outs.zip(allocs).zipWithIndex.foreach { case((out, a), i) => + out.valid := in.valid + out.bits := in.bits + dataOverrides foreach { d => out.bits.data := d(i) } + a := alloc_bits(i) & no_matches & do_alloc + } + } } trait HasInnerTLIO extends HasCoherenceAgentParameters { @@ -114,24 +146,39 @@ abstract class HierarchicalCoherenceAgent(implicit p: Parameters) extends Cohere def incoherent = io.incoherent } -trait HasTrackerConflictIO extends Bundle { - val has_acquire_conflict = Bool(OUTPUT) - val has_acquire_match = Bool(OUTPUT) +trait HasTrackerAllocationIO extends Bundle { + val matches = new Bundle { + val iacq = Bool(OUTPUT) + val irel = Bool(OUTPUT) + val oprb = Bool(OUTPUT) + } + val alloc = new Bundle { + val iacq = Bool(INPUT) + val irel = Bool(INPUT) + val oprb = Bool(INPUT) + } } class ManagerXactTrackerIO(implicit p: Parameters) extends ManagerTLIO()(p) - with HasTrackerConflictIO + with HasTrackerAllocationIO class HierarchicalXactTrackerIO(implicit p: 
Parameters) extends HierarchicalTLIO()(p) - with HasTrackerConflictIO + with HasTrackerAllocationIO abstract class XactTracker(implicit p: Parameters) extends CoherenceAgentModule()(p) with HasDataBeatCounters { def addPendingBitWhenBeat[T <: HasBeat](inc: Bool, in: T): UInt = Fill(in.tlDataBeats, inc) & UIntToOH(in.addr_beat) + def dropPendingBitWhenBeat[T <: HasBeat](dec: Bool, in: T): UInt = ~Fill(in.tlDataBeats, dec) | ~UIntToOH(in.addr_beat) + def addPendingBitWhenId[T <: HasClientId](inc: Bool, in: T): UInt = + Fill(in.tlNCachingClients, inc) & UIntToOH(in.client_id) + + def dropPendingBitWhenId[T <: HasClientId](dec: Bool, in: T): UInt = + ~Fill(in.tlNCachingClients, dec) | ~UIntToOH(in.client_id) + def addPendingBitWhenBeatHasData[T <: HasBeat](in: DecoupledIO[T], inc: Bool = Bool(true)): UInt = addPendingBitWhenBeat(in.fire() && in.bits.hasData() && inc, in.bits) @@ -151,8 +198,17 @@ abstract class XactTracker(implicit p: Parameters) extends CoherenceAgentModule( def dropPendingBitWhenBeatHasData[T <: HasBeat](in: DecoupledIO[T]): UInt = dropPendingBitWhenBeat(in.fire() && in.bits.hasData(), in.bits) - def dropPendingBitAtDest(in: DecoupledIO[ProbeToDst]): UInt = - ~Fill(in.bits.tlNCachingClients, in.fire()) | ~UIntToOH(in.bits.client_id) + def dropPendingBitAtDest[T <: HasId](in: DecoupledIO[T]): UInt = + dropPendingBitWhenId(in.fire(), in.bits) + + def dropPendingBitAtDestWhenVoluntary[T <: HasId with MightBeVoluntary](in: DecoupledIO[T]): UInt = + dropPendingBitWhenId(in.fire() && in.bits.isVoluntary(), in.bits) + + def addPendingBitAtSrc[T <: HasId](in: DecoupledIO[T]): UInt = + addPendingBitWhenId(in.fire(), in.bits) + + def addPendingBitAtSrcWhenVoluntary[T <: HasId with MightBeVoluntary](in: DecoupledIO[T]): UInt = + addPendingBitWhenId(in.fire() && in.bits.isVoluntary(), in.bits) def pinAllReadyValidLow[T <: Data](b: Bundle) { b.elements.foreach {