From 82bbbf908df9f7ace069b39c327d6a92f435de92 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Tue, 26 Jul 2016 12:31:08 -0700 Subject: [PATCH] Fix L2 Writeback deadlock issue The deadlock condition occurs when the acquire tracker attempts to request a writeback while the writeback unit is still busy and a voluntary release for the block to be written back is coming in. The voluntary release cannot be accepted because it conflicts with the acquire tracker. The acquire tracker can't merge the voluntary release because it is waiting to send the writeback. The writeback can't progress because the release it is waiting on is behind the voluntary release. The solution to this is to break the atomicity guarantee between the acquire tracker and the writeback unit. This allows the voluntary release tracker to take the voluntary release before the writeback unit accepts the conflicting request. This causes a potential race condition for the metadata array. The solution to this is to have the writeback unit re-read the metadata after accepting a request. --- uncore/src/main/scala/agents/Cache.scala | 126 ++++++++++++++------ uncore/src/main/scala/agents/Trackers.scala | 19 ++- 2 files changed, 101 insertions(+), 44 deletions(-) diff --git a/uncore/src/main/scala/agents/Cache.scala b/uncore/src/main/scala/agents/Cache.scala index 5cfb819f..fc67c391 100644 --- a/uncore/src/main/scala/agents/Cache.scala +++ b/uncore/src/main/scala/agents/Cache.scala @@ -188,12 +188,26 @@ trait HasOuterCacheParameters extends HasCacheParameters with HasCoherenceAgentP val idxLSB = cacheIdBits val idxMSB = idxLSB + idxBits - 1 val tagLSB = idxLSB + idxBits - def inSameSet(block: HasCacheBlockAddress, addr: UInt): Bool = { - block.addr_block(idxMSB,idxLSB) === addr(idxMSB,idxLSB) - } - def haveSameTag(block: HasCacheBlockAddress, addr: UInt): Bool = { - block.addr_block >> UInt(tagLSB) === addr >> UInt(tagLSB) - } + val tagMSB = tagLSB + tagBits - 1 + + def inSameSet(block_a: HasCacheBlockAddress, block_b: HasCacheBlockAddress): Bool = + inSameSet(block_a, block_b.addr_block) + + def inSameSet(block: HasCacheBlockAddress, addr: UInt): Bool = + inSet(block, addr(idxMSB, idxLSB)) + + def inSet(block: HasCacheBlockAddress, idx: UInt): Bool = + block.addr_block(idxMSB,idxLSB) === idx + + def haveSameTag(block: HasCacheBlockAddress, addr: UInt): Bool = + hasTag(block, addr(tagMSB, tagLSB)) + + def hasTag(block: HasCacheBlockAddress, tag: UInt): Bool = + block.addr_block(tagMSB, tagLSB) === tag + + def isSameBlock(block: HasCacheBlockAddress, tag: UInt, idx: UInt) = + hasTag(block, tag) && inSet(block, idx) + //val blockAddrBits = p(TLBlockAddrBits) val refillCyclesPerBeat = outerDataBits/rowBits val refillCycles = refillCyclesPerBeat*outerDataBeats @@ -211,10 +225,13 @@ abstract class L2HellaCacheModule(implicit val p: Parameters) extends Module with HasOuterCacheParameters { def doInternalOutputArbitration[T <: Data : ClassTag]( out: DecoupledIO[T], - ins: Seq[DecoupledIO[T]]) { + ins: Seq[DecoupledIO[T]], + block_transfer: T => Bool = (t: T) => Bool(false)) { val arb = Module(new RRArbiter(out.bits, ins.size)) - out <> arb.io.out - arb.io.in <> ins + out.valid := arb.io.out.valid && !block_transfer(arb.io.out.bits) + out.bits := arb.io.out.bits + arb.io.out.ready := out.ready && !block_transfer(arb.io.out.bits) + arb.io.in <> ins } def doInternalInputRouting[T <: Bundle with HasL2Id](in: ValidIO[T], outs: Seq[ValidIO[T]]) { @@ -297,6 +314,9 @@ class L2MetaRWIO(implicit p: Parameters) extends L2HellaCacheBundle()(p) with HasL2MetaReadIO with HasL2MetaWriteIO +class L2MetaReadOnlyIO(implicit p: Parameters) extends L2HellaCacheBundle()(p) + with HasL2MetaReadIO + trait HasL2MetaRWIO extends HasOuterCacheParameters { val meta = new L2MetaRWIO } @@ -462,11 +482,24 @@ class TSHRFile(implicit p: Parameters) extends L2HellaCacheModule()(p) (nReleaseTransactors until nTransactors).map(id => Module(new CacheAcquireTracker(id))) val trackerList = irelTrackerList ++ iacqTrackerList - + + // Don't allow a writeback request to go through if we are taking + // a voluntary release for the same block. + // The writeback can go forward once the voluntary release is handled + def writebackConflictsWithVolRelease(wb: L2WritebackReq): Bool = + irelTrackerList + .map(tracker => + !tracker.io.alloc.idle && + isSameBlock(tracker.io.alloc, wb.tag, wb.idx)) + .reduce(_ || _) || + (io.inner.release.valid && + isSameBlock(io.inner.release.bits, wb.tag, wb.idx)) + // WritebackUnit evicts data from L2, including invalidating L1s val wb = Module(new L2WritebackUnit(nTransactors)) val trackerAndWbIOs = trackerList.map(_.io) :+ wb.io - doInternalOutputArbitration(wb.io.wb.req, trackerList.map(_.io.wb.req)) + doInternalOutputArbitration(wb.io.wb.req, trackerList.map(_.io.wb.req), + block_transfer = writebackConflictsWithVolRelease _) doInternalInputRouting(wb.io.wb.resp, trackerList.map(_.io.wb.resp)) // Propagate incoherence flags @@ -476,7 +509,7 @@ class TSHRFile(implicit p: Parameters) extends L2HellaCacheModule()(p) val irel_vs_iacq_conflict = io.inner.acquire.valid && io.inner.release.valid && - inSameSet(io.inner.acquire.bits, io.inner.release.bits.addr_block) + inSameSet(io.inner.acquire.bits, io.inner.release.bits) doInputRoutingWithAllocation( in = io.inner.acquire, outs = trackerList.map(_.io.inner.acquire), @@ -508,11 +541,11 @@ class TSHRFile(implicit p: Parameters) extends L2HellaCacheModule()(p) io.outer <> outer_arb.io.out // Wire local memory arrays - doInternalOutputArbitration(io.meta.read, trackerList.map(_.io.meta.read)) + doInternalOutputArbitration(io.meta.read, trackerList.map(_.io.meta.read) :+ wb.io.meta.read) doInternalOutputArbitration(io.meta.write, trackerList.map(_.io.meta.write)) doInternalOutputArbitration(io.data.read, trackerList.map(_.io.data.read) :+ wb.io.data.read) doInternalOutputArbitration(io.data.write, trackerList.map(_.io.data.write)) - doInternalInputRouting(io.meta.resp, trackerList.map(_.io.meta.resp)) + doInternalInputRouting(io.meta.resp, trackerList.map(_.io.meta.resp) :+ wb.io.meta.resp) doInternalInputRouting(io.data.resp, trackerList.map(_.io.data.resp) :+ wb.io.data.resp) } @@ -684,17 +717,18 @@ trait HasCoherenceMetadataBuffer extends HasOuterCacheParameters } } - def metaRead(port: HasL2MetaReadIO, next_state: UInt) { + def metaRead(port: HasL2MetaReadIO, next_state: UInt, way_en_known: Bool = Bool(false)) { port.read.valid := state === s_meta_read port.read.bits.id := UInt(trackerId) port.read.bits.idx := xact_addr_idx port.read.bits.tag := xact_addr_tag + port.read.bits.way_en := Mux(way_en_known, xact_way_en, ~UInt(0, nWays)) when(state === s_meta_read && port.read.ready) { state := s_meta_resp } when(state === s_meta_resp && port.resp.valid) { xact_old_meta := port.resp.bits.meta - xact_way_en := port.resp.bits.way_en + when (!way_en_known) { xact_way_en := port.resp.bits.way_en } state := next_state } } @@ -716,7 +750,6 @@ trait TriggersWritebacks extends HasCoherenceMetadataBuffer { wb.req.bits.id := UInt(trackerId) wb.req.bits.idx := xact_addr_idx wb.req.bits.tag := xact_old_meta.tag - wb.req.bits.coh := xact_old_meta.coh wb.req.bits.way_en := xact_way_en when(state === s_wb_req && wb.req.ready) { state := s_wb_resp } @@ -808,14 +841,12 @@ class CacheAcquireTracker(trackerId: Int)(implicit p: Parameters) val pending_coh_on_miss = HierarchicalMetadata.onReset - - // Setup IOs used for routing in the parent - val before_wb_alloc = state isOneOf (s_meta_read, s_meta_resp, s_wb_req) + val before_wb_req = state.isOneOf(s_meta_read, s_meta_resp) routeInParent( iacqMatches = inSameSet(_, xact_addr_block), - irelMatches = (irel: HasCacheBlockAddress) => - Mux(before_wb_alloc, inSameSet(irel, xact_addr_block), exactAddrMatch(irel)), + irelMatches = (irel: HasCacheBlockAddress) => + Mux(before_wb_req, inSameSet(irel, xact_addr_block), exactAddrMatch(irel)), iacqCanAlloc = Bool(true)) // TileLink allows for Gets-under-Get @@ -868,7 +899,7 @@ class CacheAcquireTracker(trackerId: Int)(implicit p: Parameters) val needs_inner_probes = tag_match && coh.inner.requiresProbes(xact_iacq) val should_update_meta = !tag_match && xact_allocate || is_hit && pending_coh_on_hit =/= coh - def full_representation = io.meta.resp.bits.meta.coh.inner.full() + def full_representation = coh.inner.full() metaRead( io.meta, @@ -996,7 +1027,9 @@ class CacheAcquireTracker(trackerId: Int)(implicit p: Parameters) quiesce(Mux(pending_meta_write, s_meta_write, s_idle)) { clearWmaskBuffer() } } -class L2WritebackReq(implicit p: Parameters) extends L2Metadata()(p) with HasL2Id { +class L2WritebackReq(implicit p: Parameters) + extends L2HellaCacheBundle()(p) with HasL2Id { + val tag = Bits(width = tagBits) val idx = Bits(width = idxBits) val way_en = Bits(width = nWays) } @@ -1012,9 +1045,10 @@ trait HasL2WritebackIO extends HasOuterCacheParameters { val wb = new L2WritebackIO() } -class L2WritebackUnitIO(implicit p: Parameters) extends HierarchicalXactTrackerIO()(p) - with HasL2DataRWIO { +class L2WritebackUnitIO(implicit p: Parameters) + extends HierarchicalXactTrackerIO()(p) with HasL2DataRWIO { val wb = new L2WritebackIO().flip() + val meta = new L2MetaReadOnlyIO } class L2WritebackUnit(val trackerId: Int)(implicit p: Parameters) extends XactTracker()(p) @@ -1039,6 +1073,18 @@ class L2WritebackUnit(val trackerId: Int)(implicit p: Parameters) extends XactTr // Start the writeback sub-transaction io.wb.req.ready := state === s_idle + val coh = io.meta.resp.bits.meta.coh + val needs_inner_probes = coh.inner.requiresProbesOnVoluntaryWriteback() + val needs_outer_release = coh.outer.requiresVoluntaryWriteback() + def full_representation = coh.inner.full() + + // Even though we already read the metadata in the acquire tracker that + // sent the writeback request, we have to read it again in the writeback + // unit, since it may have been updated in the meantime. + metaRead(io.meta, + next_state = Mux(needs_inner_probes, s_inner_probe, s_busy), + way_en_known = Bool(true)) + // Track which clients yet need to be probed and make Probe message innerProbe( inner_coh.makeProbeForVoluntaryWriteback(curr_probe_dst, xact_addr_block), @@ -1050,7 +1096,7 @@ class L2WritebackUnit(val trackerId: Int)(implicit p: Parameters) extends XactTr def irel_can_merge = io.irel().conflicts(xact_addr_block) && io.irel().isVoluntary() && - (state =/= s_idle) && + !state.isOneOf(s_idle, s_meta_read, s_meta_resp) && !(state === s_busy && all_pending_done) && !vol_ignt_counter.pending && !blockInnerRelease() @@ -1062,18 +1108,16 @@ class L2WritebackUnit(val trackerId: Int)(implicit p: Parameters) extends XactTr mergeDataInner(io.inner.release) // If a release didn't write back data, have to read it from data array - readDataArray(drop_pending_bit = dropPendingBitWhenBeatHasData(io.inner.release)) - - val coh = io.wb.req.bits.coh - val needs_inner_probes = coh.inner.requiresProbesOnVoluntaryWriteback() - val needs_outer_release = coh.outer.requiresVoluntaryWriteback() + readDataArray( + drop_pending_bit = dropPendingBitWhenBeatHasData(io.inner.release)) // Once the data is buffered we can write it back to outer memory outerRelease( coh = outer_coh, data = data_buffer(vol_ognt_counter.up.idx), add_pending_data_bits = addPendingBitInternal(io.data.resp), - add_pending_send_bit = io.wb.req.fire() && needs_outer_release) + add_pending_send_bit = io.meta.resp.valid && needs_outer_release) + // Respond to the initiating transaction handler signalling completion of the writeback io.wb.resp.valid := state === s_busy && all_pending_done @@ -1081,17 +1125,21 @@ class L2WritebackUnit(val trackerId: Int)(implicit p: Parameters) extends XactTr quiesce() {} - def full_representation = io.wb.req.bits.coh.inner.full() // State machine updates and transaction handler metadata intialization when(state === s_idle && io.wb.req.valid) { xact_id := io.wb.req.bits.id xact_way_en := io.wb.req.bits.way_en xact_addr_block := (if (cacheIdBits == 0) Cat(io.wb.req.bits.tag, io.wb.req.bits.idx) else Cat(io.wb.req.bits.tag, io.wb.req.bits.idx, UInt(cacheId, cacheIdBits))) - when(needs_inner_probes) { initializeProbes() } - pending_reads := Mux(needs_outer_release, ~UInt(0, width = innerDataBeats), UInt(0)) - pending_resps := UInt(0) - pending_coh := coh - state := Mux(needs_inner_probes, s_inner_probe, s_busy) + state := s_meta_read } + + when (state === s_meta_resp && io.meta.resp.valid) { + pending_reads := Fill(innerDataBeats, needs_outer_release) + pending_coh := coh + when(needs_inner_probes) { initializeProbes() } + } + + assert(!io.meta.resp.valid || io.meta.resp.bits.tag_match, + "L2 requested Writeback for block not present in cache") } diff --git a/uncore/src/main/scala/agents/Trackers.scala b/uncore/src/main/scala/agents/Trackers.scala index d695df8d..c2a7d301 100644 --- a/uncore/src/main/scala/agents/Trackers.scala +++ b/uncore/src/main/scala/agents/Trackers.scala @@ -7,6 +7,7 @@ import uncore.coherence._ import uncore.tilelink._ import uncore.util._ import uncore.Util._ +import junctions._ import cde.{Field, Parameters} import scala.math.max @@ -18,12 +19,18 @@ class TrackerAllocation extends Bundle { val should = Bool(INPUT) } +class TrackerAllocationIO(implicit val p: Parameters) + extends ParameterizedBundle()(p) + with HasCacheBlockAddress { + val iacq = new TrackerAllocation + val irel = new TrackerAllocation + val oprb = new TrackerAllocation + val idle = Bool(OUTPUT) +} + trait HasTrackerAllocationIO extends Bundle { - val alloc = new Bundle { - val iacq = new TrackerAllocation - val irel = new TrackerAllocation - val oprb = new TrackerAllocation - } + implicit val p: Parameters + val alloc = new TrackerAllocationIO } class ManagerXactTrackerIO(implicit p: Parameters) extends ManagerTLIO()(p) @@ -420,6 +427,8 @@ trait RoutesInParent extends HasBlockAddressBuffer io.alloc.iacq.can := state === s_idle && iacqCanAlloc io.alloc.irel.can := state === s_idle && irelCanAlloc io.alloc.oprb.can := state === s_idle && oprbCanAlloc + io.alloc.addr_block := xact_addr_block + io.alloc.idle := state === s_idle } }