1
0

Fix L2 Writeback deadlock issue

The deadlock condition occurs when the acquire tracker attempts to
request a writeback while the writeback unit is still busy and a
voluntary release for the block to be written back is coming in.

The voluntary release cannot be accepted because it conflicts with the
acquire tracker. The acquire tracker can't merge the voluntary release
because it is waiting to send the writeback. The writeback can't
progress because the release it is waiting on is behind the voluntary
release.

The fix is to break the atomicity guarantee between the acquire
tracker and the writeback unit. This allows the voluntary release
tracker to take the voluntary release before the writeback unit
accepts the conflicting request. Breaking atomicity introduces a
potential race condition on the metadata array, so the writeback unit
now re-reads the metadata after accepting a request.
This commit is contained in:
Howard Mao 2016-07-26 12:31:08 -07:00
parent 11ec5b2cf4
commit 82bbbf908d
2 changed files with 101 additions and 44 deletions

View File

@ -188,12 +188,26 @@ trait HasOuterCacheParameters extends HasCacheParameters with HasCoherenceAgentP
val idxLSB = cacheIdBits val idxLSB = cacheIdBits
val idxMSB = idxLSB + idxBits - 1 val idxMSB = idxLSB + idxBits - 1
val tagLSB = idxLSB + idxBits val tagLSB = idxLSB + idxBits
def inSameSet(block: HasCacheBlockAddress, addr: UInt): Bool = { val tagMSB = tagLSB + tagBits - 1
block.addr_block(idxMSB,idxLSB) === addr(idxMSB,idxLSB)
} def inSameSet(block_a: HasCacheBlockAddress, block_b: HasCacheBlockAddress): Bool =
def haveSameTag(block: HasCacheBlockAddress, addr: UInt): Bool = { inSameSet(block_a, block_b.addr_block)
block.addr_block >> UInt(tagLSB) === addr >> UInt(tagLSB)
} def inSameSet(block: HasCacheBlockAddress, addr: UInt): Bool =
inSet(block, addr(idxMSB, idxLSB))
def inSet(block: HasCacheBlockAddress, idx: UInt): Bool =
block.addr_block(idxMSB,idxLSB) === idx
def haveSameTag(block: HasCacheBlockAddress, addr: UInt): Bool =
hasTag(block, addr(tagMSB, tagLSB))
def hasTag(block: HasCacheBlockAddress, tag: UInt): Bool =
block.addr_block(tagMSB, tagLSB) === tag
def isSameBlock(block: HasCacheBlockAddress, tag: UInt, idx: UInt) =
hasTag(block, tag) && inSet(block, idx)
//val blockAddrBits = p(TLBlockAddrBits) //val blockAddrBits = p(TLBlockAddrBits)
val refillCyclesPerBeat = outerDataBits/rowBits val refillCyclesPerBeat = outerDataBits/rowBits
val refillCycles = refillCyclesPerBeat*outerDataBeats val refillCycles = refillCyclesPerBeat*outerDataBeats
@ -211,9 +225,12 @@ abstract class L2HellaCacheModule(implicit val p: Parameters) extends Module
with HasOuterCacheParameters { with HasOuterCacheParameters {
def doInternalOutputArbitration[T <: Data : ClassTag]( def doInternalOutputArbitration[T <: Data : ClassTag](
out: DecoupledIO[T], out: DecoupledIO[T],
ins: Seq[DecoupledIO[T]]) { ins: Seq[DecoupledIO[T]],
block_transfer: T => Bool = (t: T) => Bool(false)) {
val arb = Module(new RRArbiter(out.bits, ins.size)) val arb = Module(new RRArbiter(out.bits, ins.size))
out <> arb.io.out out.valid := arb.io.out.valid && !block_transfer(arb.io.out.bits)
out.bits := arb.io.out.bits
arb.io.out.ready := out.ready && !block_transfer(arb.io.out.bits)
arb.io.in <> ins arb.io.in <> ins
} }
@ -297,6 +314,9 @@ class L2MetaRWIO(implicit p: Parameters) extends L2HellaCacheBundle()(p)
with HasL2MetaReadIO with HasL2MetaReadIO
with HasL2MetaWriteIO with HasL2MetaWriteIO
class L2MetaReadOnlyIO(implicit p: Parameters) extends L2HellaCacheBundle()(p)
with HasL2MetaReadIO
trait HasL2MetaRWIO extends HasOuterCacheParameters { trait HasL2MetaRWIO extends HasOuterCacheParameters {
val meta = new L2MetaRWIO val meta = new L2MetaRWIO
} }
@ -463,10 +483,23 @@ class TSHRFile(implicit p: Parameters) extends L2HellaCacheModule()(p)
Module(new CacheAcquireTracker(id))) Module(new CacheAcquireTracker(id)))
val trackerList = irelTrackerList ++ iacqTrackerList val trackerList = irelTrackerList ++ iacqTrackerList
// Don't allow a writeback request to go through if we are taking
// a voluntary release for the same block.
// The writeback can go forward once the voluntary release is handled
def writebackConflictsWithVolRelease(wb: L2WritebackReq): Bool =
irelTrackerList
.map(tracker =>
!tracker.io.alloc.idle &&
isSameBlock(tracker.io.alloc, wb.tag, wb.idx))
.reduce(_ || _) ||
(io.inner.release.valid &&
isSameBlock(io.inner.release.bits, wb.tag, wb.idx))
// WritebackUnit evicts data from L2, including invalidating L1s // WritebackUnit evicts data from L2, including invalidating L1s
val wb = Module(new L2WritebackUnit(nTransactors)) val wb = Module(new L2WritebackUnit(nTransactors))
val trackerAndWbIOs = trackerList.map(_.io) :+ wb.io val trackerAndWbIOs = trackerList.map(_.io) :+ wb.io
doInternalOutputArbitration(wb.io.wb.req, trackerList.map(_.io.wb.req)) doInternalOutputArbitration(wb.io.wb.req, trackerList.map(_.io.wb.req),
block_transfer = writebackConflictsWithVolRelease _)
doInternalInputRouting(wb.io.wb.resp, trackerList.map(_.io.wb.resp)) doInternalInputRouting(wb.io.wb.resp, trackerList.map(_.io.wb.resp))
// Propagate incoherence flags // Propagate incoherence flags
@ -476,7 +509,7 @@ class TSHRFile(implicit p: Parameters) extends L2HellaCacheModule()(p)
val irel_vs_iacq_conflict = val irel_vs_iacq_conflict =
io.inner.acquire.valid && io.inner.acquire.valid &&
io.inner.release.valid && io.inner.release.valid &&
inSameSet(io.inner.acquire.bits, io.inner.release.bits.addr_block) inSameSet(io.inner.acquire.bits, io.inner.release.bits)
doInputRoutingWithAllocation( doInputRoutingWithAllocation(
in = io.inner.acquire, in = io.inner.acquire,
outs = trackerList.map(_.io.inner.acquire), outs = trackerList.map(_.io.inner.acquire),
@ -508,11 +541,11 @@ class TSHRFile(implicit p: Parameters) extends L2HellaCacheModule()(p)
io.outer <> outer_arb.io.out io.outer <> outer_arb.io.out
// Wire local memory arrays // Wire local memory arrays
doInternalOutputArbitration(io.meta.read, trackerList.map(_.io.meta.read)) doInternalOutputArbitration(io.meta.read, trackerList.map(_.io.meta.read) :+ wb.io.meta.read)
doInternalOutputArbitration(io.meta.write, trackerList.map(_.io.meta.write)) doInternalOutputArbitration(io.meta.write, trackerList.map(_.io.meta.write))
doInternalOutputArbitration(io.data.read, trackerList.map(_.io.data.read) :+ wb.io.data.read) doInternalOutputArbitration(io.data.read, trackerList.map(_.io.data.read) :+ wb.io.data.read)
doInternalOutputArbitration(io.data.write, trackerList.map(_.io.data.write)) doInternalOutputArbitration(io.data.write, trackerList.map(_.io.data.write))
doInternalInputRouting(io.meta.resp, trackerList.map(_.io.meta.resp)) doInternalInputRouting(io.meta.resp, trackerList.map(_.io.meta.resp) :+ wb.io.meta.resp)
doInternalInputRouting(io.data.resp, trackerList.map(_.io.data.resp) :+ wb.io.data.resp) doInternalInputRouting(io.data.resp, trackerList.map(_.io.data.resp) :+ wb.io.data.resp)
} }
@ -684,17 +717,18 @@ trait HasCoherenceMetadataBuffer extends HasOuterCacheParameters
} }
} }
def metaRead(port: HasL2MetaReadIO, next_state: UInt) { def metaRead(port: HasL2MetaReadIO, next_state: UInt, way_en_known: Bool = Bool(false)) {
port.read.valid := state === s_meta_read port.read.valid := state === s_meta_read
port.read.bits.id := UInt(trackerId) port.read.bits.id := UInt(trackerId)
port.read.bits.idx := xact_addr_idx port.read.bits.idx := xact_addr_idx
port.read.bits.tag := xact_addr_tag port.read.bits.tag := xact_addr_tag
port.read.bits.way_en := Mux(way_en_known, xact_way_en, ~UInt(0, nWays))
when(state === s_meta_read && port.read.ready) { state := s_meta_resp } when(state === s_meta_read && port.read.ready) { state := s_meta_resp }
when(state === s_meta_resp && port.resp.valid) { when(state === s_meta_resp && port.resp.valid) {
xact_old_meta := port.resp.bits.meta xact_old_meta := port.resp.bits.meta
xact_way_en := port.resp.bits.way_en when (!way_en_known) { xact_way_en := port.resp.bits.way_en }
state := next_state state := next_state
} }
} }
@ -716,7 +750,6 @@ trait TriggersWritebacks extends HasCoherenceMetadataBuffer {
wb.req.bits.id := UInt(trackerId) wb.req.bits.id := UInt(trackerId)
wb.req.bits.idx := xact_addr_idx wb.req.bits.idx := xact_addr_idx
wb.req.bits.tag := xact_old_meta.tag wb.req.bits.tag := xact_old_meta.tag
wb.req.bits.coh := xact_old_meta.coh
wb.req.bits.way_en := xact_way_en wb.req.bits.way_en := xact_way_en
when(state === s_wb_req && wb.req.ready) { state := s_wb_resp } when(state === s_wb_req && wb.req.ready) { state := s_wb_resp }
@ -808,14 +841,12 @@ class CacheAcquireTracker(trackerId: Int)(implicit p: Parameters)
val pending_coh_on_miss = HierarchicalMetadata.onReset val pending_coh_on_miss = HierarchicalMetadata.onReset
val before_wb_req = state.isOneOf(s_meta_read, s_meta_resp)
// Setup IOs used for routing in the parent
val before_wb_alloc = state isOneOf (s_meta_read, s_meta_resp, s_wb_req)
routeInParent( routeInParent(
iacqMatches = inSameSet(_, xact_addr_block), iacqMatches = inSameSet(_, xact_addr_block),
irelMatches = (irel: HasCacheBlockAddress) => irelMatches = (irel: HasCacheBlockAddress) =>
Mux(before_wb_alloc, inSameSet(irel, xact_addr_block), exactAddrMatch(irel)), Mux(before_wb_req, inSameSet(irel, xact_addr_block), exactAddrMatch(irel)),
iacqCanAlloc = Bool(true)) iacqCanAlloc = Bool(true))
// TileLink allows for Gets-under-Get // TileLink allows for Gets-under-Get
@ -868,7 +899,7 @@ class CacheAcquireTracker(trackerId: Int)(implicit p: Parameters)
val needs_inner_probes = tag_match && coh.inner.requiresProbes(xact_iacq) val needs_inner_probes = tag_match && coh.inner.requiresProbes(xact_iacq)
val should_update_meta = !tag_match && xact_allocate || val should_update_meta = !tag_match && xact_allocate ||
is_hit && pending_coh_on_hit =/= coh is_hit && pending_coh_on_hit =/= coh
def full_representation = io.meta.resp.bits.meta.coh.inner.full() def full_representation = coh.inner.full()
metaRead( metaRead(
io.meta, io.meta,
@ -996,7 +1027,9 @@ class CacheAcquireTracker(trackerId: Int)(implicit p: Parameters)
quiesce(Mux(pending_meta_write, s_meta_write, s_idle)) { clearWmaskBuffer() } quiesce(Mux(pending_meta_write, s_meta_write, s_idle)) { clearWmaskBuffer() }
} }
class L2WritebackReq(implicit p: Parameters) extends L2Metadata()(p) with HasL2Id { class L2WritebackReq(implicit p: Parameters)
extends L2HellaCacheBundle()(p) with HasL2Id {
val tag = Bits(width = tagBits)
val idx = Bits(width = idxBits) val idx = Bits(width = idxBits)
val way_en = Bits(width = nWays) val way_en = Bits(width = nWays)
} }
@ -1012,9 +1045,10 @@ trait HasL2WritebackIO extends HasOuterCacheParameters {
val wb = new L2WritebackIO() val wb = new L2WritebackIO()
} }
class L2WritebackUnitIO(implicit p: Parameters) extends HierarchicalXactTrackerIO()(p) class L2WritebackUnitIO(implicit p: Parameters)
with HasL2DataRWIO { extends HierarchicalXactTrackerIO()(p) with HasL2DataRWIO {
val wb = new L2WritebackIO().flip() val wb = new L2WritebackIO().flip()
val meta = new L2MetaReadOnlyIO
} }
class L2WritebackUnit(val trackerId: Int)(implicit p: Parameters) extends XactTracker()(p) class L2WritebackUnit(val trackerId: Int)(implicit p: Parameters) extends XactTracker()(p)
@ -1039,6 +1073,18 @@ class L2WritebackUnit(val trackerId: Int)(implicit p: Parameters) extends XactTr
// Start the writeback sub-transaction // Start the writeback sub-transaction
io.wb.req.ready := state === s_idle io.wb.req.ready := state === s_idle
val coh = io.meta.resp.bits.meta.coh
val needs_inner_probes = coh.inner.requiresProbesOnVoluntaryWriteback()
val needs_outer_release = coh.outer.requiresVoluntaryWriteback()
def full_representation = coh.inner.full()
// Even though we already read the metadata in the acquire tracker that
// sent the writeback request, we have to read it again in the writeback
// unit, since it may have been updated in the meantime.
metaRead(io.meta,
next_state = Mux(needs_inner_probes, s_inner_probe, s_busy),
way_en_known = Bool(true))
// Track which clients still need to be probed and make the Probe message // Track which clients still need to be probed and make the Probe message
innerProbe( innerProbe(
inner_coh.makeProbeForVoluntaryWriteback(curr_probe_dst, xact_addr_block), inner_coh.makeProbeForVoluntaryWriteback(curr_probe_dst, xact_addr_block),
@ -1050,7 +1096,7 @@ class L2WritebackUnit(val trackerId: Int)(implicit p: Parameters) extends XactTr
def irel_can_merge = io.irel().conflicts(xact_addr_block) && def irel_can_merge = io.irel().conflicts(xact_addr_block) &&
io.irel().isVoluntary() && io.irel().isVoluntary() &&
(state =/= s_idle) && !state.isOneOf(s_idle, s_meta_read, s_meta_resp) &&
!(state === s_busy && all_pending_done) && !(state === s_busy && all_pending_done) &&
!vol_ignt_counter.pending && !vol_ignt_counter.pending &&
!blockInnerRelease() !blockInnerRelease()
@ -1062,18 +1108,16 @@ class L2WritebackUnit(val trackerId: Int)(implicit p: Parameters) extends XactTr
mergeDataInner(io.inner.release) mergeDataInner(io.inner.release)
// If a release didn't write back data, have to read it from data array // If a release didn't write back data, have to read it from data array
readDataArray(drop_pending_bit = dropPendingBitWhenBeatHasData(io.inner.release)) readDataArray(
drop_pending_bit = dropPendingBitWhenBeatHasData(io.inner.release))
val coh = io.wb.req.bits.coh
val needs_inner_probes = coh.inner.requiresProbesOnVoluntaryWriteback()
val needs_outer_release = coh.outer.requiresVoluntaryWriteback()
// Once the data is buffered we can write it back to outer memory // Once the data is buffered we can write it back to outer memory
outerRelease( outerRelease(
coh = outer_coh, coh = outer_coh,
data = data_buffer(vol_ognt_counter.up.idx), data = data_buffer(vol_ognt_counter.up.idx),
add_pending_data_bits = addPendingBitInternal(io.data.resp), add_pending_data_bits = addPendingBitInternal(io.data.resp),
add_pending_send_bit = io.wb.req.fire() && needs_outer_release) add_pending_send_bit = io.meta.resp.valid && needs_outer_release)
// Respond to the initiating transaction handler signalling completion of the writeback // Respond to the initiating transaction handler signalling completion of the writeback
io.wb.resp.valid := state === s_busy && all_pending_done io.wb.resp.valid := state === s_busy && all_pending_done
@ -1081,17 +1125,21 @@ class L2WritebackUnit(val trackerId: Int)(implicit p: Parameters) extends XactTr
quiesce() {} quiesce() {}
def full_representation = io.wb.req.bits.coh.inner.full()
// State machine updates and transaction handler metadata initialization // State machine updates and transaction handler metadata initialization
when(state === s_idle && io.wb.req.valid) { when(state === s_idle && io.wb.req.valid) {
xact_id := io.wb.req.bits.id xact_id := io.wb.req.bits.id
xact_way_en := io.wb.req.bits.way_en xact_way_en := io.wb.req.bits.way_en
xact_addr_block := (if (cacheIdBits == 0) Cat(io.wb.req.bits.tag, io.wb.req.bits.idx) xact_addr_block := (if (cacheIdBits == 0) Cat(io.wb.req.bits.tag, io.wb.req.bits.idx)
else Cat(io.wb.req.bits.tag, io.wb.req.bits.idx, UInt(cacheId, cacheIdBits))) else Cat(io.wb.req.bits.tag, io.wb.req.bits.idx, UInt(cacheId, cacheIdBits)))
when(needs_inner_probes) { initializeProbes() } state := s_meta_read
pending_reads := Mux(needs_outer_release, ~UInt(0, width = innerDataBeats), UInt(0)) }
pending_resps := UInt(0)
when (state === s_meta_resp && io.meta.resp.valid) {
pending_reads := Fill(innerDataBeats, needs_outer_release)
pending_coh := coh pending_coh := coh
state := Mux(needs_inner_probes, s_inner_probe, s_busy) when(needs_inner_probes) { initializeProbes() }
} }
assert(!io.meta.resp.valid || io.meta.resp.bits.tag_match,
"L2 requested Writeback for block not present in cache")
} }

View File

@ -7,6 +7,7 @@ import uncore.coherence._
import uncore.tilelink._ import uncore.tilelink._
import uncore.util._ import uncore.util._
import uncore.Util._ import uncore.Util._
import junctions._
import cde.{Field, Parameters} import cde.{Field, Parameters}
import scala.math.max import scala.math.max
@ -18,12 +19,18 @@ class TrackerAllocation extends Bundle {
val should = Bool(INPUT) val should = Bool(INPUT)
} }
trait HasTrackerAllocationIO extends Bundle { class TrackerAllocationIO(implicit val p: Parameters)
val alloc = new Bundle { extends ParameterizedBundle()(p)
with HasCacheBlockAddress {
val iacq = new TrackerAllocation val iacq = new TrackerAllocation
val irel = new TrackerAllocation val irel = new TrackerAllocation
val oprb = new TrackerAllocation val oprb = new TrackerAllocation
val idle = Bool(OUTPUT)
} }
trait HasTrackerAllocationIO extends Bundle {
implicit val p: Parameters
val alloc = new TrackerAllocationIO
} }
class ManagerXactTrackerIO(implicit p: Parameters) extends ManagerTLIO()(p) class ManagerXactTrackerIO(implicit p: Parameters) extends ManagerTLIO()(p)
@ -420,6 +427,8 @@ trait RoutesInParent extends HasBlockAddressBuffer
io.alloc.iacq.can := state === s_idle && iacqCanAlloc io.alloc.iacq.can := state === s_idle && iacqCanAlloc
io.alloc.irel.can := state === s_idle && irelCanAlloc io.alloc.irel.can := state === s_idle && irelCanAlloc
io.alloc.oprb.can := state === s_idle && oprbCanAlloc io.alloc.oprb.can := state === s_idle && oprbCanAlloc
io.alloc.addr_block := xact_addr_block
io.alloc.idle := state === s_idle
} }
} }