diff --git a/uncore/src/main/scala/cache.scala b/uncore/src/main/scala/cache.scala index aa2097af..ec25501c 100644 --- a/uncore/src/main/scala/cache.scala +++ b/uncore/src/main/scala/cache.scala @@ -566,17 +566,16 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { val is_hit = xact_tag_match && xact_meta.coh.outer.isHit(xact.op_code()) val do_allocate = xact.allocate() val needs_writeback = !xact_tag_match && do_allocate && - xact_meta.coh.outer.requiresVoluntaryWriteback() - val needs_probes = xact_meta.coh.inner.requiresProbes(xact) + (xact_meta.coh.outer.requiresVoluntaryWriteback() || + xact_meta.coh.inner.requiresProbesOnVoluntaryWriteback()) val pending_coh_on_hit = HierarchicalMetadata( io.meta.resp.bits.meta.coh.inner, io.meta.resp.bits.meta.coh.outer.onHit(xact.op_code())) - val pending_coh_on_irel = HierarchicalMetadata( - pending_coh.inner.onRelease( + val pending_icoh_on_irel = pending_coh.inner.onRelease( incoming = io.irel(), - src = io.inner.release.bits.header.src), - pending_coh.outer.onHit(M_XWR)) // WB is a write + src = io.inner.release.bits.header.src) + val pending_ocoh_on_irel = pending_coh.outer.onHit(M_XWR) // WB is a write val pending_coh_on_ognt = HierarchicalMetadata( ManagerMetadata.onReset, pending_coh.outer.onGrant(io.ognt(), xact.op_code())) @@ -768,7 +767,8 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { val _tag_match = io.meta.resp.bits.tag_match val _is_hit = _tag_match && _coh.outer.isHit(xact.op_code()) val _needs_writeback = !_tag_match && do_allocate && - _coh.outer.requiresVoluntaryWriteback() + (_coh.outer.requiresVoluntaryWriteback() || + _coh.inner.requiresProbesOnVoluntaryWriteback()) val _needs_probes = _tag_match && _coh.inner.requiresProbes(xact) when(_is_hit) { pending_coh := pending_coh_on_hit } when(_needs_probes) { @@ -796,12 +796,13 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { // Handle releases, which may have data being written back io.inner.release.ready := Bool(true) when(io.inner.release.valid) { - pending_coh := pending_coh_on_irel + pending_coh.inner := pending_icoh_on_irel // Handle released dirty data //TODO: make sure cacq data is actually present before accpeting // release data to merge! when(io.irel().hasData()) { irel_had_data := Bool(true) + pending_coh.outer := pending_ocoh_on_irel mergeDataInner(data_buffer, io.irel()) } // We don't decrement release_count until we've received all the data beats. @@ -931,8 +932,9 @@ class L2WritebackUnit(trackerId: Int, bankId: Int) extends L2XactTracker { val resp_data_done = connectInternalDataBeatCounter(io.data.resp) val pending_icoh_on_irel = xact_coh.inner.onRelease( - incoming = io.irel(), + incoming = io.irel(), src = io.inner.release.bits.header.src) + val pending_ocoh_on_irel = xact_coh.outer.onHit(M_XWR) // WB is a write io.has_acquire_conflict := Bool(false) io.has_acquire_match := Bool(false) @@ -1008,6 +1010,7 @@ class L2WritebackUnit(trackerId: Int, bankId: Int) extends L2XactTracker { // Handle released dirty data when(io.irel().hasData()) { irel_had_data := Bool(true) + xact_coh.outer := pending_ocoh_on_irel data_buffer(io.irel().addr_beat) := io.irel().data } // We don't decrement release_count until we've received all the data beats. @@ -1016,7 +1019,11 @@ class L2WritebackUnit(trackerId: Int, bankId: Int) extends L2XactTracker { } } when(release_count === UInt(0)) { - state := Mux(irel_had_data, s_outer_release, s_data_read) + state := Mux(irel_had_data, // If someone released a dirty block + s_outer_release, // write that block back, otherwise + Mux(xact_coh.outer.requiresVoluntaryWriteback(), + s_data_read, // write extant dirty data back, or just + s_wb_resp)) // drop a clean block after collecting acks } } is(s_data_read) { diff --git a/uncore/src/main/scala/coherence.scala b/uncore/src/main/scala/coherence.scala index 1e8d1b60..862eb484 100644 --- a/uncore/src/main/scala/coherence.scala +++ b/uncore/src/main/scala/coherence.scala @@ -383,6 +383,7 @@ class MSICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) { val nManagerStates = 0 // TODO: We could add a Shared state to avoid probing // only a single sharer (also would need // notification msg to track clean drops) + // Also could avoid probes on outer WBs. def requiresProbes(a: Acquire, meta: ManagerMetadata) = Mux(dir.none(meta.sharers), Bool(false), @@ -501,6 +502,7 @@ class MESICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) { val nManagerStates = 0 // TODO: We could add a Shared state to avoid probing // only a single sharer (also would need // notification msg to track clean drops) + // Also could avoid probes on outer WBs. def requiresProbes(a: Acquire, meta: ManagerMetadata) = Mux(dir.none(meta.sharers), Bool(false), diff --git a/uncore/src/main/scala/memserdes.scala b/uncore/src/main/scala/memserdes.scala index 7542815b..eafc44bd 100644 --- a/uncore/src/main/scala/memserdes.scala +++ b/uncore/src/main/scala/memserdes.scala @@ -212,6 +212,7 @@ class MemIOTileLinkIOConverter(qDepth: Int) extends Module { val mem = new MemIO } val mifTagBits = params(MIFTagBits) + val mifAddrBits = params(MIFAddrBits) val mifDataBits = params(MIFDataBits) val mifDataBeats = params(MIFDataBeats) val tlDataBits = params(TLDataBits) @@ -235,8 +236,8 @@ class MemIOTileLinkIOConverter(qDepth: Int) extends Module { // Decompose outgoing TL Acquires into MemIO cmd and data val active_out = Reg(init=Bool(false)) val cmd_sent_out = Reg(init=Bool(false)) - val tag_out = Reg(Bits()) - val addr_out = Reg(Bits()) + val tag_out = Reg(UInt(width = mifTagBits)) + val addr_out = Reg(UInt(width = mifAddrBits)) val has_data = Reg(init=Bool(false)) val data_from_rel = Reg(init=Bool(false)) val (tl_cnt_out, tl_wrap_out) = @@ -343,26 +344,34 @@ class MemIOTileLinkIOConverter(qDepth: Int) extends Module { active_out := !io.mem.req_cmd.ready || io.mem.req_data.valid io.mem.req_cmd.valid := Bool(true) cmd_sent_out := io.mem.req_cmd.ready - tag_out := io.mem.req_cmd.bits.tag - addr_out := io.mem.req_data.bits.data - has_data := io.mem.req_cmd.bits.rw tl_done_out := tl_wrap_out when(io.tl.release.valid) { data_from_rel := Bool(true) make_grant_ack := Bool(true) - io.mem.req_cmd.bits.rw := rel_has_data - io.mem.req_cmd.bits.tag := Cat(io.tl.release.bits.payload.client_xact_id, - io.tl.release.bits.payload.isVoluntary()) - io.mem.req_cmd.bits.addr := io.tl.release.bits.payload.addr_block io.mem.req_data.bits.data := io.tl.release.bits.payload.data + val tag = Cat(io.tl.release.bits.payload.client_xact_id, + io.tl.release.bits.payload.isVoluntary()) + val addr = io.tl.release.bits.payload.addr_block + io.mem.req_cmd.bits.tag := tag + io.mem.req_cmd.bits.addr := addr + io.mem.req_cmd.bits.rw := rel_has_data + tag_out := tag + addr_out := addr + has_data := rel_has_data } .elsewhen(io.tl.acquire.valid) { data_from_rel := Bool(false) make_grant_ack := acq_has_data - io.mem.req_cmd.bits.rw := acq_has_data - io.mem.req_cmd.bits.tag := Cat(io.tl.acquire.bits.payload.client_xact_id, - io.tl.acquire.bits.payload.isBuiltInType()) - io.mem.req_cmd.bits.addr := io.tl.acquire.bits.payload.addr_block io.mem.req_data.bits.data := io.tl.acquire.bits.payload.data + io.mem.req_cmd.bits.rw := acq_has_data + val tag = Cat(io.tl.acquire.bits.payload.client_xact_id, + io.tl.acquire.bits.payload.isBuiltInType()) + val addr = io.tl.acquire.bits.payload.addr_block + io.mem.req_cmd.bits.tag := tag + io.mem.req_cmd.bits.addr := addr + io.mem.req_cmd.bits.rw := acq_has_data + tag_out := tag + addr_out := addr + has_data := acq_has_data } } } @@ -380,7 +389,7 @@ class MemIOTileLinkIOConverter(qDepth: Int) extends Module { } when(tl_wrap_out) { tl_done_out := Bool(true) } when(tl_done_out && make_grant_ack) { - gnt_arb.io.in(1).valid := Bool(true) + gnt_arb.io.in(1).valid := Bool(true) // TODO: grants for voluntary acks? when(gnt_arb.io.in(1).ready) { make_grant_ack := Bool(false) } } when(cmd_sent_out && (!has_data || tl_done_out) && !make_grant_ack) {