From cf1df2d72a3b3cd687eed7f7ed8c3a17ac62758e Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 16 Mar 2015 13:27:05 -0700 Subject: [PATCH 01/10] further amo cleanups --- uncore/src/main/scala/cache.scala | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/uncore/src/main/scala/cache.scala b/uncore/src/main/scala/cache.scala index bc12cfeb..e1c8bce4 100644 --- a/uncore/src/main/scala/cache.scala +++ b/uncore/src/main/scala/cache.scala @@ -572,11 +572,11 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { val state = Reg(init=s_idle) val xact_src = Reg(io.inner.acquire.bits.header.src.clone) - val xact = Reg(Bundle(new Acquire, { case TLId => params(InnerTLId); case TLDataBits => 0 })) + val xact = Reg(Bundle(new Acquire, { case TLId => params(InnerTLId) })) val data_buffer = Vec.fill(innerDataBeats) { Reg(io.iacq().data.clone) } - val amo_result = Reg(io.iacq().data.clone) + val amo_result = xact.data val xact_tag_match = Reg{ Bool() } val xact_meta = Reg{ new L2Metadata } val xact_way_en = Reg{ Bits(width = nWays) } @@ -638,13 +638,14 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { def mergeData[T <: HasTileLinkData] (byteAddrBits: Int, dataBits: Int) (buffer: Vec[UInt], beat: UInt, incoming: UInt) { - val old_data = incoming - val new_data = buffer(beat) + val old_data = incoming // Refilled, written back, or de-cached data + val new_data = buffer(beat) // Newly Put data is in the buffer val amoOpSz = UInt(amoAluOperandBits) val offset = xact.addr_byte()(byteAddrBits-1, log2Up(amoAluOperandBits/8)) amoalu.io.lhs := old_data >> offset*amoOpSz amoalu.io.rhs := new_data >> offset*amoOpSz - val valid_beat = xact.is(Acquire.putBlockType) || xact.addr_beat === beat + val valid_beat = (xact.is(Acquire.putBlockType) || xact.addr_beat === beat) && + xact.isBuiltInType() // Only custom a_types have data for now val wmask = Fill(dataBits, valid_beat) & Mux(xact.is(Acquire.putAtomicType), FillInterleaved(amoAluOperandBits, UIntToOH(offset)), @@ -746,6 +747,7 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { io.iacq().client_xact_id != xact.client_xact_id), "AcquireTracker accepted data beat from different client transaction than initial request.") + //TODO: Assumes in-order network assert(!(state === s_idle && io.inner.acquire.fire() && io.iacq().addr_beat != UInt(0)), "AcquireTracker initialized with a tail data beat.") @@ -765,6 +767,7 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { when(io.inner.acquire.valid) { xact_src := io.inner.acquire.bits.header.src xact := io.iacq() + xact.data := UInt(0) data_buffer(io.iacq().addr_beat) := io.iacq().data collect_iacq_data := io.iacq().hasMultibeatData() iacq_data_valid := io.iacq().hasData() << io.iacq().addr_beat From 145e15701e8e2dc6b34c252487201c0801b0d8cb Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Mon, 16 Mar 2015 18:47:16 -0700 Subject: [PATCH 02/10] bugfix where an in-progress acquire can be blocked by another acquire tracker being free'd up in between --- uncore/src/main/scala/cache.scala | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/uncore/src/main/scala/cache.scala b/uncore/src/main/scala/cache.scala index e1c8bce4..da000c5e 100644 --- a/uncore/src/main/scala/cache.scala +++ b/uncore/src/main/scala/cache.scala @@ -364,23 +364,28 @@ class TSHRFile(bankId: Int) extends L2HellaCacheModule (trackerList.map(_.io.incoherent) :+ wb.io.incoherent).map( _ := io.incoherent.toBits) // Handle acquire transaction initiation + val trackerAcquireIOs = trackerList.map(_.io.inner.acquire) + val alloc_arb = Module(new Arbiter(Bool(), trackerList.size)) - val trackerAcquireIOs = trackerList.map(_.io.inner.acquire) + alloc_arb.io.out.ready := Bool(true) + trackerAcquireIOs.zip(alloc_arb.io.in).foreach { + case(tracker, arb) => + arb.valid := tracker.ready + } + val alloc_idx = Vec(alloc_arb.io.in.map(_.ready)).lastIndexWhere{b: Bool => b} + val acquireMatchList = trackerList.map(_.io.has_acquire_match) val any_acquire_matches = acquireMatchList.reduce(_||_) - val alloc_idx = Vec(alloc_arb.io.in.map(_.ready)).lastIndexWhere{b: Bool => b} val match_idx = Vec(acquireMatchList).indexWhere{b: Bool => b} + val acquire_idx = Mux(any_acquire_matches, match_idx, alloc_idx) - trackerAcquireIOs.zip(alloc_arb.io.in).zipWithIndex.foreach { - case((tracker, arb), i) => - arb.valid := tracker.ready - tracker.bits := io.inner.acquire.bits - tracker.valid := arb.ready && (acquire_idx === UInt(i)) - } val block_acquires = trackerList.map(_.io.has_acquire_conflict).reduce(_||_) - io.inner.acquire.ready := trackerAcquireIOs.map(_.ready).reduce(_||_) && - !block_acquires - alloc_arb.io.out.ready := io.inner.acquire.valid && !block_acquires + io.inner.acquire.ready := trackerAcquireIOs.map(_.ready).reduce(_||_) && !block_acquires + trackerAcquireIOs.zipWithIndex.foreach { + case(tracker, i) => + tracker.bits := io.inner.acquire.bits + tracker.valid := io.inner.acquire.valid && !block_acquires && (acquire_idx === UInt(i)) + } // Wire releases from clients val release_idx = Vec(trackerList.map(_.io.has_release_match) :+ From 36fc67dc7c1011afa763350c819657a911fdd74b Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 16 Mar 2015 15:06:46 -0700 Subject: [PATCH 03/10] cleanup mergeData buffer --- uncore/src/main/scala/cache.scala | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/uncore/src/main/scala/cache.scala b/uncore/src/main/scala/cache.scala index da000c5e..b6d22d5e 100644 --- a/uncore/src/main/scala/cache.scala +++ b/uncore/src/main/scala/cache.scala @@ -642,23 +642,23 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { def mergeData[T <: HasTileLinkData] (byteAddrBits: Int, dataBits: Int) - (buffer: Vec[UInt], beat: UInt, incoming: UInt) { + (beat: UInt, incoming: UInt) { val old_data = incoming // Refilled, written back, or de-cached data - val new_data = buffer(beat) // Newly Put data is in the buffer + val new_data = data_buffer(beat) // Newly Put data is in the buffer val amoOpSz = UInt(amoAluOperandBits) - val offset = xact.addr_byte()(byteAddrBits-1, log2Up(amoAluOperandBits/8)) - amoalu.io.lhs := old_data >> offset*amoOpSz - amoalu.io.rhs := new_data >> offset*amoOpSz + val amoOffset = xact.addr_byte()(byteAddrBits-1, log2Up(amoAluOperandBits/8)) + amoalu.io.lhs := old_data >> amoOffset*amoOpSz + amoalu.io.rhs := new_data >> amoOffset*amoOpSz val valid_beat = (xact.is(Acquire.putBlockType) || xact.addr_beat === beat) && xact.isBuiltInType() // Only custom a_types have data for now val wmask = Fill(dataBits, valid_beat) & Mux(xact.is(Acquire.putAtomicType), - FillInterleaved(amoAluOperandBits, UIntToOH(offset)), + FillInterleaved(amoAluOperandBits, UIntToOH(amoOffset)), Mux(xact.is(Acquire.putBlockType) || xact.is(Acquire.putType), - FillInterleaved(8, xact.write_mask()), + FillInterleaved(8, write_mask_buffer(beat)), UInt(0, width = dataBits))) - buffer(beat) := ~wmask & old_data | wmask & - Mux(xact.is(Acquire.putAtomicType), amoalu.io.out << offset*amoOpSz, new_data) + data_buffer(beat) := ~wmask & old_data | wmask & + Mux(xact.is(Acquire.putAtomicType), amoalu.io.out << amoOffset*amoOpSz, new_data) when(xact.is(Acquire.putAtomicType) && valid_beat) { amo_result := old_data } } val mergeDataInternal = mergeData(log2Up(rowBits/8), rowBits) _ @@ -831,7 +831,7 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { when(io.irel().hasData()) { irel_had_data := Bool(true) pending_coh.outer := pending_ocoh_on_irel - mergeDataInner(data_buffer, io.irel().addr_beat, io.irel().data) + mergeDataInner(io.irel().addr_beat, io.irel().data) } // We don't decrement release_count until we've received all the data beats. when(!io.irel().hasMultibeatData() || irel_data_done) { @@ -852,7 +852,7 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { io.outer.grant.ready := Bool(true) when(io.outer.grant.valid) { when(io.ognt().hasData()) { - mergeDataOuter(data_buffer, io.ognt().addr_beat, io.ognt().data) + mergeDataOuter(io.ognt().addr_beat, io.ognt().data) ognt_had_data := Bool(true) } when(ognt_data_done) { @@ -879,13 +879,13 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { is(s_data_read) { io.data.read.valid := !collect_iacq_data || iacq_data_valid(read_data_cnt) when(io.data.resp.valid) { - mergeDataInternal(data_buffer, io.data.resp.bits.addr_beat, io.data.resp.bits.data) + mergeDataInternal(io.data.resp.bits.addr_beat, io.data.resp.bits.data) } when(read_data_done) { state := s_data_resp } } is(s_data_resp) { when(io.data.resp.valid) { - mergeDataInternal(data_buffer, io.data.resp.bits.addr_beat, io.data.resp.bits.data) + mergeDataInternal(io.data.resp.bits.addr_beat, io.data.resp.bits.data) } when(resp_data_done) { state := Mux(xact.hasData(), s_data_write, s_inner_grant) From f6fe037e303d708101c0d1d7b5716a9efb3919ab Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 16 Mar 2015 23:41:56 -0700 Subject: [PATCH 04/10] first cut of merging puts/gets --- uncore/src/main/scala/cache.scala | 260 ++++++++++++++++----------- uncore/src/main/scala/tilelink.scala | 24 ++- 2 files changed, 170 insertions(+), 114 deletions(-) diff --git a/uncore/src/main/scala/cache.scala b/uncore/src/main/scala/cache.scala index b6d22d5e..a1defb3b 100644 --- a/uncore/src/main/scala/cache.scala +++ b/uncore/src/main/scala/cache.scala @@ -171,6 +171,9 @@ abstract trait L2HellaCacheParameters extends CacheParameters with CoherenceAgen val amoAluOperandBits = params(AmoAluOperandBits) require(amoAluOperandBits <= innerDataBits) require(rowBits == innerDataBits) // TODO: relax this by improving s_data_* states + val nSecondaryMisses = 4 + val enableGetMerging = false + val enablePutMerging = true } abstract class L2HellaCacheBundle extends Bundle with L2HellaCacheParameters @@ -316,6 +319,10 @@ class L2DataArray(delay: Int) extends L2HellaCacheModule { io.write.ready := Bool(true) } +class L2SecondaryMissInfo extends TLBundle + with HasTileLinkBeatId + with HasClientTransactionId + class L2HellaCacheBank(bankId: Int) extends HierarchicalCoherenceAgent with L2HellaCacheParameters { require(isPow2(nSets)) @@ -570,6 +577,7 @@ class L2VoluntaryReleaseTracker(trackerId: Int, bankId: Int) extends L2XactTrack } } + class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { val io = new L2XactTrackerIO @@ -578,15 +586,14 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { val xact_src = Reg(io.inner.acquire.bits.header.src.clone) val xact = Reg(Bundle(new Acquire, { case TLId => params(InnerTLId) })) - val data_buffer = Vec.fill(innerDataBeats) { - Reg(io.iacq().data.clone) - } - val amo_result = xact.data + val data_buffer = Vec.fill(innerDataBeats){ Reg(UInt(width = innerDataBits)) } + val wmask_buffer = Vec.fill(innerDataBeats){ Reg(Bits(width = innerDataBits/8)) } val xact_tag_match = Reg{ Bool() } val xact_meta = Reg{ new L2Metadata } val xact_way_en = Reg{ Bits(width = nWays) } val pending_coh = Reg{ xact_meta.coh.clone } val pending_finish = Reg{ io.outer.finish.bits.clone } + val ignt_q = Module(new Queue(new L2SecondaryMissInfo, nSecondaryMisses))(innerTLParams) val is_hit = xact_tag_match && xact_meta.coh.outer.isHit(xact.op_code()) val do_allocate = xact.allocate() @@ -594,6 +601,29 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { (xact_meta.coh.outer.requiresVoluntaryWriteback() || xact_meta.coh.inner.requiresProbesOnVoluntaryWriteback()) + val release_count = Reg(init = UInt(0, width = log2Up(nCoherentClients+1))) + val pending_probes = Reg(init = Bits(0, width = nCoherentClients)) + val curr_probe_dst = PriorityEncoder(pending_probes) + val full_sharers = io.meta.resp.bits.meta.coh.inner.full() + val probe_self = xact.requiresSelfProbe() + val mask_self = Mux(probe_self, + full_sharers | UInt(UInt(1) << xact_src, width = nCoherentClients), + full_sharers & ~UInt(UInt(1) << xact_src, width = nCoherentClients)) + val mask_incoherent = mask_self & ~io.incoherent.toBits + + val irel_had_data = Reg(init = Bool(false)) + val ognt_had_data = Reg(init = Bool(false)) + val irel_data_done = connectIncomingDataBeatCounter(io.inner.release) + val ognt_data_done = connectIncomingDataBeatCounter(io.outer.grant) + val (oacq_data_cnt, oacq_data_done) = connectOutgoingDataBeatCounter(io.outer.acquire, xact.addr_beat) + val (ignt_data_idx, ignt_data_done) = connectOutgoingDataBeatCounter(io.inner.grant, ignt_q.io.deq.bits.addr_beat) + + val pending_reads = Reg(init=Bits(0, width = innerDataBeats)) + val pending_writes = Reg(init=Bits(0, width = innerDataBeats)) + val pending_resps = Reg(init=Bits(0, width = innerDataBeats)) + val curr_read_beat = PriorityEncoder(pending_reads) + val curr_write_beat = PriorityEncoder(pending_writes) + val pending_coh_on_hit = HierarchicalMetadata( io.meta.resp.bits.meta.coh.inner, io.meta.resp.bits.meta.coh.outer.onHit(xact.op_code())) @@ -610,29 +640,7 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { dst = io.inner.grant.bits.header.dst), pending_coh.outer) - val release_count = Reg(init = UInt(0, width = log2Up(nCoherentClients+1))) - val pending_probes = Reg(init = Bits(0, width = nCoherentClients)) - val curr_p_id = PriorityEncoder(pending_probes) - val full_sharers = io.meta.resp.bits.meta.coh.inner.full() - val probe_self = xact.requiresSelfProbe() - val mask_self = Mux(probe_self, - full_sharers | UInt(UInt(1) << xact_src, width = nCoherentClients), - full_sharers & ~UInt(UInt(1) << xact_src, width = nCoherentClients)) - val mask_incoherent = mask_self & ~io.incoherent.toBits - - val collect_iacq_data = Reg(init=Bool(false)) - val iacq_data_valid = Reg(init=Bits(0, width = innerDataBeats)) - val irel_had_data = Reg(init = Bool(false)) - val ognt_had_data = Reg(init = Bool(false)) - val iacq_data_done = connectIncomingDataBeatCounter(io.inner.acquire) - val irel_data_done = connectIncomingDataBeatCounter(io.inner.release) - val ognt_data_done = connectIncomingDataBeatCounter(io.outer.grant) - val (ignt_data_cnt, ignt_data_done) = connectOutgoingDataBeatCounter(io.inner.grant, xact.addr_beat) - val (oacq_data_cnt, oacq_data_done) = connectOutgoingDataBeatCounter(io.outer.acquire, xact.addr_beat) - val (read_data_cnt, read_data_done) = connectInternalDataBeatCounter(io.data.read, xact.addr_beat, !xact.isSubBlockType()) - val (write_data_cnt, write_data_done) = connectInternalDataBeatCounter(io.data.write, xact.addr_beat, !xact.isSubBlockType() || ognt_had_data || irel_had_data) - val resp_data_done = connectInternalDataBeatCounter(io.data.resp, !xact.isSubBlockType()) - + val amo_result = xact.data val amoalu = Module(new AMOALU) amoalu.io.addr := xact.addr() amoalu.io.cmd := xact.op_code() @@ -640,42 +648,62 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { amoalu.io.lhs := io.data.resp.bits.data //default amoalu.io.rhs := data_buffer.head // default - def mergeData[T <: HasTileLinkData] - (byteAddrBits: Int, dataBits: Int) - (beat: UInt, incoming: UInt) { + def mergeDataPut(beat: UInt, wmask: UInt, put_data: UInt) { + data_buffer(beat) := ~wmask & data_buffer(beat) | wmask & put_data + } + + def mergeData(dataBits: Int)(beat: UInt, incoming: UInt) { val old_data = incoming // Refilled, written back, or de-cached data - val new_data = data_buffer(beat) // Newly Put data is in the buffer - val amoOpSz = UInt(amoAluOperandBits) - val amoOffset = xact.addr_byte()(byteAddrBits-1, log2Up(amoAluOperandBits/8)) - amoalu.io.lhs := old_data >> amoOffset*amoOpSz - amoalu.io.rhs := new_data >> amoOffset*amoOpSz - val valid_beat = (xact.is(Acquire.putBlockType) || xact.addr_beat === beat) && - xact.isBuiltInType() // Only custom a_types have data for now - val wmask = Fill(dataBits, valid_beat) & - Mux(xact.is(Acquire.putAtomicType), - FillInterleaved(amoAluOperandBits, UIntToOH(amoOffset)), - Mux(xact.is(Acquire.putBlockType) || xact.is(Acquire.putType), - FillInterleaved(8, write_mask_buffer(beat)), - UInt(0, width = dataBits))) - data_buffer(beat) := ~wmask & old_data | wmask & - Mux(xact.is(Acquire.putAtomicType), amoalu.io.out << amoOffset*amoOpSz, new_data) + val new_data = data_buffer(beat) // Newly Put data is already in the buffer + amoalu.io.lhs := old_data >> xact.amo_shift_bits() + amoalu.io.rhs := new_data >> xact.amo_shift_bits() + val valid_beat = (xact.is(Acquire.putBlockType) || xact.addr_beat === beat) + val wmask = Fill(dataBits, valid_beat) & wmask_buffer(beat) + data_buffer(beat) := ~wmask & old_data | + wmask & Mux(xact.is(Acquire.putAtomicType), + amoalu.io.out << xact.amo_shift_bits(), + new_data) when(xact.is(Acquire.putAtomicType) && valid_beat) { amo_result := old_data } } - val mergeDataInternal = mergeData(log2Up(rowBits/8), rowBits) _ - val mergeDataInner = mergeData(innerByteAddrBits, innerDataBits) _ - val mergeDataOuter = mergeData(outerByteAddrBits, outerDataBits) _ + val mergeDataInternal = mergeData(rowBits) _ + val mergeDataInner = mergeData(innerDataBits) _ + val mergeDataOuter = mergeData(outerDataBits) _ + + val can_merge_iacq_get = Bool(enableGetMerging) && + (xact.isBuiltInType(Acquire.getType) && + io.iacq().isBuiltInType(Acquire.getType)) && + (xact_src === io.inner.acquire.bits.header.src) && + xact.conflicts(io.iacq()) && + Vec(s_meta_read, s_meta_resp, s_wb_req, s_wb_resp, + s_probe, s_outer_acquire, s_outer_grant, + s_outer_finish).contains(state) && + do_allocate && + ignt_q.io.enq.ready + //TODO: mix Puts and PutBlocks + val can_merge_iacq_put = ((Bool(enablePutMerging) && + (xact.isBuiltInType(Acquire.putType) && + io.iacq().isBuiltInType(Acquire.putType))) || + (xact.isBuiltInType(Acquire.putBlockType) && + io.iacq().isBuiltInType(Acquire.putBlockType))) && + (xact_src === io.inner.acquire.bits.header.src) && + (xact.client_xact_id === io.iacq().client_xact_id) && + xact.conflicts(io.iacq()) && + Vec(s_meta_read, s_meta_resp, s_wb_req, s_wb_resp, + s_probe, s_outer_acquire, s_outer_grant, + s_outer_finish, s_data_read, + s_data_resp).contains(state) && + do_allocate && + ignt_q.io.enq.ready - //TODO: Allow hit under miss for stores val in_same_set = xact.addr_block(idxMSB,idxLSB) === io.iacq().addr_block(idxMSB,idxLSB) - io.has_acquire_conflict := (xact.conflicts(io.iacq()) || in_same_set) && - (state != s_idle) && - !collect_iacq_data - io.has_acquire_match := xact.conflicts(io.iacq()) && - collect_iacq_data io.has_release_match := xact.conflicts(io.irel()) && !io.irel().isVoluntary() && (state === s_probe) + io.has_acquire_match := can_merge_iacq_put || can_merge_iacq_get + io.has_acquire_conflict := (xact.conflicts(io.iacq()) || in_same_set) && + (state != s_idle) && + !io.has_acquire_match // If we're allocating in this cache, we can use the current metadata // to make an appropriate custom Acquire, otherwise we copy over the @@ -697,21 +725,25 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { io.inner.probe.valid := Bool(false) io.inner.probe.bits.header.src := UInt(bankId) - io.inner.probe.bits.header.dst := curr_p_id + io.inner.probe.bits.header.dst := curr_probe_dst io.inner.probe.bits.payload := pending_coh.inner.makeProbe(xact) - io.inner.grant.valid := Bool(false) + io.inner.grant.valid := state === s_inner_grant && ignt_q.io.deq.valid io.inner.grant.bits.header.src := UInt(bankId) io.inner.grant.bits.header.dst := xact_src io.inner.grant.bits.payload := pending_coh.inner.makeGrant( acq = xact, manager_xact_id = UInt(trackerId), - addr_beat = ignt_data_cnt, + addr_beat = ignt_data_idx, data = Mux(xact.is(Acquire.putAtomicType), amo_result, - data_buffer(ignt_data_cnt))) + data_buffer(ignt_data_idx))) + io.ignt().client_xact_id := ignt_q.io.deq.bits.client_xact_id + ignt_q.io.deq.ready := ignt_data_done - io.inner.acquire.ready := Bool(false) + io.inner.acquire.ready := state === s_idle || + can_merge_iacq_put || + can_merge_iacq_get io.inner.release.ready := Bool(false) io.inner.finish.ready := Bool(false) @@ -719,14 +751,14 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { io.data.read.bits.id := UInt(trackerId) io.data.read.bits.way_en := xact_way_en io.data.read.bits.addr_idx := xact.addr_block(idxMSB,idxLSB) - io.data.read.bits.addr_beat := read_data_cnt + io.data.read.bits.addr_beat := curr_read_beat io.data.write.valid := Bool(false) io.data.write.bits.id := UInt(trackerId) io.data.write.bits.way_en := xact_way_en io.data.write.bits.addr_idx := xact.addr_block(idxMSB,idxLSB) - io.data.write.bits.addr_beat := write_data_cnt + io.data.write.bits.addr_beat := curr_write_beat io.data.write.bits.wmask := SInt(-1) - io.data.write.bits.data := data_buffer(write_data_cnt) + io.data.write.bits.data := data_buffer(curr_write_beat) io.meta.read.valid := Bool(false) io.meta.read.bits.id := UInt(trackerId) io.meta.read.bits.idx := xact.addr_block(idxMSB,idxLSB) @@ -744,38 +776,16 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { io.wb.req.bits.way_en := xact_way_en io.wb.req.bits.id := UInt(trackerId) - assert(!(state != s_idle && collect_iacq_data && io.inner.acquire.fire() && - io.inner.acquire.bits.header.src != xact_src), - "AcquireTracker accepted data beat from different network source than initial request.") - - assert(!(state != s_idle && collect_iacq_data && io.inner.acquire.fire() && - io.iacq().client_xact_id != xact.client_xact_id), - "AcquireTracker accepted data beat from different client transaction than initial request.") - - //TODO: Assumes in-order network - assert(!(state === s_idle && io.inner.acquire.fire() && - io.iacq().addr_beat != UInt(0)), - "AcquireTracker initialized with a tail data beat.") - - when(collect_iacq_data) { - io.inner.acquire.ready := Bool(true) - when(io.inner.acquire.valid) { - data_buffer(io.iacq().addr_beat) := io.iacq().data - iacq_data_valid(io.iacq().addr_beat) := Bool(true) - } - when(iacq_data_done) { collect_iacq_data := Bool(false) } - } - switch (state) { is(s_idle) { - io.inner.acquire.ready := Bool(true) when(io.inner.acquire.valid) { xact_src := io.inner.acquire.bits.header.src xact := io.iacq() xact.data := UInt(0) - data_buffer(io.iacq().addr_beat) := io.iacq().data - collect_iacq_data := io.iacq().hasMultibeatData() - iacq_data_valid := io.iacq().hasData() << io.iacq().addr_beat + wmask_buffer.foreach { w => w := UInt(0) } + pending_reads := Mux(io.iacq().isSubBlockType(), SInt(0), SInt(-1)).toUInt + pending_writes := UInt(0) + pending_resps := UInt(0) irel_had_data := Bool(false) ognt_had_data := Bool(false) state := s_meta_read @@ -819,7 +829,7 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { // Send probes io.inner.probe.valid := pending_probes != UInt(0) when(io.inner.probe.ready) { - pending_probes := pending_probes & ~UIntToOH(curr_p_id) + pending_probes := pending_probes & ~UIntToOH(curr_probe_dst) } // Handle releases, which may have data being written back io.inner.release.ready := Bool(true) @@ -829,9 +839,9 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { //TODO: make sure cacq data is actually present before accpeting // release data to merge! when(io.irel().hasData()) { - irel_had_data := Bool(true) pending_coh.outer := pending_ocoh_on_irel mergeDataInner(io.irel().addr_beat, io.irel().data) + pending_writes := pending_writes | UIntToOH(io.irel().addr_beat) } // We don't decrement release_count until we've received all the data beats. when(!io.irel().hasMultibeatData() || irel_data_done) { @@ -839,11 +849,13 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { } } when(release_count === UInt(0)) { - state := Mux(is_hit, Mux(irel_had_data, s_data_write, s_data_read), s_outer_acquire) + state := Mux(is_hit, + Mux(pending_writes.orR, s_data_write, s_data_read), + s_outer_acquire) } } is(s_outer_acquire) { - io.outer.acquire.valid := !iacq_data_done // collect all data before refilling + io.outer.acquire.valid := Bool(true) when(oacq_data_done) { state := s_outer_grant } @@ -853,7 +865,7 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { when(io.outer.grant.valid) { when(io.ognt().hasData()) { mergeDataOuter(io.ognt().addr_beat, io.ognt().data) - ognt_had_data := Bool(true) + pending_writes := pending_writes | UIntToOH(io.ognt().addr_beat) } when(ognt_data_done) { pending_coh := pending_coh_on_ognt @@ -864,7 +876,7 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { state := s_outer_finish }.otherwise { state := Mux(!do_allocate, s_inner_grant, - Mux(io.ognt().hasData(), s_data_write, s_data_read)) + Mux(pending_writes.orR, s_data_write, s_data_read)) } } } @@ -873,31 +885,39 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { io.outer.finish.valid := Bool(true) when(io.outer.finish.ready) { state := Mux(!do_allocate, s_inner_grant, - Mux(ognt_had_data, s_data_write, s_data_read)) + Mux(pending_writes.orR, s_data_write, s_data_read)) } } is(s_data_read) { - io.data.read.valid := !collect_iacq_data || iacq_data_valid(read_data_cnt) + io.data.read.valid := pending_reads.orR + when(io.data.read.ready) { + pending_resps := pending_resps | UIntToOH(curr_read_beat) + pending_reads := pending_reads & ~UIntToOH(curr_read_beat) + when(PopCount(pending_reads) <= UInt(1)) { state := s_data_resp } + } when(io.data.resp.valid) { mergeDataInternal(io.data.resp.bits.addr_beat, io.data.resp.bits.data) + pending_resps := pending_resps & ~UIntToOH(io.data.resp.bits.addr_beat) } - when(read_data_done) { state := s_data_resp } } is(s_data_resp) { when(io.data.resp.valid) { mergeDataInternal(io.data.resp.bits.addr_beat, io.data.resp.bits.data) - } - when(resp_data_done) { - state := Mux(xact.hasData(), s_data_write, s_inner_grant) + pending_resps := pending_resps & ~UIntToOH(io.data.resp.bits.addr_beat) + when(PopCount(pending_resps) <= UInt(1)) { + state := Mux(pending_writes.orR, s_data_write, s_inner_grant) + } } } is(s_data_write) { - io.data.write.valid := Bool(true) - when(write_data_done) { state := s_inner_grant } + io.data.write.valid := pending_writes.orR //TODO make sure all acquire data is present + when(io.data.write.ready) { + pending_writes := pending_writes & ~UIntToOH(curr_write_beat) + when(PopCount(pending_writes) <= UInt(1)) { state := s_inner_grant } + } } is(s_inner_grant) { - io.inner.grant.valid := Bool(true) - when(ignt_data_done) { + when(ignt_data_done && ignt_q.io.count === UInt(1)) { val meta_dirty = !is_hit || pending_coh_on_ignt != xact_meta.coh when(meta_dirty) { pending_coh := pending_coh_on_ignt } state := Mux(meta_dirty, s_meta_write, @@ -915,6 +935,32 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { when(io.inner.finish.valid) { state := s_idle } } } + + ignt_q.io.enq.valid := io.inner.acquire.fire() && + (state === s_idle || !xact.hasMultibeatData()) + ignt_q.io.enq.bits.client_xact_id := io.iacq().client_xact_id + ignt_q.io.enq.bits.addr_beat := io.iacq().addr_beat + + // Handle Get and Put merging + when(io.inner.acquire.fire()) { + val beat = io.iacq().addr_beat + when(io.iacq().hasData()) { + mergeDataPut(beat, io.iacq().wmask(), io.iacq().data) + wmask_buffer(beat) := io.iacq().wmask() | wmask_buffer(beat) + //iacq_data_valid(beat) := Bool(true) + pending_writes(beat) := Bool(true) + } + pending_reads(beat) := Bool(true) + } + + assert(!(state != s_idle && io.inner.acquire.fire() && + io.inner.acquire.bits.header.src != xact_src), + "AcquireTracker accepted data beat from different network source than initial request.") + + //TODO: Assumes in-order network + assert(!(state === s_idle && io.inner.acquire.fire() && + io.iacq().addr_beat != UInt(0)), + "AcquireTracker initialized with a tail data beat.") } class L2WritebackReq extends L2HellaCacheBundle @@ -952,7 +998,7 @@ class L2WritebackUnit(trackerId: Int, bankId: Int) extends L2XactTracker { val irel_had_data = Reg(init = Bool(false)) val release_count = Reg(init = UInt(0, width = log2Up(nCoherentClients+1))) val pending_probes = Reg(init = Bits(0, width = nCoherentClients)) - val curr_p_id = PriorityEncoder(pending_probes) + val curr_probe_dst = PriorityEncoder(pending_probes) val full_sharers = io.wb.req.bits.coh.inner.full() val mask_incoherent = full_sharers & ~io.incoherent.toBits @@ -988,7 +1034,7 @@ class L2WritebackUnit(trackerId: Int, bankId: Int) extends L2XactTracker { io.inner.probe.valid := Bool(false) io.inner.probe.bits.header.src := UInt(bankId) - io.inner.probe.bits.header.dst := curr_p_id + io.inner.probe.bits.header.dst := curr_probe_dst io.inner.probe.bits.payload := xact_coh.inner.makeProbeForVoluntaryWriteback(xact_addr_block) @@ -1029,7 +1075,7 @@ class L2WritebackUnit(trackerId: Int, bankId: Int) extends L2XactTracker { // Send probes io.inner.probe.valid := pending_probes != UInt(0) when(io.inner.probe.ready) { - pending_probes := pending_probes & ~UIntToOH(curr_p_id) + pending_probes := pending_probes & ~UIntToOH(curr_probe_dst) } // Handle releases, which may have data being written back io.inner.release.ready := Bool(true) diff --git a/uncore/src/main/scala/tilelink.scala b/uncore/src/main/scala/tilelink.scala index 7bd218ad..80f090d7 100644 --- a/uncore/src/main/scala/tilelink.scala +++ b/uncore/src/main/scala/tilelink.scala @@ -38,6 +38,7 @@ abstract trait TileLinkParameters extends UsesParameters { val tlGrantTypeBits = max(log2Up(Grant.nBuiltInTypes), tlCoh.grantTypeWidth) + 1 val tlNetworkPreservesPointToPointOrdering = params(TLNetworkIsOrderedP2P) + val amoAluOperandBits = params(AmoAluOperandBits) } abstract class TLBundle extends Bundle with TileLinkParameters @@ -101,13 +102,22 @@ class Acquire extends ClientToManagerChannel M_XWR, union(opSizeOff-1, opCodeOff)) def op_size(dummy: Int = 0) = union(addrByteOff-1, opSizeOff) def addr_byte(dummy: Int = 0) = union(addrByteMSB-1, addrByteOff) - def write_mask(dummy: Int = 0) = union(tlWriteMaskBits, 1) + private def amo_offset(dummy: Int = 0) = addr_byte()(tlByteAddrBits-1, log2Up(amoAluOperandBits/8)) + def amo_shift_bits(dummy: Int = 0) = UInt(amoAluOperandBits)*amo_offset() + def wmask(dummy: Int = 0) = + Mux(isBuiltInType(Acquire.putAtomicType), + FillInterleaved(amoAluOperandBits, UIntToOH(amo_offset())), + Mux(isBuiltInType(Acquire.putBlockType) || isBuiltInType(Acquire.putType), + FillInterleaved(8, union(tlWriteMaskBits, 1)), + UInt(0, width = tlDataBits))) + def addr(dummy: Int = 0) = Cat(this.addr_block, this.addr_beat, this.addr_byte()) // Other helper funcs - def is(t: UInt) = a_type === t + def is(t: UInt) = a_type === t //TODO: make this more opaque; def ===? def isBuiltInType(dummy: Int = 0): Bool = is_builtin_type + def isBuiltInType(t: UInt): Bool = is_builtin_type && a_type === t def isSubBlockType(dummy: Int = 0): Bool = isBuiltInType() && Acquire.typesOnSubBlocks.contains(a_type) @@ -119,7 +129,7 @@ class Acquire extends ClientToManagerChannel def hasMultibeatData(dummy: Int = 0): Bool = Bool(tlDataBeats > 1) && isBuiltInType() && Acquire.typesWithMultibeatData.contains(a_type) - def requiresSelfProbe(dummy: Int = 0) = Bool(false) + def requiresSelfProbe(dummy: Int = 0) = isBuiltInType()//Bool(false) def getBuiltInGrantType(dummy: Int = 0): UInt = { MuxLookup(this.a_type, Grant.putAckType, Array( @@ -229,7 +239,7 @@ object Put { addr_block: UInt, addr_beat: UInt, data: UInt, - write_mask: UInt = Acquire.fullWriteMask): Acquire = { + wmask: UInt = Acquire.fullWriteMask): Acquire = { Acquire( is_builtin_type = Bool(true), a_type = Acquire.putType, @@ -237,7 +247,7 @@ object Put { addr_beat = addr_beat, client_xact_id = client_xact_id, data = data, - union = Cat(write_mask, Bool(true))) + union = Cat(wmask, Bool(true))) } } @@ -248,7 +258,7 @@ object PutBlock { addr_block: UInt, addr_beat: UInt, data: UInt, - write_mask: UInt): Acquire = { + wmask: UInt): Acquire = { Acquire( is_builtin_type = Bool(true), a_type = Acquire.putBlockType, @@ -256,7 +266,7 @@ object PutBlock { addr_block = addr_block, addr_beat = addr_beat, data = data, - union = Cat(write_mask, (write_mask != Acquire.fullWriteMask))) + union = Cat(wmask, (wmask != Acquire.fullWriteMask))) } def apply( client_xact_id: UInt, From 40a5059cee6311f6bd5e74fc4a46aee5f34d99fe Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 17 Mar 2015 00:23:11 -0700 Subject: [PATCH 05/10] clean up acquire alloc in hub --- uncore/src/main/scala/broadcast.scala | 24 ++++++++++++++---------- uncore/src/main/scala/cache.scala | 3 +-- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/uncore/src/main/scala/broadcast.scala b/uncore/src/main/scala/broadcast.scala index c8014f45..2564140b 100644 --- a/uncore/src/main/scala/broadcast.scala +++ b/uncore/src/main/scala/broadcast.scala @@ -48,25 +48,29 @@ class L2BroadcastHub(bankId: Int) extends ManagerCoherenceAgent when (sdq_enq) { sdq(sdq_alloc_id) := io.iacq().data } // Handle acquire transaction initiation - val alloc_arb = Module(new Arbiter(Bool(), trackerList.size)) val trackerAcquireIOs = trackerList.map(_.io.inner.acquire) + + val alloc_arb = Module(new Arbiter(Bool(), trackerList.size)) + alloc_arb.io.out.ready := Bool(true) + trackerAcquireIOs.zip(alloc_arb.io.in).foreach { + case(tracker, arb) => arb.valid := tracker.ready + } + val alloc_idx = Vec(alloc_arb.io.in.map(_.ready)).lastIndexWhere{b: Bool => b} + val acquireMatchList = trackerList.map(_.io.has_acquire_match) val any_acquire_matches = acquireMatchList.reduce(_||_) - val alloc_idx = Vec(alloc_arb.io.in.map(_.ready)).lastIndexWhere{b: Bool => b} val match_idx = Vec(acquireMatchList).indexWhere{b: Bool => b} + val acquire_idx = Mux(any_acquire_matches, match_idx, alloc_idx) - trackerAcquireIOs.zip(alloc_arb.io.in).zipWithIndex.foreach { - case((tracker, arb), i) => - arb.valid := tracker.ready + val block_acquires = trackerList.map(_.io.has_acquire_conflict).reduce(_||_) + io.inner.acquire.ready := trackerAcquireIOs.map(_.ready).reduce(_||_) && !block_acquires && sdq_rdy + trackerAcquireIOs.zipWithIndex.foreach { + case(tracker, i) => tracker.bits := io.inner.acquire.bits tracker.bits.payload.data := DataQueueLocation(sdq_alloc_id, inStoreQueue).toBits - tracker.valid := arb.ready && (acquire_idx === UInt(i)) + tracker.valid := io.inner.acquire.valid && !block_acquires && (acquire_idx === UInt(i)) } - val block_acquires = trackerList.map(_.io.has_acquire_conflict).reduce(_||_) - io.inner.acquire.ready := trackerAcquireIOs.map(_.ready).reduce(_||_) && - sdq_rdy && !block_acquires - alloc_arb.io.out.ready := io.inner.acquire.valid && sdq_rdy && !block_acquires // Queue to store impending Voluntary Release data val voluntary = io.irel().isVoluntary() diff --git a/uncore/src/main/scala/cache.scala b/uncore/src/main/scala/cache.scala index a1defb3b..4af26399 100644 --- a/uncore/src/main/scala/cache.scala +++ b/uncore/src/main/scala/cache.scala @@ -376,8 +376,7 @@ class TSHRFile(bankId: Int) extends L2HellaCacheModule val alloc_arb = Module(new Arbiter(Bool(), trackerList.size)) alloc_arb.io.out.ready := Bool(true) trackerAcquireIOs.zip(alloc_arb.io.in).foreach { - case(tracker, arb) => - arb.valid := tracker.ready + case(tracker, arb) => arb.valid := tracker.ready } val alloc_idx = Vec(alloc_arb.io.in.map(_.ready)).lastIndexWhere{b: Bool => b} From 672fbb574f8d34066f946680d5c34addf6d2614e Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 17 Mar 2015 00:23:35 -0700 Subject: [PATCH 06/10] fix assert --- uncore/src/main/scala/cache.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/uncore/src/main/scala/cache.scala b/uncore/src/main/scala/cache.scala index 4af26399..953aa2b6 100644 --- a/uncore/src/main/scala/cache.scala +++ b/uncore/src/main/scala/cache.scala @@ -958,6 +958,7 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { //TODO: Assumes in-order network assert(!(state === s_idle && io.inner.acquire.fire() && + !io.iacq().isSubBlockType() && io.iacq().addr_beat != UInt(0)), "AcquireTracker initialized with a tail data beat.") } From b21fcdfbe0836646a2a5a13f0a5b1369dab6c7e1 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 17 Mar 2015 00:24:59 -0700 Subject: [PATCH 07/10] turn off self probes again --- uncore/src/main/scala/tilelink.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uncore/src/main/scala/tilelink.scala b/uncore/src/main/scala/tilelink.scala index 80f090d7..56dfa7db 100644 --- a/uncore/src/main/scala/tilelink.scala +++ b/uncore/src/main/scala/tilelink.scala @@ -129,7 +129,7 @@ class Acquire extends ClientToManagerChannel def hasMultibeatData(dummy: Int = 0): Bool = Bool(tlDataBeats > 1) && isBuiltInType() && Acquire.typesWithMultibeatData.contains(a_type) - def requiresSelfProbe(dummy: Int = 0) = isBuiltInType()//Bool(false) + def requiresSelfProbe(dummy: Int = 0) = Bool(false) def getBuiltInGrantType(dummy: Int = 0): UInt = { MuxLookup(this.a_type, Grant.putAckType, Array( From 2d33dd8e6ee2dbf257a046a2619f89c75b613fee Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 17 Mar 2015 01:17:44 -0700 Subject: [PATCH 08/10] pending read fix --- uncore/src/main/scala/cache.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/uncore/src/main/scala/cache.scala b/uncore/src/main/scala/cache.scala index 953aa2b6..a2e78d1b 100644 --- a/uncore/src/main/scala/cache.scala +++ b/uncore/src/main/scala/cache.scala @@ -782,7 +782,9 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { xact := io.iacq() xact.data := UInt(0) wmask_buffer.foreach { w => w := UInt(0) } - pending_reads := Mux(io.iacq().isSubBlockType(), SInt(0), SInt(-1)).toUInt + pending_reads := Mux(io.iacq().isSubBlockType(), + UIntToOH(io.iacq().addr_beat), + SInt(-1, width = innerDataBeats)).toUInt pending_writes := UInt(0) pending_resps := UInt(0) irel_had_data := Bool(false) @@ -947,9 +949,9 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { mergeDataPut(beat, io.iacq().wmask(), io.iacq().data) wmask_buffer(beat) := io.iacq().wmask() | wmask_buffer(beat) //iacq_data_valid(beat) := Bool(true) - pending_writes(beat) := Bool(true) + pending_writes := pending_writes | UIntToOH(io.iacq().addr_beat) } - pending_reads(beat) := Bool(true) + when(state != s_idle) { pending_reads := pending_reads | UIntToOH(io.iacq().addr_beat) } } assert(!(state != s_idle && io.inner.acquire.fire() && From fd8f6ff0269808bdc9bc2dc24a28d22e7f5890d0 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 17 Mar 2015 02:10:30 -0700 Subject: [PATCH 09/10] overlapping read/resps in l2 fix --- uncore/src/main/scala/cache.scala | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/uncore/src/main/scala/cache.scala b/uncore/src/main/scala/cache.scala index a2e78d1b..72ef4a79 100644 --- a/uncore/src/main/scala/cache.scala +++ b/uncore/src/main/scala/cache.scala @@ -900,6 +900,11 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { mergeDataInternal(io.data.resp.bits.addr_beat, io.data.resp.bits.data) pending_resps := pending_resps & ~UIntToOH(io.data.resp.bits.addr_beat) } + when(io.data.read.ready && io.data.resp.valid) { + pending_resps := (pending_resps & + ~UIntToOH(io.data.resp.bits.addr_beat)) | + UIntToOH(curr_read_beat) + } } is(s_data_resp) { when(io.data.resp.valid) { From 257dcedcb34b341197983eba6df4f64786d0e2ae Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 17 Mar 2015 04:58:54 -0700 Subject: [PATCH 10/10] merge data wmask bugfix --- uncore/src/main/scala/cache.scala | 11 ++++++----- uncore/src/main/scala/tilelink.scala | 7 ++++--- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/uncore/src/main/scala/cache.scala b/uncore/src/main/scala/cache.scala index 72ef4a79..c52566c3 100644 --- a/uncore/src/main/scala/cache.scala +++ b/uncore/src/main/scala/cache.scala @@ -648,7 +648,9 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { amoalu.io.rhs := data_buffer.head // default def mergeDataPut(beat: UInt, wmask: UInt, put_data: UInt) { - data_buffer(beat) := ~wmask & data_buffer(beat) | wmask & put_data + val full = FillInterleaved(8, wmask) + data_buffer(beat) := (~full & data_buffer(beat)) | (full & put_data) + wmask_buffer(beat) := wmask | wmask_buffer(beat) } def mergeData(dataBits: Int)(beat: UInt, incoming: UInt) { @@ -656,10 +658,10 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { val new_data = data_buffer(beat) // Newly Put data is already in the buffer amoalu.io.lhs := old_data >> xact.amo_shift_bits() amoalu.io.rhs := new_data >> xact.amo_shift_bits() - val valid_beat = (xact.is(Acquire.putBlockType) || xact.addr_beat === beat) - val wmask = Fill(dataBits, valid_beat) & wmask_buffer(beat) + val valid_beat = (xact.isBuiltInType(Acquire.putBlockType) || xact.addr_beat === beat) + val wmask = Fill(dataBits, valid_beat) & FillInterleaved(8, wmask_buffer(beat)) data_buffer(beat) := ~wmask & old_data | - wmask & Mux(xact.is(Acquire.putAtomicType), + wmask & Mux(xact.isBuiltInType(Acquire.putAtomicType), amoalu.io.out << xact.amo_shift_bits(), new_data) when(xact.is(Acquire.putAtomicType) && valid_beat) { amo_result := old_data } @@ -952,7 +954,6 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { val beat = io.iacq().addr_beat when(io.iacq().hasData()) { mergeDataPut(beat, io.iacq().wmask(), io.iacq().data) - wmask_buffer(beat) := io.iacq().wmask() | wmask_buffer(beat) //iacq_data_valid(beat) := Bool(true) pending_writes := pending_writes | UIntToOH(io.iacq().addr_beat) } diff --git a/uncore/src/main/scala/tilelink.scala b/uncore/src/main/scala/tilelink.scala index 56dfa7db..42c17ca4 100644 --- a/uncore/src/main/scala/tilelink.scala +++ b/uncore/src/main/scala/tilelink.scala @@ -106,10 +106,11 @@ class Acquire extends ClientToManagerChannel def amo_shift_bits(dummy: Int = 0) = UInt(amoAluOperandBits)*amo_offset() def wmask(dummy: Int = 0) = Mux(isBuiltInType(Acquire.putAtomicType), - FillInterleaved(amoAluOperandBits, UIntToOH(amo_offset())), + FillInterleaved(amoAluOperandBits/8, UIntToOH(amo_offset())), Mux(isBuiltInType(Acquire.putBlockType) || isBuiltInType(Acquire.putType), - FillInterleaved(8, union(tlWriteMaskBits, 1)), - UInt(0, width = tlDataBits))) + union(tlWriteMaskBits, 1), + UInt(0, width = tlWriteMaskBits))) + def full_wmask(dummy: Int = 0) = FillInterleaved(8, wmask()) def addr(dummy: Int = 0) = Cat(this.addr_block, this.addr_beat, this.addr_byte())