1
0

first cut of merging puts/gets

This commit is contained in:
Henry Cook 2015-03-16 23:41:56 -07:00
parent 36fc67dc7c
commit f6fe037e30
2 changed files with 170 additions and 114 deletions

View File

@ -171,6 +171,9 @@ abstract trait L2HellaCacheParameters extends CacheParameters with CoherenceAgen
val amoAluOperandBits = params(AmoAluOperandBits) val amoAluOperandBits = params(AmoAluOperandBits)
require(amoAluOperandBits <= innerDataBits) require(amoAluOperandBits <= innerDataBits)
require(rowBits == innerDataBits) // TODO: relax this by improving s_data_* states require(rowBits == innerDataBits) // TODO: relax this by improving s_data_* states
val nSecondaryMisses = 4
val enableGetMerging = false
val enablePutMerging = true
} }
abstract class L2HellaCacheBundle extends Bundle with L2HellaCacheParameters abstract class L2HellaCacheBundle extends Bundle with L2HellaCacheParameters
@ -316,6 +319,10 @@ class L2DataArray(delay: Int) extends L2HellaCacheModule {
io.write.ready := Bool(true) io.write.ready := Bool(true)
} }
// Entry type of the grant queue (`ignt_q`) kept by L2AcquireTracker.
// One entry is enqueued per accepted inner Acquire, recording that request's
// `client_xact_id` and `addr_beat`; the dequeued entry later supplies the
// client transaction id and the starting beat for the matching inner Grant.
// NOTE(review): the two fields are presumably provided by the mixed-in
// HasClientTransactionId / HasTileLinkBeatId traits — confirm in tilelink.scala.
class L2SecondaryMissInfo extends TLBundle
with HasTileLinkBeatId
with HasClientTransactionId
class L2HellaCacheBank(bankId: Int) extends HierarchicalCoherenceAgent class L2HellaCacheBank(bankId: Int) extends HierarchicalCoherenceAgent
with L2HellaCacheParameters { with L2HellaCacheParameters {
require(isPow2(nSets)) require(isPow2(nSets))
@ -570,6 +577,7 @@ class L2VoluntaryReleaseTracker(trackerId: Int, bankId: Int) extends L2XactTrack
} }
} }
class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker { class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker {
val io = new L2XactTrackerIO val io = new L2XactTrackerIO
@ -578,15 +586,14 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker {
val xact_src = Reg(io.inner.acquire.bits.header.src.clone) val xact_src = Reg(io.inner.acquire.bits.header.src.clone)
val xact = Reg(Bundle(new Acquire, { case TLId => params(InnerTLId) })) val xact = Reg(Bundle(new Acquire, { case TLId => params(InnerTLId) }))
val data_buffer = Vec.fill(innerDataBeats) { val data_buffer = Vec.fill(innerDataBeats){ Reg(UInt(width = innerDataBits)) }
Reg(io.iacq().data.clone) val wmask_buffer = Vec.fill(innerDataBeats){ Reg(Bits(width = innerDataBits/8)) }
}
val amo_result = xact.data
val xact_tag_match = Reg{ Bool() } val xact_tag_match = Reg{ Bool() }
val xact_meta = Reg{ new L2Metadata } val xact_meta = Reg{ new L2Metadata }
val xact_way_en = Reg{ Bits(width = nWays) } val xact_way_en = Reg{ Bits(width = nWays) }
val pending_coh = Reg{ xact_meta.coh.clone } val pending_coh = Reg{ xact_meta.coh.clone }
val pending_finish = Reg{ io.outer.finish.bits.clone } val pending_finish = Reg{ io.outer.finish.bits.clone }
val ignt_q = Module(new Queue(new L2SecondaryMissInfo, nSecondaryMisses))(innerTLParams)
val is_hit = xact_tag_match && xact_meta.coh.outer.isHit(xact.op_code()) val is_hit = xact_tag_match && xact_meta.coh.outer.isHit(xact.op_code())
val do_allocate = xact.allocate() val do_allocate = xact.allocate()
@ -594,6 +601,29 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker {
(xact_meta.coh.outer.requiresVoluntaryWriteback() || (xact_meta.coh.outer.requiresVoluntaryWriteback() ||
xact_meta.coh.inner.requiresProbesOnVoluntaryWriteback()) xact_meta.coh.inner.requiresProbesOnVoluntaryWriteback())
val release_count = Reg(init = UInt(0, width = log2Up(nCoherentClients+1)))
val pending_probes = Reg(init = Bits(0, width = nCoherentClients))
val curr_probe_dst = PriorityEncoder(pending_probes)
val full_sharers = io.meta.resp.bits.meta.coh.inner.full()
val probe_self = xact.requiresSelfProbe()
val mask_self = Mux(probe_self,
full_sharers | UInt(UInt(1) << xact_src, width = nCoherentClients),
full_sharers & ~UInt(UInt(1) << xact_src, width = nCoherentClients))
val mask_incoherent = mask_self & ~io.incoherent.toBits
val irel_had_data = Reg(init = Bool(false))
val ognt_had_data = Reg(init = Bool(false))
val irel_data_done = connectIncomingDataBeatCounter(io.inner.release)
val ognt_data_done = connectIncomingDataBeatCounter(io.outer.grant)
val (oacq_data_cnt, oacq_data_done) = connectOutgoingDataBeatCounter(io.outer.acquire, xact.addr_beat)
val (ignt_data_idx, ignt_data_done) = connectOutgoingDataBeatCounter(io.inner.grant, ignt_q.io.deq.bits.addr_beat)
val pending_reads = Reg(init=Bits(0, width = innerDataBeats))
val pending_writes = Reg(init=Bits(0, width = innerDataBeats))
val pending_resps = Reg(init=Bits(0, width = innerDataBeats))
val curr_read_beat = PriorityEncoder(pending_reads)
val curr_write_beat = PriorityEncoder(pending_writes)
val pending_coh_on_hit = HierarchicalMetadata( val pending_coh_on_hit = HierarchicalMetadata(
io.meta.resp.bits.meta.coh.inner, io.meta.resp.bits.meta.coh.inner,
io.meta.resp.bits.meta.coh.outer.onHit(xact.op_code())) io.meta.resp.bits.meta.coh.outer.onHit(xact.op_code()))
@ -610,29 +640,7 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker {
dst = io.inner.grant.bits.header.dst), dst = io.inner.grant.bits.header.dst),
pending_coh.outer) pending_coh.outer)
val release_count = Reg(init = UInt(0, width = log2Up(nCoherentClients+1))) val amo_result = xact.data
val pending_probes = Reg(init = Bits(0, width = nCoherentClients))
val curr_p_id = PriorityEncoder(pending_probes)
val full_sharers = io.meta.resp.bits.meta.coh.inner.full()
val probe_self = xact.requiresSelfProbe()
val mask_self = Mux(probe_self,
full_sharers | UInt(UInt(1) << xact_src, width = nCoherentClients),
full_sharers & ~UInt(UInt(1) << xact_src, width = nCoherentClients))
val mask_incoherent = mask_self & ~io.incoherent.toBits
val collect_iacq_data = Reg(init=Bool(false))
val iacq_data_valid = Reg(init=Bits(0, width = innerDataBeats))
val irel_had_data = Reg(init = Bool(false))
val ognt_had_data = Reg(init = Bool(false))
val iacq_data_done = connectIncomingDataBeatCounter(io.inner.acquire)
val irel_data_done = connectIncomingDataBeatCounter(io.inner.release)
val ognt_data_done = connectIncomingDataBeatCounter(io.outer.grant)
val (ignt_data_cnt, ignt_data_done) = connectOutgoingDataBeatCounter(io.inner.grant, xact.addr_beat)
val (oacq_data_cnt, oacq_data_done) = connectOutgoingDataBeatCounter(io.outer.acquire, xact.addr_beat)
val (read_data_cnt, read_data_done) = connectInternalDataBeatCounter(io.data.read, xact.addr_beat, !xact.isSubBlockType())
val (write_data_cnt, write_data_done) = connectInternalDataBeatCounter(io.data.write, xact.addr_beat, !xact.isSubBlockType() || ognt_had_data || irel_had_data)
val resp_data_done = connectInternalDataBeatCounter(io.data.resp, !xact.isSubBlockType())
val amoalu = Module(new AMOALU) val amoalu = Module(new AMOALU)
amoalu.io.addr := xact.addr() amoalu.io.addr := xact.addr()
amoalu.io.cmd := xact.op_code() amoalu.io.cmd := xact.op_code()
@ -640,42 +648,62 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker {
amoalu.io.lhs := io.data.resp.bits.data //default amoalu.io.lhs := io.data.resp.bits.data //default
amoalu.io.rhs := data_buffer.head // default amoalu.io.rhs := data_buffer.head // default
def mergeData[T <: HasTileLinkData] def mergeDataPut(beat: UInt, wmask: UInt, put_data: UInt) {
(byteAddrBits: Int, dataBits: Int) data_buffer(beat) := ~wmask & data_buffer(beat) | wmask & put_data
(beat: UInt, incoming: UInt) { }
def mergeData(dataBits: Int)(beat: UInt, incoming: UInt) {
val old_data = incoming // Refilled, written back, or de-cached data val old_data = incoming // Refilled, written back, or de-cached data
val new_data = data_buffer(beat) // Newly Put data is in the buffer val new_data = data_buffer(beat) // Newly Put data is already in the buffer
val amoOpSz = UInt(amoAluOperandBits) amoalu.io.lhs := old_data >> xact.amo_shift_bits()
val amoOffset = xact.addr_byte()(byteAddrBits-1, log2Up(amoAluOperandBits/8)) amoalu.io.rhs := new_data >> xact.amo_shift_bits()
amoalu.io.lhs := old_data >> amoOffset*amoOpSz val valid_beat = (xact.is(Acquire.putBlockType) || xact.addr_beat === beat)
amoalu.io.rhs := new_data >> amoOffset*amoOpSz val wmask = Fill(dataBits, valid_beat) & wmask_buffer(beat)
val valid_beat = (xact.is(Acquire.putBlockType) || xact.addr_beat === beat) && data_buffer(beat) := ~wmask & old_data |
xact.isBuiltInType() // Only built-in a_types have data for now wmask & Mux(xact.is(Acquire.putAtomicType),
val wmask = Fill(dataBits, valid_beat) & amoalu.io.out << xact.amo_shift_bits(),
Mux(xact.is(Acquire.putAtomicType), new_data)
FillInterleaved(amoAluOperandBits, UIntToOH(amoOffset)),
Mux(xact.is(Acquire.putBlockType) || xact.is(Acquire.putType),
FillInterleaved(8, write_mask_buffer(beat)),
UInt(0, width = dataBits)))
data_buffer(beat) := ~wmask & old_data | wmask &
Mux(xact.is(Acquire.putAtomicType), amoalu.io.out << amoOffset*amoOpSz, new_data)
when(xact.is(Acquire.putAtomicType) && valid_beat) { amo_result := old_data } when(xact.is(Acquire.putAtomicType) && valid_beat) { amo_result := old_data }
} }
val mergeDataInternal = mergeData(log2Up(rowBits/8), rowBits) _ val mergeDataInternal = mergeData(rowBits) _
val mergeDataInner = mergeData(innerByteAddrBits, innerDataBits) _ val mergeDataInner = mergeData(innerDataBits) _
val mergeDataOuter = mergeData(outerByteAddrBits, outerDataBits) _ val mergeDataOuter = mergeData(outerDataBits) _
val can_merge_iacq_get = Bool(enableGetMerging) &&
(xact.isBuiltInType(Acquire.getType) &&
io.iacq().isBuiltInType(Acquire.getType)) &&
(xact_src === io.inner.acquire.bits.header.src) &&
xact.conflicts(io.iacq()) &&
Vec(s_meta_read, s_meta_resp, s_wb_req, s_wb_resp,
s_probe, s_outer_acquire, s_outer_grant,
s_outer_finish).contains(state) &&
do_allocate &&
ignt_q.io.enq.ready
//TODO: mix Puts and PutBlocks
val can_merge_iacq_put = ((Bool(enablePutMerging) &&
(xact.isBuiltInType(Acquire.putType) &&
io.iacq().isBuiltInType(Acquire.putType))) ||
(xact.isBuiltInType(Acquire.putBlockType) &&
io.iacq().isBuiltInType(Acquire.putBlockType))) &&
(xact_src === io.inner.acquire.bits.header.src) &&
(xact.client_xact_id === io.iacq().client_xact_id) &&
xact.conflicts(io.iacq()) &&
Vec(s_meta_read, s_meta_resp, s_wb_req, s_wb_resp,
s_probe, s_outer_acquire, s_outer_grant,
s_outer_finish, s_data_read,
s_data_resp).contains(state) &&
do_allocate &&
ignt_q.io.enq.ready
//TODO: Allow hit under miss for stores
val in_same_set = xact.addr_block(idxMSB,idxLSB) === val in_same_set = xact.addr_block(idxMSB,idxLSB) ===
io.iacq().addr_block(idxMSB,idxLSB) io.iacq().addr_block(idxMSB,idxLSB)
io.has_acquire_conflict := (xact.conflicts(io.iacq()) || in_same_set) &&
(state != s_idle) &&
!collect_iacq_data
io.has_acquire_match := xact.conflicts(io.iacq()) &&
collect_iacq_data
io.has_release_match := xact.conflicts(io.irel()) && io.has_release_match := xact.conflicts(io.irel()) &&
!io.irel().isVoluntary() && !io.irel().isVoluntary() &&
(state === s_probe) (state === s_probe)
io.has_acquire_match := can_merge_iacq_put || can_merge_iacq_get
io.has_acquire_conflict := (xact.conflicts(io.iacq()) || in_same_set) &&
(state != s_idle) &&
!io.has_acquire_match
// If we're allocating in this cache, we can use the current metadata // If we're allocating in this cache, we can use the current metadata
// to make an appropriate custom Acquire, otherwise we copy over the // to make an appropriate custom Acquire, otherwise we copy over the
@ -697,21 +725,25 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker {
io.inner.probe.valid := Bool(false) io.inner.probe.valid := Bool(false)
io.inner.probe.bits.header.src := UInt(bankId) io.inner.probe.bits.header.src := UInt(bankId)
io.inner.probe.bits.header.dst := curr_p_id io.inner.probe.bits.header.dst := curr_probe_dst
io.inner.probe.bits.payload := pending_coh.inner.makeProbe(xact) io.inner.probe.bits.payload := pending_coh.inner.makeProbe(xact)
io.inner.grant.valid := Bool(false) io.inner.grant.valid := state === s_inner_grant && ignt_q.io.deq.valid
io.inner.grant.bits.header.src := UInt(bankId) io.inner.grant.bits.header.src := UInt(bankId)
io.inner.grant.bits.header.dst := xact_src io.inner.grant.bits.header.dst := xact_src
io.inner.grant.bits.payload := pending_coh.inner.makeGrant( io.inner.grant.bits.payload := pending_coh.inner.makeGrant(
acq = xact, acq = xact,
manager_xact_id = UInt(trackerId), manager_xact_id = UInt(trackerId),
addr_beat = ignt_data_cnt, addr_beat = ignt_data_idx,
data = Mux(xact.is(Acquire.putAtomicType), data = Mux(xact.is(Acquire.putAtomicType),
amo_result, amo_result,
data_buffer(ignt_data_cnt))) data_buffer(ignt_data_idx)))
io.ignt().client_xact_id := ignt_q.io.deq.bits.client_xact_id
ignt_q.io.deq.ready := ignt_data_done
io.inner.acquire.ready := Bool(false) io.inner.acquire.ready := state === s_idle ||
can_merge_iacq_put ||
can_merge_iacq_get
io.inner.release.ready := Bool(false) io.inner.release.ready := Bool(false)
io.inner.finish.ready := Bool(false) io.inner.finish.ready := Bool(false)
@ -719,14 +751,14 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker {
io.data.read.bits.id := UInt(trackerId) io.data.read.bits.id := UInt(trackerId)
io.data.read.bits.way_en := xact_way_en io.data.read.bits.way_en := xact_way_en
io.data.read.bits.addr_idx := xact.addr_block(idxMSB,idxLSB) io.data.read.bits.addr_idx := xact.addr_block(idxMSB,idxLSB)
io.data.read.bits.addr_beat := read_data_cnt io.data.read.bits.addr_beat := curr_read_beat
io.data.write.valid := Bool(false) io.data.write.valid := Bool(false)
io.data.write.bits.id := UInt(trackerId) io.data.write.bits.id := UInt(trackerId)
io.data.write.bits.way_en := xact_way_en io.data.write.bits.way_en := xact_way_en
io.data.write.bits.addr_idx := xact.addr_block(idxMSB,idxLSB) io.data.write.bits.addr_idx := xact.addr_block(idxMSB,idxLSB)
io.data.write.bits.addr_beat := write_data_cnt io.data.write.bits.addr_beat := curr_write_beat
io.data.write.bits.wmask := SInt(-1) io.data.write.bits.wmask := SInt(-1)
io.data.write.bits.data := data_buffer(write_data_cnt) io.data.write.bits.data := data_buffer(curr_write_beat)
io.meta.read.valid := Bool(false) io.meta.read.valid := Bool(false)
io.meta.read.bits.id := UInt(trackerId) io.meta.read.bits.id := UInt(trackerId)
io.meta.read.bits.idx := xact.addr_block(idxMSB,idxLSB) io.meta.read.bits.idx := xact.addr_block(idxMSB,idxLSB)
@ -744,38 +776,16 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker {
io.wb.req.bits.way_en := xact_way_en io.wb.req.bits.way_en := xact_way_en
io.wb.req.bits.id := UInt(trackerId) io.wb.req.bits.id := UInt(trackerId)
assert(!(state != s_idle && collect_iacq_data && io.inner.acquire.fire() &&
io.inner.acquire.bits.header.src != xact_src),
"AcquireTracker accepted data beat from different network source than initial request.")
assert(!(state != s_idle && collect_iacq_data && io.inner.acquire.fire() &&
io.iacq().client_xact_id != xact.client_xact_id),
"AcquireTracker accepted data beat from different client transaction than initial request.")
//TODO: Assumes in-order network
assert(!(state === s_idle && io.inner.acquire.fire() &&
io.iacq().addr_beat != UInt(0)),
"AcquireTracker initialized with a tail data beat.")
when(collect_iacq_data) {
io.inner.acquire.ready := Bool(true)
when(io.inner.acquire.valid) {
data_buffer(io.iacq().addr_beat) := io.iacq().data
iacq_data_valid(io.iacq().addr_beat) := Bool(true)
}
when(iacq_data_done) { collect_iacq_data := Bool(false) }
}
switch (state) { switch (state) {
is(s_idle) { is(s_idle) {
io.inner.acquire.ready := Bool(true)
when(io.inner.acquire.valid) { when(io.inner.acquire.valid) {
xact_src := io.inner.acquire.bits.header.src xact_src := io.inner.acquire.bits.header.src
xact := io.iacq() xact := io.iacq()
xact.data := UInt(0) xact.data := UInt(0)
data_buffer(io.iacq().addr_beat) := io.iacq().data wmask_buffer.foreach { w => w := UInt(0) }
collect_iacq_data := io.iacq().hasMultibeatData() pending_reads := Mux(io.iacq().isSubBlockType(), SInt(0), SInt(-1)).toUInt
iacq_data_valid := io.iacq().hasData() << io.iacq().addr_beat pending_writes := UInt(0)
pending_resps := UInt(0)
irel_had_data := Bool(false) irel_had_data := Bool(false)
ognt_had_data := Bool(false) ognt_had_data := Bool(false)
state := s_meta_read state := s_meta_read
@ -819,7 +829,7 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker {
// Send probes // Send probes
io.inner.probe.valid := pending_probes != UInt(0) io.inner.probe.valid := pending_probes != UInt(0)
when(io.inner.probe.ready) { when(io.inner.probe.ready) {
pending_probes := pending_probes & ~UIntToOH(curr_p_id) pending_probes := pending_probes & ~UIntToOH(curr_probe_dst)
} }
// Handle releases, which may have data being written back // Handle releases, which may have data being written back
io.inner.release.ready := Bool(true) io.inner.release.ready := Bool(true)
@ -829,9 +839,9 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker {
//TODO: make sure cacq data is actually present before accepting //TODO: make sure cacq data is actually present before accepting
// release data to merge! // release data to merge!
when(io.irel().hasData()) { when(io.irel().hasData()) {
irel_had_data := Bool(true)
pending_coh.outer := pending_ocoh_on_irel pending_coh.outer := pending_ocoh_on_irel
mergeDataInner(io.irel().addr_beat, io.irel().data) mergeDataInner(io.irel().addr_beat, io.irel().data)
pending_writes := pending_writes | UIntToOH(io.irel().addr_beat)
} }
// We don't decrement release_count until we've received all the data beats. // We don't decrement release_count until we've received all the data beats.
when(!io.irel().hasMultibeatData() || irel_data_done) { when(!io.irel().hasMultibeatData() || irel_data_done) {
@ -839,11 +849,13 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker {
} }
} }
when(release_count === UInt(0)) { when(release_count === UInt(0)) {
state := Mux(is_hit, Mux(irel_had_data, s_data_write, s_data_read), s_outer_acquire) state := Mux(is_hit,
Mux(pending_writes.orR, s_data_write, s_data_read),
s_outer_acquire)
} }
} }
is(s_outer_acquire) { is(s_outer_acquire) {
io.outer.acquire.valid := !iacq_data_done // collect all data before refilling io.outer.acquire.valid := Bool(true)
when(oacq_data_done) { when(oacq_data_done) {
state := s_outer_grant state := s_outer_grant
} }
@ -853,7 +865,7 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker {
when(io.outer.grant.valid) { when(io.outer.grant.valid) {
when(io.ognt().hasData()) { when(io.ognt().hasData()) {
mergeDataOuter(io.ognt().addr_beat, io.ognt().data) mergeDataOuter(io.ognt().addr_beat, io.ognt().data)
ognt_had_data := Bool(true) pending_writes := pending_writes | UIntToOH(io.ognt().addr_beat)
} }
when(ognt_data_done) { when(ognt_data_done) {
pending_coh := pending_coh_on_ognt pending_coh := pending_coh_on_ognt
@ -864,7 +876,7 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker {
state := s_outer_finish state := s_outer_finish
}.otherwise { }.otherwise {
state := Mux(!do_allocate, s_inner_grant, state := Mux(!do_allocate, s_inner_grant,
Mux(io.ognt().hasData(), s_data_write, s_data_read)) Mux(pending_writes.orR, s_data_write, s_data_read))
} }
} }
} }
@ -873,31 +885,39 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker {
io.outer.finish.valid := Bool(true) io.outer.finish.valid := Bool(true)
when(io.outer.finish.ready) { when(io.outer.finish.ready) {
state := Mux(!do_allocate, s_inner_grant, state := Mux(!do_allocate, s_inner_grant,
Mux(ognt_had_data, s_data_write, s_data_read)) Mux(pending_writes.orR, s_data_write, s_data_read))
} }
} }
is(s_data_read) { is(s_data_read) {
io.data.read.valid := !collect_iacq_data || iacq_data_valid(read_data_cnt) io.data.read.valid := pending_reads.orR
when(io.data.read.ready) {
pending_resps := pending_resps | UIntToOH(curr_read_beat)
pending_reads := pending_reads & ~UIntToOH(curr_read_beat)
when(PopCount(pending_reads) <= UInt(1)) { state := s_data_resp }
}
when(io.data.resp.valid) { when(io.data.resp.valid) {
mergeDataInternal(io.data.resp.bits.addr_beat, io.data.resp.bits.data) mergeDataInternal(io.data.resp.bits.addr_beat, io.data.resp.bits.data)
pending_resps := pending_resps & ~UIntToOH(io.data.resp.bits.addr_beat)
} }
when(read_data_done) { state := s_data_resp }
} }
is(s_data_resp) { is(s_data_resp) {
when(io.data.resp.valid) { when(io.data.resp.valid) {
mergeDataInternal(io.data.resp.bits.addr_beat, io.data.resp.bits.data) mergeDataInternal(io.data.resp.bits.addr_beat, io.data.resp.bits.data)
pending_resps := pending_resps & ~UIntToOH(io.data.resp.bits.addr_beat)
when(PopCount(pending_resps) <= UInt(1)) {
state := Mux(pending_writes.orR, s_data_write, s_inner_grant)
} }
when(resp_data_done) {
state := Mux(xact.hasData(), s_data_write, s_inner_grant)
} }
} }
is(s_data_write) { is(s_data_write) {
io.data.write.valid := Bool(true) io.data.write.valid := pending_writes.orR //TODO make sure all acquire data is present
when(write_data_done) { state := s_inner_grant } when(io.data.write.ready) {
pending_writes := pending_writes & ~UIntToOH(curr_write_beat)
when(PopCount(pending_writes) <= UInt(1)) { state := s_inner_grant }
}
} }
is(s_inner_grant) { is(s_inner_grant) {
io.inner.grant.valid := Bool(true) when(ignt_data_done && ignt_q.io.count === UInt(1)) {
when(ignt_data_done) {
val meta_dirty = !is_hit || pending_coh_on_ignt != xact_meta.coh val meta_dirty = !is_hit || pending_coh_on_ignt != xact_meta.coh
when(meta_dirty) { pending_coh := pending_coh_on_ignt } when(meta_dirty) { pending_coh := pending_coh_on_ignt }
state := Mux(meta_dirty, s_meta_write, state := Mux(meta_dirty, s_meta_write,
@ -915,6 +935,32 @@ class L2AcquireTracker(trackerId: Int, bankId: Int) extends L2XactTracker {
when(io.inner.finish.valid) { state := s_idle } when(io.inner.finish.valid) { state := s_idle }
} }
} }
ignt_q.io.enq.valid := io.inner.acquire.fire() &&
(state === s_idle || !xact.hasMultibeatData())
ignt_q.io.enq.bits.client_xact_id := io.iacq().client_xact_id
ignt_q.io.enq.bits.addr_beat := io.iacq().addr_beat
// Handle Get and Put merging
when(io.inner.acquire.fire()) {
val beat = io.iacq().addr_beat
when(io.iacq().hasData()) {
mergeDataPut(beat, io.iacq().wmask(), io.iacq().data)
wmask_buffer(beat) := io.iacq().wmask() | wmask_buffer(beat)
//iacq_data_valid(beat) := Bool(true)
pending_writes(beat) := Bool(true)
}
pending_reads(beat) := Bool(true)
}
assert(!(state != s_idle && io.inner.acquire.fire() &&
io.inner.acquire.bits.header.src != xact_src),
"AcquireTracker accepted data beat from different network source than initial request.")
//TODO: Assumes in-order network
assert(!(state === s_idle && io.inner.acquire.fire() &&
io.iacq().addr_beat != UInt(0)),
"AcquireTracker initialized with a tail data beat.")
} }
class L2WritebackReq extends L2HellaCacheBundle class L2WritebackReq extends L2HellaCacheBundle
@ -952,7 +998,7 @@ class L2WritebackUnit(trackerId: Int, bankId: Int) extends L2XactTracker {
val irel_had_data = Reg(init = Bool(false)) val irel_had_data = Reg(init = Bool(false))
val release_count = Reg(init = UInt(0, width = log2Up(nCoherentClients+1))) val release_count = Reg(init = UInt(0, width = log2Up(nCoherentClients+1)))
val pending_probes = Reg(init = Bits(0, width = nCoherentClients)) val pending_probes = Reg(init = Bits(0, width = nCoherentClients))
val curr_p_id = PriorityEncoder(pending_probes) val curr_probe_dst = PriorityEncoder(pending_probes)
val full_sharers = io.wb.req.bits.coh.inner.full() val full_sharers = io.wb.req.bits.coh.inner.full()
val mask_incoherent = full_sharers & ~io.incoherent.toBits val mask_incoherent = full_sharers & ~io.incoherent.toBits
@ -988,7 +1034,7 @@ class L2WritebackUnit(trackerId: Int, bankId: Int) extends L2XactTracker {
io.inner.probe.valid := Bool(false) io.inner.probe.valid := Bool(false)
io.inner.probe.bits.header.src := UInt(bankId) io.inner.probe.bits.header.src := UInt(bankId)
io.inner.probe.bits.header.dst := curr_p_id io.inner.probe.bits.header.dst := curr_probe_dst
io.inner.probe.bits.payload := io.inner.probe.bits.payload :=
xact_coh.inner.makeProbeForVoluntaryWriteback(xact_addr_block) xact_coh.inner.makeProbeForVoluntaryWriteback(xact_addr_block)
@ -1029,7 +1075,7 @@ class L2WritebackUnit(trackerId: Int, bankId: Int) extends L2XactTracker {
// Send probes // Send probes
io.inner.probe.valid := pending_probes != UInt(0) io.inner.probe.valid := pending_probes != UInt(0)
when(io.inner.probe.ready) { when(io.inner.probe.ready) {
pending_probes := pending_probes & ~UIntToOH(curr_p_id) pending_probes := pending_probes & ~UIntToOH(curr_probe_dst)
} }
// Handle releases, which may have data being written back // Handle releases, which may have data being written back
io.inner.release.ready := Bool(true) io.inner.release.ready := Bool(true)

View File

@ -38,6 +38,7 @@ abstract trait TileLinkParameters extends UsesParameters {
val tlGrantTypeBits = max(log2Up(Grant.nBuiltInTypes), val tlGrantTypeBits = max(log2Up(Grant.nBuiltInTypes),
tlCoh.grantTypeWidth) + 1 tlCoh.grantTypeWidth) + 1
val tlNetworkPreservesPointToPointOrdering = params(TLNetworkIsOrderedP2P) val tlNetworkPreservesPointToPointOrdering = params(TLNetworkIsOrderedP2P)
val amoAluOperandBits = params(AmoAluOperandBits)
} }
abstract class TLBundle extends Bundle with TileLinkParameters abstract class TLBundle extends Bundle with TileLinkParameters
@ -101,13 +102,22 @@ class Acquire extends ClientToManagerChannel
M_XWR, union(opSizeOff-1, opCodeOff)) M_XWR, union(opSizeOff-1, opCodeOff))
def op_size(dummy: Int = 0) = union(addrByteOff-1, opSizeOff) def op_size(dummy: Int = 0) = union(addrByteOff-1, opSizeOff)
def addr_byte(dummy: Int = 0) = union(addrByteMSB-1, addrByteOff) def addr_byte(dummy: Int = 0) = union(addrByteMSB-1, addrByteOff)
def write_mask(dummy: Int = 0) = union(tlWriteMaskBits, 1) private def amo_offset(dummy: Int = 0) = addr_byte()(tlByteAddrBits-1, log2Up(amoAluOperandBits/8))
def amo_shift_bits(dummy: Int = 0) = UInt(amoAluOperandBits)*amo_offset()
def wmask(dummy: Int = 0) =
Mux(isBuiltInType(Acquire.putAtomicType),
FillInterleaved(amoAluOperandBits, UIntToOH(amo_offset())),
Mux(isBuiltInType(Acquire.putBlockType) || isBuiltInType(Acquire.putType),
FillInterleaved(8, union(tlWriteMaskBits, 1)),
UInt(0, width = tlDataBits)))
def addr(dummy: Int = 0) = Cat(this.addr_block, this.addr_beat, this.addr_byte()) def addr(dummy: Int = 0) = Cat(this.addr_block, this.addr_beat, this.addr_byte())
// Other helper funcs // Other helper funcs
def is(t: UInt) = a_type === t def is(t: UInt) = a_type === t //TODO: make this more opaque; def ===?
def isBuiltInType(dummy: Int = 0): Bool = is_builtin_type def isBuiltInType(dummy: Int = 0): Bool = is_builtin_type
def isBuiltInType(t: UInt): Bool = is_builtin_type && a_type === t
def isSubBlockType(dummy: Int = 0): Bool = isBuiltInType() && Acquire.typesOnSubBlocks.contains(a_type) def isSubBlockType(dummy: Int = 0): Bool = isBuiltInType() && Acquire.typesOnSubBlocks.contains(a_type)
@ -119,7 +129,7 @@ class Acquire extends ClientToManagerChannel
def hasMultibeatData(dummy: Int = 0): Bool = Bool(tlDataBeats > 1) && isBuiltInType() && def hasMultibeatData(dummy: Int = 0): Bool = Bool(tlDataBeats > 1) && isBuiltInType() &&
Acquire.typesWithMultibeatData.contains(a_type) Acquire.typesWithMultibeatData.contains(a_type)
def requiresSelfProbe(dummy: Int = 0) = Bool(false) def requiresSelfProbe(dummy: Int = 0) = isBuiltInType()//Bool(false)
def getBuiltInGrantType(dummy: Int = 0): UInt = { def getBuiltInGrantType(dummy: Int = 0): UInt = {
MuxLookup(this.a_type, Grant.putAckType, Array( MuxLookup(this.a_type, Grant.putAckType, Array(
@ -229,7 +239,7 @@ object Put {
addr_block: UInt, addr_block: UInt,
addr_beat: UInt, addr_beat: UInt,
data: UInt, data: UInt,
write_mask: UInt = Acquire.fullWriteMask): Acquire = { wmask: UInt = Acquire.fullWriteMask): Acquire = {
Acquire( Acquire(
is_builtin_type = Bool(true), is_builtin_type = Bool(true),
a_type = Acquire.putType, a_type = Acquire.putType,
@ -237,7 +247,7 @@ object Put {
addr_beat = addr_beat, addr_beat = addr_beat,
client_xact_id = client_xact_id, client_xact_id = client_xact_id,
data = data, data = data,
union = Cat(write_mask, Bool(true))) union = Cat(wmask, Bool(true)))
} }
} }
@ -248,7 +258,7 @@ object PutBlock {
addr_block: UInt, addr_block: UInt,
addr_beat: UInt, addr_beat: UInt,
data: UInt, data: UInt,
write_mask: UInt): Acquire = { wmask: UInt): Acquire = {
Acquire( Acquire(
is_builtin_type = Bool(true), is_builtin_type = Bool(true),
a_type = Acquire.putBlockType, a_type = Acquire.putBlockType,
@ -256,7 +266,7 @@ object PutBlock {
addr_block = addr_block, addr_block = addr_block,
addr_beat = addr_beat, addr_beat = addr_beat,
data = data, data = data,
union = Cat(write_mask, (write_mask != Acquire.fullWriteMask))) union = Cat(wmask, (wmask != Acquire.fullWriteMask)))
} }
def apply( def apply(
client_xact_id: UInt, client_xact_id: UInt,