fix broadcast hub and TL -> NASTI converter to support subblock operations
This commit is contained in:
parent
24389a5257
commit
24f3fac90a
@ -30,12 +30,18 @@ class L2BroadcastHub extends ManagerCoherenceAgent
|
||||
val internalDataBits = new DataQueueLocation().getWidth
|
||||
val inStoreQueue :: inVolWBQueue :: inClientReleaseQueue :: Nil = Enum(UInt(), nDataQueueLocations)
|
||||
|
||||
val trackerTLParams = params.alterPartial({
|
||||
case TLDataBits => internalDataBits
|
||||
case TLWriteMaskBits => innerWriteMaskBits
|
||||
})
|
||||
|
||||
// Create SHRs for outstanding transactions
|
||||
val trackerList = (0 until nReleaseTransactors).map(id =>
|
||||
Module(new BroadcastVoluntaryReleaseTracker(id), {case TLDataBits => internalDataBits})) ++
|
||||
(nReleaseTransactors until nTransactors).map(id =>
|
||||
Module(new BroadcastAcquireTracker(id), {case TLDataBits => internalDataBits}))
|
||||
|
||||
val trackerList =
|
||||
(0 until nReleaseTransactors).map(id =>
|
||||
Module(new BroadcastVoluntaryReleaseTracker(id))(trackerTLParams)) ++
|
||||
(nReleaseTransactors until nTransactors).map(id =>
|
||||
Module(new BroadcastAcquireTracker(id))(trackerTLParams))
|
||||
|
||||
// Propagate incoherence flags
|
||||
trackerList.map(_.io.incoherent := io.incoherent)
|
||||
|
||||
@ -100,7 +106,8 @@ class L2BroadcastHub extends ManagerCoherenceAgent
|
||||
// Create an arbiter for the one memory port
|
||||
val outer_arb = Module(new ClientUncachedTileLinkIOArbiter(trackerList.size),
|
||||
{ case TLId => params(OuterTLId)
|
||||
case TLDataBits => internalDataBits })
|
||||
case TLDataBits => internalDataBits
|
||||
case TLWriteMaskBits => innerWriteMaskBits })
|
||||
outer_arb.io.in <> trackerList.map(_.io.outer)
|
||||
// Get the pending data out of the store data queue
|
||||
val outer_data_ptr = new DataQueueLocation().fromBits(outer_arb.io.out.acquire.bits.data)
|
||||
@ -112,8 +119,6 @@ class L2BroadcastHub extends ManagerCoherenceAgent
|
||||
io.outer.acquire.bits.data := MuxLookup(outer_data_ptr.loc, io.irel().data, Array(
|
||||
inStoreQueue -> sdq(outer_data_ptr.idx),
|
||||
inVolWBQueue -> vwbdq(outer_data_ptr.idx)))
|
||||
io.outer.acquire.bits.union := Cat(Fill(io.outer.acquire.bits.tlWriteMaskBits, outer_arb.io.out.acquire.bits.union(1)),
|
||||
outer_arb.io.out.acquire.bits.union(0))
|
||||
|
||||
// Update SDQ valid bits
|
||||
when (io.outer.acquire.valid || sdq_enq) {
|
||||
@ -209,14 +214,17 @@ class BroadcastAcquireTracker(trackerId: Int) extends BroadcastXactTracker {
|
||||
val s_idle :: s_probe :: s_mem_read :: s_mem_write :: s_make_grant :: s_mem_resp :: s_ack :: Nil = Enum(UInt(), 7)
|
||||
val state = Reg(init=s_idle)
|
||||
|
||||
val xact = Reg(Bundle(new AcquireFromSrc, { case TLId => params(InnerTLId); case TLDataBits => 0 }))
|
||||
val xact = Reg(Bundle(new AcquireFromSrc, {
|
||||
case TLId => params(InnerTLId)
|
||||
case TLDataBits => 0
|
||||
case TLWriteMaskBits => innerWriteMaskBits
|
||||
}))
|
||||
val data_buffer = Reg(Vec(io.iacq().data, innerDataBeats))
|
||||
val coh = ManagerMetadata.onReset
|
||||
|
||||
assert(!(state != s_idle && xact.isBuiltInType() &&
|
||||
Vec(Acquire.getType, Acquire.putType, Acquire.putAtomicType,
|
||||
Acquire.prefetchType).contains(xact.a_type)),
|
||||
"Broadcast Hub does not support PutAtomics, subblock Gets/Puts, or prefetches") // TODO
|
||||
Vec(Acquire.putAtomicType, Acquire.prefetchType).contains(xact.a_type)),
|
||||
"Broadcast Hub does not support PutAtomics or prefetches") // TODO
|
||||
|
||||
val release_count = Reg(init=UInt(0, width = log2Up(io.inner.tlNCachingClients+1)))
|
||||
val pending_probes = Reg(init=Bits(0, width = io.inner.tlNCachingClients))
|
||||
@ -236,6 +244,7 @@ class BroadcastAcquireTracker(trackerId: Int) extends BroadcastXactTracker {
|
||||
val pending_outer_write_ = io.iacq().hasData()
|
||||
val pending_outer_read = io.ignt().hasData()
|
||||
val pending_outer_read_ = coh.makeGrant(io.iacq(), UInt(trackerId)).hasData()
|
||||
val subblock_type = xact.isSubBlockType()
|
||||
|
||||
io.has_acquire_conflict := xact.conflicts(io.iacq()) &&
|
||||
(state != s_idle) &&
|
||||
@ -246,22 +255,32 @@ class BroadcastAcquireTracker(trackerId: Int) extends BroadcastXactTracker {
|
||||
!io.irel().isVoluntary() &&
|
||||
(state === s_probe)
|
||||
|
||||
val outer_write_acq = Bundle(PutBlock(
|
||||
client_xact_id = UInt(trackerId),
|
||||
addr_block = xact.addr_block,
|
||||
addr_beat = oacq_data_cnt,
|
||||
data = data_buffer(oacq_data_cnt)))(outerTLParams)
|
||||
val outer_write_rel = Bundle(PutBlock(
|
||||
client_xact_id = UInt(trackerId),
|
||||
addr_block = xact.addr_block,
|
||||
addr_beat = io.irel().addr_beat,
|
||||
data = io.irel().data))(outerTLParams)
|
||||
val outer_read = Bundle(GetBlock(
|
||||
client_xact_id = UInt(trackerId),
|
||||
addr_block = xact.addr_block))(outerTLParams)
|
||||
val oacq_type = MuxLookup(state, Acquire.getBlockType, Seq(
|
||||
(s_probe, Acquire.putBlockType),
|
||||
(s_mem_write, Mux(subblock_type, Acquire.putType, Acquire.putBlockType)),
|
||||
(s_mem_read, Mux(subblock_type, Acquire.getType, Acquire.getBlockType))))
|
||||
val oacq_beat = MuxLookup(state, UInt(0), Seq(
|
||||
(s_probe, io.irel().addr_beat),
|
||||
(s_mem_write, Mux(subblock_type, xact.addr_beat, oacq_data_cnt)),
|
||||
(s_mem_read, Mux(subblock_type, xact.addr_beat, UInt(0)))))
|
||||
val oacq_data = MuxLookup(state, Bits(0), Seq(
|
||||
(s_probe, io.irel().data),
|
||||
(s_mem_write, Mux(subblock_type,
|
||||
data_buffer(0), data_buffer(oacq_data_cnt)))))
|
||||
val oacq_union = MuxLookup(state, Bits(0), Seq(
|
||||
(s_probe, Acquire.fullWriteMask),
|
||||
(s_mem_write, xact.wmask()),
|
||||
(s_mem_read, Cat(xact.addr_byte(), xact.op_size(), M_XRD))))
|
||||
|
||||
io.outer.acquire.valid := Bool(false)
|
||||
io.outer.acquire.bits := outer_read //default
|
||||
io.outer.acquire.bits := Bundle(Acquire(
|
||||
is_builtin_type = Bool(true),
|
||||
a_type = oacq_type,
|
||||
client_xact_id = UInt(trackerId),
|
||||
addr_block = xact.addr_block,
|
||||
addr_beat = oacq_beat,
|
||||
data = oacq_data,
|
||||
union = Cat(oacq_union, Bool(true))))(outerTLParams)
|
||||
io.outer.grant.ready := Bool(false)
|
||||
|
||||
io.inner.probe.valid := Bool(false)
|
||||
@ -331,7 +350,6 @@ class BroadcastAcquireTracker(trackerId: Int) extends BroadcastXactTracker {
|
||||
when(io.inner.release.valid) {
|
||||
when(io.irel().hasData()) {
|
||||
io.outer.acquire.valid := Bool(true)
|
||||
io.outer.acquire.bits := outer_write_rel
|
||||
when(io.outer.acquire.ready) {
|
||||
when(oacq_data_done) {
|
||||
pending_ognt_ack := Bool(true)
|
||||
@ -353,7 +371,6 @@ class BroadcastAcquireTracker(trackerId: Int) extends BroadcastXactTracker {
|
||||
}
|
||||
is(s_mem_write) { // Write data to outer memory
|
||||
io.outer.acquire.valid := !pending_ognt_ack || !collect_iacq_data || iacq_data_valid(oacq_data_cnt)
|
||||
io.outer.acquire.bits := outer_write_acq
|
||||
when(oacq_data_done) {
|
||||
pending_ognt_ack := Bool(true)
|
||||
state := Mux(pending_outer_read, s_mem_read, s_mem_resp)
|
||||
@ -361,7 +378,6 @@ class BroadcastAcquireTracker(trackerId: Int) extends BroadcastXactTracker {
|
||||
}
|
||||
is(s_mem_read) { // Read data from outer memory (possibly what was just written)
|
||||
io.outer.acquire.valid := !pending_ognt_ack
|
||||
io.outer.acquire.bits := outer_read
|
||||
when(io.outer.acquire.fire()) { state := s_mem_resp }
|
||||
}
|
||||
is(s_mem_resp) { // Wait to forward grants from outer memory
|
||||
|
@ -34,6 +34,8 @@ case object TLDataBits extends Field[Int]
|
||||
case object TLDataBeats extends Field[Int]
|
||||
/** Whether the underlying physical network preserved point-to-point ordering of messages */
|
||||
case object TLNetworkIsOrderedP2P extends Field[Boolean]
|
||||
/** Number of bits in write mask (usually one per byte in beat) */
|
||||
case object TLWriteMaskBits extends Field[Int]
|
||||
|
||||
/** Utility trait for building Modules and Bundles that use TileLink parameters */
|
||||
trait TileLinkParameters extends UsesParameters {
|
||||
@ -53,7 +55,7 @@ trait TileLinkParameters extends UsesParameters {
|
||||
val tlDataBits = params(TLDataBits)
|
||||
val tlDataBytes = tlDataBits/8
|
||||
val tlDataBeats = params(TLDataBeats)
|
||||
val tlWriteMaskBits = if(tlDataBits/8 < 1) 1 else tlDataBits/8
|
||||
val tlWriteMaskBits = params(TLWriteMaskBits)
|
||||
val tlBeatAddrBits = log2Up(tlDataBeats)
|
||||
val tlByteAddrBits = log2Up(tlWriteMaskBits)
|
||||
val tlMemoryOpcodeBits = M_SZ
|
||||
@ -1274,11 +1276,38 @@ class NASTIMasterIOTileLinkIOConverter extends TLModule with NASTIParameters {
|
||||
val addr_out = Reg(UInt(width = nastiXAddrBits))
|
||||
val has_data = Reg(init=Bool(false))
|
||||
val data_from_rel = Reg(init=Bool(false))
|
||||
val is_subblock = io.tl.acquire.bits.isSubBlockType()
|
||||
val (tl_cnt_out, tl_wrap_out) =
|
||||
Counter((io.tl.acquire.fire() && acq_has_data) ||
|
||||
Counter((io.tl.acquire.fire() && io.tl.acquire.bits.hasMultibeatData()) ||
|
||||
(io.tl.release.fire() && rel_has_data), tlDataBeats)
|
||||
val tl_done_out = Reg(init=Bool(false))
|
||||
|
||||
val roq_size = 4
|
||||
val roq_data = Reg(Vec(UInt(width = tlByteAddrBits), roq_size))
|
||||
val roq_tags = Reg(Vec(UInt(width = nastiRIdBits), roq_size))
|
||||
val roq_free = Reg(init = Fill(roq_size, Bits(1, 1)))
|
||||
val roq_full = !roq_free.orR
|
||||
|
||||
val roq_enq_addr = PriorityEncoder(roq_free)
|
||||
val roq_enq_valid = io.tl.acquire.fire() && !acq_has_data && is_subblock
|
||||
val roq_enq_data = io.tl.acquire.bits.addr_byte()
|
||||
val roq_enq_tag = io.nasti.ar.bits.id
|
||||
|
||||
val roq_deq_tag = io.nasti.r.bits.id
|
||||
val roq_deq_addr = PriorityEncoder(roq_tags.map(_ === roq_deq_tag))
|
||||
val roq_deq_data = roq_data(roq_deq_addr)
|
||||
val roq_deq_valid = io.nasti.r.fire() && !io.nasti.r.bits.id(0)
|
||||
|
||||
when (roq_enq_valid) {
|
||||
roq_data(roq_enq_addr) := roq_enq_data
|
||||
roq_tags(roq_enq_addr) := roq_enq_tag
|
||||
roq_free(roq_enq_addr) := Bool(false)
|
||||
}
|
||||
|
||||
when (roq_deq_valid) {
|
||||
roq_free(roq_deq_addr) := Bool(true)
|
||||
}
|
||||
|
||||
io.nasti.ar.bits.id := tag_out
|
||||
io.nasti.ar.bits.addr := addr_out
|
||||
io.nasti.ar.bits.len := Mux(has_data, UInt(tlDataBeats-1), UInt(0))
|
||||
@ -1293,7 +1322,7 @@ class NASTIMasterIOTileLinkIOConverter extends TLModule with NASTIParameters {
|
||||
io.nasti.aw.bits := io.nasti.ar.bits
|
||||
io.nasti.w.bits.strb := Mux(data_from_rel, SInt(-1), io.tl.acquire.bits.wmask())
|
||||
io.nasti.w.bits.data := Mux(data_from_rel, io.tl.release.bits.data, io.tl.acquire.bits.data)
|
||||
io.nasti.w.bits.last := tl_wrap_out
|
||||
io.nasti.w.bits.last := tl_wrap_out || (io.tl.acquire.fire() && is_subblock)
|
||||
|
||||
when(!active_out){
|
||||
io.tl.release.ready := io.nasti.w.ready
|
||||
@ -1307,7 +1336,6 @@ class NASTIMasterIOTileLinkIOConverter extends TLModule with NASTIParameters {
|
||||
io.nasti.aw.valid := is_write
|
||||
io.nasti.ar.valid := !is_write
|
||||
cmd_sent_out := (!is_write && io.nasti.ar.ready) || (is_write && io.nasti.aw.ready)
|
||||
tl_done_out := tl_wrap_out
|
||||
when(io.tl.release.valid) {
|
||||
data_from_rel := Bool(true)
|
||||
io.nasti.w.bits.data := io.tl.release.bits.data
|
||||
@ -1319,34 +1347,35 @@ class NASTIMasterIOTileLinkIOConverter extends TLModule with NASTIParameters {
|
||||
io.nasti.aw.bits.id := tag
|
||||
io.nasti.aw.bits.addr := addr
|
||||
io.nasti.aw.bits.len := UInt(tlDataBeats-1)
|
||||
io.nasti.aw.bits.size := MT_Q
|
||||
tag_out := tag
|
||||
addr_out := addr
|
||||
has_data := rel_has_data
|
||||
tl_done_out := tl_wrap_out
|
||||
} .elsewhen(io.tl.acquire.valid) {
|
||||
data_from_rel := Bool(false)
|
||||
io.nasti.w.bits.data := io.tl.acquire.bits.data
|
||||
io.nasti.w.bits.strb := io.tl.acquire.bits.wmask()
|
||||
// The last bit indicates to the Grant logic what g_type to send back
|
||||
// For read, true = getDataBlockType, false = getDataBeatType
|
||||
// For write, it should always be false, so that putAckType is sent
|
||||
val tag = Cat(io.tl.acquire.bits.client_id,
|
||||
io.tl.acquire.bits.client_xact_id,
|
||||
io.tl.acquire.bits.isBuiltInType())
|
||||
!is_write && !is_subblock)
|
||||
val addr = io.tl.acquire.bits.full_addr()
|
||||
when(is_write) {
|
||||
io.nasti.aw.bits.id := tag
|
||||
io.nasti.aw.bits.addr := addr
|
||||
io.nasti.aw.bits.len := Mux(io.tl.acquire.bits.isBuiltInType(Acquire.putBlockType),
|
||||
UInt(tlDataBeats-1), UInt(0))
|
||||
io.nasti.aw.bits.size := bytesToXSize(PopCount(io.tl.acquire.bits.wmask()))
|
||||
io.nasti.aw.bits.len := Mux(!is_subblock, UInt(tlDataBeats-1), UInt(0))
|
||||
} .otherwise {
|
||||
io.nasti.ar.bits.id := tag
|
||||
io.nasti.ar.bits.addr := addr
|
||||
io.nasti.ar.bits.len := Mux(io.tl.acquire.bits.isBuiltInType(Acquire.getBlockType),
|
||||
UInt(tlDataBeats-1), UInt(0))
|
||||
io.nasti.ar.bits.len := Mux(!is_subblock, UInt(tlDataBeats-1), UInt(0))
|
||||
io.nasti.ar.bits.size := io.tl.acquire.bits.op_size()
|
||||
}
|
||||
tag_out := tag
|
||||
addr_out := addr
|
||||
has_data := acq_has_data
|
||||
tl_done_out := tl_wrap_out || is_subblock
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1364,24 +1393,36 @@ class NASTIMasterIOTileLinkIOConverter extends TLModule with NASTIParameters {
|
||||
}
|
||||
}
|
||||
when(tl_wrap_out) { tl_done_out := Bool(true) }
|
||||
when(cmd_sent_out && (!has_data || tl_done_out)) { active_out := Bool(false) }
|
||||
when(cmd_sent_out && !roq_full && (!has_data || tl_done_out)) {
|
||||
active_out := Bool(false)
|
||||
}
|
||||
}
|
||||
|
||||
assert (!io.nasti.r.valid || !io.nasti.r.bits.resp(1),
|
||||
"NASTI read response error")
|
||||
assert (!io.nasti.b.valid || !io.nasti.b.bits.resp(1),
|
||||
"NASTI write response error")
|
||||
|
||||
// Aggregate incoming NASTI responses into TL Grants
|
||||
val (tl_cnt_in, tl_wrap_in) = Counter(io.tl.grant.fire() && io.tl.grant.bits.hasMultibeatData(), tlDataBeats)
|
||||
val gnt_arb = Module(new Arbiter(new GrantToDst, 2))
|
||||
io.tl.grant <> gnt_arb.io.out
|
||||
|
||||
val r_aligned_data = Mux(io.nasti.r.bits.id(0),
|
||||
io.nasti.r.bits.data,
|
||||
io.nasti.r.bits.data << Cat(roq_deq_data, UInt(0, 3)))
|
||||
|
||||
gnt_arb.io.in(0).valid := io.nasti.r.valid
|
||||
io.nasti.r.ready := gnt_arb.io.in(0).ready
|
||||
gnt_arb.io.in(0).bits := Grant(
|
||||
dst = (if(dstIdBits > 0) io.nasti.r.bits.id(dst_off, tlClientXactIdBits + 1) else UInt(0)),
|
||||
is_builtin_type = io.nasti.r.bits.id(0),
|
||||
g_type = Mux(io.nasti.r.bits.id(0), Grant.getDataBlockType, UInt(0)), // TODO: Assumes MI or MEI protocol
|
||||
is_builtin_type = Bool(true),
|
||||
g_type = Mux(io.nasti.r.bits.id(0),
|
||||
Grant.getDataBlockType, Grant.getDataBeatType), // TODO: Assumes MI or MEI protocol
|
||||
client_xact_id = io.nasti.r.bits.id >> 1,
|
||||
manager_xact_id = UInt(0),
|
||||
addr_beat = tl_cnt_in,
|
||||
data = io.nasti.r.bits.data)
|
||||
data = r_aligned_data)
|
||||
|
||||
gnt_arb.io.in(1).valid := io.nasti.b.valid
|
||||
io.nasti.b.ready := gnt_arb.io.in(1).ready
|
||||
|
@ -20,6 +20,7 @@ trait CoherenceAgentParameters extends UsesParameters {
|
||||
val innerTLParams = params.alterPartial({case TLId => params(InnerTLId)})
|
||||
val innerDataBeats = innerTLParams(TLDataBeats)
|
||||
val innerDataBits = innerTLParams(TLDataBits)
|
||||
val innerWriteMaskBits = innerTLParams(TLWriteMaskBits)
|
||||
val innerBeatAddrBits = log2Up(innerDataBeats)
|
||||
val innerByteAddrBits = log2Up(innerDataBits/8)
|
||||
require(outerDataBeats == innerDataBeats) //TODO: must fix all xact_data Vecs to remove this requirement
|
||||
|
Loading…
Reference in New Issue
Block a user