1
0

fix broadcast hub and TL -> NASTI converter to support subblock operations

This commit is contained in:
Howard Mao 2015-08-10 19:06:02 -07:00
parent 24389a5257
commit 24f3fac90a
3 changed files with 102 additions and 44 deletions

View File

@ -30,11 +30,17 @@ class L2BroadcastHub extends ManagerCoherenceAgent
val internalDataBits = new DataQueueLocation().getWidth val internalDataBits = new DataQueueLocation().getWidth
val inStoreQueue :: inVolWBQueue :: inClientReleaseQueue :: Nil = Enum(UInt(), nDataQueueLocations) val inStoreQueue :: inVolWBQueue :: inClientReleaseQueue :: Nil = Enum(UInt(), nDataQueueLocations)
val trackerTLParams = params.alterPartial({
case TLDataBits => internalDataBits
case TLWriteMaskBits => innerWriteMaskBits
})
// Create SHRs for outstanding transactions // Create SHRs for outstanding transactions
val trackerList = (0 until nReleaseTransactors).map(id => val trackerList =
Module(new BroadcastVoluntaryReleaseTracker(id), {case TLDataBits => internalDataBits})) ++ (0 until nReleaseTransactors).map(id =>
(nReleaseTransactors until nTransactors).map(id => Module(new BroadcastVoluntaryReleaseTracker(id))(trackerTLParams)) ++
Module(new BroadcastAcquireTracker(id), {case TLDataBits => internalDataBits})) (nReleaseTransactors until nTransactors).map(id =>
Module(new BroadcastAcquireTracker(id))(trackerTLParams))
// Propagate incoherence flags // Propagate incoherence flags
trackerList.map(_.io.incoherent := io.incoherent) trackerList.map(_.io.incoherent := io.incoherent)
@ -100,7 +106,8 @@ class L2BroadcastHub extends ManagerCoherenceAgent
// Create an arbiter for the one memory port // Create an arbiter for the one memory port
val outer_arb = Module(new ClientUncachedTileLinkIOArbiter(trackerList.size), val outer_arb = Module(new ClientUncachedTileLinkIOArbiter(trackerList.size),
{ case TLId => params(OuterTLId) { case TLId => params(OuterTLId)
case TLDataBits => internalDataBits }) case TLDataBits => internalDataBits
case TLWriteMaskBits => innerWriteMaskBits })
outer_arb.io.in <> trackerList.map(_.io.outer) outer_arb.io.in <> trackerList.map(_.io.outer)
// Get the pending data out of the store data queue // Get the pending data out of the store data queue
val outer_data_ptr = new DataQueueLocation().fromBits(outer_arb.io.out.acquire.bits.data) val outer_data_ptr = new DataQueueLocation().fromBits(outer_arb.io.out.acquire.bits.data)
@ -112,8 +119,6 @@ class L2BroadcastHub extends ManagerCoherenceAgent
io.outer.acquire.bits.data := MuxLookup(outer_data_ptr.loc, io.irel().data, Array( io.outer.acquire.bits.data := MuxLookup(outer_data_ptr.loc, io.irel().data, Array(
inStoreQueue -> sdq(outer_data_ptr.idx), inStoreQueue -> sdq(outer_data_ptr.idx),
inVolWBQueue -> vwbdq(outer_data_ptr.idx))) inVolWBQueue -> vwbdq(outer_data_ptr.idx)))
io.outer.acquire.bits.union := Cat(Fill(io.outer.acquire.bits.tlWriteMaskBits, outer_arb.io.out.acquire.bits.union(1)),
outer_arb.io.out.acquire.bits.union(0))
// Update SDQ valid bits // Update SDQ valid bits
when (io.outer.acquire.valid || sdq_enq) { when (io.outer.acquire.valid || sdq_enq) {
@ -209,14 +214,17 @@ class BroadcastAcquireTracker(trackerId: Int) extends BroadcastXactTracker {
val s_idle :: s_probe :: s_mem_read :: s_mem_write :: s_make_grant :: s_mem_resp :: s_ack :: Nil = Enum(UInt(), 7) val s_idle :: s_probe :: s_mem_read :: s_mem_write :: s_make_grant :: s_mem_resp :: s_ack :: Nil = Enum(UInt(), 7)
val state = Reg(init=s_idle) val state = Reg(init=s_idle)
val xact = Reg(Bundle(new AcquireFromSrc, { case TLId => params(InnerTLId); case TLDataBits => 0 })) val xact = Reg(Bundle(new AcquireFromSrc, {
case TLId => params(InnerTLId)
case TLDataBits => 0
case TLWriteMaskBits => innerWriteMaskBits
}))
val data_buffer = Reg(Vec(io.iacq().data, innerDataBeats)) val data_buffer = Reg(Vec(io.iacq().data, innerDataBeats))
val coh = ManagerMetadata.onReset val coh = ManagerMetadata.onReset
assert(!(state != s_idle && xact.isBuiltInType() && assert(!(state != s_idle && xact.isBuiltInType() &&
Vec(Acquire.getType, Acquire.putType, Acquire.putAtomicType, Vec(Acquire.putAtomicType, Acquire.prefetchType).contains(xact.a_type)),
Acquire.prefetchType).contains(xact.a_type)), "Broadcast Hub does not support PutAtomics or prefetches") // TODO
"Broadcast Hub does not support PutAtomics, subblock Gets/Puts, or prefetches") // TODO
val release_count = Reg(init=UInt(0, width = log2Up(io.inner.tlNCachingClients+1))) val release_count = Reg(init=UInt(0, width = log2Up(io.inner.tlNCachingClients+1)))
val pending_probes = Reg(init=Bits(0, width = io.inner.tlNCachingClients)) val pending_probes = Reg(init=Bits(0, width = io.inner.tlNCachingClients))
@ -236,6 +244,7 @@ class BroadcastAcquireTracker(trackerId: Int) extends BroadcastXactTracker {
val pending_outer_write_ = io.iacq().hasData() val pending_outer_write_ = io.iacq().hasData()
val pending_outer_read = io.ignt().hasData() val pending_outer_read = io.ignt().hasData()
val pending_outer_read_ = coh.makeGrant(io.iacq(), UInt(trackerId)).hasData() val pending_outer_read_ = coh.makeGrant(io.iacq(), UInt(trackerId)).hasData()
val subblock_type = xact.isSubBlockType()
io.has_acquire_conflict := xact.conflicts(io.iacq()) && io.has_acquire_conflict := xact.conflicts(io.iacq()) &&
(state != s_idle) && (state != s_idle) &&
@ -246,22 +255,32 @@ class BroadcastAcquireTracker(trackerId: Int) extends BroadcastXactTracker {
!io.irel().isVoluntary() && !io.irel().isVoluntary() &&
(state === s_probe) (state === s_probe)
val outer_write_acq = Bundle(PutBlock( val oacq_type = MuxLookup(state, Acquire.getBlockType, Seq(
client_xact_id = UInt(trackerId), (s_probe, Acquire.putBlockType),
addr_block = xact.addr_block, (s_mem_write, Mux(subblock_type, Acquire.putType, Acquire.putBlockType)),
addr_beat = oacq_data_cnt, (s_mem_read, Mux(subblock_type, Acquire.getType, Acquire.getBlockType))))
data = data_buffer(oacq_data_cnt)))(outerTLParams) val oacq_beat = MuxLookup(state, UInt(0), Seq(
val outer_write_rel = Bundle(PutBlock( (s_probe, io.irel().addr_beat),
client_xact_id = UInt(trackerId), (s_mem_write, Mux(subblock_type, xact.addr_beat, oacq_data_cnt)),
addr_block = xact.addr_block, (s_mem_read, Mux(subblock_type, xact.addr_beat, UInt(0)))))
addr_beat = io.irel().addr_beat, val oacq_data = MuxLookup(state, Bits(0), Seq(
data = io.irel().data))(outerTLParams) (s_probe, io.irel().data),
val outer_read = Bundle(GetBlock( (s_mem_write, Mux(subblock_type,
client_xact_id = UInt(trackerId), data_buffer(0), data_buffer(oacq_data_cnt)))))
addr_block = xact.addr_block))(outerTLParams) val oacq_union = MuxLookup(state, Bits(0), Seq(
(s_probe, Acquire.fullWriteMask),
(s_mem_write, xact.wmask()),
(s_mem_read, Cat(xact.addr_byte(), xact.op_size(), M_XRD))))
io.outer.acquire.valid := Bool(false) io.outer.acquire.valid := Bool(false)
io.outer.acquire.bits := outer_read //default io.outer.acquire.bits := Bundle(Acquire(
is_builtin_type = Bool(true),
a_type = oacq_type,
client_xact_id = UInt(trackerId),
addr_block = xact.addr_block,
addr_beat = oacq_beat,
data = oacq_data,
union = Cat(oacq_union, Bool(true))))(outerTLParams)
io.outer.grant.ready := Bool(false) io.outer.grant.ready := Bool(false)
io.inner.probe.valid := Bool(false) io.inner.probe.valid := Bool(false)
@ -331,7 +350,6 @@ class BroadcastAcquireTracker(trackerId: Int) extends BroadcastXactTracker {
when(io.inner.release.valid) { when(io.inner.release.valid) {
when(io.irel().hasData()) { when(io.irel().hasData()) {
io.outer.acquire.valid := Bool(true) io.outer.acquire.valid := Bool(true)
io.outer.acquire.bits := outer_write_rel
when(io.outer.acquire.ready) { when(io.outer.acquire.ready) {
when(oacq_data_done) { when(oacq_data_done) {
pending_ognt_ack := Bool(true) pending_ognt_ack := Bool(true)
@ -353,7 +371,6 @@ class BroadcastAcquireTracker(trackerId: Int) extends BroadcastXactTracker {
} }
is(s_mem_write) { // Write data to outer memory is(s_mem_write) { // Write data to outer memory
io.outer.acquire.valid := !pending_ognt_ack || !collect_iacq_data || iacq_data_valid(oacq_data_cnt) io.outer.acquire.valid := !pending_ognt_ack || !collect_iacq_data || iacq_data_valid(oacq_data_cnt)
io.outer.acquire.bits := outer_write_acq
when(oacq_data_done) { when(oacq_data_done) {
pending_ognt_ack := Bool(true) pending_ognt_ack := Bool(true)
state := Mux(pending_outer_read, s_mem_read, s_mem_resp) state := Mux(pending_outer_read, s_mem_read, s_mem_resp)
@ -361,7 +378,6 @@ class BroadcastAcquireTracker(trackerId: Int) extends BroadcastXactTracker {
} }
is(s_mem_read) { // Read data from outer memory (possibly what was just written) is(s_mem_read) { // Read data from outer memory (possibly what was just written)
io.outer.acquire.valid := !pending_ognt_ack io.outer.acquire.valid := !pending_ognt_ack
io.outer.acquire.bits := outer_read
when(io.outer.acquire.fire()) { state := s_mem_resp } when(io.outer.acquire.fire()) { state := s_mem_resp }
} }
is(s_mem_resp) { // Wait to forward grants from outer memory is(s_mem_resp) { // Wait to forward grants from outer memory

View File

@ -34,6 +34,8 @@ case object TLDataBits extends Field[Int]
case object TLDataBeats extends Field[Int] case object TLDataBeats extends Field[Int]
/** Whether the underlying physical network preserved point-to-point ordering of messages */ /** Whether the underlying physical network preserved point-to-point ordering of messages */
case object TLNetworkIsOrderedP2P extends Field[Boolean] case object TLNetworkIsOrderedP2P extends Field[Boolean]
/** Number of bits in write mask (usually one per byte in beat) */
case object TLWriteMaskBits extends Field[Int]
/** Utility trait for building Modules and Bundles that use TileLink parameters */ /** Utility trait for building Modules and Bundles that use TileLink parameters */
trait TileLinkParameters extends UsesParameters { trait TileLinkParameters extends UsesParameters {
@ -53,7 +55,7 @@ trait TileLinkParameters extends UsesParameters {
val tlDataBits = params(TLDataBits) val tlDataBits = params(TLDataBits)
val tlDataBytes = tlDataBits/8 val tlDataBytes = tlDataBits/8
val tlDataBeats = params(TLDataBeats) val tlDataBeats = params(TLDataBeats)
val tlWriteMaskBits = if(tlDataBits/8 < 1) 1 else tlDataBits/8 val tlWriteMaskBits = params(TLWriteMaskBits)
val tlBeatAddrBits = log2Up(tlDataBeats) val tlBeatAddrBits = log2Up(tlDataBeats)
val tlByteAddrBits = log2Up(tlWriteMaskBits) val tlByteAddrBits = log2Up(tlWriteMaskBits)
val tlMemoryOpcodeBits = M_SZ val tlMemoryOpcodeBits = M_SZ
@ -1274,11 +1276,38 @@ class NASTIMasterIOTileLinkIOConverter extends TLModule with NASTIParameters {
val addr_out = Reg(UInt(width = nastiXAddrBits)) val addr_out = Reg(UInt(width = nastiXAddrBits))
val has_data = Reg(init=Bool(false)) val has_data = Reg(init=Bool(false))
val data_from_rel = Reg(init=Bool(false)) val data_from_rel = Reg(init=Bool(false))
val is_subblock = io.tl.acquire.bits.isSubBlockType()
val (tl_cnt_out, tl_wrap_out) = val (tl_cnt_out, tl_wrap_out) =
Counter((io.tl.acquire.fire() && acq_has_data) || Counter((io.tl.acquire.fire() && io.tl.acquire.bits.hasMultibeatData()) ||
(io.tl.release.fire() && rel_has_data), tlDataBeats) (io.tl.release.fire() && rel_has_data), tlDataBeats)
val tl_done_out = Reg(init=Bool(false)) val tl_done_out = Reg(init=Bool(false))
val roq_size = 4
val roq_data = Reg(Vec(UInt(width = tlByteAddrBits), roq_size))
val roq_tags = Reg(Vec(UInt(width = nastiRIdBits), roq_size))
val roq_free = Reg(init = Fill(roq_size, Bits(1, 1)))
val roq_full = !roq_free.orR
val roq_enq_addr = PriorityEncoder(roq_free)
val roq_enq_valid = io.tl.acquire.fire() && !acq_has_data && is_subblock
val roq_enq_data = io.tl.acquire.bits.addr_byte()
val roq_enq_tag = io.nasti.ar.bits.id
val roq_deq_tag = io.nasti.r.bits.id
val roq_deq_addr = PriorityEncoder(roq_tags.map(_ === roq_deq_tag))
val roq_deq_data = roq_data(roq_deq_addr)
val roq_deq_valid = io.nasti.r.fire() && !io.nasti.r.bits.id(0)
when (roq_enq_valid) {
roq_data(roq_enq_addr) := roq_enq_data
roq_tags(roq_enq_addr) := roq_enq_tag
roq_free(roq_enq_addr) := Bool(false)
}
when (roq_deq_valid) {
roq_free(roq_deq_addr) := Bool(true)
}
io.nasti.ar.bits.id := tag_out io.nasti.ar.bits.id := tag_out
io.nasti.ar.bits.addr := addr_out io.nasti.ar.bits.addr := addr_out
io.nasti.ar.bits.len := Mux(has_data, UInt(tlDataBeats-1), UInt(0)) io.nasti.ar.bits.len := Mux(has_data, UInt(tlDataBeats-1), UInt(0))
@ -1293,7 +1322,7 @@ class NASTIMasterIOTileLinkIOConverter extends TLModule with NASTIParameters {
io.nasti.aw.bits := io.nasti.ar.bits io.nasti.aw.bits := io.nasti.ar.bits
io.nasti.w.bits.strb := Mux(data_from_rel, SInt(-1), io.tl.acquire.bits.wmask()) io.nasti.w.bits.strb := Mux(data_from_rel, SInt(-1), io.tl.acquire.bits.wmask())
io.nasti.w.bits.data := Mux(data_from_rel, io.tl.release.bits.data, io.tl.acquire.bits.data) io.nasti.w.bits.data := Mux(data_from_rel, io.tl.release.bits.data, io.tl.acquire.bits.data)
io.nasti.w.bits.last := tl_wrap_out io.nasti.w.bits.last := tl_wrap_out || (io.tl.acquire.fire() && is_subblock)
when(!active_out){ when(!active_out){
io.tl.release.ready := io.nasti.w.ready io.tl.release.ready := io.nasti.w.ready
@ -1307,7 +1336,6 @@ class NASTIMasterIOTileLinkIOConverter extends TLModule with NASTIParameters {
io.nasti.aw.valid := is_write io.nasti.aw.valid := is_write
io.nasti.ar.valid := !is_write io.nasti.ar.valid := !is_write
cmd_sent_out := (!is_write && io.nasti.ar.ready) || (is_write && io.nasti.aw.ready) cmd_sent_out := (!is_write && io.nasti.ar.ready) || (is_write && io.nasti.aw.ready)
tl_done_out := tl_wrap_out
when(io.tl.release.valid) { when(io.tl.release.valid) {
data_from_rel := Bool(true) data_from_rel := Bool(true)
io.nasti.w.bits.data := io.tl.release.bits.data io.nasti.w.bits.data := io.tl.release.bits.data
@ -1319,34 +1347,35 @@ class NASTIMasterIOTileLinkIOConverter extends TLModule with NASTIParameters {
io.nasti.aw.bits.id := tag io.nasti.aw.bits.id := tag
io.nasti.aw.bits.addr := addr io.nasti.aw.bits.addr := addr
io.nasti.aw.bits.len := UInt(tlDataBeats-1) io.nasti.aw.bits.len := UInt(tlDataBeats-1)
io.nasti.aw.bits.size := MT_Q
tag_out := tag tag_out := tag
addr_out := addr addr_out := addr
has_data := rel_has_data has_data := rel_has_data
tl_done_out := tl_wrap_out
} .elsewhen(io.tl.acquire.valid) { } .elsewhen(io.tl.acquire.valid) {
data_from_rel := Bool(false) data_from_rel := Bool(false)
io.nasti.w.bits.data := io.tl.acquire.bits.data io.nasti.w.bits.data := io.tl.acquire.bits.data
io.nasti.w.bits.strb := io.tl.acquire.bits.wmask() io.nasti.w.bits.strb := io.tl.acquire.bits.wmask()
// The last bit indicates to the Grant logic what g_type to send back
// For read, true = getDataBlockType, false = getDataBeatType
// For write, it should always be false, so that putAckType is sent
val tag = Cat(io.tl.acquire.bits.client_id, val tag = Cat(io.tl.acquire.bits.client_id,
io.tl.acquire.bits.client_xact_id, io.tl.acquire.bits.client_xact_id,
io.tl.acquire.bits.isBuiltInType()) !is_write && !is_subblock)
val addr = io.tl.acquire.bits.full_addr() val addr = io.tl.acquire.bits.full_addr()
when(is_write) { when(is_write) {
io.nasti.aw.bits.id := tag io.nasti.aw.bits.id := tag
io.nasti.aw.bits.addr := addr io.nasti.aw.bits.addr := addr
io.nasti.aw.bits.len := Mux(io.tl.acquire.bits.isBuiltInType(Acquire.putBlockType), io.nasti.aw.bits.len := Mux(!is_subblock, UInt(tlDataBeats-1), UInt(0))
UInt(tlDataBeats-1), UInt(0))
io.nasti.aw.bits.size := bytesToXSize(PopCount(io.tl.acquire.bits.wmask()))
} .otherwise { } .otherwise {
io.nasti.ar.bits.id := tag io.nasti.ar.bits.id := tag
io.nasti.ar.bits.addr := addr io.nasti.ar.bits.addr := addr
io.nasti.ar.bits.len := Mux(io.tl.acquire.bits.isBuiltInType(Acquire.getBlockType), io.nasti.ar.bits.len := Mux(!is_subblock, UInt(tlDataBeats-1), UInt(0))
UInt(tlDataBeats-1), UInt(0))
io.nasti.ar.bits.size := io.tl.acquire.bits.op_size() io.nasti.ar.bits.size := io.tl.acquire.bits.op_size()
} }
tag_out := tag tag_out := tag
addr_out := addr addr_out := addr
has_data := acq_has_data has_data := acq_has_data
tl_done_out := tl_wrap_out || is_subblock
} }
} }
} }
@ -1364,24 +1393,36 @@ class NASTIMasterIOTileLinkIOConverter extends TLModule with NASTIParameters {
} }
} }
when(tl_wrap_out) { tl_done_out := Bool(true) } when(tl_wrap_out) { tl_done_out := Bool(true) }
when(cmd_sent_out && (!has_data || tl_done_out)) { active_out := Bool(false) } when(cmd_sent_out && !roq_full && (!has_data || tl_done_out)) {
active_out := Bool(false)
}
} }
assert (!io.nasti.r.valid || !io.nasti.r.bits.resp(1),
"NASTI read response error")
assert (!io.nasti.b.valid || !io.nasti.b.bits.resp(1),
"NASTI write response error")
// Aggregate incoming NASTI responses into TL Grants // Aggregate incoming NASTI responses into TL Grants
val (tl_cnt_in, tl_wrap_in) = Counter(io.tl.grant.fire() && io.tl.grant.bits.hasMultibeatData(), tlDataBeats) val (tl_cnt_in, tl_wrap_in) = Counter(io.tl.grant.fire() && io.tl.grant.bits.hasMultibeatData(), tlDataBeats)
val gnt_arb = Module(new Arbiter(new GrantToDst, 2)) val gnt_arb = Module(new Arbiter(new GrantToDst, 2))
io.tl.grant <> gnt_arb.io.out io.tl.grant <> gnt_arb.io.out
val r_aligned_data = Mux(io.nasti.r.bits.id(0),
io.nasti.r.bits.data,
io.nasti.r.bits.data << Cat(roq_deq_data, UInt(0, 3)))
gnt_arb.io.in(0).valid := io.nasti.r.valid gnt_arb.io.in(0).valid := io.nasti.r.valid
io.nasti.r.ready := gnt_arb.io.in(0).ready io.nasti.r.ready := gnt_arb.io.in(0).ready
gnt_arb.io.in(0).bits := Grant( gnt_arb.io.in(0).bits := Grant(
dst = (if(dstIdBits > 0) io.nasti.r.bits.id(dst_off, tlClientXactIdBits + 1) else UInt(0)), dst = (if(dstIdBits > 0) io.nasti.r.bits.id(dst_off, tlClientXactIdBits + 1) else UInt(0)),
is_builtin_type = io.nasti.r.bits.id(0), is_builtin_type = Bool(true),
g_type = Mux(io.nasti.r.bits.id(0), Grant.getDataBlockType, UInt(0)), // TODO: Assumes MI or MEI protocol g_type = Mux(io.nasti.r.bits.id(0),
Grant.getDataBlockType, Grant.getDataBeatType), // TODO: Assumes MI or MEI protocol
client_xact_id = io.nasti.r.bits.id >> 1, client_xact_id = io.nasti.r.bits.id >> 1,
manager_xact_id = UInt(0), manager_xact_id = UInt(0),
addr_beat = tl_cnt_in, addr_beat = tl_cnt_in,
data = io.nasti.r.bits.data) data = r_aligned_data)
gnt_arb.io.in(1).valid := io.nasti.b.valid gnt_arb.io.in(1).valid := io.nasti.b.valid
io.nasti.b.ready := gnt_arb.io.in(1).ready io.nasti.b.ready := gnt_arb.io.in(1).ready

View File

@ -20,6 +20,7 @@ trait CoherenceAgentParameters extends UsesParameters {
val innerTLParams = params.alterPartial({case TLId => params(InnerTLId)}) val innerTLParams = params.alterPartial({case TLId => params(InnerTLId)})
val innerDataBeats = innerTLParams(TLDataBeats) val innerDataBeats = innerTLParams(TLDataBeats)
val innerDataBits = innerTLParams(TLDataBits) val innerDataBits = innerTLParams(TLDataBits)
val innerWriteMaskBits = innerTLParams(TLWriteMaskBits)
val innerBeatAddrBits = log2Up(innerDataBeats) val innerBeatAddrBits = log2Up(innerDataBeats)
val innerByteAddrBits = log2Up(innerDataBits/8) val innerByteAddrBits = log2Up(innerDataBits/8)
require(outerDataBeats == innerDataBeats) //TODO: must fix all xact_data Vecs to remove this requirement require(outerDataBeats == innerDataBeats) //TODO: must fix all xact_data Vecs to remove this requirement