From 3026c46a9c81709bd1c49e1f9737af1095cf918c Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sun, 7 Dec 2014 03:02:20 -0800 Subject: [PATCH] Finish adding TLDataBeats to uncore & hub --- uncore/src/main/scala/cache.scala | 4 +- uncore/src/main/scala/coherence.scala | 6 - uncore/src/main/scala/htif.scala | 33 +++-- uncore/src/main/scala/memserdes.scala | 133 ++++++++++++------ uncore/src/main/scala/tilelink.scala | 18 +-- uncore/src/main/scala/uncore.scala | 193 ++++++++++++++++---------- uncore/src/main/scala/util.scala | 88 ++++++++++++ 7 files changed, 325 insertions(+), 150 deletions(-) create mode 100644 uncore/src/main/scala/util.scala diff --git a/uncore/src/main/scala/cache.scala b/uncore/src/main/scala/cache.scala index 0a717604..b96aab70 100644 --- a/uncore/src/main/scala/cache.scala +++ b/uncore/src/main/scala/cache.scala @@ -30,7 +30,8 @@ abstract trait CacheParameters extends UsesParameters { val rowWords = rowBits/wordBits val rowBytes = rowBits/8 val rowOffBits = log2Up(rowBytes) - val refillCycles = params(TLDataBits)/rowBits + val refillCyclesPerBeat = params(TLDataBits)/rowBits + val refillCycles = refillCyclesPerBeat*params(TLDataBeats) } abstract class CacheBundle extends Bundle with CacheParameters @@ -99,7 +100,6 @@ class MetadataArray[T <: Metadata](makeRstVal: () => T) extends CacheModule { abstract trait L2HellaCacheParameters extends CacheParameters with CoherenceAgentParameters - with TileLinkParameters abstract class L2HellaCacheBundle extends Bundle with L2HellaCacheParameters abstract class L2HellaCacheModule extends Module with L2HellaCacheParameters diff --git a/uncore/src/main/scala/coherence.scala b/uncore/src/main/scala/coherence.scala index 57545fda..280e20ad 100644 --- a/uncore/src/main/scala/coherence.scala +++ b/uncore/src/main/scala/coherence.scala @@ -3,12 +3,6 @@ package uncore import Chisel._ -object MuxBundle { - def apply[T <: Data] (default: T, mapping: Seq[(Bool, T)]): T = { - mapping.reverse.foldLeft(default)((b, a) 
=> Mux(a._1, a._2, b)) - } -} - abstract class CoherenceMetadata extends Bundle object ClientMetadata { diff --git a/uncore/src/main/scala/htif.scala b/uncore/src/main/scala/htif.scala index 405380f2..378ca3b1 100644 --- a/uncore/src/main/scala/htif.scala +++ b/uncore/src/main/scala/htif.scala @@ -13,6 +13,7 @@ case object HTIFNCores extends Field[Int] abstract trait HTIFParameters extends UsesParameters { val dataBits = params(TLDataBits) + val dataBeats = params(TLDataBeats) val co = params(TLCoherence) val w = params(HTIFWidth) val nSCR = params(HTIFNSCR) @@ -71,7 +72,7 @@ class HTIF(pcr_RESET: Int) extends Module with HTIFParameters { // system is 'interesting' if any tile is 'interesting' val short_request_bits = 64 - val long_request_bits = short_request_bits + dataBits + val long_request_bits = short_request_bits + dataBits*dataBeats require(short_request_bits % w == 0) val rx_count_w = 13 + log2Up(64) - log2Up(w) // data size field is 12 bits @@ -150,12 +151,13 @@ class HTIF(pcr_RESET: Int) extends Module with HTIFParameters { state_tx))) } - val acq_q = Module(new Queue(new Acquire, 1)) - when (state === state_mem_wreq && acq_q.io.enq.ready) { - state := state_mem_wresp + val (cnt, cnt_done) = Counter((state === state_mem_wreq && io.mem.acquire.ready) || + (state === state_mem_rresp && io.mem.grant.valid), dataBeats) + when (state === state_mem_wreq) { + when (cnt_done) { state := state_mem_wresp } } - when (state === state_mem_rreq && acq_q.io.enq.ready) { - state := state_mem_rresp + when (state === state_mem_rreq) { + when(io.mem.acquire.ready) { state := state_mem_rresp } } when (state === state_mem_wresp) { when (mem_acked) { @@ -164,10 +166,10 @@ class HTIF(pcr_RESET: Int) extends Module with HTIFParameters { } } when (state === state_mem_rresp) { - when (io.mem.grant.valid) { + when (cnt_done) { state := state_mem_finish + mem_acked := Bool(false) } - mem_acked := Bool(false) } when (state === state_mem_finish && io.mem.finish.ready) { state := 
Mux(cmd === cmd_readmem || pos === UInt(1), state_tx, state_rx) @@ -182,22 +184,19 @@ class HTIF(pcr_RESET: Int) extends Module with HTIFParameters { state := Mux(cmd === cmd_readmem && pos != UInt(0), state_mem_rreq, state_rx) } - var mem_req_data: Bits = null + var mem_req_data: UInt = null for (i <- 0 until dataBits/short_request_bits) { - val idx = UInt(i, log2Up(dataBits/short_request_bits)) + val idx = Cat(cnt, UInt(i, log2Up(dataBits/short_request_bits))) when (state === state_mem_rresp && io.mem.grant.valid) { packet_ram(idx) := io.mem.grant.bits.payload.data((i+1)*short_request_bits-1, i*short_request_bits) } mem_req_data = Cat(packet_ram(idx), mem_req_data) } - acq_q.io.enq.valid := state === state_mem_rreq || state === state_mem_wreq val init_addr = addr.toUInt >> UInt(offsetBits-3) - acq_q.io.enq.bits := Mux(cmd === cmd_writemem, - UncachedWrite(init_addr, UInt(0)), + io.mem.acquire.valid := state === state_mem_rreq || state === state_mem_wreq + io.mem.acquire.bits.payload := Mux(cmd === cmd_writemem, + UncachedWrite(init_addr, mem_req_data), UncachedRead(init_addr)) - io.mem.acquire.valid := acq_q.io.deq.valid - acq_q.io.deq.ready := io.mem.acquire.ready - io.mem.acquire.bits.payload := acq_q.io.deq.bits io.mem.acquire.bits.payload.data := mem_req_data io.mem.acquire.bits.header.src := UInt(params(LNClients)) // By convention HTIF is the client with the largest id io.mem.acquire.bits.header.dst := UInt(0) // DNC; Overwritten outside module @@ -255,7 +254,7 @@ class HTIF(pcr_RESET: Int) extends Module with HTIFParameters { for (i <- 0 until scr_rdata.size) scr_rdata(i) := io.scr.rdata(i) scr_rdata(0) := UInt(nCores) - scr_rdata(1) := UInt((BigInt(dataBits/8) << acq_q.io.enq.bits.addr.getWidth) >> 20) + scr_rdata(1) := UInt((BigInt(dataBits*dataBeats/8) << params(TLAddrBits)) >> 20) io.scr.wen := Bool(false) io.scr.wdata := pcr_wdata diff --git a/uncore/src/main/scala/memserdes.scala b/uncore/src/main/scala/memserdes.scala index ea41256e..d6133fc0 100644 
--- a/uncore/src/main/scala/memserdes.scala +++ b/uncore/src/main/scala/memserdes.scala @@ -209,81 +209,122 @@ class MemIOUncachedTileLinkIOConverter(qDepth: Int) extends Module { val mem = new MemIO } val co = params(TLCoherence) - val tbits = params(MIFTagBits) - val dbits = params(MIFDataBits) - val dbeats = params(MIFDataBeats) - require(params(TLDataBits) == dbits*dbeats) + val mifTagBits = params(MIFTagBits) + val mifDataBits = params(MIFDataBits) + val mifDataBeats = params(MIFDataBeats) + val tlDataBits = params(TLDataBits) + val tlDataBeats = params(TLDataBeats) + val dataBits = tlDataBits*tlDataBeats + require(tlDataBits*tlDataBeats == mifDataBits*mifDataBeats) //require(params(TLClientXactIdBits) <= params(MIFTagBits)) + // Decompose outgoing TL Acquires into MemIO cmd and data val mem_cmd_q = Module(new Queue(new MemReqCmd, qDepth)) val mem_data_q = Module(new Queue(new MemData, qDepth)) - val cnt_max = dbeats - val cnt_out = Reg(UInt(width = log2Up(cnt_max+1))) + io.uncached.acquire.ready := Bool(false) + io.uncached.grant.valid := Bool(false) + io.mem.resp.ready := Bool(false) + mem_cmd_q.io.enq.valid := Bool(false) + mem_data_q.io.enq.valid := Bool(false) + + val acq_has_data = co.messageHasData(io.uncached.acquire.bits.payload) + val (tl_cnt_out, tl_wrap_out) = Counter(io.uncached.acquire.fire() && acq_has_data, tlDataBeats) + val (mif_cnt_out, mif_wrap_out) = Counter(mem_data_q.io.enq.fire(), mifDataBeats) val active_out = Reg(init=Bool(false)) val cmd_sent_out = Reg(init=Bool(false)) - val buf_out = Reg(Bits()) + val tl_done_out = Reg(init=Bool(false)) + val mif_done_out = Reg(init=Bool(false)) val tag_out = Reg(Bits()) val addr_out = Reg(Bits()) val has_data = Reg(init=Bool(false)) + val tl_buf_out = Vec.fill(tlDataBeats){ Reg(io.uncached.acquire.bits.payload.data.clone) } + val mif_buf_out = Vec.fill(mifDataBeats){ new MemData } + mif_buf_out := mif_buf_out.fromBits(tl_buf_out.toBits) + val mif_prog_out = (mif_cnt_out+UInt(1, width = 
log2Up(mifDataBeats+1)))*UInt(mifDataBits) + val tl_prog_out = tl_cnt_out*UInt(tlDataBits) - val cnt_in = Reg(UInt(width = log2Up(cnt_max+1))) - val active_in = Reg(init=Bool(false)) - val buf_in = Reg(Bits()) - val tag_in = Reg(UInt(width = tbits)) - - // Decompose outgoing TL Acquires into MemIO cmd and data - when(!active_out && io.uncached.acquire.valid) { - active_out := Bool(true) - cmd_sent_out := Bool(false) - cnt_out := UInt(0) - buf_out := io.uncached.acquire.bits.payload.data - tag_out := io.uncached.acquire.bits.payload.client_xact_id - addr_out := io.uncached.acquire.bits.payload.addr - has_data := co.messageHasData(io.uncached.acquire.bits.payload) + when(!active_out){ + io.uncached.acquire.ready := Bool(true) + when(io.uncached.acquire.valid) { + active_out := Bool(true) + cmd_sent_out := Bool(false) + tag_out := io.uncached.acquire.bits.payload.client_xact_id + addr_out := io.uncached.acquire.bits.payload.addr + has_data := acq_has_data + tl_done_out := tl_wrap_out + mif_done_out := Bool(false) + tl_buf_out(tl_cnt_out) := io.uncached.acquire.bits.payload.data + } } when(active_out) { + when(!cmd_sent_out) { + mem_cmd_q.io.enq.valid := Bool(true) + } + when(has_data) { + when(!tl_done_out) { + io.uncached.acquire.ready := Bool(true) + when(io.uncached.acquire.valid) { + tl_buf_out(tl_cnt_out) := io.uncached.acquire.bits.payload.data + } + } + when(!mif_done_out) { + mem_data_q.io.enq.valid := tl_done_out || mif_prog_out <= tl_prog_out + } + } when(mem_cmd_q.io.enq.fire()) { cmd_sent_out := Bool(true) } - when(mem_data_q.io.enq.fire()) { - cnt_out := cnt_out + UInt(1) - buf_out := buf_out >> UInt(dbits) - } - when(cmd_sent_out && (!has_data || cnt_out === UInt(cnt_max))) { + when(tl_wrap_out) { tl_done_out := Bool(true) } + when(mif_wrap_out) { mif_done_out := Bool(true) } + when(cmd_sent_out && (!has_data || mif_done_out)) { active_out := Bool(false) } } - io.uncached.acquire.ready := !active_out - mem_cmd_q.io.enq.valid := active_out && 
!cmd_sent_out mem_cmd_q.io.enq.bits.rw := has_data mem_cmd_q.io.enq.bits.tag := tag_out mem_cmd_q.io.enq.bits.addr := addr_out - mem_data_q.io.enq.valid := active_out && has_data && cnt_out < UInt(cnt_max) - mem_data_q.io.enq.bits.data := buf_out + mem_data_q.io.enq.bits.data := mif_buf_out(mif_cnt_out).data io.mem.req_cmd <> mem_cmd_q.io.deq io.mem.req_data <> mem_data_q.io.deq // Aggregate incoming MemIO responses into TL Grants - io.mem.resp.ready := !active_in || cnt_in < UInt(cnt_max) - io.uncached.grant.valid := active_in && (cnt_in === UInt(cnt_max)) - io.uncached.grant.bits.payload := Grant(Bool(true), Grant.uncachedRead, tag_in, UInt(0), buf_in) - when(!active_in && io.mem.resp.valid) { - active_in := Bool(true) - cnt_in := UInt(1) - buf_in := io.mem.resp.bits.data << UInt(dbits*(cnt_max-1)) - tag_in := io.mem.resp.bits.tag + val (mif_cnt_in, mif_wrap_in) = Counter(io.mem.resp.fire(), mifDataBeats) // TODO: Assumes all resps have data + val (tl_cnt_in, tl_wrap_in) = Counter(io.uncached.grant.fire(), tlDataBeats) + val active_in = Reg(init=Bool(false)) + val mif_done_in = Reg(init=Bool(false)) + val tag_in = Reg(UInt(width = mifTagBits)) + val mif_buf_in = Vec.fill(mifDataBeats){ Reg(new MemData) } + val tl_buf_in = Vec.fill(tlDataBeats){ io.uncached.acquire.bits.payload.data.clone } + tl_buf_in := tl_buf_in.fromBits(mif_buf_in.toBits) + val tl_prog_in = (tl_cnt_in+UInt(1, width = log2Up(tlDataBeats+1)))*UInt(tlDataBits) + val mif_prog_in = mif_cnt_in*UInt(mifDataBits) + + when(!active_in) { + io.mem.resp.ready := Bool(true) + when(io.mem.resp.valid) { + active_in := Bool(true) + mif_done_in := mif_wrap_in + tag_in := io.mem.resp.bits.tag + mif_buf_in(tl_cnt_in).data := io.mem.resp.bits.data + } } + when(active_in) { - when(io.uncached.grant.fire()) { - active_in := Bool(false) - } - when(io.mem.resp.fire()) { - buf_in := Cat(io.mem.resp.bits.data, buf_in(cnt_max*dbits-1,dbits)) - cnt_in := cnt_in + UInt(1) + io.uncached.grant.valid := mif_done_in || 
tl_prog_in <= mif_prog_in + when(!mif_done_in) { + io.mem.resp.ready := Bool(true) + when(io.mem.resp.valid) { + mif_buf_in(mif_cnt_in).data := io.mem.resp.bits.data + } } + when(mif_wrap_in) { mif_done_in := Bool(true) } + when(tl_wrap_in) { active_in := Bool(false) } } + + io.uncached.grant.bits.payload := Grant(Bool(true), Grant.uncachedRead, tag_in, UInt(0), + tl_buf_in(tl_cnt_in)) } class HellaFlowQueue[T <: Data](val entries: Int)(data: => T) extends Module @@ -390,8 +431,8 @@ class MemPipeIOUncachedTileLinkIOConverter(outstanding: Int, refillCycles: Int) val a = Module(new MemIOUncachedTileLinkIOConverter(2)) val b = Module(new MemPipeIOMemIOConverter(outstanding, refillCycles)) a.io.uncached <> io.uncached - b.io.cpu.req_cmd <> Queue(a.io.mem.req_cmd, 2) - b.io.cpu.req_data <> Queue(a.io.mem.req_data, refillCycles) + b.io.cpu.req_cmd <> Queue(a.io.mem.req_cmd, 2, pipe=true) + b.io.cpu.req_data <> Queue(a.io.mem.req_data, refillCycles, pipe=true) a.io.mem.resp <> b.io.cpu.resp b.io.mem <> io.mem } diff --git a/uncore/src/main/scala/tilelink.scala b/uncore/src/main/scala/tilelink.scala index 1e535507..678efb6a 100644 --- a/uncore/src/main/scala/tilelink.scala +++ b/uncore/src/main/scala/tilelink.scala @@ -26,9 +26,11 @@ abstract trait TileLinkParameters extends UsesParameters { (tlSubblockAddrBits + tlUncachedOperandSizeBits + tlAtomicOpcodeBits)) + val co = params(TLCoherence) } -class TLBundle extends Bundle with TileLinkParameters +abstract class TLBundle extends Bundle with TileLinkParameters +abstract class TLModule extends Module with TileLinkParameters trait HasPhysicalAddress extends TLBundle { val addr = UInt(width = tlAddrBits) @@ -55,7 +57,7 @@ class Acquire extends ClientSourcedMessage with HasClientTransactionId with HasTileLinkData { val uncached = Bool() - val a_type = UInt(width = max(log2Up(Acquire.nUncachedAcquireTypes), params(TLCoherence).acquireTypeWidth)) + val a_type = UInt(width = max(log2Up(Acquire.nUncachedAcquireTypes), 
co.acquireTypeWidth)) val subblock = Bits(width = tlSubblockUnionBits) val sbAddrOff = tlSubblockAddrBits + tlUncachedOperandSizeBits val opSzOff = tlUncachedOperandSizeBits + sbAddrOff @@ -147,7 +149,7 @@ object Probe { class Probe extends MasterSourcedMessage with HasPhysicalAddress { - val p_type = UInt(width = params(TLCoherence).probeTypeWidth) + val p_type = UInt(width = co.probeTypeWidth) def is(t: UInt) = p_type === t } @@ -172,7 +174,7 @@ class Release extends ClientSourcedMessage with HasPhysicalAddress with HasClientTransactionId with HasTileLinkData { - val r_type = UInt(width = params(TLCoherence).releaseTypeWidth) + val r_type = UInt(width = co.releaseTypeWidth) def is(t: UInt) = r_type === t } @@ -181,7 +183,7 @@ class Grant extends MasterSourcedMessage with HasClientTransactionId with HasMasterTransactionId { val uncached = Bool() - val g_type = UInt(width = max(log2Up(Grant.nUncachedGrantTypes), params(TLCoherence).grantTypeWidth)) + val g_type = UInt(width = max(log2Up(Grant.nUncachedGrantTypes), co.grantTypeWidth)) def is(t: UInt) = g_type === t } @@ -221,7 +223,7 @@ class TileLinkIO extends UncachedTileLinkIO { val release = new DecoupledIO(new LogicalNetworkIO(new Release)) } -abstract class TileLinkArbiterLike(val arbN: Int) extends Module { +abstract class TileLinkArbiterLike(val arbN: Int) extends TLModule { type MasterSourcedWithId = MasterSourcedMessage with HasClientTransactionId type ClientSourcedWithId = ClientSourcedMessage with HasClientTransactionId @@ -232,8 +234,8 @@ abstract class TileLinkArbiterLike(val arbN: Int) extends Module { def hookupClientSource[M <: ClientSourcedWithId] (ins: Seq[DecoupledIO[LogicalNetworkIO[M]]], out: DecoupledIO[LogicalNetworkIO[M]]) { - def hasData(m: LogicalNetworkIO[M]) = params(TLCoherence).messageHasData(m.payload) - val arb = Module(new RRArbiter(out.bits.clone, arbN)) + def hasData(m: LogicalNetworkIO[M]) = co.messageHasData(m.payload) + val arb = Module(new LockingRRArbiter(out.bits.clone, 
arbN, params(TLDataBeats), Some(hasData _))) out <> arb.io.out ins.zipWithIndex.zip(arb.io.in).map{ case ((req,id), arb) => { arb.valid := req.valid diff --git a/uncore/src/main/scala/uncore.scala b/uncore/src/main/scala/uncore.scala index 858d603c..69b13800 100644 --- a/uncore/src/main/scala/uncore.scala +++ b/uncore/src/main/scala/uncore.scala @@ -8,14 +8,16 @@ case object NAcquireTransactors extends Field[Int] case object L2StoreDataQueueDepth extends Field[Int] case object NClients extends Field[Int] -abstract trait CoherenceAgentParameters extends UsesParameters { - val co = params(TLCoherence) +abstract trait CoherenceAgentParameters extends UsesParameters + with TileLinkParameters { val nReleaseTransactors = 1 val nAcquireTransactors = params(NAcquireTransactors) val nTransactors = nReleaseTransactors + nAcquireTransactors val nClients = params(NClients) - val sdqDepth = params(L2StoreDataQueueDepth) - val sdqIdBits = math.max(log2Up(nReleaseTransactors) + 1, log2Up(params(L2StoreDataQueueDepth))) + 1 + val sdqDepth = params(L2StoreDataQueueDepth)*tlDataBeats + val dqIdxBits = math.max(log2Up(nReleaseTransactors) + 1, log2Up(params(L2StoreDataQueueDepth))) + + log2Ceil(tlDataBeats) + val nDataQueueLocations = 3 //Stores, VoluntaryWBs, Releases } abstract class CoherenceAgent(innerId: String, outerId: String) extends Module @@ -27,75 +29,93 @@ abstract class CoherenceAgent(innerId: String, outerId: String) extends Module } } +class DataQueueLocation extends Bundle with CoherenceAgentParameters { + val idx = UInt(width = dqIdxBits) + val loc = UInt(width = log2Ceil(nDataQueueLocations)) +} +object DataQueueLocation { + def apply(idx: UInt, loc: UInt) = { + val d = new DataQueueLocation + d.idx := idx + d.loc := loc + d + } +} + class L2CoherenceAgent(bankId: Int, innerId: String, outerId: String) extends CoherenceAgent(innerId, outerId) { - // Queue to store impending UncachedWrite data - val sdq_val = Reg(init=Bits(0, sdqDepth)) - val sdq_alloc_id = 
PriorityEncoder(~sdq_val(sdqDepth-1,0)) - val sdq_rdy = !sdq_val.andR - val sdq_enq = io.inner.acquire.valid && io.inner.acquire.ready && co.messageHasData(io.inner.acquire.bits.payload) - val sdq = Vec.fill(sdqDepth){Reg(io.inner.acquire.bits.payload.data)} - when (sdq_enq) { sdq(sdq_alloc_id) := io.inner.acquire.bits.payload.data } + val internalDataBits = new DataQueueLocation().getWidth + val inStoreQueue :: inVolWBQueue :: inClientReleaseQueue :: Nil = Enum(UInt(), nDataQueueLocations) // Create SHRs for outstanding transactions val trackerList = (0 until nReleaseTransactors).map(id => - Module(new VoluntaryReleaseTracker(id, bankId, innerId, outerId), {case TLDataBits => sdqIdBits})) ++ + Module(new VoluntaryReleaseTracker(id, bankId, innerId, outerId), {case TLDataBits => internalDataBits})) ++ (nReleaseTransactors until nTransactors).map(id => - Module(new AcquireTracker(id, bankId, innerId, outerId), {case TLDataBits => sdqIdBits})) + Module(new AcquireTracker(id, bankId, innerId, outerId), {case TLDataBits => internalDataBits})) // Propagate incoherence flags trackerList.map(_.io.tile_incoherent := io.incoherent.toBits) - // Handle acquire transaction initiation + // Queue to store impending UncachedWrite data val acquire = io.inner.acquire + val sdq_val = Reg(init=Bits(0, sdqDepth)) + val sdq_alloc_id = PriorityEncoder(~sdq_val) + val sdq_rdy = !sdq_val.andR + val sdq_enq = acquire.fire() && co.messageHasData(acquire.bits.payload) + val sdq = Vec.fill(sdqDepth){ Reg(io.inner.acquire.bits.payload.data) } + when (sdq_enq) { sdq(sdq_alloc_id) := acquire.bits.payload.data } + + // Handle acquire transaction initiation val any_acquire_conflict = trackerList.map(_.io.has_acquire_conflict).reduce(_||_) val block_acquires = any_acquire_conflict - val alloc_arb = Module(new Arbiter(Bool(), trackerList.size)) for( i <- 0 until trackerList.size ) { val t = trackerList(i).io.inner alloc_arb.io.in(i).valid := t.acquire.ready t.acquire.bits := acquire.bits - 
t.acquire.bits.payload.data := Cat(sdq_alloc_id, UInt(1)) + t.acquire.bits.payload.data := DataQueueLocation(sdq_alloc_id, inStoreQueue).toBits t.acquire.valid := alloc_arb.io.in(i).ready } acquire.ready := trackerList.map(_.io.inner.acquire.ready).reduce(_||_) && sdq_rdy && !block_acquires alloc_arb.io.out.ready := acquire.valid && sdq_rdy && !block_acquires - // Handle probe request generation - val probe_arb = Module(new Arbiter(new LogicalNetworkIO(new Probe), trackerList.size)) - io.inner.probe <> probe_arb.io.out - probe_arb.io.in zip trackerList map { case (arb, t) => arb <> t.io.inner.probe } - - // Handle releases, which might be voluntary and might have data + // Queue to store impending Voluntary Release data val release = io.inner.release val voluntary = co.isVoluntary(release.bits.payload) + val vwbdq_enq = release.fire() && voluntary && co.messageHasData(release.bits.payload) + val (rel_data_cnt, rel_data_done) = Counter(vwbdq_enq, tlDataBeats) //TODO Zero width + val vwbdq = Vec.fill(tlDataBeats){ Reg(release.bits.payload.data) } //TODO Assumes nReleaseTransactors == 1 + when(vwbdq_enq) { vwbdq(rel_data_cnt) := release.bits.payload.data } + + // Handle releases, which might be voluntary and might have data val any_release_conflict = trackerList.tail.map(_.io.has_release_conflict).reduce(_||_) val block_releases = Bool(false) val conflict_idx = Vec(trackerList.map(_.io.has_release_conflict)).lastIndexWhere{b: Bool => b} val release_idx = Mux(voluntary, UInt(0), conflict_idx) - // TODO: Add merging logic to allow allocated AcquireTracker to handle conflicts, send all necessary grants, use first sufficient response for( i <- 0 until trackerList.size ) { val t = trackerList(i).io.inner t.release.bits := release.bits - t.release.bits.payload.data := (if (i < nReleaseTransactors) Cat(UInt(i), UInt(2)) else UInt(0)) + t.release.bits.payload.data := (if (i < nReleaseTransactors) + DataQueueLocation(rel_data_cnt, inVolWBQueue) + else 
DataQueueLocation(UInt(0), inClientReleaseQueue)).toBits t.release.valid := release.valid && (release_idx === UInt(i)) && !block_releases } release.ready := Vec(trackerList.map(_.io.inner.release.ready)).read(release_idx) && !block_releases - val vwbdq = Vec.fill(nReleaseTransactors){ Reg(release.bits.payload.data) } - when(voluntary && release.fire()) { - vwbdq(release_idx) := release.bits.payload.data - } + // Wire probe requests to clients + val probe_arb = Module(new Arbiter(new LogicalNetworkIO(new Probe), trackerList.size)) + io.inner.probe <> probe_arb.io.out + probe_arb.io.in zip trackerList map { case (arb, t) => arb <> t.io.inner.probe } - // Reply to initial requestor - val grant_arb = Module(new Arbiter(new LogicalNetworkIO(new Grant), trackerList.size)) + // Wire grant reply to initiating client + def hasData(m: LogicalNetworkIO[Grant]) = co.messageHasData(m.payload) + val grant_arb = Module(new LockingArbiter(new LogicalNetworkIO(new Grant), trackerList.size, tlDataBeats, Some(hasData _))) io.inner.grant.bits.payload.data := io.outer.grant.bits.payload.data io.inner.grant <> grant_arb.io.out grant_arb.io.in zip trackerList map { case (arb, t) => arb <> t.io.inner.grant } - // Free finished transactions + // Wire finished transaction acks val ack = io.inner.finish trackerList.map(_.io.inner.finish.valid := ack.valid) trackerList.map(_.io.inner.finish.bits := ack.bits) @@ -103,27 +123,28 @@ class L2CoherenceAgent(bankId: Int, innerId: String, outerId: String) extends // Create an arbiter for the one memory port val outer_arb = Module(new UncachedTileLinkIOArbiterThatPassesId(trackerList.size), - { case TLId => outerId; case TLDataBits => sdqIdBits }) + { case TLId => outerId; case TLDataBits => internalDataBits }) outer_arb.io.in zip trackerList map { case(arb, t) => arb <> t.io.outer } - val is_in_sdq = outer_arb.io.out.acquire.bits.payload.data(0) - val is_in_vwbdq = outer_arb.io.out.acquire.bits.payload.data(1) - val free_sdq_id = 
outer_arb.io.out.acquire.bits.payload.data >> UInt(1) - val free_vwbdq_id = outer_arb.io.out.acquire.bits.payload.data >> UInt(2) - val free_sdq = io.outer.acquire.fire() && co.messageHasData(io.outer.acquire.bits.payload) && is_in_sdq - io.outer.acquire.bits.payload.data := Mux(is_in_sdq, sdq(free_sdq_id), - Mux(is_in_vwbdq, vwbdq(free_vwbdq_id), release.bits.payload.data)) + val outer_data_ptr = new DataQueueLocation().fromBits(outer_arb.io.out.acquire.bits.payload.data) + val is_in_sdq = outer_data_ptr.loc === inStoreQueue + val free_sdq = io.outer.acquire.fire() && + co.messageHasData(io.outer.acquire.bits.payload) && + outer_data_ptr.loc === inStoreQueue + io.outer.acquire.bits.payload.data := MuxLookup(outer_data_ptr.loc, release.bits.payload.data, Array( + inStoreQueue -> sdq(outer_data_ptr.idx), + inVolWBQueue -> vwbdq(outer_data_ptr.idx))) io.outer <> outer_arb.io.out // Update SDQ valid bits when (io.outer.acquire.valid || sdq_enq) { - sdq_val := sdq_val & ~(UIntToOH(free_sdq_id) & Fill(sdqDepth, free_sdq)) | + sdq_val := sdq_val & ~(UIntToOH(outer_data_ptr.idx) & Fill(sdqDepth, free_sdq)) | PriorityEncoderOH(~sdq_val(sdqDepth-1,0)) & Fill(sdqDepth, sdq_enq) } } abstract class XactTracker(innerId: String, outerId: String) extends Module { - val (co, nClients) = (params(TLCoherence),params(NClients)) + val (co, nClients, tlDataBeats) = (params(TLCoherence),params(NClients),params(TLDataBeats)) val io = new Bundle { val inner = Bundle(new TileLinkIO, {case TLId => innerId}).flip val outer = Bundle(new UncachedTileLinkIO, {case TLId => outerId}) @@ -137,13 +158,20 @@ abstract class XactTracker(innerId: String, outerId: String) extends Module { val c_gnt = io.inner.grant.bits val c_ack = io.inner.finish.bits val m_gnt = io.outer.grant.bits + } class VoluntaryReleaseTracker(trackerId: Int, bankId: Int, innerId: String, outerId: String) extends XactTracker(innerId, outerId) { - val s_idle :: s_mem :: s_ack :: s_busy :: Nil = Enum(UInt(), 4) + val s_idle :: 
s_outer :: s_ack :: s_busy :: Nil = Enum(UInt(), 4) val state = Reg(init=s_idle) val xact = Reg{ new Release } val init_client_id = Reg(init=UInt(0, width = log2Up(nClients))) + val data_ptrs = Vec.fill(tlDataBeats){ Reg(io.inner.release.bits.payload.data.clone) } + val collect_inner_data = Reg(init=Bool(false)) + val (inner_data_cnt, inner_data_done) = + Counter(io.inner.release.fire() && co.messageHasData(io.inner.release.bits.payload), tlDataBeats) + val (outer_data_cnt, outer_data_done) = + Counter(io.outer.acquire.fire() && co.messageHasData(io.outer.acquire.bits.payload), tlDataBeats) io.has_acquire_conflict := Bool(false) io.has_release_conflict := co.isCoherenceConflict(xact.addr, c_rel.payload.addr) && @@ -156,7 +184,7 @@ class VoluntaryReleaseTracker(trackerId: Int, bankId: Int, innerId: String, oute io.outer.acquire.bits.payload := Bundle(UncachedWrite( xact.addr, UInt(trackerId), - xact.data), + data_ptrs(outer_data_cnt)), { case TLId => outerId }) io.inner.acquire.ready := Bool(false) io.inner.probe.valid := Bool(false) @@ -169,18 +197,28 @@ class VoluntaryReleaseTracker(trackerId: Int, bankId: Int, innerId: String, oute xact.client_xact_id, UInt(trackerId)) + when(collect_inner_data) { + io.inner.release.ready := Bool(true) + when(io.inner.release.valid) { + data_ptrs(inner_data_cnt) := c_rel.payload.data + } + when(inner_data_done) { collect_inner_data := Bool(false) } + } + switch (state) { is(s_idle) { + io.inner.release.ready := Bool(true) when( io.inner.release.valid ) { - io.inner.release.ready := Bool(true) xact := c_rel.payload init_client_id := c_rel.header.src - state := Mux(co.messageHasData(c_rel.payload), s_mem, s_ack) + data_ptrs(UInt(0)) := c_rel.payload.data + collect_inner_data := co.messageHasData(c_rel.payload) + state := Mux(co.messageHasData(c_rel.payload), s_outer, s_ack) } } - is(s_mem) { - io.outer.acquire.valid := Bool(true) - when(io.outer.acquire.ready) { state := s_ack } + is(s_outer) { + io.outer.acquire.valid := 
(if(tlDataBeats == 1) Bool(true) else !collect_inner_data || (outer_data_cnt < inner_data_cnt)) + when(outer_data_done) { state := s_ack } } is(s_ack) { io.inner.grant.valid := Bool(true) @@ -195,6 +233,12 @@ class AcquireTracker(trackerId: Int, bankId: Int, innerId: String, outerId: Stri val xact = Reg{ new Acquire } val init_client_id = Reg(init=UInt(0, width = log2Up(nClients))) //TODO: Will need id reg for merged release xacts + val data_ptrs = Vec.fill(tlDataBeats){ Reg(io.inner.acquire.bits.payload.data.clone) } + val collect_inner_data = Reg(init=Bool(false)) + val (inner_data_cnt, inner_data_done) = + Counter(io.inner.acquire.fire() && co.messageHasData(io.inner.acquire.bits.payload), tlDataBeats) + val (outer_data_cnt, outer_data_done) = + Counter(io.outer.acquire.fire() && co.messageHasData(io.outer.acquire.bits.payload), tlDataBeats) val release_count = if (nClients == 1) UInt(0) else Reg(init=UInt(0, width = log2Up(nClients))) val probe_flags = Reg(init=Bits(0, width = nClients)) @@ -202,23 +246,21 @@ class AcquireTracker(trackerId: Int, bankId: Int, innerId: String, outerId: Stri val pending_outer_write = co.messageHasData(xact) val pending_outer_read = co.requiresOuterRead(xact, co.masterMetadataOnFlush) - val outer_write_acq = Bundle(UncachedWrite(xact.addr, UInt(trackerId), xact.data), + val outer_write_acq = Bundle(UncachedWrite(xact.addr, UInt(trackerId), data_ptrs(outer_data_cnt)), { case TLId => outerId }) - val outer_write_rel = Bundle(UncachedWrite(xact.addr, UInt(trackerId), UInt(0)), // Special SQDId + val outer_write_rel = Bundle(UncachedWrite(xact.addr, UInt(trackerId), c_rel.payload.data), { case TLId => outerId }) val outer_read = Bundle(UncachedRead(xact.addr, UInt(trackerId)), { case TLId => outerId }) val probe_initial_flags = Bits(width = nClients) probe_initial_flags := Bits(0) - if (nClients > 1) { - // issue self-probes for uncached read xacts to facilitate I$ coherence - val probe_self = Bool(true) 
//co.needsSelfProbe(io.inner.acquire.bits.payload) - val myflag = Mux(probe_self, Bits(0), UIntToOH(c_acq.header.src(log2Up(nClients)-1,0))) - probe_initial_flags := ~(io.tile_incoherent | myflag) - } + // issue self-probes for uncached read xacts to facilitate I$ coherence + val probe_self = co.requiresSelfProbe(io.inner.acquire.bits.payload) + val myflag = Mux(probe_self, Bits(0), UIntToOH(c_acq.header.src(log2Up(nClients)-1,0))) + probe_initial_flags := ~(io.tile_incoherent | myflag) - io.has_acquire_conflict := co.isCoherenceConflict(xact.addr, c_acq.payload.addr) && (state != s_idle) + io.has_acquire_conflict := co.isCoherenceConflict(xact.addr, c_acq.payload.addr) && (state != s_idle) && !collect_inner_data io.has_release_conflict := co.isCoherenceConflict(xact.addr, c_rel.payload.addr) && (state != s_idle) io.outer.acquire.valid := Bool(false) @@ -244,6 +286,14 @@ class AcquireTracker(trackerId: Int, bankId: Int, innerId: String, outerId: Stri io.inner.acquire.ready := Bool(false) io.inner.release.ready := Bool(false) + when(collect_inner_data) { + io.inner.acquire.ready := Bool(true) + when(io.inner.acquire.valid) { + data_ptrs(inner_data_cnt) := c_acq.payload.data + } + when(inner_data_done) { collect_inner_data := Bool(false) } + } + switch (state) { is(s_idle) { io.inner.acquire.ready := Bool(true) @@ -252,14 +302,13 @@ class AcquireTracker(trackerId: Int, bankId: Int, innerId: String, outerId: Stri when( io.inner.acquire.valid ) { xact := c_acq.payload init_client_id := c_acq.header.src + data_ptrs(UInt(0)) := c_acq.payload.data + collect_inner_data := co.messageHasData(c_acq.payload) probe_flags := probe_initial_flags - if(nClients > 1) { - release_count := PopCount(probe_initial_flags) - state := Mux(probe_initial_flags.orR, s_probe, - Mux(needs_outer_write, s_mem_write, - Mux(needs_outer_read, s_mem_read, s_make_grant))) - } else state := Mux(needs_outer_write, s_mem_write, - Mux(needs_outer_read, s_mem_read, s_make_grant)) + release_count := 
PopCount(probe_initial_flags) + state := Mux(probe_initial_flags.orR, s_probe, + Mux(needs_outer_write, s_mem_write, + Mux(needs_outer_read, s_mem_read, s_make_grant))) } } is(s_probe) { @@ -276,15 +325,17 @@ class AcquireTracker(trackerId: Int, bankId: Int, innerId: String, outerId: Stri io.outer.acquire.bits.payload := outer_write_rel when(io.outer.acquire.ready) { io.inner.release.ready := Bool(true) - if(nClients > 1) release_count := release_count - UInt(1) - when(release_count === UInt(1)) { - state := Mux(pending_outer_write, s_mem_write, - Mux(pending_outer_read, s_mem_read, s_make_grant)) + when(outer_data_done) { + release_count := release_count - UInt(1) + when(release_count === UInt(1)) { + state := Mux(pending_outer_write, s_mem_write, + Mux(pending_outer_read, s_mem_read, s_make_grant)) + } } } } .otherwise { io.inner.release.ready := Bool(true) - if(nClients > 1) release_count := release_count - UInt(1) + release_count := release_count - UInt(1) when(release_count === UInt(1)) { state := Mux(pending_outer_write, s_mem_write, Mux(pending_outer_read, s_mem_read, s_make_grant)) @@ -300,9 +351,9 @@ class AcquireTracker(trackerId: Int, bankId: Int, innerId: String, outerId: Stri } } is(s_mem_write) { - io.outer.acquire.valid := Bool(true) + io.outer.acquire.valid := (if(tlDataBeats == 1) Bool(true) else !collect_inner_data || (outer_data_cnt < inner_data_cnt)) io.outer.acquire.bits.payload := outer_write_acq - when(io.outer.acquire.ready) { + when(outer_data_done) { state := Mux(pending_outer_read, s_mem_read, s_make_grant) } } diff --git a/uncore/src/main/scala/util.scala b/uncore/src/main/scala/util.scala new file mode 100644 index 00000000..c28778aa --- /dev/null +++ b/uncore/src/main/scala/util.scala @@ -0,0 +1,88 @@ +// See LICENSE for license details. 
+
+package uncore
+
+import Chisel._
+import scala.math._
+// Priority mux over (condition, value) pairs: the earliest true condition wins, otherwise default.
+object MuxBundle {
+  def apply[T <: Data] (default: T, mapping: Seq[(Bool, T)]): T = {
+    mapping.reverse.foldLeft(default)((b, a) => Mux(a._1, a._2, b)) // reversed so the first pair ends up outermost
+  }
+}
+
+// Counter that reports a wrap on its n-th increment; produces a 0-width value when counting to 1
+class ZCounter(val n: Int) {
+  val value = Reg(init=UInt(0, log2Ceil(n)))
+  def inc(): Bool = { // advances the count and returns true when value is at n-1
+    if (n == 1) Bool(true)
+    else {
+      val wrap = value === UInt(n-1)
+      value := Mux(Bool(!isPow2(n)) && wrap, UInt(0), value + UInt(1)) // explicit reset only when n is not a power of two
+      wrap
+    }
+  }
+}
+// Factories: a bare ZCounter, or a (value, wrapped) pair for a counter that advances only while cond holds.
+object ZCounter {
+  def apply(n: Int) = new ZCounter(n)
+  def apply(cond: Bool, n: Int): (UInt, Bool) = {
+    val c = new ZCounter(n)
+    var wrap: Bool = null // assigned in the when block below, which runs during elaboration
+    when (cond) { wrap = c.inc() } // inc() is called here so its register update is conditional on cond
+    (c.value, cond && wrap)
+  }
+}
+// Forwards io.in to io.out, emitting payload.data as n narrowWidth-bit slices when doSer(payload) holds.
+class FlowThroughSerializer[T <: HasTileLinkData](gen: LogicalNetworkIO[T], n: Int, doSer: T => Bool) extends Module {
+  val io = new Bundle {
+    val in = Decoupled(gen.clone).flip
+    val out = Decoupled(gen.clone)
+    val cnt = UInt(OUTPUT, log2Up(n)) // index of the slice currently driven on io.out
+    val done = Bool(OUTPUT) // high when the current input message has been fully forwarded
+  }
+  require(n > 0 && io.in.bits.payload.data.getWidth % n == 0) // payload.data must split into n equal slices
+  val narrowWidth = io.in.bits.payload.data.getWidth / n
+  // n == 1: nothing to serialize, so the input is connected straight to the output.
+  if(n == 1) {
+    io.in <> io.out
+    io.cnt := UInt(width = 0)
+    io.done := Bool(true)
+  } else {
+    val cnt = Reg(init=UInt(0, width = log2Up(n)))
+    val wrap = cnt === UInt(n-1)
+    val rbits = Reg(init=io.in.bits)
+    val active = Reg(init=Bool(false))
+    // shifter(i) is the i-th narrowWidth-bit slice of the latched payload data.
+    val shifter = Vec.fill(n){Bits(width = narrowWidth)}
+    (0 until n).foreach {
+      i => shifter(i) := rbits.payload.data((i+1)*narrowWidth-1,i*narrowWidth)
+    }
+    // Defaults: io.in is passed through combinationally; the when blocks below override these while serializing.
+    io.done := Bool(false)
+    io.cnt := cnt
+    io.in.ready := !active
+    io.out.valid := active || io.in.valid
+    io.out.bits := io.in.bits
+    when(!active && io.in.valid) {
+      when(doSer(io.in.bits.payload)) {
+        cnt := Mux(io.out.ready, UInt(1), UInt(0)) // skip slice 0 if the flowed-through beat is accepted now
+        rbits := io.in.bits
+        active := Bool(true)
+      }
+      io.done := !doSer(io.in.bits.payload)
+    }
+    when(active) {
+      io.out.bits := rbits
+      io.out.bits.payload.data := shifter(cnt)
+      when(io.out.ready) {
+        cnt := cnt + UInt(1)
+        when(wrap) {
+          cnt := UInt(0)
+          io.done := Bool(true)
+          active := Bool(false)
+        }
+      }
+    }
+  }
+}