From fb2644760ff155c4b4dc14e954d65b5d40157c40 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 16 Jan 2013 23:57:35 -0800 Subject: [PATCH] single-ported coherence master --- uncore/src/package.scala | 6 +- uncore/src/uncore.scala | 344 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 347 insertions(+), 3 deletions(-) diff --git a/uncore/src/package.scala b/uncore/src/package.scala index 3c2a93df..2e73a6d7 100644 --- a/uncore/src/package.scala +++ b/uncore/src/package.scala @@ -112,11 +112,11 @@ class MasterSourcedIO[T <: Data]()(data: => T) extends DirectionalFIFOIO()(data) class TileLinkIO(implicit conf: LogicalNetworkConfiguration) extends Bundle { val xact_init = (new ClientSourcedIO){(new LogicalNetworkIO){new TransactionInit }} val xact_init_data = (new ClientSourcedIO){(new LogicalNetworkIO){new TransactionInitData }} - val xact_abort = (new MasterSourcedIO) {(new LogicalNetworkIO){new TransactionAbort }} - val probe_req = (new MasterSourcedIO) {(new LogicalNetworkIO){new ProbeRequest }} + val xact_abort = (new MasterSourcedIO){(new LogicalNetworkIO){new TransactionAbort }} + val probe_req = (new MasterSourcedIO){(new LogicalNetworkIO){new ProbeRequest }} val probe_rep = (new ClientSourcedIO){(new LogicalNetworkIO){new ProbeReply }} val probe_rep_data = (new ClientSourcedIO){(new LogicalNetworkIO){new ProbeReplyData }} - val xact_rep = (new MasterSourcedIO) {(new LogicalNetworkIO){new TransactionReply }} + val xact_rep = (new MasterSourcedIO){(new LogicalNetworkIO){new TransactionReply }} val xact_finish = (new ClientSourcedIO){(new LogicalNetworkIO){new TransactionFinish }} override def clone = { new TileLinkIO().asInstanceOf[this.type] } } diff --git a/uncore/src/uncore.scala b/uncore/src/uncore.scala index 697b632c..179179e8 100644 --- a/uncore/src/uncore.scala +++ b/uncore/src/uncore.scala @@ -524,3 +524,347 @@ class CoherenceHubBroadcast(implicit conf: CoherenceHubConfiguration) extends Co } } + +abstract class CoherenceAgent(implicit conf:
LogicalNetworkConfiguration) extends Component with MasterCoherenceAgent { /* common IO for coherence masters: flipped TileLink port, per-tile incoherent flags, memory port */ + val io = new Bundle { + val network = (new TileLinkIO).flip + val incoherent = Vec(conf.nTiles) { Bool() }.asInput + val mem = new ioMem + } +} + +class L2CoherenceAgent(implicit conf: CoherenceHubConfiguration) extends CoherenceAgent()(conf.ln) +{ /* builds NGLOBAL_XACTS XactTrackers and shares the network channels and the one memory port among them */ + implicit val lnConf = conf.ln + val co = conf.co + val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(_)) + val p_rep_data_dep_q = (new Queue(NGLOBAL_XACTS)){new TrackerDependency} // depth must be >= NPRIMARY + val x_init_data_dep_q = (new Queue(NGLOBAL_XACTS)){new TrackerDependency} // depth should be >= NPRIMARY + + for( i <- 0 until NGLOBAL_XACTS ) { + val t = trackerList(i) + t.io.tile_incoherent := io.incoherent.toBits + t.io.mem_resp.valid := io.mem.resp.valid && (io.mem.resp.bits.tag === UFix(i)) // each memory response is presented only to the tracker whose index equals its tag + t.io.mem_resp.bits := io.mem.resp.bits + } + io.mem.resp.ready := trackerList.map(_.io.mem_resp.ready).reduce(_||_) + + // Handle transaction initiation requests + // Only one allocation per cycle + // Init requests may or may not have data + val x_init = io.network.xact_init + val x_init_data = io.network.xact_init_data + val x_abort = io.network.xact_abort + val x_dep_deq = x_init_data_dep_q.io.deq + val s_idle :: s_abort_drain :: s_abort_send :: Nil = Enum(3){ UFix() } + val abort_state = Reg(resetVal = s_idle) + val abort_cnt = Reg(resetVal = UFix(0, width = log2Up(REFILL_CYCLES))) + val any_conflict = trackerList.map(_.io.has_conflict).reduce(_||_) + val all_busy = trackerList.map(_.io.busy).reduce(_&&_) + val want_to_abort = x_init.valid && (any_conflict || all_busy || (!x_init_data_dep_q.io.enq.ready && co.messageHasData(x_init.bits.payload))) // abort on an address conflict, when every tracker is busy, or when the init carries data but the dependency queue cannot accept an entry + + val alloc_arb = (new Arbiter(NGLOBAL_XACTS)) { Bool() } + for( i <- 0 until NGLOBAL_XACTS ) { + alloc_arb.io.in(i).valid := !trackerList(i).io.busy // only idle trackers request allocation + trackerList(i).io.x_init.bits := x_init.bits + trackerList(i).io.x_init.valid := (abort_state === s_idle) && !want_to_abort &&
x_init.valid && alloc_arb.io.in(i).ready + + trackerList(i).io.x_init_data.bits := x_init_data.bits + trackerList(i).io.x_init_data.valid := x_init_data.valid + trackerList(i).io.x_init_data_dep.bits := x_dep_deq.bits + trackerList(i).io.x_init_data_dep.valid := x_dep_deq.valid + } + val pop_x_init = trackerList.map(_.io.x_init.ready).reduce(_||_) + x_init.ready := (x_abort.valid && x_abort.ready) || pop_x_init + x_init_data.ready := (abort_state === s_abort_drain) || trackerList.map(_.io.x_init_data.ready).reduce(_||_) /* data beats are also accepted while an abort is draining */ + x_init_data_dep_q.io.enq.valid := pop_x_init && co.messageHasData(x_init.bits.payload) && (abort_state === s_idle) + x_init_data_dep_q.io.enq.bits.global_xact_id := OHToUFix(trackerList.map(_.io.x_init.ready)) /* index of the tracker that accepted the init */ + x_dep_deq.ready := trackerList.map(_.io.x_init_data_dep.ready).reduce(_||_) + + alloc_arb.io.out.ready := x_init.valid + + // Nack conflicting transaction init attempts + x_abort.bits.header.dst := x_init.bits.header.src + x_abort.bits.payload.tile_xact_id := x_init.bits.payload.tile_xact_id + x_abort.valid := Bool(false) + switch(abort_state) { + is(s_idle) { + when(want_to_abort) { + abort_state := Mux( co.messageHasData(x_init.bits.payload), s_abort_drain, s_abort_send) + } + } + is(s_abort_drain) { // raises x_init_data.ready (assigned above) + when(x_init_data.valid) { + abort_cnt := abort_cnt + UFix(1) + when(abort_cnt === ~UFix(0, width = log2Up(REFILL_CYCLES))) { // all-ones count ends the drain; NOTE(review): equals REFILL_CYCLES-1 only when REFILL_CYCLES is a power of two - confirm + abort_state := s_abort_send + } + } + } + is(s_abort_send) { // nothing is dequeued for now + x_abort.valid := Bool(true) + when(x_abort.ready) { // raises x_init.ready + abort_state := s_idle + } + } + } + + // Handle probe request generation + val p_req_arb = (new Arbiter(NGLOBAL_XACTS)){(new LogicalNetworkIO){ new ProbeRequest }} + for( i <- 0 until NGLOBAL_XACTS ) { + val t = trackerList(i).io + p_req_arb.io.in(i).bits := t.p_req.bits + p_req_arb.io.in(i).valid := t.p_req.valid + t.p_req.ready := p_req_arb.io.in(i).ready + } + io.network.probe_req <> p_req_arb.io.out + + //
Handle probe replies, which may or may not have data + val p_rep = io.network.probe_rep + val p_rep_data = io.network.probe_rep_data + val idx = p_rep.bits.payload.global_xact_id + p_rep.ready := trackerList.map(_.io.p_rep.ready).reduce(_||_) + p_rep_data.ready := trackerList.map(_.io.p_rep_data.ready).reduce(_||_) + p_rep_data_dep_q.io.enq.valid := p_rep.valid && co.messageHasData(p_rep.bits.payload) + p_rep_data_dep_q.io.enq.bits.global_xact_id := p_rep.bits.payload.global_xact_id + p_rep_data_dep_q.io.deq.ready := trackerList.map(_.io.p_rep_data_dep.ready).reduce(_||_) + for( i <- 0 until NGLOBAL_XACTS ) { + trackerList(i).io.p_rep_data.valid := p_rep_data.valid + trackerList(i).io.p_rep_data.bits := p_rep_data.bits + trackerList(i).io.p_rep_data_dep.valid := p_rep_data_dep_q.io.deq.valid + trackerList(i).io.p_rep_data_dep.bits := p_rep_data_dep_q.io.deq.bits + trackerList(i).io.p_rep.valid := p_rep.valid && (idx === UFix(i)) /* steer each probe reply to the tracker named by its global_xact_id */ + trackerList(i).io.p_rep.bits := p_rep.bits + } + + // Reply to initial requestor + // Forward memory responses from mem to tile or arbitrate to ack + val x_rep_arb = (new Arbiter(NGLOBAL_XACTS)){(new LogicalNetworkIO){ new TransactionReply }} + for( i <- 0 until NGLOBAL_XACTS ) { + val t = trackerList(i).io + x_rep_arb.io.in(i).bits := t.x_rep.bits + x_rep_arb.io.in(i).valid := t.x_rep.valid + t.x_rep.ready := x_rep_arb.io.in(i).ready + } + x_rep_arb.io.out.ready := Bool(false) // NOTE(review): reassigned two statements below; this default looks redundant - confirm + io.network.xact_rep.valid := x_rep_arb.io.out.valid + io.network.xact_rep.bits := x_rep_arb.io.out.bits + x_rep_arb.io.out.ready := io.network.xact_rep.ready + when(io.mem.resp.valid) { // a valid memory response overrides the arbiter: the reply comes from the tracker selected by the response tag + io.network.xact_rep.valid := Bool(true) + io.network.xact_rep.bits := Vec(trackerList.map(_.io.x_rep.bits)){(new LogicalNetworkIO){new TransactionReply}}(io.mem.resp.bits.tag) + for( i <- 0 until NGLOBAL_XACTS ) { + trackerList(i).io.x_rep.ready := (io.mem.resp.bits.tag === UFix(i)) && io.network.xact_rep.ready + } + } + + // Free finished transactions + val finish =
io.network.xact_finish + for( i <- 0 until NGLOBAL_XACTS ) { + trackerList(i).io.free := finish.valid && (finish.bits.payload.global_xact_id === UFix(i)) + } + finish.ready := Bool(true) /* finish messages are always accepted */ + + // Create an arbiter for the one memory port + // We have to arbitrate between the different trackers' memory requests + // and once we have picked a request, get the right write data + val mem_req_cmd_arb = (new Arbiter(NGLOBAL_XACTS)) { new MemReqCmd() } + val mem_req_data_arb = (new LockingArbiter(NGLOBAL_XACTS)) { new MemData() } + for( i <- 0 until NGLOBAL_XACTS ) { + mem_req_cmd_arb.io.in(i) <> trackerList(i).io.mem_req_cmd + mem_req_data_arb.io.in(i) <> trackerList(i).io.mem_req_data + mem_req_data_arb.io.lock(i) <> trackerList(i).io.mem_req_lock + } + io.mem.req_cmd <> Queue(mem_req_cmd_arb.io.out) + io.mem.req_data <> Queue(mem_req_data_arb.io.out) +} + +class XactTracker(id: Int)(implicit conf: CoherenceHubConfiguration) extends Component { // state machine for one transaction; `id` is driven out as both the memory request tag and the global_xact_id + val co = conf.co + implicit val ln = conf.ln + val io = new Bundle { + val x_init = (new FIFOIO){(new LogicalNetworkIO) { new TransactionInit }}.flip + val x_init_data = (new FIFOIO){(new LogicalNetworkIO) { new TransactionInitData }}.flip + val p_rep = (new FIFOIO){(new LogicalNetworkIO) { new ProbeReply }}.flip + val p_rep_data = (new FIFOIO){(new LogicalNetworkIO) { new ProbeReplyData }}.flip + val free = Bool(INPUT) // moves the tracker from s_busy back to s_idle + val tile_incoherent = Bits(INPUT, conf.ln.nTiles) + val p_rep_data_dep = (new FIFOIO) { new TrackerDependency }.flip + val x_init_data_dep = (new FIFOIO) { new TrackerDependency }.flip + val mem_resp = (new FIFOIO) { new MemResp }.flip + + val mem_req_cmd = (new FIFOIO) { new MemReqCmd } + val mem_req_data = (new FIFOIO) { new MemData } + val mem_req_lock = Bool(OUTPUT) + val p_req = (new FIFOIO) {(new LogicalNetworkIO) { new ProbeRequest }} + val x_rep = (new FIFOIO) {(new LogicalNetworkIO) { new TransactionReply }} + val busy = Bool(OUTPUT) + val has_conflict = Bool(OUTPUT) + } + + val s_idle :: s_ack :: s_mem
:: s_probe :: s_busy :: Nil = Enum(5){ UFix() } + val state = Reg(resetVal = s_idle) + val xact = Reg{ new TransactionInit } + val init_client_id_ = Reg(resetVal = UFix(0, width = log2Up(conf.ln.nTiles))) + //TODO: Will need id reg for merged release xacts + val init_sharer_cnt_ = Reg(resetVal = UFix(0, width = log2Up(conf.ln.nTiles))) + val p_rep_count = if (conf.ln.nTiles == 1) UFix(0) else Reg(resetVal = UFix(0, width = log2Up(conf.ln.nTiles))) + val p_req_flags = Reg(resetVal = Bits(0, width = conf.ln.nTiles)) + val x_needs_read = Reg(resetVal = Bool(false)) + val x_init_data_needs_write = Reg(resetVal = Bool(false)) + val p_rep_data_needs_write = Reg(resetVal = Bool(false)) + val x_w_mem_cmd_sent = Reg(resetVal = Bool(false)) + val p_w_mem_cmd_sent = Reg(resetVal = Bool(false)) + val mem_cnt = Reg(resetVal = UFix(0, width = log2Up(REFILL_CYCLES))) + val mem_cnt_next = mem_cnt + UFix(1) + val mem_cnt_max = ~UFix(0, width = log2Up(REFILL_CYCLES)) // NOTE(review): not referenced in the visible code; doMemReqWrite compares against UFix(REFILL_CYCLES-1) instead + val p_req_initial_flags = Bits(width = conf.ln.nTiles) + p_req_initial_flags := Bits(0) + if (conf.ln.nTiles > 1) { + // issue self-probes for uncached read xacts to facilitate I$ coherence + // TODO: this is hackish; figure out how to do it more systematically + val probe_self = co match { + case u: CoherencePolicyWithUncached => u.isUncachedReadTransaction(io.x_init.bits.payload) + case _ => Bool(false) + } + val myflag = Mux(probe_self, Bits(0), UFixToOH(io.x_init.bits.header.src(log2Up(conf.ln.nTiles)-1,0))) + p_req_initial_flags := ~(io.tile_incoherent | myflag) // probe every tile that is neither flagged incoherent nor (unless self-probing) the requester + } + val all_x_reps_require_acks = Bool(true) + + io.busy := state != s_idle + io.has_conflict := co.isCoherenceConflict(xact.addr, io.x_init.bits.payload.addr) && (state != s_idle) + io.mem_req_cmd.valid := Bool(false) + io.mem_req_cmd.bits.rw := Bool(false) + io.mem_req_cmd.bits.addr := xact.addr + io.mem_req_cmd.bits.tag := UFix(id) + io.mem_req_data.valid := Bool(false) + io.mem_req_data.bits.data := UFix(0) + io.mem_req_lock := Bool(false) +
io.p_req.valid := Bool(false) + io.p_req.bits.payload.p_type := co.getProbeRequestType(xact.x_type, UFix(0)) + io.p_req.bits.payload.global_xact_id := UFix(id) + io.p_req.bits.payload.addr := xact.addr + io.p_req.bits.header.dst := UFix(0) + io.x_rep.bits.payload.data := io.mem_resp.bits.data + io.x_rep.bits.payload.x_type := co.getTransactionReplyType(xact.x_type, init_sharer_cnt_) + io.x_rep.bits.payload.tile_xact_id := xact.tile_xact_id + io.x_rep.bits.payload.global_xact_id := UFix(id) + io.x_rep.bits.payload.require_ack := all_x_reps_require_acks + io.x_rep.bits.header.dst := init_client_id_ + io.x_rep.valid := (io.mem_resp.valid && (UFix(id) === io.mem_resp.bits.tag)) + io.x_init.ready := Bool(false) + io.x_init_data.ready := Bool(false) + io.x_init_data_dep.ready := Bool(false) + io.p_rep.ready := Bool(false) + io.p_rep_data.ready := Bool(false) + io.p_rep_data_dep.ready := Bool(false) + io.mem_resp.ready := io.x_rep.ready + + switch (state) { + is(s_idle) { + when( io.x_init.valid ) { + xact := io.x_init.bits.payload + init_client_id_ := io.x_init.bits.header.src + init_sharer_cnt_ := UFix(conf.ln.nTiles) // TODO: Broadcast only + x_init_data_needs_write := co.messageHasData(io.x_init.bits.payload) + x_needs_read := co.needsMemRead(io.x_init.bits.payload.x_type, UFix(0)) + p_req_flags := p_req_initial_flags + mem_cnt := UFix(0) + p_w_mem_cmd_sent := Bool(false) + x_w_mem_cmd_sent := Bool(false) + io.x_init.ready := Bool(true) + if(conf.ln.nTiles > 1) { + p_rep_count := PopCount(p_req_initial_flags) + state := Mux(p_req_initial_flags.orR, s_probe, s_mem) // go straight to s_mem when no tile needs a probe + } else state := s_mem + } + } + is(s_probe) { + val curr_p_id = PriorityEncoder(p_req_flags) + when(p_req_flags.orR) { + io.p_req.valid := Bool(true) + io.p_req.bits.header.dst := curr_p_id + } + when(io.p_req.ready) { + p_req_flags := p_req_flags & ~(UFixToOH(curr_p_id)) // clear the flag of the tile whose probe was just accepted + } + when(io.p_rep.valid) { + io.p_rep.ready := Bool(true) + if(conf.ln.nTiles > 1) p_rep_count := p_rep_count - UFix(1) +
when(p_rep_count === UFix(1)) { + state := s_mem + } + p_rep_data_needs_write := co.messageHasData(io.p_rep.bits.payload) /* NOTE(review): rewritten on every probe reply, so a later reply without data clears the flag - confirm intended */ + } + } + is(s_mem) { /* priority: probe-reply data write, then init data write, then memory read */ + when (p_rep_data_needs_write) { + doMemReqWrite(io.mem_req_cmd, + io.mem_req_data, + io.mem_req_lock, + io.p_rep_data, + p_rep_data_needs_write, + p_w_mem_cmd_sent, + io.p_rep_data_dep.ready, + io.p_rep_data_dep.valid && (io.p_rep_data_dep.bits.global_xact_id === UFix(id))) + } . elsewhen(x_init_data_needs_write) { + doMemReqWrite(io.mem_req_cmd, + io.mem_req_data, + io.mem_req_lock, + io.x_init_data, + x_init_data_needs_write, + x_w_mem_cmd_sent, + io.x_init_data_dep.ready, + io.x_init_data_dep.valid && (io.x_init_data_dep.bits.global_xact_id === UFix(id))) + } . elsewhen (x_needs_read) { + doMemReqRead(io.mem_req_cmd, x_needs_read) + } . otherwise { + state := Mux(co.needsAckReply(xact.x_type, UFix(0)), s_ack, + Mux(all_x_reps_require_acks, s_busy, s_idle)) + } + } + is(s_ack) { + io.x_rep.valid := Bool(true) + when(io.x_rep.ready) { state := Mux(all_x_reps_require_acks, s_busy, s_idle) } + } + is(s_busy) { // Nothing left to do but wait for transaction to complete + when (io.free) { + state := s_idle + } + } + } + + def doMemReqWrite[T <: Data](req_cmd: FIFOIO[MemReqCmd], req_data: FIFOIO[MemData], lock: Bool, data: FIFOIO[LogicalNetworkIO[T]], trigger: Bool, cmd_sent: Bool, pop_dep: Bool, at_front_of_dep_queue: Bool) { // issues one write command and forwards data beats from `data`; after beat REFILL_CYCLES-1 it raises `pop_dep` and clears `trigger` + req_cmd.bits.rw := Bool(true) + req_data.bits := data.bits.payload + when(req_cmd.ready && req_cmd.valid) { + cmd_sent := Bool(true) + } + when (at_front_of_dep_queue) { // nothing is driven valid unless the caller reports this tracker at the head of the dependency queue + req_cmd.valid := !cmd_sent && req_data.ready && data.valid + lock := data.valid || cmd_sent + when (req_cmd.ready || cmd_sent) { + req_data.valid := data.valid + when(req_data.ready) { + data.ready:= Bool(true) + when (data.valid) { + mem_cnt := mem_cnt_next + when(mem_cnt === UFix(REFILL_CYCLES-1)) { + pop_dep := Bool(true) + trigger := Bool(false) + } + } + } + } + } + } + + def doMemReqRead(req_cmd: FIFOIO[MemReqCmd], trigger:
Bool) { /* issues a read command and clears `trigger` once the command is accepted */ + req_cmd.valid := Bool(true) + req_cmd.bits.rw := Bool(false) + when(req_cmd.ready) { + trigger := Bool(false) + } + } +}