NOTE(review): this patch was recovered from a whitespace-collapsed copy. Line breaks,
indentation and blank context lines are reconstructed; every hunk's line counts match
its @@ header, but exact whitespace is unverified -- apply with `git apply --ignore-whitespace`.

diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala
index fc32b6eb..7cf36e8a 100644
--- a/rocket/src/main/scala/coherence.scala
+++ b/rocket/src/main/scala/coherence.scala
@@ -237,7 +237,7 @@ trait FourStateCoherence extends CoherencePolicy {
 class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherence {
   val io = new Bundle {
     val alloc_req = (new ioDecoupled) { new TrackerAllocReq }.flip
-    val p_data = (new ioPipe) { new TrackerProbeData }
+    val p_data = (new ioPipe) { new TrackerProbeData }.flip
     val can_alloc = Bool(INPUT)
     val xact_finish = Bool(INPUT)
     val p_rep_cnt_dec = Bits(ntiles, INPUT)
@@ -387,10 +387,10 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc
         p_req_flags := p_req_flags & ~io.p_req_cnt_inc // unflag sent reqs
       }
       when(io.p_rep_cnt_dec.orR) {
-        val p_rep_count_next = p_rep_count - PopCount(io.p_rep_cnt_dec)
+        val dec = PopCount(io.p_rep_cnt_dec)
         io.pop_p_rep := io.p_rep_cnt_dec
-        if(ntiles > 1) p_rep_count := p_rep_count_next
-        when(p_rep_count === UFix(1)) {
+        if(ntiles > 1) p_rep_count := p_rep_count - dec
+        when(p_rep_count === dec) {
           io.pop_p_rep := Bool(true)
           state := s_mem
         }
@@ -536,7 +536,9 @@ class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourS
   // Free finished transactions
   for( j <- 0 until ntiles ) {
     val finish = io.tiles(j).xact_finish
-    do_free_arr(finish.bits.global_xact_id) := finish.valid
+    when (finish.valid) {
+      do_free_arr(finish.bits.global_xact_id) := Bool(true)
+    }
     finish.ready := Bool(true)
   }
 
@@ -552,18 +554,19 @@ class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourS
     rep.bits.data := io.mem.resp.bits.data
     rep.bits.require_ack := Bool(true)
     rep.valid := Bool(false)
-    when(io.mem.resp.valid) {
+    when(io.mem.resp.valid && (UFix(j) === init_tile_id_arr(mem_idx))) {
       rep.bits.t_type := getTransactionReplyType(t_type_arr(mem_idx), sh_count_arr(mem_idx))
       rep.bits.tile_xact_id := tile_xact_id_arr(mem_idx)
       rep.bits.global_xact_id := mem_idx
-      rep.valid := (UFix(j) === init_tile_id_arr(mem_idx))
+      rep.valid := Bool(true)
     } . otherwise {
       rep.bits.t_type := getTransactionReplyType(t_type_arr(ack_idx), sh_count_arr(ack_idx))
       rep.bits.tile_xact_id := tile_xact_id_arr(ack_idx)
       rep.bits.global_xact_id := ack_idx
-      val do_send_ack = (UFix(j) === init_tile_id_arr(ack_idx)) && send_x_rep_ack_arr.toBits.orR
-      rep.valid := do_send_ack
-      sent_x_rep_ack_arr(ack_idx) := do_send_ack
+      when (UFix(j) === init_tile_id_arr(ack_idx)) {
+        rep.valid := send_x_rep_ack_arr.toBits.orR
+        sent_x_rep_ack_arr(ack_idx) := Bool(true)
+      }
     }
   }
   // If there were a ready signal due to e.g. intervening network use:
@@ -594,8 +597,10 @@ class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourS
     p_rep_data_dep_list(j).io.enq.valid := do_pop
     p_rep_data_dep_list(j).io.enq.bits.global_xact_id := OHToUFix(pop_p_reps)
     p_rep_data.ready := foldR(trackerList.map(_.io.pop_p_rep_data(j)))(_ || _)
-    p_data_valid_arr(idx) := p_rep.valid && probeReplyHasData(p_rep.bits)
-    p_data_tile_id_arr(idx) := UFix(j)
+    when (p_rep.valid) {
+      p_data_valid_arr(idx) := probeReplyHasData(p_rep.bits)
+      p_data_tile_id_arr(idx) := UFix(j)
+    }
     p_rep_data_dep_list(j).io.deq.ready := foldR(trackerList.map(_.io.pop_p_rep_dep(j).toBool))(_||_)
   }
   for( i <- 0 until NGLOBAL_XACTS ) {
diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala
index 4e2fe515..6ba02488 100644
--- a/rocket/src/main/scala/nbdcache.scala
+++ b/rocket/src/main/scala/nbdcache.scala
@@ -232,8 +232,9 @@ class MSHR(id: Int) extends Component with FourStateCoherence {
     }
     when (abort) { state := s_refill_req }
   }
-  when (state === s_refill_req && io.mem_req.ready) {
-    state := Mux(flush, s_drain_rpq, s_refill_resp)
+  when (state === s_refill_req) {
+    when (flush) { state := s_drain_rpq }
+    .elsewhen (io.mem_req.ready) { state := s_refill_resp }
   }
   when (state === s_wb_resp) {
     when (reply) { state := s_refill_req }
@@ -502,6 +503,7 @@ class ProbeUnit extends Component with FourStateCoherence {
   val line_state = Reg() { UFix() }
   val way_oh = Reg() { Bits() }
   val req = Reg() { new ProbeRequest() }
+  val hit = way_oh.orR
 
   when ((state === s_writeback_resp) && io.wb_req.ready) {
     state := s_invalid
@@ -510,7 +512,7 @@ class ProbeUnit extends Component with FourStateCoherence {
     state := s_writeback_resp
   }
   when ((state === s_probe_rep) && io.meta_req.ready && io.rep.ready) {
-    state := Mux(way_oh.orR && needsWriteback(line_state), s_writeback_req, s_invalid)
+    state := Mux(hit && needsWriteback(line_state), s_writeback_req, s_invalid)
   }
   when (state === s_meta_resp) {
     way_oh := io.tag_match_way_oh
@@ -527,22 +529,21 @@ class ProbeUnit extends Component with FourStateCoherence {
 
   io.req.ready := state === s_invalid
   io.rep.valid := state === s_probe_rep && io.meta_req.ready
-  io.rep.bits := newProbeReply(req, line_state)
+  io.rep.bits := newProbeReply(req, Mux(hit, line_state, newStateOnFlush()))
 
-  val new_state = newStateOnProbeReq(req, line_state)
-  io.meta_req.valid := state === s_meta_req || state === s_meta_resp || state === s_probe_rep && new_state != line_state
+  io.meta_req.valid := state === s_meta_req || state === s_meta_resp || state === s_probe_rep && hit
   io.meta_req.bits.way_en := Mux(state === s_probe_rep, way_oh, ~UFix(0, NWAYS))
   io.meta_req.bits.inner_req.rw := state === s_probe_rep
   io.meta_req.bits.inner_req.idx := req.address
-  io.meta_req.bits.inner_req.data.state := new_state
-  io.meta_req.bits.inner_req.data.tag := req.address >> UFix(OFFSET_BITS)
+  io.meta_req.bits.inner_req.data.state := newStateOnProbeReq(req, line_state)
+  io.meta_req.bits.inner_req.data.tag := req.address >> UFix(IDX_BITS)
   io.mshr_req.valid := state === s_meta_resp
   io.address := req.address
 
   io.wb_req.valid := state === s_writeback_req
   io.wb_req.bits.way_oh := way_oh
   io.wb_req.bits.idx := req.address
-  io.wb_req.bits.tag := req.address >> UFix(OFFSET_BITS)
+  io.wb_req.bits.tag := req.address >> UFix(IDX_BITS)
 }
 
 class FlushUnit(lines: Int) extends Component with FourStateCoherence{
diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala
index c2932513..a0743ead 100644
--- a/rocket/src/main/scala/top.scala
+++ b/rocket/src/main/scala/top.scala
@@ -21,35 +21,31 @@ class Top() extends Component {
   val icache_pf = new rocketIPrefetcher();
   val dcache = new HellaCacheUniproc();
 
-  val arbiter = new rocketMemArbiter(3 + (if (HAVE_VEC) 1 else 0));
+  val arbiter = new rocketMemArbiter(2 + (if (HAVE_VEC) 1 else 0));
   arbiter.io.requestor(0) <> dcache.io.mem
   arbiter.io.requestor(1) <> icache_pf.io.mem
-  arbiter.io.requestor(2) <> htif.io.mem
 
-  val hub = new CoherenceHubNull
+  val hub = new CoherenceHubBroadcast(2)
   // connect tile to hub
   hub.io.tiles(0).xact_init <> Queue(arbiter.io.mem.xact_init)
+  hub.io.tiles(0).xact_init_data <> Queue(dcache.io.mem.xact_init_data)
   arbiter.io.mem.xact_abort <> Queue(hub.io.tiles(0).xact_abort)
   arbiter.io.mem.xact_rep <> Pipe(hub.io.tiles(0).xact_rep)
   hub.io.tiles(0).xact_finish <> Queue(arbiter.io.mem.xact_finish)
   dcache.io.mem.probe_req <> Queue(hub.io.tiles(0).probe_req)
   hub.io.tiles(0).probe_rep <> Queue(dcache.io.mem.probe_rep, 1)
   hub.io.tiles(0).probe_rep_data <> Queue(dcache.io.mem.probe_rep_data)
+  // connect HTIF to hub
+  hub.io.tiles(1) <> htif.io.mem
   // connect hub to memory
   io.mem.req_cmd <> Queue(hub.io.mem.req_cmd)
   io.mem.req_data <> Queue(hub.io.mem.req_data)
   hub.io.mem.resp <> Pipe(io.mem.resp)
 
-  // temporary HTIF data connection
-  val data_arb = (new Arbiter(2)) { new TransactionInitData }
-  data_arb.io.in(0) <> Queue(dcache.io.mem.xact_init_data)
-  data_arb.io.in(1) <> Queue(htif.io.mem.xact_init_data)
-  hub.io.tiles(0).xact_init_data <> data_arb.io.out
-
   if (HAVE_VEC)
   {
     val vicache = new rocketICache(128, 2); // 128 sets x 2 ways
-    arbiter.io.requestor(3) <> vicache.io.mem
+    arbiter.io.requestor(2) <> vicache.io.mem
     cpu.io.vimem <> vicache.io.cpu;
   }
 
diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala
index 10f618c2..9d8ed9e1 100644
--- a/rocket/src/main/scala/util.scala
+++ b/rocket/src/main/scala/util.scala
@@ -185,6 +185,33 @@ class Arbiter[T <: Data](n: Int)(data: => T) extends Component {
   dout <> io.out.bits
 }
 
+class RRArbiter[T <: Data](n: Int)(data: => T) extends Component {
+  val io = new ioArbiter(n)(data)
+
+  val last_grant = Reg(resetVal = UFix(0, log2up(n)))
+  var valid = io.in(n-1).valid
+  var next_grant = UFix(n-1)
+  var mux = (new Mux1H(n)) { data }
+
+  for (i <- n-2 to 0 by -1) {
+    valid = valid || io.in(i).valid
+    next_grant = Mux(io.in(i).valid, UFix(i), next_grant)
+  }
+  for (i <- n-1 to 1 by -1)
+    next_grant = Mux(last_grant < UFix(i) && io.in(i).valid, UFix(i), next_grant)
+  for (i <- 0 until n) {
+    mux.io.sel(i) := next_grant === UFix(i)
+    mux.io.in(i) := io.in(i).bits
+    io.in(i).ready := io.out.ready && next_grant === UFix(i)
+  }
+  when (valid && io.out.ready) {
+    last_grant := next_grant
+  }
+
+  io.out.valid := valid
+  io.out.bits := mux.io.out
+}
+
 class ioLockingArbiter[T <: Data](n: Int)(data: => T) extends Bundle {
   val in = Vec(n) { (new ioDecoupled()) { data } }.flip
   val lock = Vec(n) { Bool() }.asInput