From ea0775643b0a30567aaf374feb4c5c4b80c9c7a9 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sun, 11 Mar 2012 17:13:01 -0700 Subject: [PATCH 1/6] fixed abort bug --- rocket/src/main/scala/coherence.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index f128036a..3db3ac46 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -600,7 +600,7 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ conflicts(i) := t.busy && x_init.valid && coherenceConflict(t.addr, x_init.bits.address) } x_abort.bits.tile_xact_id := x_init.bits.tile_xact_id - want_to_abort_arr(j) := conflicts.toBits.orR || busy_arr.toBits.andR || (!x_init_data_dep_list(j).io.enq.ready && transactionInitHasData(x_init.bits)) + want_to_abort_arr(j) := x_init.valid && (conflicts.toBits.orR || busy_arr.toBits.andR || (!x_init_data_dep_list(j).io.enq.ready && transactionInitHasData(x_init.bits))) x_abort.valid := Bool(false) switch(abort_state_arr(j)) { From 6229a33dc488731da5ba2bf5166146b319581a97 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sun, 11 Mar 2012 18:36:26 -0700 Subject: [PATCH 2/6] fixed cache controller flush unit deadlock --- rocket/src/main/scala/nbdcache.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index fd76bbaf..4c9542d5 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -722,8 +722,8 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { // reset and flush unit val flusher = new FlushUnit(lines) val flushed = Reg(resetVal = Bool(true)) - flushed := flushed && (!r_cpu_req_val_ || r_req_flush) || r_cpu_req_val_ && r_req_flush && mshr.io.fence_rdy && flusher.io.req.ready - flusher.io.req.valid := r_cpu_req_val_ && r_req_flush && mshr.io.fence_rdy && !flushed + flushed := flushed && (!r_cpu_req_val || r_req_flush) || r_cpu_req_val && r_req_flush && mshr.io.fence_rdy && flusher.io.req.ready + flusher.io.req.valid := r_cpu_req_val && r_req_flush && mshr.io.fence_rdy && !flushed flusher.io.mshr_req.ready := mshr.io.req.ready when (io.cpu.req_val) { From cbf7b133410ea53e51c8fd5892604fd36b8947ab Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 12 Mar 2012 10:38:37 -0700 Subject: [PATCH 3/6] fix hit logic for amos --- rocket/src/main/scala/coherence.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 3db3ac46..653ab6dc 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -147,8 +147,8 @@ trait FourStateCoherence extends CoherencePolicy { def isHit ( cmd: Bits, state: UFix): Bool = { val (read, write) = cpuCmdToRW(cmd) - ((read && ( state === tileShared || state === tileExclusiveClean || state === tileExclusiveDirty)) || - (write && (state === tileExclusiveClean || state === tileExclusiveDirty))) + Mux(write, (state === tileExclusiveClean || state === tileExclusiveDirty), + (state === tileShared || state === tileExclusiveClean || state === tileExclusiveDirty)) } //TODO: do we need isPresent() for determining that a line needs to be From fd29e00db03e765ccfcc3d73c59421d8ccac0a12 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 13 Mar 2012 01:56:10 -0700 Subject: [PATCH 4/6] support non-power-of-2 queue sizes need to manually wrap queue pointers. --- rocket/src/main/scala/queues.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index 823a031b..0d92ddfc 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -24,12 +24,13 @@ class queue[T <: Data](entries: Int, pipe: Boolean = false, flushable: Boolean = { enq_ptr = Reg(resetVal = UFix(0, log2up(entries))) deq_ptr = Reg(resetVal = UFix(0, log2up(entries))) + val pow2 = Bool((entries & (entries-1)) == 0) when (do_deq) { - deq_ptr := deq_ptr + UFix(1) + deq_ptr := Mux(!pow2 && deq_ptr === UFix(entries-1), UFix(0), deq_ptr + UFix(1)) } when (do_enq) { - enq_ptr := enq_ptr + UFix(1) + enq_ptr := Mux(!pow2 && enq_ptr === UFix(entries-1), UFix(0), enq_ptr + UFix(1)) } if (flushable) { when (io.flush) { From d76b05bde147f3fd76e10a047a24b476b2f882bc Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 13 Mar 2012 02:21:02 -0700 Subject: [PATCH 5/6] fix way selection on D$ write upgrades --- rocket/src/main/scala/nbdcache.scala | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 4c9542d5..3e38b505 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -94,7 +94,7 @@ class LoadDataGen extends Component { class MSHRReq extends Bundle { val tag_miss = Bool() - val old_state = UFix(width = 2) + val old_dirty = Bool() val old_tag = Bits(width = TAG_BITS) val tag = Bits(width = TAG_BITS) @@ -252,7 +252,7 @@ class MSHR(id: Int) extends Component with FourStateCoherence { req := io.req_bits when (io.req_bits.tag_miss) { - state := Mux(needsWriteback(io.req_bits.old_state), s_wb_req, s_refill_req) + state := Mux(io.req_bits.old_dirty, s_wb_req, s_refill_req) } } @@ -774,11 +774,11 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { meta_arb.io.in(2).bits.way_en := ~UFix(0, NWAYS) val early_tag_nack = !meta_arb.io.in(2).ready val cpu_req_tag = Cat(io.cpu.req_ppn, r_cpu_req_idx)(tagmsb,taglsb) - val tag_match_arr = (0 until NWAYS).map( w => isHit(r_cpu_req_cmd, meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) + val tag_match_arr = (0 until NWAYS).map( w => isValid(meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) val tag_match = Cat(Bits(0),tag_match_arr:_*).orR - val tag_hit = r_cpu_req_val && tag_match - val tag_miss = r_cpu_req_val && !tag_match val hit_way_oh = Cat(Bits(0),tag_match_arr.reverse:_*)(NWAYS-1, 0) //TODO: use Vec + val tag_hit_arr = (0 until NWAYS).map( w => isHit(r_cpu_req_cmd, meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) + val tag_hit = Cat(Bits(0),tag_match_arr:_*).orR val meta_resp_way_oh = Mux(meta.io.way_en === ~UFix(0, NWAYS), hit_way_oh, meta.io.way_en) val data_resp_way_oh = Mux(data.io.way_en === ~UFix(0, NWAYS), hit_way_oh, data.io.way_en) val meta_resp_mux = Mux1H(meta_resp_way_oh, meta.io.resp) @@ -809,8 +809,6 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { data_arb.io.in(4).bits.inner_req.offset := io.cpu.req_idx(offsetmsb,ramindexlsb) data_arb.io.in(4).bits.inner_req.idx := io.cpu.req_idx(indexmsb,indexlsb) data_arb.io.in(4).bits.inner_req.rw := Bool(false) - data_arb.io.in(4).bits.inner_req.wmask := UFix(0) // don't care - data_arb.io.in(4).bits.inner_req.data := io.mem.xact_rep.bits.data // don't care data_arb.io.in(4).valid := io.cpu.req_val && req_read data_arb.io.in(4).bits.way_en := ~UFix(0, NWAYS) // intiate load on all ways, mux after tag check val early_load_nack = req_read && !data_arb.io.in(4).ready @@ -831,12 +829,12 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { val drain_store = drain_store_val && data_arb.io.in(2).ready val p_amo = Reg(resetVal = Bool(false)) val p_store_rdy = !(p_store_valid && !drain_store) && !(mshr.io.data_req.valid || r_replay_amo || p_amo) - p_amo := tag_hit && r_req_amo && mshr.io.req.ready && !nack_hit || r_replay_amo - p_store_valid := p_store_valid && !drain_store || (tag_hit && r_req_store && mshr.io.req.ready && !nack_hit) || p_amo + p_amo := r_cpu_req_val && tag_hit && r_req_amo && mshr.io.req.ready && !nack_hit || r_replay_amo + p_store_valid := p_store_valid && !drain_store || (r_cpu_req_val && tag_hit && r_req_store && mshr.io.req.ready && !nack_hit) || p_amo // tag update after a store to an exclusive clean line. val new_hit_state = newStateOnHit(r_cpu_req_cmd, meta_resp_mux.state) - val set_hit_state = tag_hit && meta_resp_mux.state != new_hit_state + val set_hit_state = r_cpu_req_val && tag_hit && meta_resp_mux.state != new_hit_state meta.io.state_req.bits.inner_req.rw := Bool(true) meta.io.state_req.bits.inner_req.idx := Reg(r_cpu_req_idx(indexmsb,indexlsb)) meta.io.state_req.bits.inner_req.data.state := Reg(new_hit_state) @@ -858,8 +856,8 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { // miss handling mshr.io.req.valid := r_cpu_req_val && r_req_readwrite && !nack_hit || flusher.io.mshr_req.valid - mshr.io.req.bits.tag_miss := tag_miss || flusher.io.mshr_req.valid - mshr.io.req.bits.old_state := meta_wb_mux.state + mshr.io.req.bits.tag_miss := !tag_hit || flusher.io.mshr_req.valid + mshr.io.req.bits.old_dirty := needsWriteback(meta_wb_mux.state) && !tag_match // don't wb upgrades mshr.io.req.bits.old_tag := meta_wb_mux.tag mshr.io.req.bits.tag := cpu_req_tag mshr.io.req.bits.idx := r_cpu_req_idx(indexmsb,indexlsb) @@ -867,7 +865,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { mshr.io.req.bits.offset := r_cpu_req_idx(offsetmsb,0) mshr.io.req.bits.cmd := r_cpu_req_cmd mshr.io.req.bits.typ := r_cpu_req_type - mshr.io.req.bits.way_oh := replaced_way_oh + mshr.io.req.bits.way_oh := Mux(tag_match, hit_way_oh, replaced_way_oh) mshr.io.req.bits.data := cpu_req_data mshr.io.mem_rep <> io.mem.xact_rep @@ -932,9 +930,9 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { io.cpu.req_rdy := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence io.cpu.resp_nack := r_cpu_req_val_ && !io.cpu.req_kill && nack - io.cpu.resp_val := (tag_hit && !mshr.io.secondary_miss && !nack && r_req_read) || mshr.io.cpu_resp_val + io.cpu.resp_val := (r_cpu_req_val && tag_hit && !mshr.io.secondary_miss && !nack && r_req_read) || mshr.io.cpu_resp_val io.cpu.resp_replay := mshr.io.cpu_resp_val - io.cpu.resp_miss := r_cpu_req_val_ && (!tag_match || mshr.io.secondary_miss) && r_req_read + io.cpu.resp_miss := r_cpu_req_val_ && (!tag_hit || mshr.io.secondary_miss) && r_req_read io.cpu.resp_tag := Mux(mshr.io.cpu_resp_val, mshr.io.cpu_resp_tag, r_cpu_req_tag) io.cpu.resp_type := loadgen.io.typ io.cpu.resp_data := loadgen.io.dout From 287bc1c2625f236002b7a20b51d56bcedd3eef54 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 13 Mar 2012 11:48:12 -0700 Subject: [PATCH 6/6] Further refinement of tag_match/tag_hit signals --- rocket/src/main/scala/nbdcache.scala | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 3e38b505..f0058175 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -776,11 +776,11 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { val cpu_req_tag = Cat(io.cpu.req_ppn, r_cpu_req_idx)(tagmsb,taglsb) val tag_match_arr = (0 until NWAYS).map( w => isValid(meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) val tag_match = Cat(Bits(0),tag_match_arr:_*).orR - val hit_way_oh = Cat(Bits(0),tag_match_arr.reverse:_*)(NWAYS-1, 0) //TODO: use Vec + val tag_match_way_oh = Cat(Bits(0),tag_match_arr.reverse:_*)(NWAYS-1, 0) //TODO: use Vec val tag_hit_arr = (0 until NWAYS).map( w => isHit(r_cpu_req_cmd, meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) - val tag_hit = Cat(Bits(0),tag_match_arr:_*).orR - val meta_resp_way_oh = Mux(meta.io.way_en === ~UFix(0, NWAYS), hit_way_oh, meta.io.way_en) - val data_resp_way_oh = Mux(data.io.way_en === ~UFix(0, NWAYS), hit_way_oh, data.io.way_en) + val tag_hit = Cat(Bits(0),tag_hit_arr:_*).orR + val meta_resp_way_oh = Mux(meta.io.way_en === ~UFix(0, NWAYS), tag_match_way_oh, meta.io.way_en) + val data_resp_way_oh = Mux(data.io.way_en === ~UFix(0, NWAYS), tag_match_way_oh, data.io.way_en) val meta_resp_mux = Mux1H(meta_resp_way_oh, meta.io.resp) val data_resp_mux = Mux1H(data_resp_way_oh, data.io.resp) @@ -838,7 +838,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { meta.io.state_req.bits.inner_req.rw := Bool(true) meta.io.state_req.bits.inner_req.idx := Reg(r_cpu_req_idx(indexmsb,indexlsb)) meta.io.state_req.bits.inner_req.data.state := Reg(new_hit_state) - meta.io.state_req.bits.way_en := Reg(hit_way_oh) + meta.io.state_req.bits.way_en := Reg(tag_match_way_oh) meta.io.state_req.valid := Reg(set_hit_state, resetVal = Bool(false)) // pending store data, also used for AMO RHS @@ -847,7 +847,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { p_store_idx := r_cpu_req_idx p_store_type := r_cpu_req_type p_store_cmd := r_cpu_req_cmd - p_store_way_oh := Mux(r_replay_amo, r_way_oh, hit_way_oh) + p_store_way_oh := Mux(r_replay_amo, r_way_oh, tag_match_way_oh) p_store_data := cpu_req_data } when (p_amo) { @@ -865,7 +865,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { mshr.io.req.bits.offset := r_cpu_req_idx(offsetmsb,0) mshr.io.req.bits.cmd := r_cpu_req_cmd mshr.io.req.bits.typ := r_cpu_req_type - mshr.io.req.bits.way_oh := Mux(tag_match, hit_way_oh, replaced_way_oh) + mshr.io.req.bits.way_oh := Mux(tag_match, tag_match_way_oh, replaced_way_oh) mshr.io.req.bits.data := cpu_req_data mshr.io.mem_rep <> io.mem.xact_rep