diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 674fb33b..f8069c4e 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -23,6 +23,7 @@ object Constants val PC_PCR = UFix(6, 4); val PC_MEM = UFix(7, 4); val PC_MEM4 = UFix(8, 4); + val PC_EX = UFix(9, 4); val KF_Y = UFix(1, 1); val KF_N = UFix(0, 1); diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 965dfd0e..2f54d775 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -8,6 +8,10 @@ class ioDebug extends Bundle() { val error_mode = Bool('output); val log_control = Bool('output); + val id_valid = Bool('output); + val ex_valid = Bool('output); + val mem_valid = Bool('output); + val wb_valid = Bool('output); } class ioHost(view: List[String] = null) extends Bundle(view) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 2b155dd8..73aaa4b9 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -39,9 +39,7 @@ class ioCtrlDpath extends Bundle() val xcpt_syscall = Bool('output); val eret = Bool('output); val mem_load = Bool('output); - val dcache_miss = Bool('output); val wen = Bool('output); - val wb_div_mul = Bool('output); // inputs from datapath val btb_hit = Bool('input); val inst = Bits(32, 'input); @@ -67,7 +65,7 @@ class ioCtrlAll extends Bundle() val dpath = new ioCtrlDpath(); val console = new ioConsole(List("rdy", "valid")); val imem = new ioImem(List("req_val", "req_rdy", "resp_val")).flip(); - val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "resp_val")).flip(); + val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_val")).flip(); val host = new ioHost(List("start")); } @@ -323,33 +321,34 @@ class rocketCtrl extends Component wb_reg_div_mul_val <== mem_reg_div_mul_val; } - // replay PC when the D$ is blocked - val replay_mem_pc = mem_reg_mem_val && !io.dmem.req_rdy; - // replay PC+4 on a D$ load miss + // replay execute stage PC when the D$ is blocked +// val replay_mem_pc = mem_reg_mem_val && (mem_reg_mem_cmd != M_FLA) && !io.dmem.req_rdy; + val replay_ex = ex_reg_mem_val && !io.dmem.req_rdy; + // replay memory stage PC+4 on a D$ load miss val mem_cmd_load = mem_reg_mem_val && (mem_reg_mem_cmd === M_XRD); - val replay_mem_pc_plus4 = mem_cmd_load && !io.dmem.resp_val; +// val replay_mem_pc_plus4 = mem_cmd_load && !io.dmem.resp_val; + val replay_mem = io.dmem.resp_miss; - val kill_ex = replay_mem_pc | replay_mem_pc_plus4 | mem_reg_privileged | io.dpath.exception; +// val kill_ex = replay_mem_pc | replay_mem_pc_plus4 | mem_reg_privileged; + val kill_ex = replay_ex | replay_mem | mem_reg_privileged; val kill_mem = io.dpath.exception; - dcache_miss <== replay_mem_pc_plus4; + dcache_miss <== io.dmem.resp_miss; io.dpath.mem_load := mem_cmd_load; - io.dpath.dcache_miss := dcache_miss; io.dpath.sel_pc := - Mux(replay_mem_pc, PC_MEM, - Mux(replay_mem_pc_plus4 || mem_reg_privileged, PC_MEM4, + Mux(replay_mem || mem_reg_privileged, PC_MEM4, Mux(io.dpath.exception || mem_reg_eret, PC_PCR, + Mux(replay_ex, PC_EX, Mux(!ex_reg_btb_hit && br_taken, PC_BR, Mux(ex_reg_btb_hit && !br_taken, PC_EX4, -// Mux(ex_reg_btb_hit && !br_taken || ex_reg_privileged, PC_EX4, Mux(jr_taken, PC_JR, Mux(j_taken, PC_J, Mux(io.dpath.btb_hit, PC_BTB, PC_4)))))))); - io.dpath.wen_btb := ~ex_reg_btb_hit & br_taken; + io.dpath.wen_btb := ~ex_reg_btb_hit & br_taken & ~kill_ex & ~kill_mem; val take_pc = ~ex_reg_btb_hit & br_taken | @@ -359,8 +358,8 @@ class rocketCtrl extends Component io.dpath.exception | mem_reg_privileged | mem_reg_eret | - replay_mem_pc | - replay_mem_pc_plus4; + replay_ex | + replay_mem; io.dpath.stallf := ~take_pc & @@ -428,7 +427,7 @@ class rocketCtrl extends Component io.dpath.mul_result_val ); - val ctrl_killd = take_pc | ctrl_stalld | io.dpath.killx; + val ctrl_killd = take_pc | ctrl_stalld; // for divider, multiplier writeback val mul_wb = io.dpath.mul_result_val; @@ -438,7 +437,7 @@ class rocketCtrl extends Component io.dpath.killf := take_pc | ~io.imem.resp_val; io.dpath.killd := ctrl_killd.toBool; - io.dpath.killx := kill_ex.toBool; + io.dpath.killx := kill_ex.toBool || kill_mem.toBool; io.dpath.killm := kill_mem.toBool; io.dpath.ren2 := id_ren2.toBool; diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 8233de86..a9b0dac3 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -13,7 +13,8 @@ class ioDmem(view: List[String] = null) extends Bundle(view) { val req_type = Bits(3, 'input); val req_addr = UFix(32, 'input); val req_data = Bits(64, 'input); - val req_tag = Bits(12, 'input); + val req_tag = Bits(5, 'input); + val resp_miss = Bool('output); val resp_val = Bool('output); val resp_data = Bits(64, 'output); val resp_tag = Bits(12, 'output); @@ -56,7 +57,7 @@ class rocketDCacheDM_flush(lines: Int, addrbits: Int) extends Component { val flush_resp_count = Reg(resetVal = UFix(0, indexbits)); val flushing = Reg(resetVal = Bool(false)); val flush_waiting = Reg(resetVal = Bool(false)); - val r_cpu_req_tag = Reg(resetVal = Bits(0, 12)); + val r_cpu_req_tag = Reg(resetVal = Bits(0, 5)); when (io.cpu.req_val && io.cpu.req_rdy && (io.cpu.req_cmd === M_FLA)) { @@ -84,6 +85,7 @@ class rocketDCacheDM_flush(lines: Int, addrbits: Int) extends Component { dcache.io.mem ^^ io.mem; io.cpu.req_rdy := dcache.io.cpu.req_rdy && !flush_waiting; + io.cpu.resp_miss := dcache.io.cpu.resp_miss; io.cpu.resp_data := dcache.io.cpu.resp_data; io.cpu.resp_tag := dcache.io.cpu.resp_tag; io.cpu.resp_val := dcache.io.cpu.resp_val & @@ -108,14 +110,16 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { val r_cpu_req_addr = Reg(resetVal = Bits(0, addrbits)); val r_cpu_req_val = Reg(resetVal = Bool(false)); - val r_cpu_req_data = Reg(resetVal = Bits(0,64)); +// val r_cpu_req_data = Reg(resetVal = Bits(0,64)); val r_cpu_req_cmd = Reg(resetVal = Bits(0,4)); val r_cpu_req_type = Reg(resetVal = Bits(0,3)); val r_cpu_req_tag = Reg(resetVal = Bits(0,5)); val p_store_data = Reg(resetVal = Bits(0,64)); val p_store_addr = Reg(resetVal = Bits(0,64)); - val p_store_wmask = Reg(resetVal = Bits(0,64)); + val p_store_cmd = Reg(resetVal = Bits(0,4)); + val p_store_type = Reg(resetVal = Bits(0,3)); +// val p_store_wmask = Reg(resetVal = Bits(0,64)); val p_store_valid = Reg(resetVal = Bool(false)); val req_load = (r_cpu_req_cmd === M_XRD); @@ -124,19 +128,25 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { when (io.cpu.req_val && io.cpu.req_rdy) { r_cpu_req_addr <== io.cpu.req_addr; - r_cpu_req_data <== io.cpu.req_data; r_cpu_req_cmd <== io.cpu.req_cmd; r_cpu_req_type <== io.cpu.req_type; r_cpu_req_tag <== io.cpu.req_tag; } - + + when (io.cpu.req_val && io.cpu.req_rdy && (io.cpu.req_cmd === M_XWR)) { + p_store_data <== io.cpu.req_data; + p_store_addr <== io.cpu.req_addr; + p_store_type <== io.cpu.req_type; + p_store_valid <== Bool(true); + } + when (io.cpu.req_rdy) { r_cpu_req_val <== io.cpu.req_val; } when ((state === s_resolve_miss) && !req_load) { r_cpu_req_val <== Bool(false); } - + // counter val rr_count = Reg(resetVal = UFix(0,2)); val rr_count_next = rr_count + UFix(1); @@ -145,7 +155,9 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { } // tag array - val tag_we = (state === s_refill) && io.mem.req_rdy && (rr_count === UFix(3,2)); + val tag_we = + ((state === s_refill) && io.mem.req_rdy && (rr_count === UFix(3,2))) || + ((state === s_resolve_miss) && req_flush); val tag_waddr = r_cpu_req_addr(indexmsb, indexlsb).toUFix; val tag_wdata = r_cpu_req_addr(tagmsb, taglsb); val tag_array = Mem(lines, tag_we, tag_waddr, tag_wdata); @@ -166,57 +178,56 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { val tag_valid = vb_rdata.toBool; val tag_match = tag_valid && (tag_rdata === r_cpu_req_addr(tagmsb, taglsb)); - + // generate write mask and store data signals based on store type and address LSBs val wmask_b = - Mux(r_cpu_req_addr(2,0) === UFix(0, 3), Bits("b0000_0001", 8), - Mux(r_cpu_req_addr(2,0) === UFix(1, 3), Bits("b0000_0010", 8), - Mux(r_cpu_req_addr(2,0) === UFix(2, 3), Bits("b0000_0100", 8), - Mux(r_cpu_req_addr(2,0) === UFix(3, 3), Bits("b0000_1000", 8), - Mux(r_cpu_req_addr(2,0) === UFix(4, 3), Bits("b0001_0000", 8), - Mux(r_cpu_req_addr(2,0) === UFix(5, 3), Bits("b0010_0000", 8), - Mux(r_cpu_req_addr(2,0) === UFix(6, 3), Bits("b0100_0000", 8), - Mux(r_cpu_req_addr(2,0) === UFix(7, 3), Bits("b1000_0000", 8), + Mux(p_store_addr(2,0) === UFix(0, 3), Bits("b0000_0001", 8), + Mux(p_store_addr(2,0) === UFix(1, 3), Bits("b0000_0010", 8), + Mux(p_store_addr(2,0) === UFix(2, 3), Bits("b0000_0100", 8), + Mux(p_store_addr(2,0) === UFix(3, 3), Bits("b0000_1000", 8), + Mux(p_store_addr(2,0) === UFix(4, 3), Bits("b0001_0000", 8), + Mux(p_store_addr(2,0) === UFix(5, 3), Bits("b0010_0000", 8), + Mux(p_store_addr(2,0) === UFix(6, 3), Bits("b0100_0000", 8), + Mux(p_store_addr(2,0) === UFix(7, 3), Bits("b1000_0000", 8), UFix(0, 8))))))))); val wmask_h = - Mux(r_cpu_req_addr(2,1) === UFix(0, 2), Bits("b0000_0011", 8), - Mux(r_cpu_req_addr(2,1) === UFix(1, 2), Bits("b0000_1100", 8), - Mux(r_cpu_req_addr(2,1) === UFix(2, 2), Bits("b0011_0000", 8), - Mux(r_cpu_req_addr(2,1) === UFix(3, 2), Bits("b1100_0000", 8), + Mux(p_store_addr(2,1) === UFix(0, 2), Bits("b0000_0011", 8), + Mux(p_store_addr(2,1) === UFix(1, 2), Bits("b0000_1100", 8), + Mux(p_store_addr(2,1) === UFix(2, 2), Bits("b0011_0000", 8), + Mux(p_store_addr(2,1) === UFix(3, 2), Bits("b1100_0000", 8), UFix(0, 8))))); val wmask_w = - Mux(r_cpu_req_addr(2) === UFix(0, 1), Bits("b0000_1111", 8), - Mux(r_cpu_req_addr(2) === UFix(1, 1), Bits("b1111_0000", 8), + Mux(p_store_addr(2) === UFix(0, 1), Bits("b0000_1111", 8), + Mux(p_store_addr(2) === UFix(1, 1), Bits("b1111_0000", 8), UFix(0, 8))); val wmask_d = Bits("b1111_1111", 8); val store_wmask = - Mux(r_cpu_req_type === MT_B, wmask_b, - Mux(r_cpu_req_type === MT_H, wmask_h, - Mux(r_cpu_req_type === MT_W, wmask_w, - Mux(r_cpu_req_type === MT_D, wmask_d, + Mux(p_store_type === MT_B, wmask_b, + Mux(p_store_type === MT_H, wmask_h, + Mux(p_store_type === MT_W, wmask_w, + Mux(p_store_type === MT_D, wmask_d, UFix(0, 8))))); val store_data = - Mux(r_cpu_req_type === MT_B, Fill(8, r_cpu_req_data( 7,0)), - Mux(r_cpu_req_type === MT_H, Fill(4, r_cpu_req_data(15,0)), - Mux(r_cpu_req_type === MT_W, Fill(2, r_cpu_req_data(31,0)), - Mux(r_cpu_req_type === MT_D, r_cpu_req_data, + Mux(p_store_type === MT_B, Fill(8, p_store_data( 7,0)), + Mux(p_store_type === MT_H, Fill(4, p_store_data(15,0)), + Mux(p_store_type === MT_W, Fill(2, p_store_data(31,0)), + Mux(p_store_type === MT_D, p_store_data, UFix(0, 64))))); - - when ((state === s_ready) && r_cpu_req_val && req_store) { - p_store_data <== store_data; - p_store_addr <== r_cpu_req_addr; - p_store_wmask <== store_wmask; - p_store_valid <== Bool(true); - } val addr_match = (r_cpu_req_addr(tagmsb, offsetlsb) === p_store_addr(tagmsb, offsetlsb)); - val drain_store = ((state === s_ready) && p_store_valid && (!r_cpu_req_val || !tag_match || !req_load || addr_match)) + val ldst_conflict = r_cpu_req_val && req_load && p_store_valid && addr_match; +// val drain_store = ((state === s_ready) && p_store_valid && (!r_cpu_req_val || !tag_match || !req_load || addr_match)) + val drain_store = + (state === s_ready) && p_store_valid && + (!(io.cpu.req_val && (io.cpu.req_cmd === M_XRD)) || + r_cpu_req_val && req_load && p_store_valid && addr_match); + val resolve_store = (state === s_resolve_miss) && req_store; val do_store = drain_store | resolve_store; @@ -241,22 +252,22 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { val data_array_wdata = Mux((state === s_refill), io.mem.resp_data, - Cat(p_store_data, p_store_data)); + Cat(store_data, store_data)); - val p_wmask_expand = - Cat(Fill(8, p_store_wmask(7)), - Fill(8, p_store_wmask(6)), - Fill(8, p_store_wmask(5)), - Fill(8, p_store_wmask(4)), - Fill(8, p_store_wmask(3)), - Fill(8, p_store_wmask(2)), - Fill(8, p_store_wmask(1)), - Fill(8, p_store_wmask(0))); + val store_wmask_expand = + Cat(Fill(8, store_wmask(7)), + Fill(8, store_wmask(6)), + Fill(8, store_wmask(5)), + Fill(8, store_wmask(4)), + Fill(8, store_wmask(3)), + Fill(8, store_wmask(2)), + Fill(8, store_wmask(1)), + Fill(8, store_wmask(0))); val da_store_wmask = Mux(p_store_addr(offsetlsb).toBool, - Cat(p_wmask_expand, Bits(0,64)), - Cat(Bits(0,64), p_wmask_expand)); + Cat(store_wmask_expand, Bits(0,64)), + Cat(Bits(0,64), store_wmask_expand)); val data_array_wmask = Mux((state === s_refill), ~Bits(0,128), @@ -269,16 +280,16 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { io.cpu.req_addr(indexmsb, offsetmsb-1)))); val data_array_rdata = Reg(data_array.read(data_array_raddr)); - val ldst_conflict = r_cpu_req_val && req_load && p_store_valid && addr_match; - + val miss = (state === s_ready) && r_cpu_req_val && req_load && (!tag_match || (p_store_valid && addr_match)); + // output signals io.cpu.req_rdy := (state === s_ready) && !ldst_conflict && (!r_cpu_req_val || (tag_match && !req_flush)); io.cpu.resp_val := ((state === s_ready) && r_cpu_req_val && tag_match && req_load && !(p_store_valid && addr_match)) || ((state === s_resolve_miss) && req_flush); + io.cpu.resp_miss := miss; io.cpu.resp_tag := Cat(Bits(0,1), r_cpu_req_type, r_cpu_req_addr(2,0), r_cpu_req_tag); - io.cpu.resp_data := Mux(r_cpu_req_addr(offsetlsb).toBool, data_array_rdata(127, 64), data_array_rdata(63,0)); diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 6383c4ec..73474a8a 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -44,11 +44,13 @@ class rocketDpath extends Component val if_reg_pc = Reg(width = 32, resetVal = UFix(0, 32)); // instruction decode definitions + val id_reg_valid = Reg(resetVal = Bool(false)); val id_reg_pc = Reg(resetVal = UFix(0,32)); val id_reg_pc_plus4 = Reg(resetVal = UFix(0,32)); val id_reg_inst = Reg(resetVal = NOP); // execute definitions + val ex_reg_valid = Reg(resetVal = Bool(false)); val ex_reg_pc = Reg(resetVal = UFix(0,32)); val ex_reg_pc_plus4 = Reg(resetVal = UFix(0,32)); val ex_reg_inst = Reg(resetVal = Bits(0,32)); @@ -76,6 +78,7 @@ class rocketDpath extends Component val ex_wdata = Wire() { Bits() }; // memory definitions + val mem_reg_valid = Reg(resetVal = Bool(false)); val mem_reg_pc = Reg(resetVal = UFix(0,32)); val mem_reg_pc_plus4 = Reg(resetVal = UFix(0,32)); val mem_reg_waddr = Reg(resetVal = UFix(0,5)); @@ -90,6 +93,7 @@ class rocketDpath extends Component val mem_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); // writeback definitions + val wb_reg_valid = Reg(resetVal = Bool(false)); val wb_reg_pc = Reg(resetVal = UFix(0,32)); val wb_reg_waddr = Reg(resetVal = UFix(0,5)); val wb_reg_wdata = Reg(resetVal = Bits(0,64)); @@ -126,6 +130,7 @@ class rocketDpath extends Component val if_next_pc = Mux(io.ctrl.sel_pc === PC_4, if_pc_plus4, Mux(io.ctrl.sel_pc === PC_BTB, if_btb_target, + Mux(io.ctrl.sel_pc === PC_EX, ex_reg_pc, Mux(io.ctrl.sel_pc === PC_EX4, ex_reg_pc_plus4, Mux(io.ctrl.sel_pc === PC_BR, ex_branch_target, Mux(io.ctrl.sel_pc === PC_J, ex_branch_target, @@ -133,7 +138,7 @@ class rocketDpath extends Component Mux(io.ctrl.sel_pc === PC_PCR, mem_reg_pcr(31,0).toUFix, Mux(io.ctrl.sel_pc === PC_MEM, mem_reg_pc, Mux(io.ctrl.sel_pc === PC_MEM4, mem_reg_pc_plus4, - UFix(0, 32)))))))))); + UFix(0, 32))))))))))); when (!io.host.start){ if_reg_pc <== UFix(0, 32); //32'hFFFF_FFFC; @@ -156,10 +161,12 @@ class rocketDpath extends Component id_reg_pc <== if_reg_pc; id_reg_pc_plus4 <== if_pc_plus4; when(io.ctrl.killf) { - id_reg_inst <== NOP; + id_reg_inst <== NOP; + id_reg_valid <== Bool(false); } otherwise { - id_reg_inst <== io.imem.resp_data; + id_reg_inst <== io.imem.resp_data; + id_reg_valid <== Bool(true); } } @@ -265,6 +272,7 @@ class rocketDpath extends Component ex_reg_ctrl_cause <== id_cause; when(io.ctrl.killd) { + ex_reg_valid <== Bool(false); ex_reg_ctrl_div_val <== Bool(false); ex_reg_ctrl_mul_val <== Bool(false); ex_reg_ctrl_wen <== Bool(false); @@ -273,6 +281,7 @@ class rocketDpath extends Component ex_reg_ctrl_exception <== Bool(false); } otherwise { + ex_reg_valid <== id_reg_valid; ex_reg_ctrl_div_val <== io.ctrl.div_val; ex_reg_ctrl_mul_val <== io.ctrl.mul_val; ex_reg_ctrl_wen <== io.ctrl.wen; @@ -376,12 +385,14 @@ class rocketDpath extends Component mem_reg_ctrl_cause <== ex_reg_ctrl_cause; when (io.ctrl.killx) { + mem_reg_valid <== Bool(false); mem_reg_ctrl_eret <== Bool(false); mem_reg_ctrl_wen <== Bool(false); mem_reg_ctrl_wen_pcr <== Bool(false); mem_reg_ctrl_exception <== Bool(false); } otherwise { + mem_reg_valid <== ex_reg_valid; mem_reg_ctrl_eret <== ex_reg_ctrl_eret; mem_reg_ctrl_wen <== ex_reg_ctrl_wen; mem_reg_ctrl_wen_pcr <== ex_reg_ctrl_wen_pcr; @@ -412,10 +423,12 @@ class rocketDpath extends Component wb_reg_ctrl_exception <== mem_reg_ctrl_exception; when (io.ctrl.killm) { + wb_reg_valid <== Bool(false); wb_reg_ctrl_wen <== Bool(false); wb_reg_ctrl_wen_pcr <== Bool(false); } otherwise { + wb_reg_valid <== mem_reg_valid; wb_reg_ctrl_wen <== mem_reg_ctrl_wen; wb_reg_ctrl_wen_pcr <== mem_reg_ctrl_wen_pcr; } @@ -448,6 +461,12 @@ class rocketDpath extends Component pcr.io.exception := wb_reg_ctrl_exception; pcr.io.cause := wb_reg_ctrl_cause; pcr.io.pc := wb_reg_pc; + + // temporary debug outputs so things don't get optimized away + io.debug.id_valid := id_reg_valid; + io.debug.ex_valid := ex_reg_valid; + io.debug.mem_valid := mem_reg_valid; + io.debug.wb_valid := wb_reg_valid; }