From 7a528d6255e3a8f2cd6c443de4fb12ac0387b0d8 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Tue, 1 Nov 2011 23:14:34 -0700 Subject: [PATCH] fixes for div/mul hazard checking + cleanup --- rocket/src/main/scala/cpu.scala | 48 +---- rocket/src/main/scala/ctrl.scala | 126 +++++++---- rocket/src/main/scala/dcache.scala | 322 ++++++++++++++--------------- rocket/src/main/scala/dpath.scala | 16 +- 4 files changed, 249 insertions(+), 263 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 2e3374f7..965dfd0e 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -40,63 +40,23 @@ class rocketProc extends Component val ctrl = new rocketCtrl(); val dpath = new rocketDpath(); -// val mem = new rocketMemory(); -// val wb = new rocketWriteback(); ctrl.io.dpath <> dpath.io.ctrl; + ctrl.io.dmem ^^ io.dmem; ctrl.io.host.start ^^ io.host.start; ctrl.io.imem ^^ io.imem; + + dpath.io.dmem ^^ io.dmem; dpath.io.imem.req_addr ^^ io.imem.req_addr; dpath.io.imem.resp_data ^^ io.imem.resp_data; dpath.io.host ^^ io.host; dpath.io.debug ^^ io.debug; - ctrl.io.dmem ^^ io.dmem; - dpath.io.dmem ^^ io.dmem; - - // FIXME + // FIXME: console disconnected // io.console.bits := dpath.io.dpath.rs1(7,0); io.console.bits := Bits(0,8); io.console.valid := ctrl.io.console.valid; ctrl.io.console.rdy := io.console.rdy; - - // dpath.io.wb <> wb.io; -// dpath.io.wb.wen <> wb.io.wb_wen; -// dpath.io.wb.waddr <> wb.io.wb_waddr; -// dpath.io.wb.wdata <> wb.io.wb_wdata; - - -// ctrl.io.mem.mrq_val <> mem.io.mem_mrq_val; -// ctrl.io.mem.mrq_cmd <> mem.io.mem_mrq_cmd; -// ctrl.io.mem.mrq_type <> mem.io.mem_mrq_type; -// ctrl.io.mem.mrq_deq <> mem.io.mem_mrq_deq; -// ctrl.io.mem.xsdq_rdy <> mem.io.mem_xsdq_rdy; -// ctrl.io.mem.xsdq_val <> mem.io.mem_xsdq_val; -// ctrl.io.mem.dc_busy := !io.dmem.req_rdy; - -// ctrl.io.console ^^ io.console; -// ctrl.io.wb.waddr <> wb.io.wb_waddr; -// ctrl.io.wb.wen <> wb.io.wb_wen; - - // TODO: SHOULD BE THE FOLLOWING BUT NEED BETTER INTERFACE CHUNKS - // mem.io.dmem >< io.dmem; - -// mem.io.dmem_req_val ^^ io.dmem.req_val; -// mem.io.dmem_req_rdy ^^ io.dmem.req_rdy; -// mem.io.dmem_req_op ^^ io.dmem.req_op; -// mem.io.dmem_req_addr ^^ io.dmem.req_addr; -// mem.io.dmem_req_data ^^ io.dmem.req_data; -// mem.io.dmem_req_wmask ^^ io.dmem.req_wmask; -// mem.io.dmem_req_tag ^^ io.dmem.req_tag; - -// -// mem.io.dpath_rs2 <> dpath.io.dpath.rs2; -// mem.io.dpath_waddr <> dpath.io.dpath.waddr; -// mem.io.dpath_alu_out <> dpath.io.dpath.alu_out; - -// wb.io.dmem_resp_val ^^ io.dmem.resp_val; -// wb.io.dmem_resp_data ^^ io.dmem.resp_data; -// wb.io.dmem_resp_tag ^^ io.dmem.resp_tag; } } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index ed70dc74..4b1326b5 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -41,6 +41,7 @@ class ioCtrlDpath extends Bundle() val mem_load = Bool('output); val dcache_miss = Bool('output); val wen = Bool('output); + val wb_div_mul = Bool('output); // inputs from datapath val btb_hit = Bool('input); val inst = Bits(32, 'input); @@ -52,10 +53,11 @@ class ioCtrlDpath extends Bundle() val mul_result_val = Bool('input); val ex_waddr = UFix(5,'input); // write addr from execute stage val mem_waddr = UFix(5,'input); // write addr from memory stage + val wb_waddr = UFix(5,'input); // write addr from writeback stage val exception = Bool('input); val status = Bits(8, 'input); - val sboard_set = Bool('input); - val sboard_seta = UFix(5, 'input); +// val sboard_set = Bool('input); +// val sboard_seta = UFix(5, 'input); val sboard_clr0 = Bool('input); val sboard_clr0a = UFix(5, 'input); val sboard_clr1 = Bool('input); @@ -205,13 +207,21 @@ class rocketCtrl extends Component val console_out_fire = id_console_out_val & ~io.dpath.killd; io.console.valid := console_out_fire.toBool; + val wb_reg_div_mul_val = Reg(){Bool()}; + val dcache_miss = Reg(){Bool()}; + val sboard = new rocketCtrlSboard(); sboard.io.raddra := id_raddr2.toUFix; sboard.io.raddrb := id_raddr1.toUFix; sboard.io.raddrc := id_waddr.toUFix; - sboard.io.set := io.dpath.sboard_set; - sboard.io.seta := io.dpath.sboard_seta; +// sboard.io.set := io.dpath.sboard_set; +// sboard.io.seta := io.dpath.sboard_seta; + + // scoreboard set (for D$ misses, div, mul) + sboard.io.set := wb_reg_div_mul_val | dcache_miss; + sboard.io.seta := io.dpath.wb_waddr; + sboard.io.clr0 := io.dpath.sboard_clr0; sboard.io.clr0a := io.dpath.sboard_clr0a; sboard.io.clr1 := io.dpath.sboard_clr1; @@ -222,14 +232,15 @@ class rocketCtrl extends Component val id_stall_waddr = sboard.io.stallc; val id_stall_ra = sboard.io.stallra; - val id_reg_btb_hit = Reg(width = 1, resetVal = Bool(false)); - val ex_reg_br_type = Reg(){UFix(width = 4)}; - val ex_reg_btb_hit = Reg(){Bool()}; - val ex_reg_mem_val = Reg(){Bool()}; - val ex_reg_mem_cmd = Reg(){UFix(width = 4)}; - val ex_reg_mem_type = Reg(){UFix(width = 3)}; - val ex_reg_eret = Reg(resetVal = Bool(false)); - val ex_reg_privileged = Reg(resetVal = Bool(false)); + val id_reg_btb_hit = Reg(width = 1, resetVal = Bool(false)); + val ex_reg_br_type = Reg(){UFix(width = 4)}; + val ex_reg_btb_hit = Reg(){Bool()}; + val ex_reg_div_mul_val = Reg(){Bool()}; + val ex_reg_mem_val = Reg(){Bool()}; + val ex_reg_mem_cmd = Reg(){UFix(width = 4)}; + val ex_reg_mem_type = Reg(){UFix(width = 3)}; + val ex_reg_eret = Reg(resetVal = Bool(false)); + val ex_reg_privileged = Reg(resetVal = Bool(false)); when (!io.dpath.stalld) { when (io.dpath.killf) { @@ -241,22 +252,24 @@ class rocketCtrl extends Component } when (reset.toBool || io.dpath.killd) { - ex_reg_br_type <== BR_N; - ex_reg_btb_hit <== Bool(false); - ex_reg_mem_val <== Bool(false); - ex_reg_mem_cmd <== UFix(0, 4); - ex_reg_mem_type <== UFix(0, 3); - ex_reg_eret <== Bool(false); - ex_reg_privileged <== Bool(false); + ex_reg_br_type <== BR_N; + ex_reg_btb_hit <== Bool(false); + ex_reg_div_mul_val <== Bool(false); + ex_reg_mem_val <== Bool(false); + ex_reg_mem_cmd <== UFix(0, 4); + ex_reg_mem_type <== UFix(0, 3); + ex_reg_eret <== Bool(false); + ex_reg_privileged <== Bool(false); } otherwise { - ex_reg_br_type <== id_br_type; - ex_reg_btb_hit <== id_reg_btb_hit; - ex_reg_mem_val <== id_mem_val.toBool; - ex_reg_mem_cmd <== id_mem_cmd; - ex_reg_mem_type <== id_mem_type; - ex_reg_eret <== id_eret.toBool; - ex_reg_privileged <== id_privileged.toBool; + ex_reg_br_type <== id_br_type; + ex_reg_btb_hit <== id_reg_btb_hit; + ex_reg_div_mul_val <== id_div_val.toBool || id_mul_val.toBool; + ex_reg_mem_val <== id_mem_val.toBool; + ex_reg_mem_cmd <== id_mem_cmd; + ex_reg_mem_type <== id_mem_type; + ex_reg_eret <== id_eret.toBool; + ex_reg_privileged <== id_privileged.toBool; } val beq = io.dpath.br_eq; @@ -283,31 +296,42 @@ class rocketCtrl extends Component io.dmem.req_val := ex_reg_mem_val && ~io.dpath.killx; io.dmem.req_cmd := ex_reg_mem_cmd; io.dmem.req_type := ex_reg_mem_type; - - val mem_reg_mem_val = Reg(){Bool()}; - val mem_reg_mem_cmd = Reg(){UFix(width = 4)}; - val mem_reg_mem_type = Reg(){UFix(width = 3)}; + + val mem_reg_div_mul_val = Reg(){Bool()}; + val mem_reg_mem_val = Reg(){Bool()}; + val mem_reg_mem_cmd = Reg(){UFix(width = 4)}; + val mem_reg_mem_type = Reg(){UFix(width = 3)}; when (reset.toBool || io.dpath.killx) { - mem_reg_mem_val <== Bool(false); - mem_reg_mem_cmd <== UFix(0, 4); - mem_reg_mem_type <== UFix(0, 3); + mem_reg_div_mul_val <== Bool(false); + mem_reg_mem_val <== Bool(false); + mem_reg_mem_cmd <== UFix(0, 4); + mem_reg_mem_type <== UFix(0, 3); } otherwise { - mem_reg_mem_val <== ex_reg_mem_val; - mem_reg_mem_cmd <== ex_reg_mem_cmd; - mem_reg_mem_type <== ex_reg_mem_type; + mem_reg_div_mul_val <== ex_reg_div_mul_val; + mem_reg_mem_val <== ex_reg_mem_val; + mem_reg_mem_cmd <== ex_reg_mem_cmd; + mem_reg_mem_type <== ex_reg_mem_type; } - + + when (reset.toBool || io.dpath.killm) { + wb_reg_div_mul_val <== Bool(false); + } + otherwise { + wb_reg_div_mul_val <== mem_reg_div_mul_val; + } + // replay PC when the D$ is blocked val replay_mem_pc = mem_reg_mem_val && !io.dmem.req_rdy; // replay PC+4 on a D$ load miss val mem_cmd_load = mem_reg_mem_val && (mem_reg_mem_cmd === M_XRD); val replay_mem_pc_plus4 = mem_cmd_load && !io.dmem.resp_val; - val dcache_miss = Reg(replay_mem_pc_plus4); val replay_mem = replay_mem_pc | replay_mem_pc_plus4; - io.dpath.mem_load := mem_cmd_load; + dcache_miss <== replay_mem_pc_plus4; + + io.dpath.mem_load := mem_cmd_load; io.dpath.dcache_miss := dcache_miss; io.dpath.sel_pc := @@ -342,8 +366,7 @@ class rocketCtrl extends Component io.dpath.stalld ); - // check for loads in execute stage to detect load/use hazards - + // check for loads in execute and mem stages to detect load/use hazards val ex_mem_cmd_load = ex_reg_mem_val && (ex_reg_mem_cmd === M_XRD); val lu_stall_raddr1_ex = @@ -374,9 +397,28 @@ class rocketCtrl extends Component id_ren2.toBool && (id_raddr2 === io.dpath.mem_waddr); + // check for divide and multiply instructions in ex,mem,wb stages + val dm_stall_ex = + ex_reg_div_mul_val && + ((id_ren1.toBool && (id_raddr1 === io.dpath.ex_waddr)) || + (id_ren2.toBool && (id_raddr2 === io.dpath.ex_waddr))); + + val dm_stall_mem = + mem_reg_div_mul_val && + ((id_ren1.toBool && (id_raddr1 === io.dpath.mem_waddr)) || + (id_ren2.toBool && (id_raddr2 === io.dpath.mem_waddr))); + + val dm_stall_wb = + wb_reg_div_mul_val && + ((id_ren1.toBool && (id_raddr1 === io.dpath.wb_waddr)) || + (id_ren2.toBool && (id_raddr2 === io.dpath.wb_waddr))); + + val dm_stall = dm_stall_ex || dm_stall_mem || dm_stall_wb; + val ctrl_stalld = ~take_pc & ( + dm_stall | lu_stall_raddr1_ex | lu_stall_raddr2_ex | lu_stall_raddr1_mem | @@ -396,7 +438,7 @@ class rocketCtrl extends Component io.dpath.mul_result_val ); - val ctrl_killd = take_pc | ctrl_stalld; + val ctrl_killd = take_pc | ctrl_stalld | io.dpath.killx; // for divider, multiplier writeback val mul_wb = io.dpath.mul_result_val; diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 35a11534..8233de86 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -91,165 +91,6 @@ class rocketDCacheDM_flush(lines: Int, addrbits: Int) extends Component { } -// basic direct mapped data cache, 2 cycle read latency -// parameters : -// lines = # of cache lines -// addr_bits = address width (word addressable) bits -// 64 bit wide cpu port, 128 bit wide memory port, 64 byte cachelines -/* -class rocketDCacheDM(lines: Int, addrbits: Int) extends Component { - val io = new ioDCacheDM(); - - val indexbits = ceil(log10(lines)/log10(2)).toInt; - val offsetbits = 6; - val tagmsb = addrbits - 1; - val taglsb = indexbits+offsetbits; - val indexmsb = taglsb-1; - val indexlsb = offsetbits; - val offsetmsb = indexlsb-1; - val offsetlsb = 3; - - val s_reset :: s_ready :: s_start_writeback :: s_writeback :: s_req_refill :: s_refill :: s_resolve_miss :: Nil = Enum(7) { UFix() }; - val state = Reg(resetVal = s_reset); - - val r_cpu_req_addr = Reg(Bits(0, addrbits)); - val r_r_cpu_req_addr = Reg(r_cpu_req_addr); - val r_cpu_req_val = Reg(Bool(false)); - val r_cpu_req_data = Reg(Bits(0,64)); - val r_cpu_req_cmd = Reg(Bits(0,4)); - val r_cpu_req_wmask = Reg(Bits(0,8)); - val r_cpu_req_tag = Reg(Bits(0,12)); - val r_cpu_resp_tag = Reg(r_cpu_req_tag); - val r_cpu_resp_val = Reg(Bool(false)); - - when (io.cpu.req_val && io.cpu.req_rdy) { - r_cpu_req_addr <== io.cpu.req_addr; - r_cpu_req_data <== io.cpu.req_data; - r_cpu_req_cmd <== io.cpu.req_cmd; - r_cpu_req_wmask <== io.cpu.req_wmask; - r_cpu_req_tag <== io.cpu.req_tag; } - - val req_load = (r_cpu_req_cmd === M_XRD); - val req_store = (r_cpu_req_cmd === M_XWR); - val req_flush = (r_cpu_req_cmd === M_FLA); - - when (io.cpu.req_rdy) { r_cpu_req_val <== io.cpu.req_val; } - otherwise { r_cpu_req_val <== Bool(false); } - - // counter - val rr_count = Reg(resetVal = UFix(0,2)); - val rr_count_next = rr_count + UFix(1); - when (((state === s_refill) && io.mem.resp_val) || ((state === s_writeback) && io.mem.req_rdy)) - { rr_count <== rr_count_next; } - - // tag array - val tag_we = (state === s_resolve_miss); - val tag_waddr = r_cpu_req_addr(indexmsb, indexlsb).toUFix; - val tag_wdata = r_cpu_req_addr(tagmsb, taglsb); - val tag_array = Mem(lines, tag_we, tag_waddr, tag_wdata); - val tag_raddr = Mux((state === s_ready), io.cpu.req_addr(indexmsb, indexlsb).toUFix, r_cpu_req_addr(indexmsb, indexlsb).toUFix); - val tag_rdata = Reg(tag_array.read(tag_raddr)); - - // valid bit array - val vb_array = Reg(resetVal = Bits(0, lines)); - val vb_rdata = Reg(vb_array(tag_raddr)); - when (tag_we && !req_flush) { vb_array <== vb_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(1,1)); } - when (tag_we && req_flush) { vb_array <== vb_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(0,1)); } - - val tag_valid = vb_rdata.toBool; - val tag_match = tag_valid && !req_flush && (tag_rdata === r_cpu_req_addr(tagmsb, taglsb)); - val store = ((state === s_ready) && r_cpu_req_val && req_store && tag_match ) || - ((state === s_resolve_miss) && req_store); - - // dirty bit array - val db_array = Reg(resetVal = Bits(0, lines)); - val db_rdata = Reg(db_array(tag_raddr)); - val tag_dirty = db_rdata.toBool; - when (store) { db_array <== db_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(1,1)); } - when (tag_we) { db_array <== db_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(0,1)); } - - // data array - val data_array_we = ((state === s_refill) && io.mem.resp_val) || store; - val data_array_waddr = Mux((state === s_refill), - Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix, - r_cpu_req_addr(indexmsb, offsetmsb-1).toUFix); - - val data_array_wdata = Mux((state === s_refill), io.mem.resp_data, Cat(r_cpu_req_data, r_cpu_req_data)); - - val req_wmask_expand = Cat(Fill(8, r_cpu_req_wmask(7)), - Fill(8, r_cpu_req_wmask(6)), - Fill(8, r_cpu_req_wmask(5)), - Fill(8, r_cpu_req_wmask(4)), - Fill(8, r_cpu_req_wmask(3)), - Fill(8, r_cpu_req_wmask(2)), - Fill(8, r_cpu_req_wmask(1)), - Fill(8, r_cpu_req_wmask(0))); - - val store_wmask = Mux(r_cpu_req_addr(offsetlsb).toBool, - Cat(req_wmask_expand, Bits(0,64)), - Cat(Bits(0,64), req_wmask_expand)); - - val data_array_wmask = Mux((state === s_refill), ~Bits(0,128), store_wmask); - val data_array = Mem(lines*4, data_array_we, data_array_waddr, data_array_wdata, wrMask = data_array_wmask, resetVal = null); - val data_array_raddr = Mux((state === s_writeback) && io.mem.req_rdy, Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count_next).toUFix, - Mux((state === s_start_writeback) || (state === s_writeback), Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix, - r_cpu_req_addr(indexmsb, offsetmsb-1))); - val data_array_rdata = Reg(data_array.read(data_array_raddr)); - - // output signals - io.cpu.req_rdy := (state === s_ready) && (!r_cpu_req_val || tag_match); - - when ((((state === s_ready) && r_cpu_req_val && tag_match) || (state === s_resolve_miss)) && !req_store) - { r_cpu_resp_val <== Bool(true); } - otherwise { r_cpu_resp_val <== Bool(false); } - - io.cpu.resp_val := r_cpu_resp_val; - io.cpu.resp_data := Mux(r_r_cpu_req_addr(offsetlsb).toBool, data_array_rdata(127, 64), data_array_rdata(63,0)); - io.cpu.resp_tag := r_cpu_resp_tag; - - io.mem.req_val := (state === s_req_refill) || (state === s_writeback); - io.mem.req_rw := (state === s_writeback); - io.mem.req_wdata := data_array_rdata; - io.mem.req_tag := UFix(0); - io.mem.req_addr := Mux(state === s_writeback, - Cat(tag_rdata, r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix, - Cat(r_cpu_req_addr(tagmsb, indexlsb), Bits(0,2)).toUFix); - - // control state machine - switch (state) { - is (s_reset) { - state <== s_ready; - } - is (s_ready) { - when (~r_cpu_req_val) { state <== s_ready; } - when (r_cpu_req_val & tag_match) { state <== s_ready; } - when (tag_valid & tag_dirty) { state <== s_start_writeback; } - when (req_flush) { state <== s_resolve_miss; } - otherwise { state <== s_req_refill; } - } - is (s_start_writeback) { - state <== s_writeback; - } - is (s_writeback) { - when (io.mem.req_rdy && (rr_count === UFix(3,2))) { - when (req_flush) { state <== s_resolve_miss; } - otherwise { state <== s_req_refill; } - } - } - is (s_req_refill) - { - when (io.mem.req_rdy) { state <== s_refill; } - } - is (s_refill) { - when (io.mem.resp_val && (rr_count === UFix(3,2))) { state <== s_resolve_miss; } - } - is (s_resolve_miss) { - state <== s_ready; - } - } -} -*/ - class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { val io = new ioDCacheDM(); @@ -270,7 +111,6 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { val r_cpu_req_data = Reg(resetVal = Bits(0,64)); val r_cpu_req_cmd = Reg(resetVal = Bits(0,4)); val r_cpu_req_type = Reg(resetVal = Bits(0,3)); -// val r_cpu_req_wmask = Reg(resetVal = Bits(0,8)); val r_cpu_req_tag = Reg(resetVal = Bits(0,5)); val p_store_data = Reg(resetVal = Bits(0,64)); @@ -287,7 +127,6 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { r_cpu_req_data <== io.cpu.req_data; r_cpu_req_cmd <== io.cpu.req_cmd; r_cpu_req_type <== io.cpu.req_type; -// r_cpu_req_wmask <== io.cpu.req_wmask; r_cpu_req_tag <== io.cpu.req_tag; } @@ -306,7 +145,6 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { } // tag array -// val tag_we = (state === s_resolve_miss); val tag_we = (state === s_refill) && io.mem.req_rdy && (rr_count === UFix(3,2)); val tag_waddr = r_cpu_req_addr(indexmsb, indexlsb).toUFix; val tag_wdata = r_cpu_req_addr(tagmsb, taglsb); @@ -500,4 +338,164 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { } } +// basic direct mapped data cache, 2 cycle read latency +// parameters : +// lines = # of cache lines +// addr_bits = address width (word addressable) bits +// 64 bit wide cpu port, 128 bit wide memory port, 64 byte cachelines +/* +class rocketDCacheDM(lines: Int, addrbits: Int) extends Component { + val io = new ioDCacheDM(); + + val indexbits = ceil(log10(lines)/log10(2)).toInt; + val offsetbits = 6; + val tagmsb = addrbits - 1; + val taglsb = indexbits+offsetbits; + val indexmsb = taglsb-1; + val indexlsb = offsetbits; + val offsetmsb = indexlsb-1; + val offsetlsb = 3; + + val s_reset :: s_ready :: s_start_writeback :: s_writeback :: s_req_refill :: s_refill :: s_resolve_miss :: Nil = Enum(7) { UFix() }; + val state = Reg(resetVal = s_reset); + + val r_cpu_req_addr = Reg(Bits(0, addrbits)); + val r_r_cpu_req_addr = Reg(r_cpu_req_addr); + val r_cpu_req_val = Reg(Bool(false)); + val r_cpu_req_data = Reg(Bits(0,64)); + val r_cpu_req_cmd = Reg(Bits(0,4)); + val r_cpu_req_wmask = Reg(Bits(0,8)); + val r_cpu_req_tag = Reg(Bits(0,12)); + val r_cpu_resp_tag = Reg(r_cpu_req_tag); + val r_cpu_resp_val = Reg(Bool(false)); + + when (io.cpu.req_val && io.cpu.req_rdy) { + r_cpu_req_addr <== io.cpu.req_addr; + r_cpu_req_data <== io.cpu.req_data; + r_cpu_req_cmd <== io.cpu.req_cmd; + r_cpu_req_wmask <== io.cpu.req_wmask; + r_cpu_req_tag <== io.cpu.req_tag; } + + val req_load = (r_cpu_req_cmd === M_XRD); + val req_store = (r_cpu_req_cmd === M_XWR); + val req_flush = (r_cpu_req_cmd === M_FLA); + + when (io.cpu.req_rdy) { r_cpu_req_val <== io.cpu.req_val; } + otherwise { r_cpu_req_val <== Bool(false); } + + // counter + val rr_count = Reg(resetVal = UFix(0,2)); + val rr_count_next = rr_count + UFix(1); + when (((state === s_refill) && io.mem.resp_val) || ((state === s_writeback) && io.mem.req_rdy)) + { rr_count <== rr_count_next; } + + // tag array + val tag_we = (state === s_resolve_miss); + val tag_waddr = r_cpu_req_addr(indexmsb, indexlsb).toUFix; + val tag_wdata = r_cpu_req_addr(tagmsb, taglsb); + val tag_array = Mem(lines, tag_we, tag_waddr, tag_wdata); + val tag_raddr = Mux((state === s_ready), io.cpu.req_addr(indexmsb, indexlsb).toUFix, r_cpu_req_addr(indexmsb, indexlsb).toUFix); + val tag_rdata = Reg(tag_array.read(tag_raddr)); + + // valid bit array + val vb_array = Reg(resetVal = Bits(0, lines)); + val vb_rdata = Reg(vb_array(tag_raddr)); + when (tag_we && !req_flush) { vb_array <== vb_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(1,1)); } + when (tag_we && req_flush) { vb_array <== vb_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(0,1)); } + + val tag_valid = vb_rdata.toBool; + val tag_match = tag_valid && !req_flush && (tag_rdata === r_cpu_req_addr(tagmsb, taglsb)); + val store = ((state === s_ready) && r_cpu_req_val && req_store && tag_match ) || + ((state === s_resolve_miss) && req_store); + + // dirty bit array + val db_array = Reg(resetVal = Bits(0, lines)); + val db_rdata = Reg(db_array(tag_raddr)); + val tag_dirty = db_rdata.toBool; + when (store) { db_array <== db_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(1,1)); } + when (tag_we) { db_array <== db_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(0,1)); } + + // data array + val data_array_we = ((state === s_refill) && io.mem.resp_val) || store; + val data_array_waddr = Mux((state === s_refill), + Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix, + r_cpu_req_addr(indexmsb, offsetmsb-1).toUFix); + + val data_array_wdata = Mux((state === s_refill), io.mem.resp_data, Cat(r_cpu_req_data, r_cpu_req_data)); + + val req_wmask_expand = Cat(Fill(8, r_cpu_req_wmask(7)), + Fill(8, r_cpu_req_wmask(6)), + Fill(8, r_cpu_req_wmask(5)), + Fill(8, r_cpu_req_wmask(4)), + Fill(8, r_cpu_req_wmask(3)), + Fill(8, r_cpu_req_wmask(2)), + Fill(8, r_cpu_req_wmask(1)), + Fill(8, r_cpu_req_wmask(0))); + + val store_wmask = Mux(r_cpu_req_addr(offsetlsb).toBool, + Cat(req_wmask_expand, Bits(0,64)), + Cat(Bits(0,64), req_wmask_expand)); + + val data_array_wmask = Mux((state === s_refill), ~Bits(0,128), store_wmask); + val data_array = Mem(lines*4, data_array_we, data_array_waddr, data_array_wdata, wrMask = data_array_wmask, resetVal = null); + val data_array_raddr = Mux((state === s_writeback) && io.mem.req_rdy, Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count_next).toUFix, + Mux((state === s_start_writeback) || (state === s_writeback), Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix, + r_cpu_req_addr(indexmsb, offsetmsb-1))); + val data_array_rdata = Reg(data_array.read(data_array_raddr)); + + // output signals + io.cpu.req_rdy := (state === s_ready) && (!r_cpu_req_val || tag_match); + + when ((((state === s_ready) && r_cpu_req_val && tag_match) || (state === s_resolve_miss)) && !req_store) + { r_cpu_resp_val <== Bool(true); } + otherwise { r_cpu_resp_val <== Bool(false); } + + io.cpu.resp_val := r_cpu_resp_val; + io.cpu.resp_data := Mux(r_r_cpu_req_addr(offsetlsb).toBool, data_array_rdata(127, 64), data_array_rdata(63,0)); + io.cpu.resp_tag := r_cpu_resp_tag; + + io.mem.req_val := (state === s_req_refill) || (state === s_writeback); + io.mem.req_rw := (state === s_writeback); + io.mem.req_wdata := data_array_rdata; + io.mem.req_tag := UFix(0); + io.mem.req_addr := Mux(state === s_writeback, + Cat(tag_rdata, r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix, + Cat(r_cpu_req_addr(tagmsb, indexlsb), Bits(0,2)).toUFix); + + // control state machine + switch (state) { + is (s_reset) { + state <== s_ready; + } + is (s_ready) { + when (~r_cpu_req_val) { state <== s_ready; } + when (r_cpu_req_val & tag_match) { state <== s_ready; } + when (tag_valid & tag_dirty) { state <== s_start_writeback; } + when (req_flush) { state <== s_resolve_miss; } + otherwise { state <== s_req_refill; } + } + is (s_start_writeback) { + state <== s_writeback; + } + is (s_writeback) { + when (io.mem.req_rdy && (rr_count === UFix(3,2))) { + when (req_flush) { state <== s_resolve_miss; } + otherwise { state <== s_req_refill; } + } + } + is (s_req_refill) + { + when (io.mem.req_rdy) { state <== s_refill; } + } + is (s_refill) { + when (io.mem.resp_val && (rr_count === UFix(3,2))) { state <== s_resolve_miss; } + } + is (s_resolve_miss) { + state <== s_ready; + } + } +} +*/ + + } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index af4240e6..d53a257a 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -82,8 +82,6 @@ class rocketDpath extends Component val mem_reg_wdata = Reg(resetVal = Bits(0,64)); val mem_reg_raddr2 = Reg(resetVal = UFix(0,5)); val mem_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); - val mem_reg_ctrl_div_val = Reg(resetVal = Bool(false)); - val mem_reg_ctrl_mul_val = Reg(resetVal = Bool(false)); val mem_reg_ctrl_wen = Reg(resetVal = Bool(false)); val mem_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); val mem_reg_ctrl_exception = Reg(resetVal = Bool(false)); @@ -95,8 +93,6 @@ class rocketDpath extends Component val wb_reg_wdata = Reg(resetVal = Bits(0,64)); val wb_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); val wb_reg_raddr2 = Reg(resetVal = UFix(0,5)); - val wb_reg_ctrl_div_val = Reg(resetVal = Bool(false)); - val wb_reg_ctrl_mul_val = Reg(resetVal = Bool(false)); val wb_reg_ctrl_wen = Reg(resetVal = Bool(false)); val wb_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); @@ -373,15 +369,11 @@ class rocketDpath extends Component mem_reg_raddr2 <== ex_reg_raddr2; when (io.ctrl.killx) { - mem_reg_ctrl_div_val <== Bool(false); - mem_reg_ctrl_mul_val <== Bool(false); mem_reg_ctrl_wen <== Bool(false); mem_reg_ctrl_wen_pcr <== Bool(false); mem_reg_ctrl_exception <== Bool(false); } otherwise { - mem_reg_ctrl_div_val <== ex_reg_ctrl_div_val; - mem_reg_ctrl_mul_val <== ex_reg_ctrl_mul_val; mem_reg_ctrl_wen <== ex_reg_ctrl_wen; mem_reg_ctrl_wen_pcr <== ex_reg_ctrl_wen_pcr; mem_reg_ctrl_exception <== ex_reg_ctrl_exception; @@ -416,14 +408,10 @@ class rocketDpath extends Component wb_reg_raddr2 <== mem_reg_raddr2; when (io.ctrl.killm) { - wb_reg_ctrl_div_val <== Bool(false); - wb_reg_ctrl_mul_val <== Bool(false); wb_reg_ctrl_wen <== Bool(false); wb_reg_ctrl_wen_pcr <== Bool(false); } otherwise { - wb_reg_ctrl_div_val <== mem_reg_ctrl_div_val; - wb_reg_ctrl_mul_val <== mem_reg_ctrl_mul_val; wb_reg_ctrl_wen <== mem_reg_ctrl_wen; wb_reg_ctrl_wen_pcr <== mem_reg_ctrl_wen_pcr; } @@ -453,9 +441,7 @@ class rocketDpath extends Component rfile.io.w1.en := r_dmem_resp_val; rfile.io.w1.data := dmem_resp_data_final; - // scoreboard set (for D$ misses, div, mul) - io.ctrl.sboard_set := wb_reg_ctrl_div_val | wb_reg_ctrl_mul_val | io.ctrl.dcache_miss; - io.ctrl.sboard_seta := wb_reg_waddr; + io.ctrl.wb_waddr := wb_reg_waddr; // scoreboard clear (for div/mul and D$ load miss writebacks) io.ctrl.sboard_clr0 := wb_reg_ctrl_ll_wb;