diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala
index 6fb6cf8b..dda9fc81 100644
--- a/rocket/src/main/scala/cpu.scala
+++ b/rocket/src/main/scala/cpu.scala
@@ -7,15 +7,15 @@ import hwacha._
 
 class ioRocket extends Bundle()
 {
-  val host = new ioHTIF();
-  val imem = new ioImem().flip
-  val vimem = new ioImem().flip
-  val dmem = new ioDmem().flip
+  val host = new ioHTIF
+  val imem = (new ioImem).flip
+  val vimem = (new ioImem).flip
+  val dmem = new ioHellaCache
 }
 
 class rocketProc(resetSignal: Bool = null) extends Component(resetSignal)
 {
-  val io = new ioRocket();
+  val io = new ioRocket
 
   val ctrl = new rocketCtrl();
   val dpath = new rocketDpath();
@@ -24,7 +24,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal)
   val itlb = new rocketITLB(ITLB_ENTRIES);
   val vitlb = new rocketITLB(VITLB_ENTRIES)
   val ptw = new rocketPTW();
-  val arb = new rocketDmemArbiter(DCACHE_PORTS)
+  val arb = new rocketHellaCacheArbiter(DCACHE_PORTS)
 
   var vu: vu = null
   if (HAVE_VEC)
@@ -59,7 +59,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal)
     // connect DTLB to ctrl+dpath
     dtlbarb.io.in(DTLB_CPU).valid := ctrl.io.dtlb_val
     dtlbarb.io.in(DTLB_CPU).bits.kill := ctrl.io.dtlb_kill
-    dtlbarb.io.in(DTLB_CPU).bits.cmd := ctrl.io.dmem.req_cmd
+    dtlbarb.io.in(DTLB_CPU).bits.cmd := ctrl.io.dmem.req.bits.cmd
     dtlbarb.io.in(DTLB_CPU).bits.asid := Bits(0,ASID_BITS); // FIXME: connect to PCR
     dtlbarb.io.in(DTLB_CPU).bits.vpn := dpath.io.dtlb.vpn
     ctrl.io.dtlb_rdy := dtlbarb.io.in(DTLB_CPU).ready
@@ -75,7 +75,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal)
     // connect DTLB to ctrl+dpath
     dtlb.io.cpu_req.valid := ctrl.io.dtlb_val
     dtlb.io.cpu_req.bits.kill := ctrl.io.dtlb_kill
-    dtlb.io.cpu_req.bits.cmd := ctrl.io.dmem.req_cmd
+    dtlb.io.cpu_req.bits.cmd := ctrl.io.dmem.req.bits.cmd
     dtlb.io.cpu_req.bits.asid := Bits(0,ASID_BITS); // FIXME: connect to PCR
     dtlb.io.cpu_req.bits.vpn := dpath.io.dtlb.vpn
     ctrl.io.xcpt_dtlb_ld := dtlb.io.cpu_resp.xcpt_ld
@@ -87,8 +87,8 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal)
   dtlb.io.invalidate := dpath.io.ptbr_wen
   dtlb.io.status := dpath.io.ctrl.status
 
-  arb.io.requestor(DMEM_CPU).req_ppn := dtlb.io.cpu_resp.ppn
-  ctrl.io.dmem.req_rdy := dtlb.io.cpu_req.ready && arb.io.requestor(DMEM_CPU).req_rdy
+  arb.io.requestor(DMEM_CPU).req.bits.ppn := dtlb.io.cpu_resp.ppn
+  ctrl.io.dmem.req.ready := dtlb.io.cpu_req.ready && arb.io.requestor(DMEM_CPU).req.ready
 
   // connect page table walker to TLBs, page table base register (from PCR)
   // and D$ arbiter (selects between requests from pipeline and PTW, PTW has priority)
@@ -96,8 +96,8 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal)
   ptw.io.itlb <> itlb.io.ptw;
   ptw.io.vitlb <> vitlb.io.ptw
   ptw.io.ptbr := dpath.io.ptbr;
-  arb.io.requestor(DMEM_PTW) <> ptw.io.dmem
-  arb.io.dmem <> io.dmem
+  arb.io.requestor(DMEM_PTW) <> ptw.io.mem
+  arb.io.mem <> io.dmem
 
   ctrl.io.dpath <> dpath.io.ctrl;
   dpath.io.host <> io.host;
@@ -120,8 +120,18 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal)
   io.imem.itlb_miss := itlb.io.cpu.resp_miss;
 
   // connect arbiter to ctrl+dpath+DTLB
-  arb.io.requestor(DMEM_CPU) <> ctrl.io.dmem
-  arb.io.requestor(DMEM_CPU) <> dpath.io.dmem
+  arb.io.requestor(DMEM_CPU).resp <> ctrl.io.dmem.resp
+  arb.io.requestor(DMEM_CPU).xcpt <> ctrl.io.dmem.xcpt
+  arb.io.requestor(DMEM_CPU).resp <> dpath.io.dmem.resp
+  //TODO: views on nested bundles?
+  arb.io.requestor(DMEM_CPU).req.valid := ctrl.io.dmem.req.valid
+  ctrl.io.dmem.req.ready := arb.io.requestor(DMEM_CPU).req.ready
+  arb.io.requestor(DMEM_CPU).req.bits.kill := ctrl.io.dmem.req.bits.kill
+  arb.io.requestor(DMEM_CPU).req.bits.cmd := ctrl.io.dmem.req.bits.cmd
+  arb.io.requestor(DMEM_CPU).req.bits.typ := ctrl.io.dmem.req.bits.typ
+  arb.io.requestor(DMEM_CPU).req.bits.idx := dpath.io.dmem.req.bits.idx
+  arb.io.requestor(DMEM_CPU).req.bits.tag := dpath.io.dmem.req.bits.tag
+  arb.io.requestor(DMEM_CPU).req.bits.data := dpath.io.dmem.req.bits.data
 
   var fpu: rocketFPU = null
   if (HAVE_FPU)
@@ -207,21 +217,21 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal)
     storegen.io.typ := vu.io.dmem_req.bits.typ
     storegen.io.din := vu.io.dmem_req.bits.data
 
-    arb.io.requestor(DMEM_VU).req_val := vu.io.dmem_req.valid
-    arb.io.requestor(DMEM_VU).req_kill := vu.io.dmem_req.bits.kill
-    arb.io.requestor(DMEM_VU).req_cmd := vu.io.dmem_req.bits.cmd
-    arb.io.requestor(DMEM_VU).req_type := vu.io.dmem_req.bits.typ
-    arb.io.requestor(DMEM_VU).req_idx := vu.io.dmem_req.bits.idx
-    arb.io.requestor(DMEM_VU).req_ppn := Reg(vu.io.dmem_req.bits.ppn)
-    arb.io.requestor(DMEM_VU).req_data := Reg(storegen.io.dout)
-    arb.io.requestor(DMEM_VU).req_tag := vu.io.dmem_req.bits.tag
+    arb.io.requestor(DMEM_VU).req.valid := vu.io.dmem_req.valid
+    arb.io.requestor(DMEM_VU).req.bits.kill := vu.io.dmem_req.bits.kill
+    arb.io.requestor(DMEM_VU).req.bits.cmd := vu.io.dmem_req.bits.cmd
+    arb.io.requestor(DMEM_VU).req.bits.typ := vu.io.dmem_req.bits.typ
+    arb.io.requestor(DMEM_VU).req.bits.idx := vu.io.dmem_req.bits.idx
+    arb.io.requestor(DMEM_VU).req.bits.ppn := Reg(vu.io.dmem_req.bits.ppn)
+    arb.io.requestor(DMEM_VU).req.bits.data := Reg(storegen.io.dout)
+    arb.io.requestor(DMEM_VU).req.bits.tag := vu.io.dmem_req.bits.tag
 
-    vu.io.dmem_req.ready := arb.io.requestor(DMEM_VU).req_rdy
-    vu.io.dmem_resp.valid := Reg(arb.io.requestor(DMEM_VU).resp_val)
-    vu.io.dmem_resp.bits.nack := arb.io.requestor(DMEM_VU).resp_nack
-    vu.io.dmem_resp.bits.data := arb.io.requestor(DMEM_VU).resp_data_subword
-    vu.io.dmem_resp.bits.tag := Reg(arb.io.requestor(DMEM_VU).resp_tag)
-    vu.io.dmem_resp.bits.typ := Reg(arb.io.requestor(DMEM_VU).resp_type)
+    vu.io.dmem_req.ready := arb.io.requestor(DMEM_VU).req.ready
+    vu.io.dmem_resp.valid := Reg(arb.io.requestor(DMEM_VU).resp.valid)
+    vu.io.dmem_resp.bits.nack := arb.io.requestor(DMEM_VU).resp.bits.nack
+    vu.io.dmem_resp.bits.data := arb.io.requestor(DMEM_VU).resp.bits.data_subword
+    vu.io.dmem_resp.bits.tag := Reg(arb.io.requestor(DMEM_VU).resp.bits.tag)
+    vu.io.dmem_resp.bits.typ := Reg(arb.io.requestor(DMEM_VU).resp.bits.typ)
 
     // share vector integer multiplier with rocket
     dpath.io.vec_imul_req <> vu.io.cp_imul_req
@@ -233,7 +243,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal)
   }
   else
   {
-    arb.io.requestor(DMEM_VU).req_val := Bool(false)
+    arb.io.requestor(DMEM_VU).req.valid := Bool(false)
     if (HAVE_FPU)
     {
       fpu.io.sfma.valid := Bool(false)
diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala
index 0e747d6e..dafe0d1f 100644
--- a/rocket/src/main/scala/ctrl.scala
+++ b/rocket/src/main/scala/ctrl.scala
@@ -76,7 +76,7 @@ class ioCtrlAll extends Bundle()
 {
   val dpath = new ioCtrlDpath();
   val imem = new ioImem(List("req_val", "resp_val")).flip
-  val dmem = new ioDmem(List("req_val", "req_kill", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_nack", "xcpt_ma_ld", "xcpt_ma_st")).flip
+  val dmem = new ioHellaCache
   val dtlb_val = Bool(OUTPUT);
   val dtlb_kill = Bool(OUTPUT);
   val dtlb_rdy = Bool(INPUT);
@@ -351,7 +351,7 @@ class rocketCtrl extends Component
   val id_waddr = Mux(id_sel_wa === WA_RA, RA, io.dpath.inst(31,27));
 
   val wb_reg_div_mul_val = Reg(resetVal = Bool(false))
-  val wb_reg_dcache_miss = Reg(io.dmem.resp_miss || io.dmem.resp_nack, resetVal = Bool(false));
+  val wb_reg_dcache_miss = Reg(io.dmem.resp.bits.miss || io.dmem.resp.bits.nack, resetVal = Bool(false));
 
   val id_reg_valid = Reg(resetVal = Bool(false));
   val id_reg_btb_hit = Reg(resetVal = Bool(false));
@@ -681,8 +681,8 @@ class rocketCtrl extends Component
   }
 
   // exception handling
-  val mem_xcpt_ma_ld = io.dmem.xcpt_ma_ld && !mem_reg_kill
-  val mem_xcpt_ma_st = io.dmem.xcpt_ma_st && !mem_reg_kill
+  val mem_xcpt_ma_ld = io.dmem.xcpt.ma.ld && !mem_reg_kill
+  val mem_xcpt_ma_st = io.dmem.xcpt.ma.st && !mem_reg_kill
   val mem_xcpt_dtlb_ld = io.xcpt_dtlb_ld && !mem_reg_kill
   val mem_xcpt_dtlb_st = io.xcpt_dtlb_st && !mem_reg_kill
 
@@ -722,7 +722,7 @@ class rocketCtrl extends Component
 
   // replay mem stage PC on a DTLB miss or a long-latency writeback
   val mem_ll_wb = io.dpath.mem_wb || io.dpath.mul_result_val || io.dpath.div_result_val
-  val dmem_kill_mem = mem_reg_valid && (io.dtlb_miss || io.dmem.resp_nack)
+  val dmem_kill_mem = mem_reg_valid && (io.dtlb_miss || io.dmem.resp.bits.nack)
   val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem
   val replay_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay || fpu_kill_mem
   val kill_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill || fpu_kill_mem
@@ -731,7 +731,7 @@ class rocketCtrl extends Component
   // replay execute stage PC when the D$ is blocked, when the D$ misses,
   // for privileged instructions, and for fence.i instructions
   val replay_ex = wb_reg_dcache_miss && ex_reg_load_use || mem_reg_flush_inst ||
-                  ex_reg_replay || ex_reg_mem_val && !(io.dmem.req_rdy && io.dtlb_rdy) ||
+                  ex_reg_replay || ex_reg_mem_val && !(io.dmem.req.ready && io.dtlb_rdy) ||
                   ex_reg_div_val && !io.dpath.div_rdy || ex_reg_mul_val && !io.dpath.mul_rdy
   val kill_ex = take_pc_wb || replay_ex
 
@@ -817,8 +817,8 @@ class rocketCtrl extends Component
     id_ex_hazard || id_mem_hazard || id_wb_hazard ||
     id_stall_raddr1 || id_stall_raddr2 || id_stall_waddr ||
     id_fp_val && id_stall_fpu ||
-    id_mem_val.toBool && !(io.dmem.req_rdy && io.dtlb_rdy) ||
-    ((id_sync === SYNC_D) || (id_sync === SYNC_I)) && !io.dmem.req_rdy ||
+    id_mem_val.toBool && !(io.dmem.req.ready && io.dtlb_rdy) ||
+    ((id_sync === SYNC_D) || (id_sync === SYNC_I)) && !io.dmem.req.ready ||
     vec_stalld
   );
   val ctrl_stallf = ctrl_stalld;
@@ -861,10 +861,10 @@ class rocketCtrl extends Component
   io.fpu.killx := kill_ex
   io.fpu.killm := kill_mem
 
-  io.dtlb_val := ex_reg_mem_val
-  io.dtlb_kill := mem_reg_kill;
-  io.dmem.req_val := ex_reg_mem_val
-  io.dmem.req_kill := kill_dcache;
-  io.dmem.req_cmd := ex_reg_mem_cmd;
-  io.dmem.req_type := ex_reg_mem_type;
+  io.dtlb_val := ex_reg_mem_val
+  io.dtlb_kill := mem_reg_kill
+  io.dmem.req.valid := ex_reg_mem_val
+  io.dmem.req.bits.kill := kill_dcache
+  io.dmem.req.bits.cmd := ex_reg_mem_cmd
+  io.dmem.req.bits.typ := ex_reg_mem_type
 }
diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala
index 180d4427..3a6ee698 100644
--- a/rocket/src/main/scala/dpath.scala
+++ b/rocket/src/main/scala/dpath.scala
@@ -17,7 +17,7 @@ class ioDpathAll extends Bundle()
 {
   val host = new ioHTIF();
   val ctrl = new ioCtrlDpath().flip
-  val dmem = new ioDmem(List("req_idx", "req_tag", "req_data", "resp_val", "resp_miss", "resp_replay", "resp_type", "resp_tag", "resp_data", "resp_data_subword")).flip
+  val dmem = new ioHellaCache
   val dtlb = new ioDTLB_CPU_req_bundle().asOutput()
   val imem = new ioDpathImem();
   val ptbr_wen = Bool(OUTPUT);
@@ -274,9 +274,9 @@ class rocketDpath extends Component
 
   // D$ request interface (registered inside D$ module)
   // other signals (req_val, req_rdy) connect to control module
-  io.dmem.req_idx := ex_effective_address
-  io.dmem.req_data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2)
-  io.dmem.req_tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val)
+  io.dmem.req.bits.idx := ex_effective_address
+  io.dmem.req.bits.data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2)
+  io.dmem.req.bits.tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val)
   io.dtlb.vpn := ex_effective_address >> UFix(PGIDX_BITS)
 
   // processor control regfile read
@@ -332,13 +332,13 @@ class rocketDpath extends Component
   // 32/64 bit load handling (moved to earlier in file)
 
   // writeback arbitration
-  val dmem_resp_xpu = !io.dmem.resp_tag(0).toBool
-  val dmem_resp_fpu = io.dmem.resp_tag(0).toBool
-  val dmem_resp_waddr = io.dmem.resp_tag.toUFix >> UFix(1)
-  dmem_resp_replay := io.dmem.resp_replay && dmem_resp_xpu;
+  val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool
+  val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool
+  val dmem_resp_waddr = io.dmem.resp.bits.tag.toUFix >> UFix(1)
+  dmem_resp_replay := io.dmem.resp.bits.replay && dmem_resp_xpu;
   r_dmem_resp_replay := dmem_resp_replay
   r_dmem_resp_waddr := dmem_resp_waddr
-  r_dmem_fp_replay := io.dmem.resp_replay && dmem_resp_fpu;
+  r_dmem_fp_replay := io.dmem.resp.bits.replay && dmem_resp_fpu;
 
   val mem_ll_waddr = Mux(dmem_resp_replay, dmem_resp_waddr,
                      Mux(div.io.resp_val, div.io.resp_tag,
@@ -350,9 +350,9 @@ class rocketDpath extends Component
                      mem_reg_wdata)))
   val mem_ll_wb = dmem_resp_replay || div.io.resp_val || mul_io.resp_val
 
-  io.fpu.dmem_resp_val := io.dmem.resp_val && dmem_resp_fpu
-  io.fpu.dmem_resp_data := io.dmem.resp_data
-  io.fpu.dmem_resp_type := io.dmem.resp_type
+  io.fpu.dmem_resp_val := io.dmem.resp.valid && dmem_resp_fpu
+  io.fpu.dmem_resp_data := io.dmem.resp.bits.data
+  io.fpu.dmem_resp_type := io.dmem.resp.bits.typ
   io.fpu.dmem_resp_tag := dmem_resp_waddr
 
   // writeback stage
@@ -362,7 +362,7 @@ class rocketDpath extends Component
   wb_reg_rs2 := mem_reg_rs2
   wb_reg_waddr := mem_ll_waddr
   wb_reg_wdata := mem_ll_wdata
-  wb_reg_dmem_wdata := io.dmem.resp_data
+  wb_reg_dmem_wdata := io.dmem.resp.bits.data
   wb_reg_vec_waddr := mem_reg_waddr
   wb_reg_vec_wdata := mem_reg_wdata
   wb_reg_raddr1 := mem_reg_raddr1
@@ -395,7 +395,7 @@ class rocketDpath extends Component
 
     wb_wdata :=
       Mux(vec.io.wen, Cat(Bits(0,52), vec.io.appvl),
-      Mux(wb_src_dmem, io.dmem.resp_data_subword,
+      Mux(wb_src_dmem, io.dmem.resp.bits.data_subword,
       wb_reg_wdata))
   }
   else
@@ -406,7 +406,7 @@ class rocketDpath extends Component
     pcr.io.vec_nfregs := UFix(0)
 
     wb_wdata :=
-      Mux(wb_src_dmem, io.dmem.resp_data_subword,
+      Mux(wb_src_dmem, io.dmem.resp.bits.data_subword,
       wb_reg_wdata)
   }
 
diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala
index bae2746b..c713b6f3 100644
--- a/rocket/src/main/scala/nbdcache.scala
+++ b/rocket/src/main/scala/nbdcache.scala
@@ -716,6 +716,41 @@ class AMOALU extends Component {
   io.out := Mux(word, Cat(out(31,0), out(31,0)).toUFix, out)
 }
 
+class HellaCacheReq extends Bundle {
+  val cmd = Bits(width = 4)
+  val typ = Bits(width = 3)
+  val idx = Bits(width = PGIDX_BITS)
+  val ppn = Bits(width = PPN_BITS)
+  val data = Bits(width = 64)
+  val kill = Bool()
+  val tag = Bits(width = DCACHE_TAG_BITS)
+}
+
+class HellaCacheResp extends Bundle {
+  val miss = Bool()
+  val nack = Bool()
+  val replay = Bool()
+  val typ = Bits(width = 3)
+  val data = Bits(width = 64)
+  val data_subword = Bits(width = 64)
+  val tag = Bits(width = DCACHE_TAG_BITS)
+}
+
+class AlignmentExceptions extends Bundle {
+  val ld = Bool()
+  val st = Bool()
+}
+
+class HellaCacheExceptions extends Bundle {
+  val ma = new AlignmentExceptions
+}
+
+class ioHellaCache extends Bundle {
+  val req = (new ioDecoupled){ new HellaCacheReq }
+  val resp = (new ioPipe){ new HellaCacheResp }.flip
+  val xcpt = (new HellaCacheExceptions).asInput
+}
+
 // interface between D$ and processor/DTLB
 class ioDmem(view: List[String] = null) extends Bundle(view) {
   val req_kill = Bool(INPUT);
@@ -741,7 +776,7 @@ class ioDmem(view: List[String] = null) extends Bundle(view) {
 
 class HellaCache(co: CoherencePolicy) extends Component {
   val io = new Bundle {
-    val cpu = new ioDmem()
+    val cpu = (new ioHellaCache).flip
    val mem = new ioTileLink
   }
 
@@ -759,8 +794,8 @@ class HellaCache(co: CoherencePolicy) extends Component {
   val ramindexlsb = log2up(MEM_DATA_BITS/8)
 
   val early_nack = Reg { Bool() }
-  val r_cpu_req_val_ = Reg(io.cpu.req_val && io.cpu.req_rdy, resetVal = Bool(false))
-  val r_cpu_req_val = r_cpu_req_val_ && !io.cpu.req_kill && !early_nack
+  val r_cpu_req_val_ = Reg(io.cpu.req.valid && io.cpu.req.ready, resetVal = Bool(false))
+  val r_cpu_req_val = r_cpu_req_val_ && !io.cpu.req.bits.kill && !early_nack
   val r_cpu_req_idx = Reg() { Bits() }
   val r_cpu_req_cmd = Reg() { Bits() }
   val r_cpu_req_type = Reg() { Bits() }
@@ -776,9 +811,9 @@ class HellaCache(co: CoherencePolicy) extends Component {
   val p_store_way_oh = Reg() { Bits() }
   val r_replay_amo = Reg(resetVal = Bool(false))
 
-  val req_store = (io.cpu.req_cmd === M_XWR)
-  val req_load = (io.cpu.req_cmd === M_XRD)
-  val req_amo = io.cpu.req_cmd(3).toBool
+  val req_store = (io.cpu.req.bits.cmd === M_XWR)
+  val req_load = (io.cpu.req.bits.cmd === M_XRD)
+  val req_amo = io.cpu.req.bits.cmd(3).toBool
   val req_read = req_load || req_amo
   val req_write = req_store || req_amo
   val r_req_load = (r_cpu_req_cmd === M_XRD)
@@ -804,11 +839,11 @@ class HellaCache(co: CoherencePolicy) extends Component {
   flusher.io.req.valid := r_cpu_req_val && r_req_flush && mshr.io.fence_rdy && !flushed
   flusher.io.mshr_req.ready := mshr.io.req.ready
 
-  when (io.cpu.req_val) {
-    r_cpu_req_idx := io.cpu.req_idx
-    r_cpu_req_cmd := io.cpu.req_cmd
-    r_cpu_req_type := io.cpu.req_type
-    r_cpu_req_tag := io.cpu.req_tag
+  when (io.cpu.req.valid) {
+    r_cpu_req_idx := io.cpu.req.bits.idx
+    r_cpu_req_cmd := io.cpu.req.bits.cmd
+    r_cpu_req_type := io.cpu.req.bits.typ
+    r_cpu_req_tag := io.cpu.req.bits.tag
   }
   when (prober.io.meta_req.valid) {
     r_cpu_req_idx := Cat(prober.io.meta_req.bits.data.tag, prober.io.meta_req.bits.idx, mshr.io.data_req.bits.offset)(PGIDX_BITS-1,0)
@@ -825,15 +860,15 @@ class HellaCache(co: CoherencePolicy) extends Component {
     r_cpu_req_cmd := M_FLA
    r_way_oh := flusher.io.meta_req.bits.way_en
   }
-  val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req_data)
+  val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req.bits.data)
 
   val misaligned =
     (((r_cpu_req_type === MT_H) || (r_cpu_req_type === MT_HU)) && (r_cpu_req_idx(0) != Bits(0))) ||
     (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0))) ||
     ((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != Bits(0)));
 
-  io.cpu.xcpt_ma_ld := r_cpu_req_val_ && !early_nack && r_req_read && misaligned
-  io.cpu.xcpt_ma_st := r_cpu_req_val_ && !early_nack && r_req_write && misaligned
+  io.cpu.xcpt.ma.ld := r_cpu_req_val_ && !early_nack && r_req_read && misaligned
+  io.cpu.xcpt.ma.st := r_cpu_req_val_ && !early_nack && r_req_write && misaligned
 
   // tags
   val meta = new MetaDataArrayArray(lines)
@@ -847,12 +882,12 @@ class HellaCache(co: CoherencePolicy) extends Component {
   data_arb.io.out <> data.io.req
 
   // cpu tag check
-  meta_arb.io.in(3).valid := io.cpu.req_val
-  meta_arb.io.in(3).bits.idx := io.cpu.req_idx(indexmsb,indexlsb)
+  meta_arb.io.in(3).valid := io.cpu.req.valid
+  meta_arb.io.in(3).bits.idx := io.cpu.req.bits.idx(indexmsb,indexlsb)
   meta_arb.io.in(3).bits.rw := Bool(false)
   meta_arb.io.in(3).bits.way_en := ~UFix(0, NWAYS)
   val early_tag_nack = !meta_arb.io.in(3).ready
-  val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.address >> UFix(PGIDX_BITS-OFFSET_BITS), io.cpu.req_ppn)
+  val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.address >> UFix(PGIDX_BITS-OFFSET_BITS), io.cpu.req.bits.ppn)
   val cpu_req_tag = Cat(cpu_req_ppn, r_cpu_req_idx)(tagmsb,taglsb)
   val tag_match_arr = (0 until NWAYS).map( w => co.isValid(meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag))
   val tag_match = Cat(Bits(0),tag_match_arr:_*).orR
@@ -886,10 +921,10 @@ class HellaCache(co: CoherencePolicy) extends Component {
   data_arb.io.in(0).valid := io.mem.xact_rep.valid && co.messageUpdatesDataArray(io.mem.xact_rep.bits)
 
   // load hits
-  data_arb.io.in(4).bits.offset := io.cpu.req_idx(offsetmsb,ramindexlsb)
-  data_arb.io.in(4).bits.idx := io.cpu.req_idx(indexmsb,indexlsb)
+  data_arb.io.in(4).bits.offset := io.cpu.req.bits.idx(offsetmsb,ramindexlsb)
+  data_arb.io.in(4).bits.idx := io.cpu.req.bits.idx(indexmsb,indexlsb)
   data_arb.io.in(4).bits.rw := Bool(false)
-  data_arb.io.in(4).valid := io.cpu.req_val && req_read
+  data_arb.io.in(4).valid := io.cpu.req.valid && req_read
   data_arb.io.in(4).bits.way_en := ~UFix(0, NWAYS) // intiate load on all ways, mux after tag check
   val early_load_nack = req_read && !data_arb.io.in(4).ready
 
@@ -900,7 +935,7 @@ class HellaCache(co: CoherencePolicy) extends Component {
   val p_store_idx_match = p_store_valid && (r_cpu_req_idx(indexmsb,indexlsb) === p_store_idx(indexmsb,indexlsb))
   val p_store_offset_match = (r_cpu_req_idx(indexlsb-1,offsetlsb) === p_store_idx(indexlsb-1,offsetlsb))
   val p_store_match = r_cpu_req_val_ && r_req_read && p_store_idx_match && p_store_offset_match
-  val drain_store_val = (p_store_valid && (!io.cpu.req_val || req_write || wb.io.data_req.valid || mshr.io.data_req.valid)) || p_store_match
+  val drain_store_val = (p_store_valid && (!io.cpu.req.valid || req_write || wb.io.data_req.valid || mshr.io.data_req.valid)) || p_store_match
   data_arb.io.in(2).bits.offset := p_store_idx(offsetmsb,ramindexlsb)
   data_arb.io.in(2).bits.idx := p_store_idx(indexmsb,indexlsb)
   data_arb.io.in(2).bits.rw := Bool(true)
@@ -1020,15 +1055,15 @@ class HellaCache(co: CoherencePolicy) extends Component {
                    !flushed && r_req_flush
   val nack = early_nack || r_req_readwrite && (nack_hit || nack_miss) || nack_flush
 
-  io.cpu.req_rdy := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence
-  io.cpu.resp_nack := r_cpu_req_val_ && !io.cpu.req_kill && nack
-  io.cpu.resp_val := (r_cpu_req_val && tag_hit && !mshr.io.secondary_miss && !nack && r_req_read) || mshr.io.cpu_resp_val
-  io.cpu.resp_replay := mshr.io.cpu_resp_val
-  io.cpu.resp_miss := r_cpu_req_val_ && (!tag_hit || mshr.io.secondary_miss) && r_req_read
-  io.cpu.resp_tag := Mux(mshr.io.cpu_resp_val, mshr.io.cpu_resp_tag, r_cpu_req_tag)
-  io.cpu.resp_type := loadgen.io.typ
-  io.cpu.resp_data := loadgen.io.dout
-  io.cpu.resp_data_subword := loadgen.io.r_dout_subword
+  io.cpu.req.ready := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence
+  io.cpu.resp.valid := (r_cpu_req_val && tag_hit && !mshr.io.secondary_miss && !nack && r_req_read) || mshr.io.cpu_resp_val
+  io.cpu.resp.bits.nack := r_cpu_req_val_ && !io.cpu.req.bits.kill && nack
+  io.cpu.resp.bits.replay := mshr.io.cpu_resp_val
+  io.cpu.resp.bits.miss := r_cpu_req_val_ && (!tag_hit || mshr.io.secondary_miss) && r_req_read
+  io.cpu.resp.bits.tag := Mux(mshr.io.cpu_resp_val, mshr.io.cpu_resp_tag, r_cpu_req_tag)
+  io.cpu.resp.bits.typ := loadgen.io.typ
+  io.cpu.resp.bits.data := loadgen.io.dout
+  io.cpu.resp.bits.data_subword := loadgen.io.r_dout_subword
 
   val xact_init_arb = (new Arbiter(2)) { new TransactionInit }
   xact_init_arb.io.in(0) <> wb.io.mem_req
diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala
index feff7896..8e5bd4b2 100644
--- a/rocket/src/main/scala/ptw.scala
+++ b/rocket/src/main/scala/ptw.scala
@@ -5,81 +5,84 @@ import Node._;
 import Constants._;
 import scala.math._;
 
-class ioDmemArbiter(n: Int) extends Bundle
+class ioHellaCacheArbiter(n: Int) extends Bundle
 {
-  val dmem = new ioDmem().flip
-  val requestor = Vec(n) { new ioDmem() }
+  val requestor = Vec(n) { new ioHellaCache() }.flip
+  val mem = new ioHellaCache
 }
 
-class rocketDmemArbiter(n: Int) extends Component
+class rocketHellaCacheArbiter(n: Int) extends Component
 {
-  val io = new ioDmemArbiter(n)
+  val io = new ioHellaCacheArbiter(n)
   require(DCACHE_TAG_BITS >= log2up(n) + CPU_TAG_BITS)
 
   var req_val = Bool(false)
-  var req_rdy = io.dmem.req_rdy
+  var req_rdy = io.mem.req.ready
   for (i <- 0 until n)
   {
-    io.requestor(i).req_rdy := req_rdy
-    req_val = req_val || io.requestor(i).req_val
-    req_rdy = req_rdy && !io.requestor(i).req_val
+    io.requestor(i).req.ready := req_rdy
+    req_val = req_val || io.requestor(i).req.valid
+    req_rdy = req_rdy && !io.requestor(i).req.valid
   }
 
-  var req_cmd = io.requestor(n-1).req_cmd
-  var req_type = io.requestor(n-1).req_type
-  var req_idx = io.requestor(n-1).req_idx
-  var req_ppn = io.requestor(n-1).req_ppn
-  var req_data = io.requestor(n-1).req_data
-  var req_tag = io.requestor(n-1).req_tag
-  var req_kill = io.requestor(n-1).req_kill
+  var req_cmd = io.requestor(n-1).req.bits.cmd
+  var req_type = io.requestor(n-1).req.bits.typ
+  var req_idx = io.requestor(n-1).req.bits.idx
+  var req_ppn = io.requestor(n-1).req.bits.ppn
+  var req_data = io.requestor(n-1).req.bits.data
+  var req_kill = io.requestor(n-1).req.bits.kill
+  var req_tag = io.requestor(n-1).req.bits.tag
 
   for (i <- n-1 to 0 by -1)
   {
-    req_cmd = Mux(io.requestor(i).req_val, io.requestor(i).req_cmd, req_cmd)
-    req_type = Mux(io.requestor(i).req_val, io.requestor(i).req_type, req_type)
-    req_idx = Mux(io.requestor(i).req_val, io.requestor(i).req_idx, req_idx)
-    req_ppn = Mux(Reg(io.requestor(i).req_val), io.requestor(i).req_ppn, req_ppn)
-    req_data = Mux(Reg(io.requestor(i).req_val), io.requestor(i).req_data, req_data)
-    req_tag = Mux(io.requestor(i).req_val, Cat(io.requestor(i).req_tag, UFix(i, log2up(n))), req_tag)
-    req_kill = Mux(Reg(io.requestor(i).req_val), io.requestor(i).req_kill, req_kill)
+    val r = io.requestor(i).req
+    req_cmd = Mux(r.valid, r.bits.cmd, req_cmd)
+    req_type = Mux(r.valid, r.bits.typ, req_type)
+    req_idx = Mux(r.valid, r.bits.idx, req_idx)
+    req_ppn = Mux(Reg(r.valid), r.bits.ppn, req_ppn)
+    req_data = Mux(Reg(r.valid), r.bits.data, req_data)
+    req_kill = Mux(Reg(r.valid), r.bits.kill, req_kill)
+    req_tag = Mux(r.valid, Cat(r.bits.tag, UFix(i, log2up(n))), req_tag)
   }
 
-  io.dmem.req_val := req_val
-  io.dmem.req_cmd := req_cmd
-  io.dmem.req_type := req_type
-  io.dmem.req_idx := req_idx
-  io.dmem.req_ppn := req_ppn
-  io.dmem.req_data := req_data
-  io.dmem.req_tag := req_tag
-  io.dmem.req_kill := req_kill
+  io.mem.req.valid := req_val
+  io.mem.req.bits.cmd := req_cmd
+  io.mem.req.bits.typ := req_type
+  io.mem.req.bits.idx := req_idx
+  io.mem.req.bits.ppn := req_ppn
+  io.mem.req.bits.data := req_data
+  io.mem.req.bits.kill := req_kill
+  io.mem.req.bits.tag := req_tag
 
   for (i <- 0 until n)
   {
-    val tag_hit = io.dmem.resp_tag(log2up(n)-1,0) === UFix(i)
-    io.requestor(i).xcpt_ma_ld := io.dmem.xcpt_ma_ld && Reg(io.requestor(i).req_val)
-    io.requestor(i).xcpt_ma_st := io.dmem.xcpt_ma_st && Reg(io.requestor(i).req_val)
-    io.requestor(i).resp_nack := io.dmem.resp_nack && Reg(io.requestor(i).req_val)
-    io.requestor(i).resp_miss := io.dmem.resp_miss && tag_hit
-    io.requestor(i).resp_val := io.dmem.resp_val && tag_hit
-    io.requestor(i).resp_replay := io.dmem.resp_replay && tag_hit
-    io.requestor(i).resp_data := io.dmem.resp_data
-    io.requestor(i).resp_data_subword := io.dmem.resp_data_subword
-    io.requestor(i).resp_type := io.dmem.resp_type
-    io.requestor(i).resp_tag := io.dmem.resp_tag >> UFix(log2up(n))
+    val r = io.requestor(i).resp
+    val x = io.requestor(i).xcpt
+    val tag_hit = io.mem.resp.bits.tag(log2up(n)-1,0) === UFix(i)
+    x.ma.ld := io.mem.xcpt.ma.ld && Reg(io.requestor(i).req.valid)
+    x.ma.st := io.mem.xcpt.ma.st && Reg(io.requestor(i).req.valid)
+    r.valid := io.mem.resp.valid && tag_hit
+    r.bits.miss := io.mem.resp.bits.miss && tag_hit
+    r.bits.nack := io.mem.resp.bits.nack && Reg(io.requestor(i).req.valid)
+    r.bits.replay := io.mem.resp.bits.replay && tag_hit
+    r.bits.data := io.mem.resp.bits.data
+    r.bits.data_subword := io.mem.resp.bits.data_subword
+    r.bits.typ := io.mem.resp.bits.typ
+    r.bits.tag := io.mem.resp.bits.tag >> UFix(log2up(n))
   }
 }
 
 class ioPTW extends Bundle
 {
-  val itlb = new ioTLB_PTW().flip
-  val dtlb = new ioTLB_PTW().flip
-  val vitlb = new ioTLB_PTW().flip
-  val dmem = new ioDmem().flip
-  val ptbr = UFix(PADDR_BITS, INPUT);
+  val itlb = (new ioTLB_PTW).flip
+  val dtlb = (new ioTLB_PTW).flip
+  val vitlb = (new ioTLB_PTW).flip
+  val mem = new ioHellaCache
+  val ptbr = UFix(PADDR_BITS, INPUT)
 }
 
 class rocketPTW extends Component
 {
-  val io = new ioPTW();
+  val io = new ioPTW
 
   val levels = 3
   val bitsPerLevel = VPN_BITS/levels
@@ -123,25 +126,25 @@ class rocketPTW extends Component
     req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.vitlb.req_vpn(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3))
   }
 
-  val dmem_resp_val = Reg(io.dmem.resp_val, resetVal = Bool(false))
+  val dmem_resp_val = Reg(io.mem.resp.valid, resetVal = Bool(false))
   when (dmem_resp_val) {
-    req_addr := Cat(io.dmem.resp_data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, Bits(0,3))
-    r_resp_perm := io.dmem.resp_data_subword(9,4);
-    r_resp_ppn := io.dmem.resp_data_subword(PADDR_BITS-1, PGIDX_BITS);
+    req_addr := Cat(io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, Bits(0,3))
+    r_resp_perm := io.mem.resp.bits.data_subword(9,4);
+    r_resp_ppn := io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS);
   }
 
-  io.dmem.req_val := state === s_req
-  io.dmem.req_cmd := M_XRD;
-  io.dmem.req_type := MT_D;
-  io.dmem.req_idx := req_addr(PGIDX_BITS-1,0);
-  io.dmem.req_ppn := Reg(req_addr(PADDR_BITS-1,PGIDX_BITS))
-  io.dmem.req_kill := Bool(false)
+  io.mem.req.valid := state === s_req
+  io.mem.req.bits.cmd := M_XRD
+  io.mem.req.bits.typ := MT_D
+  io.mem.req.bits.idx := req_addr(PGIDX_BITS-1,0)
+  io.mem.req.bits.ppn := Reg(req_addr(PADDR_BITS-1,PGIDX_BITS))
+  io.mem.req.bits.kill := Bool(false)
 
   val resp_val = state === s_done
   val resp_err = state === s_error
-  val resp_ptd = io.dmem.resp_data_subword(1,0) === Bits(1)
-  val resp_pte = io.dmem.resp_data_subword(1,0) === Bits(2)
+  val resp_ptd = io.mem.resp.bits.data_subword(1,0) === Bits(1)
+  val resp_pte = io.mem.resp.bits.data_subword(1,0) === Bits(2)
 
   io.itlb.req_rdy := (state === s_ready)
   io.dtlb.req_rdy := (state === s_ready) && !io.itlb.req_val
@@ -172,12 +175,12 @@ class rocketPTW extends Component
       count := UFix(0)
     }
     is (s_req) {
-      when (io.dmem.req_rdy) {
+      when (io.mem.req.ready) {
        state := s_wait;
       }
     }
     is (s_wait) {
-      when (io.dmem.resp_nack) {
+      when (io.mem.resp.bits.nack) {
        state := s_req
      }
      when (dmem_resp_val) {