From 4d1ca8ba3ae632c9212109983dd14b36884962f1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 6 Nov 2012 08:13:44 -0800 Subject: [PATCH] remove more global consts; refactor DTLBs D$ now contains DTLB. provide full VAddr with initial request. VU now has its own DTLBs. --- rocket/src/main/scala/arbiter.scala | 108 ++++++--------- rocket/src/main/scala/consts.scala | 19 +-- .../src/main/scala/{cpu.scala => core.scala} | 130 ++++++------------ rocket/src/main/scala/ctrl.scala | 15 +- rocket/src/main/scala/dpath.scala | 13 +- rocket/src/main/scala/fpu.scala | 18 +-- rocket/src/main/scala/icache.scala | 7 +- rocket/src/main/scala/nbdcache.scala | 81 ++++++----- rocket/src/main/scala/ptw.scala | 39 ++++-- rocket/src/main/scala/tile.scala | 24 ++-- rocket/src/main/scala/tlb.scala | 33 ++--- 11 files changed, 206 insertions(+), 281 deletions(-) rename rocket/src/main/scala/{cpu.scala => core.scala} (61%) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 5e5669ea..8fb5d3f7 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -12,58 +12,40 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Comp val mem = new ioHellaCache()(conf.dcache) } - var req_val = Bool(false) - var req_rdy = io.mem.req.ready - for (i <- 0 until n) - { - io.requestor(i).req.ready := req_rdy - req_val = req_val || io.requestor(i).req.valid - req_rdy = req_rdy && !io.requestor(i).req.valid + val r_valid = io.requestor.map(r => Reg(r.req.valid)) + + io.mem.req.valid := io.requestor.map(_.req.valid).reduce(_||_) + io.requestor(0).req.ready := io.mem.req.ready + for (i <- 1 until n) + io.requestor(i).req.ready := io.requestor(i-1).req.ready && !io.requestor(i-1).req.valid + + io.mem.req.bits := io.requestor(n-1).req.bits + io.mem.req.bits.tag := Cat(io.requestor(n-1).req.bits.tag, UFix(n-1, log2Up(n))) + for (i <- n-2 to 0 by -1) { + val req = io.requestor(i).req + when (req.valid) { + io.mem.req.bits.cmd := req.bits.cmd + io.mem.req.bits.typ := req.bits.typ + io.mem.req.bits.addr := req.bits.addr + io.mem.req.bits.phys := req.bits.phys + io.mem.req.bits.tag := Cat(req.bits.tag, UFix(i, log2Up(n))) + } + when (r_valid(i)) { + io.mem.req.bits.kill := req.bits.kill + io.mem.req.bits.data := req.bits.data + } } - var req_cmd = io.requestor(n-1).req.bits.cmd - var req_type = io.requestor(n-1).req.bits.typ - var req_idx = io.requestor(n-1).req.bits.idx - var req_ppn = io.requestor(n-1).req.bits.ppn - var req_data = io.requestor(n-1).req.bits.data - var req_kill = io.requestor(n-1).req.bits.kill - var req_tag = io.requestor(n-1).req.bits.tag - for (i <- n-1 to 0 by -1) - { - val r = io.requestor(i).req - req_cmd = Mux(r.valid, r.bits.cmd, req_cmd) - req_type = Mux(r.valid, r.bits.typ, req_type) - req_idx = Mux(r.valid, r.bits.idx, req_idx) - req_ppn = Mux(Reg(r.valid), r.bits.ppn, req_ppn) - req_data = Mux(Reg(r.valid), r.bits.data, req_data) - req_kill = Mux(Reg(r.valid), r.bits.kill, req_kill) - req_tag = Mux(r.valid, Cat(r.bits.tag, UFix(i, log2Up(n))), req_tag) - } - - io.mem.req.valid := req_val - io.mem.req.bits.cmd := req_cmd - io.mem.req.bits.typ := req_type - io.mem.req.bits.idx := req_idx - io.mem.req.bits.ppn := req_ppn - io.mem.req.bits.data := req_data - io.mem.req.bits.kill := req_kill - io.mem.req.bits.tag := req_tag - - for (i <- 0 until n) - { - val r = io.requestor(i).resp - val x = io.requestor(i).xcpt + for (i <- 0 until n) { + val resp = io.requestor(i).resp val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UFix(i) - x.ma.ld := io.mem.xcpt.ma.ld && Reg(io.requestor(i).req.valid) - x.ma.st := io.mem.xcpt.ma.st && Reg(io.requestor(i).req.valid) - r.valid := io.mem.resp.valid && tag_hit - r.bits.miss := io.mem.resp.bits.miss && tag_hit - r.bits.nack := io.mem.resp.bits.nack && Reg(io.requestor(i).req.valid) - r.bits.replay := io.mem.resp.bits.replay && tag_hit - r.bits.data := io.mem.resp.bits.data - r.bits.data_subword := io.mem.resp.bits.data_subword - r.bits.typ := io.mem.resp.bits.typ - r.bits.tag := io.mem.resp.bits.tag >> UFix(log2Up(n)) + resp.valid := io.mem.resp.valid && tag_hit + io.requestor(i).xcpt := io.mem.xcpt + resp.bits := io.mem.resp.bits + resp.bits.tag := io.mem.resp.bits.tag >> UFix(log2Up(n)) + resp.bits.miss := io.mem.resp.bits.miss && tag_hit + resp.bits.nack := io.mem.resp.bits.nack && r_valid(i) + resp.bits.replay := io.mem.resp.bits.replay && tag_hit } } @@ -80,15 +62,6 @@ class MemArbiter(n: Int) extends Component { val requestor = Vec(n) { new ioUncachedRequestor }.flip } - var xi_val = Bool(false) - var xi_rdy = io.mem.xact_init.ready - for (i <- 0 until n) - { - io.requestor(i).xact_init.ready := xi_rdy - xi_val = xi_val || io.requestor(i).xact_init.valid - xi_rdy = xi_rdy && !io.requestor(i).xact_init.valid - } - var xi_bits = new TransactionInit xi_bits := io.requestor(n-1).xact_init.bits xi_bits.tile_xact_id := Cat(io.requestor(n-1).xact_init.bits.tile_xact_id, UFix(n-1, log2Up(n))) @@ -101,24 +74,21 @@ class MemArbiter(n: Int) extends Component { xi_bits = Mux(io.requestor(i).xact_init.valid, my_xi_bits, xi_bits) } - io.mem.xact_init.valid := xi_val io.mem.xact_init.bits := xi_bits - - var xf_val = Bool(false) - var xf_rdy = io.mem.xact_finish.ready - for (i <- 0 until n) - { - io.requestor(i).xact_finish.ready := xf_rdy - xf_val = xf_val || io.requestor(i).xact_finish.valid - xf_rdy = xf_rdy && !io.requestor(i).xact_finish.valid - } + io.mem.xact_init.valid := io.requestor.map(_.xact_init.valid).reduce(_||_) + io.requestor(0).xact_init.ready := io.mem.xact_init.ready + for (i <- 1 until n) + io.requestor(i).xact_init.ready := io.requestor(i-1).xact_init.ready && !io.requestor(i-1).xact_init.valid var xf_bits = io.requestor(n-1).xact_finish.bits for (i <- n-2 to 0 by -1) xf_bits = Mux(io.requestor(i).xact_finish.valid, io.requestor(i).xact_finish.bits, xf_bits) - io.mem.xact_finish.valid := xf_val io.mem.xact_finish.bits := xf_bits + io.mem.xact_finish.valid := io.requestor.map(_.xact_finish.valid).reduce(_||_) + io.requestor(0).xact_finish.ready := io.mem.xact_finish.ready + for (i <- 1 until n) + io.requestor(i).xact_finish.ready := io.requestor(i-1).xact_finish.ready && !io.requestor(i-1).xact_finish.valid for (i <- 0 until n) { diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index c8b8629d..b482673d 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -154,12 +154,7 @@ trait InterruptConstants { val IRQ_TIMER = 7 } -abstract trait RocketDcacheConstants extends ArbiterConstants with uncore.constants.AddressConstants { - val NMSHR = if (HAVE_VEC) 4 else 2 // number of primary misses - require(log2Up(NMSHR)+3 <= uncore.Constants.TILE_XACT_ID_BITS) - val NRPQ = 16; // number of secondary misses - val NSDQ = 17; // number of secondary stores/AMOs - val OFFSET_BITS = 6; // log2(cache line size in bytes) +abstract trait RocketDcacheConstants extends TileConfigConstants with uncore.constants.CacheConstants with uncore.constants.AddressConstants { require(OFFSET_BITS == log2Up(uncore.Constants.CACHE_DATA_SIZE_IN_BYTES)) require(OFFSET_BITS <= uncore.Constants.X_INIT_WRITE_MASK_BITS) require(log2Up(OFFSET_BITS) <= uncore.Constants.X_INIT_SUBWORD_ADDR_BITS) @@ -196,15 +191,3 @@ trait VectorOpConstants { val VIMM2_ALU = UFix(1, 1) val VIMM2_X = UFix(0, 1) } - -abstract trait ArbiterConstants extends TileConfigConstants { - val DCACHE_PORTS = 3 - val DCACHE_CPU = 0 - val DCACHE_PTW = 1 - val DCACHE_VU = 2 - - val DMEM_PORTS = if (HAVE_VEC) 3 else 2 - val DMEM_DCACHE = 0 - val DMEM_ICACHE = 1 - val DMEM_VICACHE = 2 -} diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/core.scala similarity index 61% rename from rocket/src/main/scala/cpu.scala rename to rocket/src/main/scala/core.scala index 0ce54a75..d27dc1e3 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/core.scala @@ -13,87 +13,57 @@ class ioRocket(implicit conf: RocketConfiguration) extends Bundle val dmem = new ioHellaCache()(conf.dcache) } -class rocketProc(implicit conf: RocketConfiguration) extends Component +class Core(implicit conf: RocketConfiguration) extends Component { val io = new ioRocket val ctrl = new Control val dpath = new Datapath - val ptw = Vec(0) { new IOTLBPTW } - val arb = new HellaCacheArbiter(DCACHE_PORTS) - - var vu: vu = null - if (HAVE_VEC) - { - vu = new vu() - - val vdtlb = new rocketTLB(8) - vdtlb.io.invalidate := dpath.io.ptbr_wen - vdtlb.io.status := dpath.io.ctrl.status - ptw += vdtlb.io.ptw - - vdtlb.io.cpu_req <> vu.io.vec_tlb_req - vu.io.vec_tlb_resp := vdtlb.io.cpu_resp - vu.io.vec_tlb_resp.xcpt_pf := Bool(false) - - val pftlb = new rocketTLB(2) - pftlb.io.invalidate := dpath.io.ptbr_wen - pftlb.io.status := dpath.io.ctrl.status - pftlb.io.cpu_req <> vu.io.vec_pftlb_req - ptw += pftlb.io.ptw - - vu.io.vec_pftlb_resp := pftlb.io.cpu_resp - vu.io.vec_pftlb_resp.xcpt_ld := Bool(false) - vu.io.vec_pftlb_resp.xcpt_st := Bool(false) - } - - // connect DTLB to ctrl+dpath - val dtlb = new rocketTLB(DTLB_ENTRIES) - dtlb.io.invalidate := dpath.io.ptbr_wen - dtlb.io.status := dpath.io.ctrl.status - ptw += dtlb.io.ptw - - dtlb.io.cpu_req.valid := ctrl.io.dtlb_val - dtlb.io.cpu_req.bits.kill := ctrl.io.dtlb_kill - dtlb.io.cpu_req.bits.cmd := ctrl.io.dmem.req.bits.cmd - dtlb.io.cpu_req.bits.asid := UFix(0) - dtlb.io.cpu_req.bits.vpn := dpath.io.dtlb.vpn - ctrl.io.xcpt_dtlb_ld := dtlb.io.cpu_resp.xcpt_ld - ctrl.io.xcpt_dtlb_st := dtlb.io.cpu_resp.xcpt_st - ctrl.io.dtlb_rdy := dtlb.io.cpu_req.ready - ctrl.io.dtlb_miss := dtlb.io.cpu_resp.miss - - arb.io.requestor(DCACHE_CPU).req.bits.ppn := dtlb.io.cpu_resp.ppn - ctrl.io.dpath <> dpath.io.ctrl dpath.io.host <> io.host ctrl.io.imem <> io.imem dpath.io.imem <> io.imem - ctrl.io.dmem <> arb.io.requestor(DCACHE_CPU) - dpath.io.dmem <> arb.io.requestor(DCACHE_CPU) + val dmemArb = new HellaCacheArbiter(if (HAVE_VEC) 3 else 2) + dmemArb.io.mem <> io.dmem + val dmem = dmemArb.io.requestor + dmem(1) <> ctrl.io.dmem + dmem(1) <> dpath.io.dmem - var fpu: rocketFPU = null - if (HAVE_FPU) - { - fpu = new rocketFPU(4,6) + val ptw = collection.mutable.ArrayBuffer(io.imem.ptw, io.dmem.ptw) + + val fpu: FPU = if (HAVE_FPU) { + val fpu = new FPU(4,6) dpath.io.fpu <> fpu.io.dpath ctrl.io.fpu <> fpu.io.ctrl - } + fpu + } else null + + if (HAVE_VEC) { + val vu = new vu() + + val vdtlb = new rocketTLB(8) + ptw += vdtlb.io.ptw + vdtlb.io.cpu_req <> vu.io.vec_tlb_req + vu.io.vec_tlb_resp := vdtlb.io.cpu_resp + vu.io.vec_tlb_resp.xcpt_pf := Bool(false) + + val pftlb = new rocketTLB(2) + pftlb.io.cpu_req <> vu.io.vec_pftlb_req + ptw += pftlb.io.ptw + vu.io.vec_pftlb_resp := pftlb.io.cpu_resp + vu.io.vec_pftlb_resp.xcpt_ld := Bool(false) + vu.io.vec_pftlb_resp.xcpt_st := Bool(false) - if (HAVE_VEC) - { dpath.io.vec_ctrl <> ctrl.io.vec_dpath // hooking up vector I$ ptw += io.vimem.ptw - io.vimem.req.bits.status := dpath.io.ctrl.status io.vimem.req.bits.pc := vu.io.imem_req.bits io.vimem.req.valid := vu.io.imem_req.valid io.vimem.req.bits.invalidate := ctrl.io.dpath.flush_inst - io.vimem.req.bits.invalidateTLB := dpath.io.ptbr_wen vu.io.imem_resp.valid := io.vimem.resp.valid vu.io.imem_resp.bits.pc := io.vimem.resp.bits.pc vu.io.imem_resp.bits.data := io.vimem.resp.bits.data @@ -155,21 +125,16 @@ class rocketProc(implicit conf: RocketConfiguration) extends Component vu.io.xcpt.hold := ctrl.io.vec_iface.hold // hooking up vector memory interface - arb.io.requestor(DCACHE_VU).req.valid := vu.io.dmem_req.valid - arb.io.requestor(DCACHE_VU).req.bits.kill := vu.io.dmem_req.bits.kill - arb.io.requestor(DCACHE_VU).req.bits.cmd := vu.io.dmem_req.bits.cmd - arb.io.requestor(DCACHE_VU).req.bits.typ := vu.io.dmem_req.bits.typ - arb.io.requestor(DCACHE_VU).req.bits.idx := vu.io.dmem_req.bits.idx - arb.io.requestor(DCACHE_VU).req.bits.ppn := Reg(vu.io.dmem_req.bits.ppn) - arb.io.requestor(DCACHE_VU).req.bits.data := Reg(StoreGen(vu.io.dmem_req.bits.typ, Bits(0), vu.io.dmem_req.bits.data).data) - arb.io.requestor(DCACHE_VU).req.bits.tag := vu.io.dmem_req.bits.tag + dmem(2).req.valid := vu.io.dmem_req.valid + dmem(2).req.bits := vu.io.dmem_req.bits + dmem(2).req.bits.data := Reg(StoreGen(vu.io.dmem_req.bits.typ, Bits(0), vu.io.dmem_req.bits.data).data) - vu.io.dmem_req.ready := arb.io.requestor(DCACHE_VU).req.ready - vu.io.dmem_resp.valid := Reg(arb.io.requestor(DCACHE_VU).resp.valid) - vu.io.dmem_resp.bits.nack := arb.io.requestor(DCACHE_VU).resp.bits.nack - vu.io.dmem_resp.bits.data := arb.io.requestor(DCACHE_VU).resp.bits.data_subword - vu.io.dmem_resp.bits.tag := Reg(arb.io.requestor(DCACHE_VU).resp.bits.tag) - vu.io.dmem_resp.bits.typ := Reg(arb.io.requestor(DCACHE_VU).resp.bits.typ) + vu.io.dmem_req.ready := dmem(2).req.ready + vu.io.dmem_resp.valid := Reg(dmem(2).resp.valid) + vu.io.dmem_resp.bits.nack := dmem(2).resp.bits.nack + vu.io.dmem_resp.bits.data := dmem(2).resp.bits.data_subword + vu.io.dmem_resp.bits.tag := Reg(dmem(2).resp.bits.tag) + vu.io.dmem_resp.bits.typ := Reg(dmem(2).resp.bits.typ) // share vector integer multiplier with rocket dpath.io.vec_imul_req <> vu.io.cp_imul_req @@ -178,22 +143,13 @@ class rocketProc(implicit conf: RocketConfiguration) extends Component // share sfma and dfma pipelines with rocket fpu.io.sfma <> vu.io.cp_sfma fpu.io.dfma <> vu.io.cp_dfma - } - else - { - arb.io.requestor(DCACHE_VU).req.valid := Bool(false) - if (HAVE_FPU) - { - fpu.io.sfma.valid := Bool(false) - fpu.io.dfma.valid := Bool(false) - } + } else if (fpu != null) { + fpu.io.sfma.valid := Bool(false) + fpu.io.dfma.valid := Bool(false) } - ptw += io.imem.ptw val thePTW = new PTW(ptw.length) - thePTW.io.requestor <> ptw - thePTW.io.ptbr := dpath.io.ptbr; - arb.io.requestor(DCACHE_PTW) <> thePTW.io.mem - - arb.io.mem <> io.dmem + ptw zip thePTW.io.requestor map { case (a, b) => a <> b } + thePTW.io.dpath <> dpath.io.ptw + dmem(0) <> thePTW.io.mem } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index a36ce98f..46afcc98 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -569,8 +569,8 @@ class Control(implicit conf: RocketConfiguration) extends Component (mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause), (mem_reg_mem_val && io.dmem.xcpt.ma.ld, UFix( 8)), (mem_reg_mem_val && io.dmem.xcpt.ma.st, UFix( 9)), - (mem_reg_mem_val && io.xcpt_dtlb_ld, UFix(10)), - (mem_reg_mem_val && io.xcpt_dtlb_st, UFix(11)))) + (mem_reg_mem_val && io.dmem.xcpt.pf.ld, UFix(10)), + (mem_reg_mem_val && io.dmem.xcpt.pf.st, UFix(11)))) wb_reg_xcpt := mem_xcpt && !take_pc_wb && !wb_reg_replay_next when (mem_xcpt) { wb_reg_cause := mem_cause } @@ -644,7 +644,7 @@ class Control(implicit conf: RocketConfiguration) extends Component // replay inst in ex stage val replay_ex = wb_reg_dcache_miss && ex_reg_load_use || mem_reg_flush_inst || - ex_reg_mem_val && !(io.dmem.req.ready && io.dtlb_rdy) || + ex_reg_mem_val && !io.dmem.req.ready || ex_reg_div_val && !io.dpath.div_rdy || ex_reg_mul_val && !io.dpath.mul_rdy || mem_reg_replay_next @@ -652,7 +652,7 @@ class Control(implicit conf: RocketConfiguration) extends Component // replay inst in mem stage val mem_ll_wb = io.dpath.mem_wb || io.dpath.mul_result_val || io.dpath.div_result_val - val dmem_kill_mem = mem_reg_valid && (io.dtlb_miss || io.dmem.resp.bits.nack) + val dmem_kill_mem = mem_reg_valid && io.dmem.resp.bits.nack val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem val replay_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay || fpu_kill_mem val killm_common = mem_reg_wen && mem_ll_wb || take_pc_wb || mem_reg_xcpt || !mem_reg_valid @@ -734,7 +734,7 @@ class Control(implicit conf: RocketConfiguration) extends Component id_ex_hazard || id_mem_hazard || id_wb_hazard || id_stall_raddr1 || id_stall_raddr2 || id_stall_waddr || id_fp_val && id_stall_fpu || - id_mem_val && !(io.dmem.req.ready && io.dtlb_rdy) || + id_mem_val && !io.dmem.req.ready || vec_stalld ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || id_interrupt @@ -772,10 +772,9 @@ class Control(implicit conf: RocketConfiguration) extends Component io.fpu.killx := ctrl_killx io.fpu.killm := killm_common - io.dtlb_val := ex_reg_mem_val - io.dtlb_kill := !mem_reg_valid io.dmem.req.valid := ex_reg_mem_val - io.dmem.req.bits.kill := killm_common || mem_xcpt || io.dtlb_miss + io.dmem.req.bits.kill := killm_common || mem_xcpt io.dmem.req.bits.cmd := ex_reg_mem_cmd io.dmem.req.bits.typ := ex_reg_mem_type + io.dmem.req.bits.phys := Bool(false) } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index d1f5fe69..02c21851 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -13,9 +13,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Component val ctrl = new ioCtrlDpath().flip val dmem = new ioHellaCache()(conf.dcache) val dtlb = new ioDTLB_CPU_req_bundle().asOutput() + val ptw = new IODatapathPTW().flip val imem = new IOCPUFrontend()(conf.icache) - val ptbr_wen = Bool(OUTPUT); - val ptbr = UFix(OUTPUT, PADDR_BITS); val fpu = new ioDpathFPU(); val vec_ctrl = new ioCtrlDpathVec().flip val vec_iface = new ioDpathVecInterface() @@ -81,9 +80,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component val ex_effective_address = Cat(ex_ea_sign, ex_alu_adder_out(VADDR_BITS-1,0)).toUFix // hook up I$ - io.imem.req.bits.invalidateTLB := pcr.io.ptbr_wen io.imem.req.bits.currentpc := ex_reg_pc - io.imem.req.bits.status := pcr.io.status io.imem.req.bits.pc := Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4, Mux(io.ctrl.sel_pc === PC_EX, Mux(io.ctrl.ex_jalr, ex_effective_address, ex_branch_target), @@ -209,7 +206,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module - io.dmem.req.bits.idx := ex_effective_address + io.dmem.req.bits.addr := ex_effective_address io.dmem.req.bits.data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2) io.dmem.req.bits.tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val) require(io.dmem.req.bits.tag.getWidth >= 6) @@ -225,8 +222,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Component io.ctrl.irq_ipi := pcr.io.irq_ipi; io.ctrl.status := pcr.io.status; io.ctrl.pcr_replay := pcr.io.replay - io.ptbr := pcr.io.ptbr; - io.ptbr_wen := pcr.io.ptbr_wen; + + io.ptw.ptbr := pcr.io.ptbr + io.ptw.invalidate := pcr.io.ptbr_wen + io.ptw.status := pcr.io.status // branch resolution logic io.ctrl.jalr_eq := ex_reg_rs1 === id_pc.toFix && ex_reg_op2(id_imm_small.getWidth-1,0) === UFix(0) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 69e36380..3ea883db 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -5,8 +5,9 @@ import Node._ import Constants._ import Instructions._ import Util._ +import FPConstants._ -object rocketFPConstants +object FPConstants { val FCMD_ADD = Bits("b000000") val FCMD_SUB = Bits("b000001") @@ -45,7 +46,6 @@ object rocketFPConstants val FCMD_WIDTH = 6 val FSR_WIDTH = 8 } -import rocketFPConstants._ class FPUCtrlSigs extends Bundle { @@ -64,7 +64,7 @@ class FPUCtrlSigs extends Bundle val wrfsr = Bool() } -class rocketFPUDecoder extends Component +class FPUDecoder extends Component { val io = new Bundle { val inst = Bits(INPUT, 32) @@ -378,7 +378,7 @@ class ioFMA(width: Int) extends Bundle { val exc = Bits(OUTPUT, 5) } -class rocketFPUSFMAPipe(val latency: Int) extends Component +class FPUSFMAPipe(val latency: Int) extends Component { val io = new ioFMA(33) @@ -415,7 +415,7 @@ class rocketFPUSFMAPipe(val latency: Int) extends Component io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits } -class rocketFPUDFMAPipe(val latency: Int) extends Component +class FPUDFMAPipe(val latency: Int) extends Component { val io = new ioFMA(65) @@ -452,7 +452,7 @@ class rocketFPUDFMAPipe(val latency: Int) extends Component io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits } -class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component +class FPU(sfma_latency: Int, dfma_latency: Int) extends Component { val io = new Bundle { val ctrl = new ioCtrlFPU().flip @@ -470,7 +470,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component val killm = io.ctrl.killm || io.ctrl.nack_mem val wb_reg_valid = Reg(mem_reg_valid && !killm, resetVal = Bool(false)) - val fp_decoder = new rocketFPUDecoder + val fp_decoder = new FPUDecoder fp_decoder.io.inst := io.dpath.inst val ctrl = RegEn(fp_decoder.io.sigs, io.ctrl.valid) @@ -530,7 +530,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component val cmd_fma = mem_ctrl.cmd === FCMD_MADD || mem_ctrl.cmd === FCMD_MSUB || mem_ctrl.cmd === FCMD_NMADD || mem_ctrl.cmd === FCMD_NMSUB val cmd_addsub = mem_ctrl.cmd === FCMD_ADD || mem_ctrl.cmd === FCMD_SUB - val sfma = new rocketFPUSFMAPipe(sfma_latency) + val sfma = new FPUSFMAPipe(sfma_latency) sfma.io.valid := io.sfma.valid || ex_reg_valid && ctrl.fma && ctrl.single sfma.io.in1 := Mux(io.sfma.valid, io.sfma.in1, ex_rs1) sfma.io.in2 := Mux(io.sfma.valid, io.sfma.in2, ex_rs2) @@ -540,7 +540,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component io.sfma.out := sfma.io.out io.sfma.exc := sfma.io.exc - val dfma = new rocketFPUDFMAPipe(dfma_latency) + val dfma = new FPUDFMAPipe(dfma_latency) dfma.io.valid := io.dfma.valid || ex_reg_valid && ctrl.fma && !ctrl.single dfma.io.in1 := Mux(io.dfma.valid, io.dfma.in1, ex_rs1) dfma.io.in2 := Mux(io.dfma.valid, io.dfma.in2, ex_rs2) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 8a7492c2..df7506a0 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -29,9 +29,7 @@ case class ICacheConfig(sets: Int, assoc: Int, co: CoherencePolicyWithUncached, class FrontendReq extends Bundle { val pc = UFix(width = VADDR_BITS+1) - val status = Bits(width = 32) val invalidate = Bool() - val invalidateTLB = Bool() val mispredict = Bool() val taken = Bool() val currentpc = UFix(width = VADDR_BITS+1) @@ -99,14 +97,13 @@ class Frontend(implicit c: ICacheConfig) extends Component btb.io.clr := !io.cpu.req.bits.taken btb.io.correct_pc := io.cpu.req.bits.currentpc btb.io.correct_target := io.cpu.req.bits.pc - btb.io.invalidate := io.cpu.req.bits.invalidate || io.cpu.req.bits.invalidateTLB + btb.io.invalidate := io.cpu.req.bits.invalidate || io.cpu.ptw.invalidate tlb.io.ptw <> io.cpu.ptw tlb.io.req.valid := !stall && !icmiss tlb.io.req.bits.vpn := s1_pc >> UFix(PGIDX_BITS) - tlb.io.req.bits.status := io.cpu.req.bits.status tlb.io.req.bits.asid := UFix(0) - tlb.io.req.bits.invalidate := io.cpu.req.bits.invalidateTLB + tlb.io.req.bits.passthrough := Bool(false) tlb.io.req.bits.instruction := Bool(true) icache.io.mem <> io.mem diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 9b03d97d..d027eca7 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -3,16 +3,18 @@ package rocket import Chisel._ import Constants._ import uncore._ +import Util._ case class DCacheConfig(sets: Int, ways: Int, co: CoherencePolicy, - nmshr: Int, nsecondary: Int, nsdq: Int, + nmshr: Int, nrpq: Int, nsdq: Int, reqtagbits: Int = -1) { require(isPow2(sets)) require(isPow2(ways)) // TODO: relax this def lines = sets*ways def dm = ways == 1 - def ppnbits = PPN_BITS + def ppnbits = PADDR_BITS - PGIDX_BITS + def vpnbits = VADDR_BITS - PGIDX_BITS def pgidxbits = PGIDX_BITS def offbits = OFFSET_BITS def paddrbits = ppnbits + pgidxbits @@ -161,7 +163,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val req_sec_val = Bool(INPUT) val req_sec_rdy = Bool(OUTPUT) val req_bits = new MSHRReq().asInput - val req_sdq_id = UFix(INPUT, log2Up(NSDQ)) + val req_sdq_id = UFix(INPUT, log2Up(conf.nsdq)) val idx_match = Bool(OUTPUT) val idx = Bits(OUTPUT, conf.idxbits) @@ -194,7 +196,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val idx_match = req.idx === io.req_bits.idx val sec_rdy = idx_match && !flush && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !conf.co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) - val rpq = (new Queue(NRPQ)) { new RPQEntry } + val rpq = (new Queue(conf.nrpq)) { new RPQEntry } rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && req_use_rpq rpq.io.enq.bits := io.req_bits rpq.io.enq.bits.sdq_id := io.req_sdq_id @@ -312,24 +314,24 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { val cpu_resp_tag = Bits(OUTPUT, conf.reqtagbits) } - val sdq_val = Reg(resetVal = Bits(0, NSDQ)) - val sdq_alloc_id = PriorityEncoder(~sdq_val(NSDQ-1,0)) + val sdq_val = Reg(resetVal = Bits(0, conf.nsdq)) + val sdq_alloc_id = PriorityEncoder(~sdq_val(conf.nsdq-1,0)) val sdq_rdy = !sdq_val.andR val (req_read, req_write) = cpuCmdToRW(io.req.bits.cmd) val sdq_enq = io.req.valid && io.req.ready && req_write - val sdq = Mem(NSDQ) { io.req.bits.data.clone } + val sdq = Mem(conf.nsdq) { io.req.bits.data.clone } when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data } - val idxMatch = Vec(NMSHR) { Bool() } - val tagList = Vec(NMSHR) { Bits() } - val wbTagList = Vec(NMSHR) { Bits() } - val memRespMux = Vec(NMSHR) { new DataArrayReq } - val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayReq() } - val mem_req_arb = (new Arbiter(NMSHR)) { new TransactionInit } - val mem_finish_arb = (new Arbiter(NMSHR)) { new TransactionFinish } - val wb_req_arb = (new Arbiter(NMSHR)) { new WritebackReq } - val replay_arb = (new Arbiter(NMSHR)) { new Replay() } - val alloc_arb = (new Arbiter(NMSHR)) { Bool() } + val idxMatch = Vec(conf.nmshr) { Bool() } + val tagList = Vec(conf.nmshr) { Bits() } + val wbTagList = Vec(conf.nmshr) { Bits() } + val memRespMux = Vec(conf.nmshr) { new DataArrayReq } + val meta_req_arb = (new Arbiter(conf.nmshr)) { new MetaArrayReq() } + val mem_req_arb = (new Arbiter(conf.nmshr)) { new TransactionInit } + val mem_finish_arb = (new Arbiter(conf.nmshr)) { new TransactionFinish } + val wb_req_arb = (new Arbiter(conf.nmshr)) { new WritebackReq } + val replay_arb = (new Arbiter(conf.nmshr)) { new Replay() } + val alloc_arb = (new Arbiter(conf.nmshr)) { Bool() } val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.tag val wb_probe_match = Mux1H(idxMatch, wbTagList) === io.req.bits.tag @@ -341,7 +343,7 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { var writeback_probe_rdy = Bool(true) var refill_probe_rdy = Bool(true) - for (i <- 0 to NMSHR-1) { + for (i <- 0 to conf.nmshr-1) { val mshr = new MSHR(i) idxMatch(i) := mshr.io.idx_match @@ -400,8 +402,8 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { val (replay_read, replay_write) = cpuCmdToRW(replay.bits.cmd) val sdq_free = replay.valid && replay.ready && replay_write - sdq_val := sdq_val & ~((UFix(1) << replay.bits.sdq_id) & Fill(sdq_free, NSDQ)) | - PriorityEncoderOH(~sdq_val(NSDQ-1,0)) & Fill(NSDQ, sdq_enq && io.req.bits.tag_miss) + sdq_val := sdq_val & ~((UFix(1) << replay.bits.sdq_id) & Fill(sdq_free, conf.nsdq)) | + PriorityEncoderOH(~sdq_val(conf.nsdq-1,0)) & Fill(conf.nsdq, sdq_enq && io.req.bits.tag_miss) val sdq_rdata = Reg() { io.req.bits.data.clone } sdq_rdata := sdq(Mux(replay.valid && !replay.ready, replay.bits.sdq_id, replay_arb.io.out.bits.sdq_id)) io.data_req.bits.data := sdq_rdata @@ -711,8 +713,8 @@ class AMOALU extends Component { class HellaCacheReq(implicit conf: DCacheConfig) extends Bundle { val kill = Bool() val typ = Bits(width = 3) - val idx = Bits(width = conf.pgidxbits) - val ppn = Bits(width = conf.ppnbits) + val phys = Bool() + val addr = UFix(width = conf.ppnbits.max(conf.vpnbits+1) + conf.pgidxbits) val data = Bits(width = conf.databits) val tag = Bits(width = conf.reqtagbits) val cmd = Bits(width = 4) @@ -739,6 +741,7 @@ class AlignmentExceptions extends Bundle { class HellaCacheExceptions extends Bundle { val ma = new AlignmentExceptions + val pf = new AlignmentExceptions } // interface between D$ and processor/DTLB @@ -746,6 +749,7 @@ class ioHellaCache(implicit conf: DCacheConfig) extends Bundle { val req = (new FIFOIO){ new HellaCacheReq } val resp = (new PipeIO){ new HellaCacheResp }.flip val xcpt = (new HellaCacheExceptions).asInput + val ptw = new IOTLBPTW().flip } class HellaCache(implicit conf: DCacheConfig) extends Component { @@ -768,6 +772,8 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val early_nack = Reg { Bool() } val r_cpu_req_val_ = Reg(io.cpu.req.valid && io.cpu.req.ready, resetVal = Bool(false)) val r_cpu_req_val = r_cpu_req_val_ && !io.cpu.req.bits.kill && !early_nack + val r_cpu_req_phys = Reg() { Bool() } + val r_cpu_req_vpn = Reg() { UFix() } val r_cpu_req_idx = Reg() { Bits() } val r_cpu_req_cmd = Reg() { Bits() } val r_cpu_req_type = Reg() { Bits() } @@ -799,6 +805,14 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val r_req_readwrite = r_req_read || r_req_write || r_req_prefetch val nack_hit = Bool() + val dtlb = new TLB(8) + dtlb.io.ptw <> io.cpu.ptw + dtlb.io.req.valid := r_cpu_req_val_ && r_req_readwrite && !r_cpu_req_phys + dtlb.io.req.bits.passthrough := r_cpu_req_phys + dtlb.io.req.bits.asid := UFix(0) + dtlb.io.req.bits.vpn := r_cpu_req_vpn + dtlb.io.req.bits.instruction := Bool(false) + val wb = new WritebackUnit val prober = new ProbeUnit val mshr = new MSHRFile @@ -812,7 +826,9 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { flusher.io.mshr_req.ready := mshr.io.req.ready when (io.cpu.req.valid) { - r_cpu_req_idx := io.cpu.req.bits.idx + r_cpu_req_phys := io.cpu.req.bits.phys + r_cpu_req_vpn := io.cpu.req.bits.addr >> taglsb + r_cpu_req_idx := io.cpu.req.bits.addr(indexmsb,0) r_cpu_req_cmd := io.cpu.req.bits.cmd r_cpu_req_type := io.cpu.req.bits.typ r_cpu_req_tag := io.cpu.req.bits.tag @@ -839,8 +855,10 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0))) || ((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != Bits(0))); - io.cpu.xcpt.ma.ld := r_cpu_req_val_ && !early_nack && r_req_read && misaligned - io.cpu.xcpt.ma.st := r_cpu_req_val_ && !early_nack && r_req_write && misaligned + io.cpu.xcpt.ma.ld := r_cpu_req_val_ && r_req_read && misaligned + io.cpu.xcpt.ma.st := r_cpu_req_val_ && r_req_write && misaligned + io.cpu.xcpt.pf.ld := r_cpu_req_val_ && r_req_read && dtlb.io.resp.xcpt_ld + io.cpu.xcpt.pf.st := r_cpu_req_val_ && r_req_write && dtlb.io.resp.xcpt_st // tags val meta = new MetaDataArrayArray(lines) @@ -855,11 +873,11 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { // cpu tag check meta_arb.io.in(3).valid := io.cpu.req.valid - meta_arb.io.in(3).bits.idx := io.cpu.req.bits.idx(indexmsb,indexlsb) + meta_arb.io.in(3).bits.idx := io.cpu.req.bits.addr(indexmsb,indexlsb) meta_arb.io.in(3).bits.rw := Bool(false) meta_arb.io.in(3).bits.way_en := Fix(-1) val early_tag_nack = !meta_arb.io.in(3).ready - val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.addr >> UFix(conf.pgidxbits-conf.offbits), io.cpu.req.bits.ppn) + val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.addr >> UFix(conf.pgidxbits-conf.offbits), dtlb.io.resp.ppn) val cpu_req_tag = Cat(cpu_req_ppn, r_cpu_req_idx)(tagmsb,taglsb) val tag_match_arr = (0 until conf.ways).map( w => conf.co.isValid(meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) val tag_match = Cat(Bits(0),tag_match_arr:_*).orR @@ -892,8 +910,8 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { data_arb.io.in(0).valid := io.mem.xact_rep.valid && conf.co.messageUpdatesDataArray(io.mem.xact_rep.bits) // load hits - data_arb.io.in(4).bits.offset := io.cpu.req.bits.idx(offsetmsb,ramindexlsb) - data_arb.io.in(4).bits.idx := io.cpu.req.bits.idx(indexmsb,indexlsb) + data_arb.io.in(4).bits.offset := io.cpu.req.bits.addr(offsetmsb,ramindexlsb) + data_arb.io.in(4).bits.idx := io.cpu.req.bits.addr(indexmsb,indexlsb) data_arb.io.in(4).bits.rw := Bool(false) data_arb.io.in(4).valid := io.cpu.req.valid && req_read data_arb.io.in(4).bits.way_en := Fix(-1) // intiate load on all ways, mux after tag check @@ -1015,13 +1033,14 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val pending_fence = Reg(resetVal = Bool(false)) pending_fence := (r_cpu_req_val_ && r_req_fence || pending_fence) && !mshr.io.fence_rdy nack_hit := p_store_match || replay_val || r_req_write && !p_store_rdy || - p_store_idx_match && meta.io.state_req.valid + p_store_idx_match && meta.io.state_req.valid || + !r_cpu_req_phys && dtlb.io.resp.miss val nack_miss = !mshr.io.req.ready val nack_flush = !mshr.io.fence_rdy && (r_req_fence || r_req_flush) || !flushed && r_req_flush val nack = early_nack || r_req_readwrite && (nack_hit || nack_miss) || nack_flush - io.cpu.req.ready := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence + io.cpu.req.ready := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence && (dtlb.io.req.ready || io.cpu.req.bits.phys) io.cpu.resp.valid := (r_cpu_req_val && tag_hit && !mshr.io.secondary_miss && !nack && r_req_read) || mshr.io.cpu_resp_val io.cpu.resp.bits.nack := r_cpu_req_val_ && !io.cpu.req.bits.kill && nack io.cpu.resp.bits.replay := mshr.io.cpu_resp_val diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index bf55f259..ac84e608 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -5,16 +5,31 @@ import Node._ import Constants._ import scala.math._ -class ioPTW(n: Int)(implicit conf: RocketConfiguration) extends Bundle -{ - val requestor = Vec(n) { new IOTLBPTW }.flip - val mem = new ioHellaCache()(conf.dcache) - val ptbr = UFix(INPUT, PADDR_BITS) +class IOTLBPTW extends Bundle { + val req = new FIFOIO()(UFix(width = VPN_BITS)) + val resp = new PipeIO()(new Bundle { + val error = Bool() + val ppn = UFix(width = PPN_BITS) + val perm = Bits(width = PERM_BITS) + }).flip + + val status = Bits(INPUT, width = 32) + val invalidate = Bool(INPUT) +} + +class IODatapathPTW extends Bundle { + val ptbr = UFix(INPUT, PADDR_BITS) + val invalidate = Bool(INPUT) + val status = Bits(INPUT, 32) } class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component { - val io = new ioPTW(n) + val io = new Bundle { + val requestor = Vec(n) { new IOTLBPTW }.flip + val mem = new ioHellaCache()(conf.dcache) + val dpath = new IODatapathPTW + } val levels = 3 val bitsPerLevel = VPN_BITS/levels @@ -27,7 +42,7 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component val r_req_vpn = Reg() { Bits() } val r_req_dest = Reg() { Bits() } - val req_addr = Reg() { Bits() } + val req_addr = Reg() { UFix() } val r_resp_ppn = Reg() { Bits() }; val r_resp_perm = Reg() { Bits() }; @@ -41,21 +56,21 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component when (arb.io.out.fire()) { r_req_vpn := arb.io.out.bits r_req_dest := arb.io.chosen - req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), arb.io.out.bits(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3)) + req_addr := Cat(io.dpath.ptbr(PADDR_BITS-1,PGIDX_BITS), arb.io.out.bits(VPN_BITS-1,VPN_BITS-bitsPerLevel), UFix(0,3)) } val dmem_resp_val = Reg(io.mem.resp.valid, resetVal = Bool(false)) when (dmem_resp_val) { - req_addr := Cat(io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, Bits(0,3)) + req_addr := Cat(io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, UFix(0,3)).toUFix r_resp_perm := io.mem.resp.bits.data_subword(9,4); r_resp_ppn := io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS); } io.mem.req.valid := state === s_req + io.mem.req.bits.phys := Bool(true) io.mem.req.bits.cmd := M_XRD io.mem.req.bits.typ := MT_D - io.mem.req.bits.idx := req_addr(PGIDX_BITS-1,0) - io.mem.req.bits.ppn := Reg(req_addr(PADDR_BITS-1,PGIDX_BITS)) + io.mem.req.bits.addr := req_addr io.mem.req.bits.kill := Bool(false) val resp_val = state === s_done || state === s_error @@ -73,6 +88,8 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component io.requestor(i).resp.bits.error := resp_err io.requestor(i).resp.bits.perm := r_resp_perm io.requestor(i).resp.bits.ppn := resp_ppn.toUFix + io.requestor(i).invalidate := io.dpath.invalidate + io.requestor(i).status := io.dpath.status } // control state machine diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 81090090..2138561b 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -13,7 +13,8 @@ case class RocketConfiguration(ntiles: Int, co: CoherencePolicyWithUncached, class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Component(resetSignal) { - implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(DMEM_PORTS)) + val memPorts = if (HAVE_VEC) 3 else 2 + implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(memPorts)) implicit val conf = confIn.copy(dcache = dcConf) val io = new Bundle { @@ -21,13 +22,13 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon val host = new ioHTIF(conf.ntiles) } - val cpu = new rocketProc + val core = new Core val icache = new Frontend()(confIn.icache) val dcache = new HellaCache - val arbiter = new MemArbiter(DMEM_PORTS) - arbiter.io.requestor(DMEM_DCACHE) <> dcache.io.mem - arbiter.io.requestor(DMEM_ICACHE) <> icache.io.mem + val arbiter = new MemArbiter(memPorts) + arbiter.io.requestor(0) <> dcache.io.mem + arbiter.io.requestor(1) <> icache.io.mem io.tilelink.xact_init <> arbiter.io.mem.xact_init io.tilelink.xact_init_data <> dcache.io.mem.xact_init_data @@ -38,14 +39,13 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon io.tilelink.probe_rep <> dcache.io.mem.probe_rep io.tilelink.probe_rep_data <> dcache.io.mem.probe_rep_data - if (HAVE_VEC) - { + if (HAVE_VEC) { val vicache = new Frontend()(ICacheConfig(128, 1, conf.co)) // 128 sets x 1 ways (8KB) - arbiter.io.requestor(DMEM_VICACHE) <> vicache.io.mem - cpu.io.vimem <> vicache.io.cpu + arbiter.io.requestor(2) <> vicache.io.mem + core.io.vimem <> vicache.io.cpu } - cpu.io.host <> io.host - cpu.io.imem <> icache.io.cpu - cpu.io.dmem <> dcache.io.cpu + core.io.host <> io.host + core.io.imem <> icache.io.cpu + core.io.dmem <> dcache.io.cpu } diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 6ce671a4..0e2f1d33 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -66,21 +66,11 @@ class PseudoLRU(n: Int) } } -class IOTLBPTW extends Bundle { - val req = new FIFOIO()(UFix(width = VPN_BITS)) - val resp = new PipeIO()(new Bundle { - val error = Bool() - val ppn = UFix(width = PPN_BITS) - val perm = Bits(width = PERM_BITS) - }).flip -} - class TLBReq extends Bundle { val asid = UFix(width = ASID_BITS) val vpn = UFix(width = VPN_BITS+1) - val status = Bits(width = 32) - val invalidate = Bool() + val passthrough = Bool() val instruction = Bool() } @@ -116,7 +106,7 @@ class TLB(entries: Int) extends Component when (io.ptw.resp.valid) { tag_ram(r_refill_waddr) := io.ptw.resp.bits.ppn } val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUFix - tag_cam.io.clear := io.req.bits.invalidate + tag_cam.io.clear := io.ptw.invalidate tag_cam.io.clear_hit := io.req.fire() && Mux(io.req.bits.instruction, io.resp.xcpt_if, io.resp.xcpt_ld && io.resp.xcpt_st) tag_cam.io.tag := lookup_tag tag_cam.io.write := state === s_wait && io.ptw.resp.valid @@ -148,8 +138,8 @@ class TLB(entries: Int) extends Component val plru = new PseudoLRU(entries) val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace) - val status_s = io.req.bits.status(SR_S) // user/supervisor mode - val status_vm = io.req.bits.status(SR_VM) // virtual memory enable + val status_s = io.ptw.status(SR_S) // user/supervisor mode + val status_vm = io.ptw.status(SR_VM) // virtual memory enable val bad_va = io.req.bits.vpn(VPN_BITS) != io.req.bits.vpn(VPN_BITS-1) val tlb_hit = status_vm && tag_hit val tlb_miss = status_vm && !tag_hit && !bad_va @@ -163,7 +153,7 @@ class TLB(entries: Int) extends Component io.resp.xcpt_st := bad_va || tlb_hit && !Mux(status_s, sw_array(tag_hit_addr), uw_array(tag_hit_addr)) io.resp.xcpt_if := bad_va || tlb_hit && !Mux(status_s, sx_array(tag_hit_addr), ux_array(tag_hit_addr)) io.resp.miss := tlb_miss - io.resp.ppn := Mux(status_vm, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(PPN_BITS-1,0)) + io.resp.ppn := Mux(status_vm && !io.req.bits.passthrough, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(PPN_BITS-1,0)) io.resp.hit_idx := tag_cam.io.hits io.ptw.req.valid := state === s_request @@ -175,15 +165,15 @@ class TLB(entries: Int) extends Component r_refill_waddr := repl_waddr } when (state === s_request) { - when (io.req.bits.invalidate) { + when (io.ptw.invalidate) { state := s_ready } when (io.ptw.req.ready) { state := s_wait - when (io.req.bits.invalidate) { state := s_wait_invalidate } + when (io.ptw.invalidate) { state := s_wait_invalidate } } } - when (state === s_wait && io.req.bits.invalidate) { + when (state === s_wait && io.ptw.invalidate) { state := s_wait_invalidate } when ((state === s_wait || state === s_wait_invalidate) && io.ptw.resp.valid) { @@ -204,10 +194,6 @@ class ioDTLB_CPU_resp extends TLBResp(1) class ioDTLB extends Bundle { - // status bits (from PCR), to check current permission and whether VM is enabled - val status = Bits(INPUT, 32) - // invalidate all TLB entries - val invalidate = Bool(INPUT) val cpu_req = new ioDTLB_CPU_req().flip val cpu_resp = new ioDTLB_CPU_resp() val ptw = new IOTLBPTW @@ -225,8 +211,7 @@ class rocketTLB(entries: Int) extends Component val tlb = new TLB(entries) tlb.io.req.valid := r_cpu_req_val && !io.cpu_req.bits.kill tlb.io.req.bits.instruction := Bool(false) - tlb.io.req.bits.invalidate := io.invalidate - tlb.io.req.bits.status := io.status + tlb.io.req.bits.passthrough := Bool(false) tlb.io.req.bits.vpn := r_cpu_req_vpn tlb.io.req.bits.asid := r_cpu_req_asid