From 569698b8243753d913920e4f1af10665ab010a46 Mon Sep 17 00:00:00 2001 From: Daiwei Li Date: Sat, 25 Feb 2012 22:05:30 -0800 Subject: [PATCH] dtlb now arbitrates between cpu, vec, and vec pf --- rocket/src/main/scala/consts.scala | 4 ++ rocket/src/main/scala/cpu.scala | 101 +++++++++++++++++++++-------- rocket/src/main/scala/dtlb.scala | 73 +++++++++++---------- 3 files changed, 118 insertions(+), 60 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 071d3e1f..0e10b21d 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -238,4 +238,8 @@ object Constants val VIMM_VLEN = UFix(0, 1) val VIMM_ALU = UFix(1, 1) val VIMM_X = UFix(0, 1) + + val DTLB_VEC = 0 + val DTLB_VPF = 1 + val DTLB_CPU = 2 } diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 4cca083b..088779dc 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -32,6 +32,80 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) val ptw = new rocketPTW(); val arb = new rocketDmemArbiter(); + var vu: vu = null + if (HAVE_VEC) + { + vu = new vu() + // cpu, vector prefetch, and vector use the DTLB + val dtlbarb = new cArbiter(3)({new ioDTLB_CPU_req()}) + val dtlbchosen = Reg(resetVal=Bits(DTLB_CPU,log2up(3))) + when( dtlb.io.cpu_req.ready && dtlbarb.io.out.valid ) { dtlbchosen := dtlbarb.io.chosen } + + val chosen_vec = dtlbchosen === Bits(DTLB_VEC) + val chosen_pf = dtlbchosen === Bits(DTLB_VPF) + val chosen_cpu = dtlbchosen === Bits(DTLB_CPU) + + // vector prefetch doesn't care about exceptions + // and shouldn't cause any anyways + vu.io.vec_tlb_resp.xcpt_ld := chosen_vec && dtlb.io.cpu_resp.xcpt_ld + vu.io.vec_tlb_resp.xcpt_st := chosen_vec && dtlb.io.cpu_resp.xcpt_st + vu.io.vec_tlb_resp.miss := chosen_vec && dtlb.io.cpu_resp.miss + vu.io.vec_tlb_resp.ppn := dtlb.io.cpu_resp.ppn + + vu.io.vec_pftlb_resp.xcpt_ld := Bool(false) + 
vu.io.vec_pftlb_resp.xcpt_st := Bool(false) + vu.io.vec_pftlb_resp.miss := chosen_pf && dtlb.io.cpu_resp.miss + vu.io.vec_pftlb_resp.ppn := dtlb.io.cpu_resp.ppn + + // connect DTLB to ctrl+dpath + dtlbarb.io.in(DTLB_CPU).valid := ctrl.io.dtlb_val + dtlbarb.io.in(DTLB_CPU).bits.kill := ctrl.io.dtlb_kill + dtlbarb.io.in(DTLB_CPU).bits.cmd := ctrl.io.dmem.req_cmd + dtlbarb.io.in(DTLB_CPU).bits.asid := Bits(0,ASID_BITS); // FIXME: connect to PCR + dtlbarb.io.in(DTLB_CPU).bits.vpn := dpath.io.dmem.req_addr(VADDR_BITS,PGIDX_BITS) + ctrl.io.dtlb_rdy := dtlbarb.io.in(DTLB_CPU).ready + + ctrl.io.xcpt_dtlb_ld := chosen_cpu && dtlb.io.cpu_resp.xcpt_ld + ctrl.io.xcpt_dtlb_st := chosen_cpu && dtlb.io.cpu_resp.xcpt_st + ctrl.io.dtlb_miss := chosen_cpu && dtlb.io.cpu_resp.miss + + dtlbarb.io.in(DTLB_VEC) <> vu.io.vec_tlb_req + dtlbarb.io.in(DTLB_VPF) <> vu.io.vec_pftlb_req + + + dtlb.io.cpu_req <> dtlbarb.io.out + } + else + { + // connect DTLB to ctrl+dpath + dtlb.io.cpu_req.valid := ctrl.io.dtlb_val + dtlb.io.cpu_req.bits.kill := ctrl.io.dtlb_kill + dtlb.io.cpu_req.bits.cmd := ctrl.io.dmem.req_cmd + dtlb.io.cpu_req.bits.asid := Bits(0,ASID_BITS); // FIXME: connect to PCR + dtlb.io.cpu_req.bits.vpn := dpath.io.dmem.req_addr(VADDR_BITS,PGIDX_BITS) + ctrl.io.xcpt_dtlb_ld := dtlb.io.cpu_resp.xcpt_ld + ctrl.io.xcpt_dtlb_st := dtlb.io.cpu_resp.xcpt_st + ctrl.io.dtlb_rdy := dtlb.io.cpu_req.ready + ctrl.io.dtlb_miss := dtlb.io.cpu_resp.miss + } + + dtlb.io.invalidate := dpath.io.ptbr_wen + dtlb.io.status := dpath.io.ctrl.status + + arb.io.cpu.req_ppn := dtlb.io.cpu_resp.ppn; + ctrl.io.dmem.req_rdy := dtlb.io.cpu_req.ready && arb.io.cpu.req_rdy; + + // pass the xcpt_ma_ld/xcpt_ma_st exception flags from io.dmem through to ctrl + ctrl.io.xcpt_ma_ld := io.dmem.xcpt_ma_ld + ctrl.io.xcpt_ma_st := io.dmem.xcpt_ma_st + // connect page table walker to TLBs, page table base register (from PCR) + // and D$ arbiter (selects between requests from pipeline and PTW, PTW has priority) + ptw.io.dtlb <> dtlb.io.ptw; + ptw.io.itlb <> itlb.io.ptw; + 
ptw.io.ptbr := dpath.io.ptbr; + arb.io.ptw <> ptw.io.dmem; + arb.io.mem <> io.dmem + ctrl.io.dpath <> dpath.io.ctrl; dpath.io.host <> io.host; dpath.io.debug <> io.debug; @@ -53,39 +127,14 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) ctrl.io.xcpt_itlb := itlb.io.cpu.exception; io.imem.itlb_miss := itlb.io.cpu.resp_miss; - // connect DTLB to D$ arbiter, ctrl+dpath - dtlb.io.cpu.invalidate := dpath.io.ptbr_wen; - dtlb.io.cpu.status := dpath.io.ctrl.status; - dtlb.io.cpu.req_val := ctrl.io.dtlb_val; - dtlb.io.cpu.req_kill := ctrl.io.dtlb_kill; - dtlb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd; - dtlb.io.cpu.req_asid := Bits(0,ASID_BITS); // FIXME: connect to PCR - dtlb.io.cpu.req_vpn := dpath.io.dmem.req_addr(VADDR_BITS,PGIDX_BITS); - ctrl.io.xcpt_dtlb_ld := dtlb.io.cpu.xcpt_ld; - ctrl.io.xcpt_dtlb_st := dtlb.io.cpu.xcpt_st; - ctrl.io.dtlb_rdy := dtlb.io.cpu.req_rdy; - ctrl.io.dtlb_miss := dtlb.io.cpu.resp_miss; - ctrl.io.xcpt_ma_ld := io.dmem.xcpt_ma_ld; - ctrl.io.xcpt_ma_st := io.dmem.xcpt_ma_st; - - // connect page table walker to TLBs, page table base register (from PCR) - // and D$ arbiter (selects between requests from pipeline and PTW, PTW has priority) - ptw.io.dtlb <> dtlb.io.ptw; - ptw.io.itlb <> itlb.io.ptw; - ptw.io.ptbr := dpath.io.ptbr; - arb.io.ptw <> ptw.io.dmem; - arb.io.mem <> io.dmem - // connect arbiter to ctrl+dpath+DTLB arb.io.cpu.req_val := ctrl.io.dmem.req_val; arb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd; arb.io.cpu.req_type := ctrl.io.dmem.req_type; arb.io.cpu.req_kill := ctrl.io.dmem.req_kill; arb.io.cpu.req_idx := dpath.io.dmem.req_addr(PGIDX_BITS-1,0); - arb.io.cpu.req_ppn := dtlb.io.cpu.resp_ppn; arb.io.cpu.req_data := dpath.io.dmem.req_data; arb.io.cpu.req_tag := dpath.io.dmem.req_tag; - ctrl.io.dmem.req_rdy := dtlb.io.cpu.req_rdy && arb.io.cpu.req_rdy; ctrl.io.dmem.resp_miss := arb.io.cpu.resp_miss; ctrl.io.dmem.resp_replay:= arb.io.cpu.resp_replay; ctrl.io.dmem.resp_nack := arb.io.cpu.resp_nack; @@ -111,8 
+160,6 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) { dpath.io.vec_ctrl <> ctrl.io.vec_dpath - val vu = new vu() - // hooking up vector I$ vitlb.io.cpu.invalidate := dpath.io.ptbr_wen vitlb.io.cpu.status := dpath.io.ctrl.status diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 32ddb797..176f1ab5 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -4,33 +4,39 @@ import Chisel._; import Node._; import Constants._; import scala.math._; +import hwacha._ -// interface between DTLB and pipeline -class ioDTLB_CPU(view: List[String] = null) extends Bundle(view) +// ioDTLB_CPU also located in hwacha/src/vuVXU-Interface.scala +// should keep them in sync + +class ioDTLB_CPU_req_bundle extends Bundle { - // status bits (from PCR), to check current permission and whether VM is enabled - val status = Bits(17, INPUT); - // invalidate all TLB entries - val invalidate = Bool(INPUT); // lookup requests - val req_val = Bool(INPUT); - val req_kill = Bool(INPUT); - val req_cmd = Bits(4, INPUT); // load/store/amo - val req_rdy = Bool(OUTPUT); - val req_asid = Bits(ASID_BITS, INPUT); - val req_vpn = UFix(VPN_BITS+1, INPUT); + val kill = Bool() + val cmd = Bits(width=4) // load/store/amo + val asid = Bits(width=ASID_BITS) + val vpn = UFix(width=VPN_BITS+1) +} +class ioDTLB_CPU_req extends io_ready_valid()( { new ioDTLB_CPU_req_bundle() } ) + +class ioDTLB_CPU_resp extends Bundle +{ // lookup responses - val resp_miss = Bool(OUTPUT); -// val resp_val = Bool(OUTPUT); - val resp_ppn = UFix(PPN_BITS, OUTPUT); - val xcpt_ld = Bool(OUTPUT); - val xcpt_st = Bool(OUTPUT); + val miss = Bool(OUTPUT) + val ppn = UFix(PPN_BITS, OUTPUT) + val xcpt_ld = Bool(OUTPUT) + val xcpt_st = Bool(OUTPUT) } class ioDTLB extends Bundle { - val cpu = new ioDTLB_CPU(); - val ptw = new ioTLB_PTW(); + // status bits (from PCR), to check current permission and whether VM is enabled + val status = Bits(17,INPUT) + 
// invalidate all TLB entries + val invalidate = Bool(INPUT) + val cpu_req = new ioDTLB_CPU_req().flip() + val cpu_resp = new ioDTLB_CPU_resp() + val ptw = new ioTLB_PTW() } class rocketDTLB(entries: Int) extends Component @@ -50,10 +56,10 @@ class rocketDTLB(entries: Int) extends Component val r_refill_waddr = Reg() { UFix() } val repl_count = Reg(resetVal = UFix(0,addr_bits)); - when (io.cpu.req_val && io.cpu.req_rdy) { - r_cpu_req_vpn := io.cpu.req_vpn; - r_cpu_req_cmd := io.cpu.req_cmd; - r_cpu_req_asid := io.cpu.req_asid; + when (io.cpu_req.valid && io.cpu_req.ready) { + r_cpu_req_vpn := io.cpu_req.bits.vpn; + r_cpu_req_cmd := io.cpu_req.bits.cmd; + r_cpu_req_asid := io.cpu_req.bits.asid; r_cpu_req_val := Bool(true); } .otherwise { @@ -63,6 +69,7 @@ class rocketDTLB(entries: Int) extends Component val req_load = (r_cpu_req_cmd === M_XRD); val req_store = (r_cpu_req_cmd === M_XWR); val req_amo = r_cpu_req_cmd(3).toBool; + val req_pf = (r_cpu_req_cmd === M_PFR) || (r_cpu_req_cmd === M_PFW) val bad_va = r_cpu_req_vpn(VPN_BITS) != r_cpu_req_vpn(VPN_BITS-1); @@ -70,7 +77,7 @@ class rocketDTLB(entries: Int) extends Component val tag_ram = Mem(entries, io.ptw.resp_val, r_refill_waddr.toUFix, io.ptw.resp_ppn); val lookup_tag = Cat(r_cpu_req_asid, r_cpu_req_vpn); - tag_cam.io.clear := io.cpu.invalidate; + tag_cam.io.clear := io.invalidate; tag_cam.io.tag := lookup_tag; tag_cam.io.write := io.ptw.resp_val || io.ptw.resp_err; tag_cam.io.write_tag := r_refill_tag; @@ -79,9 +86,9 @@ class rocketDTLB(entries: Int) extends Component val tag_hit_addr = tag_cam.io.hit_addr; // extract fields from status register - val status_s = io.cpu.status(SR_S).toBool; // user/supervisor mode + val status_s = io.status(SR_S).toBool; // user/supervisor mode val status_u = !status_s; - val status_vm = io.cpu.status(SR_VM).toBool // virtual memory enable + val status_vm = io.status(SR_VM).toBool // virtual memory enable // extract fields from PT permission bits val ptw_perm_ur = 
io.ptw.resp_perm(2); @@ -118,7 +125,7 @@ class rocketDTLB(entries: Int) extends Component val repl_waddr = Mux(invalid_entry, ie_addr, repl_count).toUFix; - val lookup = (state === s_ready) && r_cpu_req_val && !io.cpu.req_kill && (req_load || req_store || req_amo); + val lookup = (state === s_ready) && r_cpu_req_val && !io.cpu_req.bits.kill && (req_load || req_store || req_amo || req_pf); val lookup_hit = lookup && tag_hit; val lookup_miss = lookup && !tag_hit; val tlb_hit = status_vm && lookup_hit; @@ -135,7 +142,7 @@ class rocketDTLB(entries: Int) extends Component } // exception check - val outofrange = !tlb_miss && (io.cpu.resp_ppn > UFix(MEMSIZE_PAGES, PPN_BITS)); + val outofrange = !tlb_miss && (io.cpu_resp.ppn > UFix(MEMSIZE_PAGES, PPN_BITS)); val access_fault_ld = tlb_hit && (req_load || req_amo) && @@ -143,7 +150,7 @@ class rocketDTLB(entries: Int) extends Component (status_u && !ur_array(tag_hit_addr).toBool) || bad_va); - io.cpu.xcpt_ld := access_fault_ld; + io.cpu_resp.xcpt_ld := access_fault_ld; val access_fault_st = tlb_hit && (req_store || req_amo) && @@ -151,11 +158,11 @@ class rocketDTLB(entries: Int) extends Component (status_u && !uw_array(tag_hit_addr).toBool) || bad_va); - io.cpu.xcpt_st := access_fault_st; + io.cpu_resp.xcpt_st := access_fault_st; - io.cpu.req_rdy := (state === s_ready) && !tlb_miss; - io.cpu.resp_miss := tlb_miss; - io.cpu.resp_ppn := + io.cpu_req.ready := (state === s_ready) && !tlb_miss; + io.cpu_resp.miss := tlb_miss; + io.cpu_resp.ppn := Mux(status_vm, tag_ram(tag_hit_addr), r_cpu_req_vpn(PPN_BITS-1,0)).toUFix; io.ptw.req_val := (state === s_request);