remove more global consts; refactor DTLBs
The D$ now contains the DTLB. Requestors now provide the full VAddr with the initial request. The VU now has its own DTLBs.
This commit is contained in:
		| @@ -12,58 +12,40 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Comp | ||||
|     val mem = new ioHellaCache()(conf.dcache) | ||||
|   } | ||||
|  | ||||
|   var req_val = Bool(false) | ||||
|   var req_rdy = io.mem.req.ready | ||||
|   for (i <- 0 until n) | ||||
|   { | ||||
|     io.requestor(i).req.ready := req_rdy | ||||
|     req_val = req_val || io.requestor(i).req.valid | ||||
|     req_rdy = req_rdy && !io.requestor(i).req.valid | ||||
|   val r_valid = io.requestor.map(r => Reg(r.req.valid)) | ||||
|  | ||||
|   io.mem.req.valid := io.requestor.map(_.req.valid).reduce(_||_) | ||||
|   io.requestor(0).req.ready := io.mem.req.ready | ||||
|   for (i <- 1 until n) | ||||
|     io.requestor(i).req.ready := io.requestor(i-1).req.ready && !io.requestor(i-1).req.valid | ||||
|  | ||||
|   io.mem.req.bits := io.requestor(n-1).req.bits | ||||
|   io.mem.req.bits.tag := Cat(io.requestor(n-1).req.bits.tag, UFix(n-1, log2Up(n))) | ||||
|   for (i <- n-2 to 0 by -1) { | ||||
|     val req = io.requestor(i).req | ||||
|     when (req.valid) { | ||||
|       io.mem.req.bits.cmd := req.bits.cmd | ||||
|       io.mem.req.bits.typ := req.bits.typ | ||||
|       io.mem.req.bits.addr := req.bits.addr | ||||
|       io.mem.req.bits.phys := req.bits.phys | ||||
|       io.mem.req.bits.tag := Cat(req.bits.tag, UFix(i, log2Up(n))) | ||||
|     } | ||||
|     when (r_valid(i)) { | ||||
|       io.mem.req.bits.kill := req.bits.kill | ||||
|       io.mem.req.bits.data := req.bits.data | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   var req_cmd  = io.requestor(n-1).req.bits.cmd | ||||
|   var req_type = io.requestor(n-1).req.bits.typ | ||||
|   var req_idx  = io.requestor(n-1).req.bits.idx | ||||
|   var req_ppn  = io.requestor(n-1).req.bits.ppn | ||||
|   var req_data = io.requestor(n-1).req.bits.data | ||||
|   var req_kill = io.requestor(n-1).req.bits.kill | ||||
|   var req_tag  = io.requestor(n-1).req.bits.tag | ||||
|   for (i <- n-1 to 0 by -1) | ||||
|   { | ||||
|     val r = io.requestor(i).req | ||||
|     req_cmd  = Mux(r.valid, r.bits.cmd, req_cmd) | ||||
|     req_type = Mux(r.valid, r.bits.typ, req_type) | ||||
|     req_idx  = Mux(r.valid, r.bits.idx, req_idx) | ||||
|     req_ppn  = Mux(Reg(r.valid), r.bits.ppn, req_ppn) | ||||
|     req_data = Mux(Reg(r.valid), r.bits.data, req_data) | ||||
|     req_kill = Mux(Reg(r.valid), r.bits.kill, req_kill) | ||||
|     req_tag  = Mux(r.valid, Cat(r.bits.tag, UFix(i, log2Up(n))), req_tag) | ||||
|   } | ||||
|  | ||||
|   io.mem.req.valid     := req_val | ||||
|   io.mem.req.bits.cmd  := req_cmd | ||||
|   io.mem.req.bits.typ  := req_type | ||||
|   io.mem.req.bits.idx  := req_idx | ||||
|   io.mem.req.bits.ppn  := req_ppn | ||||
|   io.mem.req.bits.data := req_data | ||||
|   io.mem.req.bits.kill := req_kill | ||||
|   io.mem.req.bits.tag  := req_tag | ||||
|  | ||||
|   for (i <- 0 until n) | ||||
|   { | ||||
|     val r = io.requestor(i).resp | ||||
|     val x = io.requestor(i).xcpt | ||||
|   for (i <- 0 until n) { | ||||
|     val resp = io.requestor(i).resp | ||||
|     val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UFix(i) | ||||
|     x.ma.ld := io.mem.xcpt.ma.ld && Reg(io.requestor(i).req.valid) | ||||
|     x.ma.st := io.mem.xcpt.ma.st && Reg(io.requestor(i).req.valid) | ||||
|     r.valid             := io.mem.resp.valid && tag_hit | ||||
|     r.bits.miss         := io.mem.resp.bits.miss && tag_hit | ||||
|     r.bits.nack         := io.mem.resp.bits.nack && Reg(io.requestor(i).req.valid) | ||||
|     r.bits.replay       := io.mem.resp.bits.replay && tag_hit | ||||
|     r.bits.data         := io.mem.resp.bits.data | ||||
|     r.bits.data_subword := io.mem.resp.bits.data_subword | ||||
|     r.bits.typ          := io.mem.resp.bits.typ | ||||
|     r.bits.tag          := io.mem.resp.bits.tag >> UFix(log2Up(n)) | ||||
|     resp.valid := io.mem.resp.valid && tag_hit | ||||
|     io.requestor(i).xcpt := io.mem.xcpt | ||||
|     resp.bits := io.mem.resp.bits | ||||
|     resp.bits.tag := io.mem.resp.bits.tag >> UFix(log2Up(n)) | ||||
|     resp.bits.miss := io.mem.resp.bits.miss && tag_hit | ||||
|     resp.bits.nack := io.mem.resp.bits.nack && r_valid(i) | ||||
|     resp.bits.replay := io.mem.resp.bits.replay && tag_hit | ||||
|   } | ||||
| } | ||||
|  | ||||
| @@ -80,15 +62,6 @@ class MemArbiter(n: Int) extends Component { | ||||
|     val requestor = Vec(n) { new ioUncachedRequestor }.flip | ||||
|   } | ||||
|  | ||||
|   var xi_val = Bool(false) | ||||
|   var xi_rdy = io.mem.xact_init.ready | ||||
|   for (i <- 0 until n) | ||||
|   { | ||||
|     io.requestor(i).xact_init.ready := xi_rdy | ||||
|     xi_val = xi_val || io.requestor(i).xact_init.valid | ||||
|     xi_rdy = xi_rdy && !io.requestor(i).xact_init.valid | ||||
|   } | ||||
|  | ||||
|   var xi_bits = new TransactionInit | ||||
|   xi_bits := io.requestor(n-1).xact_init.bits | ||||
|   xi_bits.tile_xact_id := Cat(io.requestor(n-1).xact_init.bits.tile_xact_id, UFix(n-1, log2Up(n))) | ||||
| @@ -101,24 +74,21 @@ class MemArbiter(n: Int) extends Component { | ||||
|     xi_bits = Mux(io.requestor(i).xact_init.valid, my_xi_bits, xi_bits) | ||||
|   } | ||||
|  | ||||
|   io.mem.xact_init.valid := xi_val | ||||
|   io.mem.xact_init.bits := xi_bits | ||||
|  | ||||
|   var xf_val = Bool(false) | ||||
|   var xf_rdy = io.mem.xact_finish.ready | ||||
|   for (i <- 0 until n) | ||||
|   { | ||||
|     io.requestor(i).xact_finish.ready := xf_rdy | ||||
|     xf_val = xf_val || io.requestor(i).xact_finish.valid | ||||
|     xf_rdy = xf_rdy && !io.requestor(i).xact_finish.valid | ||||
|   } | ||||
|   io.mem.xact_init.valid := io.requestor.map(_.xact_init.valid).reduce(_||_) | ||||
|   io.requestor(0).xact_init.ready := io.mem.xact_init.ready | ||||
|   for (i <- 1 until n) | ||||
|     io.requestor(i).xact_init.ready := io.requestor(i-1).xact_init.ready && !io.requestor(i-1).xact_init.valid | ||||
|  | ||||
|   var xf_bits = io.requestor(n-1).xact_finish.bits | ||||
|   for (i <- n-2 to 0 by -1) | ||||
|     xf_bits = Mux(io.requestor(i).xact_finish.valid, io.requestor(i).xact_finish.bits, xf_bits) | ||||
|  | ||||
|   io.mem.xact_finish.valid := xf_val | ||||
|   io.mem.xact_finish.bits := xf_bits | ||||
|   io.mem.xact_finish.valid := io.requestor.map(_.xact_finish.valid).reduce(_||_) | ||||
|   io.requestor(0).xact_finish.ready := io.mem.xact_finish.ready | ||||
|   for (i <- 1 until n) | ||||
|     io.requestor(i).xact_finish.ready := io.requestor(i-1).xact_finish.ready && !io.requestor(i-1).xact_finish.valid | ||||
|  | ||||
|   for (i <- 0 until n) | ||||
|   { | ||||
|   | ||||
| @@ -154,12 +154,7 @@ trait InterruptConstants { | ||||
|   val IRQ_TIMER = 7 | ||||
| } | ||||
|   | ||||
| abstract trait RocketDcacheConstants extends ArbiterConstants with uncore.constants.AddressConstants { | ||||
|   val NMSHR = if (HAVE_VEC) 4 else 2 // number of primary misses | ||||
|   require(log2Up(NMSHR)+3 <= uncore.Constants.TILE_XACT_ID_BITS) | ||||
|   val NRPQ = 16; // number of secondary misses | ||||
|   val NSDQ = 17; // number of secondary stores/AMOs | ||||
|   val OFFSET_BITS = 6; // log2(cache line size in bytes) | ||||
| abstract trait RocketDcacheConstants extends TileConfigConstants with uncore.constants.CacheConstants with uncore.constants.AddressConstants { | ||||
|   require(OFFSET_BITS == log2Up(uncore.Constants.CACHE_DATA_SIZE_IN_BYTES)) | ||||
|   require(OFFSET_BITS <= uncore.Constants.X_INIT_WRITE_MASK_BITS) | ||||
|   require(log2Up(OFFSET_BITS) <= uncore.Constants.X_INIT_SUBWORD_ADDR_BITS) | ||||
| @@ -196,15 +191,3 @@ trait VectorOpConstants { | ||||
|   val VIMM2_ALU = UFix(1, 1) | ||||
|   val VIMM2_X = UFix(0, 1) | ||||
| } | ||||
|  | ||||
| abstract trait ArbiterConstants extends TileConfigConstants { | ||||
|   val DCACHE_PORTS = 3 | ||||
|   val DCACHE_CPU = 0 | ||||
|   val DCACHE_PTW = 1 | ||||
|   val DCACHE_VU = 2 | ||||
|  | ||||
|   val DMEM_PORTS = if (HAVE_VEC) 3 else 2 | ||||
|   val DMEM_DCACHE = 0 | ||||
|   val DMEM_ICACHE = 1 | ||||
|   val DMEM_VICACHE = 2 | ||||
| } | ||||
|   | ||||
| @@ -13,87 +13,57 @@ class ioRocket(implicit conf: RocketConfiguration) extends Bundle | ||||
|   val dmem    = new ioHellaCache()(conf.dcache) | ||||
| } | ||||
| 
 | ||||
| class rocketProc(implicit conf: RocketConfiguration) extends Component | ||||
| class Core(implicit conf: RocketConfiguration) extends Component | ||||
| { | ||||
|   val io    = new ioRocket | ||||
|     | ||||
|   val ctrl  = new Control | ||||
|   val dpath = new Datapath | ||||
| 
 | ||||
|   val ptw = Vec(0) { new IOTLBPTW } | ||||
|   val arb = new HellaCacheArbiter(DCACHE_PORTS) | ||||
| 
 | ||||
|   var vu: vu = null | ||||
|   if (HAVE_VEC) | ||||
|   { | ||||
|     vu = new vu() | ||||
| 
 | ||||
|     val vdtlb = new rocketTLB(8) | ||||
|     vdtlb.io.invalidate := dpath.io.ptbr_wen | ||||
|     vdtlb.io.status := dpath.io.ctrl.status | ||||
|     ptw += vdtlb.io.ptw | ||||
| 
 | ||||
|     vdtlb.io.cpu_req <> vu.io.vec_tlb_req | ||||
|     vu.io.vec_tlb_resp := vdtlb.io.cpu_resp | ||||
|     vu.io.vec_tlb_resp.xcpt_pf := Bool(false) | ||||
| 
 | ||||
|     val pftlb = new rocketTLB(2) | ||||
|     pftlb.io.invalidate := dpath.io.ptbr_wen | ||||
|     pftlb.io.status := dpath.io.ctrl.status | ||||
|     pftlb.io.cpu_req <> vu.io.vec_pftlb_req | ||||
|     ptw += pftlb.io.ptw | ||||
| 
 | ||||
|     vu.io.vec_pftlb_resp := pftlb.io.cpu_resp | ||||
|     vu.io.vec_pftlb_resp.xcpt_ld := Bool(false) | ||||
|     vu.io.vec_pftlb_resp.xcpt_st := Bool(false) | ||||
|   } | ||||
| 
 | ||||
|   // connect DTLB to ctrl+dpath | ||||
|   val dtlb = new rocketTLB(DTLB_ENTRIES) | ||||
|   dtlb.io.invalidate := dpath.io.ptbr_wen | ||||
|   dtlb.io.status := dpath.io.ctrl.status | ||||
|   ptw += dtlb.io.ptw | ||||
| 
 | ||||
|   dtlb.io.cpu_req.valid := ctrl.io.dtlb_val | ||||
|   dtlb.io.cpu_req.bits.kill := ctrl.io.dtlb_kill | ||||
|   dtlb.io.cpu_req.bits.cmd := ctrl.io.dmem.req.bits.cmd | ||||
|   dtlb.io.cpu_req.bits.asid := UFix(0) | ||||
|   dtlb.io.cpu_req.bits.vpn := dpath.io.dtlb.vpn | ||||
|   ctrl.io.xcpt_dtlb_ld := dtlb.io.cpu_resp.xcpt_ld | ||||
|   ctrl.io.xcpt_dtlb_st := dtlb.io.cpu_resp.xcpt_st | ||||
|   ctrl.io.dtlb_rdy := dtlb.io.cpu_req.ready | ||||
|   ctrl.io.dtlb_miss := dtlb.io.cpu_resp.miss | ||||
| 
 | ||||
|   arb.io.requestor(DCACHE_CPU).req.bits.ppn := dtlb.io.cpu_resp.ppn | ||||
| 
 | ||||
|   ctrl.io.dpath <> dpath.io.ctrl | ||||
|   dpath.io.host <> io.host | ||||
| 
 | ||||
|   ctrl.io.imem <> io.imem | ||||
|   dpath.io.imem <> io.imem | ||||
| 
 | ||||
|   ctrl.io.dmem <> arb.io.requestor(DCACHE_CPU) | ||||
|   dpath.io.dmem <> arb.io.requestor(DCACHE_CPU) | ||||
|   val dmemArb = new HellaCacheArbiter(if (HAVE_VEC) 3 else 2) | ||||
|   dmemArb.io.mem <> io.dmem | ||||
|   val dmem = dmemArb.io.requestor | ||||
|   dmem(1) <> ctrl.io.dmem | ||||
|   dmem(1) <> dpath.io.dmem | ||||
| 
 | ||||
|   var fpu: rocketFPU = null | ||||
|   if (HAVE_FPU) | ||||
|   { | ||||
|     fpu = new rocketFPU(4,6) | ||||
|   val ptw = collection.mutable.ArrayBuffer(io.imem.ptw, io.dmem.ptw) | ||||
| 
 | ||||
|   val fpu: FPU = if (HAVE_FPU) { | ||||
|     val fpu = new FPU(4,6) | ||||
|     dpath.io.fpu <> fpu.io.dpath | ||||
|     ctrl.io.fpu <> fpu.io.ctrl | ||||
|   } | ||||
|     fpu | ||||
|   } else null | ||||
| 
 | ||||
|   if (HAVE_VEC) { | ||||
|     val vu = new vu() | ||||
| 
 | ||||
|     val vdtlb = new rocketTLB(8) | ||||
|     ptw += vdtlb.io.ptw | ||||
|     vdtlb.io.cpu_req <> vu.io.vec_tlb_req | ||||
|     vu.io.vec_tlb_resp := vdtlb.io.cpu_resp | ||||
|     vu.io.vec_tlb_resp.xcpt_pf := Bool(false) | ||||
| 
 | ||||
|     val pftlb = new rocketTLB(2) | ||||
|     pftlb.io.cpu_req <> vu.io.vec_pftlb_req | ||||
|     ptw += pftlb.io.ptw | ||||
|     vu.io.vec_pftlb_resp := pftlb.io.cpu_resp | ||||
|     vu.io.vec_pftlb_resp.xcpt_ld := Bool(false) | ||||
|     vu.io.vec_pftlb_resp.xcpt_st := Bool(false) | ||||
| 
 | ||||
|   if (HAVE_VEC) | ||||
|   { | ||||
|     dpath.io.vec_ctrl <> ctrl.io.vec_dpath | ||||
| 
 | ||||
|     // hooking up vector I$ | ||||
|     ptw += io.vimem.ptw | ||||
|     io.vimem.req.bits.status := dpath.io.ctrl.status | ||||
|     io.vimem.req.bits.pc := vu.io.imem_req.bits | ||||
|     io.vimem.req.valid := vu.io.imem_req.valid | ||||
|     io.vimem.req.bits.invalidate := ctrl.io.dpath.flush_inst | ||||
|     io.vimem.req.bits.invalidateTLB := dpath.io.ptbr_wen | ||||
|     vu.io.imem_resp.valid := io.vimem.resp.valid | ||||
|     vu.io.imem_resp.bits.pc := io.vimem.resp.bits.pc | ||||
|     vu.io.imem_resp.bits.data := io.vimem.resp.bits.data | ||||
| @@ -155,21 +125,16 @@ class rocketProc(implicit conf: RocketConfiguration) extends Component | ||||
|     vu.io.xcpt.hold := ctrl.io.vec_iface.hold | ||||
| 
 | ||||
|     // hooking up vector memory interface | ||||
|     arb.io.requestor(DCACHE_VU).req.valid := vu.io.dmem_req.valid | ||||
|     arb.io.requestor(DCACHE_VU).req.bits.kill := vu.io.dmem_req.bits.kill | ||||
|     arb.io.requestor(DCACHE_VU).req.bits.cmd := vu.io.dmem_req.bits.cmd | ||||
|     arb.io.requestor(DCACHE_VU).req.bits.typ := vu.io.dmem_req.bits.typ | ||||
|     arb.io.requestor(DCACHE_VU).req.bits.idx := vu.io.dmem_req.bits.idx | ||||
|     arb.io.requestor(DCACHE_VU).req.bits.ppn := Reg(vu.io.dmem_req.bits.ppn) | ||||
|     arb.io.requestor(DCACHE_VU).req.bits.data := Reg(StoreGen(vu.io.dmem_req.bits.typ, Bits(0), vu.io.dmem_req.bits.data).data) | ||||
|     arb.io.requestor(DCACHE_VU).req.bits.tag := vu.io.dmem_req.bits.tag | ||||
|     dmem(2).req.valid := vu.io.dmem_req.valid | ||||
|     dmem(2).req.bits := vu.io.dmem_req.bits | ||||
|     dmem(2).req.bits.data := Reg(StoreGen(vu.io.dmem_req.bits.typ, Bits(0), vu.io.dmem_req.bits.data).data) | ||||
| 
 | ||||
|     vu.io.dmem_req.ready := arb.io.requestor(DCACHE_VU).req.ready | ||||
|     vu.io.dmem_resp.valid := Reg(arb.io.requestor(DCACHE_VU).resp.valid) | ||||
|     vu.io.dmem_resp.bits.nack := arb.io.requestor(DCACHE_VU).resp.bits.nack | ||||
|     vu.io.dmem_resp.bits.data := arb.io.requestor(DCACHE_VU).resp.bits.data_subword | ||||
|     vu.io.dmem_resp.bits.tag := Reg(arb.io.requestor(DCACHE_VU).resp.bits.tag) | ||||
|     vu.io.dmem_resp.bits.typ := Reg(arb.io.requestor(DCACHE_VU).resp.bits.typ) | ||||
|     vu.io.dmem_req.ready := dmem(2).req.ready | ||||
|     vu.io.dmem_resp.valid := Reg(dmem(2).resp.valid) | ||||
|     vu.io.dmem_resp.bits.nack := dmem(2).resp.bits.nack | ||||
|     vu.io.dmem_resp.bits.data := dmem(2).resp.bits.data_subword | ||||
|     vu.io.dmem_resp.bits.tag := Reg(dmem(2).resp.bits.tag) | ||||
|     vu.io.dmem_resp.bits.typ := Reg(dmem(2).resp.bits.typ) | ||||
| 
 | ||||
|     // share vector integer multiplier with rocket | ||||
|     dpath.io.vec_imul_req <> vu.io.cp_imul_req | ||||
| @@ -178,22 +143,13 @@ class rocketProc(implicit conf: RocketConfiguration) extends Component | ||||
|     // share sfma and dfma pipelines with rocket | ||||
|     fpu.io.sfma <> vu.io.cp_sfma | ||||
|     fpu.io.dfma <> vu.io.cp_dfma | ||||
|   } | ||||
|   else | ||||
|   { | ||||
|     arb.io.requestor(DCACHE_VU).req.valid := Bool(false) | ||||
|     if (HAVE_FPU) | ||||
|     { | ||||
|   } else if (fpu != null) { | ||||
|     fpu.io.sfma.valid := Bool(false) | ||||
|     fpu.io.dfma.valid := Bool(false) | ||||
|   } | ||||
|   } | ||||
| 
 | ||||
|   ptw += io.imem.ptw | ||||
|   val thePTW = new PTW(ptw.length) | ||||
|   thePTW.io.requestor <> ptw | ||||
|   thePTW.io.ptbr := dpath.io.ptbr; | ||||
|   arb.io.requestor(DCACHE_PTW) <> thePTW.io.mem | ||||
| 
 | ||||
|   arb.io.mem <> io.dmem | ||||
|   ptw zip thePTW.io.requestor map { case (a, b) => a <> b } | ||||
|   thePTW.io.dpath <> dpath.io.ptw | ||||
|   dmem(0) <> thePTW.io.mem | ||||
| } | ||||
| @@ -569,8 +569,8 @@ class Control(implicit conf: RocketConfiguration) extends Component | ||||
|     (mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause), | ||||
|     (mem_reg_mem_val && io.dmem.xcpt.ma.ld,  UFix( 8)), | ||||
|     (mem_reg_mem_val && io.dmem.xcpt.ma.st,  UFix( 9)), | ||||
|     (mem_reg_mem_val && io.xcpt_dtlb_ld,     UFix(10)), | ||||
|     (mem_reg_mem_val && io.xcpt_dtlb_st,     UFix(11)))) | ||||
|     (mem_reg_mem_val && io.dmem.xcpt.pf.ld,     UFix(10)), | ||||
|     (mem_reg_mem_val && io.dmem.xcpt.pf.st,     UFix(11)))) | ||||
|  | ||||
|   wb_reg_xcpt := mem_xcpt && !take_pc_wb && !wb_reg_replay_next | ||||
|   when (mem_xcpt) { wb_reg_cause := mem_cause } | ||||
| @@ -644,7 +644,7 @@ class Control(implicit conf: RocketConfiguration) extends Component | ||||
|  | ||||
|   // replay inst in ex stage | ||||
|   val replay_ex    = wb_reg_dcache_miss && ex_reg_load_use || mem_reg_flush_inst ||  | ||||
|                      ex_reg_mem_val && !(io.dmem.req.ready && io.dtlb_rdy) || | ||||
|                      ex_reg_mem_val && !io.dmem.req.ready || | ||||
|                      ex_reg_div_val && !io.dpath.div_rdy || | ||||
|                      ex_reg_mul_val && !io.dpath.mul_rdy || | ||||
|                      mem_reg_replay_next | ||||
| @@ -652,7 +652,7 @@ class Control(implicit conf: RocketConfiguration) extends Component | ||||
|  | ||||
|   // replay inst in mem stage | ||||
|   val mem_ll_wb = io.dpath.mem_wb || io.dpath.mul_result_val || io.dpath.div_result_val | ||||
|   val dmem_kill_mem = mem_reg_valid && (io.dtlb_miss || io.dmem.resp.bits.nack) | ||||
|   val dmem_kill_mem = mem_reg_valid && io.dmem.resp.bits.nack | ||||
|   val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem | ||||
|   val replay_mem  = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay || fpu_kill_mem | ||||
|   val killm_common = mem_reg_wen && mem_ll_wb || take_pc_wb || mem_reg_xcpt || !mem_reg_valid | ||||
| @@ -734,7 +734,7 @@ class Control(implicit conf: RocketConfiguration) extends Component | ||||
|     id_ex_hazard || id_mem_hazard || id_wb_hazard || | ||||
|     id_stall_raddr1 || id_stall_raddr2 || id_stall_waddr || | ||||
|     id_fp_val && id_stall_fpu || | ||||
|     id_mem_val && !(io.dmem.req.ready && io.dtlb_rdy) || | ||||
|     id_mem_val && !io.dmem.req.ready || | ||||
|     vec_stalld | ||||
|   ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || id_interrupt | ||||
|  | ||||
| @@ -772,10 +772,9 @@ class Control(implicit conf: RocketConfiguration) extends Component | ||||
|   io.fpu.killx := ctrl_killx | ||||
|   io.fpu.killm := killm_common | ||||
|  | ||||
|   io.dtlb_val           := ex_reg_mem_val | ||||
|   io.dtlb_kill          := !mem_reg_valid | ||||
|   io.dmem.req.valid     := ex_reg_mem_val | ||||
|   io.dmem.req.bits.kill := killm_common || mem_xcpt || io.dtlb_miss | ||||
|   io.dmem.req.bits.kill := killm_common || mem_xcpt | ||||
|   io.dmem.req.bits.cmd  := ex_reg_mem_cmd | ||||
|   io.dmem.req.bits.typ  := ex_reg_mem_type | ||||
|   io.dmem.req.bits.phys := Bool(false) | ||||
| } | ||||
|   | ||||
| @@ -13,9 +13,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Component | ||||
|     val ctrl  = new ioCtrlDpath().flip | ||||
|     val dmem = new ioHellaCache()(conf.dcache) | ||||
|     val dtlb = new ioDTLB_CPU_req_bundle().asOutput() | ||||
|     val ptw = new IODatapathPTW().flip | ||||
|     val imem  = new IOCPUFrontend()(conf.icache) | ||||
|     val ptbr_wen = Bool(OUTPUT); | ||||
|     val ptbr = UFix(OUTPUT, PADDR_BITS); | ||||
|     val fpu = new ioDpathFPU(); | ||||
|     val vec_ctrl = new ioCtrlDpathVec().flip | ||||
|     val vec_iface = new ioDpathVecInterface() | ||||
| @@ -81,9 +80,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component | ||||
|   val ex_effective_address = Cat(ex_ea_sign, ex_alu_adder_out(VADDR_BITS-1,0)).toUFix | ||||
|  | ||||
|   // hook up I$ | ||||
|   io.imem.req.bits.invalidateTLB := pcr.io.ptbr_wen | ||||
|   io.imem.req.bits.currentpc := ex_reg_pc | ||||
|   io.imem.req.bits.status := pcr.io.status | ||||
|   io.imem.req.bits.pc := | ||||
|     Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4, | ||||
|     Mux(io.ctrl.sel_pc === PC_EX,  Mux(io.ctrl.ex_jalr, ex_effective_address, ex_branch_target), | ||||
| @@ -209,7 +206,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component | ||||
|  | ||||
|   // D$ request interface (registered inside D$ module) | ||||
|   // other signals (req_val, req_rdy) connect to control module   | ||||
|   io.dmem.req.bits.idx  := ex_effective_address | ||||
|   io.dmem.req.bits.addr := ex_effective_address | ||||
|   io.dmem.req.bits.data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2) | ||||
|   io.dmem.req.bits.tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val) | ||||
|   require(io.dmem.req.bits.tag.getWidth >= 6) | ||||
| @@ -225,8 +222,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Component | ||||
|   io.ctrl.irq_ipi      := pcr.io.irq_ipi;   | ||||
|   io.ctrl.status       := pcr.io.status; | ||||
|   io.ctrl.pcr_replay   := pcr.io.replay | ||||
|   io.ptbr              := pcr.io.ptbr; | ||||
|   io.ptbr_wen          := pcr.io.ptbr_wen; | ||||
|  | ||||
|   io.ptw.ptbr := pcr.io.ptbr | ||||
|   io.ptw.invalidate := pcr.io.ptbr_wen | ||||
|   io.ptw.status := pcr.io.status | ||||
|    | ||||
| 	// branch resolution logic | ||||
|   io.ctrl.jalr_eq := ex_reg_rs1 === id_pc.toFix && ex_reg_op2(id_imm_small.getWidth-1,0) === UFix(0) | ||||
|   | ||||
| @@ -5,8 +5,9 @@ import Node._ | ||||
| import Constants._ | ||||
| import Instructions._ | ||||
| import Util._ | ||||
| import FPConstants._ | ||||
|  | ||||
| object rocketFPConstants | ||||
| object FPConstants | ||||
| { | ||||
|   val FCMD_ADD =        Bits("b000000") | ||||
|   val FCMD_SUB =        Bits("b000001") | ||||
| @@ -45,7 +46,6 @@ object rocketFPConstants | ||||
|   val FCMD_WIDTH = 6 | ||||
|   val FSR_WIDTH = 8 | ||||
| } | ||||
| import rocketFPConstants._ | ||||
|  | ||||
| class FPUCtrlSigs extends Bundle | ||||
| { | ||||
| @@ -64,7 +64,7 @@ class FPUCtrlSigs extends Bundle | ||||
|   val wrfsr = Bool() | ||||
| } | ||||
|  | ||||
| class rocketFPUDecoder extends Component | ||||
| class FPUDecoder extends Component | ||||
| { | ||||
|   val io = new Bundle { | ||||
|     val inst = Bits(INPUT, 32) | ||||
| @@ -378,7 +378,7 @@ class ioFMA(width: Int) extends Bundle { | ||||
|   val exc = Bits(OUTPUT, 5) | ||||
| } | ||||
|  | ||||
| class rocketFPUSFMAPipe(val latency: Int) extends Component | ||||
| class FPUSFMAPipe(val latency: Int) extends Component | ||||
| { | ||||
|   val io = new ioFMA(33) | ||||
|    | ||||
| @@ -415,7 +415,7 @@ class rocketFPUSFMAPipe(val latency: Int) extends Component | ||||
|   io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits | ||||
| } | ||||
|  | ||||
| class rocketFPUDFMAPipe(val latency: Int) extends Component | ||||
| class FPUDFMAPipe(val latency: Int) extends Component | ||||
| { | ||||
|   val io = new ioFMA(65) | ||||
|    | ||||
| @@ -452,7 +452,7 @@ class rocketFPUDFMAPipe(val latency: Int) extends Component | ||||
|   io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits | ||||
| } | ||||
|  | ||||
| class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component | ||||
| class FPU(sfma_latency: Int, dfma_latency: Int) extends Component | ||||
| { | ||||
|   val io = new Bundle { | ||||
|     val ctrl = new ioCtrlFPU().flip | ||||
| @@ -470,7 +470,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component | ||||
|   val killm = io.ctrl.killm || io.ctrl.nack_mem | ||||
|   val wb_reg_valid = Reg(mem_reg_valid && !killm, resetVal = Bool(false)) | ||||
|  | ||||
|   val fp_decoder = new rocketFPUDecoder | ||||
|   val fp_decoder = new FPUDecoder | ||||
|   fp_decoder.io.inst := io.dpath.inst | ||||
|  | ||||
|   val ctrl = RegEn(fp_decoder.io.sigs, io.ctrl.valid) | ||||
| @@ -530,7 +530,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component | ||||
|   val cmd_fma = mem_ctrl.cmd === FCMD_MADD  || mem_ctrl.cmd === FCMD_MSUB || | ||||
|                 mem_ctrl.cmd === FCMD_NMADD || mem_ctrl.cmd === FCMD_NMSUB | ||||
|   val cmd_addsub = mem_ctrl.cmd === FCMD_ADD || mem_ctrl.cmd === FCMD_SUB | ||||
|   val sfma = new rocketFPUSFMAPipe(sfma_latency) | ||||
|   val sfma = new FPUSFMAPipe(sfma_latency) | ||||
|   sfma.io.valid := io.sfma.valid || ex_reg_valid && ctrl.fma && ctrl.single | ||||
|   sfma.io.in1 := Mux(io.sfma.valid, io.sfma.in1, ex_rs1) | ||||
|   sfma.io.in2 := Mux(io.sfma.valid, io.sfma.in2, ex_rs2) | ||||
| @@ -540,7 +540,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component | ||||
|   io.sfma.out := sfma.io.out | ||||
|   io.sfma.exc := sfma.io.exc | ||||
|  | ||||
|   val dfma = new rocketFPUDFMAPipe(dfma_latency) | ||||
|   val dfma = new FPUDFMAPipe(dfma_latency) | ||||
|   dfma.io.valid := io.dfma.valid || ex_reg_valid && ctrl.fma && !ctrl.single | ||||
|   dfma.io.in1 := Mux(io.dfma.valid, io.dfma.in1, ex_rs1) | ||||
|   dfma.io.in2 := Mux(io.dfma.valid, io.dfma.in2, ex_rs2) | ||||
|   | ||||
| @@ -29,9 +29,7 @@ case class ICacheConfig(sets: Int, assoc: Int, co: CoherencePolicyWithUncached, | ||||
|  | ||||
| class FrontendReq extends Bundle { | ||||
|   val pc = UFix(width = VADDR_BITS+1) | ||||
|   val status = Bits(width = 32) | ||||
|   val invalidate = Bool() | ||||
|   val invalidateTLB = Bool() | ||||
|   val mispredict = Bool() | ||||
|   val taken = Bool() | ||||
|   val currentpc = UFix(width = VADDR_BITS+1) | ||||
| @@ -99,14 +97,13 @@ class Frontend(implicit c: ICacheConfig) extends Component | ||||
|   btb.io.clr := !io.cpu.req.bits.taken | ||||
|   btb.io.correct_pc := io.cpu.req.bits.currentpc | ||||
|   btb.io.correct_target := io.cpu.req.bits.pc | ||||
|   btb.io.invalidate := io.cpu.req.bits.invalidate || io.cpu.req.bits.invalidateTLB | ||||
|   btb.io.invalidate := io.cpu.req.bits.invalidate || io.cpu.ptw.invalidate | ||||
|  | ||||
|   tlb.io.ptw <> io.cpu.ptw | ||||
|   tlb.io.req.valid := !stall && !icmiss | ||||
|   tlb.io.req.bits.vpn := s1_pc >> UFix(PGIDX_BITS) | ||||
|   tlb.io.req.bits.status := io.cpu.req.bits.status | ||||
|   tlb.io.req.bits.asid := UFix(0) | ||||
|   tlb.io.req.bits.invalidate := io.cpu.req.bits.invalidateTLB | ||||
|   tlb.io.req.bits.passthrough := Bool(false) | ||||
|   tlb.io.req.bits.instruction := Bool(true) | ||||
|  | ||||
|   icache.io.mem <> io.mem | ||||
|   | ||||
| @@ -3,16 +3,18 @@ package rocket | ||||
| import Chisel._ | ||||
| import Constants._ | ||||
| import uncore._ | ||||
| import Util._ | ||||
|  | ||||
| case class DCacheConfig(sets: Int, ways: Int, co: CoherencePolicy, | ||||
|                         nmshr: Int, nsecondary: Int, nsdq: Int, | ||||
|                         nmshr: Int, nrpq: Int, nsdq: Int, | ||||
|                         reqtagbits: Int = -1) | ||||
| { | ||||
|   require(isPow2(sets)) | ||||
|   require(isPow2(ways)) // TODO: relax this | ||||
|   def lines = sets*ways | ||||
|   def dm = ways == 1 | ||||
|   def ppnbits = PPN_BITS | ||||
|   def ppnbits = PADDR_BITS - PGIDX_BITS | ||||
|   def vpnbits = VADDR_BITS - PGIDX_BITS | ||||
|   def pgidxbits = PGIDX_BITS | ||||
|   def offbits = OFFSET_BITS | ||||
|   def paddrbits = ppnbits + pgidxbits | ||||
| @@ -161,7 +163,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { | ||||
|     val req_sec_val    = Bool(INPUT) | ||||
|     val req_sec_rdy    = Bool(OUTPUT) | ||||
|     val req_bits       = new MSHRReq().asInput | ||||
|     val req_sdq_id     = UFix(INPUT, log2Up(NSDQ)) | ||||
|     val req_sdq_id     = UFix(INPUT, log2Up(conf.nsdq)) | ||||
|  | ||||
|     val idx_match      = Bool(OUTPUT) | ||||
|     val idx            = Bits(OUTPUT, conf.idxbits) | ||||
| @@ -194,7 +196,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { | ||||
|   val idx_match = req.idx === io.req_bits.idx | ||||
|   val sec_rdy = idx_match && !flush && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !conf.co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) | ||||
|  | ||||
|   val rpq = (new Queue(NRPQ)) { new RPQEntry } | ||||
|   val rpq = (new Queue(conf.nrpq)) { new RPQEntry } | ||||
|   rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && req_use_rpq | ||||
|   rpq.io.enq.bits := io.req_bits | ||||
|   rpq.io.enq.bits.sdq_id := io.req_sdq_id | ||||
| @@ -312,24 +314,24 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { | ||||
|     val cpu_resp_tag = Bits(OUTPUT, conf.reqtagbits) | ||||
|   } | ||||
|  | ||||
|   val sdq_val = Reg(resetVal = Bits(0, NSDQ)) | ||||
|   val sdq_alloc_id = PriorityEncoder(~sdq_val(NSDQ-1,0)) | ||||
|   val sdq_val = Reg(resetVal = Bits(0, conf.nsdq)) | ||||
|   val sdq_alloc_id = PriorityEncoder(~sdq_val(conf.nsdq-1,0)) | ||||
|   val sdq_rdy = !sdq_val.andR | ||||
|   val (req_read, req_write) = cpuCmdToRW(io.req.bits.cmd) | ||||
|   val sdq_enq = io.req.valid && io.req.ready && req_write | ||||
|   val sdq = Mem(NSDQ) { io.req.bits.data.clone } | ||||
|   val sdq = Mem(conf.nsdq) { io.req.bits.data.clone } | ||||
|   when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data } | ||||
|  | ||||
|   val idxMatch = Vec(NMSHR) { Bool() } | ||||
|   val tagList = Vec(NMSHR) { Bits() } | ||||
|   val wbTagList = Vec(NMSHR) { Bits() } | ||||
|   val memRespMux = Vec(NMSHR) { new DataArrayReq } | ||||
|   val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayReq() } | ||||
|   val mem_req_arb = (new Arbiter(NMSHR)) { new TransactionInit } | ||||
|   val mem_finish_arb = (new Arbiter(NMSHR)) { new TransactionFinish } | ||||
|   val wb_req_arb = (new Arbiter(NMSHR)) { new WritebackReq } | ||||
|   val replay_arb = (new Arbiter(NMSHR)) { new Replay() } | ||||
|   val alloc_arb = (new Arbiter(NMSHR)) { Bool() } | ||||
|   val idxMatch = Vec(conf.nmshr) { Bool() } | ||||
|   val tagList = Vec(conf.nmshr) { Bits() } | ||||
|   val wbTagList = Vec(conf.nmshr) { Bits() } | ||||
|   val memRespMux = Vec(conf.nmshr) { new DataArrayReq } | ||||
|   val meta_req_arb = (new Arbiter(conf.nmshr)) { new MetaArrayReq() } | ||||
|   val mem_req_arb = (new Arbiter(conf.nmshr)) { new TransactionInit } | ||||
|   val mem_finish_arb = (new Arbiter(conf.nmshr)) { new TransactionFinish } | ||||
|   val wb_req_arb = (new Arbiter(conf.nmshr)) { new WritebackReq } | ||||
|   val replay_arb = (new Arbiter(conf.nmshr)) { new Replay() } | ||||
|   val alloc_arb = (new Arbiter(conf.nmshr)) { Bool() } | ||||
|  | ||||
|   val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.tag | ||||
|   val wb_probe_match = Mux1H(idxMatch, wbTagList) === io.req.bits.tag | ||||
| @@ -341,7 +343,7 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { | ||||
|   var writeback_probe_rdy = Bool(true) | ||||
|   var refill_probe_rdy = Bool(true) | ||||
|  | ||||
|   for (i <- 0 to NMSHR-1) { | ||||
|   for (i <- 0 to conf.nmshr-1) { | ||||
|     val mshr = new MSHR(i) | ||||
|  | ||||
|     idxMatch(i) := mshr.io.idx_match | ||||
| @@ -400,8 +402,8 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { | ||||
|  | ||||
|   val (replay_read, replay_write) = cpuCmdToRW(replay.bits.cmd) | ||||
|   val sdq_free = replay.valid && replay.ready && replay_write | ||||
|   sdq_val := sdq_val & ~((UFix(1) << replay.bits.sdq_id) & Fill(sdq_free, NSDQ)) |  | ||||
|              PriorityEncoderOH(~sdq_val(NSDQ-1,0)) & Fill(NSDQ, sdq_enq && io.req.bits.tag_miss) | ||||
|   sdq_val := sdq_val & ~((UFix(1) << replay.bits.sdq_id) & Fill(sdq_free, conf.nsdq)) |  | ||||
|              PriorityEncoderOH(~sdq_val(conf.nsdq-1,0)) & Fill(conf.nsdq, sdq_enq && io.req.bits.tag_miss) | ||||
|   val sdq_rdata = Reg() { io.req.bits.data.clone } | ||||
|   sdq_rdata := sdq(Mux(replay.valid && !replay.ready, replay.bits.sdq_id, replay_arb.io.out.bits.sdq_id)) | ||||
|   io.data_req.bits.data := sdq_rdata | ||||
| @@ -711,8 +713,8 @@ class AMOALU extends Component { | ||||
| class HellaCacheReq(implicit conf: DCacheConfig) extends Bundle { | ||||
|   val kill = Bool() | ||||
|   val typ  = Bits(width = 3) | ||||
|   val idx  = Bits(width = conf.pgidxbits) | ||||
|   val ppn  = Bits(width = conf.ppnbits) | ||||
|   val phys = Bool() | ||||
|   val addr = UFix(width = conf.ppnbits.max(conf.vpnbits+1) + conf.pgidxbits) | ||||
|   val data = Bits(width = conf.databits) | ||||
|   val tag  = Bits(width = conf.reqtagbits) | ||||
|   val cmd  = Bits(width = 4) | ||||
| @@ -739,6 +741,7 @@ class AlignmentExceptions extends Bundle { | ||||
|  | ||||
// Exception flags the D$ reports back on its request interface.
//   ma: ld/st are raised when a valid read/write request fails the
//       size-based alignment check on the registered request index bits.
//   pf: ld/st are raised when a valid read/write request gets xcpt_ld/xcpt_st
//       from the D$'s DTLB response. Reuses AlignmentExceptions for its ld/st pair.
| class HellaCacheExceptions extends Bundle { | ||||
|   val ma = new AlignmentExceptions | ||||
|   val pf = new AlignmentExceptions | ||||
| } | ||||
|  | ||||
| // interface between the processor and the D$; the D$'s internal DTLB reaches the page-table walker through ptw | ||||
| @@ -746,6 +749,7 @@ class ioHellaCache(implicit conf: DCacheConfig) extends Bundle { | ||||
|   val req = (new FIFOIO){ new HellaCacheReq } | ||||
|   val resp = (new PipeIO){ new HellaCacheResp }.flip | ||||
|   val xcpt = (new HellaCacheExceptions).asInput | ||||
|   val ptw = new IOTLBPTW().flip | ||||
| } | ||||
|  | ||||
| class HellaCache(implicit conf: DCacheConfig) extends Component { | ||||
| @@ -768,6 +772,8 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { | ||||
|   val early_nack       = Reg { Bool() } | ||||
|   val r_cpu_req_val_   = Reg(io.cpu.req.valid && io.cpu.req.ready, resetVal = Bool(false)) | ||||
|   val r_cpu_req_val    = r_cpu_req_val_ && !io.cpu.req.bits.kill && !early_nack | ||||
|   val r_cpu_req_phys   = Reg() { Bool() } | ||||
|   val r_cpu_req_vpn    = Reg() { UFix() } | ||||
|   val r_cpu_req_idx    = Reg() { Bits() } | ||||
|   val r_cpu_req_cmd    = Reg() { Bits() } | ||||
|   val r_cpu_req_type   = Reg() { Bits() } | ||||
| @@ -799,6 +805,14 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { | ||||
|   val r_req_readwrite = r_req_read || r_req_write || r_req_prefetch | ||||
|   val nack_hit = Bool() | ||||
|  | ||||
|   val dtlb = new TLB(8) | ||||
|   dtlb.io.ptw <> io.cpu.ptw | ||||
|   dtlb.io.req.valid := r_cpu_req_val_ && r_req_readwrite && !r_cpu_req_phys | ||||
|   dtlb.io.req.bits.passthrough := r_cpu_req_phys | ||||
|   dtlb.io.req.bits.asid := UFix(0) | ||||
|   dtlb.io.req.bits.vpn := r_cpu_req_vpn | ||||
|   dtlb.io.req.bits.instruction := Bool(false) | ||||
|  | ||||
|   val wb = new WritebackUnit | ||||
|   val prober = new ProbeUnit | ||||
|   val mshr = new MSHRFile | ||||
| @@ -812,7 +826,9 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { | ||||
|   flusher.io.mshr_req.ready := mshr.io.req.ready | ||||
|    | ||||
|   when (io.cpu.req.valid) { | ||||
|     r_cpu_req_idx  := io.cpu.req.bits.idx | ||||
|     r_cpu_req_phys := io.cpu.req.bits.phys | ||||
|     r_cpu_req_vpn  := io.cpu.req.bits.addr >> taglsb | ||||
|     r_cpu_req_idx  := io.cpu.req.bits.addr(indexmsb,0) | ||||
|     r_cpu_req_cmd  := io.cpu.req.bits.cmd | ||||
|     r_cpu_req_type := io.cpu.req.bits.typ | ||||
|     r_cpu_req_tag  := io.cpu.req.bits.tag | ||||
| @@ -839,8 +855,10 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { | ||||
|     (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0))) || | ||||
|     ((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != Bits(0))); | ||||
|      | ||||
|   io.cpu.xcpt.ma.ld := r_cpu_req_val_ && !early_nack && r_req_read && misaligned | ||||
|   io.cpu.xcpt.ma.st := r_cpu_req_val_ && !early_nack && r_req_write && misaligned | ||||
|   io.cpu.xcpt.ma.ld := r_cpu_req_val_ && r_req_read && misaligned | ||||
|   io.cpu.xcpt.ma.st := r_cpu_req_val_ && r_req_write && misaligned | ||||
|   io.cpu.xcpt.pf.ld := r_cpu_req_val_ && r_req_read && dtlb.io.resp.xcpt_ld | ||||
|   io.cpu.xcpt.pf.st := r_cpu_req_val_ && r_req_write && dtlb.io.resp.xcpt_st | ||||
|  | ||||
|   // tags | ||||
|   val meta = new MetaDataArrayArray(lines) | ||||
| @@ -855,11 +873,11 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { | ||||
|  | ||||
|   // cpu tag check | ||||
|   meta_arb.io.in(3).valid := io.cpu.req.valid | ||||
|   meta_arb.io.in(3).bits.idx := io.cpu.req.bits.idx(indexmsb,indexlsb) | ||||
|   meta_arb.io.in(3).bits.idx := io.cpu.req.bits.addr(indexmsb,indexlsb) | ||||
|   meta_arb.io.in(3).bits.rw := Bool(false) | ||||
|   meta_arb.io.in(3).bits.way_en := Fix(-1) | ||||
|   val early_tag_nack = !meta_arb.io.in(3).ready | ||||
|   val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.addr >> UFix(conf.pgidxbits-conf.offbits), io.cpu.req.bits.ppn) | ||||
|   val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.addr >> UFix(conf.pgidxbits-conf.offbits), dtlb.io.resp.ppn) | ||||
|   val cpu_req_tag = Cat(cpu_req_ppn, r_cpu_req_idx)(tagmsb,taglsb) | ||||
|   val tag_match_arr = (0 until conf.ways).map( w => conf.co.isValid(meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) | ||||
|   val tag_match = Cat(Bits(0),tag_match_arr:_*).orR | ||||
| @@ -892,8 +910,8 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { | ||||
|   data_arb.io.in(0).valid := io.mem.xact_rep.valid && conf.co.messageUpdatesDataArray(io.mem.xact_rep.bits) | ||||
|  | ||||
|   // load hits | ||||
|   data_arb.io.in(4).bits.offset := io.cpu.req.bits.idx(offsetmsb,ramindexlsb) | ||||
|   data_arb.io.in(4).bits.idx := io.cpu.req.bits.idx(indexmsb,indexlsb) | ||||
|   data_arb.io.in(4).bits.offset := io.cpu.req.bits.addr(offsetmsb,ramindexlsb) | ||||
|   data_arb.io.in(4).bits.idx := io.cpu.req.bits.addr(indexmsb,indexlsb) | ||||
|   data_arb.io.in(4).bits.rw := Bool(false) | ||||
|   data_arb.io.in(4).valid := io.cpu.req.valid && req_read | ||||
|   data_arb.io.in(4).bits.way_en := Fix(-1) // initiate load on all ways, mux after tag check | ||||
| @@ -1015,13 +1033,14 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { | ||||
|   val pending_fence = Reg(resetVal = Bool(false)) | ||||
|   pending_fence := (r_cpu_req_val_ && r_req_fence || pending_fence) && !mshr.io.fence_rdy | ||||
|   nack_hit := p_store_match || replay_val || r_req_write && !p_store_rdy || | ||||
|               p_store_idx_match && meta.io.state_req.valid | ||||
|               p_store_idx_match && meta.io.state_req.valid || | ||||
|               !r_cpu_req_phys && dtlb.io.resp.miss | ||||
|   val nack_miss  = !mshr.io.req.ready | ||||
|   val nack_flush = !mshr.io.fence_rdy && (r_req_fence || r_req_flush) || | ||||
|                    !flushed && r_req_flush | ||||
|   val nack = early_nack || r_req_readwrite && (nack_hit || nack_miss) || nack_flush | ||||
|  | ||||
|   io.cpu.req.ready   := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence | ||||
|   io.cpu.req.ready := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence && (dtlb.io.req.ready || io.cpu.req.bits.phys) | ||||
|   io.cpu.resp.valid  := (r_cpu_req_val && tag_hit && !mshr.io.secondary_miss && !nack && r_req_read) || mshr.io.cpu_resp_val | ||||
|   io.cpu.resp.bits.nack := r_cpu_req_val_ && !io.cpu.req.bits.kill && nack | ||||
|   io.cpu.resp.bits.replay := mshr.io.cpu_resp_val | ||||
|   | ||||
| @@ -5,16 +5,31 @@ import Node._ | ||||
| import Constants._ | ||||
| import scala.math._ | ||||
|  | ||||
| class ioPTW(n: Int)(implicit conf: RocketConfiguration) extends Bundle | ||||
| { | ||||
|   val requestor = Vec(n) { new IOTLBPTW }.flip | ||||
|   val mem   = new ioHellaCache()(conf.dcache) | ||||
// Port between one TLB and the page-table walker (PTW), declared from the
// TLB's point of view; the PTW instantiates a flipped Vec of these.
| class IOTLBPTW extends Bundle { | ||||
  // Refill request: the virtual page number to walk. The TLB asserts
  // req.valid while its state machine is in s_request.
|   val req = new FIFOIO()(UFix(width = VPN_BITS)) | ||||
  // Walk result returned by the PTW: an error flag, the physical page number
  // (written into the TLB's tag_ram on resp.valid) and permission bits.
|   val resp = new PipeIO()(new Bundle { | ||||
|     val error = Bool() | ||||
|     val ppn = UFix(width = PPN_BITS) | ||||
|     val perm = Bits(width = PERM_BITS) | ||||
|   }).flip | ||||
|  | ||||
  // Status word fanned out by the PTW from its datapath port; the TLB reads
  // the SR_S (supervisor) and SR_VM (virtual memory enable) bits from it.
|   val status = Bits(INPUT, width = 32) | ||||
  // Fanned out by the PTW from its datapath port; clears the TLB's tag CAM
  // and steers an in-flight refill into s_ready / s_wait_invalidate.
|   val invalidate = Bool(INPUT) | ||||
| } | ||||
|  | ||||
// Signals the PTW receives on its io.dpath port.
| class IODatapathPTW extends Bundle { | ||||
  // Page-table base: bits [PADDR_BITS-1:PGIDX_BITS] are concatenated with the
  // top-level VPN slice to form the address of the first page-table access.
|   val ptbr = UFix(INPUT, PADDR_BITS) | ||||
  // Forwarded unchanged to the invalidate input of every requestor port.
|   val invalidate = Bool(INPUT) | ||||
  // Forwarded unchanged to the status input of every requestor port.
|   val status = Bits(INPUT, 32) | ||||
| } | ||||
|  | ||||
| class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component | ||||
| { | ||||
|   val io = new ioPTW(n) | ||||
|   val io = new Bundle { | ||||
|     val requestor = Vec(n) { new IOTLBPTW }.flip | ||||
|     val mem = new ioHellaCache()(conf.dcache) | ||||
|     val dpath = new IODatapathPTW | ||||
|   } | ||||
|    | ||||
|   val levels = 3 | ||||
|   val bitsPerLevel = VPN_BITS/levels | ||||
| @@ -27,7 +42,7 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component | ||||
|   val r_req_vpn = Reg() { Bits() } | ||||
|   val r_req_dest = Reg() { Bits() } | ||||
|    | ||||
|   val req_addr = Reg() { Bits() } | ||||
|   val req_addr = Reg() { UFix() } | ||||
|   val r_resp_ppn = Reg() { Bits() }; | ||||
|   val r_resp_perm = Reg() { Bits() }; | ||||
|    | ||||
| @@ -41,21 +56,21 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component | ||||
|   when (arb.io.out.fire()) { | ||||
|     r_req_vpn := arb.io.out.bits | ||||
|     r_req_dest := arb.io.chosen | ||||
|     req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), arb.io.out.bits(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3)) | ||||
|     req_addr := Cat(io.dpath.ptbr(PADDR_BITS-1,PGIDX_BITS), arb.io.out.bits(VPN_BITS-1,VPN_BITS-bitsPerLevel), UFix(0,3)) | ||||
|   } | ||||
|  | ||||
|   val dmem_resp_val = Reg(io.mem.resp.valid, resetVal = Bool(false)) | ||||
|   when (dmem_resp_val) { | ||||
|     req_addr := Cat(io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, Bits(0,3)) | ||||
|     req_addr := Cat(io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, UFix(0,3)).toUFix | ||||
|     r_resp_perm := io.mem.resp.bits.data_subword(9,4); | ||||
|     r_resp_ppn  := io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS); | ||||
|   } | ||||
|    | ||||
|   io.mem.req.valid     := state === s_req | ||||
|   io.mem.req.bits.phys := Bool(true) | ||||
|   io.mem.req.bits.cmd  := M_XRD | ||||
|   io.mem.req.bits.typ  := MT_D | ||||
|   io.mem.req.bits.idx  := req_addr(PGIDX_BITS-1,0) | ||||
|   io.mem.req.bits.ppn  := Reg(req_addr(PADDR_BITS-1,PGIDX_BITS)) | ||||
|   io.mem.req.bits.addr := req_addr | ||||
|   io.mem.req.bits.kill := Bool(false) | ||||
|    | ||||
|   val resp_val = state === s_done || state === s_error | ||||
| @@ -73,6 +88,8 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component | ||||
|     io.requestor(i).resp.bits.error := resp_err | ||||
|     io.requestor(i).resp.bits.perm := r_resp_perm | ||||
|     io.requestor(i).resp.bits.ppn := resp_ppn.toUFix | ||||
|     io.requestor(i).invalidate := io.dpath.invalidate | ||||
|     io.requestor(i).status := io.dpath.status | ||||
|   } | ||||
|  | ||||
|   // control state machine | ||||
|   | ||||
| @@ -13,7 +13,8 @@ case class RocketConfiguration(ntiles: Int, co: CoherencePolicyWithUncached, | ||||
|  | ||||
| class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Component(resetSignal) | ||||
| { | ||||
|   implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(DMEM_PORTS)) | ||||
|   val memPorts = if (HAVE_VEC) 3 else 2 | ||||
|   implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(memPorts)) | ||||
|   implicit val conf = confIn.copy(dcache = dcConf) | ||||
|  | ||||
|   val io = new Bundle { | ||||
| @@ -21,13 +22,13 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon | ||||
|     val host = new ioHTIF(conf.ntiles) | ||||
|   } | ||||
|  | ||||
|   val cpu       = new rocketProc | ||||
|   val core      = new Core | ||||
|   val icache    = new Frontend()(confIn.icache) | ||||
|   val dcache    = new HellaCache | ||||
|  | ||||
|   val arbiter   = new MemArbiter(DMEM_PORTS) | ||||
|   arbiter.io.requestor(DMEM_DCACHE) <> dcache.io.mem | ||||
|   arbiter.io.requestor(DMEM_ICACHE) <> icache.io.mem | ||||
|   val arbiter   = new MemArbiter(memPorts) | ||||
|   arbiter.io.requestor(0) <> dcache.io.mem | ||||
|   arbiter.io.requestor(1) <> icache.io.mem | ||||
|  | ||||
|   io.tilelink.xact_init <> arbiter.io.mem.xact_init | ||||
|   io.tilelink.xact_init_data <> dcache.io.mem.xact_init_data | ||||
| @@ -38,14 +39,13 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon | ||||
|   io.tilelink.probe_rep <> dcache.io.mem.probe_rep | ||||
|   io.tilelink.probe_rep_data <> dcache.io.mem.probe_rep_data | ||||
|  | ||||
|   if (HAVE_VEC) | ||||
|   { | ||||
|   if (HAVE_VEC) { | ||||
|     val vicache = new Frontend()(ICacheConfig(128, 1, conf.co)) // 128 sets x 1 ways (8KB) | ||||
|     arbiter.io.requestor(DMEM_VICACHE) <> vicache.io.mem | ||||
|     cpu.io.vimem <> vicache.io.cpu | ||||
|     arbiter.io.requestor(2) <> vicache.io.mem | ||||
|     core.io.vimem <> vicache.io.cpu | ||||
|   } | ||||
|  | ||||
|   cpu.io.host       <> io.host | ||||
|   cpu.io.imem       <> icache.io.cpu | ||||
|   cpu.io.dmem       <> dcache.io.cpu | ||||
|   core.io.host <> io.host | ||||
|   core.io.imem <> icache.io.cpu | ||||
|   core.io.dmem <> dcache.io.cpu | ||||
| } | ||||
|   | ||||
| @@ -66,21 +66,11 @@ class PseudoLRU(n: Int) | ||||
|   } | ||||
| } | ||||
|  | ||||
| class IOTLBPTW extends Bundle { | ||||
|   val req = new FIFOIO()(UFix(width = VPN_BITS)) | ||||
|   val resp = new PipeIO()(new Bundle { | ||||
|     val error = Bool() | ||||
|     val ppn = UFix(width = PPN_BITS) | ||||
|     val perm = Bits(width = PERM_BITS) | ||||
|   }).flip | ||||
| } | ||||
|  | ||||
// Lookup request presented to a TLB.
// NOTE(review): the TLB code in this listing reads status and invalidate from
// io.ptw rather than from this bundle, and no visible driver assigns them any
// more - confirm whether the status/invalidate fields below are still live.
| class TLBReq extends Bundle | ||||
| { | ||||
  // asid and vpn are concatenated to form the tag looked up in the TLB's CAM.
|   val asid = UFix(width = ASID_BITS) | ||||
  // One bit wider than VPN_BITS: bits VPN_BITS and VPN_BITS-1 must match,
  // otherwise the TLB flags the address as bad (bad_va).
|   val vpn = UFix(width = VPN_BITS+1) | ||||
|   val status = Bits(width = 32) | ||||
|   val invalidate = Bool() | ||||
  // When set, resp.ppn is taken from the low PPN_BITS of vpn instead of the
  // matching TLB entry.
|   val passthrough = Bool() | ||||
  // Selects the instruction-fetch exception (xcpt_if) rather than the
  // load/store exceptions when the TLB decides whether to clear a hit entry.
|   val instruction = Bool() | ||||
| } | ||||
|  | ||||
| @@ -116,7 +106,7 @@ class TLB(entries: Int) extends Component | ||||
|   when (io.ptw.resp.valid) { tag_ram(r_refill_waddr) := io.ptw.resp.bits.ppn } | ||||
|    | ||||
|   val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUFix | ||||
|   tag_cam.io.clear := io.req.bits.invalidate | ||||
|   tag_cam.io.clear := io.ptw.invalidate | ||||
|   tag_cam.io.clear_hit := io.req.fire() && Mux(io.req.bits.instruction, io.resp.xcpt_if, io.resp.xcpt_ld && io.resp.xcpt_st) | ||||
|   tag_cam.io.tag := lookup_tag | ||||
|   tag_cam.io.write := state === s_wait && io.ptw.resp.valid | ||||
| @@ -148,8 +138,8 @@ class TLB(entries: Int) extends Component | ||||
|   val plru = new PseudoLRU(entries) | ||||
|   val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace) | ||||
|    | ||||
|   val status_s  = io.req.bits.status(SR_S)  // user/supervisor mode | ||||
|   val status_vm = io.req.bits.status(SR_VM) // virtual memory enable | ||||
|   val status_s  = io.ptw.status(SR_S)  // user/supervisor mode | ||||
|   val status_vm = io.ptw.status(SR_VM) // virtual memory enable | ||||
|   val bad_va = io.req.bits.vpn(VPN_BITS) != io.req.bits.vpn(VPN_BITS-1) | ||||
|   val tlb_hit  = status_vm && tag_hit | ||||
|   val tlb_miss = status_vm && !tag_hit && !bad_va | ||||
| @@ -163,7 +153,7 @@ class TLB(entries: Int) extends Component | ||||
|   io.resp.xcpt_st := bad_va || tlb_hit && !Mux(status_s, sw_array(tag_hit_addr), uw_array(tag_hit_addr)) | ||||
|   io.resp.xcpt_if := bad_va || tlb_hit && !Mux(status_s, sx_array(tag_hit_addr), ux_array(tag_hit_addr)) | ||||
|   io.resp.miss := tlb_miss | ||||
|   io.resp.ppn := Mux(status_vm, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(PPN_BITS-1,0)) | ||||
|   io.resp.ppn := Mux(status_vm && !io.req.bits.passthrough, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(PPN_BITS-1,0)) | ||||
|   io.resp.hit_idx := tag_cam.io.hits | ||||
|    | ||||
|   io.ptw.req.valid := state === s_request | ||||
| @@ -175,15 +165,15 @@ class TLB(entries: Int) extends Component | ||||
|     r_refill_waddr := repl_waddr | ||||
|   } | ||||
|   when (state === s_request) { | ||||
|     when (io.req.bits.invalidate) { | ||||
|     when (io.ptw.invalidate) { | ||||
|       state := s_ready | ||||
|     } | ||||
|     when (io.ptw.req.ready) { | ||||
|       state := s_wait | ||||
|       when (io.req.bits.invalidate) { state := s_wait_invalidate } | ||||
|       when (io.ptw.invalidate) { state := s_wait_invalidate } | ||||
|     } | ||||
|   } | ||||
|   when (state === s_wait && io.req.bits.invalidate) { | ||||
|   when (state === s_wait && io.ptw.invalidate) { | ||||
|     state := s_wait_invalidate | ||||
|   } | ||||
|   when ((state === s_wait || state === s_wait_invalidate) && io.ptw.resp.valid) { | ||||
| @@ -204,10 +194,6 @@ class ioDTLB_CPU_resp extends TLBResp(1) | ||||
|  | ||||
| class ioDTLB extends Bundle | ||||
| { | ||||
|   // status bits (from PCR), to check current permission and whether VM is enabled | ||||
|   val status = Bits(INPUT, 32) | ||||
|   // invalidate all TLB entries | ||||
|   val invalidate = Bool(INPUT) | ||||
|   val cpu_req = new ioDTLB_CPU_req().flip | ||||
|   val cpu_resp = new ioDTLB_CPU_resp() | ||||
|   val ptw = new IOTLBPTW | ||||
| @@ -225,8 +211,7 @@ class rocketTLB(entries: Int) extends Component | ||||
|   val tlb = new TLB(entries) | ||||
|   tlb.io.req.valid := r_cpu_req_val && !io.cpu_req.bits.kill | ||||
|   tlb.io.req.bits.instruction := Bool(false) | ||||
|   tlb.io.req.bits.invalidate := io.invalidate | ||||
|   tlb.io.req.bits.status := io.status | ||||
|   tlb.io.req.bits.passthrough := Bool(false) | ||||
|   tlb.io.req.bits.vpn := r_cpu_req_vpn | ||||
|   tlb.io.req.bits.asid := r_cpu_req_asid | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user