remove more global consts; refactor DTLBs
D$ now contains the DTLB. Provide the full VAddr with the initial request. The VU now has its own DTLBs.
This commit is contained in:
		| @@ -12,58 +12,40 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Comp | |||||||
|     val mem = new ioHellaCache()(conf.dcache) |     val mem = new ioHellaCache()(conf.dcache) | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   var req_val = Bool(false) |   val r_valid = io.requestor.map(r => Reg(r.req.valid)) | ||||||
|   var req_rdy = io.mem.req.ready |  | ||||||
|   for (i <- 0 until n) |   io.mem.req.valid := io.requestor.map(_.req.valid).reduce(_||_) | ||||||
|   { |   io.requestor(0).req.ready := io.mem.req.ready | ||||||
|     io.requestor(i).req.ready := req_rdy |   for (i <- 1 until n) | ||||||
|     req_val = req_val || io.requestor(i).req.valid |     io.requestor(i).req.ready := io.requestor(i-1).req.ready && !io.requestor(i-1).req.valid | ||||||
|     req_rdy = req_rdy && !io.requestor(i).req.valid |  | ||||||
|  |   io.mem.req.bits := io.requestor(n-1).req.bits | ||||||
|  |   io.mem.req.bits.tag := Cat(io.requestor(n-1).req.bits.tag, UFix(n-1, log2Up(n))) | ||||||
|  |   for (i <- n-2 to 0 by -1) { | ||||||
|  |     val req = io.requestor(i).req | ||||||
|  |     when (req.valid) { | ||||||
|  |       io.mem.req.bits.cmd := req.bits.cmd | ||||||
|  |       io.mem.req.bits.typ := req.bits.typ | ||||||
|  |       io.mem.req.bits.addr := req.bits.addr | ||||||
|  |       io.mem.req.bits.phys := req.bits.phys | ||||||
|  |       io.mem.req.bits.tag := Cat(req.bits.tag, UFix(i, log2Up(n))) | ||||||
|  |     } | ||||||
|  |     when (r_valid(i)) { | ||||||
|  |       io.mem.req.bits.kill := req.bits.kill | ||||||
|  |       io.mem.req.bits.data := req.bits.data | ||||||
|  |     } | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   var req_cmd  = io.requestor(n-1).req.bits.cmd |   for (i <- 0 until n) { | ||||||
|   var req_type = io.requestor(n-1).req.bits.typ |     val resp = io.requestor(i).resp | ||||||
|   var req_idx  = io.requestor(n-1).req.bits.idx |  | ||||||
|   var req_ppn  = io.requestor(n-1).req.bits.ppn |  | ||||||
|   var req_data = io.requestor(n-1).req.bits.data |  | ||||||
|   var req_kill = io.requestor(n-1).req.bits.kill |  | ||||||
|   var req_tag  = io.requestor(n-1).req.bits.tag |  | ||||||
|   for (i <- n-1 to 0 by -1) |  | ||||||
|   { |  | ||||||
|     val r = io.requestor(i).req |  | ||||||
|     req_cmd  = Mux(r.valid, r.bits.cmd, req_cmd) |  | ||||||
|     req_type = Mux(r.valid, r.bits.typ, req_type) |  | ||||||
|     req_idx  = Mux(r.valid, r.bits.idx, req_idx) |  | ||||||
|     req_ppn  = Mux(Reg(r.valid), r.bits.ppn, req_ppn) |  | ||||||
|     req_data = Mux(Reg(r.valid), r.bits.data, req_data) |  | ||||||
|     req_kill = Mux(Reg(r.valid), r.bits.kill, req_kill) |  | ||||||
|     req_tag  = Mux(r.valid, Cat(r.bits.tag, UFix(i, log2Up(n))), req_tag) |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   io.mem.req.valid     := req_val |  | ||||||
|   io.mem.req.bits.cmd  := req_cmd |  | ||||||
|   io.mem.req.bits.typ  := req_type |  | ||||||
|   io.mem.req.bits.idx  := req_idx |  | ||||||
|   io.mem.req.bits.ppn  := req_ppn |  | ||||||
|   io.mem.req.bits.data := req_data |  | ||||||
|   io.mem.req.bits.kill := req_kill |  | ||||||
|   io.mem.req.bits.tag  := req_tag |  | ||||||
|  |  | ||||||
|   for (i <- 0 until n) |  | ||||||
|   { |  | ||||||
|     val r = io.requestor(i).resp |  | ||||||
|     val x = io.requestor(i).xcpt |  | ||||||
|     val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UFix(i) |     val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UFix(i) | ||||||
|     x.ma.ld := io.mem.xcpt.ma.ld && Reg(io.requestor(i).req.valid) |     resp.valid := io.mem.resp.valid && tag_hit | ||||||
|     x.ma.st := io.mem.xcpt.ma.st && Reg(io.requestor(i).req.valid) |     io.requestor(i).xcpt := io.mem.xcpt | ||||||
|     r.valid             := io.mem.resp.valid && tag_hit |     resp.bits := io.mem.resp.bits | ||||||
|     r.bits.miss         := io.mem.resp.bits.miss && tag_hit |     resp.bits.tag := io.mem.resp.bits.tag >> UFix(log2Up(n)) | ||||||
|     r.bits.nack         := io.mem.resp.bits.nack && Reg(io.requestor(i).req.valid) |     resp.bits.miss := io.mem.resp.bits.miss && tag_hit | ||||||
|     r.bits.replay       := io.mem.resp.bits.replay && tag_hit |     resp.bits.nack := io.mem.resp.bits.nack && r_valid(i) | ||||||
|     r.bits.data         := io.mem.resp.bits.data |     resp.bits.replay := io.mem.resp.bits.replay && tag_hit | ||||||
|     r.bits.data_subword := io.mem.resp.bits.data_subword |  | ||||||
|     r.bits.typ          := io.mem.resp.bits.typ |  | ||||||
|     r.bits.tag          := io.mem.resp.bits.tag >> UFix(log2Up(n)) |  | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -80,15 +62,6 @@ class MemArbiter(n: Int) extends Component { | |||||||
|     val requestor = Vec(n) { new ioUncachedRequestor }.flip |     val requestor = Vec(n) { new ioUncachedRequestor }.flip | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   var xi_val = Bool(false) |  | ||||||
|   var xi_rdy = io.mem.xact_init.ready |  | ||||||
|   for (i <- 0 until n) |  | ||||||
|   { |  | ||||||
|     io.requestor(i).xact_init.ready := xi_rdy |  | ||||||
|     xi_val = xi_val || io.requestor(i).xact_init.valid |  | ||||||
|     xi_rdy = xi_rdy && !io.requestor(i).xact_init.valid |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   var xi_bits = new TransactionInit |   var xi_bits = new TransactionInit | ||||||
|   xi_bits := io.requestor(n-1).xact_init.bits |   xi_bits := io.requestor(n-1).xact_init.bits | ||||||
|   xi_bits.tile_xact_id := Cat(io.requestor(n-1).xact_init.bits.tile_xact_id, UFix(n-1, log2Up(n))) |   xi_bits.tile_xact_id := Cat(io.requestor(n-1).xact_init.bits.tile_xact_id, UFix(n-1, log2Up(n))) | ||||||
| @@ -101,24 +74,21 @@ class MemArbiter(n: Int) extends Component { | |||||||
|     xi_bits = Mux(io.requestor(i).xact_init.valid, my_xi_bits, xi_bits) |     xi_bits = Mux(io.requestor(i).xact_init.valid, my_xi_bits, xi_bits) | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   io.mem.xact_init.valid := xi_val |  | ||||||
|   io.mem.xact_init.bits := xi_bits |   io.mem.xact_init.bits := xi_bits | ||||||
|  |   io.mem.xact_init.valid := io.requestor.map(_.xact_init.valid).reduce(_||_) | ||||||
|   var xf_val = Bool(false) |   io.requestor(0).xact_init.ready := io.mem.xact_init.ready | ||||||
|   var xf_rdy = io.mem.xact_finish.ready |   for (i <- 1 until n) | ||||||
|   for (i <- 0 until n) |     io.requestor(i).xact_init.ready := io.requestor(i-1).xact_init.ready && !io.requestor(i-1).xact_init.valid | ||||||
|   { |  | ||||||
|     io.requestor(i).xact_finish.ready := xf_rdy |  | ||||||
|     xf_val = xf_val || io.requestor(i).xact_finish.valid |  | ||||||
|     xf_rdy = xf_rdy && !io.requestor(i).xact_finish.valid |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   var xf_bits = io.requestor(n-1).xact_finish.bits |   var xf_bits = io.requestor(n-1).xact_finish.bits | ||||||
|   for (i <- n-2 to 0 by -1) |   for (i <- n-2 to 0 by -1) | ||||||
|     xf_bits = Mux(io.requestor(i).xact_finish.valid, io.requestor(i).xact_finish.bits, xf_bits) |     xf_bits = Mux(io.requestor(i).xact_finish.valid, io.requestor(i).xact_finish.bits, xf_bits) | ||||||
|  |  | ||||||
|   io.mem.xact_finish.valid := xf_val |  | ||||||
|   io.mem.xact_finish.bits := xf_bits |   io.mem.xact_finish.bits := xf_bits | ||||||
|  |   io.mem.xact_finish.valid := io.requestor.map(_.xact_finish.valid).reduce(_||_) | ||||||
|  |   io.requestor(0).xact_finish.ready := io.mem.xact_finish.ready | ||||||
|  |   for (i <- 1 until n) | ||||||
|  |     io.requestor(i).xact_finish.ready := io.requestor(i-1).xact_finish.ready && !io.requestor(i-1).xact_finish.valid | ||||||
|  |  | ||||||
|   for (i <- 0 until n) |   for (i <- 0 until n) | ||||||
|   { |   { | ||||||
|   | |||||||
| @@ -154,12 +154,7 @@ trait InterruptConstants { | |||||||
|   val IRQ_TIMER = 7 |   val IRQ_TIMER = 7 | ||||||
| } | } | ||||||
|   |   | ||||||
| abstract trait RocketDcacheConstants extends ArbiterConstants with uncore.constants.AddressConstants { | abstract trait RocketDcacheConstants extends TileConfigConstants with uncore.constants.CacheConstants with uncore.constants.AddressConstants { | ||||||
|   val NMSHR = if (HAVE_VEC) 4 else 2 // number of primary misses |  | ||||||
|   require(log2Up(NMSHR)+3 <= uncore.Constants.TILE_XACT_ID_BITS) |  | ||||||
|   val NRPQ = 16; // number of secondary misses |  | ||||||
|   val NSDQ = 17; // number of secondary stores/AMOs |  | ||||||
|   val OFFSET_BITS = 6; // log2(cache line size in bytes) |  | ||||||
|   require(OFFSET_BITS == log2Up(uncore.Constants.CACHE_DATA_SIZE_IN_BYTES)) |   require(OFFSET_BITS == log2Up(uncore.Constants.CACHE_DATA_SIZE_IN_BYTES)) | ||||||
|   require(OFFSET_BITS <= uncore.Constants.X_INIT_WRITE_MASK_BITS) |   require(OFFSET_BITS <= uncore.Constants.X_INIT_WRITE_MASK_BITS) | ||||||
|   require(log2Up(OFFSET_BITS) <= uncore.Constants.X_INIT_SUBWORD_ADDR_BITS) |   require(log2Up(OFFSET_BITS) <= uncore.Constants.X_INIT_SUBWORD_ADDR_BITS) | ||||||
| @@ -196,15 +191,3 @@ trait VectorOpConstants { | |||||||
|   val VIMM2_ALU = UFix(1, 1) |   val VIMM2_ALU = UFix(1, 1) | ||||||
|   val VIMM2_X = UFix(0, 1) |   val VIMM2_X = UFix(0, 1) | ||||||
| } | } | ||||||
|  |  | ||||||
| abstract trait ArbiterConstants extends TileConfigConstants { |  | ||||||
|   val DCACHE_PORTS = 3 |  | ||||||
|   val DCACHE_CPU = 0 |  | ||||||
|   val DCACHE_PTW = 1 |  | ||||||
|   val DCACHE_VU = 2 |  | ||||||
|  |  | ||||||
|   val DMEM_PORTS = if (HAVE_VEC) 3 else 2 |  | ||||||
|   val DMEM_DCACHE = 0 |  | ||||||
|   val DMEM_ICACHE = 1 |  | ||||||
|   val DMEM_VICACHE = 2 |  | ||||||
| } |  | ||||||
|   | |||||||
| @@ -13,87 +13,57 @@ class ioRocket(implicit conf: RocketConfiguration) extends Bundle | |||||||
|   val dmem    = new ioHellaCache()(conf.dcache) |   val dmem    = new ioHellaCache()(conf.dcache) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| class rocketProc(implicit conf: RocketConfiguration) extends Component | class Core(implicit conf: RocketConfiguration) extends Component | ||||||
| { | { | ||||||
|   val io    = new ioRocket |   val io    = new ioRocket | ||||||
|     |     | ||||||
|   val ctrl  = new Control |   val ctrl  = new Control | ||||||
|   val dpath = new Datapath |   val dpath = new Datapath | ||||||
| 
 | 
 | ||||||
|   val ptw = Vec(0) { new IOTLBPTW } |  | ||||||
|   val arb = new HellaCacheArbiter(DCACHE_PORTS) |  | ||||||
| 
 |  | ||||||
|   var vu: vu = null |  | ||||||
|   if (HAVE_VEC) |  | ||||||
|   { |  | ||||||
|     vu = new vu() |  | ||||||
| 
 |  | ||||||
|     val vdtlb = new rocketTLB(8) |  | ||||||
|     vdtlb.io.invalidate := dpath.io.ptbr_wen |  | ||||||
|     vdtlb.io.status := dpath.io.ctrl.status |  | ||||||
|     ptw += vdtlb.io.ptw |  | ||||||
| 
 |  | ||||||
|     vdtlb.io.cpu_req <> vu.io.vec_tlb_req |  | ||||||
|     vu.io.vec_tlb_resp := vdtlb.io.cpu_resp |  | ||||||
|     vu.io.vec_tlb_resp.xcpt_pf := Bool(false) |  | ||||||
| 
 |  | ||||||
|     val pftlb = new rocketTLB(2) |  | ||||||
|     pftlb.io.invalidate := dpath.io.ptbr_wen |  | ||||||
|     pftlb.io.status := dpath.io.ctrl.status |  | ||||||
|     pftlb.io.cpu_req <> vu.io.vec_pftlb_req |  | ||||||
|     ptw += pftlb.io.ptw |  | ||||||
| 
 |  | ||||||
|     vu.io.vec_pftlb_resp := pftlb.io.cpu_resp |  | ||||||
|     vu.io.vec_pftlb_resp.xcpt_ld := Bool(false) |  | ||||||
|     vu.io.vec_pftlb_resp.xcpt_st := Bool(false) |  | ||||||
|   } |  | ||||||
| 
 |  | ||||||
|   // connect DTLB to ctrl+dpath |  | ||||||
|   val dtlb = new rocketTLB(DTLB_ENTRIES) |  | ||||||
|   dtlb.io.invalidate := dpath.io.ptbr_wen |  | ||||||
|   dtlb.io.status := dpath.io.ctrl.status |  | ||||||
|   ptw += dtlb.io.ptw |  | ||||||
| 
 |  | ||||||
|   dtlb.io.cpu_req.valid := ctrl.io.dtlb_val |  | ||||||
|   dtlb.io.cpu_req.bits.kill := ctrl.io.dtlb_kill |  | ||||||
|   dtlb.io.cpu_req.bits.cmd := ctrl.io.dmem.req.bits.cmd |  | ||||||
|   dtlb.io.cpu_req.bits.asid := UFix(0) |  | ||||||
|   dtlb.io.cpu_req.bits.vpn := dpath.io.dtlb.vpn |  | ||||||
|   ctrl.io.xcpt_dtlb_ld := dtlb.io.cpu_resp.xcpt_ld |  | ||||||
|   ctrl.io.xcpt_dtlb_st := dtlb.io.cpu_resp.xcpt_st |  | ||||||
|   ctrl.io.dtlb_rdy := dtlb.io.cpu_req.ready |  | ||||||
|   ctrl.io.dtlb_miss := dtlb.io.cpu_resp.miss |  | ||||||
| 
 |  | ||||||
|   arb.io.requestor(DCACHE_CPU).req.bits.ppn := dtlb.io.cpu_resp.ppn |  | ||||||
| 
 |  | ||||||
|   ctrl.io.dpath <> dpath.io.ctrl |   ctrl.io.dpath <> dpath.io.ctrl | ||||||
|   dpath.io.host <> io.host |   dpath.io.host <> io.host | ||||||
| 
 | 
 | ||||||
|   ctrl.io.imem <> io.imem |   ctrl.io.imem <> io.imem | ||||||
|   dpath.io.imem <> io.imem |   dpath.io.imem <> io.imem | ||||||
| 
 | 
 | ||||||
|   ctrl.io.dmem <> arb.io.requestor(DCACHE_CPU) |   val dmemArb = new HellaCacheArbiter(if (HAVE_VEC) 3 else 2) | ||||||
|   dpath.io.dmem <> arb.io.requestor(DCACHE_CPU) |   dmemArb.io.mem <> io.dmem | ||||||
|  |   val dmem = dmemArb.io.requestor | ||||||
|  |   dmem(1) <> ctrl.io.dmem | ||||||
|  |   dmem(1) <> dpath.io.dmem | ||||||
| 
 | 
 | ||||||
|   var fpu: rocketFPU = null |   val ptw = collection.mutable.ArrayBuffer(io.imem.ptw, io.dmem.ptw) | ||||||
|   if (HAVE_FPU) | 
 | ||||||
|   { |   val fpu: FPU = if (HAVE_FPU) { | ||||||
|     fpu = new rocketFPU(4,6) |     val fpu = new FPU(4,6) | ||||||
|     dpath.io.fpu <> fpu.io.dpath |     dpath.io.fpu <> fpu.io.dpath | ||||||
|     ctrl.io.fpu <> fpu.io.ctrl |     ctrl.io.fpu <> fpu.io.ctrl | ||||||
|   } |     fpu | ||||||
|  |   } else null | ||||||
|  | 
 | ||||||
|  |   if (HAVE_VEC) { | ||||||
|  |     val vu = new vu() | ||||||
|  | 
 | ||||||
|  |     val vdtlb = new rocketTLB(8) | ||||||
|  |     ptw += vdtlb.io.ptw | ||||||
|  |     vdtlb.io.cpu_req <> vu.io.vec_tlb_req | ||||||
|  |     vu.io.vec_tlb_resp := vdtlb.io.cpu_resp | ||||||
|  |     vu.io.vec_tlb_resp.xcpt_pf := Bool(false) | ||||||
|  | 
 | ||||||
|  |     val pftlb = new rocketTLB(2) | ||||||
|  |     pftlb.io.cpu_req <> vu.io.vec_pftlb_req | ||||||
|  |     ptw += pftlb.io.ptw | ||||||
|  |     vu.io.vec_pftlb_resp := pftlb.io.cpu_resp | ||||||
|  |     vu.io.vec_pftlb_resp.xcpt_ld := Bool(false) | ||||||
|  |     vu.io.vec_pftlb_resp.xcpt_st := Bool(false) | ||||||
| 
 | 
 | ||||||
|   if (HAVE_VEC) |  | ||||||
|   { |  | ||||||
|     dpath.io.vec_ctrl <> ctrl.io.vec_dpath |     dpath.io.vec_ctrl <> ctrl.io.vec_dpath | ||||||
| 
 | 
 | ||||||
|     // hooking up vector I$ |     // hooking up vector I$ | ||||||
|     ptw += io.vimem.ptw |     ptw += io.vimem.ptw | ||||||
|     io.vimem.req.bits.status := dpath.io.ctrl.status |  | ||||||
|     io.vimem.req.bits.pc := vu.io.imem_req.bits |     io.vimem.req.bits.pc := vu.io.imem_req.bits | ||||||
|     io.vimem.req.valid := vu.io.imem_req.valid |     io.vimem.req.valid := vu.io.imem_req.valid | ||||||
|     io.vimem.req.bits.invalidate := ctrl.io.dpath.flush_inst |     io.vimem.req.bits.invalidate := ctrl.io.dpath.flush_inst | ||||||
|     io.vimem.req.bits.invalidateTLB := dpath.io.ptbr_wen |  | ||||||
|     vu.io.imem_resp.valid := io.vimem.resp.valid |     vu.io.imem_resp.valid := io.vimem.resp.valid | ||||||
|     vu.io.imem_resp.bits.pc := io.vimem.resp.bits.pc |     vu.io.imem_resp.bits.pc := io.vimem.resp.bits.pc | ||||||
|     vu.io.imem_resp.bits.data := io.vimem.resp.bits.data |     vu.io.imem_resp.bits.data := io.vimem.resp.bits.data | ||||||
| @@ -155,21 +125,16 @@ class rocketProc(implicit conf: RocketConfiguration) extends Component | |||||||
|     vu.io.xcpt.hold := ctrl.io.vec_iface.hold |     vu.io.xcpt.hold := ctrl.io.vec_iface.hold | ||||||
| 
 | 
 | ||||||
|     // hooking up vector memory interface |     // hooking up vector memory interface | ||||||
|     arb.io.requestor(DCACHE_VU).req.valid := vu.io.dmem_req.valid |     dmem(2).req.valid := vu.io.dmem_req.valid | ||||||
|     arb.io.requestor(DCACHE_VU).req.bits.kill := vu.io.dmem_req.bits.kill |     dmem(2).req.bits := vu.io.dmem_req.bits | ||||||
|     arb.io.requestor(DCACHE_VU).req.bits.cmd := vu.io.dmem_req.bits.cmd |     dmem(2).req.bits.data := Reg(StoreGen(vu.io.dmem_req.bits.typ, Bits(0), vu.io.dmem_req.bits.data).data) | ||||||
|     arb.io.requestor(DCACHE_VU).req.bits.typ := vu.io.dmem_req.bits.typ |  | ||||||
|     arb.io.requestor(DCACHE_VU).req.bits.idx := vu.io.dmem_req.bits.idx |  | ||||||
|     arb.io.requestor(DCACHE_VU).req.bits.ppn := Reg(vu.io.dmem_req.bits.ppn) |  | ||||||
|     arb.io.requestor(DCACHE_VU).req.bits.data := Reg(StoreGen(vu.io.dmem_req.bits.typ, Bits(0), vu.io.dmem_req.bits.data).data) |  | ||||||
|     arb.io.requestor(DCACHE_VU).req.bits.tag := vu.io.dmem_req.bits.tag |  | ||||||
| 
 | 
 | ||||||
|     vu.io.dmem_req.ready := arb.io.requestor(DCACHE_VU).req.ready |     vu.io.dmem_req.ready := dmem(2).req.ready | ||||||
|     vu.io.dmem_resp.valid := Reg(arb.io.requestor(DCACHE_VU).resp.valid) |     vu.io.dmem_resp.valid := Reg(dmem(2).resp.valid) | ||||||
|     vu.io.dmem_resp.bits.nack := arb.io.requestor(DCACHE_VU).resp.bits.nack |     vu.io.dmem_resp.bits.nack := dmem(2).resp.bits.nack | ||||||
|     vu.io.dmem_resp.bits.data := arb.io.requestor(DCACHE_VU).resp.bits.data_subword |     vu.io.dmem_resp.bits.data := dmem(2).resp.bits.data_subword | ||||||
|     vu.io.dmem_resp.bits.tag := Reg(arb.io.requestor(DCACHE_VU).resp.bits.tag) |     vu.io.dmem_resp.bits.tag := Reg(dmem(2).resp.bits.tag) | ||||||
|     vu.io.dmem_resp.bits.typ := Reg(arb.io.requestor(DCACHE_VU).resp.bits.typ) |     vu.io.dmem_resp.bits.typ := Reg(dmem(2).resp.bits.typ) | ||||||
| 
 | 
 | ||||||
|     // share vector integer multiplier with rocket |     // share vector integer multiplier with rocket | ||||||
|     dpath.io.vec_imul_req <> vu.io.cp_imul_req |     dpath.io.vec_imul_req <> vu.io.cp_imul_req | ||||||
| @@ -178,22 +143,13 @@ class rocketProc(implicit conf: RocketConfiguration) extends Component | |||||||
|     // share sfma and dfma pipelines with rocket |     // share sfma and dfma pipelines with rocket | ||||||
|     fpu.io.sfma <> vu.io.cp_sfma |     fpu.io.sfma <> vu.io.cp_sfma | ||||||
|     fpu.io.dfma <> vu.io.cp_dfma |     fpu.io.dfma <> vu.io.cp_dfma | ||||||
|   } |   } else if (fpu != null) { | ||||||
|   else |     fpu.io.sfma.valid := Bool(false) | ||||||
|   { |     fpu.io.dfma.valid := Bool(false) | ||||||
|     arb.io.requestor(DCACHE_VU).req.valid := Bool(false) |  | ||||||
|     if (HAVE_FPU) |  | ||||||
|     { |  | ||||||
|       fpu.io.sfma.valid := Bool(false) |  | ||||||
|       fpu.io.dfma.valid := Bool(false) |  | ||||||
|     } |  | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   ptw += io.imem.ptw |  | ||||||
|   val thePTW = new PTW(ptw.length) |   val thePTW = new PTW(ptw.length) | ||||||
|   thePTW.io.requestor <> ptw |   ptw zip thePTW.io.requestor map { case (a, b) => a <> b } | ||||||
|   thePTW.io.ptbr := dpath.io.ptbr; |   thePTW.io.dpath <> dpath.io.ptw | ||||||
|   arb.io.requestor(DCACHE_PTW) <> thePTW.io.mem |   dmem(0) <> thePTW.io.mem | ||||||
| 
 |  | ||||||
|   arb.io.mem <> io.dmem |  | ||||||
| } | } | ||||||
| @@ -569,8 +569,8 @@ class Control(implicit conf: RocketConfiguration) extends Component | |||||||
|     (mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause), |     (mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause), | ||||||
|     (mem_reg_mem_val && io.dmem.xcpt.ma.ld,  UFix( 8)), |     (mem_reg_mem_val && io.dmem.xcpt.ma.ld,  UFix( 8)), | ||||||
|     (mem_reg_mem_val && io.dmem.xcpt.ma.st,  UFix( 9)), |     (mem_reg_mem_val && io.dmem.xcpt.ma.st,  UFix( 9)), | ||||||
|     (mem_reg_mem_val && io.xcpt_dtlb_ld,     UFix(10)), |     (mem_reg_mem_val && io.dmem.xcpt.pf.ld,     UFix(10)), | ||||||
|     (mem_reg_mem_val && io.xcpt_dtlb_st,     UFix(11)))) |     (mem_reg_mem_val && io.dmem.xcpt.pf.st,     UFix(11)))) | ||||||
|  |  | ||||||
|   wb_reg_xcpt := mem_xcpt && !take_pc_wb && !wb_reg_replay_next |   wb_reg_xcpt := mem_xcpt && !take_pc_wb && !wb_reg_replay_next | ||||||
|   when (mem_xcpt) { wb_reg_cause := mem_cause } |   when (mem_xcpt) { wb_reg_cause := mem_cause } | ||||||
| @@ -644,7 +644,7 @@ class Control(implicit conf: RocketConfiguration) extends Component | |||||||
|  |  | ||||||
|   // replay inst in ex stage |   // replay inst in ex stage | ||||||
|   val replay_ex    = wb_reg_dcache_miss && ex_reg_load_use || mem_reg_flush_inst ||  |   val replay_ex    = wb_reg_dcache_miss && ex_reg_load_use || mem_reg_flush_inst ||  | ||||||
|                      ex_reg_mem_val && !(io.dmem.req.ready && io.dtlb_rdy) || |                      ex_reg_mem_val && !io.dmem.req.ready || | ||||||
|                      ex_reg_div_val && !io.dpath.div_rdy || |                      ex_reg_div_val && !io.dpath.div_rdy || | ||||||
|                      ex_reg_mul_val && !io.dpath.mul_rdy || |                      ex_reg_mul_val && !io.dpath.mul_rdy || | ||||||
|                      mem_reg_replay_next |                      mem_reg_replay_next | ||||||
| @@ -652,7 +652,7 @@ class Control(implicit conf: RocketConfiguration) extends Component | |||||||
|  |  | ||||||
|   // replay inst in mem stage |   // replay inst in mem stage | ||||||
|   val mem_ll_wb = io.dpath.mem_wb || io.dpath.mul_result_val || io.dpath.div_result_val |   val mem_ll_wb = io.dpath.mem_wb || io.dpath.mul_result_val || io.dpath.div_result_val | ||||||
|   val dmem_kill_mem = mem_reg_valid && (io.dtlb_miss || io.dmem.resp.bits.nack) |   val dmem_kill_mem = mem_reg_valid && io.dmem.resp.bits.nack | ||||||
|   val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem |   val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem | ||||||
|   val replay_mem  = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay || fpu_kill_mem |   val replay_mem  = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay || fpu_kill_mem | ||||||
|   val killm_common = mem_reg_wen && mem_ll_wb || take_pc_wb || mem_reg_xcpt || !mem_reg_valid |   val killm_common = mem_reg_wen && mem_ll_wb || take_pc_wb || mem_reg_xcpt || !mem_reg_valid | ||||||
| @@ -734,7 +734,7 @@ class Control(implicit conf: RocketConfiguration) extends Component | |||||||
|     id_ex_hazard || id_mem_hazard || id_wb_hazard || |     id_ex_hazard || id_mem_hazard || id_wb_hazard || | ||||||
|     id_stall_raddr1 || id_stall_raddr2 || id_stall_waddr || |     id_stall_raddr1 || id_stall_raddr2 || id_stall_waddr || | ||||||
|     id_fp_val && id_stall_fpu || |     id_fp_val && id_stall_fpu || | ||||||
|     id_mem_val && !(io.dmem.req.ready && io.dtlb_rdy) || |     id_mem_val && !io.dmem.req.ready || | ||||||
|     vec_stalld |     vec_stalld | ||||||
|   ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || id_interrupt |   ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || id_interrupt | ||||||
|  |  | ||||||
| @@ -772,10 +772,9 @@ class Control(implicit conf: RocketConfiguration) extends Component | |||||||
|   io.fpu.killx := ctrl_killx |   io.fpu.killx := ctrl_killx | ||||||
|   io.fpu.killm := killm_common |   io.fpu.killm := killm_common | ||||||
|  |  | ||||||
|   io.dtlb_val           := ex_reg_mem_val |  | ||||||
|   io.dtlb_kill          := !mem_reg_valid |  | ||||||
|   io.dmem.req.valid     := ex_reg_mem_val |   io.dmem.req.valid     := ex_reg_mem_val | ||||||
|   io.dmem.req.bits.kill := killm_common || mem_xcpt || io.dtlb_miss |   io.dmem.req.bits.kill := killm_common || mem_xcpt | ||||||
|   io.dmem.req.bits.cmd  := ex_reg_mem_cmd |   io.dmem.req.bits.cmd  := ex_reg_mem_cmd | ||||||
|   io.dmem.req.bits.typ  := ex_reg_mem_type |   io.dmem.req.bits.typ  := ex_reg_mem_type | ||||||
|  |   io.dmem.req.bits.phys := Bool(false) | ||||||
| } | } | ||||||
|   | |||||||
| @@ -13,9 +13,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Component | |||||||
|     val ctrl  = new ioCtrlDpath().flip |     val ctrl  = new ioCtrlDpath().flip | ||||||
|     val dmem = new ioHellaCache()(conf.dcache) |     val dmem = new ioHellaCache()(conf.dcache) | ||||||
|     val dtlb = new ioDTLB_CPU_req_bundle().asOutput() |     val dtlb = new ioDTLB_CPU_req_bundle().asOutput() | ||||||
|  |     val ptw = new IODatapathPTW().flip | ||||||
|     val imem  = new IOCPUFrontend()(conf.icache) |     val imem  = new IOCPUFrontend()(conf.icache) | ||||||
|     val ptbr_wen = Bool(OUTPUT); |  | ||||||
|     val ptbr = UFix(OUTPUT, PADDR_BITS); |  | ||||||
|     val fpu = new ioDpathFPU(); |     val fpu = new ioDpathFPU(); | ||||||
|     val vec_ctrl = new ioCtrlDpathVec().flip |     val vec_ctrl = new ioCtrlDpathVec().flip | ||||||
|     val vec_iface = new ioDpathVecInterface() |     val vec_iface = new ioDpathVecInterface() | ||||||
| @@ -81,9 +80,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component | |||||||
|   val ex_effective_address = Cat(ex_ea_sign, ex_alu_adder_out(VADDR_BITS-1,0)).toUFix |   val ex_effective_address = Cat(ex_ea_sign, ex_alu_adder_out(VADDR_BITS-1,0)).toUFix | ||||||
|  |  | ||||||
|   // hook up I$ |   // hook up I$ | ||||||
|   io.imem.req.bits.invalidateTLB := pcr.io.ptbr_wen |  | ||||||
|   io.imem.req.bits.currentpc := ex_reg_pc |   io.imem.req.bits.currentpc := ex_reg_pc | ||||||
|   io.imem.req.bits.status := pcr.io.status |  | ||||||
|   io.imem.req.bits.pc := |   io.imem.req.bits.pc := | ||||||
|     Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4, |     Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4, | ||||||
|     Mux(io.ctrl.sel_pc === PC_EX,  Mux(io.ctrl.ex_jalr, ex_effective_address, ex_branch_target), |     Mux(io.ctrl.sel_pc === PC_EX,  Mux(io.ctrl.ex_jalr, ex_effective_address, ex_branch_target), | ||||||
| @@ -209,7 +206,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component | |||||||
|  |  | ||||||
|   // D$ request interface (registered inside D$ module) |   // D$ request interface (registered inside D$ module) | ||||||
|   // other signals (req_val, req_rdy) connect to control module   |   // other signals (req_val, req_rdy) connect to control module   | ||||||
|   io.dmem.req.bits.idx  := ex_effective_address |   io.dmem.req.bits.addr := ex_effective_address | ||||||
|   io.dmem.req.bits.data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2) |   io.dmem.req.bits.data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2) | ||||||
|   io.dmem.req.bits.tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val) |   io.dmem.req.bits.tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val) | ||||||
|   require(io.dmem.req.bits.tag.getWidth >= 6) |   require(io.dmem.req.bits.tag.getWidth >= 6) | ||||||
| @@ -225,8 +222,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Component | |||||||
|   io.ctrl.irq_ipi      := pcr.io.irq_ipi;   |   io.ctrl.irq_ipi      := pcr.io.irq_ipi;   | ||||||
|   io.ctrl.status       := pcr.io.status; |   io.ctrl.status       := pcr.io.status; | ||||||
|   io.ctrl.pcr_replay   := pcr.io.replay |   io.ctrl.pcr_replay   := pcr.io.replay | ||||||
|   io.ptbr              := pcr.io.ptbr; |  | ||||||
|   io.ptbr_wen          := pcr.io.ptbr_wen; |   io.ptw.ptbr := pcr.io.ptbr | ||||||
|  |   io.ptw.invalidate := pcr.io.ptbr_wen | ||||||
|  |   io.ptw.status := pcr.io.status | ||||||
|    |    | ||||||
| 	// branch resolution logic | 	// branch resolution logic | ||||||
|   io.ctrl.jalr_eq := ex_reg_rs1 === id_pc.toFix && ex_reg_op2(id_imm_small.getWidth-1,0) === UFix(0) |   io.ctrl.jalr_eq := ex_reg_rs1 === id_pc.toFix && ex_reg_op2(id_imm_small.getWidth-1,0) === UFix(0) | ||||||
|   | |||||||
| @@ -5,8 +5,9 @@ import Node._ | |||||||
| import Constants._ | import Constants._ | ||||||
| import Instructions._ | import Instructions._ | ||||||
| import Util._ | import Util._ | ||||||
|  | import FPConstants._ | ||||||
|  |  | ||||||
| object rocketFPConstants | object FPConstants | ||||||
| { | { | ||||||
|   val FCMD_ADD =        Bits("b000000") |   val FCMD_ADD =        Bits("b000000") | ||||||
|   val FCMD_SUB =        Bits("b000001") |   val FCMD_SUB =        Bits("b000001") | ||||||
| @@ -45,7 +46,6 @@ object rocketFPConstants | |||||||
|   val FCMD_WIDTH = 6 |   val FCMD_WIDTH = 6 | ||||||
|   val FSR_WIDTH = 8 |   val FSR_WIDTH = 8 | ||||||
| } | } | ||||||
| import rocketFPConstants._ |  | ||||||
|  |  | ||||||
| class FPUCtrlSigs extends Bundle | class FPUCtrlSigs extends Bundle | ||||||
| { | { | ||||||
| @@ -64,7 +64,7 @@ class FPUCtrlSigs extends Bundle | |||||||
|   val wrfsr = Bool() |   val wrfsr = Bool() | ||||||
| } | } | ||||||
|  |  | ||||||
| class rocketFPUDecoder extends Component | class FPUDecoder extends Component | ||||||
| { | { | ||||||
|   val io = new Bundle { |   val io = new Bundle { | ||||||
|     val inst = Bits(INPUT, 32) |     val inst = Bits(INPUT, 32) | ||||||
| @@ -378,7 +378,7 @@ class ioFMA(width: Int) extends Bundle { | |||||||
|   val exc = Bits(OUTPUT, 5) |   val exc = Bits(OUTPUT, 5) | ||||||
| } | } | ||||||
|  |  | ||||||
| class rocketFPUSFMAPipe(val latency: Int) extends Component | class FPUSFMAPipe(val latency: Int) extends Component | ||||||
| { | { | ||||||
|   val io = new ioFMA(33) |   val io = new ioFMA(33) | ||||||
|    |    | ||||||
| @@ -415,7 +415,7 @@ class rocketFPUSFMAPipe(val latency: Int) extends Component | |||||||
|   io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits |   io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits | ||||||
| } | } | ||||||
|  |  | ||||||
| class rocketFPUDFMAPipe(val latency: Int) extends Component | class FPUDFMAPipe(val latency: Int) extends Component | ||||||
| { | { | ||||||
|   val io = new ioFMA(65) |   val io = new ioFMA(65) | ||||||
|    |    | ||||||
| @@ -452,7 +452,7 @@ class rocketFPUDFMAPipe(val latency: Int) extends Component | |||||||
|   io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits |   io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits | ||||||
| } | } | ||||||
|  |  | ||||||
| class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component | class FPU(sfma_latency: Int, dfma_latency: Int) extends Component | ||||||
| { | { | ||||||
|   val io = new Bundle { |   val io = new Bundle { | ||||||
|     val ctrl = new ioCtrlFPU().flip |     val ctrl = new ioCtrlFPU().flip | ||||||
| @@ -470,7 +470,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component | |||||||
|   val killm = io.ctrl.killm || io.ctrl.nack_mem |   val killm = io.ctrl.killm || io.ctrl.nack_mem | ||||||
|   val wb_reg_valid = Reg(mem_reg_valid && !killm, resetVal = Bool(false)) |   val wb_reg_valid = Reg(mem_reg_valid && !killm, resetVal = Bool(false)) | ||||||
|  |  | ||||||
|   val fp_decoder = new rocketFPUDecoder |   val fp_decoder = new FPUDecoder | ||||||
|   fp_decoder.io.inst := io.dpath.inst |   fp_decoder.io.inst := io.dpath.inst | ||||||
|  |  | ||||||
|   val ctrl = RegEn(fp_decoder.io.sigs, io.ctrl.valid) |   val ctrl = RegEn(fp_decoder.io.sigs, io.ctrl.valid) | ||||||
| @@ -530,7 +530,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component | |||||||
|   val cmd_fma = mem_ctrl.cmd === FCMD_MADD  || mem_ctrl.cmd === FCMD_MSUB || |   val cmd_fma = mem_ctrl.cmd === FCMD_MADD  || mem_ctrl.cmd === FCMD_MSUB || | ||||||
|                 mem_ctrl.cmd === FCMD_NMADD || mem_ctrl.cmd === FCMD_NMSUB |                 mem_ctrl.cmd === FCMD_NMADD || mem_ctrl.cmd === FCMD_NMSUB | ||||||
|   val cmd_addsub = mem_ctrl.cmd === FCMD_ADD || mem_ctrl.cmd === FCMD_SUB |   val cmd_addsub = mem_ctrl.cmd === FCMD_ADD || mem_ctrl.cmd === FCMD_SUB | ||||||
|   val sfma = new rocketFPUSFMAPipe(sfma_latency) |   val sfma = new FPUSFMAPipe(sfma_latency) | ||||||
|   sfma.io.valid := io.sfma.valid || ex_reg_valid && ctrl.fma && ctrl.single |   sfma.io.valid := io.sfma.valid || ex_reg_valid && ctrl.fma && ctrl.single | ||||||
|   sfma.io.in1 := Mux(io.sfma.valid, io.sfma.in1, ex_rs1) |   sfma.io.in1 := Mux(io.sfma.valid, io.sfma.in1, ex_rs1) | ||||||
|   sfma.io.in2 := Mux(io.sfma.valid, io.sfma.in2, ex_rs2) |   sfma.io.in2 := Mux(io.sfma.valid, io.sfma.in2, ex_rs2) | ||||||
| @@ -540,7 +540,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component | |||||||
|   io.sfma.out := sfma.io.out |   io.sfma.out := sfma.io.out | ||||||
|   io.sfma.exc := sfma.io.exc |   io.sfma.exc := sfma.io.exc | ||||||
|  |  | ||||||
|   val dfma = new rocketFPUDFMAPipe(dfma_latency) |   val dfma = new FPUDFMAPipe(dfma_latency) | ||||||
|   dfma.io.valid := io.dfma.valid || ex_reg_valid && ctrl.fma && !ctrl.single |   dfma.io.valid := io.dfma.valid || ex_reg_valid && ctrl.fma && !ctrl.single | ||||||
|   dfma.io.in1 := Mux(io.dfma.valid, io.dfma.in1, ex_rs1) |   dfma.io.in1 := Mux(io.dfma.valid, io.dfma.in1, ex_rs1) | ||||||
|   dfma.io.in2 := Mux(io.dfma.valid, io.dfma.in2, ex_rs2) |   dfma.io.in2 := Mux(io.dfma.valid, io.dfma.in2, ex_rs2) | ||||||
|   | |||||||
| @@ -29,9 +29,7 @@ case class ICacheConfig(sets: Int, assoc: Int, co: CoherencePolicyWithUncached, | |||||||
|  |  | ||||||
| class FrontendReq extends Bundle { | class FrontendReq extends Bundle { | ||||||
|   val pc = UFix(width = VADDR_BITS+1) |   val pc = UFix(width = VADDR_BITS+1) | ||||||
|   val status = Bits(width = 32) |  | ||||||
|   val invalidate = Bool() |   val invalidate = Bool() | ||||||
|   val invalidateTLB = Bool() |  | ||||||
|   val mispredict = Bool() |   val mispredict = Bool() | ||||||
|   val taken = Bool() |   val taken = Bool() | ||||||
|   val currentpc = UFix(width = VADDR_BITS+1) |   val currentpc = UFix(width = VADDR_BITS+1) | ||||||
| @@ -99,14 +97,13 @@ class Frontend(implicit c: ICacheConfig) extends Component | |||||||
|   btb.io.clr := !io.cpu.req.bits.taken |   btb.io.clr := !io.cpu.req.bits.taken | ||||||
|   btb.io.correct_pc := io.cpu.req.bits.currentpc |   btb.io.correct_pc := io.cpu.req.bits.currentpc | ||||||
|   btb.io.correct_target := io.cpu.req.bits.pc |   btb.io.correct_target := io.cpu.req.bits.pc | ||||||
|   btb.io.invalidate := io.cpu.req.bits.invalidate || io.cpu.req.bits.invalidateTLB |   btb.io.invalidate := io.cpu.req.bits.invalidate || io.cpu.ptw.invalidate | ||||||
|  |  | ||||||
|   tlb.io.ptw <> io.cpu.ptw |   tlb.io.ptw <> io.cpu.ptw | ||||||
|   tlb.io.req.valid := !stall && !icmiss |   tlb.io.req.valid := !stall && !icmiss | ||||||
|   tlb.io.req.bits.vpn := s1_pc >> UFix(PGIDX_BITS) |   tlb.io.req.bits.vpn := s1_pc >> UFix(PGIDX_BITS) | ||||||
|   tlb.io.req.bits.status := io.cpu.req.bits.status |  | ||||||
|   tlb.io.req.bits.asid := UFix(0) |   tlb.io.req.bits.asid := UFix(0) | ||||||
|   tlb.io.req.bits.invalidate := io.cpu.req.bits.invalidateTLB |   tlb.io.req.bits.passthrough := Bool(false) | ||||||
|   tlb.io.req.bits.instruction := Bool(true) |   tlb.io.req.bits.instruction := Bool(true) | ||||||
|  |  | ||||||
|   icache.io.mem <> io.mem |   icache.io.mem <> io.mem | ||||||
|   | |||||||
| @@ -3,16 +3,18 @@ package rocket | |||||||
| import Chisel._ | import Chisel._ | ||||||
| import Constants._ | import Constants._ | ||||||
| import uncore._ | import uncore._ | ||||||
|  | import Util._ | ||||||
|  |  | ||||||
| case class DCacheConfig(sets: Int, ways: Int, co: CoherencePolicy, | case class DCacheConfig(sets: Int, ways: Int, co: CoherencePolicy, | ||||||
|                         nmshr: Int, nsecondary: Int, nsdq: Int, |                         nmshr: Int, nrpq: Int, nsdq: Int, | ||||||
|                         reqtagbits: Int = -1) |                         reqtagbits: Int = -1) | ||||||
| { | { | ||||||
|   require(isPow2(sets)) |   require(isPow2(sets)) | ||||||
|   require(isPow2(ways)) // TODO: relax this |   require(isPow2(ways)) // TODO: relax this | ||||||
|   def lines = sets*ways |   def lines = sets*ways | ||||||
|   def dm = ways == 1 |   def dm = ways == 1 | ||||||
|   def ppnbits = PPN_BITS |   def ppnbits = PADDR_BITS - PGIDX_BITS | ||||||
|  |   def vpnbits = VADDR_BITS - PGIDX_BITS | ||||||
|   def pgidxbits = PGIDX_BITS |   def pgidxbits = PGIDX_BITS | ||||||
|   def offbits = OFFSET_BITS |   def offbits = OFFSET_BITS | ||||||
|   def paddrbits = ppnbits + pgidxbits |   def paddrbits = ppnbits + pgidxbits | ||||||
| @@ -161,7 +163,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { | |||||||
|     val req_sec_val    = Bool(INPUT) |     val req_sec_val    = Bool(INPUT) | ||||||
|     val req_sec_rdy    = Bool(OUTPUT) |     val req_sec_rdy    = Bool(OUTPUT) | ||||||
|     val req_bits       = new MSHRReq().asInput |     val req_bits       = new MSHRReq().asInput | ||||||
|     val req_sdq_id     = UFix(INPUT, log2Up(NSDQ)) |     val req_sdq_id     = UFix(INPUT, log2Up(conf.nsdq)) | ||||||
|  |  | ||||||
|     val idx_match      = Bool(OUTPUT) |     val idx_match      = Bool(OUTPUT) | ||||||
|     val idx            = Bits(OUTPUT, conf.idxbits) |     val idx            = Bits(OUTPUT, conf.idxbits) | ||||||
| @@ -194,7 +196,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { | |||||||
|   val idx_match = req.idx === io.req_bits.idx |   val idx_match = req.idx === io.req_bits.idx | ||||||
|   val sec_rdy = idx_match && !flush && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !conf.co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) |   val sec_rdy = idx_match && !flush && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !conf.co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) | ||||||
|  |  | ||||||
|   val rpq = (new Queue(NRPQ)) { new RPQEntry } |   val rpq = (new Queue(conf.nrpq)) { new RPQEntry } | ||||||
|   rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && req_use_rpq |   rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && req_use_rpq | ||||||
|   rpq.io.enq.bits := io.req_bits |   rpq.io.enq.bits := io.req_bits | ||||||
|   rpq.io.enq.bits.sdq_id := io.req_sdq_id |   rpq.io.enq.bits.sdq_id := io.req_sdq_id | ||||||
| @@ -312,24 +314,24 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { | |||||||
|     val cpu_resp_tag = Bits(OUTPUT, conf.reqtagbits) |     val cpu_resp_tag = Bits(OUTPUT, conf.reqtagbits) | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   val sdq_val = Reg(resetVal = Bits(0, NSDQ)) |   val sdq_val = Reg(resetVal = Bits(0, conf.nsdq)) | ||||||
|   val sdq_alloc_id = PriorityEncoder(~sdq_val(NSDQ-1,0)) |   val sdq_alloc_id = PriorityEncoder(~sdq_val(conf.nsdq-1,0)) | ||||||
|   val sdq_rdy = !sdq_val.andR |   val sdq_rdy = !sdq_val.andR | ||||||
|   val (req_read, req_write) = cpuCmdToRW(io.req.bits.cmd) |   val (req_read, req_write) = cpuCmdToRW(io.req.bits.cmd) | ||||||
|   val sdq_enq = io.req.valid && io.req.ready && req_write |   val sdq_enq = io.req.valid && io.req.ready && req_write | ||||||
|   val sdq = Mem(NSDQ) { io.req.bits.data.clone } |   val sdq = Mem(conf.nsdq) { io.req.bits.data.clone } | ||||||
|   when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data } |   when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data } | ||||||
|  |  | ||||||
|   val idxMatch = Vec(NMSHR) { Bool() } |   val idxMatch = Vec(conf.nmshr) { Bool() } | ||||||
|   val tagList = Vec(NMSHR) { Bits() } |   val tagList = Vec(conf.nmshr) { Bits() } | ||||||
|   val wbTagList = Vec(NMSHR) { Bits() } |   val wbTagList = Vec(conf.nmshr) { Bits() } | ||||||
|   val memRespMux = Vec(NMSHR) { new DataArrayReq } |   val memRespMux = Vec(conf.nmshr) { new DataArrayReq } | ||||||
|   val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayReq() } |   val meta_req_arb = (new Arbiter(conf.nmshr)) { new MetaArrayReq() } | ||||||
|   val mem_req_arb = (new Arbiter(NMSHR)) { new TransactionInit } |   val mem_req_arb = (new Arbiter(conf.nmshr)) { new TransactionInit } | ||||||
|   val mem_finish_arb = (new Arbiter(NMSHR)) { new TransactionFinish } |   val mem_finish_arb = (new Arbiter(conf.nmshr)) { new TransactionFinish } | ||||||
|   val wb_req_arb = (new Arbiter(NMSHR)) { new WritebackReq } |   val wb_req_arb = (new Arbiter(conf.nmshr)) { new WritebackReq } | ||||||
|   val replay_arb = (new Arbiter(NMSHR)) { new Replay() } |   val replay_arb = (new Arbiter(conf.nmshr)) { new Replay() } | ||||||
|   val alloc_arb = (new Arbiter(NMSHR)) { Bool() } |   val alloc_arb = (new Arbiter(conf.nmshr)) { Bool() } | ||||||
|  |  | ||||||
|   val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.tag |   val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.tag | ||||||
|   val wb_probe_match = Mux1H(idxMatch, wbTagList) === io.req.bits.tag |   val wb_probe_match = Mux1H(idxMatch, wbTagList) === io.req.bits.tag | ||||||
| @@ -341,7 +343,7 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { | |||||||
|   var writeback_probe_rdy = Bool(true) |   var writeback_probe_rdy = Bool(true) | ||||||
|   var refill_probe_rdy = Bool(true) |   var refill_probe_rdy = Bool(true) | ||||||
|  |  | ||||||
|   for (i <- 0 to NMSHR-1) { |   for (i <- 0 to conf.nmshr-1) { | ||||||
|     val mshr = new MSHR(i) |     val mshr = new MSHR(i) | ||||||
|  |  | ||||||
|     idxMatch(i) := mshr.io.idx_match |     idxMatch(i) := mshr.io.idx_match | ||||||
| @@ -400,8 +402,8 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { | |||||||
|  |  | ||||||
|   val (replay_read, replay_write) = cpuCmdToRW(replay.bits.cmd) |   val (replay_read, replay_write) = cpuCmdToRW(replay.bits.cmd) | ||||||
|   val sdq_free = replay.valid && replay.ready && replay_write |   val sdq_free = replay.valid && replay.ready && replay_write | ||||||
|   sdq_val := sdq_val & ~((UFix(1) << replay.bits.sdq_id) & Fill(sdq_free, NSDQ)) |  |   sdq_val := sdq_val & ~((UFix(1) << replay.bits.sdq_id) & Fill(sdq_free, conf.nsdq)) |  | ||||||
|              PriorityEncoderOH(~sdq_val(NSDQ-1,0)) & Fill(NSDQ, sdq_enq && io.req.bits.tag_miss) |              PriorityEncoderOH(~sdq_val(conf.nsdq-1,0)) & Fill(conf.nsdq, sdq_enq && io.req.bits.tag_miss) | ||||||
|   val sdq_rdata = Reg() { io.req.bits.data.clone } |   val sdq_rdata = Reg() { io.req.bits.data.clone } | ||||||
|   sdq_rdata := sdq(Mux(replay.valid && !replay.ready, replay.bits.sdq_id, replay_arb.io.out.bits.sdq_id)) |   sdq_rdata := sdq(Mux(replay.valid && !replay.ready, replay.bits.sdq_id, replay_arb.io.out.bits.sdq_id)) | ||||||
|   io.data_req.bits.data := sdq_rdata |   io.data_req.bits.data := sdq_rdata | ||||||
| @@ -711,8 +713,8 @@ class AMOALU extends Component { | |||||||
| class HellaCacheReq(implicit conf: DCacheConfig) extends Bundle { | class HellaCacheReq(implicit conf: DCacheConfig) extends Bundle { | ||||||
|   val kill = Bool() |   val kill = Bool() | ||||||
|   val typ  = Bits(width = 3) |   val typ  = Bits(width = 3) | ||||||
|   val idx  = Bits(width = conf.pgidxbits) |   val phys = Bool() | ||||||
|   val ppn  = Bits(width = conf.ppnbits) |   val addr = UFix(width = conf.ppnbits.max(conf.vpnbits+1) + conf.pgidxbits) | ||||||
|   val data = Bits(width = conf.databits) |   val data = Bits(width = conf.databits) | ||||||
|   val tag  = Bits(width = conf.reqtagbits) |   val tag  = Bits(width = conf.reqtagbits) | ||||||
|   val cmd  = Bits(width = 4) |   val cmd  = Bits(width = 4) | ||||||
| @@ -739,6 +741,7 @@ class AlignmentExceptions extends Bundle { | |||||||
|  |  | ||||||
| class HellaCacheExceptions extends Bundle { | class HellaCacheExceptions extends Bundle { | ||||||
|   val ma = new AlignmentExceptions |   val ma = new AlignmentExceptions | ||||||
|  |   val pf = new AlignmentExceptions | ||||||
| } | } | ||||||
|  |  | ||||||
| // interface between D$ and processor/DTLB | // interface between D$ and processor/DTLB | ||||||
| @@ -746,6 +749,7 @@ class ioHellaCache(implicit conf: DCacheConfig) extends Bundle { | |||||||
|   val req = (new FIFOIO){ new HellaCacheReq } |   val req = (new FIFOIO){ new HellaCacheReq } | ||||||
|   val resp = (new PipeIO){ new HellaCacheResp }.flip |   val resp = (new PipeIO){ new HellaCacheResp }.flip | ||||||
|   val xcpt = (new HellaCacheExceptions).asInput |   val xcpt = (new HellaCacheExceptions).asInput | ||||||
|  |   val ptw = new IOTLBPTW().flip | ||||||
| } | } | ||||||
|  |  | ||||||
| class HellaCache(implicit conf: DCacheConfig) extends Component { | class HellaCache(implicit conf: DCacheConfig) extends Component { | ||||||
| @@ -768,6 +772,8 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { | |||||||
|   val early_nack       = Reg { Bool() } |   val early_nack       = Reg { Bool() } | ||||||
|   val r_cpu_req_val_   = Reg(io.cpu.req.valid && io.cpu.req.ready, resetVal = Bool(false)) |   val r_cpu_req_val_   = Reg(io.cpu.req.valid && io.cpu.req.ready, resetVal = Bool(false)) | ||||||
|   val r_cpu_req_val    = r_cpu_req_val_ && !io.cpu.req.bits.kill && !early_nack |   val r_cpu_req_val    = r_cpu_req_val_ && !io.cpu.req.bits.kill && !early_nack | ||||||
|  |   val r_cpu_req_phys   = Reg() { Bool() } | ||||||
|  |   val r_cpu_req_vpn    = Reg() { UFix() } | ||||||
|   val r_cpu_req_idx    = Reg() { Bits() } |   val r_cpu_req_idx    = Reg() { Bits() } | ||||||
|   val r_cpu_req_cmd    = Reg() { Bits() } |   val r_cpu_req_cmd    = Reg() { Bits() } | ||||||
|   val r_cpu_req_type   = Reg() { Bits() } |   val r_cpu_req_type   = Reg() { Bits() } | ||||||
| @@ -799,6 +805,14 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { | |||||||
|   val r_req_readwrite = r_req_read || r_req_write || r_req_prefetch |   val r_req_readwrite = r_req_read || r_req_write || r_req_prefetch | ||||||
|   val nack_hit = Bool() |   val nack_hit = Bool() | ||||||
|  |  | ||||||
|  |   val dtlb = new TLB(8) | ||||||
|  |   dtlb.io.ptw <> io.cpu.ptw | ||||||
|  |   dtlb.io.req.valid := r_cpu_req_val_ && r_req_readwrite && !r_cpu_req_phys | ||||||
|  |   dtlb.io.req.bits.passthrough := r_cpu_req_phys | ||||||
|  |   dtlb.io.req.bits.asid := UFix(0) | ||||||
|  |   dtlb.io.req.bits.vpn := r_cpu_req_vpn | ||||||
|  |   dtlb.io.req.bits.instruction := Bool(false) | ||||||
|  |  | ||||||
|   val wb = new WritebackUnit |   val wb = new WritebackUnit | ||||||
|   val prober = new ProbeUnit |   val prober = new ProbeUnit | ||||||
|   val mshr = new MSHRFile |   val mshr = new MSHRFile | ||||||
| @@ -812,7 +826,9 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { | |||||||
|   flusher.io.mshr_req.ready := mshr.io.req.ready |   flusher.io.mshr_req.ready := mshr.io.req.ready | ||||||
|    |    | ||||||
|   when (io.cpu.req.valid) { |   when (io.cpu.req.valid) { | ||||||
|     r_cpu_req_idx  := io.cpu.req.bits.idx |     r_cpu_req_phys := io.cpu.req.bits.phys | ||||||
|  |     r_cpu_req_vpn  := io.cpu.req.bits.addr >> taglsb | ||||||
|  |     r_cpu_req_idx  := io.cpu.req.bits.addr(indexmsb,0) | ||||||
|     r_cpu_req_cmd  := io.cpu.req.bits.cmd |     r_cpu_req_cmd  := io.cpu.req.bits.cmd | ||||||
|     r_cpu_req_type := io.cpu.req.bits.typ |     r_cpu_req_type := io.cpu.req.bits.typ | ||||||
|     r_cpu_req_tag  := io.cpu.req.bits.tag |     r_cpu_req_tag  := io.cpu.req.bits.tag | ||||||
| @@ -839,8 +855,10 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { | |||||||
|     (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0))) || |     (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0))) || | ||||||
|     ((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != Bits(0))); |     ((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != Bits(0))); | ||||||
|      |      | ||||||
|   io.cpu.xcpt.ma.ld := r_cpu_req_val_ && !early_nack && r_req_read && misaligned |   io.cpu.xcpt.ma.ld := r_cpu_req_val_ && r_req_read && misaligned | ||||||
|   io.cpu.xcpt.ma.st := r_cpu_req_val_ && !early_nack && r_req_write && misaligned |   io.cpu.xcpt.ma.st := r_cpu_req_val_ && r_req_write && misaligned | ||||||
|  |   io.cpu.xcpt.pf.ld := r_cpu_req_val_ && r_req_read && dtlb.io.resp.xcpt_ld | ||||||
|  |   io.cpu.xcpt.pf.st := r_cpu_req_val_ && r_req_write && dtlb.io.resp.xcpt_st | ||||||
|  |  | ||||||
|   // tags |   // tags | ||||||
|   val meta = new MetaDataArrayArray(lines) |   val meta = new MetaDataArrayArray(lines) | ||||||
| @@ -855,11 +873,11 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { | |||||||
|  |  | ||||||
|   // cpu tag check |   // cpu tag check | ||||||
|   meta_arb.io.in(3).valid := io.cpu.req.valid |   meta_arb.io.in(3).valid := io.cpu.req.valid | ||||||
|   meta_arb.io.in(3).bits.idx := io.cpu.req.bits.idx(indexmsb,indexlsb) |   meta_arb.io.in(3).bits.idx := io.cpu.req.bits.addr(indexmsb,indexlsb) | ||||||
|   meta_arb.io.in(3).bits.rw := Bool(false) |   meta_arb.io.in(3).bits.rw := Bool(false) | ||||||
|   meta_arb.io.in(3).bits.way_en := Fix(-1) |   meta_arb.io.in(3).bits.way_en := Fix(-1) | ||||||
|   val early_tag_nack = !meta_arb.io.in(3).ready |   val early_tag_nack = !meta_arb.io.in(3).ready | ||||||
|   val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.addr >> UFix(conf.pgidxbits-conf.offbits), io.cpu.req.bits.ppn) |   val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.addr >> UFix(conf.pgidxbits-conf.offbits), dtlb.io.resp.ppn) | ||||||
|   val cpu_req_tag = Cat(cpu_req_ppn, r_cpu_req_idx)(tagmsb,taglsb) |   val cpu_req_tag = Cat(cpu_req_ppn, r_cpu_req_idx)(tagmsb,taglsb) | ||||||
|   val tag_match_arr = (0 until conf.ways).map( w => conf.co.isValid(meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) |   val tag_match_arr = (0 until conf.ways).map( w => conf.co.isValid(meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) | ||||||
|   val tag_match = Cat(Bits(0),tag_match_arr:_*).orR |   val tag_match = Cat(Bits(0),tag_match_arr:_*).orR | ||||||
| @@ -892,8 +910,8 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { | |||||||
|   data_arb.io.in(0).valid := io.mem.xact_rep.valid && conf.co.messageUpdatesDataArray(io.mem.xact_rep.bits) |   data_arb.io.in(0).valid := io.mem.xact_rep.valid && conf.co.messageUpdatesDataArray(io.mem.xact_rep.bits) | ||||||
|  |  | ||||||
|   // load hits |   // load hits | ||||||
|   data_arb.io.in(4).bits.offset := io.cpu.req.bits.idx(offsetmsb,ramindexlsb) |   data_arb.io.in(4).bits.offset := io.cpu.req.bits.addr(offsetmsb,ramindexlsb) | ||||||
|   data_arb.io.in(4).bits.idx := io.cpu.req.bits.idx(indexmsb,indexlsb) |   data_arb.io.in(4).bits.idx := io.cpu.req.bits.addr(indexmsb,indexlsb) | ||||||
|   data_arb.io.in(4).bits.rw := Bool(false) |   data_arb.io.in(4).bits.rw := Bool(false) | ||||||
|   data_arb.io.in(4).valid := io.cpu.req.valid && req_read |   data_arb.io.in(4).valid := io.cpu.req.valid && req_read | ||||||
|   data_arb.io.in(4).bits.way_en := Fix(-1) // intiate load on all ways, mux after tag check |   data_arb.io.in(4).bits.way_en := Fix(-1) // intiate load on all ways, mux after tag check | ||||||
| @@ -1015,13 +1033,14 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { | |||||||
|   val pending_fence = Reg(resetVal = Bool(false)) |   val pending_fence = Reg(resetVal = Bool(false)) | ||||||
|   pending_fence := (r_cpu_req_val_ && r_req_fence || pending_fence) && !mshr.io.fence_rdy |   pending_fence := (r_cpu_req_val_ && r_req_fence || pending_fence) && !mshr.io.fence_rdy | ||||||
|   nack_hit := p_store_match || replay_val || r_req_write && !p_store_rdy || |   nack_hit := p_store_match || replay_val || r_req_write && !p_store_rdy || | ||||||
|               p_store_idx_match && meta.io.state_req.valid |               p_store_idx_match && meta.io.state_req.valid || | ||||||
|  |               !r_cpu_req_phys && dtlb.io.resp.miss | ||||||
|   val nack_miss  = !mshr.io.req.ready |   val nack_miss  = !mshr.io.req.ready | ||||||
|   val nack_flush = !mshr.io.fence_rdy && (r_req_fence || r_req_flush) || |   val nack_flush = !mshr.io.fence_rdy && (r_req_fence || r_req_flush) || | ||||||
|                    !flushed && r_req_flush |                    !flushed && r_req_flush | ||||||
|   val nack = early_nack || r_req_readwrite && (nack_hit || nack_miss) || nack_flush |   val nack = early_nack || r_req_readwrite && (nack_hit || nack_miss) || nack_flush | ||||||
|  |  | ||||||
|   io.cpu.req.ready   := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence |   io.cpu.req.ready := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence && (dtlb.io.req.ready || io.cpu.req.bits.phys) | ||||||
|   io.cpu.resp.valid  := (r_cpu_req_val && tag_hit && !mshr.io.secondary_miss && !nack && r_req_read) || mshr.io.cpu_resp_val |   io.cpu.resp.valid  := (r_cpu_req_val && tag_hit && !mshr.io.secondary_miss && !nack && r_req_read) || mshr.io.cpu_resp_val | ||||||
|   io.cpu.resp.bits.nack := r_cpu_req_val_ && !io.cpu.req.bits.kill && nack |   io.cpu.resp.bits.nack := r_cpu_req_val_ && !io.cpu.req.bits.kill && nack | ||||||
|   io.cpu.resp.bits.replay := mshr.io.cpu_resp_val |   io.cpu.resp.bits.replay := mshr.io.cpu_resp_val | ||||||
|   | |||||||
| @@ -5,16 +5,31 @@ import Node._ | |||||||
| import Constants._ | import Constants._ | ||||||
| import scala.math._ | import scala.math._ | ||||||
|  |  | ||||||
| class ioPTW(n: Int)(implicit conf: RocketConfiguration) extends Bundle | class IOTLBPTW extends Bundle { | ||||||
| { |   val req = new FIFOIO()(UFix(width = VPN_BITS)) | ||||||
|   val requestor = Vec(n) { new IOTLBPTW }.flip |   val resp = new PipeIO()(new Bundle { | ||||||
|   val mem   = new ioHellaCache()(conf.dcache) |     val error = Bool() | ||||||
|   val ptbr  = UFix(INPUT, PADDR_BITS) |     val ppn = UFix(width = PPN_BITS) | ||||||
|  |     val perm = Bits(width = PERM_BITS) | ||||||
|  |   }).flip | ||||||
|  |  | ||||||
|  |   val status = Bits(INPUT, width = 32) | ||||||
|  |   val invalidate = Bool(INPUT) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | class IODatapathPTW extends Bundle { | ||||||
|  |   val ptbr = UFix(INPUT, PADDR_BITS) | ||||||
|  |   val invalidate = Bool(INPUT) | ||||||
|  |   val status = Bits(INPUT, 32) | ||||||
| } | } | ||||||
|  |  | ||||||
| class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component | class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component | ||||||
| { | { | ||||||
|   val io = new ioPTW(n) |   val io = new Bundle { | ||||||
|  |     val requestor = Vec(n) { new IOTLBPTW }.flip | ||||||
|  |     val mem = new ioHellaCache()(conf.dcache) | ||||||
|  |     val dpath = new IODatapathPTW | ||||||
|  |   } | ||||||
|    |    | ||||||
|   val levels = 3 |   val levels = 3 | ||||||
|   val bitsPerLevel = VPN_BITS/levels |   val bitsPerLevel = VPN_BITS/levels | ||||||
| @@ -27,7 +42,7 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component | |||||||
|   val r_req_vpn = Reg() { Bits() } |   val r_req_vpn = Reg() { Bits() } | ||||||
|   val r_req_dest = Reg() { Bits() } |   val r_req_dest = Reg() { Bits() } | ||||||
|    |    | ||||||
|   val req_addr = Reg() { Bits() } |   val req_addr = Reg() { UFix() } | ||||||
|   val r_resp_ppn = Reg() { Bits() }; |   val r_resp_ppn = Reg() { Bits() }; | ||||||
|   val r_resp_perm = Reg() { Bits() }; |   val r_resp_perm = Reg() { Bits() }; | ||||||
|    |    | ||||||
| @@ -41,21 +56,21 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component | |||||||
|   when (arb.io.out.fire()) { |   when (arb.io.out.fire()) { | ||||||
|     r_req_vpn := arb.io.out.bits |     r_req_vpn := arb.io.out.bits | ||||||
|     r_req_dest := arb.io.chosen |     r_req_dest := arb.io.chosen | ||||||
|     req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), arb.io.out.bits(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3)) |     req_addr := Cat(io.dpath.ptbr(PADDR_BITS-1,PGIDX_BITS), arb.io.out.bits(VPN_BITS-1,VPN_BITS-bitsPerLevel), UFix(0,3)) | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   val dmem_resp_val = Reg(io.mem.resp.valid, resetVal = Bool(false)) |   val dmem_resp_val = Reg(io.mem.resp.valid, resetVal = Bool(false)) | ||||||
|   when (dmem_resp_val) { |   when (dmem_resp_val) { | ||||||
|     req_addr := Cat(io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, Bits(0,3)) |     req_addr := Cat(io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, UFix(0,3)).toUFix | ||||||
|     r_resp_perm := io.mem.resp.bits.data_subword(9,4); |     r_resp_perm := io.mem.resp.bits.data_subword(9,4); | ||||||
|     r_resp_ppn  := io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS); |     r_resp_ppn  := io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS); | ||||||
|   } |   } | ||||||
|    |    | ||||||
|   io.mem.req.valid     := state === s_req |   io.mem.req.valid     := state === s_req | ||||||
|  |   io.mem.req.bits.phys := Bool(true) | ||||||
|   io.mem.req.bits.cmd  := M_XRD |   io.mem.req.bits.cmd  := M_XRD | ||||||
|   io.mem.req.bits.typ  := MT_D |   io.mem.req.bits.typ  := MT_D | ||||||
|   io.mem.req.bits.idx  := req_addr(PGIDX_BITS-1,0) |   io.mem.req.bits.addr := req_addr | ||||||
|   io.mem.req.bits.ppn  := Reg(req_addr(PADDR_BITS-1,PGIDX_BITS)) |  | ||||||
|   io.mem.req.bits.kill := Bool(false) |   io.mem.req.bits.kill := Bool(false) | ||||||
|    |    | ||||||
|   val resp_val = state === s_done || state === s_error |   val resp_val = state === s_done || state === s_error | ||||||
| @@ -73,6 +88,8 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component | |||||||
|     io.requestor(i).resp.bits.error := resp_err |     io.requestor(i).resp.bits.error := resp_err | ||||||
|     io.requestor(i).resp.bits.perm := r_resp_perm |     io.requestor(i).resp.bits.perm := r_resp_perm | ||||||
|     io.requestor(i).resp.bits.ppn := resp_ppn.toUFix |     io.requestor(i).resp.bits.ppn := resp_ppn.toUFix | ||||||
|  |     io.requestor(i).invalidate := io.dpath.invalidate | ||||||
|  |     io.requestor(i).status := io.dpath.status | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   // control state machine |   // control state machine | ||||||
|   | |||||||
| @@ -13,7 +13,8 @@ case class RocketConfiguration(ntiles: Int, co: CoherencePolicyWithUncached, | |||||||
|  |  | ||||||
| class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Component(resetSignal) | class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Component(resetSignal) | ||||||
| { | { | ||||||
|   implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(DMEM_PORTS)) |   val memPorts = if (HAVE_VEC) 3 else 2 | ||||||
|  |   implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(memPorts)) | ||||||
|   implicit val conf = confIn.copy(dcache = dcConf) |   implicit val conf = confIn.copy(dcache = dcConf) | ||||||
|  |  | ||||||
|   val io = new Bundle { |   val io = new Bundle { | ||||||
| @@ -21,13 +22,13 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon | |||||||
|     val host = new ioHTIF(conf.ntiles) |     val host = new ioHTIF(conf.ntiles) | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   val cpu       = new rocketProc |   val core      = new Core | ||||||
|   val icache    = new Frontend()(confIn.icache) |   val icache    = new Frontend()(confIn.icache) | ||||||
|   val dcache    = new HellaCache |   val dcache    = new HellaCache | ||||||
|  |  | ||||||
|   val arbiter   = new MemArbiter(DMEM_PORTS) |   val arbiter   = new MemArbiter(memPorts) | ||||||
|   arbiter.io.requestor(DMEM_DCACHE) <> dcache.io.mem |   arbiter.io.requestor(0) <> dcache.io.mem | ||||||
|   arbiter.io.requestor(DMEM_ICACHE) <> icache.io.mem |   arbiter.io.requestor(1) <> icache.io.mem | ||||||
|  |  | ||||||
|   io.tilelink.xact_init <> arbiter.io.mem.xact_init |   io.tilelink.xact_init <> arbiter.io.mem.xact_init | ||||||
|   io.tilelink.xact_init_data <> dcache.io.mem.xact_init_data |   io.tilelink.xact_init_data <> dcache.io.mem.xact_init_data | ||||||
| @@ -38,14 +39,13 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon | |||||||
|   io.tilelink.probe_rep <> dcache.io.mem.probe_rep |   io.tilelink.probe_rep <> dcache.io.mem.probe_rep | ||||||
|   io.tilelink.probe_rep_data <> dcache.io.mem.probe_rep_data |   io.tilelink.probe_rep_data <> dcache.io.mem.probe_rep_data | ||||||
|  |  | ||||||
|   if (HAVE_VEC) |   if (HAVE_VEC) { | ||||||
|   { |  | ||||||
|     val vicache = new Frontend()(ICacheConfig(128, 1, conf.co)) // 128 sets x 1 ways (8KB) |     val vicache = new Frontend()(ICacheConfig(128, 1, conf.co)) // 128 sets x 1 ways (8KB) | ||||||
|     arbiter.io.requestor(DMEM_VICACHE) <> vicache.io.mem |     arbiter.io.requestor(2) <> vicache.io.mem | ||||||
|     cpu.io.vimem <> vicache.io.cpu |     core.io.vimem <> vicache.io.cpu | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   cpu.io.host       <> io.host |   core.io.host <> io.host | ||||||
|   cpu.io.imem       <> icache.io.cpu |   core.io.imem <> icache.io.cpu | ||||||
|   cpu.io.dmem       <> dcache.io.cpu |   core.io.dmem <> dcache.io.cpu | ||||||
| } | } | ||||||
|   | |||||||
| @@ -66,21 +66,11 @@ class PseudoLRU(n: Int) | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| class IOTLBPTW extends Bundle { |  | ||||||
|   val req = new FIFOIO()(UFix(width = VPN_BITS)) |  | ||||||
|   val resp = new PipeIO()(new Bundle { |  | ||||||
|     val error = Bool() |  | ||||||
|     val ppn = UFix(width = PPN_BITS) |  | ||||||
|     val perm = Bits(width = PERM_BITS) |  | ||||||
|   }).flip |  | ||||||
| } |  | ||||||
|  |  | ||||||
| class TLBReq extends Bundle | class TLBReq extends Bundle | ||||||
| { | { | ||||||
|   val asid = UFix(width = ASID_BITS) |   val asid = UFix(width = ASID_BITS) | ||||||
|   val vpn = UFix(width = VPN_BITS+1) |   val vpn = UFix(width = VPN_BITS+1) | ||||||
|   val status = Bits(width = 32) |   val passthrough = Bool() | ||||||
|   val invalidate = Bool() |  | ||||||
|   val instruction = Bool() |   val instruction = Bool() | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -116,7 +106,7 @@ class TLB(entries: Int) extends Component | |||||||
|   when (io.ptw.resp.valid) { tag_ram(r_refill_waddr) := io.ptw.resp.bits.ppn } |   when (io.ptw.resp.valid) { tag_ram(r_refill_waddr) := io.ptw.resp.bits.ppn } | ||||||
|    |    | ||||||
|   val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUFix |   val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUFix | ||||||
|   tag_cam.io.clear := io.req.bits.invalidate |   tag_cam.io.clear := io.ptw.invalidate | ||||||
|   tag_cam.io.clear_hit := io.req.fire() && Mux(io.req.bits.instruction, io.resp.xcpt_if, io.resp.xcpt_ld && io.resp.xcpt_st) |   tag_cam.io.clear_hit := io.req.fire() && Mux(io.req.bits.instruction, io.resp.xcpt_if, io.resp.xcpt_ld && io.resp.xcpt_st) | ||||||
|   tag_cam.io.tag := lookup_tag |   tag_cam.io.tag := lookup_tag | ||||||
|   tag_cam.io.write := state === s_wait && io.ptw.resp.valid |   tag_cam.io.write := state === s_wait && io.ptw.resp.valid | ||||||
| @@ -148,8 +138,8 @@ class TLB(entries: Int) extends Component | |||||||
|   val plru = new PseudoLRU(entries) |   val plru = new PseudoLRU(entries) | ||||||
|   val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace) |   val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace) | ||||||
|    |    | ||||||
|   val status_s  = io.req.bits.status(SR_S)  // user/supervisor mode |   val status_s  = io.ptw.status(SR_S)  // user/supervisor mode | ||||||
|   val status_vm = io.req.bits.status(SR_VM) // virtual memory enable |   val status_vm = io.ptw.status(SR_VM) // virtual memory enable | ||||||
|   val bad_va = io.req.bits.vpn(VPN_BITS) != io.req.bits.vpn(VPN_BITS-1) |   val bad_va = io.req.bits.vpn(VPN_BITS) != io.req.bits.vpn(VPN_BITS-1) | ||||||
|   val tlb_hit  = status_vm && tag_hit |   val tlb_hit  = status_vm && tag_hit | ||||||
|   val tlb_miss = status_vm && !tag_hit && !bad_va |   val tlb_miss = status_vm && !tag_hit && !bad_va | ||||||
| @@ -163,7 +153,7 @@ class TLB(entries: Int) extends Component | |||||||
|   io.resp.xcpt_st := bad_va || tlb_hit && !Mux(status_s, sw_array(tag_hit_addr), uw_array(tag_hit_addr)) |   io.resp.xcpt_st := bad_va || tlb_hit && !Mux(status_s, sw_array(tag_hit_addr), uw_array(tag_hit_addr)) | ||||||
|   io.resp.xcpt_if := bad_va || tlb_hit && !Mux(status_s, sx_array(tag_hit_addr), ux_array(tag_hit_addr)) |   io.resp.xcpt_if := bad_va || tlb_hit && !Mux(status_s, sx_array(tag_hit_addr), ux_array(tag_hit_addr)) | ||||||
|   io.resp.miss := tlb_miss |   io.resp.miss := tlb_miss | ||||||
|   io.resp.ppn := Mux(status_vm, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(PPN_BITS-1,0)) |   io.resp.ppn := Mux(status_vm && !io.req.bits.passthrough, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(PPN_BITS-1,0)) | ||||||
|   io.resp.hit_idx := tag_cam.io.hits |   io.resp.hit_idx := tag_cam.io.hits | ||||||
|    |    | ||||||
|   io.ptw.req.valid := state === s_request |   io.ptw.req.valid := state === s_request | ||||||
| @@ -175,15 +165,15 @@ class TLB(entries: Int) extends Component | |||||||
|     r_refill_waddr := repl_waddr |     r_refill_waddr := repl_waddr | ||||||
|   } |   } | ||||||
|   when (state === s_request) { |   when (state === s_request) { | ||||||
|     when (io.req.bits.invalidate) { |     when (io.ptw.invalidate) { | ||||||
|       state := s_ready |       state := s_ready | ||||||
|     } |     } | ||||||
|     when (io.ptw.req.ready) { |     when (io.ptw.req.ready) { | ||||||
|       state := s_wait |       state := s_wait | ||||||
|       when (io.req.bits.invalidate) { state := s_wait_invalidate } |       when (io.ptw.invalidate) { state := s_wait_invalidate } | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|   when (state === s_wait && io.req.bits.invalidate) { |   when (state === s_wait && io.ptw.invalidate) { | ||||||
|     state := s_wait_invalidate |     state := s_wait_invalidate | ||||||
|   } |   } | ||||||
|   when ((state === s_wait || state === s_wait_invalidate) && io.ptw.resp.valid) { |   when ((state === s_wait || state === s_wait_invalidate) && io.ptw.resp.valid) { | ||||||
| @@ -204,10 +194,6 @@ class ioDTLB_CPU_resp extends TLBResp(1) | |||||||
|  |  | ||||||
| class ioDTLB extends Bundle | class ioDTLB extends Bundle | ||||||
| { | { | ||||||
|   // status bits (from PCR), to check current permission and whether VM is enabled |  | ||||||
|   val status = Bits(INPUT, 32) |  | ||||||
|   // invalidate all TLB entries |  | ||||||
|   val invalidate = Bool(INPUT) |  | ||||||
|   val cpu_req = new ioDTLB_CPU_req().flip |   val cpu_req = new ioDTLB_CPU_req().flip | ||||||
|   val cpu_resp = new ioDTLB_CPU_resp() |   val cpu_resp = new ioDTLB_CPU_resp() | ||||||
|   val ptw = new IOTLBPTW |   val ptw = new IOTLBPTW | ||||||
| @@ -225,8 +211,7 @@ class rocketTLB(entries: Int) extends Component | |||||||
|   val tlb = new TLB(entries) |   val tlb = new TLB(entries) | ||||||
|   tlb.io.req.valid := r_cpu_req_val && !io.cpu_req.bits.kill |   tlb.io.req.valid := r_cpu_req_val && !io.cpu_req.bits.kill | ||||||
|   tlb.io.req.bits.instruction := Bool(false) |   tlb.io.req.bits.instruction := Bool(false) | ||||||
|   tlb.io.req.bits.invalidate := io.invalidate |   tlb.io.req.bits.passthrough := Bool(false) | ||||||
|   tlb.io.req.bits.status := io.status |  | ||||||
|   tlb.io.req.bits.vpn := r_cpu_req_vpn |   tlb.io.req.bits.vpn := r_cpu_req_vpn | ||||||
|   tlb.io.req.bits.asid := r_cpu_req_asid |   tlb.io.req.bits.asid := r_cpu_req_asid | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user