Merge commit '3c3e35a56e954b35e6ceb17179ebadc52e8d9b3f' into rocc-fpu-port
This commit is contained in:
		| @@ -5,10 +5,10 @@ package rocket | |||||||
| import Chisel._ | import Chisel._ | ||||||
| import uncore._ | import uncore._ | ||||||
|  |  | ||||||
| class HellaCacheArbiter(n: Int) extends Module | class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module | ||||||
| { | { | ||||||
|   val io = new Bundle { |   val io = new Bundle { | ||||||
|     val requestor = Vec.fill(n){new HellaCacheIO}.flip |     val requestor = Vec(new HellaCacheIO, n).flip | ||||||
|     val mem = new HellaCacheIO |     val mem = new HellaCacheIO | ||||||
|   } |   } | ||||||
|  |  | ||||||
| @@ -43,12 +43,12 @@ class HellaCacheArbiter(n: Int) extends Module | |||||||
|     io.requestor(i).xcpt := io.mem.xcpt |     io.requestor(i).xcpt := io.mem.xcpt | ||||||
|     io.requestor(i).ordered := io.mem.ordered |     io.requestor(i).ordered := io.mem.ordered | ||||||
|     resp.bits := io.mem.resp.bits |     resp.bits := io.mem.resp.bits | ||||||
|     resp.bits.tag := io.mem.resp.bits.tag >> UInt(log2Up(n)) |     resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n) | ||||||
|     resp.bits.nack := io.mem.resp.bits.nack && tag_hit |     resp.bits.nack := io.mem.resp.bits.nack && tag_hit | ||||||
|     resp.bits.replay := io.mem.resp.bits.replay && tag_hit |     resp.bits.replay := io.mem.resp.bits.replay && tag_hit | ||||||
|  |  | ||||||
|     io.requestor(i).replay_next.valid := io.mem.replay_next.valid && |     io.requestor(i).replay_next.valid := io.mem.replay_next.valid && | ||||||
|       io.mem.replay_next.bits(log2Up(n)-1,0) === UInt(i) |       io.mem.replay_next.bits(log2Up(n)-1,0) === UInt(i) | ||||||
|     io.requestor(i).replay_next.bits := io.mem.replay_next.bits >> UInt(log2Up(n)) |     io.requestor(i).replay_next.bits := io.mem.replay_next.bits >> log2Up(n) | ||||||
|   } |   } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -6,18 +6,28 @@ import Chisel._ | |||||||
| import junctions._ | import junctions._ | ||||||
| import Util._ | import Util._ | ||||||
|  |  | ||||||
| case object NBTBEntries extends Field[Int] | case object BtbKey extends Field[BtbParameters] | ||||||
| case object NRAS extends Field[Int] |  | ||||||
|  |  | ||||||
| abstract trait BTBParameters extends CoreParameters { | case class BtbParameters( | ||||||
|   val matchBits = params(PgIdxBits) |   enabled: Boolean = true, | ||||||
|   val entries = params(NBTBEntries) |   nEntries: Int = 62, | ||||||
|   val nRAS = params(NRAS) |   nRAS: Int = 2, | ||||||
|  |   updatesOutOfOrder: Boolean = false) | ||||||
|  |  | ||||||
|  | abstract trait HasBtbParameters extends HasCoreParameters { | ||||||
|  |   val matchBits = pgIdxBits | ||||||
|  |   val entries = p(BtbKey).nEntries | ||||||
|  |   val nRAS = p(BtbKey).nRAS | ||||||
|  |   val updatesOutOfOrder = p(BtbKey).updatesOutOfOrder | ||||||
|   val nPages = ((1 max(log2Up(entries)))+1)/2*2 // control logic assumes 2 divides pages |   val nPages = ((1 max(log2Up(entries)))+1)/2*2 // control logic assumes 2 divides pages | ||||||
|   val opaqueBits = log2Up(entries) |   val opaqueBits = log2Up(entries) | ||||||
|   val nBHT = 1 << log2Up(entries*2) |   val nBHT = 1 << log2Up(entries*2) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | abstract class BtbModule(implicit val p: Parameters) extends Module with HasBtbParameters | ||||||
|  | abstract class BtbBundle(implicit val p: Parameters) extends ParameterizedBundle()(p) | ||||||
|  |   with HasBtbParameters | ||||||
|  |  | ||||||
| class RAS(nras: Int) { | class RAS(nras: Int) { | ||||||
|   def push(addr: UInt): Unit = { |   def push(addr: UInt): Unit = { | ||||||
|     when (count < nras) { count := count + 1 } |     when (count < nras) { count := count + 1 } | ||||||
| @@ -35,10 +45,10 @@ class RAS(nras: Int) { | |||||||
|  |  | ||||||
|   private val count = Reg(init=UInt(0,log2Up(nras+1))) |   private val count = Reg(init=UInt(0,log2Up(nras+1))) | ||||||
|   private val pos = Reg(init=UInt(0,log2Up(nras))) |   private val pos = Reg(init=UInt(0,log2Up(nras))) | ||||||
|   private val stack = Reg(Vec.fill(nras){UInt()}) |   private val stack = Reg(Vec(UInt(), nras)) | ||||||
| } | } | ||||||
|  |  | ||||||
| class BHTResp extends Bundle with BTBParameters { | class BHTResp(implicit p: Parameters) extends BtbBundle()(p) { | ||||||
|   val history = UInt(width = log2Up(nBHT).max(1)) |   val history = UInt(width = log2Up(nBHT).max(1)) | ||||||
|   val value = UInt(width = 2) |   val value = UInt(width = 2) | ||||||
| } | } | ||||||
| @@ -52,7 +62,7 @@ class BHTResp extends Bundle with BTBParameters { | |||||||
| //    - each counter corresponds with the address of the fetch packet ("fetch pc"). | //    - each counter corresponds with the address of the fetch packet ("fetch pc"). | ||||||
| //    - updated when a branch resolves (and BTB was a hit for that branch). | //    - updated when a branch resolves (and BTB was a hit for that branch). | ||||||
| //      The updating branch must provide its "fetch pc". | //      The updating branch must provide its "fetch pc". | ||||||
| class BHT(nbht: Int) { | class BHT(nbht: Int)(implicit p: Parameters) { | ||||||
|   val nbhtbits = log2Up(nbht) |   val nbhtbits = log2Up(nbht) | ||||||
|   def get(addr: UInt, update: Bool): BHTResp = { |   def get(addr: UInt, update: Bool): BHTResp = { | ||||||
|     val res = Wire(new BHTResp) |     val res = Wire(new BHTResp) | ||||||
| @@ -69,14 +79,14 @@ class BHT(nbht: Int) { | |||||||
|     when (mispredict) { history := Cat(taken, d.history(nbhtbits-1,1)) } |     when (mispredict) { history := Cat(taken, d.history(nbhtbits-1,1)) } | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   private val table = Mem(UInt(width = 2), nbht) |   private val table = Mem(nbht, UInt(width = 2)) | ||||||
|   val history = Reg(UInt(width = nbhtbits)) |   val history = Reg(UInt(width = nbhtbits)) | ||||||
| } | } | ||||||
|  |  | ||||||
| // BTB update occurs during branch resolution (and only on a mispredict). | // BTB update occurs during branch resolution (and only on a mispredict). | ||||||
| //  - "pc" is what future fetch PCs will tag match against. | //  - "pc" is what future fetch PCs will tag match against. | ||||||
| //  - "br_pc" is the PC of the branch instruction. | //  - "br_pc" is the PC of the branch instruction. | ||||||
| class BTBUpdate extends Bundle with BTBParameters { | class BTBUpdate(implicit p: Parameters) extends BtbBundle()(p) { | ||||||
|   val prediction = Valid(new BTBResp) |   val prediction = Valid(new BTBResp) | ||||||
|   val pc = UInt(width = vaddrBits) |   val pc = UInt(width = vaddrBits) | ||||||
|   val target = UInt(width = vaddrBits) |   val target = UInt(width = vaddrBits) | ||||||
| @@ -88,14 +98,14 @@ class BTBUpdate extends Bundle with BTBParameters { | |||||||
|  |  | ||||||
| // BHT update occurs during branch resolution on all conditional branches. | // BHT update occurs during branch resolution on all conditional branches. | ||||||
| //  - "pc" is what future fetch PCs will tag match against. | //  - "pc" is what future fetch PCs will tag match against. | ||||||
| class BHTUpdate extends Bundle with BTBParameters { | class BHTUpdate(implicit p: Parameters) extends BtbBundle()(p) { | ||||||
|   val prediction = Valid(new BTBResp) |   val prediction = Valid(new BTBResp) | ||||||
|   val pc = UInt(width = vaddrBits) |   val pc = UInt(width = vaddrBits) | ||||||
|   val taken = Bool() |   val taken = Bool() | ||||||
|   val mispredict = Bool() |   val mispredict = Bool() | ||||||
| } | } | ||||||
|  |  | ||||||
| class RASUpdate extends Bundle with BTBParameters { | class RASUpdate(implicit p: Parameters) extends BtbBundle()(p) { | ||||||
|   val isCall = Bool() |   val isCall = Bool() | ||||||
|   val isReturn = Bool() |   val isReturn = Bool() | ||||||
|   val returnAddr = UInt(width = vaddrBits) |   val returnAddr = UInt(width = vaddrBits) | ||||||
| @@ -106,16 +116,16 @@ class RASUpdate extends Bundle with BTBParameters { | |||||||
| //     shifting off the lowest log(inst_bytes) bits). | //     shifting off the lowest log(inst_bytes) bits). | ||||||
| //  - "resp.mask" provides a mask of valid instructions (instructions are | //  - "resp.mask" provides a mask of valid instructions (instructions are | ||||||
| //     masked off by the predicted taken branch). | //     masked off by the predicted taken branch). | ||||||
| class BTBResp extends Bundle with BTBParameters { | class BTBResp(implicit p: Parameters) extends BtbBundle()(p) { | ||||||
|   val taken = Bool() |   val taken = Bool() | ||||||
|   val mask = Bits(width = params(FetchWidth)) |   val mask = Bits(width = fetchWidth) | ||||||
|   val bridx = Bits(width = log2Up(params(FetchWidth))) |   val bridx = Bits(width = log2Up(fetchWidth)) | ||||||
|   val target = UInt(width = vaddrBits) |   val target = UInt(width = vaddrBits) | ||||||
|   val entry = UInt(width = opaqueBits) |   val entry = UInt(width = opaqueBits) | ||||||
|   val bht = new BHTResp |   val bht = new BHTResp | ||||||
| } | } | ||||||
|  |  | ||||||
| class BTBReq extends Bundle with BTBParameters { | class BTBReq(implicit p: Parameters) extends BtbBundle()(p) { | ||||||
|    val addr = UInt(width = vaddrBits) |    val addr = UInt(width = vaddrBits) | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -123,7 +133,7 @@ class BTBReq extends Bundle with BTBParameters { | |||||||
| // Higher-performance processors may cause BTB updates to occur out-of-order, | // Higher-performance processors may cause BTB updates to occur out-of-order, | ||||||
| // which requires an extra CAM port for updates (to ensure no duplicates get | // which requires an extra CAM port for updates (to ensure no duplicates get | ||||||
| // placed in BTB). | // placed in BTB). | ||||||
| class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParameters { | class BTB(implicit p: Parameters) extends BtbModule { | ||||||
|   val io = new Bundle { |   val io = new Bundle { | ||||||
|     val req = Valid(new BTBReq).flip |     val req = Valid(new BTBReq).flip | ||||||
|     val resp = Valid(new BTBResp) |     val resp = Valid(new BTBResp) | ||||||
| @@ -134,18 +144,18 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete | |||||||
|   } |   } | ||||||
|  |  | ||||||
|   val idxValid = Reg(init=UInt(0, entries)) |   val idxValid = Reg(init=UInt(0, entries)) | ||||||
|   val idxs = Mem(UInt(width=matchBits), entries) |   val idxs = Mem(entries, UInt(width=matchBits)) | ||||||
|   val idxPages = Mem(UInt(width=log2Up(nPages)), entries) |   val idxPages = Mem(entries, UInt(width=log2Up(nPages))) | ||||||
|   val tgts = Mem(UInt(width=matchBits), entries) |   val tgts = Mem(entries, UInt(width=matchBits)) | ||||||
|   val tgtPages = Mem(UInt(width=log2Up(nPages)), entries) |   val tgtPages = Mem(entries, UInt(width=log2Up(nPages))) | ||||||
|   val pages = Mem(UInt(width=vaddrBits-matchBits), nPages) |   val pages = Mem(nPages, UInt(width=vaddrBits-matchBits)) | ||||||
|   val pageValid = Reg(init=UInt(0, nPages)) |   val pageValid = Reg(init=UInt(0, nPages)) | ||||||
|   val idxPagesOH = idxPages.map(UIntToOH(_)(nPages-1,0)) |   val idxPagesOH = idxPages.map(UIntToOH(_)(nPages-1,0)) | ||||||
|   val tgtPagesOH = tgtPages.map(UIntToOH(_)(nPages-1,0)) |   val tgtPagesOH = tgtPages.map(UIntToOH(_)(nPages-1,0)) | ||||||
|  |  | ||||||
|   val useRAS = Reg(Vec(Bool(), entries)) |   val useRAS = Reg(Vec(entries, Bool())) | ||||||
|   val isJump = Reg(Vec(Bool(), entries)) |   val isJump = Reg(Vec(entries, Bool())) | ||||||
|   val brIdx  = Mem(UInt(width=log2Up(params(FetchWidth))), entries) |   val brIdx  = Mem(entries, UInt(width=log2Up(fetchWidth))) | ||||||
|  |  | ||||||
|   private def page(addr: UInt) = addr >> matchBits |   private def page(addr: UInt) = addr >> matchBits | ||||||
|   private def pageMatch(addr: UInt) = { |   private def pageMatch(addr: UInt) = { | ||||||
| @@ -174,6 +184,7 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete | |||||||
|   } |   } | ||||||
|  |  | ||||||
|   val updateHit = r_btb_update.bits.prediction.valid |   val updateHit = r_btb_update.bits.prediction.valid | ||||||
|  |   val nextRepl = Counter(r_btb_update.valid && !updateHit, entries)._1 | ||||||
|  |  | ||||||
|   val useUpdatePageHit = updatePageHit.orR |   val useUpdatePageHit = updatePageHit.orR | ||||||
|   val doIdxPageRepl = !useUpdatePageHit |   val doIdxPageRepl = !useUpdatePageHit | ||||||
| @@ -196,9 +207,8 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete | |||||||
|   when (r_btb_update.valid) { |   when (r_btb_update.valid) { | ||||||
|     assert(io.req.bits.addr === r_btb_update.bits.target, "BTB request != I$ target") |     assert(io.req.bits.addr === r_btb_update.bits.target, "BTB request != I$ target") | ||||||
|  |  | ||||||
|     val nextRepl = Counter(!updateHit, entries)._1 |  | ||||||
|     val waddr = |     val waddr = | ||||||
|       if (updates_out_of_order) Mux(updateHits.orR, OHToUInt(updateHits), nextRepl) |       if (updatesOutOfOrder) Mux(updateHits.orR, OHToUInt(updateHits), nextRepl) | ||||||
|       else Mux(updateHit, r_btb_update.bits.prediction.bits.entry, nextRepl) |       else Mux(updateHit, r_btb_update.bits.prediction.bits.entry, nextRepl) | ||||||
|  |  | ||||||
|     // invalidate entries if we stomp on pages they depend upon |     // invalidate entries if we stomp on pages they depend upon | ||||||
| @@ -212,10 +222,10 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete | |||||||
|     tgtPages(waddr) := tgtPageUpdate |     tgtPages(waddr) := tgtPageUpdate | ||||||
|     useRAS(waddr) := r_btb_update.bits.isReturn |     useRAS(waddr) := r_btb_update.bits.isReturn | ||||||
|     isJump(waddr) := r_btb_update.bits.isJump |     isJump(waddr) := r_btb_update.bits.isJump | ||||||
|     if (params(FetchWidth) == 1) { |     if (fetchWidth == 1) { | ||||||
|       brIdx(waddr) := UInt(0) |       brIdx(waddr) := UInt(0) | ||||||
|     } else { |     } else { | ||||||
|       brIdx(waddr) := r_btb_update.bits.br_pc >> log2Up(params(CoreInstBits)/8) |       brIdx(waddr) := r_btb_update.bits.br_pc >> log2Up(coreInstBytes) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     require(nPages % 2 == 0) |     require(nPages % 2 == 0) | ||||||
| @@ -243,7 +253,7 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete | |||||||
|   io.resp.bits.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts)) |   io.resp.bits.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts)) | ||||||
|   io.resp.bits.entry := OHToUInt(hits) |   io.resp.bits.entry := OHToUInt(hits) | ||||||
|   io.resp.bits.bridx := brIdx(io.resp.bits.entry) |   io.resp.bits.bridx := brIdx(io.resp.bits.entry) | ||||||
|   if (params(FetchWidth) == 1) { |   if (fetchWidth == 1) { | ||||||
|     io.resp.bits.mask := UInt(1) |     io.resp.bits.mask := UInt(1) | ||||||
|   } else { |   } else { | ||||||
|     // note: btb_resp is clock gated, so the mask is only relevant for the io.resp.valid case |     // note: btb_resp is clock gated, so the mask is only relevant for the io.resp.valid case | ||||||
|   | |||||||
| @@ -64,12 +64,14 @@ object CSR | |||||||
|   val C = UInt(3,SZ) |   val C = UInt(3,SZ) | ||||||
|   val I = UInt(4,SZ) |   val I = UInt(4,SZ) | ||||||
|   val R = UInt(5,SZ) |   val R = UInt(5,SZ) | ||||||
|  |  | ||||||
|  |   val ADDRSZ = 12 | ||||||
| } | } | ||||||
|  |  | ||||||
| class CSRFileIO extends CoreBundle { | class CSRFileIO(implicit p: Parameters) extends CoreBundle { | ||||||
|   val host = new HTIFIO |   val host = new HtifIO | ||||||
|   val rw = new Bundle { |   val rw = new Bundle { | ||||||
|     val addr = UInt(INPUT, 12) |     val addr = UInt(INPUT, CSR.ADDRSZ) | ||||||
|     val cmd = Bits(INPUT, CSR.SZ) |     val cmd = Bits(INPUT, CSR.SZ) | ||||||
|     val rdata = Bits(OUTPUT, xLen) |     val rdata = Bits(OUTPUT, xLen) | ||||||
|     val wdata = Bits(INPUT, xLen) |     val wdata = Bits(INPUT, xLen) | ||||||
| @@ -85,8 +87,8 @@ class CSRFileIO extends CoreBundle { | |||||||
|   val evec = UInt(OUTPUT, vaddrBitsExtended) |   val evec = UInt(OUTPUT, vaddrBitsExtended) | ||||||
|   val exception = Bool(INPUT) |   val exception = Bool(INPUT) | ||||||
|   val retire = UInt(INPUT, log2Up(1+retireWidth)) |   val retire = UInt(INPUT, log2Up(1+retireWidth)) | ||||||
|   val uarch_counters = Vec.fill(16)(UInt(INPUT, log2Up(1+retireWidth))) |   val uarch_counters = Vec(UInt(INPUT, log2Up(1+retireWidth)), 16) | ||||||
|   val custom_mrw_csrs = Vec.fill(params(NCustomMRWCSRs))(UInt(INPUT, xLen)) |   val custom_mrw_csrs = Vec(UInt(INPUT, xLen), nCustomMrwCsrs) | ||||||
|   val cause = UInt(INPUT, xLen) |   val cause = UInt(INPUT, xLen) | ||||||
|   val pc = UInt(INPUT, vaddrBitsExtended) |   val pc = UInt(INPUT, vaddrBitsExtended) | ||||||
|   val fatc = Bool(OUTPUT) |   val fatc = Bool(OUTPUT) | ||||||
| @@ -98,7 +100,7 @@ class CSRFileIO extends CoreBundle { | |||||||
|   val interrupt_cause = UInt(OUTPUT, xLen) |   val interrupt_cause = UInt(OUTPUT, xLen) | ||||||
| } | } | ||||||
|  |  | ||||||
| class CSRFile extends CoreModule | class CSRFile(implicit p: Parameters) extends CoreModule()(p) | ||||||
| { | { | ||||||
|   val io = new CSRFileIO |   val io = new CSRFileIO | ||||||
|  |  | ||||||
| @@ -123,13 +125,13 @@ class CSRFile extends CoreModule | |||||||
|   val reg_fromhost = Reg(init=Bits(0, xLen)) |   val reg_fromhost = Reg(init=Bits(0, xLen)) | ||||||
|   val reg_stats = Reg(init=Bool(false)) |   val reg_stats = Reg(init=Bool(false)) | ||||||
|   val reg_time = Reg(UInt(width = xLen)) |   val reg_time = Reg(UInt(width = xLen)) | ||||||
|   val reg_cycle = WideCounter(xLen) |  | ||||||
|   val reg_instret = WideCounter(xLen, io.retire) |   val reg_instret = WideCounter(xLen, io.retire) | ||||||
|  |   val reg_cycle = if (enableCommitLog) { reg_instret } else { WideCounter(xLen) } | ||||||
|   val reg_uarch_counters = io.uarch_counters.map(WideCounter(xLen, _)) |   val reg_uarch_counters = io.uarch_counters.map(WideCounter(xLen, _)) | ||||||
|   val reg_fflags = Reg(UInt(width = 5)) |   val reg_fflags = Reg(UInt(width = 5)) | ||||||
|   val reg_frm = Reg(UInt(width = 3)) |   val reg_frm = Reg(UInt(width = 3)) | ||||||
|  |  | ||||||
|   val irq_rocc = Bool(!params(BuildRoCC).isEmpty) && io.rocc.interrupt |   val irq_rocc = Bool(!p(BuildRoCC).isEmpty) && io.rocc.interrupt | ||||||
|  |  | ||||||
|   io.interrupt_cause := 0 |   io.interrupt_cause := 0 | ||||||
|   io.interrupt := io.interrupt_cause(xLen-1) |   io.interrupt := io.interrupt_cause(xLen-1) | ||||||
| @@ -153,48 +155,47 @@ class CSRFile extends CoreModule | |||||||
|   val system_insn = io.rw.cmd === CSR.I |   val system_insn = io.rw.cmd === CSR.I | ||||||
|   val cpu_ren = io.rw.cmd != CSR.N && !system_insn |   val cpu_ren = io.rw.cmd != CSR.N && !system_insn | ||||||
|  |  | ||||||
|   val host_pcr_req_valid = Reg(Bool()) // don't reset |   val host_csr_req_valid = Reg(Bool()) // don't reset | ||||||
|   val host_pcr_req_fire = host_pcr_req_valid && !cpu_ren |   val host_csr_req_fire = host_csr_req_valid && !cpu_ren | ||||||
|   val host_pcr_rep_valid = Reg(Bool()) // don't reset |   val host_csr_rep_valid = Reg(Bool()) // don't reset | ||||||
|   val host_pcr_bits = Reg(io.host.pcr_req.bits) |   val host_csr_bits = Reg(io.host.csr.req.bits) | ||||||
|   io.host.pcr_req.ready := !host_pcr_req_valid && !host_pcr_rep_valid |   io.host.csr.req.ready := !host_csr_req_valid && !host_csr_rep_valid | ||||||
|   io.host.pcr_rep.valid := host_pcr_rep_valid |   io.host.csr.resp.valid := host_csr_rep_valid | ||||||
|   io.host.pcr_rep.bits := host_pcr_bits.data |   io.host.csr.resp.bits := host_csr_bits.data | ||||||
|   when (io.host.pcr_req.fire()) { |   when (io.host.csr.req.fire()) { | ||||||
|     host_pcr_req_valid := true |     host_csr_req_valid := true | ||||||
|     host_pcr_bits := io.host.pcr_req.bits |     host_csr_bits := io.host.csr.req.bits | ||||||
|   } |   } | ||||||
|   when (host_pcr_req_fire) { |   when (host_csr_req_fire) { | ||||||
|     host_pcr_req_valid := false |     host_csr_req_valid := false | ||||||
|     host_pcr_rep_valid := true |     host_csr_rep_valid := true | ||||||
|     host_pcr_bits.data := io.rw.rdata |     host_csr_bits.data := io.rw.rdata | ||||||
|   } |   } | ||||||
|   when (io.host.pcr_rep.fire()) { host_pcr_rep_valid := false } |   when (io.host.csr.resp.fire()) { host_csr_rep_valid := false } | ||||||
|    |  | ||||||
|   io.host.debug_stats_pcr := reg_stats // direct export up the hierarchy |  | ||||||
|  |  | ||||||
|  |   io.host.debug_stats_csr := reg_stats // direct export up the hierarchy | ||||||
|  |  | ||||||
|  |   val read_time = if (usingPerfCounters) reg_time else (reg_cycle: UInt) | ||||||
|   val read_mstatus = io.status.toBits |   val read_mstatus = io.status.toBits | ||||||
|   val isa_string = "IMA" + |   val isa_string = "IMA" + | ||||||
|     (if (params(UseVM)) "S" else "") + |     (if (usingVM) "S" else "") + | ||||||
|     (if (!params(BuildFPU).isEmpty) "FD" else "") + |     (if (usingFPU) "FD" else "") + | ||||||
|     (if (!params(BuildRoCC).isEmpty) "X" else "") |     (if (usingRoCC) "X" else "") | ||||||
|   val cpuid = ((if (xLen == 32) BigInt(0) else BigInt(2)) << (xLen-2)) | |   val cpuid = ((if (xLen == 32) BigInt(0) else BigInt(2)) << (xLen-2)) | | ||||||
|     isa_string.map(x => 1 << (x - 'A')).reduce(_|_) |     isa_string.map(x => 1 << (x - 'A')).reduce(_|_) | ||||||
|   val impid = 1 |   val impid = 1 | ||||||
|  |  | ||||||
|   val read_mapping = collection.mutable.LinkedHashMap[Int,Bits]( |   val read_mapping = collection.mutable.LinkedHashMap[Int,Bits]( | ||||||
|     CSRs.fflags -> (if (!params(BuildFPU).isEmpty) reg_fflags else UInt(0)), |     CSRs.fflags -> (if (usingFPU) reg_fflags else UInt(0)), | ||||||
|     CSRs.frm -> (if (!params(BuildFPU).isEmpty) reg_frm else UInt(0)), |     CSRs.frm -> (if (usingFPU) reg_frm else UInt(0)), | ||||||
|     CSRs.fcsr -> (if (!params(BuildFPU).isEmpty) Cat(reg_frm, reg_fflags) else UInt(0)), |     CSRs.fcsr -> (if (usingFPU) Cat(reg_frm, reg_fflags) else UInt(0)), | ||||||
|     CSRs.cycle -> reg_cycle, |     CSRs.cycle -> reg_cycle, | ||||||
|     CSRs.cyclew -> reg_cycle, |     CSRs.cyclew -> reg_cycle, | ||||||
|     CSRs.instret -> reg_instret, |     CSRs.time -> read_time, | ||||||
|     CSRs.instretw -> reg_instret, |     CSRs.timew -> read_time, | ||||||
|     CSRs.time -> reg_time, |     CSRs.stime -> read_time, | ||||||
|     CSRs.timew -> reg_time, |     CSRs.stimew -> read_time, | ||||||
|     CSRs.stime -> reg_time, |     CSRs.mtime -> read_time, | ||||||
|     CSRs.stimew -> reg_time, |  | ||||||
|     CSRs.mtime -> reg_time, |  | ||||||
|     CSRs.mcpuid -> UInt(cpuid), |     CSRs.mcpuid -> UInt(cpuid), | ||||||
|     CSRs.mimpid -> UInt(impid), |     CSRs.mimpid -> UInt(impid), | ||||||
|     CSRs.mstatus -> read_mstatus, |     CSRs.mstatus -> read_mstatus, | ||||||
| @@ -214,7 +215,15 @@ class CSRFile extends CoreModule | |||||||
|     CSRs.mtohost -> reg_tohost, |     CSRs.mtohost -> reg_tohost, | ||||||
|     CSRs.mfromhost -> reg_fromhost) |     CSRs.mfromhost -> reg_fromhost) | ||||||
|  |  | ||||||
|   if (params(UseVM)) { |   if (usingPerfCounters) { | ||||||
|  |     read_mapping += CSRs.instret -> reg_instret | ||||||
|  |     read_mapping += CSRs.instretw -> reg_instret | ||||||
|  |  | ||||||
|  |     for (i <- 0 until reg_uarch_counters.size) | ||||||
|  |       read_mapping += (CSRs.uarch0 + i) -> reg_uarch_counters(i) | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   if (usingVM) { | ||||||
|     val read_sstatus = Wire(init=new SStatus().fromBits(read_mstatus)) |     val read_sstatus = Wire(init=new SStatus().fromBits(read_mstatus)) | ||||||
|     read_sstatus.zero1 := 0 |     read_sstatus.zero1 := 0 | ||||||
|     read_sstatus.zero2 := 0 |     read_sstatus.zero2 := 0 | ||||||
| @@ -241,17 +250,14 @@ class CSRFile extends CoreModule | |||||||
|     read_mapping += CSRs.stvec -> reg_stvec.sextTo(xLen) |     read_mapping += CSRs.stvec -> reg_stvec.sextTo(xLen) | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   for (i <- 0 until reg_uarch_counters.size) |   for (i <- 0 until nCustomMrwCsrs) { | ||||||
|     read_mapping += (CSRs.uarch0 + i) -> reg_uarch_counters(i) |  | ||||||
|  |  | ||||||
|   for (i <- 0 until params(NCustomMRWCSRs)) { |  | ||||||
|     val addr = 0x790 + i // turn 0x790 into parameter CustomMRWCSRBase? |     val addr = 0x790 + i // turn 0x790 into parameter CustomMRWCSRBase? | ||||||
|     require(addr >= 0x780 && addr <= 0x7ff, "custom MRW CSR address " + i + " is out of range") |     require(addr >= 0x780 && addr <= 0x7ff, "custom MRW CSR address " + i + " is out of range") | ||||||
|     require(!read_mapping.contains(addr), "custom MRW CSR address " + i + " is already in use") |     require(!read_mapping.contains(addr), "custom MRW CSR address " + i + " is already in use") | ||||||
|     read_mapping += addr -> io.custom_mrw_csrs(i) |     read_mapping += addr -> io.custom_mrw_csrs(i) | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   val addr = Mux(cpu_ren, io.rw.addr, host_pcr_bits.addr) |   val addr = Mux(cpu_ren, io.rw.addr, host_csr_bits.addr) | ||||||
|   val decoded_addr = read_mapping map { case (k, v) => k -> (addr === k) } |   val decoded_addr = read_mapping map { case (k, v) => k -> (addr === k) } | ||||||
|  |  | ||||||
|   val addr_valid = decoded_addr.values.reduce(_||_) |   val addr_valid = decoded_addr.values.reduce(_||_) | ||||||
| @@ -260,11 +266,11 @@ class CSRFile extends CoreModule | |||||||
|   val priv_sufficient = reg_mstatus.prv >= csr_addr_priv |   val priv_sufficient = reg_mstatus.prv >= csr_addr_priv | ||||||
|   val read_only = io.rw.addr(11,10).andR |   val read_only = io.rw.addr(11,10).andR | ||||||
|   val cpu_wen = cpu_ren && io.rw.cmd != CSR.R && priv_sufficient |   val cpu_wen = cpu_ren && io.rw.cmd != CSR.R && priv_sufficient | ||||||
|   val wen = cpu_wen && !read_only || host_pcr_req_fire && host_pcr_bits.rw |   val wen = cpu_wen && !read_only || host_csr_req_fire && host_csr_bits.rw | ||||||
|   val wdata = Mux(io.rw.cmd === CSR.W, io.rw.wdata, |   val wdata = Mux(io.rw.cmd === CSR.W, io.rw.wdata, | ||||||
|               Mux(io.rw.cmd === CSR.C, io.rw.rdata & ~io.rw.wdata, |               Mux(io.rw.cmd === CSR.C, io.rw.rdata & ~io.rw.wdata, | ||||||
|               Mux(io.rw.cmd === CSR.S, io.rw.rdata | io.rw.wdata, |               Mux(io.rw.cmd === CSR.S, io.rw.rdata | io.rw.wdata, | ||||||
|               host_pcr_bits.data))) |               host_csr_bits.data))) | ||||||
|  |  | ||||||
|   val opcode = io.rw.addr |   val opcode = io.rw.addr | ||||||
|   val insn_call = !opcode(8) && !opcode(0) && system_insn |   val insn_call = !opcode(8) && !opcode(0) && system_insn | ||||||
| @@ -341,7 +347,7 @@ class CSRFile extends CoreModule | |||||||
|  |  | ||||||
|   assert(PopCount(insn_ret :: insn_redirect_trap :: io.exception :: csr_xcpt :: io.csr_replay :: Nil) <= 1, "these conditions must be mutually exclusive") |   assert(PopCount(insn_ret :: insn_redirect_trap :: io.exception :: csr_xcpt :: io.csr_replay :: Nil) <= 1, "these conditions must be mutually exclusive") | ||||||
|  |  | ||||||
|   when (reg_time >= reg_mtimecmp) { |   when (read_time >= reg_mtimecmp) { | ||||||
|     reg_mip.mtip := true |     reg_mip.mtip := true | ||||||
|   } |   } | ||||||
|  |  | ||||||
| @@ -351,7 +357,7 @@ class CSRFile extends CoreModule | |||||||
|   io.csr_replay := io.host.ipi_req.valid && !io.host.ipi_req.ready |   io.csr_replay := io.host.ipi_req.valid && !io.host.ipi_req.ready | ||||||
|   io.csr_stall := reg_wfi |   io.csr_stall := reg_wfi | ||||||
|  |  | ||||||
|   when (host_pcr_req_fire && !host_pcr_bits.rw && decoded_addr(CSRs.mtohost)) { reg_tohost := UInt(0) } |   when (host_csr_req_fire && !host_csr_bits.rw && decoded_addr(CSRs.mtohost)) { reg_tohost := UInt(0) } | ||||||
|  |  | ||||||
|   io.rw.rdata := Mux1H(for ((k, v) <- read_mapping) yield decoded_addr(k) -> v) |   io.rw.rdata := Mux1H(for ((k, v) <- read_mapping) yield decoded_addr(k) -> v) | ||||||
|  |  | ||||||
| @@ -366,7 +372,7 @@ class CSRFile extends CoreModule | |||||||
|       reg_mstatus.ie := new_mstatus.ie |       reg_mstatus.ie := new_mstatus.ie | ||||||
|       reg_mstatus.ie1 := new_mstatus.ie1 |       reg_mstatus.ie1 := new_mstatus.ie1 | ||||||
|  |  | ||||||
|       val supportedModes = Vec((PRV_M :: PRV_U :: (if (params(UseVM)) List(PRV_S) else Nil)).map(UInt(_))) |       val supportedModes = Vec((PRV_M :: PRV_U :: (if (usingVM) List(PRV_S) else Nil)).map(UInt(_))) | ||||||
|       if (supportedModes.size > 1) { |       if (supportedModes.size > 1) { | ||||||
|         reg_mstatus.mprv := new_mstatus.mprv |         reg_mstatus.mprv := new_mstatus.mprv | ||||||
|         when (supportedModes contains new_mstatus.prv) { reg_mstatus.prv := new_mstatus.prv } |         when (supportedModes contains new_mstatus.prv) { reg_mstatus.prv := new_mstatus.prv } | ||||||
| @@ -377,17 +383,17 @@ class CSRFile extends CoreModule | |||||||
|         } |         } | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       if (params(UseVM)) { |       if (usingVM) { | ||||||
|         val vm_on = if (xLen == 32) 8 else 9 |         val vm_on = if (xLen == 32) 8 else 9 | ||||||
|         when (new_mstatus.vm === 0) { reg_mstatus.vm := 0 } |         when (new_mstatus.vm === 0) { reg_mstatus.vm := 0 } | ||||||
|         when (new_mstatus.vm === vm_on) { reg_mstatus.vm := vm_on } |         when (new_mstatus.vm === vm_on) { reg_mstatus.vm := vm_on } | ||||||
|       } |       } | ||||||
|       if (params(UseVM) || !params(BuildFPU).isEmpty) reg_mstatus.fs := new_mstatus.fs |       if (usingVM || usingFPU) reg_mstatus.fs := new_mstatus.fs | ||||||
|       if (!params(BuildRoCC).isEmpty) reg_mstatus.xs := new_mstatus.xs |       if (usingRoCC) reg_mstatus.xs := new_mstatus.xs | ||||||
|     } |     } | ||||||
|     when (decoded_addr(CSRs.mip)) { |     when (decoded_addr(CSRs.mip)) { | ||||||
|       val new_mip = new MIP().fromBits(wdata) |       val new_mip = new MIP().fromBits(wdata) | ||||||
|       if (params(UseVM)) { |       if (usingVM) { | ||||||
|         reg_mip.ssip := new_mip.ssip |         reg_mip.ssip := new_mip.ssip | ||||||
|         reg_mip.stip := new_mip.stip |         reg_mip.stip := new_mip.stip | ||||||
|       } |       } | ||||||
| @@ -395,7 +401,7 @@ class CSRFile extends CoreModule | |||||||
|     } |     } | ||||||
|     when (decoded_addr(CSRs.mie)) { |     when (decoded_addr(CSRs.mie)) { | ||||||
|       val new_mie = new MIP().fromBits(wdata) |       val new_mie = new MIP().fromBits(wdata) | ||||||
|       if (params(UseVM)) { |       if (usingVM) { | ||||||
|         reg_mie.ssip := new_mie.ssip |         reg_mie.ssip := new_mie.ssip | ||||||
|         reg_mie.stip := new_mie.stip |         reg_mie.stip := new_mie.stip | ||||||
|       } |       } | ||||||
| @@ -409,13 +415,14 @@ class CSRFile extends CoreModule | |||||||
|     when (decoded_addr(CSRs.mscratch)) { reg_mscratch := wdata } |     when (decoded_addr(CSRs.mscratch)) { reg_mscratch := wdata } | ||||||
|     when (decoded_addr(CSRs.mcause))   { reg_mcause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ } |     when (decoded_addr(CSRs.mcause))   { reg_mcause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ } | ||||||
|     when (decoded_addr(CSRs.mbadaddr)) { reg_mbadaddr := wdata(vaddrBitsExtended-1,0) } |     when (decoded_addr(CSRs.mbadaddr)) { reg_mbadaddr := wdata(vaddrBitsExtended-1,0) } | ||||||
|     when (decoded_addr(CSRs.instretw)) { reg_instret := wdata } |     if (usingPerfCounters) | ||||||
|  |       when (decoded_addr(CSRs.instretw)) { reg_instret := wdata } | ||||||
|     when (decoded_addr(CSRs.mtimecmp)) { reg_mtimecmp := wdata; reg_mip.mtip := false } |     when (decoded_addr(CSRs.mtimecmp)) { reg_mtimecmp := wdata; reg_mip.mtip := false } | ||||||
|     when (decoded_addr(CSRs.mreset) /* XXX used by HTIF to write mtime */) { reg_time := wdata } |     when (decoded_addr(CSRs.mtime))    { reg_time := wdata } | ||||||
|     when (decoded_addr(CSRs.mfromhost)){ when (reg_fromhost === UInt(0) || !host_pcr_req_fire) { reg_fromhost := wdata } } |     when (decoded_addr(CSRs.mfromhost)){ when (reg_fromhost === UInt(0) || !host_csr_req_fire) { reg_fromhost := wdata } } | ||||||
|     when (decoded_addr(CSRs.mtohost))  { when (reg_tohost === UInt(0) || host_pcr_req_fire) { reg_tohost := wdata } } |     when (decoded_addr(CSRs.mtohost))  { when (reg_tohost === UInt(0) || host_csr_req_fire) { reg_tohost := wdata } } | ||||||
|     when (decoded_addr(CSRs.stats))    { reg_stats := wdata(0) } |     when (decoded_addr(CSRs.stats))    { reg_stats := wdata(0) } | ||||||
|     if (params(UseVM)) { |     if (usingVM) { | ||||||
|       when (decoded_addr(CSRs.sstatus)) { |       when (decoded_addr(CSRs.sstatus)) { | ||||||
|         val new_sstatus = new SStatus().fromBits(wdata) |         val new_sstatus = new SStatus().fromBits(wdata) | ||||||
|         reg_mstatus.ie := new_sstatus.ie |         reg_mstatus.ie := new_sstatus.ie | ||||||
| @@ -423,7 +430,7 @@ class CSRFile extends CoreModule | |||||||
|         reg_mstatus.prv1 := Mux[UInt](new_sstatus.ps, PRV_S, PRV_U) |         reg_mstatus.prv1 := Mux[UInt](new_sstatus.ps, PRV_S, PRV_U) | ||||||
|         reg_mstatus.mprv := new_sstatus.mprv |         reg_mstatus.mprv := new_sstatus.mprv | ||||||
|         reg_mstatus.fs := new_sstatus.fs // even without an FPU |         reg_mstatus.fs := new_sstatus.fs // even without an FPU | ||||||
|         if (!params(BuildRoCC).isEmpty) reg_mstatus.xs := new_sstatus.xs |         if (usingRoCC) reg_mstatus.xs := new_sstatus.xs | ||||||
|       } |       } | ||||||
|       when (decoded_addr(CSRs.sip)) { |       when (decoded_addr(CSRs.sip)) { | ||||||
|         val new_sip = new MIP().fromBits(wdata) |         val new_sip = new MIP().fromBits(wdata) | ||||||
|   | |||||||
| @@ -42,7 +42,7 @@ object ALU | |||||||
| } | } | ||||||
| import ALU._ | import ALU._ | ||||||
|  |  | ||||||
| class ALUIO extends CoreBundle { | class ALUIO(implicit p: Parameters) extends CoreBundle()(p) { | ||||||
|   val dw = Bits(INPUT, SZ_DW) |   val dw = Bits(INPUT, SZ_DW) | ||||||
|   val fn = Bits(INPUT, SZ_ALU_FN) |   val fn = Bits(INPUT, SZ_ALU_FN) | ||||||
|   val in2 = UInt(INPUT, xLen) |   val in2 = UInt(INPUT, xLen) | ||||||
| @@ -51,8 +51,7 @@ class ALUIO extends CoreBundle { | |||||||
|   val adder_out = UInt(OUTPUT, xLen) |   val adder_out = UInt(OUTPUT, xLen) | ||||||
| } | } | ||||||
|  |  | ||||||
| class ALU extends Module | class ALU(implicit p: Parameters) extends Module { | ||||||
| { |  | ||||||
|   val io = new ALUIO |   val io = new ALUIO | ||||||
|  |  | ||||||
|   // ADD, SUB |   // ADD, SUB | ||||||
|   | |||||||
| @@ -131,8 +131,10 @@ class FPUDecoder extends Module | |||||||
|           FSQRT_D  -> List(FCMD_SQRT,   N,Y,Y,N,N,Y,X,N,N,N,N,N,N,Y,Y,Y) |           FSQRT_D  -> List(FCMD_SQRT,   N,Y,Y,N,N,Y,X,N,N,N,N,N,N,Y,Y,Y) | ||||||
|           )) |           )) | ||||||
|   val s = io.sigs |   val s = io.sigs | ||||||
|   Vec(s.cmd, s.ldst, s.wen, s.ren1, s.ren2, s.ren3, s.swap12, s.swap23, s.single, s.fromint, |   val sigs = Seq(s.cmd, s.ldst, s.wen, s.ren1, s.ren2, s.ren3, s.swap12, | ||||||
|       s.toint, s.fastpipe, s.fma, s.div, s.sqrt, s.round, s.wflags) := decoder |                  s.swap23, s.single, s.fromint, s.toint, s.fastpipe, s.fma, | ||||||
|  |                  s.div, s.sqrt, s.round, s.wflags) | ||||||
|  |   sigs zip decoder map {case(s,d) => s := d} | ||||||
| } | } | ||||||
|  |  | ||||||
| class FPUIO extends Bundle { | class FPUIO extends Bundle { | ||||||
| @@ -215,7 +217,7 @@ class FPToInt extends Module | |||||||
|   dcmp.io.a := in.in1 |   dcmp.io.a := in.in1 | ||||||
|   dcmp.io.b := in.in2 |   dcmp.io.b := in.in2 | ||||||
|   val dcmp_out = (~in.rm & Cat(dcmp.io.a_lt_b, dcmp.io.a_eq_b)).orR |   val dcmp_out = (~in.rm & Cat(dcmp.io.a_lt_b, dcmp.io.a_eq_b)).orR | ||||||
|   val dcmp_exc = (~in.rm & Cat(dcmp.io.a_lt_b_invalid, dcmp.io.a_eq_b_invalid)).orR << UInt(4) |   val dcmp_exc = (~in.rm & Cat(dcmp.io.a_lt_b_invalid, dcmp.io.a_eq_b_invalid)).orR << 4 | ||||||
|  |  | ||||||
|   val d2i = hardfloat.recodedFloatNToAny(in.in1, in.rm, in.typ ^ 1, 52, 12, 64) |   val d2i = hardfloat.recodedFloatNToAny(in.in1, in.rm, in.typ ^ 1, 52, 12, 64) | ||||||
|  |  | ||||||
| @@ -346,13 +348,12 @@ class FPUFMAPipe(val latency: Int, sigWidth: Int, expWidth: Int) extends Module | |||||||
|   fma.io.c := in.in3 |   fma.io.c := in.in3 | ||||||
|  |  | ||||||
|   val res = Wire(new FPResult) |   val res = Wire(new FPResult) | ||||||
|   res.data := fma.io.out |   res.data := Cat(SInt(-1, 32), fma.io.out) | ||||||
|   res.exc := fma.io.exceptionFlags |   res.exc := fma.io.exceptionFlags | ||||||
|   io.out := Pipe(valid, res, latency-1) |   io.out := Pipe(valid, res, latency-1) | ||||||
| } | } | ||||||
|  |  | ||||||
| class FPU extends Module | class FPU(implicit p: Parameters) extends CoreModule()(p) { | ||||||
| { |  | ||||||
|   val io = new FPUIO |   val io = new FPUIO | ||||||
|  |  | ||||||
|   val ex_reg_valid = Reg(next=io.valid, init=Bool(false)) |   val ex_reg_valid = Reg(next=io.valid, init=Bool(false)) | ||||||
| @@ -389,8 +390,14 @@ class FPU extends Module | |||||||
|   val load_wb_data_recoded = Mux(load_wb_single, Cat(SInt(-1, 32), rec_s), rec_d) |   val load_wb_data_recoded = Mux(load_wb_single, Cat(SInt(-1, 32), rec_s), rec_d) | ||||||
|  |  | ||||||
|   // regfile |   // regfile | ||||||
|   val regfile = Mem(Bits(width = 65), 32) |   val regfile = Mem(32, Bits(width = 65)) | ||||||
|   when (load_wb) { regfile(load_wb_tag) := load_wb_data_recoded } |   when (load_wb) {  | ||||||
|  |     regfile(load_wb_tag) := load_wb_data_recoded  | ||||||
|  |     if (enableCommitLog) { | ||||||
|  |       printf ("f%d p%d 0x%x\n", load_wb_tag, load_wb_tag + UInt(32), | ||||||
|  |         Mux(load_wb_single, load_wb_data(31,0), load_wb_data)) | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|   val ex_ra1::ex_ra2::ex_ra3::Nil = List.fill(3)(Reg(UInt())) |   val ex_ra1::ex_ra2::ex_ra3::Nil = List.fill(3)(Reg(UInt())) | ||||||
|   when (io.valid) { |   when (io.valid) { | ||||||
| @@ -420,11 +427,11 @@ class FPU extends Module | |||||||
|   req.in3 := Mux(ex_reg_valid, ex_rs3, cp_rs3) |   req.in3 := Mux(ex_reg_valid, ex_rs3, cp_rs3) | ||||||
|   req.typ := Mux(ex_reg_valid, ex_reg_inst(21,20), io.cp_req.bits.typ) |   req.typ := Mux(ex_reg_valid, ex_reg_inst(21,20), io.cp_req.bits.typ) | ||||||
|  |  | ||||||
|   val sfma = Module(new FPUFMAPipe(params(SFMALatency), 23, 9)) |   val sfma = Module(new FPUFMAPipe(p(SFMALatency), 23, 9)) | ||||||
|   sfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.single |   sfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.single | ||||||
|   sfma.io.in.bits := req |   sfma.io.in.bits := req | ||||||
|  |  | ||||||
|   val dfma = Module(new FPUFMAPipe(params(DFMALatency), 52, 12)) |   val dfma = Module(new FPUFMAPipe(p(DFMALatency), 52, 12)) | ||||||
|   dfma.io.in.valid := req_valid && ex_ctrl.fma && !ex_ctrl.single |   dfma.io.in.valid := req_valid && ex_ctrl.fma && !ex_ctrl.single | ||||||
|   dfma.io.in.bits := req |   dfma.io.in.bits := req | ||||||
|  |  | ||||||
| @@ -457,12 +464,12 @@ class FPU extends Module | |||||||
|   val divSqrt_cp = Reg(init=Bool(false)) |   val divSqrt_cp = Reg(init=Bool(false)) | ||||||
|  |  | ||||||
|   // writeback arbitration |   // writeback arbitration | ||||||
|   case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, wdata: UInt, wexc: UInt) |   case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, res: FPResult) | ||||||
|   val pipes = List( |   val pipes = List( | ||||||
|     Pipe(fpmu, fpmu.latency, (c: FPUCtrlSigs) => c.fastpipe, fpmu.io.out.bits.data, fpmu.io.out.bits.exc), |     Pipe(fpmu, fpmu.latency, (c: FPUCtrlSigs) => c.fastpipe, fpmu.io.out.bits), | ||||||
|     Pipe(ifpu, ifpu.latency, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits.data, ifpu.io.out.bits.exc), |     Pipe(ifpu, ifpu.latency, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits), | ||||||
|     Pipe(sfma, sfma.latency, (c: FPUCtrlSigs) => c.fma && c.single, Cat(SInt(-1, 32), sfma.io.out.bits.data), sfma.io.out.bits.exc), |     Pipe(sfma, sfma.latency, (c: FPUCtrlSigs) => c.fma && c.single, sfma.io.out.bits), | ||||||
|     Pipe(dfma, dfma.latency, (c: FPUCtrlSigs) => c.fma && !c.single, dfma.io.out.bits.data, dfma.io.out.bits.exc)) |     Pipe(dfma, dfma.latency, (c: FPUCtrlSigs) => c.fma && !c.single, dfma.io.out.bits)) | ||||||
|   def latencyMask(c: FPUCtrlSigs, offset: Int) = { |   def latencyMask(c: FPUCtrlSigs, offset: Int) = { | ||||||
|     require(pipes.forall(_.lat >= offset)) |     require(pipes.forall(_.lat >= offset)) | ||||||
|     pipes.map(p => Mux(p.cond(c), UInt(1 << p.lat-offset), UInt(0))).reduce(_|_) |     pipes.map(p => Mux(p.cond(c), UInt(1 << p.lat-offset), UInt(0))).reduce(_|_) | ||||||
| @@ -472,10 +479,10 @@ class FPU extends Module | |||||||
|   val memLatencyMask = latencyMask(mem_ctrl, 2) |   val memLatencyMask = latencyMask(mem_ctrl, 2) | ||||||
|  |  | ||||||
|   val wen = Reg(init=Bits(0, maxLatency-1)) |   val wen = Reg(init=Bits(0, maxLatency-1)) | ||||||
|   val winfo = Reg(Vec.fill(maxLatency-1){Bits()}) |   val winfo = Reg(Vec(Bits(), maxLatency-1)) | ||||||
|   val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint) |   val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint) | ||||||
|   val write_port_busy = RegEnable(mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, req_valid) |   val write_port_busy = RegEnable(mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, req_valid) | ||||||
|   val mem_winfo = Cat(mem_cp_valid, pipeid(mem_ctrl), mem_reg_inst(11,7)) |   val mem_winfo = Cat(mem_cp_valid, pipeid(mem_ctrl), mem_ctrl.single, mem_reg_inst(11,7)) //single only used for debugging | ||||||
|  |  | ||||||
|   for (i <- 0 until maxLatency-2) { |   for (i <- 0 until maxLatency-2) { | ||||||
|     when (wen(i+1)) { winfo(i) := winfo(i+1) } |     when (wen(i+1)) { winfo(i) := winfo(i+1) } | ||||||
| @@ -493,11 +500,20 @@ class FPU extends Module | |||||||
|   } |   } | ||||||
|  |  | ||||||
|   val waddr = Mux(divSqrt_wen, divSqrt_waddr, winfo(0)(4,0).toUInt) |   val waddr = Mux(divSqrt_wen, divSqrt_waddr, winfo(0)(4,0).toUInt) | ||||||
|   val wsrc = winfo(0) >> 5 |   val wsrc = (winfo(0) >> 6) | ||||||
|   val wcp = winfo(0)(5+log2Up(pipes.size)) |   val wcp = winfo(0)(6+log2Up(pipes.size)) | ||||||
|   val wdata = Mux(divSqrt_wen, divSqrt_wdata, Vec(pipes.map(_.wdata))(wsrc)) |   val wdata = Mux(divSqrt_wen, divSqrt_wdata, Vec(pipes.map(_.res.data))(wsrc)) | ||||||
|   val wexc = Vec(pipes.map(_.wexc))(wsrc) |   val wexc = Vec(pipes.map(_.res.exc))(wsrc) | ||||||
|   when ((!wcp && wen(0)) || (!divSqrt_cp && divSqrt_wen)) { regfile(waddr) := wdata } |   when ((!wcp && wen(0)) || (!divSqrt_cp && divSqrt_wen)) {  | ||||||
|  |     regfile(waddr) := wdata | ||||||
|  |     if (enableCommitLog) { | ||||||
|  |       val wdata_unrec_s = hardfloat.recodedFloatNToFloatN(wdata(64,0), 23, 9) | ||||||
|  |       val wdata_unrec_d = hardfloat.recodedFloatNToFloatN(wdata(64,0), 52, 12) | ||||||
|  |       val wb_single = (winfo(0) >> 5)(0) | ||||||
|  |       printf ("f%d p%d 0x%x\n", waddr, waddr+ UInt(32), | ||||||
|  |         Mux(wb_single, Cat(UInt(0,32), wdata_unrec_s), wdata_unrec_d)) | ||||||
|  |     } | ||||||
|  |   } | ||||||
|   when ((wcp && wen(0)) || (divSqrt_cp && divSqrt_wen)) {  |   when ((wcp && wen(0)) || (divSqrt_cp && divSqrt_wen)) {  | ||||||
|     io.cp_resp.bits.data := wdata |     io.cp_resp.bits.data := wdata | ||||||
|     io.cp_resp.valid := Bool(true)  |     io.cp_resp.valid := Bool(true)  | ||||||
| @@ -525,7 +541,7 @@ class FPU extends Module | |||||||
|  |  | ||||||
|   divSqrt_wdata := 0 |   divSqrt_wdata := 0 | ||||||
|   divSqrt_flags := 0 |   divSqrt_flags := 0 | ||||||
|   if (params(FDivSqrt)) { |   if (p(FDivSqrt)) { | ||||||
|     val divSqrt_single = Reg(Bool()) |     val divSqrt_single = Reg(Bool()) | ||||||
|     val divSqrt_rm = Reg(Bits()) |     val divSqrt_rm = Reg(Bits()) | ||||||
|     val divSqrt_flags_double = Reg(Bits()) |     val divSqrt_flags_double = Reg(Bits()) | ||||||
|   | |||||||
							
								
								
									
										126
									
								
								rocket/src/main/scala/frontend.scala
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										126
									
								
								rocket/src/main/scala/frontend.scala
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,126 @@ | |||||||
|  | package rocket | ||||||
|  |  | ||||||
|  | import Chisel._ | ||||||
|  | import uncore._ | ||||||
|  | import Util._ | ||||||
|  |  | ||||||
|  | class FrontendReq(implicit p: Parameters) extends CoreBundle()(p) { | ||||||
|  |   val pc = UInt(width = vaddrBitsExtended) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) { | ||||||
|  |   val pc = UInt(width = vaddrBitsExtended)  // ID stage PC | ||||||
|  |   val data = Vec(Bits(width = coreInstBits), fetchWidth) | ||||||
|  |   val mask = Bits(width = fetchWidth) | ||||||
|  |   val xcpt_if = Bool() | ||||||
|  | } | ||||||
|  |  | ||||||
|  | class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) { | ||||||
|  |   val req = Valid(new FrontendReq) | ||||||
|  |   val resp = Decoupled(new FrontendResp).flip | ||||||
|  |   val btb_resp = Valid(new BTBResp).flip | ||||||
|  |   val btb_update = Valid(new BTBUpdate) | ||||||
|  |   val bht_update = Valid(new BHTUpdate) | ||||||
|  |   val ras_update = Valid(new RASUpdate) | ||||||
|  |   val invalidate = Bool(OUTPUT) | ||||||
|  |   val npc = UInt(INPUT, width = vaddrBitsExtended) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CacheParameters { | ||||||
|  |   val io = new Bundle { | ||||||
|  |     val cpu = new FrontendIO().flip | ||||||
|  |     val ptw = new TLBPTWIO() | ||||||
|  |     val mem = new ClientUncachedTileLinkIO | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   val btb = Module(new BTB) | ||||||
|  |   val icache = Module(new ICache) | ||||||
|  |   val tlb = Module(new TLB) | ||||||
|  |  | ||||||
|  |   val s1_pc_ = Reg(UInt()) | ||||||
|  |   val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBS (this propagates down the pipeline) | ||||||
|  |   val s1_same_block = Reg(Bool()) | ||||||
|  |   val s2_valid = Reg(init=Bool(true)) | ||||||
|  |   val s2_pc = Reg(init=UInt(START_ADDR)) | ||||||
|  |   val s2_btb_resp_valid = Reg(init=Bool(false)) | ||||||
|  |   val s2_btb_resp_bits = Reg(btb.io.resp.bits) | ||||||
|  |   val s2_xcpt_if = Reg(init=Bool(false)) | ||||||
|  |   val icbuf = Module(new Queue(new ICacheResp, 1, pipe=true)) | ||||||
|  |  | ||||||
|  |   val msb = vaddrBits-1 | ||||||
|  |   val lsb = log2Up(fetchWidth*coreInstBytes) | ||||||
|  |   val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target) | ||||||
|  |   val ntpc_0 = s1_pc + UInt(coreInstBytes*fetchWidth) | ||||||
|  |   val ntpc = Cat(s1_pc(msb) & ntpc_0(msb), ntpc_0(msb,lsb), Bits(0,lsb)) // unsure | ||||||
|  |   val icmiss = s2_valid && !icbuf.io.deq.valid | ||||||
|  |   val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, ntpc) | ||||||
|  |   val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt | ||||||
|  |   val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((ntpc & rowBytes) === (s1_pc & rowBytes)) | ||||||
|  |  | ||||||
|  |   val stall = io.cpu.resp.valid && !io.cpu.resp.ready | ||||||
|  |   when (!stall) { | ||||||
|  |     s1_same_block := s0_same_block && !tlb.io.resp.miss | ||||||
|  |     s1_pc_ := npc | ||||||
|  |     s2_valid := !icmiss | ||||||
|  |     when (!icmiss) { | ||||||
|  |       s2_pc := s1_pc | ||||||
|  |       s2_btb_resp_valid := btb.io.resp.valid | ||||||
|  |       when (btb.io.resp.valid) { s2_btb_resp_bits := btb.io.resp.bits } | ||||||
|  |       s2_xcpt_if := tlb.io.resp.xcpt_if | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   when (io.cpu.req.valid) { | ||||||
|  |     s1_same_block := Bool(false) | ||||||
|  |     s1_pc_ := io.cpu.req.bits.pc | ||||||
|  |     s2_valid := Bool(false) | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   btb.io.req.valid := !stall && !icmiss | ||||||
|  |   btb.io.req.bits.addr := s1_pc | ||||||
|  |   btb.io.btb_update := io.cpu.btb_update | ||||||
|  |   btb.io.bht_update := io.cpu.bht_update | ||||||
|  |   btb.io.ras_update := io.cpu.ras_update | ||||||
|  |   btb.io.invalidate := io.cpu.invalidate || io.ptw.invalidate | ||||||
|  |  | ||||||
|  |   io.ptw <> tlb.io.ptw | ||||||
|  |   tlb.io.req.valid := !stall && !icmiss | ||||||
|  |   tlb.io.req.bits.vpn := s1_pc >> pgIdxBits | ||||||
|  |   tlb.io.req.bits.asid := UInt(0) | ||||||
|  |   tlb.io.req.bits.passthrough := Bool(false) | ||||||
|  |   tlb.io.req.bits.instruction := Bool(true) | ||||||
|  |   tlb.io.req.bits.store := Bool(false) | ||||||
|  |  | ||||||
|  |   io.mem <> icache.io.mem | ||||||
|  |   icache.io.req.valid := !stall && !s0_same_block | ||||||
|  |   icache.io.req.bits.idx := io.cpu.npc | ||||||
|  |   icache.io.invalidate := io.cpu.invalidate | ||||||
|  |   icache.io.req.bits.ppn := tlb.io.resp.ppn | ||||||
|  |   icache.io.req.bits.kill := io.cpu.req.valid || | ||||||
|  |     tlb.io.resp.miss || tlb.io.resp.xcpt_if || | ||||||
|  |     icmiss || io.ptw.invalidate | ||||||
|  |   icache.io.resp.ready := !stall && !s1_same_block | ||||||
|  |  | ||||||
|  |   io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icbuf.io.deq.valid) | ||||||
|  |   io.cpu.resp.bits.pc := s2_pc | ||||||
|  |   io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) | ||||||
|  |  | ||||||
|  |   icbuf.io.enq <> icache.io.resp | ||||||
|  |   icbuf.io.deq.ready := !stall && !s1_same_block | ||||||
|  |  | ||||||
|  |   require(fetchWidth * coreInstBytes <= rowBytes) | ||||||
|  |   val fetch_data = | ||||||
|  |     if (fetchWidth * coreInstBytes == rowBytes) icbuf.io.deq.bits.datablock | ||||||
|  |     else icbuf.io.deq.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(fetchWidth*coreInstBytes)) << log2Up(fetchWidth*coreInstBits)) | ||||||
|  |  | ||||||
|  |   for (i <- 0 until fetchWidth) { | ||||||
|  |     io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits) | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   val all_ones = UInt((1 << (fetchWidth+1))-1) | ||||||
|  |   val msk_pc = if (fetchWidth == 1) all_ones else all_ones << s2_pc(log2Up(fetchWidth) -1+2,2) | ||||||
|  |   io.cpu.resp.bits.mask := Mux(s2_btb_resp_valid, msk_pc & s2_btb_resp_bits.mask, msk_pc) | ||||||
|  |   io.cpu.resp.bits.xcpt_if := s2_xcpt_if | ||||||
|  |  | ||||||
|  |   io.cpu.btb_resp.valid := s2_btb_resp_valid | ||||||
|  |   io.cpu.btb_resp.bits := s2_btb_resp_bits | ||||||
|  | } | ||||||
| @@ -4,148 +4,26 @@ import Chisel._ | |||||||
| import uncore._ | import uncore._ | ||||||
| import Util._ | import Util._ | ||||||
|  |  | ||||||
| abstract trait L1CacheParameters extends CacheParameters with CoreParameters { | trait HasL1CacheParameters extends HasCacheParameters with HasCoreParameters { | ||||||
|   val outerDataBeats = params(TLDataBeats) |   val outerDataBeats = p(TLKey(p(TLId))).dataBeats | ||||||
|   val outerDataBits = params(TLDataBits) |   val outerDataBits = p(TLKey(p(TLId))).dataBitsPerBeat | ||||||
|  |   val outerAddrBits = p(TLKey(p(TLId))).addrBits | ||||||
|   val refillCyclesPerBeat = outerDataBits/rowBits |   val refillCyclesPerBeat = outerDataBits/rowBits | ||||||
|   val refillCycles = refillCyclesPerBeat*outerDataBeats |   val refillCycles = refillCyclesPerBeat*outerDataBeats | ||||||
| } | } | ||||||
|  |  | ||||||
| abstract trait FrontendParameters extends L1CacheParameters | class ICacheReq(implicit p: Parameters) extends CoreBundle()(p) { | ||||||
| abstract class FrontendBundle extends Bundle with FrontendParameters |  | ||||||
| abstract class FrontendModule extends Module with FrontendParameters |  | ||||||
|  |  | ||||||
| class FrontendReq extends CoreBundle { |  | ||||||
|   val pc = UInt(width = vaddrBitsExtended) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| class FrontendResp extends CoreBundle { |  | ||||||
|   val pc = UInt(width = vaddrBitsExtended)  // ID stage PC |  | ||||||
|   val data = Vec.fill(coreFetchWidth) (Bits(width = coreInstBits)) |  | ||||||
|   val mask = Bits(width = coreFetchWidth) |  | ||||||
|   val xcpt_if = Bool() |  | ||||||
| } |  | ||||||
|  |  | ||||||
| class CPUFrontendIO extends CoreBundle { |  | ||||||
|   val req = Valid(new FrontendReq) |  | ||||||
|   val resp = Decoupled(new FrontendResp).flip |  | ||||||
|   val btb_resp = Valid(new BTBResp).flip |  | ||||||
|   val btb_update = Valid(new BTBUpdate) |  | ||||||
|   val bht_update = Valid(new BHTUpdate) |  | ||||||
|   val ras_update = Valid(new RASUpdate) |  | ||||||
|   val invalidate = Bool(OUTPUT) |  | ||||||
|   val npc = UInt(INPUT, width = vaddrBitsExtended) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule |  | ||||||
| { |  | ||||||
|   val io = new Bundle { |  | ||||||
|     val cpu = new CPUFrontendIO().flip |  | ||||||
|     val ptw = new TLBPTWIO() |  | ||||||
|     val mem = new ClientUncachedTileLinkIO |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   val btb = Module(new BTB(btb_updates_out_of_order)) |  | ||||||
|   val icache = Module(new ICache) |  | ||||||
|   val tlb = Module(new TLB) |  | ||||||
|  |  | ||||||
|   val s1_pc_ = Reg(UInt()) |  | ||||||
|   val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBS (this propagates down the pipeline) |  | ||||||
|   val s1_same_block = Reg(Bool()) |  | ||||||
|   val s2_valid = Reg(init=Bool(true)) |  | ||||||
|   val s2_pc = Reg(init=UInt(START_ADDR)) |  | ||||||
|   val s2_btb_resp_valid = Reg(init=Bool(false)) |  | ||||||
|   val s2_btb_resp_bits = Reg(btb.io.resp.bits) |  | ||||||
|   val s2_xcpt_if = Reg(init=Bool(false)) |  | ||||||
|   val icbuf = Module(new Queue(new ICacheResp, 1, pipe=true)) |  | ||||||
|  |  | ||||||
|   val msb = vaddrBits-1 |  | ||||||
|   val lsb = log2Up(coreFetchWidth*coreInstBytes) |  | ||||||
|   val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target) |  | ||||||
|   val ntpc_0 = s1_pc + UInt(coreInstBytes*coreFetchWidth) |  | ||||||
|   val ntpc = Cat(s1_pc(msb) & ntpc_0(msb), ntpc_0(msb,lsb), Bits(0,lsb)) // unsure |  | ||||||
|   val icmiss = s2_valid && !icbuf.io.deq.valid |  | ||||||
|   val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, ntpc) |  | ||||||
|   val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt |  | ||||||
|   val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((ntpc & rowBytes) === (s1_pc & rowBytes)) |  | ||||||
|  |  | ||||||
|   val stall = io.cpu.resp.valid && !io.cpu.resp.ready |  | ||||||
|   when (!stall) { |  | ||||||
|     s1_same_block := s0_same_block && !tlb.io.resp.miss |  | ||||||
|     s1_pc_ := npc |  | ||||||
|     s2_valid := !icmiss |  | ||||||
|     when (!icmiss) { |  | ||||||
|       s2_pc := s1_pc |  | ||||||
|       s2_btb_resp_valid := btb.io.resp.valid |  | ||||||
|       when (btb.io.resp.valid) { s2_btb_resp_bits := btb.io.resp.bits } |  | ||||||
|       s2_xcpt_if := tlb.io.resp.xcpt_if |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|   when (io.cpu.req.valid) { |  | ||||||
|     s1_same_block := Bool(false) |  | ||||||
|     s1_pc_ := io.cpu.req.bits.pc |  | ||||||
|     s2_valid := Bool(false) |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   btb.io.req.valid := !stall && !icmiss |  | ||||||
|   btb.io.req.bits.addr := s1_pc |  | ||||||
|   btb.io.btb_update := io.cpu.btb_update |  | ||||||
|   btb.io.bht_update := io.cpu.bht_update |  | ||||||
|   btb.io.ras_update := io.cpu.ras_update |  | ||||||
|   btb.io.invalidate := io.cpu.invalidate || io.ptw.invalidate |  | ||||||
|  |  | ||||||
|   io.ptw <> tlb.io.ptw |  | ||||||
|   tlb.io.req.valid := !stall && !icmiss |  | ||||||
|   tlb.io.req.bits.vpn := s1_pc >> UInt(pgIdxBits) |  | ||||||
|   tlb.io.req.bits.asid := UInt(0) |  | ||||||
|   tlb.io.req.bits.passthrough := Bool(false) |  | ||||||
|   tlb.io.req.bits.instruction := Bool(true) |  | ||||||
|   tlb.io.req.bits.store := Bool(false) |  | ||||||
|  |  | ||||||
|   io.mem <> icache.io.mem |  | ||||||
|   icache.io.req.valid := !stall && !s0_same_block |  | ||||||
|   icache.io.req.bits.idx := io.cpu.npc |  | ||||||
|   icache.io.invalidate := io.cpu.invalidate |  | ||||||
|   icache.io.req.bits.ppn := tlb.io.resp.ppn |  | ||||||
|   icache.io.req.bits.kill := io.cpu.req.valid || tlb.io.resp.miss || icmiss || io.ptw.invalidate |  | ||||||
|  |  | ||||||
|   io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icbuf.io.deq.valid) |  | ||||||
|   io.cpu.resp.bits.pc := s2_pc |  | ||||||
|   io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) |  | ||||||
|  |  | ||||||
|   icbuf.io.enq <> icache.io.resp |  | ||||||
|   icbuf.io.deq.ready := !stall && !s1_same_block |  | ||||||
|  |  | ||||||
|   require(coreFetchWidth * coreInstBytes <= rowBytes) |  | ||||||
|   val fetch_data = |  | ||||||
|     if (coreFetchWidth * coreInstBytes == rowBytes) icbuf.io.deq.bits.datablock |  | ||||||
|     else icbuf.io.deq.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(coreFetchWidth*coreInstBytes)) << log2Up(coreFetchWidth*coreInstBits)) |  | ||||||
|  |  | ||||||
|   for (i <- 0 until coreFetchWidth) { |  | ||||||
|     io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits) |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   val all_ones = UInt((1 << (coreFetchWidth+1))-1) |  | ||||||
|   val msk_pc = if (coreFetchWidth == 1) all_ones else all_ones << s2_pc(log2Up(coreFetchWidth) -1+2,2) |  | ||||||
|   io.cpu.resp.bits.mask := Mux(s2_btb_resp_valid, msk_pc & s2_btb_resp_bits.mask, msk_pc) |  | ||||||
|   io.cpu.resp.bits.xcpt_if := s2_xcpt_if |  | ||||||
|  |  | ||||||
|   io.cpu.btb_resp.valid := s2_btb_resp_valid |  | ||||||
|   io.cpu.btb_resp.bits := s2_btb_resp_bits |  | ||||||
| } |  | ||||||
|  |  | ||||||
| class ICacheReq extends FrontendBundle { |  | ||||||
|   val idx = UInt(width = pgIdxBits) |   val idx = UInt(width = pgIdxBits) | ||||||
|   val ppn = UInt(width = ppnBits) // delayed one cycle |   val ppn = UInt(width = ppnBits) // delayed one cycle | ||||||
|   val kill = Bool() // delayed one cycle |   val kill = Bool() // delayed one cycle | ||||||
| } | } | ||||||
|  |  | ||||||
| class ICacheResp extends FrontendBundle { | class ICacheResp(implicit p: Parameters) extends CoreBundle()(p) with HasL1CacheParameters { | ||||||
|  |   val data = Bits(width = coreInstBits) | ||||||
|   val datablock = Bits(width = rowBits) |   val datablock = Bits(width = rowBits) | ||||||
| } | } | ||||||
|  |  | ||||||
| class ICache extends FrontendModule | class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CacheParameters { | ||||||
| { |  | ||||||
|   val io = new Bundle { |   val io = new Bundle { | ||||||
|     val req = Valid(new ICacheReq).flip |     val req = Valid(new ICacheReq).flip | ||||||
|     val resp = Decoupled(new ICacheResp) |     val resp = Decoupled(new ICacheResp) | ||||||
| @@ -197,12 +75,11 @@ class ICache extends FrontendModule | |||||||
|  |  | ||||||
|   val repl_way = if (isDM) UInt(0) else LFSR16(s1_miss)(log2Up(nWays)-1,0) |   val repl_way = if (isDM) UInt(0) else LFSR16(s1_miss)(log2Up(nWays)-1,0) | ||||||
|   val entagbits = code.width(tagBits) |   val entagbits = code.width(tagBits) | ||||||
|   val tag_array = SeqMem(Bits(width = entagbits*nWays), nSets) |   val tag_array = SeqMem(Vec(Bits(width = entagbits), nWays), nSets) | ||||||
|   val tag_rdata = tag_array.read(s0_pgoff(untagBits-1,blockOffBits), !refill_done && s0_valid) |   val tag_rdata = tag_array.read(s0_pgoff(untagBits-1,blockOffBits), !refill_done && s0_valid) | ||||||
|   when (refill_done) { |   when (refill_done) { | ||||||
|     val wmask = FillInterleaved(entagbits, if (isDM) Bits(1) else UIntToOH(repl_way)) |  | ||||||
|     val tag = code.encode(refill_tag).toUInt |     val tag = code.encode(refill_tag).toUInt | ||||||
|     tag_array.write(s1_idx, Fill(nWays, tag), wmask) |     tag_array.write(s1_idx, Vec.fill(nWays)(tag), Vec.tabulate(nWays)(repl_way === _)) | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   val vb_array = Reg(init=Bits(0, nSets*nWays)) |   val vb_array = Reg(init=Bits(0, nSets*nWays)) | ||||||
| @@ -223,7 +100,7 @@ class ICache extends FrontendModule | |||||||
|  |  | ||||||
|   for (i <- 0 until nWays) { |   for (i <- 0 until nWays) { | ||||||
|     val s1_vb = !io.invalidate && vb_array(Cat(UInt(i), s1_pgoff(untagBits-1,blockOffBits))).toBool |     val s1_vb = !io.invalidate && vb_array(Cat(UInt(i), s1_pgoff(untagBits-1,blockOffBits))).toBool | ||||||
|     val tag_out = tag_rdata(entagbits*(i+1)-1, entagbits*i) |     val tag_out = tag_rdata(i) | ||||||
|     val s1_tag_disparity = code.decode(tag_out).error |     val s1_tag_disparity = code.decode(tag_out).error | ||||||
|     when (s1_valid && rdy && !stall) { |     when (s1_valid && rdy && !stall) { | ||||||
|     } |     } | ||||||
| @@ -252,7 +129,7 @@ class ICache extends FrontendModule | |||||||
|   // output signals |   // output signals | ||||||
|   io.resp.valid := s1_hit |   io.resp.valid := s1_hit | ||||||
|   io.mem.acquire.valid := (state === s_request) |   io.mem.acquire.valid := (state === s_request) | ||||||
|   io.mem.acquire.bits := GetBlock(addr_block = refill_addr >> UInt(blockOffBits)) |   io.mem.acquire.bits := GetBlock(addr_block = refill_addr >> blockOffBits) | ||||||
|  |  | ||||||
|   // control state machine |   // control state machine | ||||||
|   switch (state) { |   switch (state) { | ||||||
|   | |||||||
| @@ -192,6 +192,26 @@ object Instructions { | |||||||
|   def CUSTOM3_RD         = BitPat("b?????????????????100?????1111011") |   def CUSTOM3_RD         = BitPat("b?????????????????100?????1111011") | ||||||
|   def CUSTOM3_RD_RS1     = BitPat("b?????????????????110?????1111011") |   def CUSTOM3_RD_RS1     = BitPat("b?????????????????110?????1111011") | ||||||
|   def CUSTOM3_RD_RS1_RS2 = BitPat("b?????????????????111?????1111011") |   def CUSTOM3_RD_RS1_RS2 = BitPat("b?????????????????111?????1111011") | ||||||
|  |   def SLLI_RV32          = BitPat("b0000000??????????001?????0010011") | ||||||
|  |   def SRLI_RV32          = BitPat("b0000000??????????101?????0010011") | ||||||
|  |   def SRAI_RV32          = BitPat("b0100000??????????101?????0010011") | ||||||
|  |   def FRFLAGS            = BitPat("b00000000000100000010?????1110011") | ||||||
|  |   def FSFLAGS            = BitPat("b000000000001?????001?????1110011") | ||||||
|  |   def FSFLAGSI           = BitPat("b000000000001?????101?????1110011") | ||||||
|  |   def FRRM               = BitPat("b00000000001000000010?????1110011") | ||||||
|  |   def FSRM               = BitPat("b000000000010?????001?????1110011") | ||||||
|  |   def FSRMI              = BitPat("b000000000010?????101?????1110011") | ||||||
|  |   def FSCSR              = BitPat("b000000000011?????001?????1110011") | ||||||
|  |   def FRCSR              = BitPat("b00000000001100000010?????1110011") | ||||||
|  |   def RDCYCLE            = BitPat("b11000000000000000010?????1110011") | ||||||
|  |   def RDTIME             = BitPat("b11000000000100000010?????1110011") | ||||||
|  |   def RDINSTRET          = BitPat("b11000000001000000010?????1110011") | ||||||
|  |   def RDCYCLEH           = BitPat("b11001000000000000010?????1110011") | ||||||
|  |   def RDTIMEH            = BitPat("b11001000000100000010?????1110011") | ||||||
|  |   def RDINSTRETH         = BitPat("b11001000001000000010?????1110011") | ||||||
|  |   def ECALL              = BitPat("b00000000000000000000000001110011") | ||||||
|  |   def EBREAK             = BitPat("b00000000000100000000000001110011") | ||||||
|  |   def ERET               = BitPat("b00010000000000000000000001110011") | ||||||
| } | } | ||||||
| object Causes { | object Causes { | ||||||
|   val misaligned_fetch = 0x0 |   val misaligned_fetch = 0x0 | ||||||
|   | |||||||
| @@ -6,29 +6,35 @@ import Chisel._ | |||||||
| import ALU._ | import ALU._ | ||||||
| import Util._ | import Util._ | ||||||
|  |  | ||||||
| class MultiplierReq extends CoreBundle { | class MultiplierReq(dataBits: Int, tagBits: Int) extends Bundle { | ||||||
|   val fn = Bits(width = SZ_ALU_FN) |   val fn = Bits(width = SZ_ALU_FN) | ||||||
|   val dw = Bits(width = SZ_DW) |   val dw = Bits(width = SZ_DW) | ||||||
|   val in1 = Bits(width = xLen) |   val in1 = Bits(width = dataBits) | ||||||
|   val in2 = Bits(width = xLen) |   val in2 = Bits(width = dataBits) | ||||||
|   val tag = UInt(width = log2Up(params(NMultXpr))) |   val tag = UInt(width = tagBits) | ||||||
|  |   override def cloneType = new MultiplierReq(dataBits, tagBits).asInstanceOf[this.type] | ||||||
| } | } | ||||||
|  |  | ||||||
| class MultiplierResp extends CoreBundle { | class MultiplierResp(dataBits: Int, tagBits: Int) extends Bundle { | ||||||
|   val data = Bits(width = xLen) |   val data = Bits(width = dataBits) | ||||||
|   val tag = UInt(width = log2Up(params(NMultXpr))) |   val tag = UInt(width = tagBits) | ||||||
|  |   override def cloneType = new MultiplierResp(dataBits, tagBits).asInstanceOf[this.type] | ||||||
| } | } | ||||||
|  |  | ||||||
| class MultiplierIO extends Bundle { | class MultiplierIO(dataBits: Int, tagBits: Int) extends Bundle { | ||||||
|   val req = Decoupled(new MultiplierReq).flip |   val req = Decoupled(new MultiplierReq(dataBits, tagBits)).flip | ||||||
|   val kill = Bool(INPUT) |   val kill = Bool(INPUT) | ||||||
|   val resp = Decoupled(new MultiplierResp) |   val resp = Decoupled(new MultiplierResp(dataBits, tagBits)) | ||||||
| } | } | ||||||
|  |  | ||||||
| class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false) extends Module { | class MulDiv( | ||||||
|   val io = new MultiplierIO |     width: Int, | ||||||
|  |     nXpr: Int = 32, | ||||||
|  |     unroll: Int = 1, | ||||||
|  |     earlyOut: Boolean = false) extends Module { | ||||||
|  |   val io = new MultiplierIO(width, log2Up(nXpr)) | ||||||
|   val w = io.req.bits.in1.getWidth |   val w = io.req.bits.in1.getWidth | ||||||
|   val mulw = (w+mulUnroll-1)/mulUnroll*mulUnroll |   val mulw = (w+unroll-1)/unroll*unroll | ||||||
|   |   | ||||||
|   val s_ready :: s_neg_inputs :: s_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(UInt(), 6) |   val s_ready :: s_neg_inputs :: s_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(UInt(), 6) | ||||||
|   val state = Reg(init=s_ready) |   val state = Reg(init=s_ready) | ||||||
| @@ -87,18 +93,18 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false) extends Module { | |||||||
|     val mplier = mulReg(mulw-1,0) |     val mplier = mulReg(mulw-1,0) | ||||||
|     val accum = mulReg(2*mulw,mulw).toSInt |     val accum = mulReg(2*mulw,mulw).toSInt | ||||||
|     val mpcand = divisor.toSInt |     val mpcand = divisor.toSInt | ||||||
|     val prod = mplier(mulUnroll-1,0) * mpcand + accum |     val prod = mplier(unroll-1,0) * mpcand + accum | ||||||
|     val nextMulReg = Cat(prod, mplier(mulw-1,mulUnroll)).toUInt |     val nextMulReg = Cat(prod, mplier(mulw-1,unroll)).toUInt | ||||||
|  |  | ||||||
|     val eOutMask = (SInt(BigInt(-1) << mulw) >> (count * mulUnroll)(log2Up(mulw)-1,0))(mulw-1,0) |     val eOutMask = (SInt(BigInt(-1) << mulw) >> (count * unroll)(log2Up(mulw)-1,0))(mulw-1,0) | ||||||
|     val eOut = Bool(earlyOut) && count != mulw/mulUnroll-1 && count != 0 && |     val eOut = Bool(earlyOut) && count != mulw/unroll-1 && count != 0 && | ||||||
|       !isHi && (mplier & ~eOutMask) === UInt(0) |       !isHi && (mplier & ~eOutMask) === UInt(0) | ||||||
|     val eOutRes = (mulReg >> (mulw - count * mulUnroll)(log2Up(mulw)-1,0)) |     val eOutRes = (mulReg >> (mulw - count * unroll)(log2Up(mulw)-1,0)) | ||||||
|     val nextMulReg1 = Cat(nextMulReg(2*mulw,mulw), Mux(eOut, eOutRes, nextMulReg)(mulw-1,0)) |     val nextMulReg1 = Cat(nextMulReg(2*mulw,mulw), Mux(eOut, eOutRes, nextMulReg)(mulw-1,0)) | ||||||
|     remainder := Cat(nextMulReg1 >> w, Bool(false), nextMulReg1(w-1,0)) |     remainder := Cat(nextMulReg1 >> w, Bool(false), nextMulReg1(w-1,0)) | ||||||
|  |  | ||||||
|     count := count + 1 |     count := count + 1 | ||||||
|     when (eOut || count === mulw/mulUnroll-1) { |     when (eOut || count === mulw/unroll-1) { | ||||||
|       state := Mux(isHi, s_move_rem, s_done) |       state := Mux(isHi, s_move_rem, s_done) | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|   | |||||||
| @@ -4,18 +4,23 @@ package rocket | |||||||
|  |  | ||||||
| import Chisel._ | import Chisel._ | ||||||
| import uncore._ | import uncore._ | ||||||
|  | import junctions.MMIOBase | ||||||
| import Util._ | import Util._ | ||||||
|  |  | ||||||
| case object WordBits extends Field[Int] | case object WordBits extends Field[Int] | ||||||
| case object StoreDataQueueDepth extends Field[Int] | case object StoreDataQueueDepth extends Field[Int] | ||||||
| case object ReplayQueueDepth extends Field[Int] | case object ReplayQueueDepth extends Field[Int] | ||||||
| case object NMSHRs extends Field[Int] | case object NMSHRs extends Field[Int] | ||||||
|  | case object NIOMSHRs extends Field[Int] | ||||||
| case object LRSCCycles extends Field[Int] | case object LRSCCycles extends Field[Int] | ||||||
|  |  | ||||||
| abstract trait L1HellaCacheParameters extends L1CacheParameters { | trait HasL1HellaCacheParameters extends HasL1CacheParameters { | ||||||
|   val wordBits = params(WordBits) |   val wordBits = p(WordBits) | ||||||
|   val wordBytes = wordBits/8 |   val wordBytes = wordBits/8 | ||||||
|   val wordOffBits = log2Up(wordBytes) |   val wordOffBits = log2Up(wordBytes) | ||||||
|  |   val beatBytes = p(CacheBlockBytes) / outerDataBeats | ||||||
|  |   val beatWords = beatBytes / wordBytes | ||||||
|  |   val beatOffBits = log2Up(beatBytes) | ||||||
|   val idxMSB = untagBits-1 |   val idxMSB = untagBits-1 | ||||||
|   val idxLSB = blockOffBits |   val idxLSB = blockOffBits | ||||||
|   val offsetmsb = idxLSB-1 |   val offsetmsb = idxLSB-1 | ||||||
| @@ -24,46 +29,53 @@ abstract trait L1HellaCacheParameters extends L1CacheParameters { | |||||||
|   val doNarrowRead = coreDataBits * nWays % rowBits == 0 |   val doNarrowRead = coreDataBits * nWays % rowBits == 0 | ||||||
|   val encDataBits = code.width(coreDataBits) |   val encDataBits = code.width(coreDataBits) | ||||||
|   val encRowBits = encDataBits*rowWords |   val encRowBits = encDataBits*rowWords | ||||||
|   val sdqDepth = params(StoreDataQueueDepth) |   val sdqDepth = p(StoreDataQueueDepth) | ||||||
|   val nMSHRs = params(NMSHRs) |   val nMSHRs = p(NMSHRs) | ||||||
|  |   val nIOMSHRs = p(NIOMSHRs) | ||||||
|  |   val lrscCycles = p(LRSCCycles) | ||||||
| } | } | ||||||
|  |  | ||||||
| abstract class L1HellaCacheBundle extends Bundle with L1HellaCacheParameters | abstract class L1HellaCacheModule(implicit val p: Parameters) extends Module | ||||||
| abstract class L1HellaCacheModule extends Module with L1HellaCacheParameters |   with HasL1HellaCacheParameters | ||||||
|  | abstract class L1HellaCacheBundle(implicit val p: Parameters) extends junctions.ParameterizedBundle()(p) | ||||||
|  |   with HasL1HellaCacheParameters | ||||||
|  |  | ||||||
| trait HasCoreMemOp extends CoreBundle { | trait HasCoreMemOp extends HasCoreParameters { | ||||||
|   val addr = UInt(width = coreMaxAddrBits) |   val addr = UInt(width = coreMaxAddrBits) | ||||||
|   val tag  = Bits(width = coreDCacheReqTagBits) |   val tag  = Bits(width = coreDCacheReqTagBits) | ||||||
|   val cmd  = Bits(width = M_SZ) |   val cmd  = Bits(width = M_SZ) | ||||||
|   val typ  = Bits(width = MT_SZ) |   val typ  = Bits(width = MT_SZ) | ||||||
| } | } | ||||||
|  |  | ||||||
| trait HasCoreData extends CoreBundle { | trait HasCoreData extends HasCoreParameters { | ||||||
|   val data = Bits(width = coreDataBits) |   val data = Bits(width = coreDataBits) | ||||||
| } | } | ||||||
|  |  | ||||||
| trait HasSDQId extends CoreBundle with L1HellaCacheParameters { | trait HasSDQId extends HasL1HellaCacheParameters { | ||||||
|   val sdq_id = UInt(width = log2Up(sdqDepth)) |   val sdq_id = UInt(width = log2Up(sdqDepth)) | ||||||
| } | } | ||||||
|  |  | ||||||
| trait HasMissInfo extends CoreBundle with L1HellaCacheParameters { | trait HasMissInfo extends HasL1HellaCacheParameters { | ||||||
|   val tag_match = Bool() |   val tag_match = Bool() | ||||||
|   val old_meta = new L1Metadata |   val old_meta = new L1Metadata | ||||||
|   val way_en = Bits(width = nWays) |   val way_en = Bits(width = nWays) | ||||||
| } | } | ||||||
|  |  | ||||||
| class HellaCacheReqInternal extends HasCoreMemOp { | class HellaCacheReqInternal(implicit p: Parameters) extends L1HellaCacheBundle()(p) | ||||||
|  |     with HasCoreMemOp { | ||||||
|   val kill = Bool() |   val kill = Bool() | ||||||
|   val phys = Bool() |   val phys = Bool() | ||||||
| } | } | ||||||
|  |  | ||||||
| class HellaCacheReq extends HellaCacheReqInternal with HasCoreData | class HellaCacheReq(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasCoreData | ||||||
|  |  | ||||||
| class HellaCacheResp extends HasCoreMemOp with HasCoreData { | class HellaCacheResp(implicit p: Parameters) extends L1HellaCacheBundle()(p) | ||||||
|  |     with HasCoreMemOp | ||||||
|  |     with HasCoreData { | ||||||
|   val nack = Bool() // comes 2 cycles after req.fire |   val nack = Bool() // comes 2 cycles after req.fire | ||||||
|   val replay = Bool() |   val replay = Bool() | ||||||
|   val has_data = Bool() |   val has_data = Bool() | ||||||
|   val data_subword = Bits(width = coreDataBits) |   val data_word_bypass = Bits(width = coreDataBits) | ||||||
|   val store_data = Bits(width = coreDataBits) |   val store_data = Bits(width = coreDataBits) | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -78,7 +90,7 @@ class HellaCacheExceptions extends Bundle { | |||||||
| } | } | ||||||
|  |  | ||||||
| // interface between D$ and processor/DTLB | // interface between D$ and processor/DTLB | ||||||
| class HellaCacheIO extends CoreBundle { | class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) { | ||||||
|   val req = Decoupled(new HellaCacheReq) |   val req = Decoupled(new HellaCacheReq) | ||||||
|   val resp = Valid(new HellaCacheResp).flip |   val resp = Valid(new HellaCacheResp).flip | ||||||
|   val replay_next = Valid(Bits(width = coreDCacheReqTagBits)).flip |   val replay_next = Valid(Bits(width = coreDCacheReqTagBits)).flip | ||||||
| @@ -87,50 +99,128 @@ class HellaCacheIO extends CoreBundle { | |||||||
|   val ordered = Bool(INPUT) |   val ordered = Bool(INPUT) | ||||||
| } | } | ||||||
|  |  | ||||||
| class L1DataReadReq extends L1HellaCacheBundle { | class L1DataReadReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) { | ||||||
|   val way_en = Bits(width = nWays) |   val way_en = Bits(width = nWays) | ||||||
|   val addr   = Bits(width = untagBits) |   val addr   = Bits(width = untagBits) | ||||||
| } | } | ||||||
|  |  | ||||||
| class L1DataWriteReq extends L1DataReadReq { | class L1DataWriteReq(implicit p: Parameters) extends L1DataReadReq()(p) { | ||||||
|   val wmask  = Bits(width = rowWords) |   val wmask  = Bits(width = rowWords) | ||||||
|   val data   = Bits(width = encRowBits) |   val data   = Bits(width = encRowBits) | ||||||
| } | } | ||||||
|  |  | ||||||
| class L1RefillReq extends L1DataReadReq | class L1RefillReq(implicit p: Parameters) extends L1DataReadReq()(p) | ||||||
|  |  | ||||||
| class L1MetaReadReq extends MetaReadReq { | class L1MetaReadReq(implicit p: Parameters) extends MetaReadReq { | ||||||
|   val tag = Bits(width = tagBits) |   val tag = Bits(width = tagBits) | ||||||
|  |   override def cloneType = new L1MetaReadReq()(p).asInstanceOf[this.type] //TODO remove | ||||||
| } | } | ||||||
|  |  | ||||||
| class L1MetaWriteReq extends  | class L1MetaWriteReq(implicit p: Parameters) extends  | ||||||
|   MetaWriteReq[L1Metadata](new L1Metadata) |   MetaWriteReq[L1Metadata](new L1Metadata) | ||||||
|  |  | ||||||
| object L1Metadata { | object L1Metadata { | ||||||
|   def apply(tag: Bits, coh: ClientMetadata) = { |   def apply(tag: Bits, coh: ClientMetadata)(implicit p: Parameters) = { | ||||||
|     val meta = Wire(new L1Metadata) |     val meta = Wire(new L1Metadata) | ||||||
|     meta.tag := tag |     meta.tag := tag | ||||||
|     meta.coh := coh |     meta.coh := coh | ||||||
|     meta |     meta | ||||||
|   } |   } | ||||||
| } | } | ||||||
| class L1Metadata extends Metadata with L1HellaCacheParameters { | class L1Metadata(implicit p: Parameters) extends Metadata()(p) with HasL1HellaCacheParameters { | ||||||
|   val coh = new ClientMetadata |   val coh = new ClientMetadata | ||||||
| } | } | ||||||
|  |  | ||||||
| class Replay extends HellaCacheReqInternal with HasCoreData | class Replay(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasCoreData | ||||||
| class ReplayInternal extends HellaCacheReqInternal with HasSDQId | class ReplayInternal(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasSDQId | ||||||
|  |  | ||||||
| class MSHRReq extends Replay with HasMissInfo | class MSHRReq(implicit p: Parameters) extends Replay()(p) with HasMissInfo | ||||||
| class MSHRReqInternal extends ReplayInternal with HasMissInfo | class MSHRReqInternal(implicit p: Parameters) extends ReplayInternal()(p) with HasMissInfo | ||||||
|  |  | ||||||
| class ProbeInternal extends Probe with HasClientTransactionId | class ProbeInternal(implicit p: Parameters) extends Probe()(p) with HasClientTransactionId | ||||||
|  |  | ||||||
| class WritebackReq extends Release with CacheParameters { | class WritebackReq(implicit p: Parameters) extends Release()(p) with HasCacheParameters { | ||||||
|   val way_en = Bits(width = nWays) |   val way_en = Bits(width = nWays) | ||||||
| } | } | ||||||
|  |  | ||||||
| class MSHR(id: Int) extends L1HellaCacheModule { | class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { | ||||||
|  |   val io = new Bundle { | ||||||
|  |     val req = Decoupled(new HellaCacheReq).flip | ||||||
|  |     val acquire = Decoupled(new Acquire) | ||||||
|  |     val grant = Valid(new Grant).flip | ||||||
|  |     val resp = Decoupled(new HellaCacheResp) | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   def wordFromBeat(addr: UInt, dat: UInt) = { | ||||||
|  |     val offset = addr(beatOffBits - 1, wordOffBits) | ||||||
|  |     val shift = Cat(offset, UInt(0, wordOffBits + 3)) | ||||||
|  |     (dat >> shift)(wordBits - 1, 0) | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   val req = Reg(new HellaCacheReq) | ||||||
|  |   val req_cmd_sc = req.cmd === M_XSC | ||||||
|  |   val grant_word = Reg(UInt(width = wordBits)) | ||||||
|  |  | ||||||
|  |   val storegen = new StoreGen64(req.typ, req.addr, req.data) | ||||||
|  |   val loadgen = new LoadGen64(req.typ, req.addr, grant_word, req_cmd_sc) | ||||||
|  |  | ||||||
|  |   val beat_offset = req.addr(beatOffBits - 1, wordOffBits) | ||||||
|  |   val beat_mask = (storegen.mask << Cat(beat_offset, UInt(0, wordOffBits))) | ||||||
|  |   val beat_data = Fill(beatWords, storegen.data) | ||||||
|  |  | ||||||
|  |   val addr_byte = req.addr(beatOffBits - 1, 0) | ||||||
|  |   val a_type = Mux(isRead(req.cmd), Acquire.getType, Acquire.putType) | ||||||
|  |   val union = Mux(isRead(req.cmd), | ||||||
|  |     Cat(addr_byte, req.typ, M_XRD), beat_mask) | ||||||
|  |  | ||||||
|  |   val s_idle :: s_acquire :: s_grant :: s_resp :: Nil = Enum(Bits(), 4) | ||||||
|  |   val state = Reg(init = s_idle) | ||||||
|  |  | ||||||
|  |   io.req.ready := (state === s_idle) | ||||||
|  |  | ||||||
|  |   io.acquire.valid := (state === s_acquire) | ||||||
|  |   io.acquire.bits := Acquire( | ||||||
|  |     is_builtin_type = Bool(true), | ||||||
|  |     a_type = a_type, | ||||||
|  |     client_xact_id = UInt(id), | ||||||
|  |     addr_block = req.addr(paddrBits - 1, blockOffBits), | ||||||
|  |     addr_beat = req.addr(blockOffBits - 1, beatOffBits), | ||||||
|  |     data = beat_data, | ||||||
|  |     // alloc bit should always be false | ||||||
|  |     union = Cat(union, Bool(false))) | ||||||
|  |  | ||||||
|  |   io.resp.valid := (state === s_resp) | ||||||
|  |   io.resp.bits := req | ||||||
|  |   io.resp.bits.has_data := isRead(req.cmd) | ||||||
|  |   io.resp.bits.data := loadgen.byte | req_cmd_sc | ||||||
|  |   io.resp.bits.store_data := req.data | ||||||
|  |   io.resp.bits.nack := Bool(false) | ||||||
|  |   io.resp.bits.replay := io.resp.valid | ||||||
|  |  | ||||||
|  |   when (io.req.fire()) { | ||||||
|  |     req := io.req.bits | ||||||
|  |     state := s_acquire | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   when (io.acquire.fire()) { | ||||||
|  |     state := s_grant | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   when (state === s_grant && io.grant.valid) { | ||||||
|  |     when (isRead(req.cmd)) { | ||||||
|  |       grant_word := wordFromBeat(req.addr, io.grant.bits.data) | ||||||
|  |       state := s_resp | ||||||
|  |     } .otherwise { | ||||||
|  |       state := s_idle | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   when (io.resp.fire()) { | ||||||
|  |     state := s_idle | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | class MSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { | ||||||
|   val io = new Bundle { |   val io = new Bundle { | ||||||
|     val req_pri_val    = Bool(INPUT) |     val req_pri_val    = Bool(INPUT) | ||||||
|     val req_pri_rdy    = Bool(OUTPUT) |     val req_pri_rdy    = Bool(OUTPUT) | ||||||
| @@ -173,7 +263,7 @@ class MSHR(id: Int) extends L1HellaCacheModule { | |||||||
|   val (refill_cnt, refill_count_done) = Counter(io.mem_grant.valid && gnt_multi_data, refillCycles) // TODO: Zero width? |   val (refill_cnt, refill_count_done) = Counter(io.mem_grant.valid && gnt_multi_data, refillCycles) // TODO: Zero width? | ||||||
|   val refill_done = io.mem_grant.valid && (!gnt_multi_data || refill_count_done) |   val refill_done = io.mem_grant.valid && (!gnt_multi_data || refill_count_done) | ||||||
|  |  | ||||||
|   val rpq = Module(new Queue(new ReplayInternal, params(ReplayQueueDepth))) |   val rpq = Module(new Queue(new ReplayInternal, p(ReplayQueueDepth))) | ||||||
|   rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && !isPrefetch(io.req_bits.cmd) |   rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && !isPrefetch(io.req_bits.cmd) | ||||||
|   rpq.io.enq.bits := io.req_bits |   rpq.io.enq.bits := io.req_bits | ||||||
|   rpq.io.deq.ready := io.replay.ready && state === s_drain_rpq || state === s_invalid |   rpq.io.deq.ready := io.replay.ready && state === s_drain_rpq || state === s_invalid | ||||||
| @@ -279,9 +369,10 @@ class MSHR(id: Int) extends L1HellaCacheModule { | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| class MSHRFile extends L1HellaCacheModule { | class MSHRFile(implicit p: Parameters) extends L1HellaCacheModule()(p) { | ||||||
|   val io = new Bundle { |   val io = new Bundle { | ||||||
|     val req = Decoupled(new MSHRReq).flip |     val req = Decoupled(new MSHRReq).flip | ||||||
|  |     val resp = Decoupled(new HellaCacheResp) | ||||||
|     val secondary_miss = Bool(OUTPUT) |     val secondary_miss = Bool(OUTPUT) | ||||||
|  |  | ||||||
|     val mem_req  = Decoupled(new Acquire) |     val mem_req  = Decoupled(new Acquire) | ||||||
| @@ -296,11 +387,14 @@ class MSHRFile extends L1HellaCacheModule { | |||||||
|     val fence_rdy = Bool(OUTPUT) |     val fence_rdy = Bool(OUTPUT) | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   // determine if the request is in the memory region or mmio region | ||||||
|  |   val cacheable = io.req.bits.addr < UInt(mmioBase) | ||||||
|  |  | ||||||
|   val sdq_val = Reg(init=Bits(0, sdqDepth)) |   val sdq_val = Reg(init=Bits(0, sdqDepth)) | ||||||
|   val sdq_alloc_id = PriorityEncoder(~sdq_val(sdqDepth-1,0)) |   val sdq_alloc_id = PriorityEncoder(~sdq_val(sdqDepth-1,0)) | ||||||
|   val sdq_rdy = !sdq_val.andR |   val sdq_rdy = !sdq_val.andR | ||||||
|   val sdq_enq = io.req.valid && io.req.ready && isWrite(io.req.bits.cmd) |   val sdq_enq = io.req.valid && io.req.ready && cacheable && isWrite(io.req.bits.cmd) | ||||||
|   val sdq = Mem(io.req.bits.data, sdqDepth) |   val sdq = Mem(sdqDepth, io.req.bits.data) | ||||||
|   when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data } |   when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data } | ||||||
|  |  | ||||||
|   val idxMatch = Wire(Vec(Bool(), nMSHRs)) |   val idxMatch = Wire(Vec(Bool(), nMSHRs)) | ||||||
| @@ -313,7 +407,7 @@ class MSHRFile extends L1HellaCacheModule { | |||||||
|   val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, nMSHRs)) |   val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, nMSHRs)) | ||||||
|   val mem_req_arb = Module(new LockingArbiter( |   val mem_req_arb = Module(new LockingArbiter( | ||||||
|                                   new Acquire, |                                   new Acquire, | ||||||
|                                   nMSHRs, |                                   nMSHRs + nIOMSHRs, | ||||||
|                                   outerDataBeats, |                                   outerDataBeats, | ||||||
|                                   (a: Acquire) => a.hasMultibeatData())) |                                   (a: Acquire) => a.hasMultibeatData())) | ||||||
|   val wb_req_arb = Module(new Arbiter(new WritebackReq, nMSHRs)) |   val wb_req_arb = Module(new Arbiter(new WritebackReq, nMSHRs)) | ||||||
| @@ -332,7 +426,7 @@ class MSHRFile extends L1HellaCacheModule { | |||||||
|  |  | ||||||
|     idxMatch(i) := mshr.io.idx_match |     idxMatch(i) := mshr.io.idx_match | ||||||
|     tagList(i) := mshr.io.tag |     tagList(i) := mshr.io.tag | ||||||
|     wbTagList(i) := mshr.io.wb_req.bits.addr_block >> UInt(idxBits) |     wbTagList(i) := mshr.io.wb_req.bits.addr_block >> idxBits | ||||||
|  |  | ||||||
|     alloc_arb.io.in(i).valid := mshr.io.req_pri_rdy |     alloc_arb.io.in(i).valid := mshr.io.req_pri_rdy | ||||||
|     mshr.io.req_pri_val := alloc_arb.io.in(i).ready |     mshr.io.req_pri_val := alloc_arb.io.in(i).ready | ||||||
| @@ -360,14 +454,44 @@ class MSHRFile extends L1HellaCacheModule { | |||||||
|     when (!mshr.io.probe_rdy) { io.probe_rdy := false } |     when (!mshr.io.probe_rdy) { io.probe_rdy := false } | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   alloc_arb.io.out.ready := io.req.valid && sdq_rdy && !idx_match |   alloc_arb.io.out.ready := io.req.valid && sdq_rdy && cacheable && !idx_match | ||||||
|  |  | ||||||
|   io.meta_read <> meta_read_arb.io.out |   io.meta_read <> meta_read_arb.io.out | ||||||
|   io.meta_write <> meta_write_arb.io.out |   io.meta_write <> meta_write_arb.io.out | ||||||
|   io.mem_req <> mem_req_arb.io.out |   io.mem_req <> mem_req_arb.io.out | ||||||
|   io.wb_req <> wb_req_arb.io.out |   io.wb_req <> wb_req_arb.io.out | ||||||
|  |  | ||||||
|   io.req.ready := Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy |   val mmio_alloc_arb = Module(new Arbiter(Bool(), nIOMSHRs)) | ||||||
|  |   val resp_arb = Module(new Arbiter(new HellaCacheResp, nIOMSHRs)) | ||||||
|  |  | ||||||
|  |   var mmio_rdy = Bool(false) | ||||||
|  |  | ||||||
|  |   for (i <- 0 until nIOMSHRs) { | ||||||
|  |     val id = nMSHRs + i | ||||||
|  |     val mshr = Module(new IOMSHR(id)) | ||||||
|  |  | ||||||
|  |     mmio_alloc_arb.io.in(i).valid := mshr.io.req.ready | ||||||
|  |     mshr.io.req.valid := mmio_alloc_arb.io.in(i).ready | ||||||
|  |     mshr.io.req.bits := io.req.bits | ||||||
|  |  | ||||||
|  |     mmio_rdy = mmio_rdy || mshr.io.req.ready | ||||||
|  |  | ||||||
|  |     mem_req_arb.io.in(id) <> mshr.io.acquire | ||||||
|  |  | ||||||
|  |     mshr.io.grant.bits := io.mem_grant.bits | ||||||
|  |     mshr.io.grant.valid := io.mem_grant.valid && | ||||||
|  |         io.mem_grant.bits.client_xact_id === UInt(id) | ||||||
|  |  | ||||||
|  |     resp_arb.io.in(i) <> mshr.io.resp | ||||||
|  |  | ||||||
|  |     when (!mshr.io.req.ready) { io.fence_rdy := Bool(false) } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   mmio_alloc_arb.io.out.ready := io.req.valid && !cacheable | ||||||
|  |  | ||||||
|  |   io.resp <> resp_arb.io.out | ||||||
|  |   io.req.ready := Mux(!cacheable, mmio_rdy, | ||||||
|  |     Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy) | ||||||
|   io.secondary_miss := idx_match |   io.secondary_miss := idx_match | ||||||
|   io.refill := refillMux(io.mem_grant.bits.client_xact_id) |   io.refill := refillMux(io.mem_grant.bits.client_xact_id) | ||||||
|  |  | ||||||
| @@ -381,7 +505,7 @@ class MSHRFile extends L1HellaCacheModule { | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| class WritebackUnit extends L1HellaCacheModule { | class WritebackUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) { | ||||||
|   val io = new Bundle { |   val io = new Bundle { | ||||||
|     val req = Decoupled(new WritebackReq).flip |     val req = Decoupled(new WritebackReq).flip | ||||||
|     val meta_read = Decoupled(new L1MetaReadReq) |     val meta_read = Decoupled(new L1MetaReadReq) | ||||||
| @@ -437,7 +561,7 @@ class WritebackUnit extends L1HellaCacheModule { | |||||||
|   // We reissue the meta read as it sets up the mux ctrl for s2_data_muxed |   // We reissue the meta read as it sets up the mux ctrl for s2_data_muxed | ||||||
|   io.meta_read.valid := fire |   io.meta_read.valid := fire | ||||||
|   io.meta_read.bits.idx := req_idx |   io.meta_read.bits.idx := req_idx | ||||||
|   io.meta_read.bits.tag := req.addr_block >> UInt(idxBits) |   io.meta_read.bits.tag := req.addr_block >> idxBits | ||||||
|  |  | ||||||
|   io.data_req.valid := fire |   io.data_req.valid := fire | ||||||
|   io.data_req.bits.way_en := req.way_en |   io.data_req.bits.way_en := req.way_en | ||||||
| @@ -461,7 +585,7 @@ class WritebackUnit extends L1HellaCacheModule { | |||||||
|   } else { io.data_resp }) |   } else { io.data_resp }) | ||||||
| } | } | ||||||
|  |  | ||||||
| class ProbeUnit extends L1HellaCacheModule { | class ProbeUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) { | ||||||
|   val io = new Bundle { |   val io = new Bundle { | ||||||
|     val req = Decoupled(new ProbeInternal).flip |     val req = Decoupled(new ProbeInternal).flip | ||||||
|     val rep = Decoupled(new Release) |     val rep = Decoupled(new Release) | ||||||
| @@ -513,7 +637,9 @@ class ProbeUnit extends L1HellaCacheModule { | |||||||
|     req := io.req.bits |     req := io.req.bits | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   val reply = old_coh.makeRelease(req) |   val miss_coh = ClientMetadata.onReset | ||||||
|  |   val reply_coh = Mux(tag_matches, old_coh, miss_coh) | ||||||
|  |   val reply = reply_coh.makeRelease(req) | ||||||
|   io.req.ready := state === s_invalid |   io.req.ready := state === s_invalid | ||||||
|   io.rep.valid := state === s_release && |   io.rep.valid := state === s_release && | ||||||
|                   !(tag_matches && old_coh.requiresVoluntaryWriteback()) // Otherwise WBU will issue release |                   !(tag_matches && old_coh.requiresVoluntaryWriteback()) // Otherwise WBU will issue release | ||||||
| @@ -534,11 +660,11 @@ class ProbeUnit extends L1HellaCacheModule { | |||||||
|   io.wb_req.bits.way_en := way_en |   io.wb_req.bits.way_en := way_en | ||||||
| } | } | ||||||
|  |  | ||||||
| class DataArray extends L1HellaCacheModule { | class DataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) { | ||||||
|   val io = new Bundle { |   val io = new Bundle { | ||||||
|     val read = Decoupled(new L1DataReadReq).flip |     val read = Decoupled(new L1DataReadReq).flip | ||||||
|     val write = Decoupled(new L1DataWriteReq).flip |     val write = Decoupled(new L1DataWriteReq).flip | ||||||
|     val resp = Vec.fill(nWays){Bits(OUTPUT, encRowBits)} |     val resp = Vec(Bits(OUTPUT, encRowBits), nWays) | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   val waddr = io.write.bits.addr >> rowOffBits |   val waddr = io.write.bits.addr >> rowOffBits | ||||||
| @@ -551,13 +677,12 @@ class DataArray extends L1HellaCacheModule { | |||||||
|       val resp = Wire(Vec(Bits(width = encRowBits), rowWords)) |       val resp = Wire(Vec(Bits(width = encRowBits), rowWords)) | ||||||
|       val r_raddr = RegEnable(io.read.bits.addr, io.read.valid) |       val r_raddr = RegEnable(io.read.bits.addr, io.read.valid) | ||||||
|       for (p <- 0 until resp.size) { |       for (p <- 0 until resp.size) { | ||||||
|         val array = SeqMem(Bits(width=encRowBits), nSets*refillCycles) |         val array = SeqMem(Vec(Bits(width=encDataBits), rowWords), nSets*refillCycles) | ||||||
|         when (wway_en.orR && io.write.valid && io.write.bits.wmask(p)) { |         when (wway_en.orR && io.write.valid && io.write.bits.wmask(p)) { | ||||||
|           val data = Fill(rowWords, io.write.bits.data(encDataBits*(p+1)-1,encDataBits*p)) |           val data = Vec.fill(rowWords)(io.write.bits.data(encDataBits*(p+1)-1,encDataBits*p)) | ||||||
|           val mask = FillInterleaved(encDataBits, wway_en) |           array.write(waddr, data, wway_en.toBools) | ||||||
|           array.write(waddr, data, mask) |  | ||||||
|         } |         } | ||||||
|         resp(p) := array.read(raddr, rway_en.orR && io.read.valid) |         resp(p) := array.read(raddr, rway_en.orR && io.read.valid).toBits | ||||||
|       } |       } | ||||||
|       for (dw <- 0 until rowWords) { |       for (dw <- 0 until rowWords) { | ||||||
|         val r = Vec(resp.map(_(encDataBits*(dw+1)-1,encDataBits*dw))) |         val r = Vec(resp.map(_(encDataBits*(dw+1)-1,encDataBits*dw))) | ||||||
| @@ -568,13 +693,13 @@ class DataArray extends L1HellaCacheModule { | |||||||
|       } |       } | ||||||
|     } |     } | ||||||
|   } else { |   } else { | ||||||
|     val wmask = FillInterleaved(encDataBits, io.write.bits.wmask) |  | ||||||
|     for (w <- 0 until nWays) { |     for (w <- 0 until nWays) { | ||||||
|       val array = SeqMem(Bits(width=encRowBits), nSets*refillCycles) |       val array = SeqMem(Vec(Bits(width=encDataBits), rowWords), nSets*refillCycles) | ||||||
|       when (io.write.bits.way_en(w) && io.write.valid) { |       when (io.write.bits.way_en(w) && io.write.valid) { | ||||||
|         array.write(waddr, io.write.bits.data, wmask) |         val data = Vec.tabulate(rowWords)(i => io.write.bits.data(encDataBits*(i+1)-1,encDataBits*i)) | ||||||
|  |         array.write(waddr, data, io.write.bits.wmask.toBools) | ||||||
|       } |       } | ||||||
|       io.resp(w) := array.read(raddr, io.read.bits.way_en(w) && io.read.valid) |       io.resp(w) := array.read(raddr, io.read.bits.way_en(w) && io.read.valid).toBits | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|  |  | ||||||
| @@ -582,18 +707,18 @@ class DataArray extends L1HellaCacheModule { | |||||||
|   io.write.ready := Bool(true) |   io.write.ready := Bool(true) | ||||||
| } | } | ||||||
|  |  | ||||||
| class HellaCache extends L1HellaCacheModule { | class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { | ||||||
|   val io = new Bundle { |   val io = new Bundle { | ||||||
|     val cpu = (new HellaCacheIO).flip |     val cpu = (new HellaCacheIO).flip | ||||||
|     val ptw = new TLBPTWIO() |     val ptw = new TLBPTWIO() | ||||||
|     val mem = new ClientTileLinkIO |     val mem = new ClientTileLinkIO | ||||||
|   } |   } | ||||||
|   |   | ||||||
|   require(params(LRSCCycles) >= 32) // ISA requires 16-insn LRSC sequences to succeed |   require(lrscCycles >= 32) // ISA requires 16-insn LRSC sequences to succeed | ||||||
|   require(isPow2(nSets)) |   require(isPow2(nSets)) | ||||||
|   require(isPow2(nWays)) // TODO: relax this |   require(isPow2(nWays)) // TODO: relax this | ||||||
|   require(params(RowBits) <= params(TLDataBits)) |   require(rowBits <= outerDataBits) | ||||||
|   require(paddrBits-blockOffBits == params(TLBlockAddrBits) ) |   require(paddrBits-blockOffBits == outerAddrBits) | ||||||
|   require(untagBits <= pgIdxBits) |   require(untagBits <= pgIdxBits) | ||||||
|  |  | ||||||
|   val wb = Module(new WritebackUnit) |   val wb = Module(new WritebackUnit) | ||||||
| @@ -620,7 +745,6 @@ class HellaCache extends L1HellaCacheModule { | |||||||
|   val s1_recycled = RegEnable(s2_recycle, Bool(false), s1_clk_en) |   val s1_recycled = RegEnable(s2_recycle, Bool(false), s1_clk_en) | ||||||
|   val s1_read  = isRead(s1_req.cmd) |   val s1_read  = isRead(s1_req.cmd) | ||||||
|   val s1_write = isWrite(s1_req.cmd) |   val s1_write = isWrite(s1_req.cmd) | ||||||
|   val s1_sc = s1_req.cmd === M_XSC |  | ||||||
|   val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd) |   val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd) | ||||||
|  |  | ||||||
|   val dtlb = Module(new TLB) |   val dtlb = Module(new TLB) | ||||||
| @@ -672,8 +796,8 @@ class HellaCache extends L1HellaCacheModule { | |||||||
|      |      | ||||||
|   io.cpu.xcpt.ma.ld := s1_read && misaligned |   io.cpu.xcpt.ma.ld := s1_read && misaligned | ||||||
|   io.cpu.xcpt.ma.st := s1_write && misaligned |   io.cpu.xcpt.ma.st := s1_write && misaligned | ||||||
|   io.cpu.xcpt.pf.ld := !s1_req.phys && s1_read && dtlb.io.resp.xcpt_ld |   io.cpu.xcpt.pf.ld := s1_read && dtlb.io.resp.xcpt_ld | ||||||
|   io.cpu.xcpt.pf.st := !s1_req.phys && s1_write && dtlb.io.resp.xcpt_st |   io.cpu.xcpt.pf.st := s1_write && dtlb.io.resp.xcpt_st | ||||||
|  |  | ||||||
|   assert (!(Reg(next= |   assert (!(Reg(next= | ||||||
|     (io.cpu.xcpt.ma.ld || io.cpu.xcpt.ma.st || io.cpu.xcpt.pf.ld || io.cpu.xcpt.pf.st)) && |     (io.cpu.xcpt.ma.ld || io.cpu.xcpt.ma.st || io.cpu.xcpt.pf.ld || io.cpu.xcpt.pf.st)) && | ||||||
| @@ -738,7 +862,7 @@ class HellaCache extends L1HellaCacheModule { | |||||||
|   when (lrsc_valid) { lrsc_count := lrsc_count - 1 } |   when (lrsc_valid) { lrsc_count := lrsc_count - 1 } | ||||||
|   when (s2_valid_masked && s2_hit || s2_replay) { |   when (s2_valid_masked && s2_hit || s2_replay) { | ||||||
|     when (s2_lr) { |     when (s2_lr) { | ||||||
|       when (!lrsc_valid) { lrsc_count := params(LRSCCycles)-1 } |       when (!lrsc_valid) { lrsc_count := lrscCycles-1 } | ||||||
|       lrsc_addr := s2_req.addr >> blockOffBits |       lrsc_addr := s2_req.addr >> blockOffBits | ||||||
|     } |     } | ||||||
|     when (s2_sc) { |     when (s2_sc) { | ||||||
| @@ -749,7 +873,7 @@ class HellaCache extends L1HellaCacheModule { | |||||||
|  |  | ||||||
|   val s2_data = Wire(Vec(Bits(width=encRowBits), nWays)) |   val s2_data = Wire(Vec(Bits(width=encRowBits), nWays)) | ||||||
|   for (w <- 0 until nWays) { |   for (w <- 0 until nWays) { | ||||||
|     val regs = Reg(Vec.fill(rowWords){Bits(width = encDataBits)}) |     val regs = Reg(Vec(Bits(width = encDataBits), rowWords)) | ||||||
|     val en1 = s1_clk_en && s1_tag_eq_way(w) |     val en1 = s1_clk_en && s1_tag_eq_way(w) | ||||||
|     for (i <- 0 until regs.size) { |     for (i <- 0 until regs.size) { | ||||||
|       val en = en1 && ((Bool(i == 0) || !Bool(doNarrowRead)) || s1_writeback) |       val en = en1 && ((Bool(i == 0) || !Bool(doNarrowRead)) || s1_writeback) | ||||||
| @@ -782,7 +906,7 @@ class HellaCache extends L1HellaCacheModule { | |||||||
|   writeArb.io.in(0).bits.way_en :=  s3_way |   writeArb.io.in(0).bits.way_en :=  s3_way | ||||||
|  |  | ||||||
|   // replacement policy |   // replacement policy | ||||||
|   val replacer = params(Replacer)() |   val replacer = p(Replacer)() | ||||||
|   val s1_replaced_way_en = UIntToOH(replacer.way) |   val s1_replaced_way_en = UIntToOH(replacer.way) | ||||||
|   val s2_replaced_way_en = UIntToOH(RegEnable(replacer.way, s1_clk_en)) |   val s2_replaced_way_en = UIntToOH(RegEnable(replacer.way, s1_clk_en)) | ||||||
|   val s2_repl_meta = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEnable(meta.io.resp(w), s1_clk_en && s1_replaced_way_en(w))).toSeq) |   val s2_repl_meta = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEnable(meta.io.resp(w), s1_clk_en && s1_replaced_way_en(w))).toSeq) | ||||||
| @@ -825,10 +949,14 @@ class HellaCache extends L1HellaCacheModule { | |||||||
|   mshrs.io.mem_grant.valid := narrow_grant.fire() |   mshrs.io.mem_grant.valid := narrow_grant.fire() | ||||||
|   mshrs.io.mem_grant.bits := narrow_grant.bits |   mshrs.io.mem_grant.bits := narrow_grant.bits | ||||||
|   narrow_grant.ready := writeArb.io.in(1).ready || !narrow_grant.bits.hasData() |   narrow_grant.ready := writeArb.io.in(1).ready || !narrow_grant.bits.hasData() | ||||||
|   writeArb.io.in(1).valid := narrow_grant.valid && narrow_grant.bits.hasData() |   /* The last clause here is necessary in order to prevent the responses for | ||||||
|  |    * the IOMSHRs from being written into the data array. It works because the | ||||||
|  |    * IOMSHR ids start right after the ones for the regular MSHRs. */ | ||||||
|  |   writeArb.io.in(1).valid := narrow_grant.valid && narrow_grant.bits.hasData() && | ||||||
|  |                              narrow_grant.bits.client_xact_id < UInt(nMSHRs) | ||||||
|   writeArb.io.in(1).bits.addr := mshrs.io.refill.addr |   writeArb.io.in(1).bits.addr := mshrs.io.refill.addr | ||||||
|   writeArb.io.in(1).bits.way_en := mshrs.io.refill.way_en |   writeArb.io.in(1).bits.way_en := mshrs.io.refill.way_en | ||||||
|   writeArb.io.in(1).bits.wmask := ~UInt(0, nWays) |   writeArb.io.in(1).bits.wmask := ~UInt(0, rowWords) | ||||||
|   writeArb.io.in(1).bits.data := narrow_grant.bits.data(encRowBits-1,0) |   writeArb.io.in(1).bits.data := narrow_grant.bits.data(encRowBits-1,0) | ||||||
|   data.io.read <> readArb.io.out |   data.io.read <> readArb.io.out | ||||||
|   readArb.io.out.ready := !narrow_grant.valid || narrow_grant.ready // insert bubble if refill gets blocked |   readArb.io.out.ready := !narrow_grant.valid || narrow_grant.ready // insert bubble if refill gets blocked | ||||||
| @@ -864,7 +992,7 @@ class HellaCache extends L1HellaCacheModule { | |||||||
|   // load data subword mux/sign extension |   // load data subword mux/sign extension | ||||||
|   val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(coreDataBits))) |   val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(coreDataBits))) | ||||||
|   val s2_data_word = Mux(s2_store_bypass, s2_store_bypass_data, s2_data_word_prebypass) |   val s2_data_word = Mux(s2_store_bypass, s2_store_bypass_data, s2_data_word_prebypass) | ||||||
|   val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word, s2_sc) |   val loadgen = new LoadGen64(s2_req.typ, s2_req.addr, s2_data_word, s2_sc) | ||||||
|    |    | ||||||
|   amoalu.io.addr := s2_req.addr |   amoalu.io.addr := s2_req.addr | ||||||
|   amoalu.io.cmd := s2_req.cmd |   amoalu.io.cmd := s2_req.cmd | ||||||
| @@ -894,22 +1022,31 @@ class HellaCache extends L1HellaCacheModule { | |||||||
|     io.cpu.req.ready := Bool(false) |     io.cpu.req.ready := Bool(false) | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   io.cpu.resp.valid  := (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable |   val cache_resp = Wire(Valid(new HellaCacheResp)) | ||||||
|   io.cpu.resp.bits.nack := s2_valid && s2_nack |   cache_resp.valid := (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable | ||||||
|   io.cpu.resp.bits := s2_req |   cache_resp.bits := s2_req | ||||||
|   io.cpu.resp.bits.has_data := isRead(s2_req.cmd) || s2_sc |   cache_resp.bits.has_data := isRead(s2_req.cmd) | ||||||
|   io.cpu.resp.bits.replay := s2_replay |   cache_resp.bits.data := loadgen.byte | s2_sc_fail | ||||||
|   io.cpu.resp.bits.data := loadgen.word |   cache_resp.bits.store_data := s2_req.data | ||||||
|   io.cpu.resp.bits.data_subword := loadgen.byte | s2_sc_fail |   cache_resp.bits.nack := s2_valid && s2_nack | ||||||
|   io.cpu.resp.bits.store_data := s2_req.data |   cache_resp.bits.replay := s2_replay | ||||||
|   io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid |  | ||||||
|  |  | ||||||
|   io.cpu.replay_next.valid := s1_replay && (s1_read || s1_sc) |   val uncache_resp = Wire(Valid(new HellaCacheResp)) | ||||||
|  |   uncache_resp.bits := mshrs.io.resp.bits | ||||||
|  |   uncache_resp.valid := mshrs.io.resp.valid | ||||||
|  |  | ||||||
|  |   val cache_pass = s2_valid || s2_replay | ||||||
|  |   mshrs.io.resp.ready := !cache_pass | ||||||
|  |  | ||||||
|  |   io.cpu.resp := Mux(cache_pass, cache_resp, uncache_resp) | ||||||
|  |   io.cpu.resp.bits.data_word_bypass := loadgen.word | ||||||
|  |   io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid | ||||||
|  |   io.cpu.replay_next.valid := s1_replay && s1_read | ||||||
|   io.cpu.replay_next.bits := s1_req.tag |   io.cpu.replay_next.bits := s1_req.tag | ||||||
| } | } | ||||||
|  |  | ||||||
| // exposes a sane decoupled request interface | // exposes a sane decoupled request interface | ||||||
| class SimpleHellaCacheIF extends Module | class SimpleHellaCacheIF(implicit p: Parameters) extends Module | ||||||
| { | { | ||||||
|   val io = new Bundle { |   val io = new Bundle { | ||||||
|     val requestor = new HellaCacheIO().flip |     val requestor = new HellaCacheIO().flip | ||||||
|   | |||||||
| @@ -6,32 +6,32 @@ import Chisel._ | |||||||
| import uncore._ | import uncore._ | ||||||
| import Util._ | import Util._ | ||||||
|  |  | ||||||
| class PTWReq extends CoreBundle { | class PTWReq(implicit p: Parameters) extends CoreBundle()(p) { | ||||||
|   val addr = UInt(width = vpnBits) |   val addr = UInt(width = vpnBits) | ||||||
|   val prv = Bits(width = 2) |   val prv = Bits(width = 2) | ||||||
|   val store = Bool() |   val store = Bool() | ||||||
|   val fetch = Bool() |   val fetch = Bool() | ||||||
| } | } | ||||||
|  |  | ||||||
| class PTWResp extends CoreBundle { | class PTWResp(implicit p: Parameters) extends CoreBundle()(p) { | ||||||
|   val error = Bool() |   val error = Bool() | ||||||
|   val pte = new PTE |   val pte = new PTE | ||||||
| } | } | ||||||
|  |  | ||||||
| class TLBPTWIO extends CoreBundle { | class TLBPTWIO(implicit p: Parameters) extends CoreBundle()(p) { | ||||||
|   val req = Decoupled(new PTWReq) |   val req = Decoupled(new PTWReq) | ||||||
|   val resp = Valid(new PTWResp).flip |   val resp = Valid(new PTWResp).flip | ||||||
|   val status = new MStatus().asInput |   val status = new MStatus().asInput | ||||||
|   val invalidate = Bool(INPUT) |   val invalidate = Bool(INPUT) | ||||||
| } | } | ||||||
|  |  | ||||||
| class DatapathPTWIO extends CoreBundle { | class DatapathPTWIO(implicit p: Parameters) extends CoreBundle()(p) { | ||||||
|   val ptbr = UInt(INPUT, paddrBits) |   val ptbr = UInt(INPUT, paddrBits) | ||||||
|   val invalidate = Bool(INPUT) |   val invalidate = Bool(INPUT) | ||||||
|   val status = new MStatus().asInput |   val status = new MStatus().asInput | ||||||
| } | } | ||||||
|  |  | ||||||
| class PTE extends CoreBundle { | class PTE(implicit p: Parameters) extends CoreBundle()(p) { | ||||||
|   val ppn = Bits(width = ppnBits) |   val ppn = Bits(width = ppnBits) | ||||||
|   val reserved_for_software = Bits(width = 3) |   val reserved_for_software = Bits(width = 3) | ||||||
|   val d = Bool() |   val d = Bool() | ||||||
| @@ -51,10 +51,9 @@ class PTE extends CoreBundle { | |||||||
|     Mux(prv(0), Mux(fetch, sx(), Mux(store, sw(), sr())), Mux(fetch, ux(), Mux(store, uw(), ur()))) |     Mux(prv(0), Mux(fetch, sx(), Mux(store, sw(), sr())), Mux(fetch, ux(), Mux(store, uw(), ur()))) | ||||||
| } | } | ||||||
|  |  | ||||||
| class PTW(n: Int) extends CoreModule | class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { | ||||||
| { |  | ||||||
|   val io = new Bundle { |   val io = new Bundle { | ||||||
|     val requestor = Vec.fill(n){new TLBPTWIO}.flip |     val requestor = Vec(new TLBPTWIO, n).flip | ||||||
|     val mem = new HellaCacheIO |     val mem = new HellaCacheIO | ||||||
|     val dpath = new DatapathPTWIO |     val dpath = new DatapathPTWIO | ||||||
|   } |   } | ||||||
| @@ -87,8 +86,8 @@ class PTW(n: Int) extends CoreModule | |||||||
|     val plru = new PseudoLRU(size) |     val plru = new PseudoLRU(size) | ||||||
|     val valid = Reg(Vec(Bool(), size)) |     val valid = Reg(Vec(Bool(), size)) | ||||||
|     val validBits = valid.toBits |     val validBits = valid.toBits | ||||||
|     val tags = Mem(UInt(width = paddrBits), size) |     val tags = Mem(size, UInt(width = paddrBits)) | ||||||
|     val data = Mem(UInt(width = ppnBits), size) |     val data = Mem(size, UInt(width = ppnBits)) | ||||||
|  |  | ||||||
|     val hits = Vec(tags.map(_ === pte_addr)).toBits & validBits |     val hits = Vec(tags.map(_ === pte_addr)).toBits & validBits | ||||||
|     val hit = hits.orR |     val hit = hits.orR | ||||||
| @@ -125,7 +124,7 @@ class PTW(n: Int) extends CoreModule | |||||||
|   val resp_err = state === s_error |   val resp_err = state === s_error | ||||||
|   val resp_val = state === s_done || resp_err |   val resp_val = state === s_done || resp_err | ||||||
|  |  | ||||||
|   val r_resp_ppn = io.mem.req.bits.addr >> UInt(pgIdxBits) |   val r_resp_ppn = io.mem.req.bits.addr >> pgIdxBits | ||||||
|   val resp_ppn = Vec((0 until pgLevels-1).map(i => Cat(r_resp_ppn >> pgLevelBits*(pgLevels-i-1), r_req.addr(pgLevelBits*(pgLevels-i-1)-1,0))) :+ r_resp_ppn)(count) |   val resp_ppn = Vec((0 until pgLevels-1).map(i => Cat(r_resp_ppn >> pgLevelBits*(pgLevels-i-1), r_req.addr(pgLevelBits*(pgLevels-i-1)-1,0))) :+ r_resp_ppn)(count) | ||||||
|  |  | ||||||
|   for (i <- 0 until io.requestor.size) { |   for (i <- 0 until io.requestor.size) { | ||||||
|   | |||||||
| @@ -20,24 +20,21 @@ class RoCCInstruction extends Bundle | |||||||
|   val opcode = Bits(width = 7) |   val opcode = Bits(width = 7) | ||||||
| } | } | ||||||
|  |  | ||||||
| class RoCCCommand extends CoreBundle | class RoCCCommand(implicit p: Parameters) extends CoreBundle()(p) { | ||||||
| { |  | ||||||
|   val inst = new RoCCInstruction |   val inst = new RoCCInstruction | ||||||
|   val rs1 = Bits(width = xLen) |   val rs1 = Bits(width = xLen) | ||||||
|   val rs2 = Bits(width = xLen) |   val rs2 = Bits(width = xLen) | ||||||
| } | } | ||||||
|  |  | ||||||
| class RoCCResponse extends CoreBundle | class RoCCResponse(implicit p: Parameters) extends CoreBundle()(p) { | ||||||
| { |  | ||||||
|   val rd = Bits(width = 5) |   val rd = Bits(width = 5) | ||||||
|   val data = Bits(width = xLen) |   val data = Bits(width = xLen) | ||||||
| } | } | ||||||
|  |  | ||||||
| class RoCCInterface extends Bundle | class RoCCInterface(implicit p: Parameters) extends Bundle { | ||||||
| { |  | ||||||
|   val cmd = Decoupled(new RoCCCommand).flip |   val cmd = Decoupled(new RoCCCommand).flip | ||||||
|   val resp = Decoupled(new RoCCResponse) |   val resp = Decoupled(new RoCCResponse) | ||||||
|   val mem = new HellaCacheIO |   val mem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" })) | ||||||
|   val busy = Bool(OUTPUT) |   val busy = Bool(OUTPUT) | ||||||
|   val s = Bool(INPUT) |   val s = Bool(INPUT) | ||||||
|   val interrupt = Bool(OUTPUT) |   val interrupt = Bool(OUTPUT) | ||||||
| @@ -53,15 +50,12 @@ class RoCCInterface extends Bundle | |||||||
|   val exception = Bool(INPUT) |   val exception = Bool(INPUT) | ||||||
| } | } | ||||||
|  |  | ||||||
| abstract class RoCC extends CoreModule | abstract class RoCC(implicit p: Parameters) extends CoreModule()(p) { | ||||||
| { |  | ||||||
|   val io = new RoCCInterface |   val io = new RoCCInterface | ||||||
|   io.mem.req.bits.phys := Bool(true) // don't perform address translation |   io.mem.req.bits.phys := Bool(true) // don't perform address translation | ||||||
| } | } | ||||||
|  |  | ||||||
| class AccumulatorExample extends RoCC | class AccumulatorExample(n: Int = 4)(implicit p: Parameters) extends RoCC()(p) { | ||||||
| { |  | ||||||
|   val n = 4 |  | ||||||
|   val regfile = Mem(UInt(width = xLen), n) |   val regfile = Mem(UInt(width = xLen), n) | ||||||
|   val busy = Reg(init=Vec(Bool(false), n)) |   val busy = Reg(init=Vec(Bool(false), n)) | ||||||
|  |  | ||||||
|   | |||||||
| @@ -7,13 +7,13 @@ import junctions._ | |||||||
| import uncore._ | import uncore._ | ||||||
| import Util._ | import Util._ | ||||||
|  |  | ||||||
| case object BuildFPU extends Field[Option[() => FPU]] | case object BuildFPU extends Field[Option[Parameters => FPU]] | ||||||
| case object FDivSqrt extends Field[Boolean] | case object FDivSqrt extends Field[Boolean] | ||||||
| case object XLen extends Field[Int] | case object XLen extends Field[Int] | ||||||
| case object NMultXpr extends Field[Int] |  | ||||||
| case object FetchWidth extends Field[Int] | case object FetchWidth extends Field[Int] | ||||||
| case object RetireWidth extends Field[Int] | case object RetireWidth extends Field[Int] | ||||||
| case object UseVM extends Field[Boolean] | case object UseVM extends Field[Boolean] | ||||||
|  | case object UsePerfCounters extends Field[Boolean] | ||||||
| case object FastLoadWord extends Field[Boolean] | case object FastLoadWord extends Field[Boolean] | ||||||
| case object FastLoadByte extends Field[Boolean] | case object FastLoadByte extends Field[Boolean] | ||||||
| case object FastMulDiv extends Field[Boolean] | case object FastMulDiv extends Field[Boolean] | ||||||
| @@ -22,54 +22,57 @@ case object CoreDataBits extends Field[Int] | |||||||
| case object CoreDCacheReqTagBits extends Field[Int] | case object CoreDCacheReqTagBits extends Field[Int] | ||||||
| case object NCustomMRWCSRs extends Field[Int] | case object NCustomMRWCSRs extends Field[Int] | ||||||
|  |  | ||||||
| abstract trait CoreParameters extends UsesParameters { | trait HasCoreParameters extends HasAddrMapParameters { | ||||||
|   val xLen = params(XLen) |   implicit val p: Parameters | ||||||
|   val paddrBits = params(PAddrBits) |   val xLen = p(XLen) | ||||||
|   val vaddrBits = params(VAddrBits) |  | ||||||
|   val pgIdxBits = params(PgIdxBits) |  | ||||||
|   val ppnBits = params(PPNBits) |  | ||||||
|   val vpnBits = params(VPNBits) |  | ||||||
|   val pgLevels = params(PgLevels) |  | ||||||
|   val pgLevelBits = params(PgLevelBits) |  | ||||||
|   val asIdBits = params(ASIdBits) |  | ||||||
|  |  | ||||||
|   val retireWidth = params(RetireWidth) |   val retireWidth = p(RetireWidth) | ||||||
|   val coreFetchWidth = params(FetchWidth) |   val fetchWidth = p(FetchWidth) | ||||||
|   val coreInstBits = params(CoreInstBits) |   val coreInstBits = p(CoreInstBits) | ||||||
|   val coreInstBytes = coreInstBits/8 |   val coreInstBytes = coreInstBits/8 | ||||||
|   val coreDataBits = xLen |   val coreDataBits = xLen | ||||||
|   val coreDataBytes = coreDataBits/8 |   val coreDataBytes = coreDataBits/8 | ||||||
|   val coreDCacheReqTagBits = params(CoreDCacheReqTagBits) |   val coreDCacheReqTagBits = p(CoreDCacheReqTagBits) | ||||||
|   val coreMaxAddrBits = math.max(ppnBits,vpnBits+1) + pgIdxBits |   val coreMaxAddrBits = math.max(ppnBits,vpnBits+1) + pgIdxBits | ||||||
|   val vaddrBitsExtended = vaddrBits + (vaddrBits < xLen).toInt |   val vaddrBitsExtended = vaddrBits + (vaddrBits < xLen).toInt | ||||||
|  |   val mmioBase = p(MMIOBase) | ||||||
|  |   val nCustomMrwCsrs = p(NCustomMRWCSRs) | ||||||
|  |  | ||||||
|   if(params(FastLoadByte)) require(params(FastLoadWord)) |   val usingVM = p(UseVM) | ||||||
|  |   val usingFPU = !p(BuildFPU).isEmpty | ||||||
|  |   val usingFDivSqrt = p(FDivSqrt) | ||||||
|  |   val usingRoCC = !p(BuildRoCC).isEmpty | ||||||
|  |   val usingFastMulDiv = p(FastMulDiv) | ||||||
|  |   val fastLoadWord = p(FastLoadWord) | ||||||
|  |   val fastLoadByte = p(FastLoadByte) | ||||||
|  |  | ||||||
|  |   // Print out log of committed instructions and their writeback values. | ||||||
|  |   // Requires post-processing due to out-of-order writebacks. | ||||||
|  |   val enableCommitLog = false | ||||||
|  |   val usingPerfCounters = p(UsePerfCounters) | ||||||
|  |  | ||||||
|  |   if (fastLoadByte) require(fastLoadWord) | ||||||
| } | } | ||||||
|  |  | ||||||
| abstract trait RocketCoreParameters extends CoreParameters | abstract class CoreModule(implicit val p: Parameters) extends Module | ||||||
| { |   with HasCoreParameters | ||||||
|   require(params(FetchWidth) == 1)  // for now... | abstract class CoreBundle(implicit val p: Parameters) extends ParameterizedBundle()(p) | ||||||
|   require(params(RetireWidth) == 1) // for now... |   with HasCoreParameters | ||||||
| } |  | ||||||
|  |  | ||||||
| abstract class CoreBundle extends Bundle with CoreParameters | class Rocket(implicit p: Parameters) extends CoreModule()(p) { | ||||||
| abstract class CoreModule extends Module with CoreParameters |  | ||||||
|  |  | ||||||
| class Rocket extends CoreModule |  | ||||||
| { |  | ||||||
|   val io = new Bundle { |   val io = new Bundle { | ||||||
|     val host = new HTIFIO |     val host = new HtifIO | ||||||
|     val imem  = new CPUFrontendIO |     val imem  = new FrontendIO()(p.alterPartial({case CacheName => "L1I" })) | ||||||
|     val dmem = new HellaCacheIO |     val dmem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" })) | ||||||
|     val ptw = new DatapathPTWIO().flip |     val ptw = new DatapathPTWIO().flip | ||||||
|     val fpu = new FPUIO().flip |     val fpu = new FPUIO().flip | ||||||
|     val rocc = new RoCCInterface().flip |     val rocc = new RoCCInterface().flip | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   var decode_table = XDecode.table |   var decode_table = XDecode.table | ||||||
|   if (!params(BuildFPU).isEmpty) decode_table ++= FDecode.table |   if (usingFPU) decode_table ++= FDecode.table | ||||||
|   if (!params(BuildFPU).isEmpty && params(FDivSqrt)) decode_table ++= FDivSqrtDecode.table |   if (usingFPU && usingFDivSqrt) decode_table ++= FDivSqrtDecode.table | ||||||
|   if (!params(BuildRoCC).isEmpty) decode_table ++= RoCCDecode.table |   if (usingRoCC) decode_table ++= RoCCDecode.table | ||||||
|  |  | ||||||
|   val ex_ctrl = Reg(new IntCtrlSigs) |   val ex_ctrl = Reg(new IntCtrlSigs) | ||||||
|   val mem_ctrl = Reg(new IntCtrlSigs) |   val mem_ctrl = Reg(new IntCtrlSigs) | ||||||
| @@ -117,7 +120,7 @@ class Rocket extends CoreModule | |||||||
|  |  | ||||||
|   // decode stage |   // decode stage | ||||||
|   val id_pc = io.imem.resp.bits.pc |   val id_pc = io.imem.resp.bits.pc | ||||||
|   val id_inst = io.imem.resp.bits.data(0).toBits; require(params(FetchWidth) == 1) |   val id_inst = io.imem.resp.bits.data(0).toBits; require(fetchWidth == 1) | ||||||
|   val id_ctrl = Wire(new IntCtrlSigs()).decode(id_inst, decode_table) |   val id_ctrl = Wire(new IntCtrlSigs()).decode(id_inst, decode_table) | ||||||
|   val id_raddr3 = id_inst(31,27) |   val id_raddr3 = id_inst(31,27) | ||||||
|   val id_raddr2 = id_inst(24,20) |   val id_raddr2 = id_inst(24,20) | ||||||
| @@ -150,7 +153,7 @@ class Rocket extends CoreModule | |||||||
|   val id_amo_rl = id_inst(25) |   val id_amo_rl = id_inst(25) | ||||||
|   val id_fence_next = id_ctrl.fence || id_ctrl.amo && id_amo_rl |   val id_fence_next = id_ctrl.fence || id_ctrl.amo && id_amo_rl | ||||||
|   val id_mem_busy = !io.dmem.ordered || io.dmem.req.valid |   val id_mem_busy = !io.dmem.ordered || io.dmem.req.valid | ||||||
|   val id_rocc_busy = Bool(!params(BuildRoCC).isEmpty) && |   val id_rocc_busy = Bool(usingRoCC) && | ||||||
|     (io.rocc.busy || ex_reg_valid && ex_ctrl.rocc || |     (io.rocc.busy || ex_reg_valid && ex_ctrl.rocc || | ||||||
|      mem_reg_valid && mem_ctrl.rocc || wb_reg_valid && wb_ctrl.rocc) |      mem_reg_valid && mem_ctrl.rocc || wb_reg_valid && wb_ctrl.rocc) | ||||||
|   id_reg_fence := id_fence_next || id_reg_fence && id_mem_busy |   id_reg_fence := id_fence_next || id_reg_fence && id_mem_busy | ||||||
| @@ -163,8 +166,8 @@ class Rocket extends CoreModule | |||||||
|     (id_illegal_insn,           UInt(Causes.illegal_instruction)))) |     (id_illegal_insn,           UInt(Causes.illegal_instruction)))) | ||||||
|  |  | ||||||
|   val dcache_bypass_data = |   val dcache_bypass_data = | ||||||
|     if(params(FastLoadByte)) io.dmem.resp.bits.data_subword |     if (fastLoadByte) io.dmem.resp.bits.data | ||||||
|     else if(params(FastLoadWord)) io.dmem.resp.bits.data |     else if (fastLoadWord) io.dmem.resp.bits.data_word_bypass | ||||||
|     else wb_reg_wdata |     else wb_reg_wdata | ||||||
|  |  | ||||||
|   // detect bypass opportunities |   // detect bypass opportunities | ||||||
| @@ -180,9 +183,9 @@ class Rocket extends CoreModule | |||||||
|  |  | ||||||
|   // execute stage |   // execute stage | ||||||
|   val bypass_mux = Vec(bypass_sources.map(_._3)) |   val bypass_mux = Vec(bypass_sources.map(_._3)) | ||||||
|   val ex_reg_rs_bypass = Reg(Vec.fill(id_raddr.size)(Bool())) |   val ex_reg_rs_bypass = Reg(Vec(Bool(), id_raddr.size)) | ||||||
|   val ex_reg_rs_lsb = Reg(Vec.fill(id_raddr.size)(Bits())) |   val ex_reg_rs_lsb = Reg(Vec(UInt(), id_raddr.size)) | ||||||
|   val ex_reg_rs_msb = Reg(Vec.fill(id_raddr.size)(Bits())) |   val ex_reg_rs_msb = Reg(Vec(UInt(), id_raddr.size)) | ||||||
|   val ex_rs = for (i <- 0 until id_raddr.size) |   val ex_rs = for (i <- 0 until id_raddr.size) | ||||||
|     yield Mux(ex_reg_rs_bypass(i), bypass_mux(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i))) |     yield Mux(ex_reg_rs_bypass(i), bypass_mux(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i))) | ||||||
|   val ex_imm = imm(ex_ctrl.sel_imm, ex_reg_inst) |   val ex_imm = imm(ex_ctrl.sel_imm, ex_reg_inst) | ||||||
| @@ -201,8 +204,9 @@ class Rocket extends CoreModule | |||||||
|   alu.io.in1 := ex_op1.toUInt |   alu.io.in1 := ex_op1.toUInt | ||||||
|    |    | ||||||
|   // multiplier and divider |   // multiplier and divider | ||||||
|   val div = Module(new MulDiv(mulUnroll = if(params(FastMulDiv)) 8 else 1, |   val div = Module(new MulDiv(width = xLen, | ||||||
|                        earlyOut = params(FastMulDiv))) |                               unroll = if(usingFastMulDiv) 8 else 1, | ||||||
|  |                               earlyOut = usingFastMulDiv)) | ||||||
|   div.io.req.valid := ex_reg_valid && ex_ctrl.div |   div.io.req.valid := ex_reg_valid && ex_ctrl.div | ||||||
|   div.io.req.bits.dw := ex_ctrl.alu_dw |   div.io.req.bits.dw := ex_ctrl.alu_dw | ||||||
|   div.io.req.bits.fn := ex_ctrl.alu_fn |   div.io.req.bits.fn := ex_ctrl.alu_fn | ||||||
| @@ -331,7 +335,7 @@ class Rocket extends CoreModule | |||||||
|   // writeback arbitration |   // writeback arbitration | ||||||
|   val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool |   val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool | ||||||
|   val dmem_resp_fpu =  io.dmem.resp.bits.tag(0).toBool |   val dmem_resp_fpu =  io.dmem.resp.bits.tag(0).toBool | ||||||
|   val dmem_resp_waddr = io.dmem.resp.bits.tag.toUInt >> UInt(1) |   val dmem_resp_waddr = io.dmem.resp.bits.tag.toUInt()(5,1) | ||||||
|   val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data |   val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data | ||||||
|   val dmem_resp_replay = io.dmem.resp.bits.replay && io.dmem.resp.bits.has_data |   val dmem_resp_replay = io.dmem.resp.bits.replay && io.dmem.resp.bits.has_data | ||||||
|  |  | ||||||
| @@ -339,7 +343,7 @@ class Rocket extends CoreModule | |||||||
|   val ll_wdata = Wire(init = div.io.resp.bits.data) |   val ll_wdata = Wire(init = div.io.resp.bits.data) | ||||||
|   val ll_waddr = Wire(init = div.io.resp.bits.tag) |   val ll_waddr = Wire(init = div.io.resp.bits.tag) | ||||||
|   val ll_wen = Wire(init = div.io.resp.fire()) |   val ll_wen = Wire(init = div.io.resp.fire()) | ||||||
|   if (!params(BuildRoCC).isEmpty) { |   if (usingRoCC) { | ||||||
|     io.rocc.resp.ready := !(wb_reg_valid && wb_ctrl.wxd) |     io.rocc.resp.ready := !(wb_reg_valid && wb_ctrl.wxd) | ||||||
|     when (io.rocc.resp.fire()) { |     when (io.rocc.resp.fire()) { | ||||||
|       div.io.resp.ready := Bool(false) |       div.io.resp.ready := Bool(false) | ||||||
| @@ -350,7 +354,7 @@ class Rocket extends CoreModule | |||||||
|   } |   } | ||||||
|   when (dmem_resp_replay && dmem_resp_xpu) { |   when (dmem_resp_replay && dmem_resp_xpu) { | ||||||
|     div.io.resp.ready := Bool(false) |     div.io.resp.ready := Bool(false) | ||||||
|     if (!params(BuildRoCC).isEmpty) |     if (usingRoCC) | ||||||
|       io.rocc.resp.ready := Bool(false) |       io.rocc.resp.ready := Bool(false) | ||||||
|     ll_waddr := dmem_resp_waddr |     ll_waddr := dmem_resp_waddr | ||||||
|     ll_wen := Bool(true) |     ll_wen := Bool(true) | ||||||
| @@ -360,7 +364,7 @@ class Rocket extends CoreModule | |||||||
|   val wb_wen = wb_valid && wb_ctrl.wxd |   val wb_wen = wb_valid && wb_ctrl.wxd | ||||||
|   val rf_wen = wb_wen || ll_wen  |   val rf_wen = wb_wen || ll_wen  | ||||||
|   val rf_waddr = Mux(ll_wen, ll_waddr, wb_waddr) |   val rf_waddr = Mux(ll_wen, ll_waddr, wb_waddr) | ||||||
|   val rf_wdata = Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data_subword, |   val rf_wdata = Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data, | ||||||
|                  Mux(ll_wen, ll_wdata, |                  Mux(ll_wen, ll_wdata, | ||||||
|                  Mux(wb_ctrl.csr != CSR.N, csr.io.rw.rdata, |                  Mux(wb_ctrl.csr != CSR.N, csr.io.rw.rdata, | ||||||
|                  wb_reg_wdata))) |                  wb_reg_wdata))) | ||||||
| @@ -404,7 +408,7 @@ class Rocket extends CoreModule | |||||||
|  |  | ||||||
|   // stall for RAW/WAW hazards on CSRs, LB/LH, and mul/div in memory stage. |   // stall for RAW/WAW hazards on CSRs, LB/LH, and mul/div in memory stage. | ||||||
|   val mem_mem_cmd_bh = |   val mem_mem_cmd_bh = | ||||||
|     if (params(FastLoadWord)) Bool(!params(FastLoadByte)) && mem_reg_slow_bypass |     if (fastLoadWord) Bool(!fastLoadByte) && mem_reg_slow_bypass | ||||||
|     else Bool(true) |     else Bool(true) | ||||||
|   val mem_cannot_bypass = mem_ctrl.csr != CSR.N || mem_ctrl.mem && mem_mem_cmd_bh || mem_ctrl.div || mem_ctrl.fp || mem_ctrl.rocc |   val mem_cannot_bypass = mem_ctrl.csr != CSR.N || mem_ctrl.mem && mem_mem_cmd_bh || mem_ctrl.div || mem_ctrl.fp || mem_ctrl.rocc | ||||||
|   val data_hazard_mem = mem_ctrl.wxd && checkHazards(hazard_targets, _ === mem_waddr) |   val data_hazard_mem = mem_ctrl.wxd && checkHazards(hazard_targets, _ === mem_waddr) | ||||||
| @@ -417,7 +421,7 @@ class Rocket extends CoreModule | |||||||
|   val fp_data_hazard_wb = wb_ctrl.wfd && checkHazards(fp_hazard_targets, _ === wb_waddr) |   val fp_data_hazard_wb = wb_ctrl.wfd && checkHazards(fp_hazard_targets, _ === wb_waddr) | ||||||
|   val id_wb_hazard = wb_reg_valid && (data_hazard_wb && wb_set_sboard || fp_data_hazard_wb) |   val id_wb_hazard = wb_reg_valid && (data_hazard_wb && wb_set_sboard || fp_data_hazard_wb) | ||||||
|  |  | ||||||
|   val id_stall_fpu = if (!params(BuildFPU).isEmpty) { |   val id_stall_fpu = if (usingFPU) { | ||||||
|     val fp_sboard = new Scoreboard(32) |     val fp_sboard = new Scoreboard(32) | ||||||
|     fp_sboard.set((wb_dcache_miss && wb_ctrl.wfd || io.fpu.sboard_set) && wb_valid, wb_waddr) |     fp_sboard.set((wb_dcache_miss && wb_ctrl.wfd || io.fpu.sboard_set) && wb_valid, wb_waddr) | ||||||
|     fp_sboard.clear(dmem_resp_replay && dmem_resp_fpu, dmem_resp_waddr) |     fp_sboard.clear(dmem_resp_replay && dmem_resp_fpu, dmem_resp_waddr) | ||||||
| @@ -430,7 +434,7 @@ class Rocket extends CoreModule | |||||||
|     id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard || |     id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard || | ||||||
|     id_ctrl.fp && id_stall_fpu || |     id_ctrl.fp && id_stall_fpu || | ||||||
|     id_ctrl.mem && !io.dmem.req.ready || |     id_ctrl.mem && !io.dmem.req.ready || | ||||||
|     Bool(!params(BuildRoCC).isEmpty) && wb_reg_rocc_pending && id_ctrl.rocc && !io.rocc.cmd.ready || |     Bool(usingRoCC) && wb_reg_rocc_pending && id_ctrl.rocc && !io.rocc.cmd.ready || | ||||||
|     id_do_fence || |     id_do_fence || | ||||||
|     csr.io.csr_stall |     csr.io.csr_stall | ||||||
|   ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || csr.io.interrupt |   ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || csr.io.interrupt | ||||||
| @@ -470,7 +474,7 @@ class Rocket extends CoreModule | |||||||
|   io.fpu.inst := id_inst |   io.fpu.inst := id_inst | ||||||
|   io.fpu.fromint_data := ex_rs(0) |   io.fpu.fromint_data := ex_rs(0) | ||||||
|   io.fpu.dmem_resp_val := dmem_resp_valid && dmem_resp_fpu |   io.fpu.dmem_resp_val := dmem_resp_valid && dmem_resp_fpu | ||||||
|   io.fpu.dmem_resp_data := io.dmem.resp.bits.data |   io.fpu.dmem_resp_data := io.dmem.resp.bits.data_word_bypass | ||||||
|   io.fpu.dmem_resp_type := io.dmem.resp.bits.typ |   io.fpu.dmem_resp_type := io.dmem.resp.bits.typ | ||||||
|   io.fpu.dmem_resp_tag := dmem_resp_waddr |   io.fpu.dmem_resp_tag := dmem_resp_waddr | ||||||
|  |  | ||||||
| @@ -482,7 +486,7 @@ class Rocket extends CoreModule | |||||||
|   io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(vaddrBits-1,0)).toUInt |   io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(vaddrBits-1,0)).toUInt | ||||||
|   io.dmem.req.bits.tag := Cat(ex_waddr, ex_ctrl.fp) |   io.dmem.req.bits.tag := Cat(ex_waddr, ex_ctrl.fp) | ||||||
|   io.dmem.req.bits.data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2) |   io.dmem.req.bits.data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2) | ||||||
|   require(params(CoreDCacheReqTagBits) >= 6) |   require(p(CoreDCacheReqTagBits) >= 6) | ||||||
|   io.dmem.invalidate_lr := wb_xcpt |   io.dmem.invalidate_lr := wb_xcpt | ||||||
|  |  | ||||||
|   io.rocc.cmd.valid := wb_rocc_val |   io.rocc.cmd.valid := wb_rocc_val | ||||||
| @@ -492,19 +496,50 @@ class Rocket extends CoreModule | |||||||
|   io.rocc.cmd.bits.rs1 := wb_reg_wdata |   io.rocc.cmd.bits.rs1 := wb_reg_wdata | ||||||
|   io.rocc.cmd.bits.rs2 := wb_reg_rs2 |   io.rocc.cmd.bits.rs2 := wb_reg_rs2 | ||||||
|  |  | ||||||
|   if (!params(BuildFPU).isEmpty && !params(BuildRoCC).isEmpty) { |   if (!p(BuildFPU).isEmpty && !p(BuildRoCC).isEmpty) { | ||||||
|     io.fpu.cp_req <> io.rocc.fpu_req |     io.fpu.cp_req <> io.rocc.fpu_req | ||||||
|     io.fpu.cp_resp <> io.rocc.fpu_resp |     io.fpu.cp_resp <> io.rocc.fpu_resp | ||||||
|   } else { |   } else { | ||||||
|     io.fpu.cp_req.valid := Bool(false) |     io.fpu.cp_req.valid := Bool(false) | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   printf("C%d: %d [%d] pc=[%x] W[r%d=%x][%d] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n", |   if (enableCommitLog) { | ||||||
|  |     val pc = Wire(SInt(width=64)) | ||||||
|  |     pc := wb_reg_pc | ||||||
|  |     val inst = wb_reg_inst | ||||||
|  |     val rd = RegNext(RegNext(RegNext(id_waddr))) | ||||||
|  |     val wfd = wb_ctrl.wfd | ||||||
|  |     val wxd = wb_ctrl.wxd | ||||||
|  |     val has_data = wb_wen && !wb_set_sboard | ||||||
|  |     val priv = csr.io.status.prv | ||||||
|  |  | ||||||
|  |     when (wb_valid) { | ||||||
|  |       when (wfd) { | ||||||
|  |         printf ("%d 0x%x (0x%x) f%d p%d 0xXXXXXXXXXXXXXXXX\n", priv, pc, inst, rd, rd+UInt(32)) | ||||||
|  |       } | ||||||
|  |       .elsewhen (wxd && rd != UInt(0) && has_data) { | ||||||
|  |         printf ("%d 0x%x (0x%x) x%d 0x%x\n", priv, pc, inst, rd, rf_wdata) | ||||||
|  |       } | ||||||
|  |       .elsewhen (wxd && rd != UInt(0) && !has_data) { | ||||||
|  |         printf ("%d 0x%x (0x%x) x%d p%d 0xXXXXXXXXXXXXXXXX\n", priv, pc, inst, rd, rd) | ||||||
|  |       } | ||||||
|  |       .otherwise { | ||||||
|  |         printf ("%d 0x%x (0x%x)\n", priv, pc, inst) | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     when (ll_wen && rf_waddr != UInt(0)) { | ||||||
|  |       printf ("x%d p%d 0x%x\n", rf_waddr, rf_waddr, rf_wdata) | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |   else { | ||||||
|  |     printf("C%d: %d [%d] pc=[%x] W[r%d=%x][%d] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n", | ||||||
|          io.host.id, csr.io.time(32,0), wb_valid, wb_reg_pc, |          io.host.id, csr.io.time(32,0), wb_valid, wb_reg_pc, | ||||||
|          Mux(rf_wen, rf_waddr, UInt(0)), rf_wdata, rf_wen, |          Mux(rf_wen, rf_waddr, UInt(0)), rf_wdata, rf_wen, | ||||||
|          wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs(0))), |          wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs(0))), | ||||||
|          wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs(1))), |          wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs(1))), | ||||||
|          wb_reg_inst, wb_reg_inst) |          wb_reg_inst, wb_reg_inst) | ||||||
|  |   } | ||||||
|  |  | ||||||
|   def checkExceptions(x: Seq[(Bool, UInt)]) = |   def checkExceptions(x: Seq[(Bool, UInt)]) = | ||||||
|     (x.map(_._1).reduce(_||_), PriorityMux(x)) |     (x.map(_._1).reduce(_||_), PriorityMux(x)) | ||||||
| @@ -541,7 +576,7 @@ class Rocket extends CoreModule | |||||||
|   } |   } | ||||||
|  |  | ||||||
|   class RegFile { |   class RegFile { | ||||||
|     private val rf = Mem(UInt(width = 64), 31) |     private val rf = Mem(31, UInt(width = 64)) | ||||||
|     private val reads = collection.mutable.ArrayBuffer[(UInt,UInt)]() |     private val reads = collection.mutable.ArrayBuffer[(UInt,UInt)]() | ||||||
|     private var canRead = true |     private var canRead = true | ||||||
|     def read(addr: UInt) = { |     def read(addr: UInt) = { | ||||||
|   | |||||||
| @@ -9,24 +9,29 @@ import Util._ | |||||||
| case object CoreName extends Field[String] | case object CoreName extends Field[String] | ||||||
| case object NDCachePorts extends Field[Int] | case object NDCachePorts extends Field[Int] | ||||||
| case object NPTWPorts extends Field[Int] | case object NPTWPorts extends Field[Int] | ||||||
| case object BuildRoCC extends Field[Option[() => RoCC]] | case object BuildRoCC extends Field[Option[Parameters => RoCC]] | ||||||
|  |  | ||||||
| abstract class Tile(resetSignal: Bool = null) extends Module(_reset = resetSignal) { | abstract class Tile(resetSignal: Bool = null) | ||||||
|  |                    (implicit p: Parameters) extends Module(_reset = resetSignal) { | ||||||
|   val io = new Bundle { |   val io = new Bundle { | ||||||
|     val cached = new ClientTileLinkIO |     val cached = new ClientTileLinkIO | ||||||
|     val uncached = new ClientUncachedTileLinkIO |     val uncached = new ClientUncachedTileLinkIO | ||||||
|     val host = new HTIFIO |     val host = new HtifIO | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) { | class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile(resetSignal)(p) { | ||||||
|   val icache = Module(new Frontend, { case CacheName => "L1I"; case CoreName => "Rocket" }) |   //TODO | ||||||
|   val dcache = Module(new HellaCache, { case CacheName => "L1D" }) |   val dcacheParams = p.alterPartial({ case CacheName => "L1D" }) | ||||||
|   val ptw = Module(new PTW(params(NPTWPorts))) |   val icache = Module(new Frontend()(p.alterPartial({ | ||||||
|   val core = Module(new Rocket, { case CoreName => "Rocket" }) |                  case CacheName => "L1I" | ||||||
|  |                  case CoreName => "Rocket" }))) | ||||||
|  |   val dcache = Module(new HellaCache()(dcacheParams)) | ||||||
|  |   val ptw = Module(new PTW(p(NPTWPorts))(dcacheParams)) | ||||||
|  |   val core = Module(new Rocket()(p.alterPartial({ case CoreName => "Rocket" }))) | ||||||
|  |  | ||||||
|   dcache.io.cpu.invalidate_lr := core.io.dmem.invalidate_lr // Bypass signal to dcache |   dcache.io.cpu.invalidate_lr := core.io.dmem.invalidate_lr // Bypass signal to dcache | ||||||
|   val dcArb = Module(new HellaCacheArbiter(params(NDCachePorts))) |   val dcArb = Module(new HellaCacheArbiter(p(NDCachePorts))(dcacheParams)) | ||||||
|   dcArb.io.requestor(0) <> ptw.io.mem |   dcArb.io.requestor(0) <> ptw.io.mem | ||||||
|   dcArb.io.requestor(1) <> core.io.dmem |   dcArb.io.requestor(1) <> core.io.dmem | ||||||
|   dcache.io.cpu <> dcArb.io.mem |   dcache.io.cpu <> dcArb.io.mem | ||||||
| @@ -39,20 +44,16 @@ class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) { | |||||||
|   core.io.ptw <> ptw.io.dpath |   core.io.ptw <> ptw.io.dpath | ||||||
|  |  | ||||||
|   //If so specified, build an FPU module and wire it in |   //If so specified, build an FPU module and wire it in | ||||||
|   params(BuildFPU) |   p(BuildFPU) foreach { fpu => core.io.fpu <> fpu(p).io } | ||||||
|     .map { bf => bf() } |  | ||||||
|     .foreach { fpu => |  | ||||||
|       core.io.fpu <> fpu.io |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|   // Connect the caches and ROCC to the outer memory system |   // Connect the caches and ROCC to the outer memory system | ||||||
|   io.cached <> dcache.io.mem |   io.cached <> dcache.io.mem | ||||||
|   // If so specified, build an RoCC module and wire it in |   // If so specified, build an RoCC module and wire it in | ||||||
|   // otherwise, just hookup the icache |   // otherwise, just hookup the icache | ||||||
|   io.uncached <> params(BuildRoCC).map { buildItHere => |   io.uncached <> p(BuildRoCC).map { buildItHere => | ||||||
|     val rocc = buildItHere() |     val rocc = buildItHere(p) | ||||||
|     val memArb = Module(new ClientTileLinkIOArbiter(3)) |     val memArb = Module(new ClientTileLinkIOArbiter(3)) | ||||||
|     val dcIF = Module(new SimpleHellaCacheIF) |     val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams)) | ||||||
|     core.io.rocc <> rocc.io |     core.io.rocc <> rocc.io | ||||||
|     dcIF.io.requestor <> rocc.io.mem |     dcIF.io.requestor <> rocc.io.mem | ||||||
|     dcArb.io.requestor(2) <> dcIF.io.cache |     dcArb.io.requestor(2) <> dcIF.io.cache | ||||||
|   | |||||||
| @@ -9,16 +9,18 @@ import scala.math._ | |||||||
|  |  | ||||||
| case object NTLBEntries extends Field[Int] | case object NTLBEntries extends Field[Int] | ||||||
|  |  | ||||||
| abstract trait TLBParameters extends CoreParameters { | trait HasTLBParameters extends HasAddrMapParameters { | ||||||
|   val entries = params(NTLBEntries) |   val entries = p(NTLBEntries) | ||||||
|   val camAddrBits = ceil(log(entries)/log(2)).toInt |   val camAddrBits = log2Ceil(entries) | ||||||
|   val camTagBits = asIdBits + vpnBits |   val camTagBits = asIdBits + vpnBits | ||||||
| } | } | ||||||
|  |  | ||||||
| abstract class TLBBundle extends Bundle with TLBParameters | abstract class TLBModule(implicit val p: Parameters) extends Module | ||||||
| abstract class TLBModule extends Module with TLBParameters |   with HasTLBParameters | ||||||
|  | abstract class TLBBundle(implicit val p: Parameters) extends ParameterizedBundle()(p) | ||||||
|  |   with HasTLBParameters | ||||||
|  |  | ||||||
| class CAMIO extends TLBBundle { | class CAMIO(implicit p: Parameters) extends TLBBundle()(p) { | ||||||
|     val clear        = Bool(INPUT) |     val clear        = Bool(INPUT) | ||||||
|     val clear_mask   = Bits(INPUT, entries) |     val clear_mask   = Bits(INPUT, entries) | ||||||
|     val tag          = Bits(INPUT, camTagBits) |     val tag          = Bits(INPUT, camTagBits) | ||||||
| @@ -31,9 +33,9 @@ class CAMIO extends TLBBundle { | |||||||
|     val write_addr    = UInt(INPUT, camAddrBits) |     val write_addr    = UInt(INPUT, camAddrBits) | ||||||
| } | } | ||||||
|  |  | ||||||
| class RocketCAM extends TLBModule { | class RocketCAM(implicit p: Parameters) extends TLBModule()(p) { | ||||||
|   val io = new CAMIO |   val io = new CAMIO | ||||||
|   val cam_tags = Mem(Bits(width = camTagBits), entries) |   val cam_tags = Mem(entries, Bits(width = camTagBits)) | ||||||
|  |  | ||||||
|   val vb_array = Reg(init=Bits(0, entries)) |   val vb_array = Reg(init=Bits(0, entries)) | ||||||
|   when (io.write) { |   when (io.write) { | ||||||
| @@ -74,7 +76,7 @@ class PseudoLRU(n: Int) | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| class TLBReq extends CoreBundle { | class TLBReq(implicit p: Parameters) extends CoreBundle()(p) { | ||||||
|   val asid = UInt(width = asIdBits) |   val asid = UInt(width = asIdBits) | ||||||
|   val vpn = UInt(width = vpnBits+1) |   val vpn = UInt(width = vpnBits+1) | ||||||
|   val passthrough = Bool() |   val passthrough = Bool() | ||||||
| @@ -82,7 +84,7 @@ class TLBReq extends CoreBundle { | |||||||
|   val store = Bool() |   val store = Bool() | ||||||
| } | } | ||||||
|  |  | ||||||
| class TLBRespNoHitIndex extends CoreBundle { | class TLBRespNoHitIndex(implicit p: Parameters) extends CoreBundle()(p) { | ||||||
|   // lookup responses |   // lookup responses | ||||||
|   val miss = Bool(OUTPUT) |   val miss = Bool(OUTPUT) | ||||||
|   val ppn = UInt(OUTPUT, ppnBits) |   val ppn = UInt(OUTPUT, ppnBits) | ||||||
| @@ -91,11 +93,11 @@ class TLBRespNoHitIndex extends CoreBundle { | |||||||
|   val xcpt_if = Bool(OUTPUT) |   val xcpt_if = Bool(OUTPUT) | ||||||
| } | } | ||||||
|  |  | ||||||
| class TLBResp extends TLBRespNoHitIndex with TLBParameters { | class TLBResp(implicit p: Parameters) extends TLBRespNoHitIndex()(p) with HasTLBParameters { | ||||||
|   val hit_idx = UInt(OUTPUT, entries) |   val hit_idx = UInt(OUTPUT, entries) | ||||||
| } | } | ||||||
|  |  | ||||||
| class TLB extends TLBModule { | class TLB(implicit p: Parameters) extends TLBModule()(p) { | ||||||
|   val io = new Bundle { |   val io = new Bundle { | ||||||
|     val req = Decoupled(new TLBReq).flip |     val req = Decoupled(new TLBReq).flip | ||||||
|     val resp = new TLBResp |     val resp = new TLBResp | ||||||
| @@ -109,7 +111,7 @@ class TLB extends TLBModule { | |||||||
|   val r_req = Reg(new TLBReq) |   val r_req = Reg(new TLBReq) | ||||||
|  |  | ||||||
|   val tag_cam = Module(new RocketCAM) |   val tag_cam = Module(new RocketCAM) | ||||||
|   val tag_ram = Mem(io.ptw.resp.bits.pte.ppn, entries) |   val tag_ram = Mem(entries, io.ptw.resp.bits.pte.ppn) | ||||||
|    |    | ||||||
|   val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUInt |   val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUInt | ||||||
|   tag_cam.io.tag := lookup_tag |   tag_cam.io.tag := lookup_tag | ||||||
| @@ -155,24 +157,28 @@ class TLB extends TLBModule { | |||||||
|   val w_array = Mux(priv_s, sw_array.toBits, uw_array.toBits) |   val w_array = Mux(priv_s, sw_array.toBits, uw_array.toBits) | ||||||
|   val x_array = Mux(priv_s, sx_array.toBits, ux_array.toBits) |   val x_array = Mux(priv_s, sx_array.toBits, ux_array.toBits) | ||||||
|  |  | ||||||
|   val vm_enabled = io.ptw.status.vm(3) && priv_uses_vm |   val vm_enabled = io.ptw.status.vm(3) && priv_uses_vm && !io.req.bits.passthrough | ||||||
|   val bad_va = io.req.bits.vpn(vpnBits) != io.req.bits.vpn(vpnBits-1) |   val bad_va = io.req.bits.vpn(vpnBits) != io.req.bits.vpn(vpnBits-1) | ||||||
|   // it's only a store hit if the dirty bit is set |   // it's only a store hit if the dirty bit is set | ||||||
|   val tag_hits = tag_cam.io.hits & (dirty_array.toBits | ~Mux(io.req.bits.store, w_array, UInt(0))) |   val tag_hits = tag_cam.io.hits & (dirty_array.toBits | ~Mux(io.req.bits.store, w_array, UInt(0))) | ||||||
|   val tag_hit = tag_hits.orR |   val tag_hit = tag_hits.orR | ||||||
|   val tlb_hit = vm_enabled && tag_hit |   val tlb_hit = vm_enabled && tag_hit | ||||||
|   val tlb_miss = vm_enabled && !tag_hit && !bad_va |   val tlb_miss = vm_enabled && !tag_hit && !bad_va | ||||||
|    |  | ||||||
|   when (io.req.valid && tlb_hit) { |   when (io.req.valid && tlb_hit) { | ||||||
|     plru.access(OHToUInt(tag_cam.io.hits)) |     plru.access(OHToUInt(tag_cam.io.hits)) | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   val paddr = Cat(io.resp.ppn, UInt(0, pgIdxBits)) | ||||||
|  |   val addr_ok = addrMap.isValid(paddr) | ||||||
|  |   val addr_prot = addrMap.getProt(paddr) | ||||||
|  |  | ||||||
|   io.req.ready := state === s_ready |   io.req.ready := state === s_ready | ||||||
|   io.resp.xcpt_ld := bad_va || tlb_hit && !(r_array & tag_cam.io.hits).orR |   io.resp.xcpt_ld := !addr_ok || !addr_prot.r || bad_va || tlb_hit && !(r_array & tag_cam.io.hits).orR | ||||||
|   io.resp.xcpt_st := bad_va || tlb_hit && !(w_array & tag_cam.io.hits).orR |   io.resp.xcpt_st := !addr_ok || !addr_prot.w || bad_va || tlb_hit && !(w_array & tag_cam.io.hits).orR | ||||||
|   io.resp.xcpt_if := bad_va || tlb_hit && !(x_array & tag_cam.io.hits).orR |   io.resp.xcpt_if := !addr_ok || !addr_prot.x || bad_va || tlb_hit && !(x_array & tag_cam.io.hits).orR | ||||||
|   io.resp.miss := tlb_miss |   io.resp.miss := tlb_miss | ||||||
|   io.resp.ppn := Mux(vm_enabled && !io.req.bits.passthrough, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(params(PPNBits)-1,0)) |   io.resp.ppn := Mux(vm_enabled, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(ppnBits-1,0)) | ||||||
|   io.resp.hit_idx := tag_cam.io.hits |   io.resp.hit_idx := tag_cam.io.hits | ||||||
|  |  | ||||||
|   // clear invalid entries on access, or all entries on a TLB flush |   // clear invalid entries on access, or all entries on a TLB flush | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user