From db0a02b78e594febe6e4767caa872261c1e4be4b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 27 Feb 2017 14:27:19 -0800 Subject: [PATCH 01/18] WIP on priv-1.10 --- src/main/scala/coreplex/Configs.scala | 2 +- src/main/scala/groundtest/DummyPTW.scala | 2 +- src/main/scala/rocket/CSR.scala | 121 ++++++++++++++--------- src/main/scala/rocket/IDecode.scala | 2 +- src/main/scala/rocket/Instructions.scala | 10 +- src/main/scala/rocket/PTW.scala | 63 +++--------- src/main/scala/rocket/TLB.scala | 10 +- src/main/scala/tile/Core.scala | 2 +- 8 files changed, 101 insertions(+), 111 deletions(-) diff --git a/src/main/scala/coreplex/Configs.scala b/src/main/scala/coreplex/Configs.scala index e67f4117..ece3aaa8 100644 --- a/src/main/scala/coreplex/Configs.scala +++ b/src/main/scala/coreplex/Configs.scala @@ -17,7 +17,7 @@ import util._ class BaseCoreplexConfig extends Config ((site, here, up) => { case PAddrBits => 32 case PgLevels => if (site(XLen) == 64) 3 /* Sv39 */ else 2 /* Sv32 */ - case ASIdBits => 7 + case ASIdBits => 0 case XLen => 64 // Applies to all cores case BuildCore => (p: Parameters) => new Rocket()(p) case RocketCrossing => Synchronous diff --git a/src/main/scala/groundtest/DummyPTW.scala b/src/main/scala/groundtest/DummyPTW.scala index 99a8f79e..f770e42d 100644 --- a/src/main/scala/groundtest/DummyPTW.scala +++ b/src/main/scala/groundtest/DummyPTW.scala @@ -45,11 +45,11 @@ class DummyPTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { io.requestors.zipWithIndex.foreach { case (requestor, i) => requestor.resp.valid := s2_valid && s2_chosen === UInt(i) requestor.resp.bits := s2_resp - requestor.status.vm := UInt("b01000") requestor.status.prv := UInt(PRV.S) requestor.status.debug := Bool(false) requestor.status.mprv := Bool(true) requestor.status.mpp := UInt(0) + requestor.ptbr.mode := requestor.ptbr.pgLevelsToMode(pgLevels).U requestor.ptbr.asid := UInt(0) requestor.ptbr.ppn := UInt(0) requestor.invalidate := Bool(false) diff --git a/src/main/scala/rocket/CSR.scala b/src/main/scala/rocket/CSR.scala index d21561fe..4d640caa 100644 --- a/src/main/scala/rocket/CSR.scala +++ b/src/main/scala/rocket/CSR.scala @@ -18,11 +18,13 @@ class MStatus extends Bundle { val prv = UInt(width = PRV.SZ) // not truly part of mstatus, but convenient val sd = Bool() - val zero3 = UInt(width = 31) + val zero2 = UInt(width = 27) + val sxl = UInt(width = 2) + val uxl = UInt(width = 2) val sd_rv32 = Bool() - val zero2 = UInt(width = 2) - val vm = UInt(width = 5) - val zero1 = UInt(width = 4) + val zero1 = UInt(width = 9) + val tw = Bool() + val tvm = Bool() val mxr = Bool() val pum = Bool() val mprv = Bool() @@ -78,8 +80,18 @@ class MIP extends Bundle { } class PTBR(implicit p: Parameters) extends CoreBundle()(p) { - require(maxPAddrBits - pgIdxBits + asIdBits <= xLen) - val asid = UInt(width = asIdBits) + def pgLevelsToMode(i: Int) = (xLen, i) match { + case (32, 2) => 1 + case (64, x) if x >= 3 && x <= 6 => x + 5 + } + val (modeBits, maxASIdBits) = xLen match { + case 32 => (1, 9) + case 64 => (4, 16) + } + require(modeBits + maxASIdBits + maxPAddrBits - pgIdxBits == xLen) + + val mode = UInt(width = modeBits) + val asid = UInt(width = maxASIdBits) val ppn = UInt(width = maxPAddrBits - pgIdxBits) } @@ -112,12 +124,12 @@ object CSR } val firstCtr = CSRs.cycle + val firstHPC = CSRs.hpmcounter3 + val firstHPE = CSRs.mhpmevent3 + val firstMHPC = CSRs.mhpmcounter3 val firstHPM = 3 - val firstHPC = CSRs.cycle + firstHPM - val firstHPE = CSRs.mucounteren + firstHPM - val firstMHPC = CSRs.mcycle + firstHPM - val nHPM = 29 - val nCtr = firstHPM + nHPM + val nCtr = 32 + val nHPM = nCtr - firstHPM } class CSRFileIO(implicit p: Parameters) extends CoreBundle @@ -200,6 +212,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val exception = io.exception || io.csr_xcpt val reg_debug = Reg(init=Bool(false)) + val effective_prv = Cat(reg_debug, reg_mstatus.prv) val reg_dpc = Reg(UInt(width = vaddrBitsExtended)) val reg_dscratch = Reg(UInt(width = xLen)) @@ -225,8 +238,8 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) case Some(addr) => Reg(init=UInt(addr, mtvecWidth)) case None => Reg(UInt(width = mtvecWidth)) } - val reg_mucounteren = Reg(UInt(width = 32)) - val reg_mscounteren = Reg(UInt(width = 32)) + val reg_mcounteren = Reg(UInt(width = 32)) + val reg_scounteren = Reg(UInt(width = 32)) val delegable_counters = (BigInt(1) << (nPerfCounters + CSR.firstHPM)) - 1 val reg_sepc = Reg(UInt(width = vaddrBitsExtended)) @@ -250,12 +263,12 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val read_mip = mip.asUInt & supported_interrupts val pending_interrupts = read_mip & reg_mie - val m_interrupts = Mux(!reg_debug && (reg_mstatus.prv < PRV.M || (reg_mstatus.prv === PRV.M && reg_mstatus.mie)), pending_interrupts & ~reg_mideleg, UInt(0)) - val s_interrupts = Mux(!reg_debug && (reg_mstatus.prv < PRV.S || (reg_mstatus.prv === PRV.S && reg_mstatus.sie)), pending_interrupts & reg_mideleg, UInt(0)) + val m_interrupts = Mux(reg_mstatus.prv <= PRV.S || (reg_mstatus.prv === PRV.M && reg_mstatus.mie), pending_interrupts & ~reg_mideleg, UInt(0)) + val s_interrupts = Mux(m_interrupts === 0 && (reg_mstatus.prv < PRV.S || (reg_mstatus.prv === PRV.S && reg_mstatus.sie)), pending_interrupts & reg_mideleg, UInt(0)) val all_interrupts = m_interrupts | s_interrupts val interruptMSB = BigInt(1) << (xLen-1) val interruptCause = UInt(interruptMSB) + PriorityEncoder(all_interrupts) - io.interrupt := all_interrupts.orR && !io.singleStep || reg_singleStepped + io.interrupt := all_interrupts.orR && !reg_debug && !io.singleStep || reg_singleStepped io.interrupt_cause := interruptCause io.bp := reg_bp take nBreakpoints @@ -332,7 +345,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val read_sie = reg_mie & reg_mideleg val read_sip = read_mip & reg_mideleg val read_sstatus = Wire(init=io.status) - read_sstatus.vm := 0 read_sstatus.mprv := 0 read_sstatus.mpp := 0 read_sstatus.hpp := 0 @@ -350,11 +362,11 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) read_mapping += CSRs.sptbr -> reg_sptbr.asUInt read_mapping += CSRs.sepc -> reg_sepc.sextTo(xLen) read_mapping += CSRs.stvec -> reg_stvec.sextTo(xLen) - read_mapping += CSRs.mscounteren -> reg_mscounteren + read_mapping += CSRs.scounteren -> reg_scounteren } if (usingUser) { - read_mapping += CSRs.mucounteren -> reg_mucounteren + read_mapping += CSRs.mcounteren -> reg_mcounteren read_mapping += CSRs.cycle -> reg_cycle read_mapping += CSRs.instret -> reg_instret } @@ -379,9 +391,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val addr_valid = decoded_addr.values.reduce(_||_) val fp_csr = if (usingFPU) decoded_addr.filterKeys(fp_csrs contains _ ).values reduce(_||_) else Bool(false) val hpm_csr = if (usingUser) io.rw.addr >= CSR.firstCtr && io.rw.addr < CSR.firstCtr + CSR.nCtr else Bool(false) - val hpm_en = reg_debug || reg_mstatus.prv === PRV.M || - (reg_mstatus.prv === PRV.S && reg_mscounteren(io.rw.addr(log2Ceil(CSR.nCtr)-1, 0))) || - (reg_mstatus.prv === PRV.U && reg_mucounteren(io.rw.addr(log2Ceil(CSR.nCtr)-1, 0))) + val hpm_en = effective_prv > PRV.S || (reg_mcounteren & Mux((!usingVM).B || reg_mstatus.prv === PRV.S, delegable_counters.U, reg_scounteren))(io.rw.addr(log2Ceil(CSR.nCtr)-1, 0)) val csr_addr_priv = io.rw.addr(9,8) val debug_csr_mask = 0x090 // only debug CSRs have address bits 7 and 4 set @@ -392,25 +402,37 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val read_only = io.rw.addr(11,10).andR val wen = cpu_wen && priv_sufficient && !read_only - val wdata = (Mux(io.rw.cmd.isOneOf(CSR.S, CSR.C), io.rw.rdata, UInt(0)) | - Mux(io.rw.cmd =/= CSR.C, io.rw.wdata, UInt(0))) & + val wdata = (Mux(io.rw.cmd.isOneOf(CSR.S, CSR.C), io.rw.rdata, UInt(0)) | io.rw.wdata) & ~Mux(io.rw.cmd === CSR.C, io.rw.wdata, UInt(0)) val do_system_insn = priv_sufficient && system_insn val opcode = UInt(1) << io.rw.addr(2,0) - val insn_call = do_system_insn && opcode(0) + val insn_rs2 = io.rw.addr(5) + val insn_call = do_system_insn && !insn_rs2 && opcode(0) val insn_break = do_system_insn && opcode(1) val insn_ret = do_system_insn && opcode(2) - val insn_sfence_vm = do_system_insn && opcode(4) - val insn_wfi = do_system_insn && opcode(5) + val allow_wfi = effective_prv > PRV.S || !reg_mstatus.tw + val want_wfi = do_system_insn && opcode(5) + val insn_wfi = want_wfi && allow_wfi + val allow_sfence_vma = effective_prv > PRV.S || !reg_mstatus.tvm + val want_sfence_vma = do_system_insn && insn_rs2 + val insn_sfence_vma = want_sfence_vma && allow_sfence_vma + val allow_fcsr = io.status.fs.orR && reg_misa('f'-'a') io.csr_xcpt := (cpu_wen && read_only) || - (cpu_ren && (!priv_sufficient || !addr_valid || (hpm_csr && !hpm_en) || (fp_csr && !(io.status.fs.orR && reg_misa('f'-'a'))))) || + (cpu_ren && + (!priv_sufficient || + !addr_valid || + (if (usingVM) decoded_addr(CSRs.sptbr) && !allow_sfence_vma else false.B) || + (hpm_csr && !hpm_en) || + (fp_csr && !allow_fcsr))) || (system_insn && !priv_sufficient) || - insn_call || insn_break + insn_call || insn_break || + want_wfi && !allow_wfi || + want_sfence_vma && !allow_sfence_vma when (insn_wfi) { reg_wfi := true } - when (pending_interrupts.orR) { reg_wfi := false } + when (pending_interrupts.orR || exception) { reg_wfi := false } val cause = Mux(!io.csr_xcpt, io.cause, @@ -421,12 +443,12 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val causeIsDebugTrigger = !cause(xLen-1) && cause_lsbs === CSR.debugTriggerCause val causeIsDebugBreak = !cause(xLen-1) && insn_break && Cat(reg_dcsr.ebreakm, reg_dcsr.ebreakh, reg_dcsr.ebreaks, reg_dcsr.ebreaku)(reg_mstatus.prv) val trapToDebug = Bool(usingDebug) && (reg_singleStepped || causeIsDebugInt || causeIsDebugTrigger || causeIsDebugBreak || reg_debug) - val delegate = Bool(usingVM) && reg_mstatus.prv < PRV.M && Mux(cause(xLen-1), reg_mideleg(cause_lsbs), reg_medeleg(cause_lsbs)) + val delegate = Bool(usingVM) && reg_mstatus.prv <= PRV.S && Mux(cause(xLen-1), reg_mideleg(cause_lsbs), reg_medeleg(cause_lsbs)) val debugTVec = Mux(reg_debug, UInt(0x808), UInt(0x800)) val tvec = Mux(trapToDebug, debugTVec, Mux(delegate, reg_stvec.sextTo(vaddrBitsExtended), reg_mtvec)) val epc = Mux(csr_debug, reg_dpc, Mux(Bool(usingVM) && !csr_addr_priv(1), reg_sepc, reg_mepc)) - io.fatc := insn_sfence_vm - io.evec := Mux(exception, tvec, epc) + io.fatc := insn_sfence_vma + io.evec := Mux(insn_ret, epc, tvec) io.ptbr := reg_sptbr io.eret := insn_ret io.singleStep := reg_dcsr.step && !reg_debug @@ -434,6 +456,8 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) io.status.sd := io.status.fs.andR || io.status.xs.andR io.status.debug := reg_debug io.status.isa := reg_misa + io.status.uxl := (if (usingUser) log2Ceil(xLen) - 4 else 0) + io.status.sxl := (if (usingVM) log2Ceil(xLen) - 4 else 0) if (xLen == 32) io.status.sd_rv32 := io.status.sd @@ -508,21 +532,17 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) if (usingUser) { reg_mstatus.mprv := new_mstatus.mprv reg_mstatus.mpp := trimPrivilege(new_mstatus.mpp) + reg_mstatus.mxr := new_mstatus.mxr if (usingVM) { - reg_mstatus.mxr := new_mstatus.mxr reg_mstatus.pum := new_mstatus.pum reg_mstatus.spp := new_mstatus.spp reg_mstatus.spie := new_mstatus.spie reg_mstatus.sie := new_mstatus.sie + reg_mstatus.tw := new_mstatus.tw + reg_mstatus.tvm := new_mstatus.tvm } } - if (usingVM) { - require(if (xLen == 32) pgLevels == 2 else pgLevels > 2 && pgLevels < 6) - val vm_on = 6 + pgLevels // TODO Sv48 support should imply Sv39 support - when (new_mstatus.vm === 0) { reg_mstatus.vm := 0 } - when (new_mstatus.vm === vm_on) { reg_mstatus.vm := vm_on } - } if (usingVM || usingFPU) reg_mstatus.fs := Fill(2, new_mstatus.fs.orR) if (usingRoCC) reg_mstatus.xs := Fill(2, new_mstatus.xs.orR) } @@ -554,7 +574,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) writeCounter(CSRs.mcycle, reg_cycle, wdata) writeCounter(CSRs.minstret, reg_instret, wdata) - if (usingFPU) { + if (usingFPU) when (allow_fcsr) { when (decoded_addr(CSRs.fflags)) { reg_fflags := wdata } when (decoded_addr(CSRs.frm)) { reg_frm := wdata } when (decoded_addr(CSRs.fcsr)) { reg_fflags := wdata; reg_frm := wdata >> reg_fflags.getWidth } @@ -586,19 +606,28 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val new_sip = new MIP().fromBits(wdata) reg_mip.ssip := new_sip.ssip } + when (decoded_addr(CSRs.sptbr) && allow_sfence_vma) { + val new_sptbr = new PTBR().fromBits(wdata) + val valid_mode = new_sptbr.pgLevelsToMode(pgLevels) + when (new_sptbr.mode === 0) { reg_sptbr.mode := 0 } + when (new_sptbr.mode === valid_mode) { reg_sptbr.mode := valid_mode } + when (new_sptbr.mode === 0 || new_sptbr.mode === valid_mode) { + reg_sptbr.ppn := new_sptbr.ppn(ppnBits-1,0) + if (asIdBits > 0) reg_sptbr.asid := new_sptbr.asid(asIdBits-1,0) + } + } when (decoded_addr(CSRs.sie)) { reg_mie := (reg_mie & ~reg_mideleg) | (wdata & reg_mideleg) } when (decoded_addr(CSRs.sscratch)) { reg_sscratch := wdata } - when (decoded_addr(CSRs.sptbr)) { reg_sptbr.ppn := wdata(ppnBits-1,0) } when (decoded_addr(CSRs.sepc)) { reg_sepc := formEPC(wdata) } when (decoded_addr(CSRs.stvec)) { reg_stvec := wdata >> 2 << 2 } when (decoded_addr(CSRs.scause)) { reg_scause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ } when (decoded_addr(CSRs.sbadaddr)) { reg_sbadaddr := wdata(vaddrBitsExtended-1,0) } when (decoded_addr(CSRs.mideleg)) { reg_mideleg := wdata & delegable_interrupts } when (decoded_addr(CSRs.medeleg)) { reg_medeleg := wdata & delegable_exceptions } - when (decoded_addr(CSRs.mscounteren)) { reg_mscounteren := wdata & UInt(delegable_counters) } + when (decoded_addr(CSRs.scounteren)) { reg_scounteren := wdata & UInt(delegable_counters) } } if (usingUser) { - when (decoded_addr(CSRs.mucounteren)) { reg_mucounteren := wdata & UInt(delegable_counters) } + when (decoded_addr(CSRs.mcounteren)) { reg_mcounteren := wdata & UInt(delegable_counters) } } if (nBreakpoints > 0) { when (decoded_addr(CSRs.tselect)) { reg_tselect := wdata } @@ -623,11 +652,11 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) if (!usingVM) { reg_mideleg := 0 reg_medeleg := 0 - reg_mscounteren := 0 + reg_scounteren := 0 } if (!usingUser) { - reg_mucounteren := 0 + reg_mcounteren := 0 } reg_sptbr.asid := 0 diff --git a/src/main/scala/rocket/IDecode.scala b/src/main/scala/rocket/IDecode.scala index 5e420a38..89ee819a 100644 --- a/src/main/scala/rocket/IDecode.scala +++ b/src/main/scala/rocket/IDecode.scala @@ -129,7 +129,7 @@ class IDecode(implicit val p: Parameters) extends DecodeConstants class SDecode(implicit val p: Parameters) extends DecodeConstants { val table: Array[(BitPat, List[BitPat])] = Array( - SFENCE_VM-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N,N), + SFENCE_VMA->List(Y,N,N,N,N,N,Y,Y,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N,N), SRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N,N)) } diff --git a/src/main/scala/rocket/Instructions.scala b/src/main/scala/rocket/Instructions.scala index 228724f3..aef4c910 100644 --- a/src/main/scala/rocket/Instructions.scala +++ b/src/main/scala/rocket/Instructions.scala @@ -100,7 +100,7 @@ object Instructions { def HRET = BitPat("b00100000001000000000000001110011") def MRET = BitPat("b00110000001000000000000001110011") def DRET = BitPat("b01111011001000000000000001110011") - def SFENCE_VM = BitPat("b000100000100?????000000001110011") + def SFENCE_VMA = BitPat("b0001001??????????000000001110011") def WFI = BitPat("b00010000010100000000000001110011") def CSRRW = BitPat("b?????????????????001?????1110011") def CSRRS = BitPat("b?????????????????010?????1110011") @@ -283,6 +283,7 @@ object CSRs { val sstatus = 0x100 val sie = 0x104 val stvec = 0x105 + val scounteren = 0x106 val sscratch = 0x140 val sepc = 0x141 val scause = 0x142 @@ -295,6 +296,7 @@ object CSRs { val mideleg = 0x303 val mie = 0x304 val mtvec = 0x305 + val mcounteren = 0x306 val mscratch = 0x340 val mepc = 0x341 val mcause = 0x342 @@ -338,8 +340,6 @@ object CSRs { val mhpmcounter29 = 0xb1d val mhpmcounter30 = 0xb1e val mhpmcounter31 = 0xb1f - val mucounteren = 0x320 - val mscounteren = 0x321 val mhpmevent3 = 0x323 val mhpmevent4 = 0x324 val mhpmevent5 = 0x325 @@ -476,6 +476,7 @@ object CSRs { res += sstatus res += sie res += stvec + res += scounteren res += sscratch res += sepc res += scause @@ -488,6 +489,7 @@ object CSRs { res += mideleg res += mie res += mtvec + res += mcounteren res += mscratch res += mepc res += mcause @@ -531,8 +533,6 @@ object CSRs { res += mhpmcounter29 res += mhpmcounter30 res += mhpmcounter31 - res += mucounteren - res += mscounteren res += mhpmevent3 res += mhpmevent4 res += mhpmevent5 diff --git a/src/main/scala/rocket/PTW.scala b/src/main/scala/rocket/PTW.scala index eeb4050e..e9f86d0a 100644 --- a/src/main/scala/rocket/PTW.scala +++ b/src/main/scala/rocket/PTW.scala @@ -40,8 +40,7 @@ class DatapathPTWIO(implicit p: Parameters) extends CoreBundle()(p) { } class PTE(implicit p: Parameters) extends CoreBundle()(p) { - val reserved_for_hardware = Bits(width = 16) - val ppn = UInt(width = 38) + val ppn = UInt(width = 54) val reserved_for_software = Bits(width = 2) val d = Bool() val a = Bool() @@ -53,19 +52,13 @@ class PTE(implicit p: Parameters) extends CoreBundle()(p) { val v = Bool() def table(dummy: Int = 0) = v && !r && !w && !x - def leaf(dummy: Int = 0) = v && (r || (x && !w)) + def leaf(dummy: Int = 0) = v && (r || (x && !w)) && a def ur(dummy: Int = 0) = sr() && u def uw(dummy: Int = 0) = sw() && u def ux(dummy: Int = 0) = sx() && u def sr(dummy: Int = 0) = leaf() && r - def sw(dummy: Int = 0) = leaf() && w + def sw(dummy: Int = 0) = leaf() && w && d def sx(dummy: Int = 0) = leaf() && x - - def access_ok(req: PTWReq) = { - val perm_ok = Mux(req.fetch, x, Mux(req.store, w, r || (x && req.mxr))) - val priv_ok = Mux(u, !req.pum, req.prv(0)) - leaf() && priv_ok && perm_ok - } } class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { @@ -77,10 +70,11 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { require(usingAtomics, "PTW requires atomic memory operations") - val s_ready :: s_req :: s_wait1 :: s_wait2 :: s_set_dirty :: s_wait1_dirty :: s_wait2_dirty :: s_done :: Nil = Enum(UInt(), 8) + val s_ready :: s_req :: s_wait1 :: s_wait2 :: Nil = Enum(UInt(), 4) val state = Reg(init=s_ready) val count = Reg(UInt(width = log2Up(pgLevels))) val s1_kill = Reg(next = Bool(false)) + val resp_valid = Reg(next = Bool(false)) val r_req = Reg(new PTWReq) val r_req_dest = Reg(Bits()) @@ -128,23 +122,18 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { (hit && count < pgLevels-1, Mux1H(hits, data)) } - - val pte_wdata = Wire(init=new PTE().fromBits(0)) - pte_wdata.a := true - pte_wdata.d := r_req.store - io.mem.req.valid := state.isOneOf(s_req, s_set_dirty) + io.mem.req.valid := state === s_req io.mem.req.bits.phys := Bool(true) - io.mem.req.bits.cmd := Mux(state === s_set_dirty, M_XA_OR, M_XRD) + io.mem.req.bits.cmd := M_XRD io.mem.req.bits.typ := log2Ceil(xLen/8) io.mem.req.bits.addr := pte_addr - io.mem.s1_data := pte_wdata.asUInt io.mem.s1_kill := s1_kill io.mem.invalidate_lr := Bool(false) val resp_ppns = (0 until pgLevels-1).map(i => Cat(pte_addr >> (pgIdxBits + pgLevelBits*(pgLevels-i-1)), r_req.addr(pgLevelBits*(pgLevels-i-1)-1,0))) :+ (pte_addr >> pgIdxBits) for (i <- 0 until io.requestor.size) { - io.requestor(i).resp.valid := state === s_done && (r_req_dest === i) + io.requestor(i).resp.valid := resp_valid && (r_req_dest === i) io.requestor(i).resp.bits.pte := r_pte io.requestor(i).resp.bits.pte.ppn := resp_ppns(count) io.requestor(i).ptbr := io.dpath.ptbr @@ -174,7 +163,8 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { state := s_wait2 when (io.mem.xcpt.pf.ld) { r_pte.v := false - state := s_done + state := s_ready + resp_valid := true } } is (s_wait2) { @@ -182,41 +172,16 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { state := s_req } when (io.mem.resp.valid) { - state := s_done - when (pte.access_ok(r_req) && (!pte.a || (r_req.store && !pte.d))) { - state := s_set_dirty - }.otherwise { - r_pte := pte - } + r_pte := pte when (pte.table() && count < pgLevels-1) { state := s_req count := count + 1 + }.otherwise { + state := s_ready + resp_valid := true } } } - is (s_set_dirty) { - when (io.mem.req.ready) { - state := s_wait1_dirty - } - } - is (s_wait1_dirty) { - state := s_wait2_dirty - when (io.mem.xcpt.pf.st) { - r_pte.v := false - state := s_done - } - } - is (s_wait2_dirty) { - when (io.mem.s2_nack) { - state := s_set_dirty - } - when (io.mem.resp.valid) { - state := s_req - } - } - is (s_done) { - state := s_ready - } } } diff --git a/src/main/scala/rocket/TLB.scala b/src/main/scala/rocket/TLB.scala index b4b78fce..0bc39053 100644 --- a/src/main/scala/rocket/TLB.scala +++ b/src/main/scala/rocket/TLB.scala @@ -86,7 +86,7 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod } val lookup_tag = Cat(io.ptw.ptbr.asid, io.req.bits.vpn(vpnBits-1,0)) - val vm_enabled = Bool(usingVM) && io.ptw.status.vm(3) && priv_uses_vm && !io.req.bits.passthrough + val vm_enabled = Bool(usingVM) && io.ptw.ptbr.mode(io.ptw.ptbr.mode.getWidth-1) && priv_uses_vm && !io.req.bits.passthrough val hitsVec = (0 until entries).map(i => valid(i) && vm_enabled && tags(i) === lookup_tag) :+ !vm_enabled val hits = hitsVec.asUInt @@ -98,7 +98,6 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod val sr_array = Reg(UInt(width = entries)) // read permission val xr_array = Reg(UInt(width = entries)) // read permission to executable page val cash_array = Reg(UInt(width = entries)) // cacheable - val dirty_array = Reg(UInt(width = entries)) // PTE dirty bit when (do_refill) { val pte = io.ptw.resp.bits.pte ppns(r_refill_waddr) := pte.ppn @@ -112,7 +111,6 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod sr_array := Mux(pte.sr() && prot_r, sr_array | mask, sr_array & ~mask) xr_array := Mux(pte.sx() && prot_r, xr_array | mask, xr_array & ~mask) cash_array := Mux(cacheable, cash_array | mask, cash_array & ~mask) - dirty_array := Mux(pte.d, dirty_array | mask, dirty_array & ~mask) } val plru = new PseudoLRU(entries) @@ -121,15 +119,13 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod val priv_ok = Mux(priv_s, ~Mux(io.ptw.status.pum, u_array, UInt(0)), u_array) val w_array = Cat(prot_w, priv_ok & sw_array) val x_array = Cat(prot_x, priv_ok & sx_array) - val r_array = Cat(prot_r, priv_ok & (sr_array | Mux(io.ptw.status.mxr, xr_array, UInt(0)))) + val r_array = Cat(prot_r | (prot_x & io.ptw.status.mxr), priv_ok & (sr_array | Mux(io.ptw.status.mxr, xr_array, UInt(0)))) val c_array = Cat(cacheable, cash_array) val bad_va = if (vpnBits == vpnBitsExtended) Bool(false) else io.req.bits.vpn(vpnBits) =/= io.req.bits.vpn(vpnBits-1) - // it's only a store hit if the dirty bit is set - val tlb_hits = hits(entries-1, 0) & (dirty_array | ~Mux(io.req.bits.store, w_array, UInt(0))) - val tlb_hit = tlb_hits.orR + val tlb_hit = hits(entries-1, 0).orR val tlb_miss = vm_enabled && !bad_va && !tlb_hit when (io.req.valid && !tlb_miss) { diff --git a/src/main/scala/tile/Core.scala b/src/main/scala/tile/Core.scala index 8e45da3b..66068474 100644 --- a/src/main/scala/tile/Core.scala +++ b/src/main/scala/tile/Core.scala @@ -60,7 +60,7 @@ trait HasCoreParameters extends HasTileParameters { val vpnBitsExtended = vpnBits + (vaddrBits < xLen).toInt val vaddrBitsExtended = vpnBitsExtended + pgIdxBits val coreMaxAddrBits = paddrBits max vaddrBitsExtended - val maxPAddrBits = xLen match { case 32 => 34; case 64 => 50 } + val maxPAddrBits = xLen match { case 32 => 34; case 64 => 56 } require(paddrBits <= maxPAddrBits) // Print out log of committed instructions and their writeback values. From 11c8857b5de30ada566b771b3114e5908d2793ad Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 5 Mar 2017 21:43:20 -0800 Subject: [PATCH 02/18] Don't re-read I$ RAMs on stall --- src/main/scala/rocket/Frontend.scala | 2 +- src/main/scala/rocket/ICache.scala | 42 ++++++++++++---------------- 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/src/main/scala/rocket/Frontend.scala b/src/main/scala/rocket/Frontend.scala index f0968232..39815cd1 100644 --- a/src/main/scala/rocket/Frontend.scala +++ b/src/main/scala/rocket/Frontend.scala @@ -134,7 +134,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer) icache.io.req.valid := !stall && !s0_same_block icache.io.req.bits.addr := io.cpu.npc icache.io.invalidate := io.cpu.flush_icache - icache.io.s1_ppn := tlb.io.resp.ppn + icache.io.s1_paddr := Cat(tlb.io.resp.ppn, s1_pc(pgIdxBits-1, 0)) icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.cpu.flush_tlb icache.io.s2_kill := s2_speculative && !s2_cacheable icache.io.resp.ready := !stall && !s1_same_block diff --git a/src/main/scala/rocket/ICache.scala b/src/main/scala/rocket/ICache.scala index 9df237d1..c56b3eb1 100644 --- a/src/main/scala/rocket/ICache.scala +++ b/src/main/scala/rocket/ICache.scala @@ -44,7 +44,7 @@ class ICache(val latency: Int)(implicit p: Parameters) extends LazyModule { class ICacheBundle(outer: ICache) extends CoreBundle()(outer.p) { val req = Valid(new ICacheReq).flip - val s1_ppn = UInt(INPUT, ppnBits) // delayed one cycle w.r.t. req + val s1_paddr = UInt(INPUT, paddrBits) // delayed one cycle w.r.t. req val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req val s2_kill = Bool(INPUT) // delayed two cycles; prevents I$ miss emission @@ -67,34 +67,27 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer) val state = Reg(init=s_ready) val invalidated = Reg(Bool()) val stall = !io.resp.ready - val rdy = Wire(Bool()) val refill_addr = Reg(UInt(width = paddrBits)) val s1_any_tag_hit = Wire(Bool()) val s1_valid = Reg(init=Bool(false)) - val s1_vaddr = Reg(UInt()) - val s1_paddr = Cat(io.s1_ppn, s1_vaddr(pgIdxBits-1,0)) - val s1_tag = s1_paddr(tagBits+untagBits-1,untagBits) - - val s0_valid = io.req.valid || s1_valid && stall - val s0_vaddr = Mux(s1_valid && stall, s1_vaddr, io.req.bits.addr) - - s1_valid := io.req.valid && rdy || s1_valid && stall && !io.s1_kill - when (io.req.valid && rdy) { - s1_vaddr := io.req.bits.addr - } - val out_valid = s1_valid && !io.s1_kill && state === s_ready - val s1_idx = s1_vaddr(untagBits-1,blockOffBits) + val s1_idx = io.s1_paddr(untagBits-1,blockOffBits) + val s1_tag = io.s1_paddr(tagBits+untagBits-1,untagBits) val s1_hit = out_valid && s1_any_tag_hit val s1_miss = out_valid && !s1_any_tag_hit - rdy := state === s_ready && !s1_miss + + val s0_valid = io.req.valid && state === s_ready && !(out_valid && stall) + val s0_vaddr = io.req.bits.addr + + s1_valid := s0_valid || out_valid && stall when (s1_miss && state === s_ready) { - refill_addr := s1_paddr + refill_addr := io.s1_paddr } val refill_tag = refill_addr(tagBits+untagBits-1,untagBits) + val refill_idx = refill_addr(untagBits-1,blockOffBits) val (_, _, refill_done, refill_cnt) = edge.count(tl_out.d) tl_out.d.ready := Bool(true) require (edge.manager.minLatency > 0) @@ -105,12 +98,12 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer) val tag_rdata = tag_array.read(s0_vaddr(untagBits-1,blockOffBits), !refill_done && s0_valid) when (refill_done) { val tag = code.encode(refill_tag) - tag_array.write(s1_idx, Vec.fill(nWays)(tag), Vec.tabulate(nWays)(repl_way === _)) + tag_array.write(refill_idx, Vec.fill(nWays)(tag), Vec.tabulate(nWays)(repl_way === _)) } val vb_array = Reg(init=Bits(0, nSets*nWays)) when (refill_done && !invalidated) { - vb_array := vb_array.bitSet(Cat(repl_way, s1_idx), Bool(true)) + vb_array := vb_array.bitSet(Cat(repl_way, refill_idx), Bool(true)) } when (io.invalidate) { vb_array := Bits(0) @@ -123,12 +116,13 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer) val s1_tag_match = Wire(Vec(nWays, Bool())) val s1_tag_hit = Wire(Vec(nWays, Bool())) val s1_dout = Wire(Vec(nWays, Bits(width = rowBits))) + val s1_dout_valid = RegNext(s0_valid) for (i <- 0 until nWays) { - val s1_vb = !io.invalidate && vb_array(Cat(UInt(i), s1_vaddr(untagBits-1,blockOffBits))).toBool + val s1_vb = !io.invalidate && vb_array(Cat(UInt(i), io.s1_paddr(untagBits-1,blockOffBits))).toBool val tag_out = tag_rdata(i) - val s1_tag_disparity = code.decode(tag_out).error - s1_tag_match(i) := tag_out(tagBits-1,0) === s1_tag + val s1_tag_disparity = code.decode(tag_out).error holdUnless s1_dout_valid + s1_tag_match(i) := (tag_out(tagBits-1,0) === s1_tag) holdUnless s1_dout_valid s1_tag_hit(i) := s1_vb && s1_tag_match(i) s1_disparity(i) := s1_vb && (s1_tag_disparity || code.decode(s1_dout(i)).error) } @@ -139,10 +133,10 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer) val wen = tl_out.d.valid && repl_way === UInt(i) when (wen) { val e_d = code.encode(tl_out.d.bits.data) - data_array.write((s1_idx << log2Ceil(refillCycles)) | refill_cnt, e_d) + data_array.write((refill_idx << log2Ceil(refillCycles)) | refill_cnt, e_d) } val s0_raddr = s0_vaddr(untagBits-1,blockOffBits-log2Ceil(refillCycles)) - s1_dout(i) := data_array.read(s0_raddr, !wen && s0_valid) + s1_dout(i) := data_array.read(s0_raddr, !wen && s0_valid) holdUnless s1_dout_valid } // output signals From 74d8d672bf84389e4cfab65775bb2f3950c05ea2 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 5 Mar 2017 23:01:07 -0800 Subject: [PATCH 03/18] Improve BTB critical path at slight accuracy cost Make entries fully associative on lower 14 bits only, not full address. --- src/main/scala/rocket/BTB.scala | 53 ++++++++++++++++---------------- src/main/scala/rocket/IBuf.scala | 1 + 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/src/main/scala/rocket/BTB.scala b/src/main/scala/rocket/BTB.scala index f39aa042..15cb5d24 100644 --- a/src/main/scala/rocket/BTB.scala +++ b/src/main/scala/rocket/BTB.scala @@ -11,16 +11,18 @@ import util._ case class BTBParams( nEntries: Int = 40, + nMatchBits: Int = 14, + nPages: Int = 6, nRAS: Int = 2, updatesOutOfOrder: Boolean = false) trait HasBtbParameters extends HasCoreParameters { val btbParams = tileParams.btb.getOrElse(BTBParams(nEntries = 0)) - val matchBits = pgIdxBits max log2Ceil(p(coreplex.CacheBlockBytes) * tileParams.icache.get.nSets) + val matchBits = btbParams.nMatchBits max log2Ceil(p(coreplex.CacheBlockBytes) * tileParams.icache.get.nSets) val entries = btbParams.nEntries val nRAS = btbParams.nRAS val updatesOutOfOrder = btbParams.updatesOutOfOrder - val nPages = ((1 max(log2Up(entries)))+1)/2*2 // control logic assumes 2 divides pages + val nPages = (btbParams.nPages + 1) / 2 * 2 // control logic assumes 2 divides pages val opaqueBits = log2Up(entries) val nBHT = 1 << log2Up(entries*2) } @@ -150,8 +152,6 @@ class BTB(implicit p: Parameters) extends BtbModule { val tgtPages = Reg(Vec(entries, UInt(width=log2Up(nPages)))) val pages = Reg(Vec(nPages, UInt(width=vaddrBits - matchBits))) val pageValid = Reg(init = UInt(0, nPages)) - val idxPagesOH = idxPages.map(UIntToOH(_)(nPages-1,0)) - val tgtPagesOH = tgtPages.map(UIntToOH(_)(nPages-1,0)) val isValid = Reg(init = UInt(0, entries)) val isReturn = Reg(UInt(width = entries)) @@ -163,31 +163,29 @@ class BTB(implicit p: Parameters) extends BtbModule { val p = page(addr) pageValid & pages.map(_ === p).asUInt } - private def tagMatch(addr: UInt, pgMatch: UInt) = { - val idxMatch = idxs.map(_ === addr(matchBits-1, log2Up(coreInstBytes))).asUInt - val idxPageMatch = idxPagesOH.map(_ & pgMatch).map(_.orR).asUInt - idxMatch & idxPageMatch & isValid + private def idxMatch(addr: UInt) = { + val idx = addr(matchBits-1, log2Up(coreInstBytes)) + idxs.map(_ === idx).asUInt & isValid } val r_btb_update = Pipe(io.btb_update) val update_target = io.req.bits.addr val pageHit = pageMatch(io.req.bits.addr) - val hitsVec = tagMatch(io.req.bits.addr, pageHit) - val hits = hitsVec.asUInt + val idxHit = idxMatch(io.req.bits.addr) + val updatePageHit = pageMatch(r_btb_update.bits.pc) - - val updateHits = tagMatch(r_btb_update.bits.pc, updatePageHit) - val updateHit = if (updatesOutOfOrder) updateHits.orR else r_btb_update.bits.prediction.valid - val updateHitAddr = if (updatesOutOfOrder) OHToUInt(updateHits) else r_btb_update.bits.prediction.bits.entry - - val nextRepl = Counter(r_btb_update.valid && !updateHit, entries)._1 + val (updateHit, updateHitAddr) = + if (updatesOutOfOrder) { + val updateHits = (pageHit << 1)(Mux1H(idxMatch(r_btb_update.bits.pc), idxPages)) + (updateHits.orR, OHToUInt(updateHits)) + } else (r_btb_update.bits.prediction.valid, r_btb_update.bits.prediction.bits.entry) val useUpdatePageHit = updatePageHit.orR val usePageHit = pageHit.orR val doIdxPageRepl = !useUpdatePageHit val nextPageRepl = Reg(UInt(width = log2Ceil(nPages))) - val idxPageRepl = Mux(usePageHit, Cat(pageHit(nPages-2,0), pageHit(nPages-1)), UIntToOH(nextPageRepl)) + val idxPageRepl = Cat(pageHit(nPages-2,0), pageHit(nPages-1)) | Mux(usePageHit, UInt(0), UIntToOH(nextPageRepl)) val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl) val idxPageUpdate = OHToUInt(idxPageUpdateOH) val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0)) @@ -195,7 +193,7 @@ class BTB(implicit p: Parameters) extends BtbModule { val samePage = page(r_btb_update.bits.pc) === page(update_target) val doTgtPageRepl = !samePage && !usePageHit val tgtPageRepl = Mux(samePage, idxPageUpdateOH, Cat(idxPageUpdateOH(nPages-2,0), idxPageUpdateOH(nPages-1))) - val tgtPageUpdate = OHToUInt(Mux(usePageHit, pageHit, tgtPageRepl)) + val tgtPageUpdate = OHToUInt(pageHit | Mux(usePageHit, UInt(0), tgtPageRepl)) val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0)) when (r_btb_update.valid && (doIdxPageRepl || doTgtPageRepl)) { @@ -205,11 +203,12 @@ class BTB(implicit p: Parameters) extends BtbModule { } when (r_btb_update.valid) { + val nextRepl = Counter(r_btb_update.valid && !updateHit, entries)._1 val waddr = Mux(updateHit, updateHitAddr, nextRepl) val mask = UIntToOH(waddr) idxs(waddr) := r_btb_update.bits.pc(matchBits-1, log2Up(coreInstBytes)) tgts(waddr) := update_target(matchBits-1, log2Up(coreInstBytes)) - idxPages(waddr) := idxPageUpdate + idxPages(waddr) := idxPageUpdate +& 1 // the +1 corresponds to the <<1 on io.resp.valid tgtPages(waddr) := tgtPageUpdate isValid := Mux(r_btb_update.bits.isValid, isValid | mask, isValid & ~mask) isReturn := Mux(r_btb_update.bits.isReturn, isReturn | mask, isReturn & ~mask) @@ -231,21 +230,21 @@ class BTB(implicit p: Parameters) extends BtbModule { pageValid := pageValid | tgtPageReplEn | idxPageReplEn } - io.resp.valid := hits.orR + io.resp.valid := (pageHit << 1)(Mux1H(idxHit, idxPages)) io.resp.bits.taken := true - io.resp.bits.target := Cat(Mux1H(Mux1H(hitsVec, tgtPagesOH), pages), Mux1H(hitsVec, tgts) << log2Up(coreInstBytes)) - io.resp.bits.entry := OHToUInt(hits) - io.resp.bits.bridx := (if (fetchWidth > 1) Mux1H(hitsVec, brIdx) else UInt(0)) + io.resp.bits.target := Cat(pages(Mux1H(idxHit, tgtPages)), Mux1H(idxHit, tgts) << log2Up(coreInstBytes)) + io.resp.bits.entry := OHToUInt(idxHit) + io.resp.bits.bridx := (if (fetchWidth > 1) Mux1H(idxHit, brIdx) else UInt(0)) io.resp.bits.mask := Cat((UInt(1) << ~Mux(io.resp.bits.taken, ~io.resp.bits.bridx, UInt(0)))-1, UInt(1)) // if multiple entries for same PC land in BTB, zap them - when (PopCountAtLeast(hits, 2)) { - isValid := isValid & ~hits + when (PopCountAtLeast(idxHit, 2)) { + isValid := isValid & ~idxHit } if (nBHT > 0) { val bht = new BHT(nBHT) - val isBranch = !(hits & isJump).orR + val isBranch = !(idxHit & isJump).orR val res = bht.get(io.req.bits.addr, io.req.valid && io.resp.valid && isBranch) val update_btb_hit = io.bht_update.bits.prediction.valid when (io.bht_update.valid && update_btb_hit) { @@ -257,7 +256,7 @@ class BTB(implicit p: Parameters) extends BtbModule { if (nRAS > 0) { val ras = new RAS(nRAS) - val doPeek = (hits & isReturn).orR + val doPeek = (idxHit & isReturn).orR when (!ras.isEmpty && doPeek) { io.resp.bits.target := ras.peek } diff --git a/src/main/scala/rocket/IBuf.scala b/src/main/scala/rocket/IBuf.scala index b1a6ac6d..ec524c8b 100644 --- a/src/main/scala/rocket/IBuf.scala +++ b/src/main/scala/rocket/IBuf.scala @@ -80,6 +80,7 @@ class IBuf(implicit p: Parameters) extends CoreModule { val xcpt_if = valid & (Mux(buf.xcpt_if, bufMask, UInt(0)) | Mux(io.imem.bits.xcpt_if, ~bufMask, UInt(0))) val ic_replay = valid & (Mux(buf.replay, bufMask, UInt(0)) | Mux(io.imem.bits.replay, ~bufMask, UInt(0))) val ibufBTBHitMask = Mux(ibufBTBHit, UIntToOH(ibufBTBResp.bridx), UInt(0)) + assert(!io.imem.bits.btb.valid || io.imem.bits.btb.bits.bridx >= pcWordBits) val icBTBHitMask = Mux(io.imem.bits.btb.valid, UIntToOH(io.imem.bits.btb.bits.bridx +& nBufValid - pcWordBits), UInt(0)) val btbHitMask = ibufBTBHitMask & bufMask | icBTBHitMask & ~bufMask From 7668827741de59bc68c9ae7c92fac7f8c254d22c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 6 Mar 2017 15:03:14 -0800 Subject: [PATCH 04/18] Support unrolling the integer divider --- src/main/scala/rocket/Multiplier.scala | 44 ++++++++++++++++---------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/src/main/scala/rocket/Multiplier.scala b/src/main/scala/rocket/Multiplier.scala index 4bf566b0..400d8314 100644 --- a/src/main/scala/rocket/Multiplier.scala +++ b/src/main/scala/rocket/Multiplier.scala @@ -31,6 +31,7 @@ class MultiplierIO(dataBits: Int, tagBits: Int) extends Bundle { case class MulDivParams( mulUnroll: Int = 1, + divUnroll: Int = 1, mulEarlyOut: Boolean = false, divEarlyOut: Boolean = false ) @@ -44,7 +45,7 @@ class MulDiv(cfg: MulDivParams, width: Int, nXpr: Int = 32) extends Module { val state = Reg(init=s_ready) val req = Reg(io.req.bits) - val count = Reg(UInt(width = log2Up(w+1))) + val count = Reg(UInt(width = log2Ceil((w/cfg.divUnroll + 1) max (w/cfg.mulUnroll)))) val neg_out = Reg(Bool()) val isMul = Reg(Bool()) val isHi = Reg(Bool()) @@ -73,8 +74,7 @@ class MulDiv(cfg: MulDivParams, width: Int, nXpr: Int = 32) extends Module { val (lhs_in, lhs_sign) = sext(io.req.bits.in1, halfWidth(io.req.bits), lhsSigned) val (rhs_in, rhs_sign) = sext(io.req.bits.in2, halfWidth(io.req.bits), rhsSigned) - val subtractor = remainder(2*w,w) - divisor(w,0) - val less = subtractor(w) + val subtractor = remainder(2*w,w) - divisor val negated_remainder = -remainder(w-1,0) when (state === s_neg_inputs) { @@ -116,24 +116,36 @@ class MulDiv(cfg: MulDivParams, width: Int, nXpr: Int = 32) extends Module { } } when (state === s_busy && !isMul) { - when (count === w) { + val unrolls = ((0 until cfg.divUnroll) scanLeft remainder) { case (rem, i) => + // the special case for iteration 0 is to save HW, not for correctness + val difference = if (i == 0) subtractor else rem(2*w,w) - divisor(w-1,0) + val less = difference(w) + Cat(Mux(less, rem(2*w-1,w), difference(w-1,0)), rem(w-1,0), !less) + } tail + + remainder := unrolls.last + when (count === w/cfg.divUnroll) { state := Mux(isHi, s_move_rem, Mux(neg_out, s_neg_output, s_done)) + if (w % cfg.divUnroll < cfg.divUnroll - 1) + remainder := unrolls(w % cfg.divUnroll) } count := count + 1 - remainder := Cat(Mux(less, remainder(2*w-1,w), subtractor(w-1,0)), remainder(w-1,0), !less) - - val divisorMSB = Log2(divisor(w-1,0), w) - val dividendMSB = Log2(remainder(w-1,0), w) - val eOutPos = UInt(w-1) + divisorMSB - dividendMSB - val eOutZero = divisorMSB > dividendMSB - val eOut = count === 0 && less /* not divby0 */ && (eOutPos > 0 || eOutZero) - when (Bool(cfg.divEarlyOut) && eOut) { - val shift = Mux(eOutZero, UInt(w-1), eOutPos(log2Up(w)-1,0)) - remainder := remainder(w-1,0) << shift - count := shift + val divby0 = count === 0 && !subtractor(w) + if (cfg.divEarlyOut) { + val divisorMSB = Log2(divisor(w-1,0), w) + val dividendMSB = Log2(remainder(w-1,0), w) + val eOutPos = UInt(w-1) + divisorMSB - dividendMSB + val eOutZero = divisorMSB > dividendMSB + val eOut = count === 0 && !divby0 && (eOutPos >= cfg.divUnroll || eOutZero) + when (eOut) { + val inc = Mux(eOutZero, UInt(w-1), eOutPos) >> log2Floor(cfg.divUnroll) + val shift = inc << log2Floor(cfg.divUnroll) + remainder := remainder(w-1,0) << shift + count := inc + } } - when (count === 0 && !less /* divby0 */ && !isHi) { neg_out := false } + when (divby0 && !isHi) { neg_out := false } } when (io.resp.fire() || io.kill) { state := s_ready From 24a2278fc40d0a335d5a490e78babf3f2ea9bde8 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 7 Mar 2017 14:33:51 -0800 Subject: [PATCH 05/18] Perform all illegal-instruction detection in ID stage This is simpler, reduces what would have become a critical path in the commit stage, and will make it easier to support the mbadinst CSR if it is implemented. --- src/main/scala/rocket/CSR.scala | 117 ++++++++++++------------- src/main/scala/rocket/IBuf.scala | 2 + src/main/scala/rocket/Rocket.scala | 26 +++--- src/main/scala/tile/FPU.scala | 133 ++++++++++++++--------------- 4 files changed, 135 insertions(+), 143 deletions(-) diff --git a/src/main/scala/rocket/CSR.scala b/src/main/scala/rocket/CSR.scala index 4d640caa..bc3bbae3 100644 --- a/src/main/scala/rocket/CSR.scala +++ b/src/main/scala/rocket/CSR.scala @@ -143,8 +143,17 @@ class CSRFileIO(implicit p: Parameters) extends CoreBundle val wdata = Bits(INPUT, xLen) } + val decode = new Bundle { + val csr = UInt(INPUT, CSR.ADDRSZ) + val fp_illegal = Bool(OUTPUT) + val rocc_illegal = Bool(OUTPUT) + val read_illegal = Bool(OUTPUT) + val write_illegal = Bool(OUTPUT) + val write_flush = Bool(OUTPUT) + val system_illegal = Bool(OUTPUT) + } + val csr_stall = Bool(OUTPUT) - val csr_xcpt = Bool(OUTPUT) val eret = Bool(OUTPUT) val singleStep = Bool(OUTPUT) @@ -210,17 +219,11 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) Causes.fault_store, Causes.user_ecall).map(1 << _).sum) - val exception = io.exception || io.csr_xcpt val reg_debug = Reg(init=Bool(false)) val effective_prv = Cat(reg_debug, reg_mstatus.prv) val reg_dpc = Reg(UInt(width = vaddrBitsExtended)) val reg_dscratch = Reg(UInt(width = xLen)) - val reg_singleStepped = Reg(Bool()) - when (io.retire(0) || exception) { reg_singleStepped := true } - when (!io.singleStep) { reg_singleStepped := false } - assert(!io.singleStep || io.retire <= UInt(1)) - assert(!reg_singleStepped || io.retire === UInt(0)) val reg_tselect = Reg(UInt(width = log2Up(nBreakpoints))) val reg_bp = Reg(Vec(1 << log2Up(nBreakpoints), new BP)) @@ -257,6 +260,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val reg_cycle = if (enableCommitLog) reg_instret else WideCounter(64) val reg_hpmevent = Seq.fill(nPerfCounters)(if (nPerfEvents > 1) Reg(UInt(width = log2Ceil(nPerfEvents))) else UInt(0)) val reg_hpmcounter = reg_hpmevent.map(e => WideCounter(64, ((UInt(0) +: io.events): Seq[UInt])(e))) + val hpm_mask = reg_mcounteren & Mux((!usingVM).B || reg_mstatus.prv === PRV.S, delegable_counters.U, reg_scounteren) val mip = Wire(init=reg_mip) mip.rocc := io.rocc_interrupt @@ -278,10 +282,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) io.interrupt_cause := UInt(interruptMSB) + CSR.debugIntCause } - val system_insn = io.rw.cmd === CSR.I - val cpu_ren = io.rw.cmd =/= CSR.N && !system_insn - val cpu_wen = cpu_ren && io.rw.cmd =/= CSR.R - val isaMaskString = (if (usingMulDiv) "M" else "") + (if (usingAtomics) "A" else "") + @@ -388,56 +388,37 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) } val decoded_addr = read_mapping map { case (k, v) => k -> (io.rw.addr === k) } - val addr_valid = decoded_addr.values.reduce(_||_) - val fp_csr = if (usingFPU) decoded_addr.filterKeys(fp_csrs contains _ ).values reduce(_||_) else Bool(false) - val hpm_csr = if (usingUser) io.rw.addr >= CSR.firstCtr && io.rw.addr < CSR.firstCtr + CSR.nCtr else Bool(false) - val hpm_en = effective_prv > PRV.S || (reg_mcounteren & Mux((!usingVM).B || reg_mstatus.prv === PRV.S, delegable_counters.U, reg_scounteren))(io.rw.addr(log2Ceil(CSR.nCtr)-1, 0)) - val csr_addr_priv = io.rw.addr(9,8) - - val debug_csr_mask = 0x090 // only debug CSRs have address bits 7 and 4 set - require((read_mapping -- debug_csrs.keys).keys.forall(x => (x & debug_csr_mask) != debug_csr_mask)) - require(debug_csrs.keys.forall(x => (x & debug_csr_mask) == debug_csr_mask)) - val csr_debug = Bool(usingDebug) && (io.rw.addr & debug_csr_mask) === debug_csr_mask - val priv_sufficient = reg_debug || (!csr_debug && reg_mstatus.prv >= csr_addr_priv) - val read_only = io.rw.addr(11,10).andR - val wen = cpu_wen && priv_sufficient && !read_only - val wdata = (Mux(io.rw.cmd.isOneOf(CSR.S, CSR.C), io.rw.rdata, UInt(0)) | io.rw.wdata) & ~Mux(io.rw.cmd === CSR.C, io.rw.wdata, UInt(0)) - val do_system_insn = priv_sufficient && system_insn + val system_insn = io.rw.cmd === CSR.I val opcode = UInt(1) << io.rw.addr(2,0) val insn_rs2 = io.rw.addr(5) - val insn_call = do_system_insn && !insn_rs2 && opcode(0) - val insn_break = do_system_insn && opcode(1) - val insn_ret = do_system_insn && opcode(2) - val allow_wfi = effective_prv > PRV.S || !reg_mstatus.tw - val want_wfi = do_system_insn && opcode(5) - val insn_wfi = want_wfi && allow_wfi - val allow_sfence_vma = effective_prv > PRV.S || !reg_mstatus.tvm - val want_sfence_vma = do_system_insn && insn_rs2 - val insn_sfence_vma = want_sfence_vma && allow_sfence_vma - val allow_fcsr = io.status.fs.orR && reg_misa('f'-'a') + val insn_call = system_insn && !insn_rs2 && opcode(0) + val insn_break = system_insn && opcode(1) + val insn_ret = system_insn && opcode(2) + val insn_wfi = system_insn && opcode(5) + val insn_sfence_vma = system_insn && insn_rs2 - io.csr_xcpt := (cpu_wen && read_only) || - (cpu_ren && - (!priv_sufficient || - !addr_valid || - (if (usingVM) decoded_addr(CSRs.sptbr) && !allow_sfence_vma else false.B) || - (hpm_csr && !hpm_en) || - (fp_csr && !allow_fcsr))) || - (system_insn && !priv_sufficient) || - insn_call || insn_break || - want_wfi && !allow_wfi || - want_sfence_vma && !allow_sfence_vma - - when (insn_wfi) { reg_wfi := true } - when (pending_interrupts.orR || exception) { reg_wfi := false } + val allow_wfi = Bool(!usingVM) || effective_prv > PRV.S || !reg_mstatus.tw + val allow_sfence_vma = Bool(!usingVM) || effective_prv > PRV.S || !reg_mstatus.tvm + io.decode.fp_illegal := io.status.fs === 0 || !reg_misa('f'-'a') + io.decode.rocc_illegal := io.status.xs === 0 || !reg_misa('x'-'a') + io.decode.read_illegal := effective_prv < io.decode.csr(9,8) || + !read_mapping.keys.map(io.decode.csr === _).reduce(_||_) || + io.decode.csr === CSRs.sptbr && !allow_sfence_vma || + io.decode.csr >= CSR.firstCtr && io.decode.csr < CSR.firstCtr + CSR.nCtr && effective_prv <= PRV.S && hpm_mask(io.decode.csr(log2Ceil(CSR.firstCtr)-1,0)) || + Bool(usingDebug) && !reg_debug && debug_csrs.keys.map(io.decode.csr === _).reduce(_||_) || + Bool(usingFPU) && fp_csrs.keys.map(io.decode.csr === _).reduce(_||_) && io.decode.fp_illegal + io.decode.write_illegal := io.decode.csr(11,10).andR + io.decode.write_flush := !(io.decode.csr >= CSRs.mscratch && io.decode.csr <= CSRs.mbadaddr || io.decode.csr >= CSRs.sscratch && io.decode.csr <= CSRs.sbadaddr) + io.decode.system_illegal := effective_prv < io.decode.csr(9,8) || + io.decode.csr(2) && !allow_wfi || + io.decode.csr(5) && !allow_sfence_vma val cause = - Mux(!io.csr_xcpt, io.cause, Mux(insn_call, reg_mstatus.prv + Causes.user_ecall, - Mux[UInt](insn_break, Causes.breakpoint, Causes.illegal_instruction))) + Mux[UInt](insn_break, Causes.breakpoint, io.cause)) val cause_lsbs = cause(log2Up(xLen)-1,0) val causeIsDebugInt = cause(xLen-1) && cause_lsbs === CSR.debugIntCause val causeIsDebugTrigger = !cause(xLen-1) && cause_lsbs === CSR.debugTriggerCause @@ -446,11 +427,10 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val delegate = Bool(usingVM) && reg_mstatus.prv <= PRV.S && Mux(cause(xLen-1), reg_mideleg(cause_lsbs), reg_medeleg(cause_lsbs)) val debugTVec = Mux(reg_debug, UInt(0x808), UInt(0x800)) val tvec = Mux(trapToDebug, debugTVec, Mux(delegate, reg_stvec.sextTo(vaddrBitsExtended), reg_mtvec)) - val epc = Mux(csr_debug, reg_dpc, Mux(Bool(usingVM) && !csr_addr_priv(1), reg_sepc, reg_mepc)) io.fatc := insn_sfence_vma - io.evec := Mux(insn_ret, epc, tvec) + io.evec := tvec io.ptbr := reg_sptbr - io.eret := insn_ret + io.eret := insn_call || insn_break || insn_ret io.singleStep := reg_dcsr.step && !reg_debug io.status := reg_mstatus io.status.sd := io.status.fs.andR || io.status.xs.andR @@ -461,6 +441,18 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) if (xLen == 32) io.status.sd_rv32 := io.status.sd + val exception = insn_call || insn_break || io.exception + assert(PopCount(insn_ret :: insn_call :: insn_break :: io.exception :: Nil) <= 1, "these conditions must be mutually exclusive") + + when (insn_wfi) { reg_wfi := true } + when (pending_interrupts.orR || exception) { reg_wfi := false } + assert(!reg_wfi || io.retire === UInt(0)) + + when (io.retire(0)) { reg_singleStepped := true } + when (!io.singleStep) { reg_singleStepped := false } + assert(!io.singleStep || io.retire <= UInt(1)) + assert(!reg_singleStepped || io.retire === UInt(0)) + when (exception) { val epc = ~(~io.pc | (coreInstBytes-1)) val pie = read_mstatus(reg_mstatus.prv) @@ -494,25 +486,26 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) } when (insn_ret) { - when (Bool(usingVM) && !csr_addr_priv(1)) { + when (Bool(usingVM) && !io.rw.addr(9)) { when (reg_mstatus.spp.toBool) { reg_mstatus.sie := reg_mstatus.spie } reg_mstatus.spie := true reg_mstatus.spp := PRV.U new_prv := reg_mstatus.spp - }.elsewhen (csr_debug) { + io.evec := reg_sepc + }.elsewhen (Bool(usingDebug) && io.rw.addr(10)) { new_prv := reg_dcsr.prv reg_debug := false + io.evec := reg_dpc }.otherwise { when (reg_mstatus.mpp(1)) { reg_mstatus.mie := reg_mstatus.mpie } .elsewhen (Bool(usingVM) && reg_mstatus.mpp(0)) { reg_mstatus.sie := reg_mstatus.mpie } reg_mstatus.mpie := true reg_mstatus.mpp := legalizePrivilege(PRV.U) new_prv := reg_mstatus.mpp + io.evec := reg_mepc } } - assert(PopCount(insn_ret :: io.exception :: io.csr_xcpt :: Nil) <= 1, "these conditions must be mutually exclusive") - io.time := reg_cycle io.csr_stall := reg_wfi @@ -523,7 +516,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) reg_fflags := reg_fflags | io.fcsr_flags.bits } - when (wen) { + when (io.rw.cmd.isOneOf(CSR.S, CSR.C, CSR.W)) { when (decoded_addr(CSRs.mstatus)) { val new_mstatus = new MStatus().fromBits(wdata) reg_mstatus.mie := new_mstatus.mie @@ -574,7 +567,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) writeCounter(CSRs.mcycle, reg_cycle, wdata) writeCounter(CSRs.minstret, reg_instret, wdata) - if (usingFPU) when (allow_fcsr) { + if (usingFPU) { when (decoded_addr(CSRs.fflags)) { reg_fflags := wdata } when (decoded_addr(CSRs.frm)) { reg_frm := wdata } when (decoded_addr(CSRs.fcsr)) { reg_fflags := wdata; reg_frm := wdata >> reg_fflags.getWidth } @@ -606,7 +599,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val new_sip = new MIP().fromBits(wdata) reg_mip.ssip := new_sip.ssip } - when (decoded_addr(CSRs.sptbr) && allow_sfence_vma) { + when (decoded_addr(CSRs.sptbr)) { val new_sptbr = new PTBR().fromBits(wdata) val valid_mode = new_sptbr.pgLevelsToMode(pgLevels) when (new_sptbr.mode === 0) { reg_sptbr.mode := 0 } diff --git a/src/main/scala/rocket/IBuf.scala b/src/main/scala/rocket/IBuf.scala index ec524c8b..d81f9a4b 100644 --- a/src/main/scala/rocket/IBuf.scala +++ b/src/main/scala/rocket/IBuf.scala @@ -15,6 +15,7 @@ class Instruction(implicit val p: Parameters) extends ParameterizedBundle with H val btb_hit = Bool() val rvc = Bool() val inst = new ExpandedInstruction + val raw = UInt(width = 32) require(coreInstBits == (if (usingCompressed) 16 else 32)) } @@ -92,6 +93,7 @@ class IBuf(implicit p: Parameters) extends CoreModule { val exp = Module(new RVCExpander) exp.io.in := curInst io.inst(i).bits.inst := exp.io.out + io.inst(i).bits.raw := curInst if (usingCompressed) { val replay = ic_replay(j) || (!exp.io.rvc && (btbHitMask(j) || ic_replay(j+1))) diff --git a/src/main/scala/rocket/Rocket.scala b/src/main/scala/rocket/Rocket.scala index 5c17c5f4..2ac26f9c 100644 --- a/src/main/scala/rocket/Rocket.scala +++ b/src/main/scala/rocket/Rocket.scala @@ -143,23 +143,21 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) take_pc_id := Bool(fastJAL) && !ctrl_killd && id_ctrl.jal val csr = Module(new CSRFile) - val id_csr_en = id_ctrl.csr =/= CSR.N - val id_system_insn = id_ctrl.csr === CSR.I - val id_csr_ren = (id_ctrl.csr === CSR.S || id_ctrl.csr === CSR.C) && id_raddr1 === UInt(0) + val id_csr_en = id_ctrl.csr.isOneOf(CSR.S, CSR.C, CSR.W) + val id_system_insn = id_ctrl.csr >= CSR.I + val id_csr_ren = id_ctrl.csr.isOneOf(CSR.S, CSR.C) && id_raddr1 === UInt(0) val id_csr = Mux(id_csr_ren, CSR.R, id_ctrl.csr) - val id_csr_addr = id_inst(0)(31,20) - // this is overly conservative - val safe_csrs = CSRs.sscratch :: CSRs.sepc :: CSRs.mscratch :: CSRs.mepc :: CSRs.mcause :: CSRs.mbadaddr :: Nil - val legal_csrs = collection.mutable.LinkedHashSet(CSRs.all:_*) - val id_csr_flush = id_system_insn || (id_csr_en && !id_csr_ren && !DecodeLogic(id_csr_addr, safe_csrs.map(UInt(_)), (legal_csrs -- safe_csrs).toList.map(UInt(_)))) + val id_csr_flush = id_system_insn || (id_csr_en && !id_csr_ren && csr.io.decode.write_flush) val id_illegal_insn = !id_ctrl.legal || id_ctrl.div && !csr.io.status.isa('m'-'a') || id_ctrl.amo && !csr.io.status.isa('a'-'a') || - id_ctrl.fp && !(csr.io.status.fs.orR && csr.io.status.isa('f'-'a')) || + id_ctrl.fp && (csr.io.decode.fp_illegal || io.fpu.illegal_rm) || id_ctrl.dp && !csr.io.status.isa('d'-'a') || ibuf.io.inst(0).bits.rvc && !csr.io.status.isa('c'-'a') || - id_ctrl.rocc && !(csr.io.status.xs.orR && csr.io.status.isa('x'-'a')) + id_ctrl.rocc && csr.io.decode.rocc_illegal || + id_csr_en && (csr.io.decode.read_illegal || !id_csr_ren && csr.io.decode.write_illegal) || + id_system_insn && csr.io.decode.system_illegal // stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE) val id_amo_aq = id_inst(0)(26) val id_amo_rl = id_inst(0)(25) @@ -205,7 +203,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) // execute stage val bypass_mux = Vec(bypass_sources.map(_._3)) val ex_reg_rs_bypass = Reg(Vec(id_raddr.size, Bool())) - val ex_reg_rs_lsb = Reg(Vec(id_raddr.size, UInt())) + val ex_reg_rs_lsb = Reg(Vec(id_raddr.size, UInt(width = log2Ceil(bypass_sources.size)))) val ex_reg_rs_msb = Reg(Vec(id_raddr.size, UInt())) val ex_rs = for (i <- 0 until id_raddr.size) yield Mux(ex_reg_rs_bypass(i), bypass_mux(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i))) @@ -291,8 +289,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) val ex_slow_bypass = ex_ctrl.mem_cmd === M_XSC || Vec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_ctrl.mem_type) val (ex_xcpt, ex_cause) = checkExceptions(List( - (ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause), - (ex_ctrl.fp && io.fpu.illegal_rm, UInt(Causes.illegal_instruction)))) + (ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause))) // memory stage val mem_br_taken = mem_reg_wdata(0) @@ -375,7 +372,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) val replay_wb_common = io.dmem.s2_nack || wb_reg_replay val replay_wb_rocc = wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready val replay_wb = replay_wb_common || replay_wb_rocc - val wb_xcpt = wb_reg_xcpt || csr.io.csr_xcpt + val wb_xcpt = wb_reg_xcpt take_pc_wb := replay_wb || wb_xcpt || csr.io.eret // writeback arbitration @@ -417,6 +414,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) when (rf_wen) { rf.write(rf_waddr, rf_wdata) } // hook up control/status regfile + csr.io.decode.csr := ibuf.io.inst(0).bits.raw(31,20) csr.io.exception := wb_reg_xcpt csr.io.cause := wb_reg_cause csr.io.retire := wb_valid diff --git a/src/main/scala/tile/FPU.scala b/src/main/scala/tile/FPU.scala index db3001e7..9ce6a89c 100644 --- a/src/main/scala/tile/FPU.scala +++ b/src/main/scala/tile/FPU.scala @@ -60,7 +60,6 @@ trait HasFPUCtrlSigs { val fma = Bool() val div = Bool() val sqrt = Bool() - val round = Bool() val wflags = Bool() } @@ -72,71 +71,71 @@ class FPUDecoder(implicit p: Parameters) extends FPUModule()(p) { val sigs = new FPUCtrlSigs().asOutput } - val default = List(FCMD_X, X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X) + val default = List(FCMD_X, X,X,X,X,X,X,X,X,X,X,X,X,X,X,X) val f = - Array(FLW -> List(FCMD_X, Y,Y,N,N,N,X,X,Y,N,N,N,N,N,N,N,N), - FSW -> List(FCMD_MV_XF, Y,N,N,Y,N,Y,X,Y,N,Y,N,N,N,N,N,N), - FMV_S_X -> List(FCMD_MV_FX, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,N), - FCVT_S_W -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y), - FCVT_S_WU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y), - FCVT_S_L -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y), - FCVT_S_LU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y), - FMV_X_S -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,N), - FCLASS_S -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,N), - FCVT_W_S -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y), - FCVT_WU_S-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y), - FCVT_L_S -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y), - FCVT_LU_S-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y), - FEQ_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,N,Y), - FLT_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,N,Y), - FLE_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,N,Y), - FSGNJ_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,N), - FSGNJN_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,N), - FSGNJX_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,N), - FMIN_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,Y), - FMAX_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,Y), - FADD_S -> List(FCMD_ADD, N,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y,Y), - FSUB_S -> List(FCMD_SUB, N,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y,Y), - FMUL_S -> List(FCMD_MUL, N,Y,Y,Y,N,N,N,Y,N,N,N,Y,N,N,Y,Y), - FMADD_S -> List(FCMD_MADD, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y), - FMSUB_S -> List(FCMD_MSUB, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y), - FNMADD_S -> List(FCMD_NMADD, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y), - FNMSUB_S -> List(FCMD_NMSUB, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y), - FDIV_S -> List(FCMD_DIV, N,Y,Y,Y,N,N,N,Y,N,N,N,N,Y,N,Y,Y), - FSQRT_S -> List(FCMD_SQRT, N,Y,Y,N,N,Y,X,Y,N,N,N,N,N,Y,Y,Y)) + Array(FLW -> List(FCMD_X, Y,Y,N,N,N,X,X,Y,N,N,N,N,N,N,N), + FSW -> List(FCMD_MV_XF, Y,N,N,Y,N,Y,X,Y,N,Y,N,N,N,N,N), + FMV_S_X -> List(FCMD_MV_FX, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,N), + FCVT_S_W -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y), + FCVT_S_WU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y), + FCVT_S_L -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y), + FCVT_S_LU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y), + FMV_X_S -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,N), + FCLASS_S -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,N), + FCVT_W_S -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y), + FCVT_WU_S-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y), + FCVT_L_S -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y), + FCVT_LU_S-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y), + FEQ_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,Y), + FLT_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,Y), + FLE_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,Y), + FSGNJ_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N), + FSGNJN_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N), + FSGNJX_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N), + FMIN_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,Y), + FMAX_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,Y), + FADD_S -> List(FCMD_ADD, N,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y), + FSUB_S -> List(FCMD_SUB, N,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y), + FMUL_S -> List(FCMD_MUL, N,Y,Y,Y,N,N,N,Y,N,N,N,Y,N,N,Y), + FMADD_S -> List(FCMD_MADD, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y), + FMSUB_S -> List(FCMD_MSUB, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y), + FNMADD_S -> List(FCMD_NMADD, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y), + FNMSUB_S -> List(FCMD_NMSUB, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y), + FDIV_S -> List(FCMD_DIV, N,Y,Y,Y,N,N,N,Y,N,N,N,N,Y,N,Y), + FSQRT_S -> List(FCMD_SQRT, N,Y,Y,N,N,Y,X,Y,N,N,N,N,N,Y,Y)) val d = - Array(FLD -> List(FCMD_X, Y,Y,N,N,N,X,X,N,N,N,N,N,N,N,N,N), - FSD -> List(FCMD_MV_XF, Y,N,N,Y,N,Y,X,N,N,Y,N,N,N,N,N,N), - FMV_D_X -> List(FCMD_MV_FX, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,N), - FCVT_D_W -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y), - FCVT_D_WU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y), - FCVT_D_L -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y), - FCVT_D_LU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y), - FMV_X_D -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,N), - FCLASS_D -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,N), - FCVT_W_D -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y), - FCVT_WU_D-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y), - FCVT_L_D -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y), - FCVT_LU_D-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y), - FCVT_S_D -> List(FCMD_CVT_FF, N,Y,Y,N,N,N,X,Y,N,N,Y,N,N,N,Y,Y), - FCVT_D_S -> List(FCMD_CVT_FF, N,Y,Y,N,N,N,X,N,N,N,Y,N,N,N,Y,Y), - FEQ_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,N,Y), - FLT_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,N,Y), - FLE_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,N,Y), - FSGNJ_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,N), - FSGNJN_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,N), - FSGNJX_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,N), - FMIN_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,Y), - FMAX_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,Y), - FADD_D -> List(FCMD_ADD, N,Y,Y,Y,N,N,Y,N,N,N,N,Y,N,N,Y,Y), - FSUB_D -> List(FCMD_SUB, N,Y,Y,Y,N,N,Y,N,N,N,N,Y,N,N,Y,Y), - FMUL_D -> List(FCMD_MUL, N,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,Y,Y), - FMADD_D -> List(FCMD_MADD, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y), - FMSUB_D -> List(FCMD_MSUB, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y), - FNMADD_D -> List(FCMD_NMADD, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y), - FNMSUB_D -> List(FCMD_NMSUB, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y), - FDIV_D -> List(FCMD_DIV, N,Y,Y,Y,N,N,N,N,N,N,N,N,Y,N,Y,Y), - FSQRT_D -> List(FCMD_SQRT, N,Y,Y,N,N,Y,X,N,N,N,N,N,N,Y,Y,Y)) + Array(FLD -> List(FCMD_X, Y,Y,N,N,N,X,X,N,N,N,N,N,N,N,N), + FSD -> List(FCMD_MV_XF, Y,N,N,Y,N,Y,X,N,N,Y,N,N,N,N,N), + FMV_D_X -> List(FCMD_MV_FX, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,N), + FCVT_D_W -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y), + FCVT_D_WU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y), + FCVT_D_L -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y), + FCVT_D_LU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y), + FMV_X_D -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,N), + FCLASS_D -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,N), + FCVT_W_D -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y), + FCVT_WU_D-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y), + FCVT_L_D -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y), + FCVT_LU_D-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y), + FCVT_S_D -> List(FCMD_CVT_FF, N,Y,Y,N,N,N,X,Y,N,N,Y,N,N,N,Y), + FCVT_D_S -> List(FCMD_CVT_FF, N,Y,Y,N,N,N,X,N,N,N,Y,N,N,N,Y), + FEQ_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,Y), + FLT_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,Y), + FLE_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,Y), + FSGNJ_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N), + FSGNJN_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N), + FSGNJX_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N), + FMIN_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,Y), + FMAX_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,Y), + FADD_D -> List(FCMD_ADD, N,Y,Y,Y,N,N,Y,N,N,N,N,Y,N,N,Y), + FSUB_D -> List(FCMD_SUB, N,Y,Y,Y,N,N,Y,N,N,N,N,Y,N,N,Y), + FMUL_D -> List(FCMD_MUL, N,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,Y), + FMADD_D -> List(FCMD_MADD, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y), + FMSUB_D -> List(FCMD_MSUB, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y), + FNMADD_D -> List(FCMD_NMADD, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y), + FNMSUB_D -> List(FCMD_NMSUB, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y), + FDIV_D -> List(FCMD_DIV, N,Y,Y,Y,N,N,N,N,N,N,N,N,Y,N,Y), + FSQRT_D -> List(FCMD_SQRT, N,Y,Y,N,N,Y,X,N,N,N,N,N,N,Y,Y)) val insns = fLen match { case 32 => f @@ -146,7 +145,7 @@ class FPUDecoder(implicit p: Parameters) extends FPUModule()(p) { val s = io.sigs val sigs = Seq(s.cmd, s.ldst, s.wen, s.ren1, s.ren2, s.ren3, s.swap12, s.swap23, s.single, s.fromint, s.toint, s.fastpipe, s.fma, - s.div, s.sqrt, s.round, s.wflags) + s.div, s.sqrt, s.wflags) sigs zip decoder map {case(s,d) => s := d} } @@ -710,7 +709,7 @@ class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) { io.sboard_clr := !wb_cp_valid && (divSqrt_wen || (wen(0) && useScoreboard(x => wbInfo(0).pipeid === UInt(x._2)))) io.sboard_clra := waddr // we don't currently support round-max-magnitude (rm=4) - io.illegal_rm := ex_rm(2) && ex_ctrl.round + io.illegal_rm := io.inst(14) && (io.inst(13,12) < 3 || io.fcsr_rm >= 4) divSqrt_wdata := 0 divSqrt_flags := 0 @@ -750,7 +749,7 @@ class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) { divSqrt_wdata := Mux(divSqrt_single, divSqrt_toSingle.io.out, divSqrt_wdata_double) divSqrt_flags := divSqrt_flags_double | Mux(divSqrt_single, divSqrt_toSingle.io.exceptionFlags, Bits(0)) } else { - when (ex_ctrl.div || ex_ctrl.sqrt) { io.illegal_rm := true } + when (id_ctrl.div || id_ctrl.sqrt) { io.illegal_rm := true } } } From 33b6d483769a131608c65f54158eb44ea2329824 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 9 Mar 2017 12:37:20 -0800 Subject: [PATCH 06/18] Fix haltnot reporting (previously always returned 0) --- src/main/scala/uncore/devices/Debug.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/uncore/devices/Debug.scala b/src/main/scala/uncore/devices/Debug.scala index 11fd21fb..15f14f47 100644 --- a/src/main/scala/uncore/devices/Debug.scala +++ b/src/main/scala/uncore/devices/Debug.scala @@ -724,7 +724,7 @@ trait DebugModule extends Module with HasDebugModuleParameters with HasRegMap { // This logic assumes only up to 128 components. rdHaltnotStatus := Bits(0) for (ii <- 0 until numHaltnotStatus) { - when (dbReq.addr === UInt(ii)) { + when (dbReq.addr(1, 0) === UInt(ii)) { rdHaltnotStatus := haltnotStatus(ii) } } From e57ee2692de1ca805942a2c3e942c3c46ce8c26a Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 9 Mar 2017 12:46:39 -0800 Subject: [PATCH 07/18] bump tools --- riscv-tools | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/riscv-tools b/riscv-tools index cd78e37f..ed2db791 160000 --- a/riscv-tools +++ b/riscv-tools @@ -1 +1 @@ -Subproject commit cd78e37f72cfc2a452a0c11744586084fbae1dcd +Subproject commit ed2db7918d571b75c4075ef318b4e601004c6424 From 4f8f05d6352246ad8da128441181557938e208fc Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 9 Mar 2017 00:28:19 -0800 Subject: [PATCH 08/18] Add performance counter facility --- src/main/scala/rocket/Arbiter.scala | 2 + src/main/scala/rocket/CSR.scala | 33 +++++++++++----- src/main/scala/rocket/DCache.scala | 4 ++ src/main/scala/rocket/Events.scala | 35 ++++++++++++++++ src/main/scala/rocket/Frontend.scala | 6 +++ src/main/scala/rocket/HellaCache.scala | 4 ++ src/main/scala/rocket/NBDcache.scala | 4 ++ src/main/scala/rocket/Rocket.scala | 55 ++++++++++++++++++++++++-- src/main/scala/util/Counters.scala | 2 +- src/main/scala/util/Package.scala | 2 + 10 files changed, 132 insertions(+), 15 deletions(-) create mode 100644 src/main/scala/rocket/Events.scala diff --git a/src/main/scala/rocket/Arbiter.scala b/src/main/scala/rocket/Arbiter.scala index 747130df..4fe8b838 100644 --- a/src/main/scala/rocket/Arbiter.scala +++ b/src/main/scala/rocket/Arbiter.scala @@ -56,6 +56,8 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module resp.valid := io.mem.resp.valid && tag_hit io.requestor(i).xcpt := io.mem.xcpt io.requestor(i).ordered := io.mem.ordered + io.requestor(i).acquire := io.mem.acquire + io.requestor(i).release := io.mem.release io.requestor(i).s2_nack := io.mem.s2_nack && s2_id === UInt(i) resp.bits := io.mem.resp.bits resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n) diff --git a/src/main/scala/rocket/CSR.scala b/src/main/scala/rocket/CSR.scala index bc3bbae3..298ac974 100644 --- a/src/main/scala/rocket/CSR.scala +++ b/src/main/scala/rocket/CSR.scala @@ -124,14 +124,23 @@ object CSR } val firstCtr = CSRs.cycle + val firstCtrH = CSRs.cycleh val firstHPC = CSRs.hpmcounter3 + val firstHPCH = CSRs.hpmcounter3h val firstHPE = CSRs.mhpmevent3 val firstMHPC = CSRs.mhpmcounter3 + val firstMHPCH = CSRs.mhpmcounter3h val firstHPM = 3 val nCtr = 32 val nHPM = nCtr - firstHPM } +class PerfCounterIO(implicit p: Parameters) extends CoreBundle + with HasRocketCoreParameters { + val eventSel = UInt(OUTPUT, xLen) + val inc = UInt(INPUT, log2Ceil(1+retireWidth)) +} + class CSRFileIO(implicit p: Parameters) extends CoreBundle with HasRocketCoreParameters { val interrupts = new TileInterrupts().asInput @@ -174,10 +183,10 @@ class CSRFileIO(implicit p: Parameters) extends CoreBundle val interrupt = Bool(OUTPUT) val interrupt_cause = UInt(OUTPUT, xLen) val bp = Vec(nBreakpoints, new BP).asOutput - val events = Vec(nPerfEvents, Bool()).asInput + val counters = Vec(nPerfCounters, new PerfCounterIO) } -class CSRFile(implicit p: Parameters) extends CoreModule()(p) +class CSRFile(perfEventSets: EventSets = new EventSets(Seq()))(implicit p: Parameters) extends CoreModule()(p) with HasRocketCoreParameters { val io = new CSRFileIO @@ -258,8 +267,9 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val reg_instret = WideCounter(64, io.retire) val reg_cycle = if (enableCommitLog) reg_instret else WideCounter(64) - val reg_hpmevent = Seq.fill(nPerfCounters)(if (nPerfEvents > 1) Reg(UInt(width = log2Ceil(nPerfEvents))) else UInt(0)) - val reg_hpmcounter = reg_hpmevent.map(e => WideCounter(64, ((UInt(0) +: io.events): Seq[UInt])(e))) + val reg_hpmevent = io.counters.map(c => Reg(init = UInt(0, xLen))) + (io.counters zip reg_hpmevent) foreach { case (c, e) => c.eventSel := e } + val reg_hpmcounter = io.counters.map(c => WideCounter(40, c.inc, reset = false)) val hpm_mask = reg_mcounteren & Mux((!usingVM).B || reg_mstatus.prv === PRV.S, delegable_counters.U, reg_scounteren) val mip = Wire(init=reg_mip) @@ -339,6 +349,10 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) read_mapping += (i + CSR.firstHPE) -> e // mhpmeventN read_mapping += (i + CSR.firstMHPC) -> c // mhpmcounterN if (usingUser) read_mapping += (i + CSR.firstHPC) -> c // hpmcounterN + if (xLen == 32) { + read_mapping += (i + CSR.firstMHPCH) -> c // mhpmcounterNh + if (usingUser) read_mapping += (i + CSR.firstHPCH) -> c // hpmcounterNh + } } if (usingVM) { @@ -407,7 +421,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) io.decode.read_illegal := effective_prv < io.decode.csr(9,8) || !read_mapping.keys.map(io.decode.csr === _).reduce(_||_) || io.decode.csr === CSRs.sptbr && !allow_sfence_vma || - io.decode.csr >= CSR.firstCtr && io.decode.csr < CSR.firstCtr + CSR.nCtr && effective_prv <= PRV.S && hpm_mask(io.decode.csr(log2Ceil(CSR.firstCtr)-1,0)) || + (io.decode.csr.inRange(CSR.firstCtr, CSR.firstCtr + CSR.nCtr) || io.decode.csr.inRange(CSR.firstCtrH, CSR.firstCtrH + CSR.nCtr)) && effective_prv <= PRV.S && hpm_mask(io.decode.csr(log2Ceil(CSR.firstCtr)-1,0)) || Bool(usingDebug) && !reg_debug && debug_csrs.keys.map(io.decode.csr === _).reduce(_||_) || Bool(usingFPU) && fp_csrs.keys.map(io.decode.csr === _).reduce(_||_) && io.decode.fp_illegal io.decode.write_illegal := io.decode.csr(11,10).andR @@ -561,8 +575,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) for (((e, c), i) <- (reg_hpmevent zip reg_hpmcounter) zipWithIndex) { writeCounter(i + CSR.firstMHPC, c, wdata) - if (nPerfEvents > 1) - when (decoded_addr(i + CSR.firstHPE)) { e := wdata } + when (decoded_addr(i + CSR.firstHPE)) { e := perfEventSets.maskEventSelector(wdata) } } writeCounter(CSRs.mcycle, reg_cycle, wdata) writeCounter(CSRs.minstret, reg_instret, wdata) @@ -688,10 +701,10 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) def writeCounter(lo: Int, ctr: WideCounter, wdata: UInt) = { if (xLen == 32) { val hi = lo + CSRs.mcycleh - CSRs.mcycle - when (decoded_addr(lo)) { ctr := Cat(ctr(63, 32), wdata) } - when (decoded_addr(hi)) { ctr := Cat(wdata, ctr(31, 0)) } + when (decoded_addr(lo)) { ctr := Cat(ctr(ctr.getWidth-1, 32), wdata) } + when (decoded_addr(hi)) { ctr := Cat(wdata(ctr.getWidth-33, 0), ctr(31, 0)) } } else { - when (decoded_addr(lo)) { ctr := wdata } + when (decoded_addr(lo)) { ctr := wdata(ctr.getWidth-1, 0) } } } def formEPC(x: UInt) = ~(~x | Cat(!reg_misa('c'-'a'), UInt(1))) diff --git a/src/main/scala/rocket/DCache.scala b/src/main/scala/rocket/DCache.scala index b62a927a..575f167f 100644 --- a/src/main/scala/rocket/DCache.scala +++ b/src/main/scala/rocket/DCache.scala @@ -503,4 +503,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { flushing := false } } + + // performance events + io.cpu.acquire := tl_out.a.fire() + io.cpu.release := tl_out.c.fire() } diff --git a/src/main/scala/rocket/Events.scala b/src/main/scala/rocket/Events.scala new file mode 100644 index 00000000..d4aeef76 --- /dev/null +++ b/src/main/scala/rocket/Events.scala @@ -0,0 +1,35 @@ +// See LICENSE.Berkeley for license details. +// See LICENSE.SiFive for license details. + +package rocket + +import util._ +import Chisel._ + +class EventSet(gate: (UInt, UInt) => Bool, events: Seq[(String, () => Bool)]) { + def size = events.size + def hits = events.map(_._2()).asUInt + def check(mask: UInt) = gate(mask, hits) +} + +class EventSets(eventSets: Seq[EventSet]) { + def maskEventSelector(eventSel: UInt): UInt = { + // allow full associativity between counters and event sets (for now?) + val setMask = (BigInt(1) << log2Ceil(eventSets.size)) - 1 + val maskMask = ((BigInt(1) << eventSets.map(_.size).max) - 1) << eventSetIdBits + eventSel & (setMask | maskMask).U + } + + private def decode(counter: UInt): (UInt, UInt) = { + require(eventSets.size <= (1 << eventSetIdBits)) + (counter(log2Ceil(eventSets.size)-1, 0), counter >> eventSetIdBits) + } + + def evaluate(eventSel: UInt): Bool = { + val (set, mask) = decode(eventSel) + val sets = eventSets map (_ check mask) + sets(set) + } + + private def eventSetIdBits = 8 +} diff --git a/src/main/scala/rocket/Frontend.scala b/src/main/scala/rocket/Frontend.scala index 39815cd1..b6328f34 100644 --- a/src/main/scala/rocket/Frontend.scala +++ b/src/main/scala/rocket/Frontend.scala @@ -35,6 +35,9 @@ class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) { val flush_icache = Bool(OUTPUT) val flush_tlb = Bool(OUTPUT) val npc = UInt(INPUT, width = vaddrBitsExtended) + + // performance events + val acquire = Bool(INPUT) } class Frontend(implicit p: Parameters) extends LazyModule { @@ -150,6 +153,9 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer) io.cpu.resp.bits.replay := icache.io.s2_kill && !icache.io.resp.valid && !s2_xcpt_if io.cpu.resp.bits.btb.valid := s2_btb_resp_valid io.cpu.resp.bits.btb.bits := s2_btb_resp_bits + + // performance events + io.cpu.acquire := icache.io.mem(0).a.fire() } /** Mix-ins for constructing tiles that have an ICache-based pipeline frontend */ diff --git a/src/main/scala/rocket/HellaCache.scala b/src/main/scala/rocket/HellaCache.scala index 4917770f..f0b23107 100644 --- a/src/main/scala/rocket/HellaCache.scala +++ b/src/main/scala/rocket/HellaCache.scala @@ -121,6 +121,10 @@ class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) { val s1_data = Bits(OUTPUT, coreDataBits) // data for previous cycle's req val s2_nack = Bool(INPUT) // req from two cycles ago is rejected + // performance events + val acquire = Bool(INPUT) + val release = Bool(INPUT) + val resp = Valid(new HellaCacheResp).flip val replay_next = Bool(INPUT) val xcpt = (new HellaCacheExceptions).asInput diff --git a/src/main/scala/rocket/NBDcache.scala b/src/main/scala/rocket/NBDcache.scala index 4ffef4f1..5adf56c3 100644 --- a/src/main/scala/rocket/NBDcache.scala +++ b/src/main/scala/rocket/NBDcache.scala @@ -973,4 +973,8 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule io.cpu.resp.bits.data_word_bypass := loadgen.wordData io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid io.cpu.replay_next := (s1_replay && s1_read) || mshrs.io.replay_next + + // performance events + io.cpu.acquire := tl_out.a.fire() + io.cpu.release := tl_out.c.fire() } diff --git a/src/main/scala/rocket/Rocket.scala b/src/main/scala/rocket/Rocket.scala index 2ac26f9c..c8c93c19 100644 --- a/src/main/scala/rocket/Rocket.scala +++ b/src/main/scala/rocket/Rocket.scala @@ -19,7 +19,6 @@ case class RocketCoreParams( useCompressed: Boolean = true, nBreakpoints: Int = 1, nPerfCounters: Int = 0, - nPerfEvents: Int = 0, nCustomMRWCSRs: Int = 0, mtvecInit: Option[BigInt] = Some(BigInt(0)), mtvecWritable: Boolean = true, @@ -44,7 +43,6 @@ trait HasRocketCoreParameters extends HasCoreParameters { val fastJAL = rocketParams.fastJAL val nBreakpoints = rocketParams.nBreakpoints val nPerfCounters = rocketParams.nPerfCounters - val nPerfEvents = rocketParams.nPerfEvents val nCustomMrwCsrs = rocketParams.nCustomMRWCSRs val mtvecInit = rocketParams.mtvecInit val mtvecWritable = rocketParams.mtvecWritable @@ -58,6 +56,50 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) with HasRocketCoreParameters with HasCoreIO { + // performance counters + def pipelineIDToWB[T <: Data](x: T): T = + RegEnable(RegEnable(RegEnable(x, !ctrl_killd), ex_pc_valid), mem_pc_valid) + val perfEvents = new EventSets(Seq( + new EventSet((mask, hits) => Mux(mask(0), wb_xcpt, wb_valid && pipelineIDToWB((mask & hits).orR)), Seq( + ("exception", () => false.B), + ("load", () => id_ctrl.mem && id_ctrl.mem_cmd === M_XRD && !id_ctrl.fp), + ("store", () => id_ctrl.mem && id_ctrl.mem_cmd === M_XWR && !id_ctrl.fp), + ("amo", () => Bool(usingAtomics) && id_ctrl.mem && (isAMO(id_ctrl.mem_cmd) || id_ctrl.mem_cmd.isOneOf(M_XLR, M_XSC))), + ("system", () => id_ctrl.csr =/= CSR.N), + ("arith", () => id_ctrl.wxd && !(id_ctrl.jal || id_ctrl.jalr || id_ctrl.mem || id_ctrl.fp || id_ctrl.div || id_ctrl.csr =/= CSR.N)), + ("branch", () => id_ctrl.branch), + ("jal", () => id_ctrl.jal), + ("jalr", () => id_ctrl.jalr)) + ++ (if (!usingMulDiv) Seq() else Seq( + ("mul", () => id_ctrl.div && (id_ctrl.alu_fn & ALU.FN_DIV) =/= ALU.FN_DIV), + ("div", () => id_ctrl.div && (id_ctrl.alu_fn & ALU.FN_DIV) === ALU.FN_DIV))) + ++ (if (!usingFPU) Seq() else Seq( + ("fp load", () => id_ctrl.fp && io.fpu.dec.ldst && io.fpu.dec.wen), + ("fp store", () => id_ctrl.fp && io.fpu.dec.ldst && !io.fpu.dec.wen), + ("fp add", () => id_ctrl.fp && io.fpu.dec.fma && io.fpu.dec.swap23), + ("fp mul", () => id_ctrl.fp && io.fpu.dec.fma && !io.fpu.dec.swap23 && !io.fpu.dec.ren3), + ("fp mul-add", () => id_ctrl.fp && io.fpu.dec.fma && io.fpu.dec.ren3), + ("fp div/sqrt", () => id_ctrl.fp && (io.fpu.dec.div || io.fpu.dec.sqrt)), + ("fp other", () => id_ctrl.fp && !(io.fpu.dec.ldst || io.fpu.dec.fma || io.fpu.dec.div || io.fpu.dec.sqrt))))), + new EventSet((mask, hits) => (mask & hits).orR, Seq( + ("load-use interlock", () => id_ex_hazard && ex_ctrl.mem || id_mem_hazard && mem_ctrl.mem || id_wb_hazard && wb_ctrl.mem), + ("long-latency interlock", () => id_sboard_hazard), + ("csr interlock", () => id_ex_hazard && ex_ctrl.csr =/= CSR.N || id_mem_hazard && mem_ctrl.csr =/= CSR.N || id_wb_hazard && wb_ctrl.csr =/= CSR.N), + ("I$ blocked", () => !(ibuf.io.inst(0).valid || Reg(next = take_pc))), + ("D$ blocked", () => id_ctrl.mem && dcache_blocked), + ("branch misprediction", () => take_pc_mem && mem_direction_misprediction), + ("control-flow target misprediction", () => take_pc_mem && mem_misprediction && !mem_direction_misprediction), + ("flush", () => take_pc_mem && mem_reg_flush_pipe), + ("replay", () => replay_wb)) + ++ (if (!usingMulDiv) Seq() else Seq( + ("mul/div interlock", () => id_ex_hazard && ex_ctrl.div || id_mem_hazard && mem_ctrl.div || id_wb_hazard && wb_ctrl.div))) + ++ (if (!usingFPU) Seq() else Seq( + ("fp interlock", () => id_ex_hazard && ex_ctrl.fp || id_mem_hazard && mem_ctrl.fp || id_wb_hazard && wb_ctrl.fp || id_ctrl.fp && id_stall_fpu)))), + new EventSet((mask, hits) => (mask & hits).orR, Seq( + ("I$ miss", () => io.imem.acquire), + ("D$ miss", () => io.dmem.acquire), + ("D$ release", () => io.dmem.release))))) + val decode_table = { (if (usingMulDiv) new MDecode +: (xLen > 32).option(new M64Decode).toSeq else Nil) ++: (if (usingAtomics) new ADecode +: (xLen > 32).option(new A64Decode).toSeq else Nil) ++: @@ -142,7 +184,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) val id_npc = (ibuf.io.pc.asSInt + ImmGen(IMM_UJ, id_inst(0))).asUInt take_pc_id := Bool(fastJAL) && !ctrl_killd && id_ctrl.jal - val csr = Module(new CSRFile) + val csr = Module(new CSRFile(perfEvents)) val id_csr_en = id_ctrl.csr.isOneOf(CSR.S, CSR.C, CSR.W) val id_system_insn = id_ctrl.csr >= CSR.I val id_csr_ren = id_ctrl.csr.isOneOf(CSR.S, CSR.C) && id_raddr1 === UInt(0) @@ -292,6 +334,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) (ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause))) // memory stage + val mem_pc_valid = mem_reg_valid || mem_reg_replay || mem_reg_xcpt_interrupt val mem_br_taken = mem_reg_wdata(0) val mem_br_target = mem_reg_pc.asSInt + Mux(mem_ctrl.branch && mem_br_taken, ImmGen(IMM_SB, mem_reg_inst), @@ -303,6 +346,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) val mem_int_wdata = Mux(!mem_reg_xcpt && (mem_ctrl.jalr ^ mem_npc_misaligned), mem_br_target, mem_reg_wdata.asSInt).asUInt val mem_cfi = mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal val mem_cfi_taken = (mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || (Bool(!fastJAL) && mem_ctrl.jal) + val mem_direction_misprediction = mem_reg_btb_hit && mem_ctrl.branch && mem_br_taken =/= mem_reg_btb_resp.taken val mem_misprediction = if (usingBTB) mem_wrong_npc else mem_cfi_taken take_pc_mem := mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe) @@ -357,7 +401,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) wb_reg_replay := replay_mem && !take_pc_wb wb_reg_xcpt := mem_xcpt && !take_pc_wb when (mem_xcpt) { wb_reg_cause := mem_cause } - when (mem_reg_valid || mem_reg_replay || mem_reg_xcpt_interrupt) { + when (mem_pc_valid) { wb_ctrl := mem_ctrl wb_reg_wdata := Mux(!mem_reg_xcpt && mem_ctrl.fp && mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata) when (mem_ctrl.rocc) { @@ -556,6 +600,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) io.rocc.cmd.bits.rs1 := wb_reg_wdata io.rocc.cmd.bits.rs2 := wb_reg_rs2 + // evaluate performance counters + csr.io.counters foreach { c => c.inc := RegNext(perfEvents.evaluate(c.eventSel)) } + if (enableCommitLog) { val pc = Wire(SInt(width=xLen)) pc := wb_reg_pc diff --git a/src/main/scala/util/Counters.scala b/src/main/scala/util/Counters.scala index 2d12a13f..eb555f7a 100644 --- a/src/main/scala/util/Counters.scala +++ b/src/main/scala/util/Counters.scala @@ -50,7 +50,7 @@ case class WideCounter(width: Int, inc: UInt = UInt(1), reset: Boolean = true) private val large = if (isWide) { val r = if (reset) Reg(init=UInt(0, width - smallWidth)) else Reg(UInt(width = width - smallWidth)) - when (nextSmall(smallWidth)) { r := r +& UInt(1) } + when (nextSmall(smallWidth)) { r := r + UInt(1) } r } else null diff --git a/src/main/scala/util/Package.scala b/src/main/scala/util/Package.scala index 5440fcf4..74b0aab2 100644 --- a/src/main/scala/util/Package.scala +++ b/src/main/scala/util/Package.scala @@ -43,6 +43,8 @@ package object util { if (hi == lo-1) UInt(0) else x(hi, lo) } + + def inRange(base: UInt, bounds: UInt) = x >= base && x < bounds } implicit class BooleanToAugmentedBoolean(val x: Boolean) extends AnyVal { From 63f8ce36f6f5b3e5f0bdb77944905a929ffdc390 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 9 Mar 2017 16:48:28 -0800 Subject: [PATCH 09/18] Avoid VM exceptions in groundtest by setting Accessed bit --- src/main/scala/groundtest/DummyPTW.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/groundtest/DummyPTW.scala b/src/main/scala/groundtest/DummyPTW.scala index f770e42d..e2d25397 100644 --- a/src/main/scala/groundtest/DummyPTW.scala +++ b/src/main/scala/groundtest/DummyPTW.scala @@ -34,7 +34,7 @@ class DummyPTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { s2_resp.pte.ppn := s2_ppn s2_resp.pte.reserved_for_software := UInt(0) s2_resp.pte.d := Bool(true) - s2_resp.pte.a := Bool(false) + s2_resp.pte.a := Bool(true) s2_resp.pte.g := Bool(false) s2_resp.pte.u := Bool(true) s2_resp.pte.r := Bool(true) From b24c43badb811c4cf4d755905b84584759e43e9a Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 9 Mar 2017 16:49:02 -0800 Subject: [PATCH 10/18] Don't double-count release traffic in perfctrs --- src/main/scala/rocket/DCache.scala | 4 ++-- src/main/scala/rocket/Frontend.scala | 2 +- src/main/scala/rocket/NBDcache.scala | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/scala/rocket/DCache.scala b/src/main/scala/rocket/DCache.scala index 575f167f..0eab446f 100644 --- a/src/main/scala/rocket/DCache.scala +++ b/src/main/scala/rocket/DCache.scala @@ -505,6 +505,6 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { } // performance events - io.cpu.acquire := tl_out.a.fire() - io.cpu.release := tl_out.c.fire() + io.cpu.acquire := edge.last(tl_out.a) + io.cpu.release := edge.last(tl_out.c) } diff --git a/src/main/scala/rocket/Frontend.scala b/src/main/scala/rocket/Frontend.scala index b6328f34..0a9875bf 100644 --- a/src/main/scala/rocket/Frontend.scala +++ b/src/main/scala/rocket/Frontend.scala @@ -155,7 +155,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer) io.cpu.resp.bits.btb.bits := s2_btb_resp_bits // performance events - io.cpu.acquire := icache.io.mem(0).a.fire() + io.cpu.acquire := edge.last(icache.io.mem(0).a) } /** Mix-ins for constructing tiles that have an ICache-based pipeline frontend */ diff --git a/src/main/scala/rocket/NBDcache.scala b/src/main/scala/rocket/NBDcache.scala index 5adf56c3..cbe3716f 100644 --- a/src/main/scala/rocket/NBDcache.scala +++ b/src/main/scala/rocket/NBDcache.scala @@ -975,6 +975,6 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule io.cpu.replay_next := (s1_replay && s1_read) || mshrs.io.replay_next // performance events - io.cpu.acquire := tl_out.a.fire() - io.cpu.release := tl_out.c.fire() + io.cpu.acquire := edge.last(tl_out.a) + io.cpu.release := edge.last(tl_out.c) } From 380c10f7bd702fc879928420d6b93d588228b775 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 10 Mar 2017 14:00:36 -0800 Subject: [PATCH 11/18] Zap conflicting TLB entries, preparing for superpage support Superpages create the possibility that two entries in the TLB may match. This corresponds to a software bug, but we can't return complete garbage; we must return either the old translation or the new translation. This isn't compatible with the Mux1H approach. So, flush the TLB and report a miss on duplicate entries. --- src/main/scala/rocket/TLB.scala | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/main/scala/rocket/TLB.scala b/src/main/scala/rocket/TLB.scala index 0bc39053..22ed4c57 100644 --- a/src/main/scala/rocket/TLB.scala +++ b/src/main/scala/rocket/TLB.scala @@ -132,12 +132,19 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod plru.access(OHToUInt(hits(entries-1, 0))) } + // Superpages create the possibility that two entries in the TLB may match. + // This corresponds to a software bug, but we can't return complete garbage; + // we must return either the old translation or the new translation. This + // isn't compatible with the Mux1H approach. So, flush the TLB and report + // a miss on duplicate entries. + val multipleHits = PopCountAtLeast(hits(entries-1, 0), 2) + io.req.ready := state === s_ready io.resp.xcpt_ld := bad_va || (~r_array & hits).orR io.resp.xcpt_st := bad_va || (~w_array & hits).orR io.resp.xcpt_if := bad_va || (~x_array & hits).orR io.resp.cacheable := (c_array & hits).orR - io.resp.miss := do_refill || tlb_miss + io.resp.miss := do_refill || tlb_miss || multipleHits io.resp.ppn := Mux1H(hitsVec, ppns :+ passthrough_ppn) io.ptw.req.valid := state === s_request @@ -169,7 +176,7 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod state := s_ready } - when (io.ptw.invalidate) { + when (io.ptw.invalidate || multipleHits) { valid := 0 } } From dbc8f4b30bc970d3a55ef5be19a4dce8a4b3d08b Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 9 Mar 2017 18:56:54 -0800 Subject: [PATCH 12/18] last => done --- src/main/scala/rocket/DCache.scala | 4 ++-- src/main/scala/rocket/Frontend.scala | 2 +- src/main/scala/rocket/NBDcache.scala | 4 ++-- src/main/scala/uncore/tilelink2/Edges.scala | 4 ++++ 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/main/scala/rocket/DCache.scala b/src/main/scala/rocket/DCache.scala index 0eab446f..b3bdb447 100644 --- a/src/main/scala/rocket/DCache.scala +++ b/src/main/scala/rocket/DCache.scala @@ -505,6 +505,6 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { } // performance events - io.cpu.acquire := edge.last(tl_out.a) - io.cpu.release := edge.last(tl_out.c) + io.cpu.acquire := edge.done(tl_out.a) + io.cpu.release := edge.done(tl_out.c) } diff --git a/src/main/scala/rocket/Frontend.scala b/src/main/scala/rocket/Frontend.scala index 0a9875bf..483ba89a 100644 --- a/src/main/scala/rocket/Frontend.scala +++ b/src/main/scala/rocket/Frontend.scala @@ -155,7 +155,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer) io.cpu.resp.bits.btb.bits := s2_btb_resp_bits // performance events - io.cpu.acquire := edge.last(icache.io.mem(0).a) + io.cpu.acquire := edge.done(icache.io.mem(0).a) } /** Mix-ins for constructing tiles that have an ICache-based pipeline frontend */ diff --git a/src/main/scala/rocket/NBDcache.scala b/src/main/scala/rocket/NBDcache.scala index cbe3716f..07de60a7 100644 --- a/src/main/scala/rocket/NBDcache.scala +++ b/src/main/scala/rocket/NBDcache.scala @@ -975,6 +975,6 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule io.cpu.replay_next := (s1_replay && s1_read) || mshrs.io.replay_next // performance events - io.cpu.acquire := edge.last(tl_out.a) - io.cpu.release := edge.last(tl_out.c) + io.cpu.acquire := edge.done(tl_out.a) + io.cpu.release := edge.done(tl_out.c) } diff --git a/src/main/scala/uncore/tilelink2/Edges.scala b/src/main/scala/uncore/tilelink2/Edges.scala index 14800fe9..778eda60 100644 --- a/src/main/scala/uncore/tilelink2/Edges.scala +++ b/src/main/scala/uncore/tilelink2/Edges.scala @@ -224,6 +224,10 @@ class TLEdge( def last(x: DecoupledIO[TLChannel]): Bool = last(x.bits, x.fire()) def last(x: ValidIO[TLChannel]): Bool = last(x.bits, x.valid) + def done(bits: TLChannel, fire: Bool): Bool = firstlastHelper(bits, fire)._3 + def done(x: DecoupledIO[TLChannel]): Bool = done(x.bits, x.fire()) + def done(x: ValidIO[TLChannel]): Bool = done(x.bits, x.valid) + def firstlast(bits: TLChannel, fire: Bool): (Bool, Bool, Bool) = { val r = firstlastHelper(bits, fire) (r._1, r._2, r._3) From fe287864ef6e13510ac26c0a7d36b67ccbfc44bb Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 13 Mar 2017 13:13:30 -0700 Subject: [PATCH 13/18] bump firrtl --- firrtl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firrtl b/firrtl index 7f280a5b..2376ff98 160000 --- a/firrtl +++ b/firrtl @@ -1 +1 @@ -Subproject commit 7f280a5b0821c61284e9bf9ed7780cc825f7f3e8 +Subproject commit 2376ff9849beafaf02b657b461c15a36d7b38fd4 From c84755985396fd568ebd3b80398f4ff62d9ee19a Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Fri, 10 Mar 2017 15:13:17 -0800 Subject: [PATCH 14/18] TLB: add a helper API to determine homogeneous page permissions --- src/main/scala/rocket/TLBPermissions.scala | 97 ++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 src/main/scala/rocket/TLBPermissions.scala diff --git a/src/main/scala/rocket/TLBPermissions.scala b/src/main/scala/rocket/TLBPermissions.scala new file mode 100644 index 00000000..58e984de --- /dev/null +++ b/src/main/scala/rocket/TLBPermissions.scala @@ -0,0 +1,97 @@ +// See LICENSE.SiFive for license details. + +package rocket + +import Chisel._ +import diplomacy._ +import uncore.tilelink2._ + +case class TLBPermissions( + homogeneous: Bool, // if false, the below are undefined + r: Bool, // readable + w: Bool, // writeable + x: Bool, // executable + c: Bool, // cacheable + a: Bool, // arithmetic ops + l: Bool) // logical ops + +object TLBPageLookup +{ + private case class TLBFixedPermissions( + r: Boolean, // readable + w: Boolean, // writeable + x: Boolean, // executable + c: Boolean, // cacheable + a: Boolean, // arithmetic ops + l: Boolean) { // logical ops + val useful = r || w || x || c || a || l + } + + // Unmapped memory is considered to be inhomogeneous + def apply(managers: Seq[TLManagerParameters], xLen: Int, cacheBlockBytes: Int, pageSize: BigInt): UInt => TLBPermissions = { + require (isPow2(xLen) && xLen >= 8) + require (isPow2(cacheBlockBytes) && cacheBlockBytes >= xLen/8) + require (isPow2(pageSize) && pageSize >= cacheBlockBytes) + + val xferSizes = TransferSizes(cacheBlockBytes, cacheBlockBytes) + val allSizes = TransferSizes(1, cacheBlockBytes) + val amoSizes = TransferSizes(4, xLen/8) + + val permissions = managers.map { m => + require (!m.supportsGet || m.supportsGet .contains(allSizes), s"MemoryMap region ${m.name} only supports ${m.supportsGet} Get, but must support ${allSizes}") + require (!m.supportsPutFull || m.supportsPutFull .contains(allSizes), s"MemoryMap region ${m.name} only supports ${m.supportsPutFull} PutFull, but must support ${allSizes}") + require (!m.supportsAcquireB || m.supportsAcquireB .contains(xferSizes), s"MemoryMap region ${m.name} only supports ${m.supportsAcquireB} AcquireB, but must support ${xferSizes}") + require (!m.supportsAcquireT || m.supportsAcquireT .contains(xferSizes), s"MemoryMap region ${m.name} only supports ${m.supportsAcquireT} AcquireT, but must support ${xferSizes}") + require (!m.supportsLogical || m.supportsLogical .contains(amoSizes), s"MemoryMap region ${m.name} only supports ${m.supportsLogical} Logical, but must support ${amoSizes}") + require (!m.supportsArithmetic || m.supportsArithmetic.contains(amoSizes), s"MemoryMap region ${m.name} only supports ${m.supportsArithmetic} Arithmetic, but must support ${amoSizes}") + require (m.supportsAcquireT || !m.supportsAcquireB, s"MemoryMap region ${m.name} supports AcquireB (cached read) but not AcquireT (cached write)... and rocket assumes this") + + (m.address, TLBFixedPermissions( + r = m.supportsGet || m.supportsAcquireB, // if cached, never uses Get + w = m.supportsPutFull || m.supportsAcquireT, // if cached, never uses Put + x = m.executable, + c = m.supportsAcquireB, + a = m.supportsArithmetic, + l = m.supportsLogical)) + } + + val grouped: Map[TLBFixedPermissions, Seq[AddressSet]] = permissions + .filter(_._2.useful) // get rid of no-permission devices + .groupBy(_._2) // group by permission type + .mapValues(seq => + AddressSet.unify(seq.flatMap(_._1)) // coalesce same-permission regions + .filter(_.alignment >= pageSize)) // discard any region that's not big enough + + def lowCostProperty(prop: TLBFixedPermissions => Boolean): UInt => Bool = { + val (yesm, nom) = grouped.partition { case (k, eq) => prop(k) } + val (yes, no) = (yesm.values.flatten.toList, nom.values.flatten.toList) + // Find the minimal bits needed to distinguish between yes and no + val decisionMask = AddressDecoder(Seq(yes, no)) + def simplify(x: Seq[AddressSet]) = AddressSet.unify(x.map(_.widen(~decisionMask)).distinct) + val (yesf, nof) = (simplify(yes), simplify(no)) + if (yesf.size < no.size) { + (x: UInt) => yesf.map(_.contains(x)).reduce(_ || _) + } else { + (x: UInt) => !nof.map(_.contains(x)).reduce(_ || _) + } + } + + // Derive simplified property circuits (don't care when !homo) + val rfn = lowCostProperty(_.r) + val wfn = lowCostProperty(_.w) + val xfn = lowCostProperty(_.x) + val cfn = lowCostProperty(_.c) + val afn = lowCostProperty(_.a) + val lfn = lowCostProperty(_.l) + + val homo = AddressSet.unify(grouped.values.flatten.toList) + (x: UInt) => TLBPermissions( + homogeneous = homo.map(_.contains(x)).reduce(_ || _), + r = rfn(x), + w = wfn(x), + x = xfn(x), + c = cfn(x), + a = afn(x), + l = lfn(x)) + } +} From 90b5cc96cb3ed742b25a206f0dc4abcef268c9ca Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 11 Mar 2017 19:28:59 -0800 Subject: [PATCH 15/18] Gracefully handle empty ports in AddressDecoder --- src/main/scala/diplomacy/AddressDecoder.scala | 55 ++++++++++--------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/src/main/scala/diplomacy/AddressDecoder.scala b/src/main/scala/diplomacy/AddressDecoder.scala index 64fced81..dc8cd6a0 100644 --- a/src/main/scala/diplomacy/AddressDecoder.scala +++ b/src/main/scala/diplomacy/AddressDecoder.scala @@ -19,33 +19,36 @@ object AddressDecoder // Find the minimum subset of bits needed to disambiguate port addresses. // ie: inspecting only the bits in the output, you can look at an address // and decide to which port (outer Seq) the address belongs. - def apply(ports: Ports, givenBits: BigInt = BigInt(0)): BigInt = if (ports.size <= 1) givenBits else { - // Every port must have at least one address! - ports.foreach { p => require (!p.isEmpty) } - // Verify the user did not give us an impossible problem - ports.combinations(2).foreach { case Seq(x, y) => - x.foreach { a => y.foreach { b => - require (!a.overlaps(b)) // it must be possible to disambiguate ports! - } } + def apply(ports: Ports, givenBits: BigInt = BigInt(0)): BigInt = { + val nonEmptyPorts = ports.filter(_.nonEmpty) + if (nonEmptyPorts.size <= 1) { + givenBits + } else { + // Verify the user did not give us an impossible problem + nonEmptyPorts.combinations(2).foreach { case Seq(x, y) => + x.foreach { a => y.foreach { b => + require (!a.overlaps(b)) // it must be possible to disambiguate ports! + } } + } + + val maxBits = log2Ceil(nonEmptyPorts.map(_.map(_.base).max).max) + val (bitsToTry, bitsToTake) = (0 to maxBits).map(BigInt(1) << _).partition(b => (givenBits & b) == 0) + val partitions = Seq(nonEmptyPorts.map(_.sorted).sorted(portOrder)) + val givenPartitions = bitsToTake.foldLeft(partitions) { (p, b) => partitionPartitions(p, b) } + val selected = recurse(givenPartitions, bitsToTry.toSeq) + val output = selected.reduceLeft(_ | _) | givenBits + + // Modify the AddressSets to allow the new wider match functions + val widePorts = nonEmptyPorts.map { _.map { _.widen(~output) } } + // Verify that it remains possible to disambiguate all ports + widePorts.combinations(2).foreach { case Seq(x, y) => + x.foreach { a => y.foreach { b => + require (!a.overlaps(b)) + } } + } + + output } - - val maxBits = log2Ceil(ports.map(_.map(_.base).max).max) - val (bitsToTry, bitsToTake) = (0 to maxBits).map(BigInt(1) << _).partition(b => (givenBits & b) == 0) - val partitions = Seq(ports.map(_.sorted).sorted(portOrder)) - val givenPartitions = bitsToTake.foldLeft(partitions) { (p, b) => partitionPartitions(p, b) } - val selected = recurse(givenPartitions, bitsToTry.toSeq) - val output = selected.reduceLeft(_ | _) | givenBits - - // Modify the AddressSets to allow the new wider match functions - val widePorts = ports.map { _.map { _.widen(~output) } } - // Verify that it remains possible to disambiguate all ports - widePorts.combinations(2).foreach { case Seq(x, y) => - x.foreach { a => y.foreach { b => - require (!a.overlaps(b)) - } } - } - - output } // A simpler version that works for a Seq[Int] From 2d267b49400bde88e6bb066cf1a039ddd42e8b0a Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 11 Mar 2017 19:29:25 -0800 Subject: [PATCH 16/18] Support corner cases in TLBPermissions Don't crap out if the yes-set or no-set is empty. --- src/main/scala/rocket/TLBPermissions.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/scala/rocket/TLBPermissions.scala b/src/main/scala/rocket/TLBPermissions.scala index 58e984de..a2762090 100644 --- a/src/main/scala/rocket/TLBPermissions.scala +++ b/src/main/scala/rocket/TLBPermissions.scala @@ -70,9 +70,9 @@ object TLBPageLookup def simplify(x: Seq[AddressSet]) = AddressSet.unify(x.map(_.widen(~decisionMask)).distinct) val (yesf, nof) = (simplify(yes), simplify(no)) if (yesf.size < no.size) { - (x: UInt) => yesf.map(_.contains(x)).reduce(_ || _) + (x: UInt) => yesf.map(_.contains(x)).foldLeft(false.B)(_ || _) } else { - (x: UInt) => !nof.map(_.contains(x)).reduce(_ || _) + (x: UInt) => !nof.map(_.contains(x)).foldLeft(false.B)(_ || _) } } @@ -86,7 +86,7 @@ object TLBPageLookup val homo = AddressSet.unify(grouped.values.flatten.toList) (x: UInt) => TLBPermissions( - homogeneous = homo.map(_.contains(x)).reduce(_ || _), + homogeneous = homo.map(_.contains(x)).foldLeft(false.B)(_ || _), r = rfn(x), w = wfn(x), x = xfn(x), From 1fea0460ba014847232f00c1ef8a3a0c602c2fe6 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 12 Mar 2017 20:42:51 -0700 Subject: [PATCH 17/18] Support superpage entries in TLB --- src/main/scala/rocket/DCache.scala | 4 +- src/main/scala/rocket/Frontend.scala | 4 +- src/main/scala/rocket/NBDcache.scala | 4 +- src/main/scala/rocket/PTW.scala | 5 +- src/main/scala/rocket/TLB.scala | 118 +++++++++++++++------------ 5 files changed, 74 insertions(+), 61 deletions(-) diff --git a/src/main/scala/rocket/DCache.scala b/src/main/scala/rocket/DCache.scala index b3bdb447..8ed9ba0f 100644 --- a/src/main/scala/rocket/DCache.scala +++ b/src/main/scala/rocket/DCache.scala @@ -105,13 +105,13 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { io.ptw <> tlb.io.ptw tlb.io.req.valid := s1_valid_masked && s1_readwrite tlb.io.req.bits.passthrough := s1_req.phys - tlb.io.req.bits.vpn := s1_req.addr >> pgIdxBits + tlb.io.req.bits.vaddr := s1_req.addr tlb.io.req.bits.instruction := false tlb.io.req.bits.store := s1_write when (!tlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := false } when (s1_valid && s1_readwrite && tlb.io.resp.miss) { s1_nack := true } - val s1_paddr = Cat(tlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0)) + val s1_paddr = tlb.io.resp.paddr val s1_tag = Mux(s1_probe, probe_bits.address, s1_paddr)(paddrBits-1, untagBits) val s1_victim_way = Wire(init = replacer.way) val (s1_hit_way, s1_hit_state, s1_victim_meta) = diff --git a/src/main/scala/rocket/Frontend.scala b/src/main/scala/rocket/Frontend.scala index 483ba89a..b52339fb 100644 --- a/src/main/scala/rocket/Frontend.scala +++ b/src/main/scala/rocket/Frontend.scala @@ -129,7 +129,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer) io.ptw <> tlb.io.ptw tlb.io.req.valid := !stall && !icmiss - tlb.io.req.bits.vpn := s1_pc >> pgIdxBits + tlb.io.req.bits.vaddr := s1_pc tlb.io.req.bits.passthrough := Bool(false) tlb.io.req.bits.instruction := Bool(true) tlb.io.req.bits.store := Bool(false) @@ -137,7 +137,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer) icache.io.req.valid := !stall && !s0_same_block icache.io.req.bits.addr := io.cpu.npc icache.io.invalidate := io.cpu.flush_icache - icache.io.s1_paddr := Cat(tlb.io.resp.ppn, s1_pc(pgIdxBits-1, 0)) + icache.io.s1_paddr := tlb.io.resp.paddr icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.cpu.flush_tlb icache.io.s2_kill := s2_speculative && !s2_cacheable icache.io.resp.ready := !stall && !s1_same_block diff --git a/src/main/scala/rocket/NBDcache.scala b/src/main/scala/rocket/NBDcache.scala index 07de60a7..215d6f20 100644 --- a/src/main/scala/rocket/NBDcache.scala +++ b/src/main/scala/rocket/NBDcache.scala @@ -700,7 +700,7 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule io.ptw <> dtlb.io.ptw dtlb.io.req.valid := s1_valid_masked && s1_readwrite dtlb.io.req.bits.passthrough := s1_req.phys - dtlb.io.req.bits.vpn := s1_req.addr >> pgIdxBits + dtlb.io.req.bits.vaddr := s1_req.addr dtlb.io.req.bits.instruction := Bool(false) dtlb.io.req.bits.store := s1_write when (!dtlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := Bool(false) } @@ -722,7 +722,7 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule when (s2_recycle) { s1_req := s2_req } - val s1_addr = Cat(dtlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0)) + val s1_addr = dtlb.io.resp.paddr when (s1_clk_en) { s2_req.typ := s1_req.typ diff --git a/src/main/scala/rocket/PTW.scala b/src/main/scala/rocket/PTW.scala index e9f86d0a..bcfb1386 100644 --- a/src/main/scala/rocket/PTW.scala +++ b/src/main/scala/rocket/PTW.scala @@ -23,6 +23,7 @@ class PTWReq(implicit p: Parameters) extends CoreBundle()(p) { class PTWResp(implicit p: Parameters) extends CoreBundle()(p) { val pte = new PTE + val level = UInt(width = log2Ceil(pgLevels)) } class TLBPTWIO(implicit p: Parameters) extends CoreBundle()(p) { @@ -131,11 +132,11 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { io.mem.s1_kill := s1_kill io.mem.invalidate_lr := Bool(false) - val resp_ppns = (0 until pgLevels-1).map(i => Cat(pte_addr >> (pgIdxBits + pgLevelBits*(pgLevels-i-1)), r_req.addr(pgLevelBits*(pgLevels-i-1)-1,0))) :+ (pte_addr >> pgIdxBits) for (i <- 0 until io.requestor.size) { io.requestor(i).resp.valid := resp_valid && (r_req_dest === i) io.requestor(i).resp.bits.pte := r_pte - io.requestor(i).resp.bits.pte.ppn := resp_ppns(count) + io.requestor(i).resp.bits.level := count + io.requestor(i).resp.bits.pte.ppn := pte_addr >> pgIdxBits io.requestor(i).ptbr := io.dpath.ptbr io.requestor(i).invalidate := io.dpath.invalidate io.requestor(i).status := io.dpath.status diff --git a/src/main/scala/rocket/TLB.scala b/src/main/scala/rocket/TLB.scala index 22ed4c57..df8e320b 100644 --- a/src/main/scala/rocket/TLB.scala +++ b/src/main/scala/rocket/TLB.scala @@ -17,7 +17,7 @@ case object PgLevels extends Field[Int] case object ASIdBits extends Field[Int] class TLBReq(implicit p: Parameters) extends CoreBundle()(p) { - val vpn = UInt(width = vpnBitsExtended) + val vaddr = UInt(width = vaddrBitsExtended) val passthrough = Bool() val instruction = Bool() val store = Bool() @@ -26,7 +26,7 @@ class TLBReq(implicit p: Parameters) extends CoreBundle()(p) { class TLBResp(implicit p: Parameters) extends CoreBundle()(p) { // lookup responses val miss = Bool(OUTPUT) - val ppn = UInt(OUTPUT, ppnBits) + val paddr = UInt(OUTPUT, paddrBits) val xcpt_ld = Bool(OUTPUT) val xcpt_st = Bool(OUTPUT) val xcpt_if = Bool(OUTPUT) @@ -39,31 +39,33 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod val resp = new TLBResp val ptw = new TLBPTWIO } - val cacheBlockBytes = p(CacheBlockBytes) - val camAddrBits = log2Ceil(entries) - val camTagBits = asIdBits + vpnBits - - val valid = Reg(init = UInt(0, entries)) - val ppns = Reg(Vec(entries, UInt(width = ppnBits))) - val tags = Reg(Vec(entries, UInt(width = asIdBits + vpnBits))) + val totalEntries = entries + 1 + val normalEntries = entries + val specialEntry = entries + val valid = Reg(init = UInt(0, totalEntries)) + val ppns = Reg(Vec(totalEntries, UInt(width = ppnBits))) + val tags = Reg(Vec(totalEntries, UInt(width = asIdBits + vpnBits))) + val levels = Reg(Vec(totalEntries, UInt(width = log2Ceil(pgLevels)))) val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(UInt(), 4) val state = Reg(init=s_ready) val r_refill_tag = Reg(UInt(width = asIdBits + vpnBits)) - val r_refill_waddr = Reg(UInt(width = log2Ceil(entries))) + val r_refill_waddr = Reg(UInt(width = log2Ceil(normalEntries))) val r_req = Reg(new TLBReq) val do_mprv = io.ptw.status.mprv && !io.req.bits.instruction val priv = Mux(do_mprv, io.ptw.status.mpp, io.ptw.status.prv) val priv_s = priv === PRV.S val priv_uses_vm = priv <= PRV.S && !io.ptw.status.debug + val vm_enabled = Bool(usingVM) && io.ptw.ptbr.mode(io.ptw.ptbr.mode.getWidth-1) && priv_uses_vm && !io.req.bits.passthrough // share a single physical memory attribute checker (unshare if critical path) - val passthrough_ppn = io.req.bits.vpn(ppnBits-1, 0) + val (vpn, pgOffset) = Split(io.req.bits.vaddr, pgIdxBits) val refill_ppn = io.ptw.resp.bits.pte.ppn(ppnBits-1, 0) val do_refill = Bool(usingVM) && io.ptw.resp.valid - val mpu_ppn = Mux(do_refill, refill_ppn, passthrough_ppn) - val mpu_physaddr = mpu_ppn << pgIdxBits + val mpu_ppn = Mux(do_refill, refill_ppn, + Mux(vm_enabled, ppns.last, vpn(ppnBits-1, 0))) + val mpu_physaddr = Cat(mpu_ppn, io.req.bits.vaddr(pgIdxBits-1, 0)) val legal_address = edge.manager.findSafe(mpu_physaddr).reduce(_||_) def fastCheck(member: TLManagerParameters => Boolean) = legal_address && Mux1H(edge.manager.findFast(mpu_physaddr), edge.manager.managers.map(m => Bool(member(m)))) @@ -71,65 +73,75 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod val prot_w = fastCheck(_.supportsPutFull) val prot_x = fastCheck(_.executable) val cacheable = fastCheck(_.supportsAcquireB) - val xferSizes = TransferSizes(cacheBlockBytes, cacheBlockBytes) - val allSizes = TransferSizes(1, cacheBlockBytes) - val amoSizes = TransferSizes(1, xLen/8) - edge.manager.managers.foreach { m => - require (m.minAlignment >= 4096, s"MemoryMap region ${m.name} must be page-aligned (is ${m.minAlignment})") - require (!m.supportsGet || m.supportsGet .contains(allSizes), s"MemoryMap region ${m.name} only supports ${m.supportsGet} Get, but must support ${allSizes}") - require (!m.supportsPutFull || m.supportsPutFull .contains(allSizes), s"MemoryMap region ${m.name} only supports ${m.supportsPutFull} PutFull, but must support ${allSizes}") - require (!m.supportsAcquireB || m.supportsAcquireB .contains(xferSizes), s"MemoryMap region ${m.name} only supports ${m.supportsAcquireB} AcquireB, but must support ${xferSizes}") - require (!m.supportsAcquireT || m.supportsAcquireT .contains(xferSizes), s"MemoryMap region ${m.name} only supports ${m.supportsAcquireT} AcquireT, but must support ${xferSizes}") - require (!m.supportsLogical || m.supportsLogical .contains(amoSizes), s"MemoryMap region ${m.name} only supports ${m.supportsLogical} Logical, but must support ${amoSizes}") - require (!m.supportsArithmetic || m.supportsArithmetic.contains(amoSizes), s"MemoryMap region ${m.name} only supports ${m.supportsArithmetic} Arithmetic, but must support ${amoSizes}") - require (m.supportsAcquireT || !m.supportsPutFull || !m.supportsAcquireB, s"MemoryMap region ${m.name} supports PutFull and AcquireB but not AcquireT") + val isSpecial = { + val homogeneous = Wire(init = false.B) + for (i <- 0 until pgLevels) { + println(BigInt(1) << (pgIdxBits + ((pgLevels - 1 - i) * pgLevelBits))) + when (io.ptw.resp.bits.level === i) { homogeneous := TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), BigInt(1) << (pgIdxBits + ((pgLevels - 1 - i) * pgLevelBits)))(mpu_physaddr).homogeneous } + } + !homogeneous } - val lookup_tag = Cat(io.ptw.ptbr.asid, io.req.bits.vpn(vpnBits-1,0)) - val vm_enabled = Bool(usingVM) && io.ptw.ptbr.mode(io.ptw.ptbr.mode.getWidth-1) && priv_uses_vm && !io.req.bits.passthrough - val hitsVec = (0 until entries).map(i => valid(i) && vm_enabled && tags(i) === lookup_tag) :+ !vm_enabled + val lookup_tag = Cat(io.ptw.ptbr.asid, vpn(vpnBits-1,0)) + val hitsVec = (0 until totalEntries).map { i => vm_enabled && { + var tagMatch = valid(i) + for (j <- 0 until pgLevels) { + val base = vpnBits - (j + 1) * pgLevelBits + tagMatch = tagMatch && (levels(i) < j || tags(i)(base + pgLevelBits - 1, base) === vpn(base + pgLevelBits - 1, base)) + } + tagMatch + }} :+ !vm_enabled val hits = hitsVec.asUInt + val level = Mux1H(hitsVec.init, levels) + val partialPPN = Mux1H(hitsVec.init, ppns) + val ppn = { + var ppn = Mux(vm_enabled, partialPPN, vpn)(pgLevelBits*pgLevels - 1, pgLevelBits*(pgLevels - 1)) + for (i <- 1 until pgLevels) + ppn = Cat(ppn, (Mux(level < i, vpn, 0.U) | partialPPN)(vpnBits - i*pgLevelBits - 1, vpnBits - (i + 1)*pgLevelBits)) + ppn + } // permission bit arrays - val pte_array = Reg(new PTE) - val u_array = Reg(UInt(width = entries)) // user permission - val sw_array = Reg(UInt(width = entries)) // write permission - val sx_array = Reg(UInt(width = entries)) // execute permission - val sr_array = Reg(UInt(width = entries)) // read permission - val xr_array = Reg(UInt(width = entries)) // read permission to executable page - val cash_array = Reg(UInt(width = entries)) // cacheable + val u_array = Reg(UInt(width = totalEntries)) // user permission + val sw_array = Reg(UInt(width = totalEntries)) // write permission + val sx_array = Reg(UInt(width = totalEntries)) // execute permission + val sr_array = Reg(UInt(width = totalEntries)) // read permission + val xr_array = Reg(UInt(width = totalEntries)) // read permission to executable page + val cash_array = Reg(UInt(width = normalEntries)) // cacheable when (do_refill) { + val waddr = Mux(isSpecial, specialEntry.U, r_refill_waddr) val pte = io.ptw.resp.bits.pte - ppns(r_refill_waddr) := pte.ppn - tags(r_refill_waddr) := r_refill_tag + ppns(waddr) := pte.ppn + tags(waddr) := r_refill_tag + levels(waddr) := io.ptw.resp.bits.level - val mask = UIntToOH(r_refill_waddr) + val mask = UIntToOH(waddr) valid := valid | mask u_array := Mux(pte.u, u_array | mask, u_array & ~mask) - sw_array := Mux(pte.sw() && prot_w, sw_array | mask, sw_array & ~mask) - sx_array := Mux(pte.sx() && prot_x, sx_array | mask, sx_array & ~mask) - sr_array := Mux(pte.sr() && prot_r, sr_array | mask, sr_array & ~mask) - xr_array := Mux(pte.sx() && prot_r, xr_array | mask, xr_array & ~mask) + sw_array := Mux(pte.sw() && (isSpecial || prot_w), sw_array | mask, sw_array & ~mask) + sx_array := Mux(pte.sx() && (isSpecial || prot_x), sx_array | mask, sx_array & ~mask) + sr_array := Mux(pte.sr() && (isSpecial || prot_r), sr_array | mask, sr_array & ~mask) + xr_array := Mux(pte.sx() && (isSpecial || prot_r), xr_array | mask, xr_array & ~mask) cash_array := Mux(cacheable, cash_array | mask, cash_array & ~mask) } - val plru = new PseudoLRU(entries) + val plru = new PseudoLRU(normalEntries) val repl_waddr = Mux(!valid.andR, PriorityEncoder(~valid), plru.replace) val priv_ok = Mux(priv_s, ~Mux(io.ptw.status.pum, u_array, UInt(0)), u_array) - val w_array = Cat(prot_w, priv_ok & sw_array) - val x_array = Cat(prot_x, priv_ok & sx_array) - val r_array = Cat(prot_r | (prot_x & io.ptw.status.mxr), priv_ok & (sr_array | Mux(io.ptw.status.mxr, xr_array, UInt(0)))) - val c_array = Cat(cacheable, cash_array) + val w_array = Cat(prot_w, priv_ok & ~(~prot_w << specialEntry) & sw_array) + val x_array = Cat(prot_x, priv_ok & ~(~prot_x << specialEntry) & sx_array) + val r_array = Cat(prot_r, priv_ok & ~(~prot_r << specialEntry) & (sr_array | Mux(io.ptw.status.mxr, xr_array, UInt(0)))) + val c_array = Cat(cacheable, cacheable, cash_array) val bad_va = if (vpnBits == vpnBitsExtended) Bool(false) - else io.req.bits.vpn(vpnBits) =/= io.req.bits.vpn(vpnBits-1) - val tlb_hit = hits(entries-1, 0).orR + else vpn(vpnBits) =/= vpn(vpnBits-1) + val tlb_hit = hits(totalEntries-1, 0).orR val tlb_miss = vm_enabled && !bad_va && !tlb_hit - when (io.req.valid && !tlb_miss) { - plru.access(OHToUInt(hits(entries-1, 0))) + when (io.req.valid && !tlb_miss && !hits(specialEntry)) { + plru.access(OHToUInt(hits(normalEntries-1, 0))) } // Superpages create the possibility that two entries in the TLB may match. @@ -137,7 +149,7 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod // we must return either the old translation or the new translation. This // isn't compatible with the Mux1H approach. So, flush the TLB and report // a miss on duplicate entries. - val multipleHits = PopCountAtLeast(hits(entries-1, 0), 2) + val multipleHits = PopCountAtLeast(hits(totalEntries-1, 0), 2) io.req.ready := state === s_ready io.resp.xcpt_ld := bad_va || (~r_array & hits).orR @@ -145,7 +157,7 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod io.resp.xcpt_if := bad_va || (~x_array & hits).orR io.resp.cacheable := (c_array & hits).orR io.resp.miss := do_refill || tlb_miss || multipleHits - io.resp.ppn := Mux1H(hitsVec, ppns :+ passthrough_ppn) + io.resp.paddr := Cat(ppn, pgOffset) io.ptw.req.valid := state === s_request io.ptw.req.bits <> io.ptw.status From d6f571cbbb5debc2b0b584203aea2cdc02bb7e67 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 13 Mar 2017 14:49:46 -0700 Subject: [PATCH 18/18] Implement mstatus.TSR --- src/main/scala/rocket/CSR.scala | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/main/scala/rocket/CSR.scala b/src/main/scala/rocket/CSR.scala index 298ac974..2b715040 100644 --- a/src/main/scala/rocket/CSR.scala +++ b/src/main/scala/rocket/CSR.scala @@ -22,7 +22,8 @@ class MStatus extends Bundle { val sxl = UInt(width = 2) val uxl = UInt(width = 2) val sd_rv32 = Bool() - val zero1 = UInt(width = 9) + val zero1 = UInt(width = 8) + val tsr = Bool() val tw = Bool() val tvm = Bool() val mxr = Bool() @@ -416,6 +417,7 @@ class CSRFile(perfEventSets: EventSets = new EventSets(Seq()))(implicit p: Param val allow_wfi = Bool(!usingVM) || effective_prv > PRV.S || !reg_mstatus.tw val allow_sfence_vma = Bool(!usingVM) || effective_prv > PRV.S || !reg_mstatus.tvm + val allow_sret = Bool(!usingVM) || effective_prv > PRV.S || !reg_mstatus.tsr io.decode.fp_illegal := io.status.fs === 0 || !reg_misa('f'-'a') io.decode.rocc_illegal := io.status.xs === 0 || !reg_misa('x'-'a') io.decode.read_illegal := effective_prv < io.decode.csr(9,8) || @@ -427,7 +429,8 @@ class CSRFile(perfEventSets: EventSets = new EventSets(Seq()))(implicit p: Param io.decode.write_illegal := io.decode.csr(11,10).andR io.decode.write_flush := !(io.decode.csr >= CSRs.mscratch && io.decode.csr <= CSRs.mbadaddr || io.decode.csr >= CSRs.sscratch && io.decode.csr <= CSRs.sbadaddr) io.decode.system_illegal := effective_prv < io.decode.csr(9,8) || - io.decode.csr(2) && !allow_wfi || + !io.decode.csr(5) && io.decode.csr(2) && !allow_wfi || + !io.decode.csr(5) && io.decode.csr(1) && !allow_sret || io.decode.csr(5) && !allow_sfence_vma val cause = @@ -547,6 +550,7 @@ class CSRFile(perfEventSets: EventSets = new EventSets(Seq()))(implicit p: Param reg_mstatus.sie := new_mstatus.sie reg_mstatus.tw := new_mstatus.tw reg_mstatus.tvm := new_mstatus.tvm + reg_mstatus.tsr := new_mstatus.tsr } }