Support SFENCE.VMA rs1 argument
This change is a little invasive. To flush a specific entry from the TLB, you need to reuse its CAM port. Since the TLB lookup can be on the critical path, we wish to avoid muxing in another address. This is simple on the data side, where the datapath already carries rs1 to the TLB (it's the same path as the AMO address calculation). It's trickier for the instruction cache (I$), where the TLB lookup address comes from the fetch-stage PC. The trick is to temporarily redirect the PC to rs1, so that the instruction-side TLB lookup is performed on the address being flushed, then redirect the PC again to the instruction after SFENCE.VMA.
This commit is contained in:
parent
797c18b8db
commit
cf168e419b
@ -52,6 +52,5 @@ class DummyPTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) {
|
|||||||
requestor.ptbr.mode := requestor.ptbr.pgLevelsToMode(pgLevels).U
|
requestor.ptbr.mode := requestor.ptbr.pgLevelsToMode(pgLevels).U
|
||||||
requestor.ptbr.asid := UInt(0)
|
requestor.ptbr.asid := UInt(0)
|
||||||
requestor.ptbr.ppn := UInt(0)
|
requestor.ptbr.ppn := UInt(0)
|
||||||
requestor.invalidate := Bool(false)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -178,7 +178,6 @@ class CSRFileIO(implicit p: Parameters) extends CoreBundle
|
|||||||
val cause = UInt(INPUT, xLen)
|
val cause = UInt(INPUT, xLen)
|
||||||
val pc = UInt(INPUT, vaddrBitsExtended)
|
val pc = UInt(INPUT, vaddrBitsExtended)
|
||||||
val badaddr = UInt(INPUT, vaddrBitsExtended)
|
val badaddr = UInt(INPUT, vaddrBitsExtended)
|
||||||
val fatc = Bool(OUTPUT)
|
|
||||||
val time = UInt(OUTPUT, xLen)
|
val time = UInt(OUTPUT, xLen)
|
||||||
val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ)
|
val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ)
|
||||||
val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ)).flip
|
val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ)).flip
|
||||||
@ -410,12 +409,10 @@ class CSRFile(perfEventSets: EventSets = new EventSets(Seq()))(implicit p: Param
|
|||||||
|
|
||||||
val system_insn = io.rw.cmd === CSR.I
|
val system_insn = io.rw.cmd === CSR.I
|
||||||
val opcode = UInt(1) << io.rw.addr(2,0)
|
val opcode = UInt(1) << io.rw.addr(2,0)
|
||||||
val insn_rs2 = io.rw.addr(5)
|
val insn_call = system_insn && opcode(0)
|
||||||
val insn_call = system_insn && !insn_rs2 && opcode(0)
|
|
||||||
val insn_break = system_insn && opcode(1)
|
val insn_break = system_insn && opcode(1)
|
||||||
val insn_ret = system_insn && opcode(2)
|
val insn_ret = system_insn && opcode(2)
|
||||||
val insn_wfi = system_insn && opcode(5)
|
val insn_wfi = system_insn && opcode(5)
|
||||||
val insn_sfence_vma = system_insn && insn_rs2
|
|
||||||
|
|
||||||
private def decodeAny(m: LinkedHashMap[Int,Bits]): Bool = m.map { case(k: Int, _: Bits) => io.decode.csr === k }.reduce(_||_)
|
private def decodeAny(m: LinkedHashMap[Int,Bits]): Bool = m.map { case(k: Int, _: Bits) => io.decode.csr === k }.reduce(_||_)
|
||||||
val allow_wfi = Bool(!usingVM) || effective_prv > PRV.S || !reg_mstatus.tw
|
val allow_wfi = Bool(!usingVM) || effective_prv > PRV.S || !reg_mstatus.tw
|
||||||
@ -447,7 +444,6 @@ class CSRFile(perfEventSets: EventSets = new EventSets(Seq()))(implicit p: Param
|
|||||||
val delegate = Bool(usingVM) && reg_mstatus.prv <= PRV.S && Mux(cause(xLen-1), reg_mideleg(cause_lsbs), reg_medeleg(cause_lsbs))
|
val delegate = Bool(usingVM) && reg_mstatus.prv <= PRV.S && Mux(cause(xLen-1), reg_mideleg(cause_lsbs), reg_medeleg(cause_lsbs))
|
||||||
val debugTVec = Mux(reg_debug, UInt(0x808), UInt(0x800))
|
val debugTVec = Mux(reg_debug, UInt(0x808), UInt(0x800))
|
||||||
val tvec = Mux(trapToDebug, debugTVec, Mux(delegate, reg_stvec.sextTo(vaddrBitsExtended), reg_mtvec))
|
val tvec = Mux(trapToDebug, debugTVec, Mux(delegate, reg_stvec.sextTo(vaddrBitsExtended), reg_mtvec))
|
||||||
io.fatc := insn_sfence_vma
|
|
||||||
io.evec := tvec
|
io.evec := tvec
|
||||||
io.ptbr := reg_sptbr
|
io.ptbr := reg_sptbr
|
||||||
io.eret := insn_call || insn_break || insn_ret
|
io.eret := insn_call || insn_break || insn_ret
|
||||||
|
@ -73,6 +73,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
|||||||
val s1_read = isRead(s1_req.cmd)
|
val s1_read = isRead(s1_req.cmd)
|
||||||
val s1_write = isWrite(s1_req.cmd)
|
val s1_write = isWrite(s1_req.cmd)
|
||||||
val s1_readwrite = s1_read || s1_write
|
val s1_readwrite = s1_read || s1_write
|
||||||
|
val s1_sfence = s1_req.cmd === M_SFENCE
|
||||||
val s1_flush_valid = Reg(Bool())
|
val s1_flush_valid = Reg(Bool())
|
||||||
|
|
||||||
val s_ready :: s_voluntary_writeback :: s_probe_rep_dirty :: s_probe_rep_clean :: s_probe_rep_miss :: s_voluntary_write_meta :: s_probe_write_meta :: Nil = Enum(UInt(), 7)
|
val s_ready :: s_voluntary_writeback :: s_probe_rep_dirty :: s_probe_rep_clean :: s_probe_rep_miss :: s_voluntary_write_meta :: s_probe_write_meta :: Nil = Enum(UInt(), 7)
|
||||||
@ -104,7 +105,11 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
|||||||
// address translation
|
// address translation
|
||||||
val tlb = Module(new TLB(nTLBEntries))
|
val tlb = Module(new TLB(nTLBEntries))
|
||||||
io.ptw <> tlb.io.ptw
|
io.ptw <> tlb.io.ptw
|
||||||
tlb.io.req.valid := s1_valid_masked && s1_readwrite
|
tlb.io.req.valid := s1_valid_masked && (s1_readwrite || s1_sfence)
|
||||||
|
tlb.io.req.bits.sfence.valid := s1_sfence
|
||||||
|
tlb.io.req.bits.sfence.bits.rs1 := s1_req.typ(0)
|
||||||
|
tlb.io.req.bits.sfence.bits.rs2 := s1_req.typ(1)
|
||||||
|
tlb.io.req.bits.sfence.bits.asid := io.cpu.s1_data
|
||||||
tlb.io.req.bits.passthrough := s1_req.phys
|
tlb.io.req.bits.passthrough := s1_req.phys
|
||||||
tlb.io.req.bits.vaddr := s1_req.addr
|
tlb.io.req.bits.vaddr := s1_req.addr
|
||||||
tlb.io.req.bits.instruction := false
|
tlb.io.req.bits.instruction := false
|
||||||
@ -136,7 +141,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
|||||||
val s1_data_way = Mux(inWriteback, releaseWay, s1_hit_way)
|
val s1_data_way = Mux(inWriteback, releaseWay, s1_hit_way)
|
||||||
val s1_data = Mux1H(s1_data_way, data.io.resp) // retime into s2 if critical
|
val s1_data = Mux1H(s1_data_way, data.io.resp) // retime into s2 if critical
|
||||||
|
|
||||||
val s2_valid = Reg(next=s1_valid_masked, init=Bool(false))
|
val s2_valid = Reg(next=s1_valid_masked && !s1_sfence, init=Bool(false))
|
||||||
val s2_probe = Reg(next=s1_probe, init=Bool(false))
|
val s2_probe = Reg(next=s1_probe, init=Bool(false))
|
||||||
val releaseInFlight = s1_probe || s2_probe || release_state =/= s_ready
|
val releaseInFlight = s1_probe || s2_probe || release_state =/= s_ready
|
||||||
val s2_valid_masked = s2_valid && Reg(next = !s1_nack)
|
val s2_valid_masked = s2_valid && Reg(next = !s1_nack)
|
||||||
|
@ -28,12 +28,12 @@ class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) {
|
|||||||
|
|
||||||
class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) {
|
class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) {
|
||||||
val req = Valid(new FrontendReq)
|
val req = Valid(new FrontendReq)
|
||||||
|
val sfence = Valid(new SFenceReq)
|
||||||
val resp = Decoupled(new FrontendResp).flip
|
val resp = Decoupled(new FrontendResp).flip
|
||||||
val btb_update = Valid(new BTBUpdate)
|
val btb_update = Valid(new BTBUpdate)
|
||||||
val bht_update = Valid(new BHTUpdate)
|
val bht_update = Valid(new BHTUpdate)
|
||||||
val ras_update = Valid(new RASUpdate)
|
val ras_update = Valid(new RASUpdate)
|
||||||
val flush_icache = Bool(OUTPUT)
|
val flush_icache = Bool(OUTPUT)
|
||||||
val flush_tlb = Bool(OUTPUT)
|
|
||||||
val npc = UInt(INPUT, width = vaddrBitsExtended)
|
val npc = UInt(INPUT, width = vaddrBitsExtended)
|
||||||
|
|
||||||
// performance events
|
// performance events
|
||||||
@ -133,12 +133,13 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
|||||||
tlb.io.req.bits.passthrough := Bool(false)
|
tlb.io.req.bits.passthrough := Bool(false)
|
||||||
tlb.io.req.bits.instruction := Bool(true)
|
tlb.io.req.bits.instruction := Bool(true)
|
||||||
tlb.io.req.bits.store := Bool(false)
|
tlb.io.req.bits.store := Bool(false)
|
||||||
|
tlb.io.req.bits.sfence := io.cpu.sfence
|
||||||
|
|
||||||
icache.io.req.valid := !stall && !s0_same_block
|
icache.io.req.valid := !stall && !s0_same_block
|
||||||
icache.io.req.bits.addr := io.cpu.npc
|
icache.io.req.bits.addr := io.cpu.npc
|
||||||
icache.io.invalidate := io.cpu.flush_icache
|
icache.io.invalidate := io.cpu.flush_icache
|
||||||
icache.io.s1_paddr := tlb.io.resp.paddr
|
icache.io.s1_paddr := tlb.io.resp.paddr
|
||||||
icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.cpu.flush_tlb
|
icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss
|
||||||
icache.io.s2_kill := s2_speculative && !s2_cacheable
|
icache.io.s2_kill := s2_speculative && !s2_cacheable
|
||||||
icache.io.resp.ready := !stall && !s1_same_block
|
icache.io.resp.ready := !stall && !s1_same_block
|
||||||
|
|
||||||
|
@ -129,7 +129,7 @@ class IDecode(implicit val p: Parameters) extends DecodeConstants
|
|||||||
class SDecode(implicit val p: Parameters) extends DecodeConstants
|
class SDecode(implicit val p: Parameters) extends DecodeConstants
|
||||||
{
|
{
|
||||||
val table: Array[(BitPat, List[BitPat])] = Array(
|
val table: Array[(BitPat, List[BitPat])] = Array(
|
||||||
SFENCE_VMA->List(Y,N,N,N,N,N,Y,Y,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N,N),
|
SFENCE_VMA->List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_SFENCE, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N),
|
||||||
SRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N,N))
|
SRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N,N))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -680,8 +680,9 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule
|
|||||||
val s1_valid_masked = s1_valid && !io.cpu.s1_kill && !io.cpu.xcpt.asUInt.orR
|
val s1_valid_masked = s1_valid && !io.cpu.s1_kill && !io.cpu.xcpt.asUInt.orR
|
||||||
val s1_replay = Reg(init=Bool(false))
|
val s1_replay = Reg(init=Bool(false))
|
||||||
val s1_clk_en = Reg(Bool())
|
val s1_clk_en = Reg(Bool())
|
||||||
|
val s1_sfence = s1_req.cmd === M_SFENCE
|
||||||
|
|
||||||
val s2_valid = Reg(next=s1_valid_masked, init=Bool(false))
|
val s2_valid = Reg(next=s1_valid_masked && !s1_sfence, init=Bool(false))
|
||||||
val s2_req = Reg(io.cpu.req.bits)
|
val s2_req = Reg(io.cpu.req.bits)
|
||||||
val s2_replay = Reg(next=s1_replay, init=Bool(false)) && s2_req.cmd =/= M_FLUSH_ALL
|
val s2_replay = Reg(next=s1_replay, init=Bool(false)) && s2_req.cmd =/= M_FLUSH_ALL
|
||||||
val s2_recycle = Wire(Bool())
|
val s2_recycle = Wire(Bool())
|
||||||
@ -698,7 +699,11 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule
|
|||||||
|
|
||||||
val dtlb = Module(new TLB(nTLBEntries))
|
val dtlb = Module(new TLB(nTLBEntries))
|
||||||
io.ptw <> dtlb.io.ptw
|
io.ptw <> dtlb.io.ptw
|
||||||
dtlb.io.req.valid := s1_valid_masked && s1_readwrite
|
dtlb.io.req.valid := s1_valid_masked && (s1_readwrite || s1_sfence)
|
||||||
|
dtlb.io.req.bits.sfence.valid := s1_sfence
|
||||||
|
dtlb.io.req.bits.sfence.bits.rs1 := s1_req.typ(0)
|
||||||
|
dtlb.io.req.bits.sfence.bits.rs2 := s1_req.typ(1)
|
||||||
|
dtlb.io.req.bits.sfence.bits.asid := io.cpu.s1_data
|
||||||
dtlb.io.req.bits.passthrough := s1_req.phys
|
dtlb.io.req.bits.passthrough := s1_req.phys
|
||||||
dtlb.io.req.bits.vaddr := s1_req.addr
|
dtlb.io.req.bits.vaddr := s1_req.addr
|
||||||
dtlb.io.req.bits.instruction := Bool(false)
|
dtlb.io.req.bits.instruction := Bool(false)
|
||||||
|
@ -30,7 +30,6 @@ class TLBPTWIO(implicit p: Parameters) extends CoreBundle()(p) {
|
|||||||
val req = Decoupled(new PTWReq)
|
val req = Decoupled(new PTWReq)
|
||||||
val resp = Valid(new PTWResp).flip
|
val resp = Valid(new PTWResp).flip
|
||||||
val ptbr = new PTBR().asInput
|
val ptbr = new PTBR().asInput
|
||||||
val invalidate = Bool(INPUT)
|
|
||||||
val status = new MStatus().asInput
|
val status = new MStatus().asInput
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -138,7 +137,6 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) {
|
|||||||
io.requestor(i).resp.bits.level := count
|
io.requestor(i).resp.bits.level := count
|
||||||
io.requestor(i).resp.bits.pte.ppn := pte_addr >> pgIdxBits
|
io.requestor(i).resp.bits.pte.ppn := pte_addr >> pgIdxBits
|
||||||
io.requestor(i).ptbr := io.dpath.ptbr
|
io.requestor(i).ptbr := io.dpath.ptbr
|
||||||
io.requestor(i).invalidate := io.dpath.invalidate
|
|
||||||
io.requestor(i).status := io.dpath.status
|
io.requestor(i).status := io.dpath.status
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -141,6 +141,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
val mem_reg_slow_bypass = Reg(Bool())
|
val mem_reg_slow_bypass = Reg(Bool())
|
||||||
val mem_reg_load = Reg(Bool())
|
val mem_reg_load = Reg(Bool())
|
||||||
val mem_reg_store = Reg(Bool())
|
val mem_reg_store = Reg(Bool())
|
||||||
|
val mem_reg_sfence = Reg(Bool())
|
||||||
val mem_reg_pc = Reg(UInt())
|
val mem_reg_pc = Reg(UInt())
|
||||||
val mem_reg_inst = Reg(Bits())
|
val mem_reg_inst = Reg(Bits())
|
||||||
val mem_reg_wdata = Reg(Bits())
|
val mem_reg_wdata = Reg(Bits())
|
||||||
@ -151,6 +152,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
val wb_reg_xcpt = Reg(Bool())
|
val wb_reg_xcpt = Reg(Bool())
|
||||||
val wb_reg_replay = Reg(Bool())
|
val wb_reg_replay = Reg(Bool())
|
||||||
val wb_reg_cause = Reg(UInt())
|
val wb_reg_cause = Reg(UInt())
|
||||||
|
val wb_reg_sfence = Reg(Bool())
|
||||||
|
val wb_reg_sfence_done = Reg(Bool())
|
||||||
val wb_reg_pc = Reg(UInt())
|
val wb_reg_pc = Reg(UInt())
|
||||||
val wb_reg_inst = Reg(Bits())
|
val wb_reg_inst = Reg(Bits())
|
||||||
val wb_reg_wdata = Reg(Bits())
|
val wb_reg_wdata = Reg(Bits())
|
||||||
@ -189,7 +192,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
val id_system_insn = id_ctrl.csr >= CSR.I
|
val id_system_insn = id_ctrl.csr >= CSR.I
|
||||||
val id_csr_ren = id_ctrl.csr.isOneOf(CSR.S, CSR.C) && id_raddr1 === UInt(0)
|
val id_csr_ren = id_ctrl.csr.isOneOf(CSR.S, CSR.C) && id_raddr1 === UInt(0)
|
||||||
val id_csr = Mux(id_csr_ren, CSR.R, id_ctrl.csr)
|
val id_csr = Mux(id_csr_ren, CSR.R, id_ctrl.csr)
|
||||||
val id_csr_flush = id_system_insn || (id_csr_en && !id_csr_ren && csr.io.decode.write_flush)
|
val id_sfence = id_ctrl.mem && id_ctrl.mem_cmd === M_SFENCE
|
||||||
|
val id_csr_flush = id_sfence || id_system_insn || (id_csr_en && !id_csr_ren && csr.io.decode.write_flush)
|
||||||
|
|
||||||
val id_illegal_insn = !id_ctrl.legal ||
|
val id_illegal_insn = !id_ctrl.legal ||
|
||||||
id_ctrl.div && !csr.io.status.isa('m'-'a') ||
|
id_ctrl.div && !csr.io.status.isa('m'-'a') ||
|
||||||
@ -199,7 +203,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
ibuf.io.inst(0).bits.rvc && !csr.io.status.isa('c'-'a') ||
|
ibuf.io.inst(0).bits.rvc && !csr.io.status.isa('c'-'a') ||
|
||||||
id_ctrl.rocc && csr.io.decode.rocc_illegal ||
|
id_ctrl.rocc && csr.io.decode.rocc_illegal ||
|
||||||
id_csr_en && (csr.io.decode.read_illegal || !id_csr_ren && csr.io.decode.write_illegal) ||
|
id_csr_en && (csr.io.decode.read_illegal || !id_csr_ren && csr.io.decode.write_illegal) ||
|
||||||
id_system_insn && csr.io.decode.system_illegal
|
(id_sfence || id_system_insn) && csr.io.decode.system_illegal
|
||||||
// stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE)
|
// stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE)
|
||||||
val id_amo_aq = id_inst(0)(26)
|
val id_amo_aq = id_inst(0)(26)
|
||||||
val id_amo_rl = id_inst(0)(25)
|
val id_amo_rl = id_inst(0)(25)
|
||||||
@ -297,6 +301,10 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
}
|
}
|
||||||
ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush || csr.io.singleStep
|
ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush || csr.io.singleStep
|
||||||
ex_reg_load_use := id_load_use
|
ex_reg_load_use := id_load_use
|
||||||
|
when (id_sfence) {
|
||||||
|
ex_ctrl.mem_type := Cat(id_raddr2 =/= UInt(0), id_raddr1 =/= UInt(0))
|
||||||
|
when (wb_reg_sfence_done) { ex_ctrl.mem := false }
|
||||||
|
}
|
||||||
|
|
||||||
when (id_ctrl.jalr && csr.io.status.debug) {
|
when (id_ctrl.jalr && csr.io.status.debug) {
|
||||||
ex_reg_flush_pipe := true
|
ex_reg_flush_pipe := true
|
||||||
@ -340,9 +348,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
Mux(mem_ctrl.branch && mem_br_taken, ImmGen(IMM_SB, mem_reg_inst),
|
Mux(mem_ctrl.branch && mem_br_taken, ImmGen(IMM_SB, mem_reg_inst),
|
||||||
Mux(Bool(!fastJAL) && mem_ctrl.jal, ImmGen(IMM_UJ, mem_reg_inst),
|
Mux(Bool(!fastJAL) && mem_ctrl.jal, ImmGen(IMM_UJ, mem_reg_inst),
|
||||||
Mux(mem_reg_rvc, SInt(2), SInt(4))))
|
Mux(mem_reg_rvc, SInt(2), SInt(4))))
|
||||||
val mem_npc = (Mux(mem_ctrl.jalr, encodeVirtualAddress(mem_reg_wdata, mem_reg_wdata).asSInt, mem_br_target) & SInt(-2)).asUInt
|
val mem_npc = (Mux(mem_ctrl.jalr || mem_reg_sfence, encodeVirtualAddress(mem_reg_wdata, mem_reg_wdata).asSInt, mem_br_target) & SInt(-2)).asUInt
|
||||||
val mem_wrong_npc = Mux(ex_pc_valid, mem_npc =/= ex_reg_pc, Mux(ibuf.io.inst(0).valid, mem_npc =/= ibuf.io.pc, Bool(true)))
|
val mem_wrong_npc = Mux(ex_pc_valid, mem_npc =/= ex_reg_pc, Mux(ibuf.io.inst(0).valid, mem_npc =/= ibuf.io.pc, Bool(true)))
|
||||||
val mem_npc_misaligned = !csr.io.status.isa('c'-'a') && mem_npc(1)
|
val mem_npc_misaligned = !csr.io.status.isa('c'-'a') && mem_npc(1) && !mem_reg_sfence
|
||||||
val mem_int_wdata = Mux(!mem_reg_xcpt && (mem_ctrl.jalr ^ mem_npc_misaligned), mem_br_target, mem_reg_wdata.asSInt).asUInt
|
val mem_int_wdata = Mux(!mem_reg_xcpt && (mem_ctrl.jalr ^ mem_npc_misaligned), mem_br_target, mem_reg_wdata.asSInt).asUInt
|
||||||
val mem_cfi = mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal
|
val mem_cfi = mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal
|
||||||
val mem_cfi_taken = (mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || (Bool(!fastJAL) && mem_ctrl.jal)
|
val mem_cfi_taken = (mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || (Bool(!fastJAL) && mem_ctrl.jal)
|
||||||
@ -361,6 +369,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
mem_reg_rvc := ex_reg_rvc
|
mem_reg_rvc := ex_reg_rvc
|
||||||
mem_reg_load := ex_ctrl.mem && isRead(ex_ctrl.mem_cmd)
|
mem_reg_load := ex_ctrl.mem && isRead(ex_ctrl.mem_cmd)
|
||||||
mem_reg_store := ex_ctrl.mem && isWrite(ex_ctrl.mem_cmd)
|
mem_reg_store := ex_ctrl.mem && isWrite(ex_ctrl.mem_cmd)
|
||||||
|
mem_reg_sfence := Bool(usingVM) && ex_ctrl.mem && ex_ctrl.mem_cmd === M_SFENCE
|
||||||
mem_reg_btb_hit := ex_reg_btb_hit
|
mem_reg_btb_hit := ex_reg_btb_hit
|
||||||
when (ex_reg_btb_hit) { mem_reg_btb_resp := ex_reg_btb_resp }
|
when (ex_reg_btb_hit) { mem_reg_btb_resp := ex_reg_btb_resp }
|
||||||
mem_reg_flush_pipe := ex_reg_flush_pipe
|
mem_reg_flush_pipe := ex_reg_flush_pipe
|
||||||
@ -391,7 +400,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
|
|
||||||
val dcache_kill_mem = mem_reg_valid && mem_ctrl.wxd && io.dmem.replay_next // structural hazard on writeback port
|
val dcache_kill_mem = mem_reg_valid && mem_ctrl.wxd && io.dmem.replay_next // structural hazard on writeback port
|
||||||
val fpu_kill_mem = mem_reg_valid && mem_ctrl.fp && io.fpu.nack_mem
|
val fpu_kill_mem = mem_reg_valid && mem_ctrl.fp && io.fpu.nack_mem
|
||||||
val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem
|
val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem || mem_reg_valid && mem_reg_sfence
|
||||||
val killm_common = dcache_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid
|
val killm_common = dcache_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid
|
||||||
div.io.kill := killm_common && Reg(next = div.io.req.fire())
|
div.io.kill := killm_common && Reg(next = div.io.req.fire())
|
||||||
val ctrl_killm = killm_common || mem_xcpt || fpu_kill_mem
|
val ctrl_killm = killm_common || mem_xcpt || fpu_kill_mem
|
||||||
@ -403,8 +412,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
when (mem_xcpt) { wb_reg_cause := mem_cause }
|
when (mem_xcpt) { wb_reg_cause := mem_cause }
|
||||||
when (mem_pc_valid) {
|
when (mem_pc_valid) {
|
||||||
wb_ctrl := mem_ctrl
|
wb_ctrl := mem_ctrl
|
||||||
|
wb_reg_sfence := mem_reg_sfence
|
||||||
wb_reg_wdata := Mux(!mem_reg_xcpt && mem_ctrl.fp && mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata)
|
wb_reg_wdata := Mux(!mem_reg_xcpt && mem_ctrl.fp && mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata)
|
||||||
when (mem_ctrl.rocc) {
|
when (mem_ctrl.rocc || mem_reg_sfence) {
|
||||||
wb_reg_rs2 := mem_reg_rs2
|
wb_reg_rs2 := mem_reg_rs2
|
||||||
}
|
}
|
||||||
wb_reg_inst := mem_reg_inst
|
wb_reg_inst := mem_reg_inst
|
||||||
@ -448,6 +458,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
}
|
}
|
||||||
|
|
||||||
val wb_valid = wb_reg_valid && !replay_wb && !wb_xcpt
|
val wb_valid = wb_reg_valid && !replay_wb && !wb_xcpt
|
||||||
|
when (wb_valid || wb_xcpt) { wb_reg_sfence_done := false }
|
||||||
|
when (io.imem.sfence.valid) { wb_reg_sfence_done := true }
|
||||||
val wb_wen = wb_valid && wb_ctrl.wxd
|
val wb_wen = wb_valid && wb_ctrl.wxd
|
||||||
val rf_wen = wb_wen || ll_wen
|
val rf_wen = wb_wen || ll_wen
|
||||||
val rf_waddr = Mux(ll_wen, ll_waddr, wb_waddr)
|
val rf_waddr = Mux(ll_wen, ll_waddr, wb_waddr)
|
||||||
@ -470,7 +482,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
csr.io.pc := wb_reg_pc
|
csr.io.pc := wb_reg_pc
|
||||||
csr.io.badaddr := encodeVirtualAddress(wb_reg_wdata, wb_reg_wdata)
|
csr.io.badaddr := encodeVirtualAddress(wb_reg_wdata, wb_reg_wdata)
|
||||||
io.ptw.ptbr := csr.io.ptbr
|
io.ptw.ptbr := csr.io.ptbr
|
||||||
io.ptw.invalidate := csr.io.fatc
|
|
||||||
io.ptw.status := csr.io.status
|
io.ptw.status := csr.io.status
|
||||||
csr.io.rw.addr := wb_reg_inst(31,20)
|
csr.io.rw.addr := wb_reg_inst(31,20)
|
||||||
csr.io.rw.cmd := Mux(wb_reg_valid, wb_ctrl.csr, CSR.N)
|
csr.io.rw.cmd := Mux(wb_reg_valid, wb_ctrl.csr, CSR.N)
|
||||||
@ -542,7 +553,11 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
Mux(take_pc_mem || Bool(!fastJAL), mem_npc, // branch misprediction
|
Mux(take_pc_mem || Bool(!fastJAL), mem_npc, // branch misprediction
|
||||||
id_npc))) // JAL
|
id_npc))) // JAL
|
||||||
io.imem.flush_icache := wb_reg_valid && wb_ctrl.fence_i && !io.dmem.s2_nack
|
io.imem.flush_icache := wb_reg_valid && wb_ctrl.fence_i && !io.dmem.s2_nack
|
||||||
io.imem.flush_tlb := csr.io.fatc
|
io.imem.sfence.valid := wb_reg_valid && wb_reg_sfence
|
||||||
|
io.imem.sfence.bits.rs1 := wb_ctrl.mem_type(0)
|
||||||
|
io.imem.sfence.bits.rs2 := wb_ctrl.mem_type(1)
|
||||||
|
io.imem.sfence.bits.asid := wb_reg_rs2
|
||||||
|
io.ptw.invalidate := io.imem.sfence.valid && !io.imem.sfence.bits.rs1
|
||||||
|
|
||||||
ibuf.io.inst(0).ready := !ctrl_stalld || csr.io.interrupt
|
ibuf.io.inst(0).ready := !ctrl_stalld || csr.io.interrupt
|
||||||
|
|
||||||
|
@ -16,11 +16,18 @@ case object PAddrBits extends Field[Int]
|
|||||||
case object PgLevels extends Field[Int]
|
case object PgLevels extends Field[Int]
|
||||||
case object ASIdBits extends Field[Int]
|
case object ASIdBits extends Field[Int]
|
||||||
|
|
||||||
|
class SFenceReq(implicit p: Parameters) extends CoreBundle()(p) {
|
||||||
|
val rs1 = Bool()
|
||||||
|
val rs2 = Bool()
|
||||||
|
val asid = UInt(width = asIdBits max 1) // TODO zero-width
|
||||||
|
}
|
||||||
|
|
||||||
class TLBReq(implicit p: Parameters) extends CoreBundle()(p) {
|
class TLBReq(implicit p: Parameters) extends CoreBundle()(p) {
|
||||||
val vaddr = UInt(width = vaddrBitsExtended)
|
val vaddr = UInt(width = vaddrBitsExtended)
|
||||||
val passthrough = Bool()
|
val passthrough = Bool()
|
||||||
val instruction = Bool()
|
val instruction = Bool()
|
||||||
val store = Bool()
|
val store = Bool()
|
||||||
|
val sfence = Valid(new SFenceReq)
|
||||||
}
|
}
|
||||||
|
|
||||||
class TLBResp(implicit p: Parameters) extends CoreBundle()(p) {
|
class TLBResp(implicit p: Parameters) extends CoreBundle()(p) {
|
||||||
@ -63,6 +70,7 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod
|
|||||||
val (vpn, pgOffset) = Split(io.req.bits.vaddr, pgIdxBits)
|
val (vpn, pgOffset) = Split(io.req.bits.vaddr, pgIdxBits)
|
||||||
val refill_ppn = io.ptw.resp.bits.pte.ppn(ppnBits-1, 0)
|
val refill_ppn = io.ptw.resp.bits.pte.ppn(ppnBits-1, 0)
|
||||||
val do_refill = Bool(usingVM) && io.ptw.resp.valid
|
val do_refill = Bool(usingVM) && io.ptw.resp.valid
|
||||||
|
val invalidate_refill = state.isOneOf(s_request /* don't care */, s_wait_invalidate)
|
||||||
val mpu_ppn = Mux(do_refill, refill_ppn,
|
val mpu_ppn = Mux(do_refill, refill_ppn,
|
||||||
Mux(vm_enabled, ppns.last, vpn(ppnBits-1, 0)))
|
Mux(vm_enabled, ppns.last, vpn(ppnBits-1, 0)))
|
||||||
val mpu_physaddr = Cat(mpu_ppn, io.req.bits.vaddr(pgIdxBits-1, 0))
|
val mpu_physaddr = Cat(mpu_ppn, io.req.bits.vaddr(pgIdxBits-1, 0))
|
||||||
@ -76,7 +84,6 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod
|
|||||||
val isSpecial = {
|
val isSpecial = {
|
||||||
val homogeneous = Wire(init = false.B)
|
val homogeneous = Wire(init = false.B)
|
||||||
for (i <- 0 until pgLevels) {
|
for (i <- 0 until pgLevels) {
|
||||||
println(BigInt(1) << (pgIdxBits + ((pgLevels - 1 - i) * pgLevelBits)))
|
|
||||||
when (io.ptw.resp.bits.level === i) { homogeneous := TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), BigInt(1) << (pgIdxBits + ((pgLevels - 1 - i) * pgLevelBits)))(mpu_physaddr).homogeneous }
|
when (io.ptw.resp.bits.level === i) { homogeneous := TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), BigInt(1) << (pgIdxBits + ((pgLevels - 1 - i) * pgLevelBits)))(mpu_physaddr).homogeneous }
|
||||||
}
|
}
|
||||||
!homogeneous
|
!homogeneous
|
||||||
@ -108,7 +115,7 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod
|
|||||||
val sr_array = Reg(UInt(width = totalEntries)) // read permission
|
val sr_array = Reg(UInt(width = totalEntries)) // read permission
|
||||||
val xr_array = Reg(UInt(width = totalEntries)) // read permission to executable page
|
val xr_array = Reg(UInt(width = totalEntries)) // read permission to executable page
|
||||||
val cash_array = Reg(UInt(width = normalEntries)) // cacheable
|
val cash_array = Reg(UInt(width = normalEntries)) // cacheable
|
||||||
when (do_refill) {
|
when (do_refill && !invalidate_refill) {
|
||||||
val waddr = Mux(isSpecial, specialEntry.U, r_refill_waddr)
|
val waddr = Mux(isSpecial, specialEntry.U, r_refill_waddr)
|
||||||
val pte = io.ptw.resp.bits.pte
|
val pte = io.ptw.resp.bits.pte
|
||||||
ppns(waddr) := pte.ppn
|
ppns(waddr) := pte.ppn
|
||||||
@ -138,7 +145,7 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod
|
|||||||
if (vpnBits == vpnBitsExtended) Bool(false)
|
if (vpnBits == vpnBitsExtended) Bool(false)
|
||||||
else vpn(vpnBits) =/= vpn(vpnBits-1)
|
else vpn(vpnBits) =/= vpn(vpnBits-1)
|
||||||
val tlb_hit = hits(totalEntries-1, 0).orR
|
val tlb_hit = hits(totalEntries-1, 0).orR
|
||||||
val tlb_miss = vm_enabled && !bad_va && !tlb_hit
|
val tlb_miss = vm_enabled && !bad_va && !tlb_hit && !io.req.bits.sfence.valid
|
||||||
|
|
||||||
when (io.req.valid && !tlb_miss && !hits(specialEntry)) {
|
when (io.req.valid && !tlb_miss && !hits(specialEntry)) {
|
||||||
plru.access(OHToUInt(hits(normalEntries-1, 0)))
|
plru.access(OHToUInt(hits(normalEntries-1, 0)))
|
||||||
@ -166,6 +173,7 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod
|
|||||||
io.ptw.req.bits.fetch := r_req.instruction
|
io.ptw.req.bits.fetch := r_req.instruction
|
||||||
|
|
||||||
if (usingVM) {
|
if (usingVM) {
|
||||||
|
val sfence = io.req.valid && io.req.bits.sfence.valid
|
||||||
when (io.req.fire() && tlb_miss) {
|
when (io.req.fire() && tlb_miss) {
|
||||||
state := s_request
|
state := s_request
|
||||||
r_refill_tag := lookup_tag
|
r_refill_tag := lookup_tag
|
||||||
@ -173,22 +181,20 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod
|
|||||||
r_req := io.req.bits
|
r_req := io.req.bits
|
||||||
}
|
}
|
||||||
when (state === s_request) {
|
when (state === s_request) {
|
||||||
when (io.ptw.invalidate) {
|
when (sfence) { state := s_ready }
|
||||||
state := s_ready
|
when (io.ptw.req.ready) { state := Mux(sfence, s_wait_invalidate, s_wait) }
|
||||||
}
|
}
|
||||||
when (io.ptw.req.ready) {
|
when (state === s_wait && sfence) {
|
||||||
state := s_wait
|
|
||||||
when (io.ptw.invalidate) { state := s_wait_invalidate }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
when (state === s_wait && io.ptw.invalidate) {
|
|
||||||
state := s_wait_invalidate
|
state := s_wait_invalidate
|
||||||
}
|
}
|
||||||
when (io.ptw.resp.valid) {
|
when (io.ptw.resp.valid) {
|
||||||
state := s_ready
|
state := s_ready
|
||||||
}
|
}
|
||||||
|
|
||||||
when (io.ptw.invalidate || multipleHits) {
|
when (sfence && io.req.bits.sfence.bits.rs1) {
|
||||||
|
valid := valid & ~hits(totalEntries-1, 0)
|
||||||
|
}
|
||||||
|
when (sfence && !io.req.bits.sfence.bits.rs1 || multipleHits) {
|
||||||
valid := 0
|
valid := 0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -29,6 +29,7 @@ trait MemoryOpConstants {
|
|||||||
def M_FLUSH = UInt("b10000") // write back dirty data and cede R/W permissions
|
def M_FLUSH = UInt("b10000") // write back dirty data and cede R/W permissions
|
||||||
def M_PRODUCE = UInt("b10001") // write back dirty data and cede W permissions
|
def M_PRODUCE = UInt("b10001") // write back dirty data and cede W permissions
|
||||||
def M_CLEAN = UInt("b10011") // write back dirty data and retain R/W permissions
|
def M_CLEAN = UInt("b10011") // write back dirty data and retain R/W permissions
|
||||||
|
def M_SFENCE = UInt("b10100") // flush TLB
|
||||||
|
|
||||||
def isAMO(cmd: UInt) = cmd(3) || cmd === M_XA_SWAP
|
def isAMO(cmd: UInt) = cmd(3) || cmd === M_XA_SWAP
|
||||||
def isPrefetch(cmd: UInt) = cmd === M_PFR || cmd === M_PFW
|
def isPrefetch(cmd: UInt) = cmd === M_PFR || cmd === M_PFW
|
||||||
|
Loading…
Reference in New Issue
Block a user