1
0

Support SFENCE.VMA rs1 argument

This one's a little invasive.  To flush a specific entry from the TLB, you
need to reuse its CAM port.  Since the TLB lookup can be on the critical
path, we wish to avoid muxing in another address.

This is simple on the data side, where the datapath already carries rs1 to
the TLB (it's the same path as the AMO address calculation).  It's trickier
for the I$, where the TLB lookup address comes from the fetch stage PC.
The trick is to temporarily redirect the PC to the address held in rs1, so
that the ordinary fetch-stage lookup presents that address to the I$ TLB, then
redirect the PC again to the instruction after SFENCE.VMA.
This commit is contained in:
Andrew Waterman 2017-03-14 13:54:49 -07:00
parent 797c18b8db
commit cf168e419b
10 changed files with 61 additions and 35 deletions

View File

@ -52,6 +52,5 @@ class DummyPTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) {
requestor.ptbr.mode := requestor.ptbr.pgLevelsToMode(pgLevels).U requestor.ptbr.mode := requestor.ptbr.pgLevelsToMode(pgLevels).U
requestor.ptbr.asid := UInt(0) requestor.ptbr.asid := UInt(0)
requestor.ptbr.ppn := UInt(0) requestor.ptbr.ppn := UInt(0)
requestor.invalidate := Bool(false)
} }
} }

View File

@ -178,7 +178,6 @@ class CSRFileIO(implicit p: Parameters) extends CoreBundle
val cause = UInt(INPUT, xLen) val cause = UInt(INPUT, xLen)
val pc = UInt(INPUT, vaddrBitsExtended) val pc = UInt(INPUT, vaddrBitsExtended)
val badaddr = UInt(INPUT, vaddrBitsExtended) val badaddr = UInt(INPUT, vaddrBitsExtended)
val fatc = Bool(OUTPUT)
val time = UInt(OUTPUT, xLen) val time = UInt(OUTPUT, xLen)
val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ) val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ)
val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ)).flip val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ)).flip
@ -410,12 +409,10 @@ class CSRFile(perfEventSets: EventSets = new EventSets(Seq()))(implicit p: Param
val system_insn = io.rw.cmd === CSR.I val system_insn = io.rw.cmd === CSR.I
val opcode = UInt(1) << io.rw.addr(2,0) val opcode = UInt(1) << io.rw.addr(2,0)
val insn_rs2 = io.rw.addr(5) val insn_call = system_insn && opcode(0)
val insn_call = system_insn && !insn_rs2 && opcode(0)
val insn_break = system_insn && opcode(1) val insn_break = system_insn && opcode(1)
val insn_ret = system_insn && opcode(2) val insn_ret = system_insn && opcode(2)
val insn_wfi = system_insn && opcode(5) val insn_wfi = system_insn && opcode(5)
val insn_sfence_vma = system_insn && insn_rs2
private def decodeAny(m: LinkedHashMap[Int,Bits]): Bool = m.map { case(k: Int, _: Bits) => io.decode.csr === k }.reduce(_||_) private def decodeAny(m: LinkedHashMap[Int,Bits]): Bool = m.map { case(k: Int, _: Bits) => io.decode.csr === k }.reduce(_||_)
val allow_wfi = Bool(!usingVM) || effective_prv > PRV.S || !reg_mstatus.tw val allow_wfi = Bool(!usingVM) || effective_prv > PRV.S || !reg_mstatus.tw
@ -447,7 +444,6 @@ class CSRFile(perfEventSets: EventSets = new EventSets(Seq()))(implicit p: Param
val delegate = Bool(usingVM) && reg_mstatus.prv <= PRV.S && Mux(cause(xLen-1), reg_mideleg(cause_lsbs), reg_medeleg(cause_lsbs)) val delegate = Bool(usingVM) && reg_mstatus.prv <= PRV.S && Mux(cause(xLen-1), reg_mideleg(cause_lsbs), reg_medeleg(cause_lsbs))
val debugTVec = Mux(reg_debug, UInt(0x808), UInt(0x800)) val debugTVec = Mux(reg_debug, UInt(0x808), UInt(0x800))
val tvec = Mux(trapToDebug, debugTVec, Mux(delegate, reg_stvec.sextTo(vaddrBitsExtended), reg_mtvec)) val tvec = Mux(trapToDebug, debugTVec, Mux(delegate, reg_stvec.sextTo(vaddrBitsExtended), reg_mtvec))
io.fatc := insn_sfence_vma
io.evec := tvec io.evec := tvec
io.ptbr := reg_sptbr io.ptbr := reg_sptbr
io.eret := insn_call || insn_break || insn_ret io.eret := insn_call || insn_break || insn_ret

View File

@ -73,6 +73,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
val s1_read = isRead(s1_req.cmd) val s1_read = isRead(s1_req.cmd)
val s1_write = isWrite(s1_req.cmd) val s1_write = isWrite(s1_req.cmd)
val s1_readwrite = s1_read || s1_write val s1_readwrite = s1_read || s1_write
val s1_sfence = s1_req.cmd === M_SFENCE
val s1_flush_valid = Reg(Bool()) val s1_flush_valid = Reg(Bool())
val s_ready :: s_voluntary_writeback :: s_probe_rep_dirty :: s_probe_rep_clean :: s_probe_rep_miss :: s_voluntary_write_meta :: s_probe_write_meta :: Nil = Enum(UInt(), 7) val s_ready :: s_voluntary_writeback :: s_probe_rep_dirty :: s_probe_rep_clean :: s_probe_rep_miss :: s_voluntary_write_meta :: s_probe_write_meta :: Nil = Enum(UInt(), 7)
@ -104,7 +105,11 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
// address translation // address translation
val tlb = Module(new TLB(nTLBEntries)) val tlb = Module(new TLB(nTLBEntries))
io.ptw <> tlb.io.ptw io.ptw <> tlb.io.ptw
tlb.io.req.valid := s1_valid_masked && s1_readwrite tlb.io.req.valid := s1_valid_masked && (s1_readwrite || s1_sfence)
tlb.io.req.bits.sfence.valid := s1_sfence
tlb.io.req.bits.sfence.bits.rs1 := s1_req.typ(0)
tlb.io.req.bits.sfence.bits.rs2 := s1_req.typ(1)
tlb.io.req.bits.sfence.bits.asid := io.cpu.s1_data
tlb.io.req.bits.passthrough := s1_req.phys tlb.io.req.bits.passthrough := s1_req.phys
tlb.io.req.bits.vaddr := s1_req.addr tlb.io.req.bits.vaddr := s1_req.addr
tlb.io.req.bits.instruction := false tlb.io.req.bits.instruction := false
@ -136,7 +141,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
val s1_data_way = Mux(inWriteback, releaseWay, s1_hit_way) val s1_data_way = Mux(inWriteback, releaseWay, s1_hit_way)
val s1_data = Mux1H(s1_data_way, data.io.resp) // retime into s2 if critical val s1_data = Mux1H(s1_data_way, data.io.resp) // retime into s2 if critical
val s2_valid = Reg(next=s1_valid_masked, init=Bool(false)) val s2_valid = Reg(next=s1_valid_masked && !s1_sfence, init=Bool(false))
val s2_probe = Reg(next=s1_probe, init=Bool(false)) val s2_probe = Reg(next=s1_probe, init=Bool(false))
val releaseInFlight = s1_probe || s2_probe || release_state =/= s_ready val releaseInFlight = s1_probe || s2_probe || release_state =/= s_ready
val s2_valid_masked = s2_valid && Reg(next = !s1_nack) val s2_valid_masked = s2_valid && Reg(next = !s1_nack)

View File

@ -28,12 +28,12 @@ class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) {
class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) { class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) {
val req = Valid(new FrontendReq) val req = Valid(new FrontendReq)
val sfence = Valid(new SFenceReq)
val resp = Decoupled(new FrontendResp).flip val resp = Decoupled(new FrontendResp).flip
val btb_update = Valid(new BTBUpdate) val btb_update = Valid(new BTBUpdate)
val bht_update = Valid(new BHTUpdate) val bht_update = Valid(new BHTUpdate)
val ras_update = Valid(new RASUpdate) val ras_update = Valid(new RASUpdate)
val flush_icache = Bool(OUTPUT) val flush_icache = Bool(OUTPUT)
val flush_tlb = Bool(OUTPUT)
val npc = UInt(INPUT, width = vaddrBitsExtended) val npc = UInt(INPUT, width = vaddrBitsExtended)
// performance events // performance events
@ -133,12 +133,13 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
tlb.io.req.bits.passthrough := Bool(false) tlb.io.req.bits.passthrough := Bool(false)
tlb.io.req.bits.instruction := Bool(true) tlb.io.req.bits.instruction := Bool(true)
tlb.io.req.bits.store := Bool(false) tlb.io.req.bits.store := Bool(false)
tlb.io.req.bits.sfence := io.cpu.sfence
icache.io.req.valid := !stall && !s0_same_block icache.io.req.valid := !stall && !s0_same_block
icache.io.req.bits.addr := io.cpu.npc icache.io.req.bits.addr := io.cpu.npc
icache.io.invalidate := io.cpu.flush_icache icache.io.invalidate := io.cpu.flush_icache
icache.io.s1_paddr := tlb.io.resp.paddr icache.io.s1_paddr := tlb.io.resp.paddr
icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.cpu.flush_tlb icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss
icache.io.s2_kill := s2_speculative && !s2_cacheable icache.io.s2_kill := s2_speculative && !s2_cacheable
icache.io.resp.ready := !stall && !s1_same_block icache.io.resp.ready := !stall && !s1_same_block

View File

@ -129,7 +129,7 @@ class IDecode(implicit val p: Parameters) extends DecodeConstants
class SDecode(implicit val p: Parameters) extends DecodeConstants class SDecode(implicit val p: Parameters) extends DecodeConstants
{ {
val table: Array[(BitPat, List[BitPat])] = Array( val table: Array[(BitPat, List[BitPat])] = Array(
SFENCE_VMA->List(Y,N,N,N,N,N,Y,Y,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N,N), SFENCE_VMA->List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_SFENCE, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N),
SRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N,N)) SRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N,N))
} }

View File

@ -680,8 +680,9 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule
val s1_valid_masked = s1_valid && !io.cpu.s1_kill && !io.cpu.xcpt.asUInt.orR val s1_valid_masked = s1_valid && !io.cpu.s1_kill && !io.cpu.xcpt.asUInt.orR
val s1_replay = Reg(init=Bool(false)) val s1_replay = Reg(init=Bool(false))
val s1_clk_en = Reg(Bool()) val s1_clk_en = Reg(Bool())
val s1_sfence = s1_req.cmd === M_SFENCE
val s2_valid = Reg(next=s1_valid_masked, init=Bool(false)) val s2_valid = Reg(next=s1_valid_masked && !s1_sfence, init=Bool(false))
val s2_req = Reg(io.cpu.req.bits) val s2_req = Reg(io.cpu.req.bits)
val s2_replay = Reg(next=s1_replay, init=Bool(false)) && s2_req.cmd =/= M_FLUSH_ALL val s2_replay = Reg(next=s1_replay, init=Bool(false)) && s2_req.cmd =/= M_FLUSH_ALL
val s2_recycle = Wire(Bool()) val s2_recycle = Wire(Bool())
@ -698,7 +699,11 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule
val dtlb = Module(new TLB(nTLBEntries)) val dtlb = Module(new TLB(nTLBEntries))
io.ptw <> dtlb.io.ptw io.ptw <> dtlb.io.ptw
dtlb.io.req.valid := s1_valid_masked && s1_readwrite dtlb.io.req.valid := s1_valid_masked && (s1_readwrite || s1_sfence)
dtlb.io.req.bits.sfence.valid := s1_sfence
dtlb.io.req.bits.sfence.bits.rs1 := s1_req.typ(0)
dtlb.io.req.bits.sfence.bits.rs2 := s1_req.typ(1)
dtlb.io.req.bits.sfence.bits.asid := io.cpu.s1_data
dtlb.io.req.bits.passthrough := s1_req.phys dtlb.io.req.bits.passthrough := s1_req.phys
dtlb.io.req.bits.vaddr := s1_req.addr dtlb.io.req.bits.vaddr := s1_req.addr
dtlb.io.req.bits.instruction := Bool(false) dtlb.io.req.bits.instruction := Bool(false)

View File

@ -30,7 +30,6 @@ class TLBPTWIO(implicit p: Parameters) extends CoreBundle()(p) {
val req = Decoupled(new PTWReq) val req = Decoupled(new PTWReq)
val resp = Valid(new PTWResp).flip val resp = Valid(new PTWResp).flip
val ptbr = new PTBR().asInput val ptbr = new PTBR().asInput
val invalidate = Bool(INPUT)
val status = new MStatus().asInput val status = new MStatus().asInput
} }
@ -138,7 +137,6 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) {
io.requestor(i).resp.bits.level := count io.requestor(i).resp.bits.level := count
io.requestor(i).resp.bits.pte.ppn := pte_addr >> pgIdxBits io.requestor(i).resp.bits.pte.ppn := pte_addr >> pgIdxBits
io.requestor(i).ptbr := io.dpath.ptbr io.requestor(i).ptbr := io.dpath.ptbr
io.requestor(i).invalidate := io.dpath.invalidate
io.requestor(i).status := io.dpath.status io.requestor(i).status := io.dpath.status
} }

View File

@ -141,6 +141,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
val mem_reg_slow_bypass = Reg(Bool()) val mem_reg_slow_bypass = Reg(Bool())
val mem_reg_load = Reg(Bool()) val mem_reg_load = Reg(Bool())
val mem_reg_store = Reg(Bool()) val mem_reg_store = Reg(Bool())
val mem_reg_sfence = Reg(Bool())
val mem_reg_pc = Reg(UInt()) val mem_reg_pc = Reg(UInt())
val mem_reg_inst = Reg(Bits()) val mem_reg_inst = Reg(Bits())
val mem_reg_wdata = Reg(Bits()) val mem_reg_wdata = Reg(Bits())
@ -151,6 +152,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
val wb_reg_xcpt = Reg(Bool()) val wb_reg_xcpt = Reg(Bool())
val wb_reg_replay = Reg(Bool()) val wb_reg_replay = Reg(Bool())
val wb_reg_cause = Reg(UInt()) val wb_reg_cause = Reg(UInt())
val wb_reg_sfence = Reg(Bool())
val wb_reg_sfence_done = Reg(Bool())
val wb_reg_pc = Reg(UInt()) val wb_reg_pc = Reg(UInt())
val wb_reg_inst = Reg(Bits()) val wb_reg_inst = Reg(Bits())
val wb_reg_wdata = Reg(Bits()) val wb_reg_wdata = Reg(Bits())
@ -189,7 +192,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
val id_system_insn = id_ctrl.csr >= CSR.I val id_system_insn = id_ctrl.csr >= CSR.I
val id_csr_ren = id_ctrl.csr.isOneOf(CSR.S, CSR.C) && id_raddr1 === UInt(0) val id_csr_ren = id_ctrl.csr.isOneOf(CSR.S, CSR.C) && id_raddr1 === UInt(0)
val id_csr = Mux(id_csr_ren, CSR.R, id_ctrl.csr) val id_csr = Mux(id_csr_ren, CSR.R, id_ctrl.csr)
val id_csr_flush = id_system_insn || (id_csr_en && !id_csr_ren && csr.io.decode.write_flush) val id_sfence = id_ctrl.mem && id_ctrl.mem_cmd === M_SFENCE
val id_csr_flush = id_sfence || id_system_insn || (id_csr_en && !id_csr_ren && csr.io.decode.write_flush)
val id_illegal_insn = !id_ctrl.legal || val id_illegal_insn = !id_ctrl.legal ||
id_ctrl.div && !csr.io.status.isa('m'-'a') || id_ctrl.div && !csr.io.status.isa('m'-'a') ||
@ -199,7 +203,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
ibuf.io.inst(0).bits.rvc && !csr.io.status.isa('c'-'a') || ibuf.io.inst(0).bits.rvc && !csr.io.status.isa('c'-'a') ||
id_ctrl.rocc && csr.io.decode.rocc_illegal || id_ctrl.rocc && csr.io.decode.rocc_illegal ||
id_csr_en && (csr.io.decode.read_illegal || !id_csr_ren && csr.io.decode.write_illegal) || id_csr_en && (csr.io.decode.read_illegal || !id_csr_ren && csr.io.decode.write_illegal) ||
id_system_insn && csr.io.decode.system_illegal (id_sfence || id_system_insn) && csr.io.decode.system_illegal
// stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE) // stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE)
val id_amo_aq = id_inst(0)(26) val id_amo_aq = id_inst(0)(26)
val id_amo_rl = id_inst(0)(25) val id_amo_rl = id_inst(0)(25)
@ -297,6 +301,10 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
} }
ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush || csr.io.singleStep ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush || csr.io.singleStep
ex_reg_load_use := id_load_use ex_reg_load_use := id_load_use
when (id_sfence) {
ex_ctrl.mem_type := Cat(id_raddr2 =/= UInt(0), id_raddr1 =/= UInt(0))
when (wb_reg_sfence_done) { ex_ctrl.mem := false }
}
when (id_ctrl.jalr && csr.io.status.debug) { when (id_ctrl.jalr && csr.io.status.debug) {
ex_reg_flush_pipe := true ex_reg_flush_pipe := true
@ -340,9 +348,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
Mux(mem_ctrl.branch && mem_br_taken, ImmGen(IMM_SB, mem_reg_inst), Mux(mem_ctrl.branch && mem_br_taken, ImmGen(IMM_SB, mem_reg_inst),
Mux(Bool(!fastJAL) && mem_ctrl.jal, ImmGen(IMM_UJ, mem_reg_inst), Mux(Bool(!fastJAL) && mem_ctrl.jal, ImmGen(IMM_UJ, mem_reg_inst),
Mux(mem_reg_rvc, SInt(2), SInt(4)))) Mux(mem_reg_rvc, SInt(2), SInt(4))))
val mem_npc = (Mux(mem_ctrl.jalr, encodeVirtualAddress(mem_reg_wdata, mem_reg_wdata).asSInt, mem_br_target) & SInt(-2)).asUInt val mem_npc = (Mux(mem_ctrl.jalr || mem_reg_sfence, encodeVirtualAddress(mem_reg_wdata, mem_reg_wdata).asSInt, mem_br_target) & SInt(-2)).asUInt
val mem_wrong_npc = Mux(ex_pc_valid, mem_npc =/= ex_reg_pc, Mux(ibuf.io.inst(0).valid, mem_npc =/= ibuf.io.pc, Bool(true))) val mem_wrong_npc = Mux(ex_pc_valid, mem_npc =/= ex_reg_pc, Mux(ibuf.io.inst(0).valid, mem_npc =/= ibuf.io.pc, Bool(true)))
val mem_npc_misaligned = !csr.io.status.isa('c'-'a') && mem_npc(1) val mem_npc_misaligned = !csr.io.status.isa('c'-'a') && mem_npc(1) && !mem_reg_sfence
val mem_int_wdata = Mux(!mem_reg_xcpt && (mem_ctrl.jalr ^ mem_npc_misaligned), mem_br_target, mem_reg_wdata.asSInt).asUInt val mem_int_wdata = Mux(!mem_reg_xcpt && (mem_ctrl.jalr ^ mem_npc_misaligned), mem_br_target, mem_reg_wdata.asSInt).asUInt
val mem_cfi = mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal val mem_cfi = mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal
val mem_cfi_taken = (mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || (Bool(!fastJAL) && mem_ctrl.jal) val mem_cfi_taken = (mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || (Bool(!fastJAL) && mem_ctrl.jal)
@ -361,6 +369,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
mem_reg_rvc := ex_reg_rvc mem_reg_rvc := ex_reg_rvc
mem_reg_load := ex_ctrl.mem && isRead(ex_ctrl.mem_cmd) mem_reg_load := ex_ctrl.mem && isRead(ex_ctrl.mem_cmd)
mem_reg_store := ex_ctrl.mem && isWrite(ex_ctrl.mem_cmd) mem_reg_store := ex_ctrl.mem && isWrite(ex_ctrl.mem_cmd)
mem_reg_sfence := Bool(usingVM) && ex_ctrl.mem && ex_ctrl.mem_cmd === M_SFENCE
mem_reg_btb_hit := ex_reg_btb_hit mem_reg_btb_hit := ex_reg_btb_hit
when (ex_reg_btb_hit) { mem_reg_btb_resp := ex_reg_btb_resp } when (ex_reg_btb_hit) { mem_reg_btb_resp := ex_reg_btb_resp }
mem_reg_flush_pipe := ex_reg_flush_pipe mem_reg_flush_pipe := ex_reg_flush_pipe
@ -391,7 +400,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
val dcache_kill_mem = mem_reg_valid && mem_ctrl.wxd && io.dmem.replay_next // structural hazard on writeback port val dcache_kill_mem = mem_reg_valid && mem_ctrl.wxd && io.dmem.replay_next // structural hazard on writeback port
val fpu_kill_mem = mem_reg_valid && mem_ctrl.fp && io.fpu.nack_mem val fpu_kill_mem = mem_reg_valid && mem_ctrl.fp && io.fpu.nack_mem
val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem || mem_reg_valid && mem_reg_sfence
val killm_common = dcache_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid val killm_common = dcache_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid
div.io.kill := killm_common && Reg(next = div.io.req.fire()) div.io.kill := killm_common && Reg(next = div.io.req.fire())
val ctrl_killm = killm_common || mem_xcpt || fpu_kill_mem val ctrl_killm = killm_common || mem_xcpt || fpu_kill_mem
@ -403,8 +412,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
when (mem_xcpt) { wb_reg_cause := mem_cause } when (mem_xcpt) { wb_reg_cause := mem_cause }
when (mem_pc_valid) { when (mem_pc_valid) {
wb_ctrl := mem_ctrl wb_ctrl := mem_ctrl
wb_reg_sfence := mem_reg_sfence
wb_reg_wdata := Mux(!mem_reg_xcpt && mem_ctrl.fp && mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata) wb_reg_wdata := Mux(!mem_reg_xcpt && mem_ctrl.fp && mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata)
when (mem_ctrl.rocc) { when (mem_ctrl.rocc || mem_reg_sfence) {
wb_reg_rs2 := mem_reg_rs2 wb_reg_rs2 := mem_reg_rs2
} }
wb_reg_inst := mem_reg_inst wb_reg_inst := mem_reg_inst
@ -448,6 +458,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
} }
val wb_valid = wb_reg_valid && !replay_wb && !wb_xcpt val wb_valid = wb_reg_valid && !replay_wb && !wb_xcpt
when (wb_valid || wb_xcpt) { wb_reg_sfence_done := false }
when (io.imem.sfence.valid) { wb_reg_sfence_done := true }
val wb_wen = wb_valid && wb_ctrl.wxd val wb_wen = wb_valid && wb_ctrl.wxd
val rf_wen = wb_wen || ll_wen val rf_wen = wb_wen || ll_wen
val rf_waddr = Mux(ll_wen, ll_waddr, wb_waddr) val rf_waddr = Mux(ll_wen, ll_waddr, wb_waddr)
@ -470,7 +482,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
csr.io.pc := wb_reg_pc csr.io.pc := wb_reg_pc
csr.io.badaddr := encodeVirtualAddress(wb_reg_wdata, wb_reg_wdata) csr.io.badaddr := encodeVirtualAddress(wb_reg_wdata, wb_reg_wdata)
io.ptw.ptbr := csr.io.ptbr io.ptw.ptbr := csr.io.ptbr
io.ptw.invalidate := csr.io.fatc
io.ptw.status := csr.io.status io.ptw.status := csr.io.status
csr.io.rw.addr := wb_reg_inst(31,20) csr.io.rw.addr := wb_reg_inst(31,20)
csr.io.rw.cmd := Mux(wb_reg_valid, wb_ctrl.csr, CSR.N) csr.io.rw.cmd := Mux(wb_reg_valid, wb_ctrl.csr, CSR.N)
@ -542,7 +553,11 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
Mux(take_pc_mem || Bool(!fastJAL), mem_npc, // branch misprediction Mux(take_pc_mem || Bool(!fastJAL), mem_npc, // branch misprediction
id_npc))) // JAL id_npc))) // JAL
io.imem.flush_icache := wb_reg_valid && wb_ctrl.fence_i && !io.dmem.s2_nack io.imem.flush_icache := wb_reg_valid && wb_ctrl.fence_i && !io.dmem.s2_nack
io.imem.flush_tlb := csr.io.fatc io.imem.sfence.valid := wb_reg_valid && wb_reg_sfence
io.imem.sfence.bits.rs1 := wb_ctrl.mem_type(0)
io.imem.sfence.bits.rs2 := wb_ctrl.mem_type(1)
io.imem.sfence.bits.asid := wb_reg_rs2
io.ptw.invalidate := io.imem.sfence.valid && !io.imem.sfence.bits.rs1
ibuf.io.inst(0).ready := !ctrl_stalld || csr.io.interrupt ibuf.io.inst(0).ready := !ctrl_stalld || csr.io.interrupt

View File

@ -16,11 +16,18 @@ case object PAddrBits extends Field[Int]
case object PgLevels extends Field[Int] case object PgLevels extends Field[Int]
case object ASIdBits extends Field[Int] case object ASIdBits extends Field[Int]
// Operands of an SFENCE.VMA, carried as the `sfence` field of a TLB request
// (and, for the instruction side, on the frontend's `sfence` port).
class SFenceReq(implicit p: Parameters) extends CoreBundle()(p) {
// The core sets this when the instruction's rs1 register index is nonzero.
// The TLB then invalidates only the entries that hit on the request's vaddr;
// when it is clear, the TLB invalidates every entry.
val rs1 = Bool()
// The core sets this when the instruction's rs2 register index is nonzero.
// NOTE(review): the TLB logic visible in this change does not read it.
val rs2 = Bool()
// ASID operand, driven from the rs2 register value by the core and caches.
// Kept at least 1 bit wide even when asIdBits is 0.
// NOTE(review): the TLB logic visible in this change does not read it.
val asid = UInt(width = asIdBits max 1) // TODO zero-width
}
class TLBReq(implicit p: Parameters) extends CoreBundle()(p) { class TLBReq(implicit p: Parameters) extends CoreBundle()(p) {
val vaddr = UInt(width = vaddrBitsExtended) val vaddr = UInt(width = vaddrBitsExtended)
val passthrough = Bool() val passthrough = Bool()
val instruction = Bool() val instruction = Bool()
val store = Bool() val store = Bool()
val sfence = Valid(new SFenceReq)
} }
class TLBResp(implicit p: Parameters) extends CoreBundle()(p) { class TLBResp(implicit p: Parameters) extends CoreBundle()(p) {
@ -63,6 +70,7 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod
val (vpn, pgOffset) = Split(io.req.bits.vaddr, pgIdxBits) val (vpn, pgOffset) = Split(io.req.bits.vaddr, pgIdxBits)
val refill_ppn = io.ptw.resp.bits.pte.ppn(ppnBits-1, 0) val refill_ppn = io.ptw.resp.bits.pte.ppn(ppnBits-1, 0)
val do_refill = Bool(usingVM) && io.ptw.resp.valid val do_refill = Bool(usingVM) && io.ptw.resp.valid
val invalidate_refill = state.isOneOf(s_request /* don't care */, s_wait_invalidate)
val mpu_ppn = Mux(do_refill, refill_ppn, val mpu_ppn = Mux(do_refill, refill_ppn,
Mux(vm_enabled, ppns.last, vpn(ppnBits-1, 0))) Mux(vm_enabled, ppns.last, vpn(ppnBits-1, 0)))
val mpu_physaddr = Cat(mpu_ppn, io.req.bits.vaddr(pgIdxBits-1, 0)) val mpu_physaddr = Cat(mpu_ppn, io.req.bits.vaddr(pgIdxBits-1, 0))
@ -76,7 +84,6 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod
val isSpecial = { val isSpecial = {
val homogeneous = Wire(init = false.B) val homogeneous = Wire(init = false.B)
for (i <- 0 until pgLevels) { for (i <- 0 until pgLevels) {
println(BigInt(1) << (pgIdxBits + ((pgLevels - 1 - i) * pgLevelBits)))
when (io.ptw.resp.bits.level === i) { homogeneous := TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), BigInt(1) << (pgIdxBits + ((pgLevels - 1 - i) * pgLevelBits)))(mpu_physaddr).homogeneous } when (io.ptw.resp.bits.level === i) { homogeneous := TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), BigInt(1) << (pgIdxBits + ((pgLevels - 1 - i) * pgLevelBits)))(mpu_physaddr).homogeneous }
} }
!homogeneous !homogeneous
@ -108,7 +115,7 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod
val sr_array = Reg(UInt(width = totalEntries)) // read permission val sr_array = Reg(UInt(width = totalEntries)) // read permission
val xr_array = Reg(UInt(width = totalEntries)) // read permission to executable page val xr_array = Reg(UInt(width = totalEntries)) // read permission to executable page
val cash_array = Reg(UInt(width = normalEntries)) // cacheable val cash_array = Reg(UInt(width = normalEntries)) // cacheable
when (do_refill) { when (do_refill && !invalidate_refill) {
val waddr = Mux(isSpecial, specialEntry.U, r_refill_waddr) val waddr = Mux(isSpecial, specialEntry.U, r_refill_waddr)
val pte = io.ptw.resp.bits.pte val pte = io.ptw.resp.bits.pte
ppns(waddr) := pte.ppn ppns(waddr) := pte.ppn
@ -138,7 +145,7 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod
if (vpnBits == vpnBitsExtended) Bool(false) if (vpnBits == vpnBitsExtended) Bool(false)
else vpn(vpnBits) =/= vpn(vpnBits-1) else vpn(vpnBits) =/= vpn(vpnBits-1)
val tlb_hit = hits(totalEntries-1, 0).orR val tlb_hit = hits(totalEntries-1, 0).orR
val tlb_miss = vm_enabled && !bad_va && !tlb_hit val tlb_miss = vm_enabled && !bad_va && !tlb_hit && !io.req.bits.sfence.valid
when (io.req.valid && !tlb_miss && !hits(specialEntry)) { when (io.req.valid && !tlb_miss && !hits(specialEntry)) {
plru.access(OHToUInt(hits(normalEntries-1, 0))) plru.access(OHToUInt(hits(normalEntries-1, 0)))
@ -166,6 +173,7 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod
io.ptw.req.bits.fetch := r_req.instruction io.ptw.req.bits.fetch := r_req.instruction
if (usingVM) { if (usingVM) {
val sfence = io.req.valid && io.req.bits.sfence.valid
when (io.req.fire() && tlb_miss) { when (io.req.fire() && tlb_miss) {
state := s_request state := s_request
r_refill_tag := lookup_tag r_refill_tag := lookup_tag
@ -173,22 +181,20 @@ class TLB(entries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreMod
r_req := io.req.bits r_req := io.req.bits
} }
when (state === s_request) { when (state === s_request) {
when (io.ptw.invalidate) { when (sfence) { state := s_ready }
state := s_ready when (io.ptw.req.ready) { state := Mux(sfence, s_wait_invalidate, s_wait) }
}
when (io.ptw.req.ready) {
state := s_wait
when (io.ptw.invalidate) { state := s_wait_invalidate }
}
} }
when (state === s_wait && io.ptw.invalidate) { when (state === s_wait && sfence) {
state := s_wait_invalidate state := s_wait_invalidate
} }
when (io.ptw.resp.valid) { when (io.ptw.resp.valid) {
state := s_ready state := s_ready
} }
when (io.ptw.invalidate || multipleHits) { when (sfence && io.req.bits.sfence.bits.rs1) {
valid := valid & ~hits(totalEntries-1, 0)
}
when (sfence && !io.req.bits.sfence.bits.rs1 || multipleHits) {
valid := 0 valid := 0
} }
} }

View File

@ -29,6 +29,7 @@ trait MemoryOpConstants {
def M_FLUSH = UInt("b10000") // write back dirty data and cede R/W permissions def M_FLUSH = UInt("b10000") // write back dirty data and cede R/W permissions
def M_PRODUCE = UInt("b10001") // write back dirty data and cede W permissions def M_PRODUCE = UInt("b10001") // write back dirty data and cede W permissions
def M_CLEAN = UInt("b10011") // write back dirty data and retain R/W permissions def M_CLEAN = UInt("b10011") // write back dirty data and retain R/W permissions
def M_SFENCE = UInt("b10100") // flush TLB
def isAMO(cmd: UInt) = cmd(3) || cmd === M_XA_SWAP def isAMO(cmd: UInt) = cmd(3) || cmd === M_XA_SWAP
def isPrefetch(cmd: UInt) = cmd === M_PFR || cmd === M_PFW def isPrefetch(cmd: UInt) = cmd === M_PFR || cmd === M_PFW