Reduce latency of page table walks
A small cache in the page table walker (PTW) holds non-leaf page table entries (PTEs), reducing walk latency and data-cache (D$) misses.
This commit is contained in:
parent
31d17cbf86
commit
0332c1e7fe
@ -13,9 +13,7 @@ class PTWReq extends CoreBundle {
|
|||||||
|
|
||||||
class PTWResp extends CoreBundle {
|
class PTWResp extends CoreBundle {
|
||||||
val error = Bool()
|
val error = Bool()
|
||||||
val ppn = UInt(width = ppnBits)
|
val pte = new PTE
|
||||||
val perm = Bits(width = permBits)
|
|
||||||
val dirty = Bool()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
class TLBPTWIO extends CoreBundle {
|
class TLBPTWIO extends CoreBundle {
|
||||||
@ -31,6 +29,17 @@ class DatapathPTWIO extends CoreBundle {
|
|||||||
val status = new MStatus().asInput
|
val status = new MStatus().asInput
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class PTE extends CoreBundle {
|
||||||
|
val ppn = Bits(width = ppnBits)
|
||||||
|
val reserved = Bits(width = 2)
|
||||||
|
val d = Bool()
|
||||||
|
val r = Bool()
|
||||||
|
val perm = Bits(width = 6)
|
||||||
|
val g = Bool()
|
||||||
|
val t = Bool()
|
||||||
|
val v = Bool()
|
||||||
|
}
|
||||||
|
|
||||||
class PTW(n: Int) extends CoreModule
|
class PTW(n: Int) extends CoreModule
|
||||||
{
|
{
|
||||||
val io = new Bundle {
|
val io = new Bundle {
|
||||||
@ -49,7 +58,7 @@ class PTW(n: Int) extends CoreModule
|
|||||||
|
|
||||||
val r_req = Reg(new PTWReq)
|
val r_req = Reg(new PTWReq)
|
||||||
val r_req_dest = Reg(Bits())
|
val r_req_dest = Reg(Bits())
|
||||||
val r_pte = Reg(Bits())
|
val r_pte = Reg(new PTE)
|
||||||
|
|
||||||
val vpn_idx = Vec((0 until levels).map(i => (r_req.addr >> (levels-i-1)*bitsPerLevel)(bitsPerLevel-1,0)))(count)
|
val vpn_idx = Vec((0 until levels).map(i => (r_req.addr >> (levels-i-1)*bitsPerLevel)(bitsPerLevel-1,0)))(count)
|
||||||
|
|
||||||
@ -57,16 +66,39 @@ class PTW(n: Int) extends CoreModule
|
|||||||
arb.io.in <> io.requestor.map(_.req)
|
arb.io.in <> io.requestor.map(_.req)
|
||||||
arb.io.out.ready := state === s_ready
|
arb.io.out.ready := state === s_ready
|
||||||
|
|
||||||
val pte = io.mem.resp.bits.data
|
val pte = new PTE().fromBits(io.mem.resp.bits.data)
|
||||||
|
val pte_addr = Cat(r_pte.ppn, vpn_idx).toUInt << log2Up(xLen/8)
|
||||||
|
|
||||||
when (arb.io.out.fire()) {
|
when (arb.io.out.fire()) {
|
||||||
r_req := arb.io.out.bits
|
r_req := arb.io.out.bits
|
||||||
r_req_dest := arb.io.chosen
|
r_req_dest := arb.io.chosen
|
||||||
r_pte := Cat(io.dpath.ptbr(paddrBits-1,pgIdxBits), pte(pgIdxBits-1,0))
|
r_pte.ppn := io.dpath.ptbr(paddrBits-1,pgIdxBits)
|
||||||
}
|
}
|
||||||
|
|
||||||
val perm_ok = (pte(8,3) & r_req.perm).orR
|
val (pte_cache_hit, pte_cache_data) = {
|
||||||
|
val size = log2Up(levels * 2)
|
||||||
|
val plru = new PseudoLRU(size)
|
||||||
|
val valid = Reg(init = Bits(0, size))
|
||||||
|
val tags = Mem(UInt(width = paddrBits), size)
|
||||||
|
val data = Mem(UInt(width = paddrBits - pgIdxBits), size)
|
||||||
|
|
||||||
|
val hits = Vec(tags.map(_ === pte_addr)).toBits & valid
|
||||||
|
val hit = hits.orR
|
||||||
|
when (io.mem.resp.valid && io.mem.resp.bits.data(1,0).andR && !hit) {
|
||||||
|
val r = Mux(valid.andR, plru.replace, PriorityEncoder(~valid))
|
||||||
|
valid(r) := true
|
||||||
|
tags(r) := pte_addr
|
||||||
|
data(r) := io.mem.resp.bits.data(paddrBits-1,pgIdxBits)
|
||||||
|
}
|
||||||
|
when (hit && state === s_req) { plru.access(OHToUInt(hits)) }
|
||||||
|
when (io.dpath.invalidate) { valid := 0 }
|
||||||
|
|
||||||
|
(hit, Mux1H(hits, data))
|
||||||
|
}
|
||||||
|
|
||||||
|
val perm_ok = (pte.perm & r_req.perm).orR
|
||||||
val is_store = r_req.perm(1) || r_req.perm(4)
|
val is_store = r_req.perm(1) || r_req.perm(4)
|
||||||
val set_dirty_bit = perm_ok && !pte(1) && (!pte(9) || (is_store && !pte(10)))
|
val set_dirty_bit = perm_ok && !pte.t && (!pte.r || (is_store && !pte.d))
|
||||||
when (io.mem.resp.valid && state === s_wait && !set_dirty_bit) {
|
when (io.mem.resp.valid && state === s_wait && !set_dirty_bit) {
|
||||||
r_pte := pte
|
r_pte := pte
|
||||||
}
|
}
|
||||||
@ -75,7 +107,7 @@ class PTW(n: Int) extends CoreModule
|
|||||||
io.mem.req.bits.phys := Bool(true)
|
io.mem.req.bits.phys := Bool(true)
|
||||||
io.mem.req.bits.cmd := Mux(state === s_set_dirty, M_XA_OR, M_XRD)
|
io.mem.req.bits.cmd := Mux(state === s_set_dirty, M_XA_OR, M_XRD)
|
||||||
io.mem.req.bits.typ := MT_D
|
io.mem.req.bits.typ := MT_D
|
||||||
io.mem.req.bits.addr := Cat(r_pte(paddrBits-1,pgIdxBits), vpn_idx).toUInt << log2Up(xLen/8)
|
io.mem.req.bits.addr := pte_addr
|
||||||
io.mem.req.bits.kill := Bool(false)
|
io.mem.req.bits.kill := Bool(false)
|
||||||
io.mem.req.bits.data := UInt(1<<9) | Mux(is_store, UInt(1<<10), UInt(0))
|
io.mem.req.bits.data := UInt(1<<9) | Mux(is_store, UInt(1<<10), UInt(0))
|
||||||
|
|
||||||
@ -89,9 +121,8 @@ class PTW(n: Int) extends CoreModule
|
|||||||
val me = r_req_dest === UInt(i)
|
val me = r_req_dest === UInt(i)
|
||||||
io.requestor(i).resp.valid := resp_val && me
|
io.requestor(i).resp.valid := resp_val && me
|
||||||
io.requestor(i).resp.bits.error := resp_err
|
io.requestor(i).resp.bits.error := resp_err
|
||||||
io.requestor(i).resp.bits.perm := r_pte(8,3)
|
io.requestor(i).resp.bits.pte := r_pte
|
||||||
io.requestor(i).resp.bits.dirty := r_pte(10)
|
io.requestor(i).resp.bits.pte.ppn := resp_ppn
|
||||||
io.requestor(i).resp.bits.ppn := resp_ppn.toUInt
|
|
||||||
io.requestor(i).invalidate := io.dpath.invalidate
|
io.requestor(i).invalidate := io.dpath.invalidate
|
||||||
io.requestor(i).status := io.dpath.status
|
io.requestor(i).status := io.dpath.status
|
||||||
}
|
}
|
||||||
@ -105,7 +136,12 @@ class PTW(n: Int) extends CoreModule
|
|||||||
count := UInt(0)
|
count := UInt(0)
|
||||||
}
|
}
|
||||||
is (s_req) {
|
is (s_req) {
|
||||||
when (io.mem.req.ready) {
|
when (pte_cache_hit && count < levels-1) {
|
||||||
|
io.mem.req.valid := false
|
||||||
|
state := s_req
|
||||||
|
count := count + 1
|
||||||
|
r_pte.ppn := pte_cache_data
|
||||||
|
}.elsewhen (io.mem.req.ready) {
|
||||||
state := s_wait
|
state := s_wait
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -115,10 +151,10 @@ class PTW(n: Int) extends CoreModule
|
|||||||
}
|
}
|
||||||
when (io.mem.resp.valid) {
|
when (io.mem.resp.valid) {
|
||||||
state := s_error
|
state := s_error
|
||||||
when (pte(0)) {
|
when (pte.v) {
|
||||||
when (set_dirty_bit) {
|
when (set_dirty_bit) {
|
||||||
state := s_set_dirty
|
state := s_set_dirty
|
||||||
}.elsewhen (!pte(1)) {
|
}.elsewhen (!pte.t) {
|
||||||
state := s_done
|
state := s_done
|
||||||
}.elsewhen (count < levels-1) {
|
}.elsewhen (count < levels-1) {
|
||||||
state := s_req
|
state := s_req
|
||||||
|
@ -109,7 +109,7 @@ class TLB extends TLBModule {
|
|||||||
val r_req = Reg(new TLBReq)
|
val r_req = Reg(new TLBReq)
|
||||||
|
|
||||||
val tag_cam = Module(new RocketCAM)
|
val tag_cam = Module(new RocketCAM)
|
||||||
val tag_ram = Mem(io.ptw.resp.bits.ppn.clone, entries)
|
val tag_ram = Mem(io.ptw.resp.bits.pte.ppn.clone, entries)
|
||||||
|
|
||||||
val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUInt
|
val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUInt
|
||||||
tag_cam.io.tag := lookup_tag
|
tag_cam.io.tag := lookup_tag
|
||||||
@ -128,8 +128,8 @@ class TLB extends TLBModule {
|
|||||||
val sx_array = Reg(Bits()) // supervisor execute permission
|
val sx_array = Reg(Bits()) // supervisor execute permission
|
||||||
val dirty_array = Reg(Bits()) // PTE dirty bit
|
val dirty_array = Reg(Bits()) // PTE dirty bit
|
||||||
when (io.ptw.resp.valid) {
|
when (io.ptw.resp.valid) {
|
||||||
val perm = io.ptw.resp.bits.perm & ~io.ptw.resp.bits.error.toSInt
|
val perm = io.ptw.resp.bits.pte.perm & ~io.ptw.resp.bits.error.toSInt
|
||||||
tag_ram(r_refill_waddr) := io.ptw.resp.bits.ppn
|
tag_ram(r_refill_waddr) := io.ptw.resp.bits.pte.ppn
|
||||||
valid_array := valid_array.bitSet(r_refill_waddr, !io.ptw.resp.bits.error)
|
valid_array := valid_array.bitSet(r_refill_waddr, !io.ptw.resp.bits.error)
|
||||||
ur_array := ur_array.bitSet(r_refill_waddr, perm(0) || perm(2))
|
ur_array := ur_array.bitSet(r_refill_waddr, perm(0) || perm(2))
|
||||||
uw_array := uw_array.bitSet(r_refill_waddr, perm(1))
|
uw_array := uw_array.bitSet(r_refill_waddr, perm(1))
|
||||||
@ -137,7 +137,7 @@ class TLB extends TLBModule {
|
|||||||
sr_array := sr_array.bitSet(r_refill_waddr, perm(3) || perm(5))
|
sr_array := sr_array.bitSet(r_refill_waddr, perm(3) || perm(5))
|
||||||
sw_array := sw_array.bitSet(r_refill_waddr, perm(4))
|
sw_array := sw_array.bitSet(r_refill_waddr, perm(4))
|
||||||
sx_array := sx_array.bitSet(r_refill_waddr, perm(5))
|
sx_array := sx_array.bitSet(r_refill_waddr, perm(5))
|
||||||
dirty_array := dirty_array.bitSet(r_refill_waddr, io.ptw.resp.bits.dirty)
|
dirty_array := dirty_array.bitSet(r_refill_waddr, io.ptw.resp.bits.pte.d)
|
||||||
}
|
}
|
||||||
|
|
||||||
// high if there are any unused (invalid) entries in the TLB
|
// high if there are any unused (invalid) entries in the TLB
|
||||||
|
Loading…
Reference in New Issue
Block a user