Improve TLB simulation performance
This commit is contained in:
		@@ -18,44 +18,6 @@ trait HasTLBParameters extends HasCoreParameters {
 | 
				
			|||||||
  val camTagBits = asIdBits + vpnBits
 | 
					  val camTagBits = asIdBits + vpnBits
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
abstract class TLBModule(implicit val p: Parameters) extends Module
 | 
					 | 
				
			||||||
  with HasTLBParameters
 | 
					 | 
				
			||||||
abstract class TLBBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
 | 
					 | 
				
			||||||
  with HasTLBParameters
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class CAMIO(implicit p: Parameters) extends TLBBundle()(p) {
 | 
					 | 
				
			||||||
    val clear        = Bool(INPUT)
 | 
					 | 
				
			||||||
    val clear_mask   = Bits(INPUT, entries)
 | 
					 | 
				
			||||||
    val tag          = Bits(INPUT, camTagBits)
 | 
					 | 
				
			||||||
    val hit          = Bool(OUTPUT)
 | 
					 | 
				
			||||||
    val hits         = UInt(OUTPUT, entries)
 | 
					 | 
				
			||||||
    val valid_bits   = Bits(OUTPUT, entries)
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    val write        = Bool(INPUT)
 | 
					 | 
				
			||||||
    val write_tag    = Bits(INPUT, camTagBits)
 | 
					 | 
				
			||||||
    val write_addr    = UInt(INPUT, camAddrBits)
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class RocketCAM(implicit p: Parameters) extends TLBModule()(p) {
 | 
					 | 
				
			||||||
  val io = new CAMIO
 | 
					 | 
				
			||||||
  val cam_tags = Mem(entries, Bits(width = camTagBits))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  val vb_array = Reg(init=Bits(0, entries))
 | 
					 | 
				
			||||||
  when (io.write) {
 | 
					 | 
				
			||||||
    vb_array := vb_array.bitSet(io.write_addr, Bool(true))
 | 
					 | 
				
			||||||
    cam_tags(io.write_addr) := io.write_tag
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  when (io.clear) {
 | 
					 | 
				
			||||||
    vb_array := vb_array & ~io.clear_mask
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  val hits = (0 until entries).map(i => vb_array(i) && cam_tags(i) === io.tag)
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  io.valid_bits := vb_array
 | 
					 | 
				
			||||||
  io.hits := Vec(hits).toBits
 | 
					 | 
				
			||||||
  io.hit := io.hits.orR
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class TLBReq(implicit p: Parameters) extends CoreBundle()(p) {
 | 
					class TLBReq(implicit p: Parameters) extends CoreBundle()(p) {
 | 
				
			||||||
  val vpn = UInt(width = vpnBitsExtended)
 | 
					  val vpn = UInt(width = vpnBitsExtended)
 | 
				
			||||||
  val passthrough = Bool()
 | 
					  val passthrough = Bool()
 | 
				
			||||||
@@ -63,7 +25,7 @@ class TLBReq(implicit p: Parameters) extends CoreBundle()(p) {
 | 
				
			|||||||
  val store = Bool()
 | 
					  val store = Bool()
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class TLBRespNoHitIndex(implicit p: Parameters) extends CoreBundle()(p) {
 | 
					class TLBResp(implicit p: Parameters) extends CoreBundle()(p) {
 | 
				
			||||||
  // lookup responses
 | 
					  // lookup responses
 | 
				
			||||||
  val miss = Bool(OUTPUT)
 | 
					  val miss = Bool(OUTPUT)
 | 
				
			||||||
  val ppn = UInt(OUTPUT, ppnBits)
 | 
					  val ppn = UInt(OUTPUT, ppnBits)
 | 
				
			||||||
@@ -72,101 +34,89 @@ class TLBRespNoHitIndex(implicit p: Parameters) extends CoreBundle()(p) {
 | 
				
			|||||||
  val xcpt_if = Bool(OUTPUT)
 | 
					  val xcpt_if = Bool(OUTPUT)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class TLBResp(implicit p: Parameters) extends TLBRespNoHitIndex()(p) with HasTLBParameters {
 | 
					class TLB(implicit val p: Parameters) extends Module with HasTLBParameters {
 | 
				
			||||||
  val hit_idx = UInt(OUTPUT, entries)
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class TLB(implicit p: Parameters) extends TLBModule()(p) {
 | 
					 | 
				
			||||||
  val io = new Bundle {
 | 
					  val io = new Bundle {
 | 
				
			||||||
    val req = Decoupled(new TLBReq).flip
 | 
					    val req = Decoupled(new TLBReq).flip
 | 
				
			||||||
    val resp = new TLBResp
 | 
					    val resp = new TLBResp
 | 
				
			||||||
    val ptw = new TLBPTWIO
 | 
					    val ptw = new TLBPTWIO
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  val tag_cam = Module(new RocketCAM)
 | 
					  val valid = Reg(init = UInt(0, entries))
 | 
				
			||||||
  val tag_ram = Mem(entries, io.ptw.resp.bits.pte.ppn)
 | 
					  val ppns = Reg(Vec(entries, io.ptw.resp.bits.pte.ppn))
 | 
				
			||||||
 | 
					  val tags = Reg(Vec(entries, UInt(width = asIdBits + vpnBits)))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(UInt(), 4)
 | 
					  val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(UInt(), 4)
 | 
				
			||||||
  val state = Reg(init=s_ready)
 | 
					  val state = Reg(init=s_ready)
 | 
				
			||||||
  val r_refill_tag = Reg(tag_cam.io.write_tag)
 | 
					  val r_refill_tag = Reg(UInt(width = asIdBits + vpnBits))
 | 
				
			||||||
  val r_refill_waddr = Reg(tag_cam.io.write_addr)
 | 
					  val r_refill_waddr = Reg(UInt(width = log2Ceil(entries)))
 | 
				
			||||||
  val r_req = Reg(new TLBReq)
 | 
					  val r_req = Reg(new TLBReq)
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  val lookup_tag = Cat(io.ptw.ptbr.asid, io.req.bits.vpn(vpnBits-1,0)).toUInt
 | 
					  val lookup_tag = Cat(io.ptw.ptbr.asid, io.req.bits.vpn(vpnBits-1,0)).toUInt
 | 
				
			||||||
  tag_cam.io.tag := lookup_tag
 | 
					  val hitsVec = (0 until entries).map(i => valid(i) && tags(i) === lookup_tag)
 | 
				
			||||||
  tag_cam.io.write := state === s_wait && io.ptw.resp.valid
 | 
					  val hits = hitsVec.toBits
 | 
				
			||||||
  tag_cam.io.write_tag := r_refill_tag
 | 
					 | 
				
			||||||
  tag_cam.io.write_addr := r_refill_waddr
 | 
					 | 
				
			||||||
  val tag_hit_addr = OHToUInt(tag_cam.io.hits)
 | 
					 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  // permission bit arrays
 | 
					  // permission bit arrays
 | 
				
			||||||
  val ur_array = Reg(Vec(entries, Bool())) // user read permission
 | 
					  val ur_array = Reg(UInt(width = entries)) // user read permission
 | 
				
			||||||
  val uw_array = Reg(Vec(entries, Bool())) // user write permission
 | 
					  val uw_array = Reg(UInt(width = entries)) // user write permission
 | 
				
			||||||
  val ux_array = Reg(Vec(entries, Bool())) // user execute permission
 | 
					  val ux_array = Reg(UInt(width = entries)) // user execute permission
 | 
				
			||||||
  val sr_array = Reg(Vec(entries, Bool())) // supervisor read permission
 | 
					  val sr_array = Reg(UInt(width = entries)) // supervisor read permission
 | 
				
			||||||
  val sw_array = Reg(Vec(entries, Bool())) // supervisor write permission
 | 
					  val sw_array = Reg(UInt(width = entries)) // supervisor write permission
 | 
				
			||||||
  val sx_array = Reg(Vec(entries, Bool())) // supervisor execute permission
 | 
					  val sx_array = Reg(UInt(width = entries)) // supervisor execute permission
 | 
				
			||||||
  val dirty_array = Reg(Vec(entries, Bool())) // PTE dirty bit
 | 
					  val dirty_array = Reg(UInt(width = entries)) // PTE dirty bit
 | 
				
			||||||
  when (io.ptw.resp.valid) {
 | 
					  when (io.ptw.resp.valid) {
 | 
				
			||||||
    val pte = io.ptw.resp.bits.pte
 | 
					    val pte = io.ptw.resp.bits.pte
 | 
				
			||||||
    tag_ram(r_refill_waddr) := pte.ppn
 | 
					    ppns(r_refill_waddr) := pte.ppn
 | 
				
			||||||
    ur_array(r_refill_waddr) := pte.ur()
 | 
					    tags(r_refill_waddr) := r_refill_tag
 | 
				
			||||||
    uw_array(r_refill_waddr) := pte.uw()
 | 
					
 | 
				
			||||||
    ux_array(r_refill_waddr) := pte.ux()
 | 
					    val mask = UIntToOH(r_refill_waddr)
 | 
				
			||||||
    sr_array(r_refill_waddr) := pte.sr()
 | 
					    valid := valid | mask
 | 
				
			||||||
    sw_array(r_refill_waddr) := pte.sw()
 | 
					    ur_array := Mux(pte.ur(), ur_array | mask, ur_array & ~mask)
 | 
				
			||||||
    sx_array(r_refill_waddr) := pte.sx()
 | 
					    uw_array := Mux(pte.uw(), uw_array | mask, uw_array & ~mask)
 | 
				
			||||||
    dirty_array(r_refill_waddr) := pte.d
 | 
					    ux_array := Mux(pte.ux(), ux_array | mask, ux_array & ~mask)
 | 
				
			||||||
 | 
					    sr_array := Mux(pte.sr(), sr_array | mask, sr_array & ~mask)
 | 
				
			||||||
 | 
					    sw_array := Mux(pte.sw(), sw_array | mask, sw_array & ~mask)
 | 
				
			||||||
 | 
					    sx_array := Mux(pte.sx(), sx_array | mask, sx_array & ~mask)
 | 
				
			||||||
 | 
					    dirty_array := Mux(pte.d, dirty_array | mask, dirty_array & ~mask)
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 
 | 
					 
 | 
				
			||||||
  // high if there are any unused (invalid) entries in the TLB
 | 
					  // high if there are any unused (invalid) entries in the TLB
 | 
				
			||||||
  val has_invalid_entry = !tag_cam.io.valid_bits.andR
 | 
					 | 
				
			||||||
  val invalid_entry = PriorityEncoder(~tag_cam.io.valid_bits)
 | 
					 | 
				
			||||||
  val plru = new PseudoLRU(entries)
 | 
					  val plru = new PseudoLRU(entries)
 | 
				
			||||||
  val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace)
 | 
					  val repl_waddr = Mux(!valid.andR, PriorityEncoder(~valid), plru.replace)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  val do_mprv = io.ptw.status.mprv && !io.req.bits.instruction
 | 
					  val do_mprv = io.ptw.status.mprv && !io.req.bits.instruction
 | 
				
			||||||
  val priv = Mux(do_mprv, io.ptw.status.mpp, io.ptw.status.prv)
 | 
					  val priv = Mux(do_mprv, io.ptw.status.mpp, io.ptw.status.prv)
 | 
				
			||||||
  val priv_s = priv === PRV.S
 | 
					  val priv_s = priv === PRV.S
 | 
				
			||||||
  val priv_uses_vm = priv <= PRV.S && !io.ptw.status.debug
 | 
					  val priv_uses_vm = priv <= PRV.S && !io.ptw.status.debug
 | 
				
			||||||
  val req_xwr = Cat(!r_req.store, r_req.store, !(r_req.instruction || r_req.store))
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  val ur_bits = ur_array.toBits
 | 
					  val pum_ok = ~Mux(io.ptw.status.pum, ur_array, UInt(0))
 | 
				
			||||||
  val pum_ok = ~Mux(io.ptw.status.pum, ur_bits, UInt(0))
 | 
					  val r_array = Mux(priv_s, sr_array & pum_ok, ur_array)
 | 
				
			||||||
  val r_array = Mux(priv_s, sr_array.toBits & pum_ok, ur_bits)
 | 
					  val w_array = Mux(priv_s, sw_array & pum_ok, uw_array)
 | 
				
			||||||
  val w_array = Mux(priv_s, sw_array.toBits & pum_ok, uw_array.toBits)
 | 
					  val x_array = Mux(priv_s, sx_array, ux_array)
 | 
				
			||||||
  val x_array = Mux(priv_s, sx_array.toBits, ux_array.toBits)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  val vm_enabled = Bool(usingVM) && io.ptw.status.vm(3) && priv_uses_vm && !io.req.bits.passthrough
 | 
					  val vm_enabled = Bool(usingVM) && io.ptw.status.vm(3) && priv_uses_vm && !io.req.bits.passthrough
 | 
				
			||||||
  val bad_va =
 | 
					  val bad_va =
 | 
				
			||||||
    if (vpnBits == vpnBitsExtended) Bool(false)
 | 
					    if (vpnBits == vpnBitsExtended) Bool(false)
 | 
				
			||||||
    else io.req.bits.vpn(vpnBits) =/= io.req.bits.vpn(vpnBits-1)
 | 
					    else io.req.bits.vpn(vpnBits) =/= io.req.bits.vpn(vpnBits-1)
 | 
				
			||||||
  // it's only a store hit if the dirty bit is set
 | 
					  // it's only a store hit if the dirty bit is set
 | 
				
			||||||
  val tag_hits = tag_cam.io.hits & (dirty_array.toBits | ~Mux(io.req.bits.store, w_array, UInt(0)))
 | 
					  val tag_hits = hits & (dirty_array | ~Mux(io.req.bits.store, w_array, UInt(0)))
 | 
				
			||||||
  val tag_hit = tag_hits.orR
 | 
					  val tag_hit = tag_hits.orR
 | 
				
			||||||
  val tlb_hit = vm_enabled && tag_hit
 | 
					  val tlb_hit = vm_enabled && tag_hit
 | 
				
			||||||
  val tlb_miss = vm_enabled && !tag_hit && !bad_va
 | 
					  val tlb_miss = vm_enabled && !tag_hit && !bad_va
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  when (io.req.valid && tlb_hit) {
 | 
					  when (io.req.valid && tlb_hit) {
 | 
				
			||||||
    plru.access(OHToUInt(tag_cam.io.hits))
 | 
					    plru.access(OHToUInt(hits))
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  val paddr = Cat(io.resp.ppn, UInt(0, pgIdxBits))
 | 
					  val paddr = Cat(io.resp.ppn, UInt(0, pgIdxBits))
 | 
				
			||||||
  val addr_prot = addrMap.getProt(paddr)
 | 
					  val addr_prot = addrMap.getProt(paddr)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  io.req.ready := state === s_ready
 | 
					  io.req.ready := state === s_ready
 | 
				
			||||||
  io.resp.xcpt_ld := bad_va || (!tlb_miss && !addr_prot.r) || (tlb_hit && !(r_array & tag_cam.io.hits).orR)
 | 
					  io.resp.xcpt_ld := bad_va || (!tlb_miss && !addr_prot.r) || (tlb_hit && !(r_array & hits).orR)
 | 
				
			||||||
  io.resp.xcpt_st := bad_va || (!tlb_miss && !addr_prot.w) || (tlb_hit && !(w_array & tag_cam.io.hits).orR)
 | 
					  io.resp.xcpt_st := bad_va || (!tlb_miss && !addr_prot.w) || (tlb_hit && !(w_array & hits).orR)
 | 
				
			||||||
  io.resp.xcpt_if := bad_va || (!tlb_miss && !addr_prot.x) || (tlb_hit && !(x_array & tag_cam.io.hits).orR)
 | 
					  io.resp.xcpt_if := bad_va || (!tlb_miss && !addr_prot.x) || (tlb_hit && !(x_array & hits).orR)
 | 
				
			||||||
  io.resp.miss := tlb_miss
 | 
					  io.resp.miss := tlb_miss
 | 
				
			||||||
  io.resp.ppn := Mux(vm_enabled, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(ppnBits-1,0))
 | 
					  io.resp.ppn := Mux(vm_enabled, Mux1H(hitsVec, ppns), io.req.bits.vpn(ppnBits-1,0))
 | 
				
			||||||
  io.resp.hit_idx := tag_cam.io.hits
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // clear entries on a TLB flush.
 | 
					 | 
				
			||||||
  // TODO: selective flushing.  careful with superpage mappings (flush it all)
 | 
					 | 
				
			||||||
  tag_cam.io.clear := io.ptw.invalidate
 | 
					 | 
				
			||||||
  tag_cam.io.clear_mask := ~UInt(0, entries)
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  io.ptw.req.valid := state === s_request
 | 
					  io.ptw.req.valid := state === s_request
 | 
				
			||||||
  io.ptw.req.bits.addr := r_refill_tag
 | 
					  io.ptw.req.bits.addr := r_refill_tag
 | 
				
			||||||
  io.ptw.req.bits.prv := io.ptw.status.prv
 | 
					  io.ptw.req.bits.prv := io.ptw.status.prv
 | 
				
			||||||
@@ -195,6 +145,10 @@ class TLB(implicit p: Parameters) extends TLBModule()(p) {
 | 
				
			|||||||
    when (io.ptw.resp.valid) {
 | 
					    when (io.ptw.resp.valid) {
 | 
				
			||||||
      state := s_ready
 | 
					      state := s_ready
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    when (io.ptw.invalidate) {
 | 
				
			||||||
 | 
					      valid := 0
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user