Merge pull request #918 from freechipsproject/icache-prefetch
Icache prefetch
This commit is contained in:
commit
1be1433f04
@ -141,6 +141,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
||||
icache.io.s2_vaddr := s2_pc
|
||||
icache.io.s1_kill := s2_redirect || tlb.io.resp.miss || s2_replay
|
||||
icache.io.s2_kill := s2_speculative && !s2_tlb_resp.cacheable || s2_xcpt
|
||||
icache.io.s2_prefetch := s2_tlb_resp.prefetchable
|
||||
|
||||
fq.io.enq.valid := s2_valid && (icache.io.resp.valid || !s2_tlb_resp.miss && icache.io.s2_kill)
|
||||
fq.io.enq.bits.pc := s2_pc
|
||||
|
@ -21,6 +21,7 @@ case class ICacheParams(
|
||||
tagECC: Code = new IdentityCode,
|
||||
dataECC: Code = new IdentityCode,
|
||||
itimAddr: Option[BigInt] = None,
|
||||
prefetch: Boolean = false,
|
||||
blockBytes: Int = 64,
|
||||
latency: Int = 2,
|
||||
fetchBytes: Int = 4) extends L1CacheParams {
|
||||
@ -37,7 +38,9 @@ class ICacheReq(implicit p: Parameters) extends CoreBundle()(p) with HasL1ICache
|
||||
|
||||
class ICache(val icacheParams: ICacheParams, val hartid: Int)(implicit p: Parameters) extends LazyModule {
|
||||
lazy val module = new ICacheModule(this)
|
||||
val masterNode = TLClientNode(TLClientParameters(name = s"Core ${hartid} ICache"))
|
||||
val masterNode = TLClientNode(TLClientParameters(
|
||||
sourceId = IdRange(0, 1 + icacheParams.prefetch.toInt), // 0=refill, 1=hint
|
||||
name = s"Core ${hartid} ICache"))
|
||||
|
||||
val size = icacheParams.nSets * icacheParams.nWays * icacheParams.blockBytes
|
||||
val device = new SimpleDevice("itim", Seq("sifive,itim0"))
|
||||
@ -72,6 +75,7 @@ class ICacheBundle(outer: ICache) extends CoreBundle()(outer.p) {
|
||||
val s2_vaddr = UInt(INPUT, vaddrBits) // delayed two cycles w.r.t. req
|
||||
val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req
|
||||
val s2_kill = Bool(INPUT) // delayed two cycles; prevents I$ miss emission
|
||||
val s2_prefetch = Bool(INPUT) // should I$ prefetch next line on a miss?
|
||||
|
||||
val resp = Valid(new ICacheResp(outer))
|
||||
val invalidate = Bool(INPUT)
|
||||
@ -126,23 +130,28 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
|
||||
|
||||
val invalidated = Reg(Bool())
|
||||
val refill_valid = RegInit(false.B)
|
||||
val send_hint = RegInit(false.B)
|
||||
val refill_fire = tl_out.a.fire() && !send_hint
|
||||
val hint_outstanding = RegInit(false.B)
|
||||
val s2_miss = s2_valid && !s2_hit && !io.s2_kill && !RegNext(refill_valid)
|
||||
val refill_addr = RegEnable(io.s1_paddr, s1_valid && !(refill_valid || s2_miss))
|
||||
val refill_tag = refill_addr(tagBits+untagBits-1,untagBits)
|
||||
val refill_idx = refill_addr(untagBits-1,blockOffBits)
|
||||
val refill_one_beat = tl_out.d.fire() && edge_out.hasData(tl_out.d.bits)
|
||||
|
||||
io.req.ready := !(tl_out.d.fire() || s0_slaveValid || s3_slaveValid)
|
||||
io.req.ready := !(refill_one_beat || s0_slaveValid || s3_slaveValid)
|
||||
val s0_valid = io.req.fire()
|
||||
val s0_vaddr = io.req.bits.addr
|
||||
s1_valid := s0_valid
|
||||
|
||||
val (_, _, refill_done, refill_cnt) = edge_out.count(tl_out.d)
|
||||
val (_, _, d_done, refill_cnt) = edge_out.count(tl_out.d)
|
||||
val refill_done = refill_one_beat && d_done
|
||||
tl_out.d.ready := !s3_slaveValid
|
||||
require (edge_out.manager.minLatency > 0)
|
||||
|
||||
val repl_way = if (isDM) UInt(0) else {
|
||||
// pick a way that is not used by the scratchpad
|
||||
val v0 = LFSR16(tl_out.a.fire())(log2Up(nWays)-1,0)
|
||||
val v0 = LFSR16(refill_fire)(log2Up(nWays)-1,0)
|
||||
var v = v0
|
||||
for (i <- log2Ceil(nWays) - 1 to 0 by -1) {
|
||||
val mask = nWays - (BigInt(1) << (i + 1))
|
||||
@ -162,7 +171,7 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
|
||||
}
|
||||
|
||||
val vb_array = Reg(init=Bits(0, nSets*nWays))
|
||||
when (tl_out.d.fire()) {
|
||||
when (refill_one_beat) {
|
||||
accruedRefillError := refillError
|
||||
// clear bit when refill starts so hit-under-miss doesn't fetch bad data
|
||||
vb_array := vb_array.bitSet(Cat(repl_way, refill_idx), refill_done && !invalidated)
|
||||
@ -205,8 +214,8 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
|
||||
def wordMatch(addr: UInt) = addr.extract(log2Ceil(tl_out.d.bits.data.getWidth/8)-1, log2Ceil(wordBits/8)) === i
|
||||
def row(addr: UInt) = addr(untagBits-1, blockOffBits-log2Ceil(refillCycles))
|
||||
val s0_ren = (s0_valid && wordMatch(s0_vaddr)) || (s0_slaveValid && wordMatch(s0_slaveAddr))
|
||||
val wen = (tl_out.d.fire() && !invalidated) || (s3_slaveValid && wordMatch(s1s3_slaveAddr))
|
||||
val mem_idx = Mux(tl_out.d.fire(), (refill_idx << log2Ceil(refillCycles)) | refill_cnt,
|
||||
val wen = (refill_one_beat && !invalidated) || (s3_slaveValid && wordMatch(s1s3_slaveAddr))
|
||||
val mem_idx = Mux(refill_one_beat, (refill_idx << log2Ceil(refillCycles)) | refill_cnt,
|
||||
Mux(s3_slaveValid, row(s1s3_slaveAddr),
|
||||
Mux(s0_slaveValid, row(s0_slaveAddr),
|
||||
row(s0_vaddr))))
|
||||
@ -295,17 +304,43 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
|
||||
tl.e.ready := true
|
||||
}
|
||||
}
|
||||
|
||||
tl_out.a.valid := s2_miss && !refill_valid
|
||||
tl_out.a.bits := edge_out.Get(
|
||||
fromSource = UInt(0),
|
||||
toAddress = (refill_addr >> blockOffBits) << blockOffBits,
|
||||
lgSize = lgCacheBlockBytes)._2
|
||||
if (cacheParams.prefetch) {
|
||||
val (crosses_page, next_block) = Split(refill_addr(pgIdxBits-1, blockOffBits) +& 1, pgIdxBits-blockOffBits)
|
||||
when (tl_out.a.fire()) {
|
||||
send_hint := !hint_outstanding && io.s2_prefetch && !crosses_page
|
||||
when (send_hint) {
|
||||
send_hint := false
|
||||
hint_outstanding := true
|
||||
}
|
||||
}
|
||||
when (refill_done) {
|
||||
send_hint := false
|
||||
}
|
||||
when (tl_out.d.fire() && !refill_one_beat) {
|
||||
hint_outstanding := false
|
||||
}
|
||||
|
||||
when (send_hint) {
|
||||
tl_out.a.valid := true
|
||||
tl_out.a.bits := edge_out.Hint(
|
||||
fromSource = UInt(1),
|
||||
toAddress = Cat(refill_addr >> pgIdxBits, next_block) << blockOffBits,
|
||||
lgSize = lgCacheBlockBytes,
|
||||
param = TLHints.PREFETCH_READ)._2
|
||||
}
|
||||
}
|
||||
tl_out.b.ready := Bool(true)
|
||||
tl_out.c.valid := Bool(false)
|
||||
tl_out.e.valid := Bool(false)
|
||||
assert(!(tl_out.a.valid && addrMaybeInScratchpad(tl_out.a.bits.address)))
|
||||
|
||||
when (!refill_valid) { invalidated := false.B }
|
||||
when (tl_out.a.fire()) { refill_valid := true.B }
|
||||
when (refill_fire) { refill_valid := true.B }
|
||||
when (refill_done) { refill_valid := false.B}
|
||||
}
|
||||
|
@ -49,6 +49,7 @@ class TLBResp(implicit p: Parameters) extends CoreBundle()(p) {
|
||||
val ae = new TLBExceptions
|
||||
val ma = new TLBExceptions
|
||||
val cacheable = Bool()
|
||||
val prefetchable = Bool()
|
||||
}
|
||||
|
||||
class TLB(lgMaxSize: Int, nEntries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(p) {
|
||||
@ -113,6 +114,7 @@ class TLB(lgMaxSize: Int, nEntries: Int)(implicit edge: TLEdgeOut, p: Parameters
|
||||
def fastCheck(member: TLManagerParameters => Boolean) =
|
||||
legal_address && edge.manager.fastProperty(mpu_physaddr, member, (b:Boolean) => Bool(b))
|
||||
val cacheable = fastCheck(_.supportsAcquireB)
|
||||
val homogeneous = TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), BigInt(1) << pgIdxBits)(mpu_physaddr).homogeneous
|
||||
val prot_r = fastCheck(_.supportsGet) && pmp.io.r
|
||||
val prot_w = fastCheck(_.supportsPutFull) && pmp.io.w
|
||||
val prot_al = fastCheck(_.supportsLogical) || cacheable
|
||||
@ -181,6 +183,7 @@ class TLB(lgMaxSize: Int, nEntries: Int)(implicit edge: TLEdgeOut, p: Parameters
|
||||
val pal_array = Cat(Fill(2, prot_al), entries.init.map(_.pal).asUInt)
|
||||
val eff_array = Cat(Fill(2, prot_eff), entries.init.map(_.eff).asUInt)
|
||||
val c_array = Cat(Fill(2, cacheable), entries.init.map(_.c).asUInt)
|
||||
val prefetchable_array = Cat(cacheable && homogeneous, false.B, entries.init.map(_.c).asUInt)
|
||||
|
||||
val misaligned = (io.req.bits.vaddr & (UIntToOH(io.req.bits.size) - 1)).orR
|
||||
val bad_va = vm_enabled &&
|
||||
@ -226,6 +229,7 @@ class TLB(lgMaxSize: Int, nEntries: Int)(implicit edge: TLEdgeOut, p: Parameters
|
||||
io.resp.ma.st := (ma_st_array & hits).orR
|
||||
io.resp.ma.inst := false // this is up to the pipeline to figure out
|
||||
io.resp.cacheable := (c_array & hits).orR
|
||||
io.resp.prefetchable := (prefetchable_array & hits).orR && edge.manager.managers.forall(m => !m.supportsAcquireB || m.supportsHint)
|
||||
io.resp.miss := do_refill || tlb_miss || multipleHits
|
||||
io.resp.paddr := Cat(ppn, pgOffset)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user