1
0

Merge pull request #918 from freechipsproject/icache-prefetch

Icache prefetch
This commit is contained in:
Andrew Waterman 2017-08-02 21:22:20 -07:00 committed by GitHub
commit 1be1433f04
3 changed files with 48 additions and 8 deletions

View File

@ -141,6 +141,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
icache.io.s2_vaddr := s2_pc
icache.io.s1_kill := s2_redirect || tlb.io.resp.miss || s2_replay
icache.io.s2_kill := s2_speculative && !s2_tlb_resp.cacheable || s2_xcpt
icache.io.s2_prefetch := s2_tlb_resp.prefetchable
fq.io.enq.valid := s2_valid && (icache.io.resp.valid || !s2_tlb_resp.miss && icache.io.s2_kill)
fq.io.enq.bits.pc := s2_pc

View File

@ -21,6 +21,7 @@ case class ICacheParams(
tagECC: Code = new IdentityCode,
dataECC: Code = new IdentityCode,
itimAddr: Option[BigInt] = None,
prefetch: Boolean = false,
blockBytes: Int = 64,
latency: Int = 2,
fetchBytes: Int = 4) extends L1CacheParams {
@ -37,7 +38,9 @@ class ICacheReq(implicit p: Parameters) extends CoreBundle()(p) with HasL1ICache
class ICache(val icacheParams: ICacheParams, val hartid: Int)(implicit p: Parameters) extends LazyModule {
lazy val module = new ICacheModule(this)
val masterNode = TLClientNode(TLClientParameters(name = s"Core ${hartid} ICache"))
val masterNode = TLClientNode(TLClientParameters(
sourceId = IdRange(0, 1 + icacheParams.prefetch.toInt), // 0=refill, 1=hint
name = s"Core ${hartid} ICache"))
val size = icacheParams.nSets * icacheParams.nWays * icacheParams.blockBytes
val device = new SimpleDevice("itim", Seq("sifive,itim0"))
@ -72,6 +75,7 @@ class ICacheBundle(outer: ICache) extends CoreBundle()(outer.p) {
val s2_vaddr = UInt(INPUT, vaddrBits) // delayed two cycles w.r.t. req
val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req
val s2_kill = Bool(INPUT) // delayed two cycles; prevents I$ miss emission
val s2_prefetch = Bool(INPUT) // should I$ prefetch next line on a miss?
val resp = Valid(new ICacheResp(outer))
val invalidate = Bool(INPUT)
@ -126,23 +130,28 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
val invalidated = Reg(Bool())
val refill_valid = RegInit(false.B)
val send_hint = RegInit(false.B)
val refill_fire = tl_out.a.fire() && !send_hint
val hint_outstanding = RegInit(false.B)
val s2_miss = s2_valid && !s2_hit && !io.s2_kill && !RegNext(refill_valid)
val refill_addr = RegEnable(io.s1_paddr, s1_valid && !(refill_valid || s2_miss))
val refill_tag = refill_addr(tagBits+untagBits-1,untagBits)
val refill_idx = refill_addr(untagBits-1,blockOffBits)
val refill_one_beat = tl_out.d.fire() && edge_out.hasData(tl_out.d.bits)
io.req.ready := !(tl_out.d.fire() || s0_slaveValid || s3_slaveValid)
io.req.ready := !(refill_one_beat || s0_slaveValid || s3_slaveValid)
val s0_valid = io.req.fire()
val s0_vaddr = io.req.bits.addr
s1_valid := s0_valid
val (_, _, refill_done, refill_cnt) = edge_out.count(tl_out.d)
val (_, _, d_done, refill_cnt) = edge_out.count(tl_out.d)
val refill_done = refill_one_beat && d_done
tl_out.d.ready := !s3_slaveValid
require (edge_out.manager.minLatency > 0)
val repl_way = if (isDM) UInt(0) else {
// pick a way that is not used by the scratchpad
val v0 = LFSR16(tl_out.a.fire())(log2Up(nWays)-1,0)
val v0 = LFSR16(refill_fire)(log2Up(nWays)-1,0)
var v = v0
for (i <- log2Ceil(nWays) - 1 to 0 by -1) {
val mask = nWays - (BigInt(1) << (i + 1))
@ -162,7 +171,7 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
}
val vb_array = Reg(init=Bits(0, nSets*nWays))
when (tl_out.d.fire()) {
when (refill_one_beat) {
accruedRefillError := refillError
// clear bit when refill starts so hit-under-miss doesn't fetch bad data
vb_array := vb_array.bitSet(Cat(repl_way, refill_idx), refill_done && !invalidated)
@ -205,8 +214,8 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
def wordMatch(addr: UInt) = addr.extract(log2Ceil(tl_out.d.bits.data.getWidth/8)-1, log2Ceil(wordBits/8)) === i
def row(addr: UInt) = addr(untagBits-1, blockOffBits-log2Ceil(refillCycles))
val s0_ren = (s0_valid && wordMatch(s0_vaddr)) || (s0_slaveValid && wordMatch(s0_slaveAddr))
val wen = (tl_out.d.fire() && !invalidated) || (s3_slaveValid && wordMatch(s1s3_slaveAddr))
val mem_idx = Mux(tl_out.d.fire(), (refill_idx << log2Ceil(refillCycles)) | refill_cnt,
val wen = (refill_one_beat && !invalidated) || (s3_slaveValid && wordMatch(s1s3_slaveAddr))
val mem_idx = Mux(refill_one_beat, (refill_idx << log2Ceil(refillCycles)) | refill_cnt,
Mux(s3_slaveValid, row(s1s3_slaveAddr),
Mux(s0_slaveValid, row(s0_slaveAddr),
row(s0_vaddr))))
@ -295,17 +304,43 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
tl.e.ready := true
}
}
tl_out.a.valid := s2_miss && !refill_valid
tl_out.a.bits := edge_out.Get(
fromSource = UInt(0),
toAddress = (refill_addr >> blockOffBits) << blockOffBits,
lgSize = lgCacheBlockBytes)._2
if (cacheParams.prefetch) {
val (crosses_page, next_block) = Split(refill_addr(pgIdxBits-1, blockOffBits) +& 1, pgIdxBits-blockOffBits)
when (tl_out.a.fire()) {
send_hint := !hint_outstanding && io.s2_prefetch && !crosses_page
when (send_hint) {
send_hint := false
hint_outstanding := true
}
}
when (refill_done) {
send_hint := false
}
when (tl_out.d.fire() && !refill_one_beat) {
hint_outstanding := false
}
when (send_hint) {
tl_out.a.valid := true
tl_out.a.bits := edge_out.Hint(
fromSource = UInt(1),
toAddress = Cat(refill_addr >> pgIdxBits, next_block) << blockOffBits,
lgSize = lgCacheBlockBytes,
param = TLHints.PREFETCH_READ)._2
}
}
tl_out.b.ready := Bool(true)
tl_out.c.valid := Bool(false)
tl_out.e.valid := Bool(false)
assert(!(tl_out.a.valid && addrMaybeInScratchpad(tl_out.a.bits.address)))
when (!refill_valid) { invalidated := false.B }
when (tl_out.a.fire()) { refill_valid := true.B }
when (refill_fire) { refill_valid := true.B }
when (refill_done) { refill_valid := false.B}
}

View File

@ -49,6 +49,7 @@ class TLBResp(implicit p: Parameters) extends CoreBundle()(p) {
val ae = new TLBExceptions
val ma = new TLBExceptions
val cacheable = Bool()
val prefetchable = Bool()
}
class TLB(lgMaxSize: Int, nEntries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(p) {
@ -113,6 +114,7 @@ class TLB(lgMaxSize: Int, nEntries: Int)(implicit edge: TLEdgeOut, p: Parameters
def fastCheck(member: TLManagerParameters => Boolean) =
legal_address && edge.manager.fastProperty(mpu_physaddr, member, (b:Boolean) => Bool(b))
val cacheable = fastCheck(_.supportsAcquireB)
val homogeneous = TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), BigInt(1) << pgIdxBits)(mpu_physaddr).homogeneous
val prot_r = fastCheck(_.supportsGet) && pmp.io.r
val prot_w = fastCheck(_.supportsPutFull) && pmp.io.w
val prot_al = fastCheck(_.supportsLogical) || cacheable
@ -181,6 +183,7 @@ class TLB(lgMaxSize: Int, nEntries: Int)(implicit edge: TLEdgeOut, p: Parameters
val pal_array = Cat(Fill(2, prot_al), entries.init.map(_.pal).asUInt)
val eff_array = Cat(Fill(2, prot_eff), entries.init.map(_.eff).asUInt)
val c_array = Cat(Fill(2, cacheable), entries.init.map(_.c).asUInt)
val prefetchable_array = Cat(cacheable && homogeneous, false.B, entries.init.map(_.c).asUInt)
val misaligned = (io.req.bits.vaddr & (UIntToOH(io.req.bits.size) - 1)).orR
val bad_va = vm_enabled &&
@ -226,6 +229,7 @@ class TLB(lgMaxSize: Int, nEntries: Int)(implicit edge: TLEdgeOut, p: Parameters
io.resp.ma.st := (ma_st_array & hits).orR
io.resp.ma.inst := false // this is up to the pipeline to figure out
io.resp.cacheable := (c_array & hits).orR
io.resp.prefetchable := (prefetchable_array & hits).orR && edge.manager.managers.forall(m => !m.supportsAcquireB || m.supportsHint)
io.resp.miss := do_refill || tlb_miss || multipleHits
io.resp.paddr := Cat(ppn, pgOffset)