Merge pull request #918 from freechipsproject/icache-prefetch
Icache prefetch
This commit is contained in:
commit
1be1433f04
@ -141,6 +141,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
|||||||
icache.io.s2_vaddr := s2_pc
|
icache.io.s2_vaddr := s2_pc
|
||||||
icache.io.s1_kill := s2_redirect || tlb.io.resp.miss || s2_replay
|
icache.io.s1_kill := s2_redirect || tlb.io.resp.miss || s2_replay
|
||||||
icache.io.s2_kill := s2_speculative && !s2_tlb_resp.cacheable || s2_xcpt
|
icache.io.s2_kill := s2_speculative && !s2_tlb_resp.cacheable || s2_xcpt
|
||||||
|
icache.io.s2_prefetch := s2_tlb_resp.prefetchable
|
||||||
|
|
||||||
fq.io.enq.valid := s2_valid && (icache.io.resp.valid || !s2_tlb_resp.miss && icache.io.s2_kill)
|
fq.io.enq.valid := s2_valid && (icache.io.resp.valid || !s2_tlb_resp.miss && icache.io.s2_kill)
|
||||||
fq.io.enq.bits.pc := s2_pc
|
fq.io.enq.bits.pc := s2_pc
|
||||||
|
@ -21,6 +21,7 @@ case class ICacheParams(
|
|||||||
tagECC: Code = new IdentityCode,
|
tagECC: Code = new IdentityCode,
|
||||||
dataECC: Code = new IdentityCode,
|
dataECC: Code = new IdentityCode,
|
||||||
itimAddr: Option[BigInt] = None,
|
itimAddr: Option[BigInt] = None,
|
||||||
|
prefetch: Boolean = false,
|
||||||
blockBytes: Int = 64,
|
blockBytes: Int = 64,
|
||||||
latency: Int = 2,
|
latency: Int = 2,
|
||||||
fetchBytes: Int = 4) extends L1CacheParams {
|
fetchBytes: Int = 4) extends L1CacheParams {
|
||||||
@ -37,7 +38,9 @@ class ICacheReq(implicit p: Parameters) extends CoreBundle()(p) with HasL1ICache
|
|||||||
|
|
||||||
class ICache(val icacheParams: ICacheParams, val hartid: Int)(implicit p: Parameters) extends LazyModule {
|
class ICache(val icacheParams: ICacheParams, val hartid: Int)(implicit p: Parameters) extends LazyModule {
|
||||||
lazy val module = new ICacheModule(this)
|
lazy val module = new ICacheModule(this)
|
||||||
val masterNode = TLClientNode(TLClientParameters(name = s"Core ${hartid} ICache"))
|
val masterNode = TLClientNode(TLClientParameters(
|
||||||
|
sourceId = IdRange(0, 1 + icacheParams.prefetch.toInt), // 0=refill, 1=hint
|
||||||
|
name = s"Core ${hartid} ICache"))
|
||||||
|
|
||||||
val size = icacheParams.nSets * icacheParams.nWays * icacheParams.blockBytes
|
val size = icacheParams.nSets * icacheParams.nWays * icacheParams.blockBytes
|
||||||
val device = new SimpleDevice("itim", Seq("sifive,itim0"))
|
val device = new SimpleDevice("itim", Seq("sifive,itim0"))
|
||||||
@ -72,6 +75,7 @@ class ICacheBundle(outer: ICache) extends CoreBundle()(outer.p) {
|
|||||||
val s2_vaddr = UInt(INPUT, vaddrBits) // delayed two cycles w.r.t. req
|
val s2_vaddr = UInt(INPUT, vaddrBits) // delayed two cycles w.r.t. req
|
||||||
val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req
|
val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req
|
||||||
val s2_kill = Bool(INPUT) // delayed two cycles; prevents I$ miss emission
|
val s2_kill = Bool(INPUT) // delayed two cycles; prevents I$ miss emission
|
||||||
|
val s2_prefetch = Bool(INPUT) // should I$ prefetch next line on a miss?
|
||||||
|
|
||||||
val resp = Valid(new ICacheResp(outer))
|
val resp = Valid(new ICacheResp(outer))
|
||||||
val invalidate = Bool(INPUT)
|
val invalidate = Bool(INPUT)
|
||||||
@ -126,23 +130,28 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
|
|||||||
|
|
||||||
val invalidated = Reg(Bool())
|
val invalidated = Reg(Bool())
|
||||||
val refill_valid = RegInit(false.B)
|
val refill_valid = RegInit(false.B)
|
||||||
|
val send_hint = RegInit(false.B)
|
||||||
|
val refill_fire = tl_out.a.fire() && !send_hint
|
||||||
|
val hint_outstanding = RegInit(false.B)
|
||||||
val s2_miss = s2_valid && !s2_hit && !io.s2_kill && !RegNext(refill_valid)
|
val s2_miss = s2_valid && !s2_hit && !io.s2_kill && !RegNext(refill_valid)
|
||||||
val refill_addr = RegEnable(io.s1_paddr, s1_valid && !(refill_valid || s2_miss))
|
val refill_addr = RegEnable(io.s1_paddr, s1_valid && !(refill_valid || s2_miss))
|
||||||
val refill_tag = refill_addr(tagBits+untagBits-1,untagBits)
|
val refill_tag = refill_addr(tagBits+untagBits-1,untagBits)
|
||||||
val refill_idx = refill_addr(untagBits-1,blockOffBits)
|
val refill_idx = refill_addr(untagBits-1,blockOffBits)
|
||||||
|
val refill_one_beat = tl_out.d.fire() && edge_out.hasData(tl_out.d.bits)
|
||||||
|
|
||||||
io.req.ready := !(tl_out.d.fire() || s0_slaveValid || s3_slaveValid)
|
io.req.ready := !(refill_one_beat || s0_slaveValid || s3_slaveValid)
|
||||||
val s0_valid = io.req.fire()
|
val s0_valid = io.req.fire()
|
||||||
val s0_vaddr = io.req.bits.addr
|
val s0_vaddr = io.req.bits.addr
|
||||||
s1_valid := s0_valid
|
s1_valid := s0_valid
|
||||||
|
|
||||||
val (_, _, refill_done, refill_cnt) = edge_out.count(tl_out.d)
|
val (_, _, d_done, refill_cnt) = edge_out.count(tl_out.d)
|
||||||
|
val refill_done = refill_one_beat && d_done
|
||||||
tl_out.d.ready := !s3_slaveValid
|
tl_out.d.ready := !s3_slaveValid
|
||||||
require (edge_out.manager.minLatency > 0)
|
require (edge_out.manager.minLatency > 0)
|
||||||
|
|
||||||
val repl_way = if (isDM) UInt(0) else {
|
val repl_way = if (isDM) UInt(0) else {
|
||||||
// pick a way that is not used by the scratchpad
|
// pick a way that is not used by the scratchpad
|
||||||
val v0 = LFSR16(tl_out.a.fire())(log2Up(nWays)-1,0)
|
val v0 = LFSR16(refill_fire)(log2Up(nWays)-1,0)
|
||||||
var v = v0
|
var v = v0
|
||||||
for (i <- log2Ceil(nWays) - 1 to 0 by -1) {
|
for (i <- log2Ceil(nWays) - 1 to 0 by -1) {
|
||||||
val mask = nWays - (BigInt(1) << (i + 1))
|
val mask = nWays - (BigInt(1) << (i + 1))
|
||||||
@ -162,7 +171,7 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
|
|||||||
}
|
}
|
||||||
|
|
||||||
val vb_array = Reg(init=Bits(0, nSets*nWays))
|
val vb_array = Reg(init=Bits(0, nSets*nWays))
|
||||||
when (tl_out.d.fire()) {
|
when (refill_one_beat) {
|
||||||
accruedRefillError := refillError
|
accruedRefillError := refillError
|
||||||
// clear bit when refill starts so hit-under-miss doesn't fetch bad data
|
// clear bit when refill starts so hit-under-miss doesn't fetch bad data
|
||||||
vb_array := vb_array.bitSet(Cat(repl_way, refill_idx), refill_done && !invalidated)
|
vb_array := vb_array.bitSet(Cat(repl_way, refill_idx), refill_done && !invalidated)
|
||||||
@ -205,8 +214,8 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
|
|||||||
def wordMatch(addr: UInt) = addr.extract(log2Ceil(tl_out.d.bits.data.getWidth/8)-1, log2Ceil(wordBits/8)) === i
|
def wordMatch(addr: UInt) = addr.extract(log2Ceil(tl_out.d.bits.data.getWidth/8)-1, log2Ceil(wordBits/8)) === i
|
||||||
def row(addr: UInt) = addr(untagBits-1, blockOffBits-log2Ceil(refillCycles))
|
def row(addr: UInt) = addr(untagBits-1, blockOffBits-log2Ceil(refillCycles))
|
||||||
val s0_ren = (s0_valid && wordMatch(s0_vaddr)) || (s0_slaveValid && wordMatch(s0_slaveAddr))
|
val s0_ren = (s0_valid && wordMatch(s0_vaddr)) || (s0_slaveValid && wordMatch(s0_slaveAddr))
|
||||||
val wen = (tl_out.d.fire() && !invalidated) || (s3_slaveValid && wordMatch(s1s3_slaveAddr))
|
val wen = (refill_one_beat && !invalidated) || (s3_slaveValid && wordMatch(s1s3_slaveAddr))
|
||||||
val mem_idx = Mux(tl_out.d.fire(), (refill_idx << log2Ceil(refillCycles)) | refill_cnt,
|
val mem_idx = Mux(refill_one_beat, (refill_idx << log2Ceil(refillCycles)) | refill_cnt,
|
||||||
Mux(s3_slaveValid, row(s1s3_slaveAddr),
|
Mux(s3_slaveValid, row(s1s3_slaveAddr),
|
||||||
Mux(s0_slaveValid, row(s0_slaveAddr),
|
Mux(s0_slaveValid, row(s0_slaveAddr),
|
||||||
row(s0_vaddr))))
|
row(s0_vaddr))))
|
||||||
@ -295,17 +304,43 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
|
|||||||
tl.e.ready := true
|
tl.e.ready := true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
tl_out.a.valid := s2_miss && !refill_valid
|
tl_out.a.valid := s2_miss && !refill_valid
|
||||||
tl_out.a.bits := edge_out.Get(
|
tl_out.a.bits := edge_out.Get(
|
||||||
fromSource = UInt(0),
|
fromSource = UInt(0),
|
||||||
toAddress = (refill_addr >> blockOffBits) << blockOffBits,
|
toAddress = (refill_addr >> blockOffBits) << blockOffBits,
|
||||||
lgSize = lgCacheBlockBytes)._2
|
lgSize = lgCacheBlockBytes)._2
|
||||||
|
if (cacheParams.prefetch) {
|
||||||
|
val (crosses_page, next_block) = Split(refill_addr(pgIdxBits-1, blockOffBits) +& 1, pgIdxBits-blockOffBits)
|
||||||
|
when (tl_out.a.fire()) {
|
||||||
|
send_hint := !hint_outstanding && io.s2_prefetch && !crosses_page
|
||||||
|
when (send_hint) {
|
||||||
|
send_hint := false
|
||||||
|
hint_outstanding := true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
when (refill_done) {
|
||||||
|
send_hint := false
|
||||||
|
}
|
||||||
|
when (tl_out.d.fire() && !refill_one_beat) {
|
||||||
|
hint_outstanding := false
|
||||||
|
}
|
||||||
|
|
||||||
|
when (send_hint) {
|
||||||
|
tl_out.a.valid := true
|
||||||
|
tl_out.a.bits := edge_out.Hint(
|
||||||
|
fromSource = UInt(1),
|
||||||
|
toAddress = Cat(refill_addr >> pgIdxBits, next_block) << blockOffBits,
|
||||||
|
lgSize = lgCacheBlockBytes,
|
||||||
|
param = TLHints.PREFETCH_READ)._2
|
||||||
|
}
|
||||||
|
}
|
||||||
tl_out.b.ready := Bool(true)
|
tl_out.b.ready := Bool(true)
|
||||||
tl_out.c.valid := Bool(false)
|
tl_out.c.valid := Bool(false)
|
||||||
tl_out.e.valid := Bool(false)
|
tl_out.e.valid := Bool(false)
|
||||||
assert(!(tl_out.a.valid && addrMaybeInScratchpad(tl_out.a.bits.address)))
|
assert(!(tl_out.a.valid && addrMaybeInScratchpad(tl_out.a.bits.address)))
|
||||||
|
|
||||||
when (!refill_valid) { invalidated := false.B }
|
when (!refill_valid) { invalidated := false.B }
|
||||||
when (tl_out.a.fire()) { refill_valid := true.B }
|
when (refill_fire) { refill_valid := true.B }
|
||||||
when (refill_done) { refill_valid := false.B}
|
when (refill_done) { refill_valid := false.B}
|
||||||
}
|
}
|
||||||
|
@ -49,6 +49,7 @@ class TLBResp(implicit p: Parameters) extends CoreBundle()(p) {
|
|||||||
val ae = new TLBExceptions
|
val ae = new TLBExceptions
|
||||||
val ma = new TLBExceptions
|
val ma = new TLBExceptions
|
||||||
val cacheable = Bool()
|
val cacheable = Bool()
|
||||||
|
val prefetchable = Bool()
|
||||||
}
|
}
|
||||||
|
|
||||||
class TLB(lgMaxSize: Int, nEntries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(p) {
|
class TLB(lgMaxSize: Int, nEntries: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(p) {
|
||||||
@ -113,6 +114,7 @@ class TLB(lgMaxSize: Int, nEntries: Int)(implicit edge: TLEdgeOut, p: Parameters
|
|||||||
def fastCheck(member: TLManagerParameters => Boolean) =
|
def fastCheck(member: TLManagerParameters => Boolean) =
|
||||||
legal_address && edge.manager.fastProperty(mpu_physaddr, member, (b:Boolean) => Bool(b))
|
legal_address && edge.manager.fastProperty(mpu_physaddr, member, (b:Boolean) => Bool(b))
|
||||||
val cacheable = fastCheck(_.supportsAcquireB)
|
val cacheable = fastCheck(_.supportsAcquireB)
|
||||||
|
val homogeneous = TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), BigInt(1) << pgIdxBits)(mpu_physaddr).homogeneous
|
||||||
val prot_r = fastCheck(_.supportsGet) && pmp.io.r
|
val prot_r = fastCheck(_.supportsGet) && pmp.io.r
|
||||||
val prot_w = fastCheck(_.supportsPutFull) && pmp.io.w
|
val prot_w = fastCheck(_.supportsPutFull) && pmp.io.w
|
||||||
val prot_al = fastCheck(_.supportsLogical) || cacheable
|
val prot_al = fastCheck(_.supportsLogical) || cacheable
|
||||||
@ -181,6 +183,7 @@ class TLB(lgMaxSize: Int, nEntries: Int)(implicit edge: TLEdgeOut, p: Parameters
|
|||||||
val pal_array = Cat(Fill(2, prot_al), entries.init.map(_.pal).asUInt)
|
val pal_array = Cat(Fill(2, prot_al), entries.init.map(_.pal).asUInt)
|
||||||
val eff_array = Cat(Fill(2, prot_eff), entries.init.map(_.eff).asUInt)
|
val eff_array = Cat(Fill(2, prot_eff), entries.init.map(_.eff).asUInt)
|
||||||
val c_array = Cat(Fill(2, cacheable), entries.init.map(_.c).asUInt)
|
val c_array = Cat(Fill(2, cacheable), entries.init.map(_.c).asUInt)
|
||||||
|
val prefetchable_array = Cat(cacheable && homogeneous, false.B, entries.init.map(_.c).asUInt)
|
||||||
|
|
||||||
val misaligned = (io.req.bits.vaddr & (UIntToOH(io.req.bits.size) - 1)).orR
|
val misaligned = (io.req.bits.vaddr & (UIntToOH(io.req.bits.size) - 1)).orR
|
||||||
val bad_va = vm_enabled &&
|
val bad_va = vm_enabled &&
|
||||||
@ -226,6 +229,7 @@ class TLB(lgMaxSize: Int, nEntries: Int)(implicit edge: TLEdgeOut, p: Parameters
|
|||||||
io.resp.ma.st := (ma_st_array & hits).orR
|
io.resp.ma.st := (ma_st_array & hits).orR
|
||||||
io.resp.ma.inst := false // this is up to the pipeline to figure out
|
io.resp.ma.inst := false // this is up to the pipeline to figure out
|
||||||
io.resp.cacheable := (c_array & hits).orR
|
io.resp.cacheable := (c_array & hits).orR
|
||||||
|
io.resp.prefetchable := (prefetchable_array & hits).orR && edge.manager.managers.forall(m => !m.supportsAcquireB || m.supportsHint)
|
||||||
io.resp.miss := do_refill || tlb_miss || multipleHits
|
io.resp.miss := do_refill || tlb_miss || multipleHits
|
||||||
io.resp.paddr := Cat(ppn, pgOffset)
|
io.resp.paddr := Cat(ppn, pgOffset)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user