Improve BTB critical path at slight accuracy cost
Make the entry lookup fully associative on only the lower 14 bits of the address (the new nMatchBits default), rather than on the full address.
This commit is contained in:
parent
11c8857b5d
commit
74d8d672bf
@ -11,16 +11,18 @@ import util._
|
|||||||
|
|
||||||
case class BTBParams(
|
case class BTBParams(
|
||||||
nEntries: Int = 40,
|
nEntries: Int = 40,
|
||||||
|
nMatchBits: Int = 14,
|
||||||
|
nPages: Int = 6,
|
||||||
nRAS: Int = 2,
|
nRAS: Int = 2,
|
||||||
updatesOutOfOrder: Boolean = false)
|
updatesOutOfOrder: Boolean = false)
|
||||||
|
|
||||||
trait HasBtbParameters extends HasCoreParameters {
|
trait HasBtbParameters extends HasCoreParameters {
|
||||||
val btbParams = tileParams.btb.getOrElse(BTBParams(nEntries = 0))
|
val btbParams = tileParams.btb.getOrElse(BTBParams(nEntries = 0))
|
||||||
val matchBits = pgIdxBits max log2Ceil(p(coreplex.CacheBlockBytes) * tileParams.icache.get.nSets)
|
val matchBits = btbParams.nMatchBits max log2Ceil(p(coreplex.CacheBlockBytes) * tileParams.icache.get.nSets)
|
||||||
val entries = btbParams.nEntries
|
val entries = btbParams.nEntries
|
||||||
val nRAS = btbParams.nRAS
|
val nRAS = btbParams.nRAS
|
||||||
val updatesOutOfOrder = btbParams.updatesOutOfOrder
|
val updatesOutOfOrder = btbParams.updatesOutOfOrder
|
||||||
val nPages = ((1 max(log2Up(entries)))+1)/2*2 // control logic assumes 2 divides pages
|
val nPages = (btbParams.nPages + 1) / 2 * 2 // control logic assumes 2 divides pages
|
||||||
val opaqueBits = log2Up(entries)
|
val opaqueBits = log2Up(entries)
|
||||||
val nBHT = 1 << log2Up(entries*2)
|
val nBHT = 1 << log2Up(entries*2)
|
||||||
}
|
}
|
||||||
@ -150,8 +152,6 @@ class BTB(implicit p: Parameters) extends BtbModule {
|
|||||||
val tgtPages = Reg(Vec(entries, UInt(width=log2Up(nPages))))
|
val tgtPages = Reg(Vec(entries, UInt(width=log2Up(nPages))))
|
||||||
val pages = Reg(Vec(nPages, UInt(width=vaddrBits - matchBits)))
|
val pages = Reg(Vec(nPages, UInt(width=vaddrBits - matchBits)))
|
||||||
val pageValid = Reg(init = UInt(0, nPages))
|
val pageValid = Reg(init = UInt(0, nPages))
|
||||||
val idxPagesOH = idxPages.map(UIntToOH(_)(nPages-1,0))
|
|
||||||
val tgtPagesOH = tgtPages.map(UIntToOH(_)(nPages-1,0))
|
|
||||||
|
|
||||||
val isValid = Reg(init = UInt(0, entries))
|
val isValid = Reg(init = UInt(0, entries))
|
||||||
val isReturn = Reg(UInt(width = entries))
|
val isReturn = Reg(UInt(width = entries))
|
||||||
@ -163,31 +163,29 @@ class BTB(implicit p: Parameters) extends BtbModule {
|
|||||||
val p = page(addr)
|
val p = page(addr)
|
||||||
pageValid & pages.map(_ === p).asUInt
|
pageValid & pages.map(_ === p).asUInt
|
||||||
}
|
}
|
||||||
private def tagMatch(addr: UInt, pgMatch: UInt) = {
|
private def idxMatch(addr: UInt) = {
|
||||||
val idxMatch = idxs.map(_ === addr(matchBits-1, log2Up(coreInstBytes))).asUInt
|
val idx = addr(matchBits-1, log2Up(coreInstBytes))
|
||||||
val idxPageMatch = idxPagesOH.map(_ & pgMatch).map(_.orR).asUInt
|
idxs.map(_ === idx).asUInt & isValid
|
||||||
idxMatch & idxPageMatch & isValid
|
|
||||||
}
|
}
|
||||||
|
|
||||||
val r_btb_update = Pipe(io.btb_update)
|
val r_btb_update = Pipe(io.btb_update)
|
||||||
val update_target = io.req.bits.addr
|
val update_target = io.req.bits.addr
|
||||||
|
|
||||||
val pageHit = pageMatch(io.req.bits.addr)
|
val pageHit = pageMatch(io.req.bits.addr)
|
||||||
val hitsVec = tagMatch(io.req.bits.addr, pageHit)
|
val idxHit = idxMatch(io.req.bits.addr)
|
||||||
val hits = hitsVec.asUInt
|
|
||||||
val updatePageHit = pageMatch(r_btb_update.bits.pc)
|
val updatePageHit = pageMatch(r_btb_update.bits.pc)
|
||||||
|
val (updateHit, updateHitAddr) =
|
||||||
val updateHits = tagMatch(r_btb_update.bits.pc, updatePageHit)
|
if (updatesOutOfOrder) {
|
||||||
val updateHit = if (updatesOutOfOrder) updateHits.orR else r_btb_update.bits.prediction.valid
|
val updateHits = (pageHit << 1)(Mux1H(idxMatch(r_btb_update.bits.pc), idxPages))
|
||||||
val updateHitAddr = if (updatesOutOfOrder) OHToUInt(updateHits) else r_btb_update.bits.prediction.bits.entry
|
(updateHits.orR, OHToUInt(updateHits))
|
||||||
|
} else (r_btb_update.bits.prediction.valid, r_btb_update.bits.prediction.bits.entry)
|
||||||
val nextRepl = Counter(r_btb_update.valid && !updateHit, entries)._1
|
|
||||||
|
|
||||||
val useUpdatePageHit = updatePageHit.orR
|
val useUpdatePageHit = updatePageHit.orR
|
||||||
val usePageHit = pageHit.orR
|
val usePageHit = pageHit.orR
|
||||||
val doIdxPageRepl = !useUpdatePageHit
|
val doIdxPageRepl = !useUpdatePageHit
|
||||||
val nextPageRepl = Reg(UInt(width = log2Ceil(nPages)))
|
val nextPageRepl = Reg(UInt(width = log2Ceil(nPages)))
|
||||||
val idxPageRepl = Mux(usePageHit, Cat(pageHit(nPages-2,0), pageHit(nPages-1)), UIntToOH(nextPageRepl))
|
val idxPageRepl = Cat(pageHit(nPages-2,0), pageHit(nPages-1)) | Mux(usePageHit, UInt(0), UIntToOH(nextPageRepl))
|
||||||
val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl)
|
val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl)
|
||||||
val idxPageUpdate = OHToUInt(idxPageUpdateOH)
|
val idxPageUpdate = OHToUInt(idxPageUpdateOH)
|
||||||
val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0))
|
val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0))
|
||||||
@ -195,7 +193,7 @@ class BTB(implicit p: Parameters) extends BtbModule {
|
|||||||
val samePage = page(r_btb_update.bits.pc) === page(update_target)
|
val samePage = page(r_btb_update.bits.pc) === page(update_target)
|
||||||
val doTgtPageRepl = !samePage && !usePageHit
|
val doTgtPageRepl = !samePage && !usePageHit
|
||||||
val tgtPageRepl = Mux(samePage, idxPageUpdateOH, Cat(idxPageUpdateOH(nPages-2,0), idxPageUpdateOH(nPages-1)))
|
val tgtPageRepl = Mux(samePage, idxPageUpdateOH, Cat(idxPageUpdateOH(nPages-2,0), idxPageUpdateOH(nPages-1)))
|
||||||
val tgtPageUpdate = OHToUInt(Mux(usePageHit, pageHit, tgtPageRepl))
|
val tgtPageUpdate = OHToUInt(pageHit | Mux(usePageHit, UInt(0), tgtPageRepl))
|
||||||
val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0))
|
val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0))
|
||||||
|
|
||||||
when (r_btb_update.valid && (doIdxPageRepl || doTgtPageRepl)) {
|
when (r_btb_update.valid && (doIdxPageRepl || doTgtPageRepl)) {
|
||||||
@ -205,11 +203,12 @@ class BTB(implicit p: Parameters) extends BtbModule {
|
|||||||
}
|
}
|
||||||
|
|
||||||
when (r_btb_update.valid) {
|
when (r_btb_update.valid) {
|
||||||
|
val nextRepl = Counter(r_btb_update.valid && !updateHit, entries)._1
|
||||||
val waddr = Mux(updateHit, updateHitAddr, nextRepl)
|
val waddr = Mux(updateHit, updateHitAddr, nextRepl)
|
||||||
val mask = UIntToOH(waddr)
|
val mask = UIntToOH(waddr)
|
||||||
idxs(waddr) := r_btb_update.bits.pc(matchBits-1, log2Up(coreInstBytes))
|
idxs(waddr) := r_btb_update.bits.pc(matchBits-1, log2Up(coreInstBytes))
|
||||||
tgts(waddr) := update_target(matchBits-1, log2Up(coreInstBytes))
|
tgts(waddr) := update_target(matchBits-1, log2Up(coreInstBytes))
|
||||||
idxPages(waddr) := idxPageUpdate
|
idxPages(waddr) := idxPageUpdate +& 1 // the +1 corresponds to the <<1 on io.resp.valid
|
||||||
tgtPages(waddr) := tgtPageUpdate
|
tgtPages(waddr) := tgtPageUpdate
|
||||||
isValid := Mux(r_btb_update.bits.isValid, isValid | mask, isValid & ~mask)
|
isValid := Mux(r_btb_update.bits.isValid, isValid | mask, isValid & ~mask)
|
||||||
isReturn := Mux(r_btb_update.bits.isReturn, isReturn | mask, isReturn & ~mask)
|
isReturn := Mux(r_btb_update.bits.isReturn, isReturn | mask, isReturn & ~mask)
|
||||||
@ -231,21 +230,21 @@ class BTB(implicit p: Parameters) extends BtbModule {
|
|||||||
pageValid := pageValid | tgtPageReplEn | idxPageReplEn
|
pageValid := pageValid | tgtPageReplEn | idxPageReplEn
|
||||||
}
|
}
|
||||||
|
|
||||||
io.resp.valid := hits.orR
|
io.resp.valid := (pageHit << 1)(Mux1H(idxHit, idxPages))
|
||||||
io.resp.bits.taken := true
|
io.resp.bits.taken := true
|
||||||
io.resp.bits.target := Cat(Mux1H(Mux1H(hitsVec, tgtPagesOH), pages), Mux1H(hitsVec, tgts) << log2Up(coreInstBytes))
|
io.resp.bits.target := Cat(pages(Mux1H(idxHit, tgtPages)), Mux1H(idxHit, tgts) << log2Up(coreInstBytes))
|
||||||
io.resp.bits.entry := OHToUInt(hits)
|
io.resp.bits.entry := OHToUInt(idxHit)
|
||||||
io.resp.bits.bridx := (if (fetchWidth > 1) Mux1H(hitsVec, brIdx) else UInt(0))
|
io.resp.bits.bridx := (if (fetchWidth > 1) Mux1H(idxHit, brIdx) else UInt(0))
|
||||||
io.resp.bits.mask := Cat((UInt(1) << ~Mux(io.resp.bits.taken, ~io.resp.bits.bridx, UInt(0)))-1, UInt(1))
|
io.resp.bits.mask := Cat((UInt(1) << ~Mux(io.resp.bits.taken, ~io.resp.bits.bridx, UInt(0)))-1, UInt(1))
|
||||||
|
|
||||||
// if multiple entries for same PC land in BTB, zap them
|
// if multiple entries for same PC land in BTB, zap them
|
||||||
when (PopCountAtLeast(hits, 2)) {
|
when (PopCountAtLeast(idxHit, 2)) {
|
||||||
isValid := isValid & ~hits
|
isValid := isValid & ~idxHit
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nBHT > 0) {
|
if (nBHT > 0) {
|
||||||
val bht = new BHT(nBHT)
|
val bht = new BHT(nBHT)
|
||||||
val isBranch = !(hits & isJump).orR
|
val isBranch = !(idxHit & isJump).orR
|
||||||
val res = bht.get(io.req.bits.addr, io.req.valid && io.resp.valid && isBranch)
|
val res = bht.get(io.req.bits.addr, io.req.valid && io.resp.valid && isBranch)
|
||||||
val update_btb_hit = io.bht_update.bits.prediction.valid
|
val update_btb_hit = io.bht_update.bits.prediction.valid
|
||||||
when (io.bht_update.valid && update_btb_hit) {
|
when (io.bht_update.valid && update_btb_hit) {
|
||||||
@ -257,7 +256,7 @@ class BTB(implicit p: Parameters) extends BtbModule {
|
|||||||
|
|
||||||
if (nRAS > 0) {
|
if (nRAS > 0) {
|
||||||
val ras = new RAS(nRAS)
|
val ras = new RAS(nRAS)
|
||||||
val doPeek = (hits & isReturn).orR
|
val doPeek = (idxHit & isReturn).orR
|
||||||
when (!ras.isEmpty && doPeek) {
|
when (!ras.isEmpty && doPeek) {
|
||||||
io.resp.bits.target := ras.peek
|
io.resp.bits.target := ras.peek
|
||||||
}
|
}
|
||||||
|
@ -80,6 +80,7 @@ class IBuf(implicit p: Parameters) extends CoreModule {
|
|||||||
val xcpt_if = valid & (Mux(buf.xcpt_if, bufMask, UInt(0)) | Mux(io.imem.bits.xcpt_if, ~bufMask, UInt(0)))
|
val xcpt_if = valid & (Mux(buf.xcpt_if, bufMask, UInt(0)) | Mux(io.imem.bits.xcpt_if, ~bufMask, UInt(0)))
|
||||||
val ic_replay = valid & (Mux(buf.replay, bufMask, UInt(0)) | Mux(io.imem.bits.replay, ~bufMask, UInt(0)))
|
val ic_replay = valid & (Mux(buf.replay, bufMask, UInt(0)) | Mux(io.imem.bits.replay, ~bufMask, UInt(0)))
|
||||||
val ibufBTBHitMask = Mux(ibufBTBHit, UIntToOH(ibufBTBResp.bridx), UInt(0))
|
val ibufBTBHitMask = Mux(ibufBTBHit, UIntToOH(ibufBTBResp.bridx), UInt(0))
|
||||||
|
assert(!io.imem.bits.btb.valid || io.imem.bits.btb.bits.bridx >= pcWordBits)
|
||||||
val icBTBHitMask = Mux(io.imem.bits.btb.valid, UIntToOH(io.imem.bits.btb.bits.bridx +& nBufValid - pcWordBits), UInt(0))
|
val icBTBHitMask = Mux(io.imem.bits.btb.valid, UIntToOH(io.imem.bits.btb.bits.bridx +& nBufValid - pcWordBits), UInt(0))
|
||||||
val btbHitMask = ibufBTBHitMask & bufMask | icBTBHitMask & ~bufMask
|
val btbHitMask = ibufBTBHitMask & bufMask | icBTBHitMask & ~bufMask
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user