1
0

Improve BTB critical path at slight accuracy cost

Match entries fully associatively on only the low-order address bits (nMatchBits, 14 by default), not the full address.
This commit is contained in:
Andrew Waterman 2017-03-05 23:01:07 -08:00
parent 11c8857b5d
commit 74d8d672bf
2 changed files with 27 additions and 27 deletions

View File

@ -11,16 +11,18 @@ import util._
case class BTBParams( case class BTBParams(
nEntries: Int = 40, nEntries: Int = 40,
nMatchBits: Int = 14,
nPages: Int = 6,
nRAS: Int = 2, nRAS: Int = 2,
updatesOutOfOrder: Boolean = false) updatesOutOfOrder: Boolean = false)
trait HasBtbParameters extends HasCoreParameters { trait HasBtbParameters extends HasCoreParameters {
val btbParams = tileParams.btb.getOrElse(BTBParams(nEntries = 0)) val btbParams = tileParams.btb.getOrElse(BTBParams(nEntries = 0))
val matchBits = pgIdxBits max log2Ceil(p(coreplex.CacheBlockBytes) * tileParams.icache.get.nSets) val matchBits = btbParams.nMatchBits max log2Ceil(p(coreplex.CacheBlockBytes) * tileParams.icache.get.nSets)
val entries = btbParams.nEntries val entries = btbParams.nEntries
val nRAS = btbParams.nRAS val nRAS = btbParams.nRAS
val updatesOutOfOrder = btbParams.updatesOutOfOrder val updatesOutOfOrder = btbParams.updatesOutOfOrder
val nPages = ((1 max(log2Up(entries)))+1)/2*2 // control logic assumes 2 divides pages val nPages = (btbParams.nPages + 1) / 2 * 2 // control logic assumes 2 divides pages
val opaqueBits = log2Up(entries) val opaqueBits = log2Up(entries)
val nBHT = 1 << log2Up(entries*2) val nBHT = 1 << log2Up(entries*2)
} }
@ -150,8 +152,6 @@ class BTB(implicit p: Parameters) extends BtbModule {
val tgtPages = Reg(Vec(entries, UInt(width=log2Up(nPages)))) val tgtPages = Reg(Vec(entries, UInt(width=log2Up(nPages))))
val pages = Reg(Vec(nPages, UInt(width=vaddrBits - matchBits))) val pages = Reg(Vec(nPages, UInt(width=vaddrBits - matchBits)))
val pageValid = Reg(init = UInt(0, nPages)) val pageValid = Reg(init = UInt(0, nPages))
val idxPagesOH = idxPages.map(UIntToOH(_)(nPages-1,0))
val tgtPagesOH = tgtPages.map(UIntToOH(_)(nPages-1,0))
val isValid = Reg(init = UInt(0, entries)) val isValid = Reg(init = UInt(0, entries))
val isReturn = Reg(UInt(width = entries)) val isReturn = Reg(UInt(width = entries))
@ -163,31 +163,29 @@ class BTB(implicit p: Parameters) extends BtbModule {
val p = page(addr) val p = page(addr)
pageValid & pages.map(_ === p).asUInt pageValid & pages.map(_ === p).asUInt
} }
private def tagMatch(addr: UInt, pgMatch: UInt) = { private def idxMatch(addr: UInt) = {
val idxMatch = idxs.map(_ === addr(matchBits-1, log2Up(coreInstBytes))).asUInt val idx = addr(matchBits-1, log2Up(coreInstBytes))
val idxPageMatch = idxPagesOH.map(_ & pgMatch).map(_.orR).asUInt idxs.map(_ === idx).asUInt & isValid
idxMatch & idxPageMatch & isValid
} }
val r_btb_update = Pipe(io.btb_update) val r_btb_update = Pipe(io.btb_update)
val update_target = io.req.bits.addr val update_target = io.req.bits.addr
val pageHit = pageMatch(io.req.bits.addr) val pageHit = pageMatch(io.req.bits.addr)
val hitsVec = tagMatch(io.req.bits.addr, pageHit) val idxHit = idxMatch(io.req.bits.addr)
val hits = hitsVec.asUInt
val updatePageHit = pageMatch(r_btb_update.bits.pc) val updatePageHit = pageMatch(r_btb_update.bits.pc)
val (updateHit, updateHitAddr) =
val updateHits = tagMatch(r_btb_update.bits.pc, updatePageHit) if (updatesOutOfOrder) {
val updateHit = if (updatesOutOfOrder) updateHits.orR else r_btb_update.bits.prediction.valid val updateHits = (pageHit << 1)(Mux1H(idxMatch(r_btb_update.bits.pc), idxPages))
val updateHitAddr = if (updatesOutOfOrder) OHToUInt(updateHits) else r_btb_update.bits.prediction.bits.entry (updateHits.orR, OHToUInt(updateHits))
} else (r_btb_update.bits.prediction.valid, r_btb_update.bits.prediction.bits.entry)
val nextRepl = Counter(r_btb_update.valid && !updateHit, entries)._1
val useUpdatePageHit = updatePageHit.orR val useUpdatePageHit = updatePageHit.orR
val usePageHit = pageHit.orR val usePageHit = pageHit.orR
val doIdxPageRepl = !useUpdatePageHit val doIdxPageRepl = !useUpdatePageHit
val nextPageRepl = Reg(UInt(width = log2Ceil(nPages))) val nextPageRepl = Reg(UInt(width = log2Ceil(nPages)))
val idxPageRepl = Mux(usePageHit, Cat(pageHit(nPages-2,0), pageHit(nPages-1)), UIntToOH(nextPageRepl)) val idxPageRepl = Cat(pageHit(nPages-2,0), pageHit(nPages-1)) | Mux(usePageHit, UInt(0), UIntToOH(nextPageRepl))
val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl) val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl)
val idxPageUpdate = OHToUInt(idxPageUpdateOH) val idxPageUpdate = OHToUInt(idxPageUpdateOH)
val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0)) val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0))
@ -195,7 +193,7 @@ class BTB(implicit p: Parameters) extends BtbModule {
val samePage = page(r_btb_update.bits.pc) === page(update_target) val samePage = page(r_btb_update.bits.pc) === page(update_target)
val doTgtPageRepl = !samePage && !usePageHit val doTgtPageRepl = !samePage && !usePageHit
val tgtPageRepl = Mux(samePage, idxPageUpdateOH, Cat(idxPageUpdateOH(nPages-2,0), idxPageUpdateOH(nPages-1))) val tgtPageRepl = Mux(samePage, idxPageUpdateOH, Cat(idxPageUpdateOH(nPages-2,0), idxPageUpdateOH(nPages-1)))
val tgtPageUpdate = OHToUInt(Mux(usePageHit, pageHit, tgtPageRepl)) val tgtPageUpdate = OHToUInt(pageHit | Mux(usePageHit, UInt(0), tgtPageRepl))
val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0)) val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0))
when (r_btb_update.valid && (doIdxPageRepl || doTgtPageRepl)) { when (r_btb_update.valid && (doIdxPageRepl || doTgtPageRepl)) {
@ -205,11 +203,12 @@ class BTB(implicit p: Parameters) extends BtbModule {
} }
when (r_btb_update.valid) { when (r_btb_update.valid) {
val nextRepl = Counter(r_btb_update.valid && !updateHit, entries)._1
val waddr = Mux(updateHit, updateHitAddr, nextRepl) val waddr = Mux(updateHit, updateHitAddr, nextRepl)
val mask = UIntToOH(waddr) val mask = UIntToOH(waddr)
idxs(waddr) := r_btb_update.bits.pc(matchBits-1, log2Up(coreInstBytes)) idxs(waddr) := r_btb_update.bits.pc(matchBits-1, log2Up(coreInstBytes))
tgts(waddr) := update_target(matchBits-1, log2Up(coreInstBytes)) tgts(waddr) := update_target(matchBits-1, log2Up(coreInstBytes))
idxPages(waddr) := idxPageUpdate idxPages(waddr) := idxPageUpdate +& 1 // the +1 corresponds to the <<1 on io.resp.valid
tgtPages(waddr) := tgtPageUpdate tgtPages(waddr) := tgtPageUpdate
isValid := Mux(r_btb_update.bits.isValid, isValid | mask, isValid & ~mask) isValid := Mux(r_btb_update.bits.isValid, isValid | mask, isValid & ~mask)
isReturn := Mux(r_btb_update.bits.isReturn, isReturn | mask, isReturn & ~mask) isReturn := Mux(r_btb_update.bits.isReturn, isReturn | mask, isReturn & ~mask)
@ -231,21 +230,21 @@ class BTB(implicit p: Parameters) extends BtbModule {
pageValid := pageValid | tgtPageReplEn | idxPageReplEn pageValid := pageValid | tgtPageReplEn | idxPageReplEn
} }
io.resp.valid := hits.orR io.resp.valid := (pageHit << 1)(Mux1H(idxHit, idxPages))
io.resp.bits.taken := true io.resp.bits.taken := true
io.resp.bits.target := Cat(Mux1H(Mux1H(hitsVec, tgtPagesOH), pages), Mux1H(hitsVec, tgts) << log2Up(coreInstBytes)) io.resp.bits.target := Cat(pages(Mux1H(idxHit, tgtPages)), Mux1H(idxHit, tgts) << log2Up(coreInstBytes))
io.resp.bits.entry := OHToUInt(hits) io.resp.bits.entry := OHToUInt(idxHit)
io.resp.bits.bridx := (if (fetchWidth > 1) Mux1H(hitsVec, brIdx) else UInt(0)) io.resp.bits.bridx := (if (fetchWidth > 1) Mux1H(idxHit, brIdx) else UInt(0))
io.resp.bits.mask := Cat((UInt(1) << ~Mux(io.resp.bits.taken, ~io.resp.bits.bridx, UInt(0)))-1, UInt(1)) io.resp.bits.mask := Cat((UInt(1) << ~Mux(io.resp.bits.taken, ~io.resp.bits.bridx, UInt(0)))-1, UInt(1))
// if multiple entries for same PC land in BTB, zap them // if multiple entries for same PC land in BTB, zap them
when (PopCountAtLeast(hits, 2)) { when (PopCountAtLeast(idxHit, 2)) {
isValid := isValid & ~hits isValid := isValid & ~idxHit
} }
if (nBHT > 0) { if (nBHT > 0) {
val bht = new BHT(nBHT) val bht = new BHT(nBHT)
val isBranch = !(hits & isJump).orR val isBranch = !(idxHit & isJump).orR
val res = bht.get(io.req.bits.addr, io.req.valid && io.resp.valid && isBranch) val res = bht.get(io.req.bits.addr, io.req.valid && io.resp.valid && isBranch)
val update_btb_hit = io.bht_update.bits.prediction.valid val update_btb_hit = io.bht_update.bits.prediction.valid
when (io.bht_update.valid && update_btb_hit) { when (io.bht_update.valid && update_btb_hit) {
@ -257,7 +256,7 @@ class BTB(implicit p: Parameters) extends BtbModule {
if (nRAS > 0) { if (nRAS > 0) {
val ras = new RAS(nRAS) val ras = new RAS(nRAS)
val doPeek = (hits & isReturn).orR val doPeek = (idxHit & isReturn).orR
when (!ras.isEmpty && doPeek) { when (!ras.isEmpty && doPeek) {
io.resp.bits.target := ras.peek io.resp.bits.target := ras.peek
} }

View File

@ -80,6 +80,7 @@ class IBuf(implicit p: Parameters) extends CoreModule {
val xcpt_if = valid & (Mux(buf.xcpt_if, bufMask, UInt(0)) | Mux(io.imem.bits.xcpt_if, ~bufMask, UInt(0))) val xcpt_if = valid & (Mux(buf.xcpt_if, bufMask, UInt(0)) | Mux(io.imem.bits.xcpt_if, ~bufMask, UInt(0)))
val ic_replay = valid & (Mux(buf.replay, bufMask, UInt(0)) | Mux(io.imem.bits.replay, ~bufMask, UInt(0))) val ic_replay = valid & (Mux(buf.replay, bufMask, UInt(0)) | Mux(io.imem.bits.replay, ~bufMask, UInt(0)))
val ibufBTBHitMask = Mux(ibufBTBHit, UIntToOH(ibufBTBResp.bridx), UInt(0)) val ibufBTBHitMask = Mux(ibufBTBHit, UIntToOH(ibufBTBResp.bridx), UInt(0))
assert(!io.imem.bits.btb.valid || io.imem.bits.btb.bits.bridx >= pcWordBits)
val icBTBHitMask = Mux(io.imem.bits.btb.valid, UIntToOH(io.imem.bits.btb.bits.bridx +& nBufValid - pcWordBits), UInt(0)) val icBTBHitMask = Mux(io.imem.bits.btb.valid, UIntToOH(io.imem.bits.btb.bits.bridx +& nBufValid - pcWordBits), UInt(0))
val btbHitMask = ibufBTBHitMask & bufMask | icBTBHitMask & ~bufMask val btbHitMask = ibufBTBHitMask & bufMask | icBTBHitMask & ~bufMask