1
0

Fixed BHT update error.

- separated out BTB/BHT update
   - BHT updates counters on every branch
   - BTB update only on mispredicted and taken branches
This commit is contained in:
Christopher Celio 2014-11-16 22:02:27 -08:00
parent fea31d2167
commit 6749f67b7f
4 changed files with 54 additions and 40 deletions

View File

@ -51,7 +51,7 @@ class BHTResp extends Bundle with BTBParameters {
// - updated speculatively in fetch (if there's a BTB hit). // - updated speculatively in fetch (if there's a BTB hit).
// - on a mispredict, the history register is reset (again, only if BTB hit). // - on a mispredict, the history register is reset (again, only if BTB hit).
// The counter table: // The counter table:
// - each counter corresponds with the "fetch pc" (not the PC of the branch). // - each counter corresponds with the address of the fetch packet ("fetch pc").
// - updated when a branch resolves (and BTB was a hit for that branch). // - updated when a branch resolves (and BTB was a hit for that branch).
// The updating branch must provide its "fetch pc". // The updating branch must provide its "fetch pc".
class BHT(nbht: Int) { class BHT(nbht: Int) {
@ -65,10 +65,10 @@ class BHT(nbht: Int) {
when (update) { history := Cat(taken, history(nbhtbits-1,1)) } when (update) { history := Cat(taken, history(nbhtbits-1,1)) }
res res
} }
def update(addr: UInt, d: BHTResp, taken: Bool): Unit = { def update(addr: UInt, d: BHTResp, taken: Bool, mispredict: Bool): Unit = {
val index = addr(nbhtbits+1,2) ^ d.history val index = addr(nbhtbits+1,2) ^ d.history
table(index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken)) table(index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken))
history := Cat(taken, d.history(nbhtbits-1,1)) when (mispredict) { history := Cat(taken, d.history(nbhtbits-1,1)) }
} }
private val table = Mem(UInt(width = 2), nbht) private val table = Mem(UInt(width = 2), nbht)
@ -88,6 +88,15 @@ class BTBUpdate extends Bundle with BTBParameters {
val br_pc = UInt(width = vaddrBits) val br_pc = UInt(width = vaddrBits)
} }
// BHT update occurs during branch resolution on all conditional branches.
// - "pc" is what future fetch PCs will tag match against.
class BHTUpdate extends Bundle with BTBParameters {
val prediction = Valid(new BTBResp)
val pc = UInt(width = vaddrBits)
val taken = Bool()
val mispredict = Bool()
}
class RASUpdate extends Bundle with BTBParameters { class RASUpdate extends Bundle with BTBParameters {
val isCall = Bool() val isCall = Bool()
val isReturn = Bool() val isReturn = Bool()
@ -120,7 +129,8 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete
val io = new Bundle { val io = new Bundle {
val req = Valid(new BTBReq).flip val req = Valid(new BTBReq).flip
val resp = Valid(new BTBResp) val resp = Valid(new BTBResp)
val update = Valid(new BTBUpdate).flip val btb_update = Valid(new BTBUpdate).flip
val bht_update = Valid(new BHTUpdate).flip
val ras_update = Valid(new RASUpdate).flip val ras_update = Valid(new RASUpdate).flip
val invalidate = Bool(INPUT) val invalidate = Bool(INPUT)
} }
@ -151,67 +161,62 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete
idxValid & idxMatch & idxPageMatch idxValid & idxMatch & idxPageMatch
} }
val r_update = Pipe(io.update) val r_btb_update = Pipe(io.btb_update)
val update_target = io.req.bits.addr val update_target = io.req.bits.addr
val pageHit = pageMatch(io.req.bits.addr) val pageHit = pageMatch(io.req.bits.addr)
val hits = tagMatch(io.req.bits.addr, pageHit) val hits = tagMatch(io.req.bits.addr, pageHit)
val updatePageHit = pageMatch(r_update.bits.pc) val updatePageHit = pageMatch(r_btb_update.bits.pc)
val updateHits = tagMatch(r_update.bits.pc, updatePageHit) val updateHits = tagMatch(r_btb_update.bits.pc, updatePageHit)
private var lfsr = LFSR16(r_update.valid) private var lfsr = LFSR16(r_btb_update.valid)
def rand(width: Int) = { def rand(width: Int) = {
lfsr = lfsr(lfsr.getWidth-1,1) lfsr = lfsr(lfsr.getWidth-1,1)
Random.oneHot(width, lfsr) Random.oneHot(width, lfsr)
} }
val updateHit = r_update.bits.prediction.valid val updateHit = r_btb_update.bits.prediction.valid
val updateTarget = r_update.bits.taken
val useUpdatePageHit = updatePageHit.orR val useUpdatePageHit = updatePageHit.orR
val doIdxPageRepl = updateTarget && !useUpdatePageHit val doIdxPageRepl = !useUpdatePageHit
val idxPageRepl = UInt() val idxPageRepl = UInt()
val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl) val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl)
val idxPageUpdate = OHToUInt(idxPageUpdateOH) val idxPageUpdate = OHToUInt(idxPageUpdateOH)
val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0)) val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0))
val samePage = page(r_update.bits.pc) === page(update_target) val samePage = page(r_btb_update.bits.pc) === page(update_target)
val usePageHit = (pageHit & ~idxPageReplEn).orR val usePageHit = (pageHit & ~idxPageReplEn).orR
val doTgtPageRepl = updateTarget && !samePage && !usePageHit val doTgtPageRepl = !samePage && !usePageHit
val tgtPageRepl = Mux(samePage, idxPageUpdateOH, idxPageUpdateOH(nPages-2,0) << 1 | idxPageUpdateOH(nPages-1)) val tgtPageRepl = Mux(samePage, idxPageUpdateOH, idxPageUpdateOH(nPages-2,0) << 1 | idxPageUpdateOH(nPages-1))
val tgtPageUpdate = OHToUInt(Mux(usePageHit, pageHit, tgtPageRepl)) val tgtPageUpdate = OHToUInt(Mux(usePageHit, pageHit, tgtPageRepl))
val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0)) val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0))
val doPageRepl = doIdxPageRepl || doTgtPageRepl val doPageRepl = doIdxPageRepl || doTgtPageRepl
val pageReplEn = idxPageReplEn | tgtPageReplEn val pageReplEn = idxPageReplEn | tgtPageReplEn
idxPageRepl := UIntToOH(Counter(r_update.valid && doPageRepl, nPages)._1) idxPageRepl := UIntToOH(Counter(r_btb_update.valid && doPageRepl, nPages)._1)
when (r_update.valid && updateTarget) { when (r_btb_update.valid) {
assert(io.req.bits.addr === r_update.bits.target, "BTB request != I$ target") assert(io.req.bits.addr === r_btb_update.bits.target, "BTB request != I$ target")
val nextRepl = Counter(!updateHit, entries)._1 val nextRepl = Counter(!updateHit, entries)._1
var waddr:UInt = null val waddr =
if (!updates_out_of_order) { if (updates_out_of_order) Mux(updateHits.orR, OHToUInt(updateHits), nextRepl)
waddr = Mux(updateHit, r_update.bits.prediction.bits.entry, nextRepl) else Mux(updateHit, r_btb_update.bits.prediction.bits.entry, nextRepl)
} else {
println(" BTB accepts out-of-order updates.")
waddr = Mux(updateHits.orR, OHToUInt(updateHits), nextRepl)
}
// invalidate entries if we stomp on pages they depend upon // invalidate entries if we stomp on pages they depend upon
idxValid := idxValid & ~Vec.tabulate(entries)(i => (pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR).toBits idxValid := idxValid & ~Vec.tabulate(entries)(i => (pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR).toBits
idxValid(waddr) := Bool(true) idxValid(waddr) := Bool(true)
idxs(waddr) := r_update.bits.pc idxs(waddr) := r_btb_update.bits.pc
tgts(waddr) := update_target tgts(waddr) := update_target
idxPages(waddr) := idxPageUpdate idxPages(waddr) := idxPageUpdate
tgtPages(waddr) := tgtPageUpdate tgtPages(waddr) := tgtPageUpdate
useRAS(waddr) := r_update.bits.isReturn useRAS(waddr) := r_btb_update.bits.isReturn
isJump(waddr) := r_update.bits.isJump isJump(waddr) := r_btb_update.bits.isJump
if (params(FetchWidth) == 1) { if (params(FetchWidth) == 1) {
brIdx(waddr) := UInt(0) brIdx(waddr) := UInt(0)
} else { } else {
brIdx(waddr) := r_update.bits.br_pc >> log2Up(params(CoreInstBits)/8) brIdx(waddr) := r_btb_update.bits.br_pc >> log2Up(params(CoreInstBits)/8)
} }
require(nPages % 2 == 0) require(nPages % 2 == 0)
@ -222,9 +227,9 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete
when (en && pageReplEn(i)) { pages(i) := data } when (en && pageReplEn(i)) { pages(i) := data }
writeBank(0, 2, Mux(idxWritesEven, doIdxPageRepl, doTgtPageRepl), writeBank(0, 2, Mux(idxWritesEven, doIdxPageRepl, doTgtPageRepl),
Mux(idxWritesEven, page(r_update.bits.pc), page(update_target))) Mux(idxWritesEven, page(r_btb_update.bits.pc), page(update_target)))
writeBank(1, 2, Mux(idxWritesEven, doTgtPageRepl, doIdxPageRepl), writeBank(1, 2, Mux(idxWritesEven, doTgtPageRepl, doIdxPageRepl),
Mux(idxWritesEven, page(update_target), page(r_update.bits.pc))) Mux(idxWritesEven, page(update_target), page(r_btb_update.bits.pc)))
when (doPageRepl) { pageValid := pageValid | pageReplEn } when (doPageRepl) { pageValid := pageValid | pageReplEn }
} }
@ -243,17 +248,16 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete
io.resp.bits.mask := UInt(1) io.resp.bits.mask := UInt(1)
} else { } else {
// note: btb_resp is clock gated, so the mask is only relevant for the io.resp.valid case // note: btb_resp is clock gated, so the mask is only relevant for the io.resp.valid case
val all_ones = UInt((1 << (params(FetchWidth)+1))-1)
io.resp.bits.mask := Mux(io.resp.bits.taken, Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)), io.resp.bits.mask := Mux(io.resp.bits.taken, Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)),
all_ones) SInt(-1))
} }
if (nBHT > 0) { if (nBHT > 0) {
val bht = new BHT(nBHT) val bht = new BHT(nBHT)
val res = bht.get(io.req.bits.addr, io.req.valid && hits.orR && !Mux1H(hits, isJump)) val res = bht.get(io.req.bits.addr, io.req.valid && hits.orR && !Mux1H(hits, isJump))
val update_btb_hit = io.update.bits.prediction.valid val update_btb_hit = io.bht_update.bits.prediction.valid
when (io.update.valid && update_btb_hit && !io.update.bits.isJump) { when (io.bht_update.valid && update_btb_hit) {
bht.update(io.update.bits.pc, io.update.bits.prediction.bits.bht, io.update.bits.taken) bht.update(io.bht_update.bits.pc, io.bht_update.bits.prediction.bits.bht, io.bht_update.bits.taken, io.bht_update.bits.mispredict)
} }
when (!res.value(0) && !Mux1H(hits, isJump)) { io.resp.bits.taken := false } when (!res.value(0) && !Mux1H(hits, isJump)) { io.resp.bits.taken := false }
io.resp.bits.bht := res io.resp.bits.bht := res

View File

@ -652,17 +652,24 @@ class Control extends Module
Mux(replay_wb, PC_WB, // replay Mux(replay_wb, PC_WB, // replay
PC_MEM))) PC_MEM)))
io.imem.btb_update.valid := take_pc_mem && !take_pc_wb io.imem.btb_update.valid := io.dpath.mem_misprediction && ((mem_reg_branch && io.dpath.mem_br_taken) || mem_reg_jalr || mem_reg_jal) && !take_pc_wb
io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit
io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp
io.imem.btb_update.bits.taken := mem_reg_branch && io.dpath.mem_br_taken || io.imem.btb_update.bits.isJump
io.imem.btb_update.bits.isJump := mem_reg_jal || mem_reg_jalr io.imem.btb_update.bits.isJump := mem_reg_jal || mem_reg_jalr
io.imem.btb_update.bits.isReturn := mem_reg_jalr && io.dpath.mem_rs1_ra io.imem.btb_update.bits.isReturn := mem_reg_jalr && io.dpath.mem_rs1_ra
io.imem.bht_update.valid := mem_reg_branch && !take_pc_wb
io.imem.bht_update.bits.taken := io.dpath.mem_br_taken
io.imem.bht_update.bits.mispredict := io.dpath.mem_misprediction
io.imem.bht_update.bits.prediction.valid := mem_reg_btb_hit
io.imem.bht_update.bits.prediction.bits := mem_reg_btb_resp
io.imem.ras_update.valid := io.imem.btb_update.bits.isJump && !take_pc_wb io.imem.ras_update.valid := io.imem.btb_update.bits.isJump && !take_pc_wb
io.imem.ras_update.bits.isCall := mem_reg_wen && io.dpath.mem_waddr(0) io.imem.ras_update.bits.isCall := mem_reg_wen && io.dpath.mem_waddr(0)
io.imem.ras_update.bits.isReturn := mem_reg_jalr && io.dpath.mem_rs1_ra io.imem.ras_update.bits.isReturn := mem_reg_jalr && io.dpath.mem_rs1_ra
io.imem.ras_update.bits.prediction.valid := mem_reg_btb_hit io.imem.ras_update.bits.prediction.valid := mem_reg_btb_hit
io.imem.ras_update.bits.prediction.bits := mem_reg_btb_resp io.imem.ras_update.bits.prediction.bits := mem_reg_btb_resp
io.imem.req.valid := take_pc io.imem.req.valid := take_pc
val bypassDst = Array(id_raddr1, id_raddr2) val bypassDst = Array(id_raddr1, id_raddr2)

View File

@ -287,6 +287,7 @@ class Datapath extends Module
io.imem.btb_update.bits.pc := mem_reg_pc io.imem.btb_update.bits.pc := mem_reg_pc
io.imem.btb_update.bits.target := io.imem.req.bits.pc io.imem.btb_update.bits.target := io.imem.req.bits.pc
io.imem.btb_update.bits.br_pc := mem_reg_pc io.imem.btb_update.bits.br_pc := mem_reg_pc
io.imem.bht_update.bits.pc := mem_reg_pc
io.imem.ras_update.bits.returnAddr := mem_int_wdata io.imem.ras_update.bits.returnAddr := mem_int_wdata
// for hazard/bypass opportunity detection // for hazard/bypass opportunity detection

View File

@ -33,6 +33,7 @@ class CPUFrontendIO extends CoreBundle {
val resp = Decoupled(new FrontendResp).flip val resp = Decoupled(new FrontendResp).flip
val btb_resp = Valid(new BTBResp).flip val btb_resp = Valid(new BTBResp).flip
val btb_update = Valid(new BTBUpdate) val btb_update = Valid(new BTBUpdate)
val bht_update = Valid(new BHTUpdate)
val ras_update = Valid(new RASUpdate) val ras_update = Valid(new RASUpdate)
val ptw = new TLBPTWIO().flip val ptw = new TLBPTWIO().flip
val invalidate = Bool(OUTPUT) val invalidate = Bool(OUTPUT)
@ -88,7 +89,8 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule
btb.io.req.valid := !stall && !icmiss btb.io.req.valid := !stall && !icmiss
btb.io.req.bits.addr := s1_pc & SInt(-coreInstBytes) btb.io.req.bits.addr := s1_pc & SInt(-coreInstBytes)
btb.io.update := io.cpu.btb_update btb.io.btb_update := io.cpu.btb_update
btb.io.bht_update := io.cpu.bht_update
btb.io.ras_update := io.cpu.ras_update btb.io.ras_update := io.cpu.ras_update
btb.io.invalidate := io.cpu.invalidate || io.cpu.ptw.invalidate btb.io.invalidate := io.cpu.invalidate || io.cpu.ptw.invalidate