1
0

Improve frontend branch prediction

- Put correctness responsibility on Frontend, not IBuf, for improved
  separation of concerns.  Frontend must detect case that the BTB
  predicts a taken branch in the middle of an instruction.

- Pass BTB information down pipeline unconditionally, fixing case that
  screws up the branch history when the BTB misses and the instruction
  is misaligned.

- Remove jumpInFrontend option; it's now unconditional.

- Default to one-bit counters in the BHT.  For tiny BHTs like these, it's
  more resource efficient to have a larger index space than to have
  hysteresis.
This commit is contained in:
Andrew Waterman
2017-11-08 17:23:25 -08:00
parent 989eeb78f9
commit d0c6cbba6b
5 changed files with 177 additions and 157 deletions

View File

@@ -12,7 +12,6 @@ class Instruction(implicit val p: Parameters) extends ParameterizedBundle with H
val xcpt0 = new FrontendExceptions // exceptions on first half of instruction
val xcpt1 = new FrontendExceptions // exceptions on second half of instruction
val replay = Bool()
val btb_hit = Bool()
val rvc = Bool()
val inst = new ExpandedInstruction
val raw = UInt(width = 32)
@@ -34,13 +33,12 @@ class IBuf(implicit p: Parameters) extends CoreModule {
val n = fetchWidth - 1
val nBufValid = if (n == 0) UInt(0) else Reg(init=UInt(0, log2Ceil(fetchWidth)))
val buf = Reg(io.imem.bits)
val ibufBTBHit = Reg(Bool())
val ibufBTBResp = Reg(new BTBResp)
val pcWordMask = UInt(coreInstBytes*fetchWidth-1, vaddrBitsExtended)
val pcWordBits = io.imem.bits.pc.extract(log2Ceil(fetchWidth*coreInstBytes)-1, log2Ceil(coreInstBytes))
val nReady = Wire(init = UInt(0, log2Ceil(fetchWidth+1)))
val nIC = Mux(io.imem.bits.btb.valid && io.imem.bits.btb.bits.taken, io.imem.bits.btb.bits.bridx +& 1, UInt(fetchWidth)) - pcWordBits
val nIC = Mux(io.imem.bits.btb.taken, io.imem.bits.btb.bridx +& 1, UInt(fetchWidth)) - pcWordBits
val nICReady = nReady - nBufValid
val nValid = Mux(io.imem.valid, nIC, UInt(0)) + nBufValid
io.imem.ready := io.inst(0).ready && nReady >= nBufValid && (nICReady >= nIC || n >= nIC - nICReady)
@@ -52,7 +50,6 @@ class IBuf(implicit p: Parameters) extends CoreModule {
val shiftedBuf = shiftInsnRight(buf.data(n*coreInstBits-1, coreInstBits), (nReady-1)(log2Ceil(n-1)-1,0))
buf.data := Cat(buf.data(n*coreInstBits-1, (n-1)*coreInstBits), shiftedBuf((n-1)*coreInstBits-1, 0))
buf.pc := buf.pc & ~pcWordMask | (buf.pc + (nReady << log2Ceil(coreInstBytes))) & pcWordMask
ibufBTBResp.bridx := ibufBTBResp.bridx - nReady
}
when (io.imem.valid && nReady >= nBufValid && nICReady < nIC && n >= nIC - nICReady) {
val shamt = pcWordBits + nICReady
@@ -60,9 +57,7 @@ class IBuf(implicit p: Parameters) extends CoreModule {
buf := io.imem.bits
buf.data := shiftInsnRight(io.imem.bits.data, shamt)(n*coreInstBits-1,0)
buf.pc := io.imem.bits.pc & ~pcWordMask | (io.imem.bits.pc + (nICReady << log2Ceil(coreInstBytes))) & pcWordMask
ibufBTBHit := io.imem.bits.btb.valid && io.imem.bits.btb.bits.bridx >= shamt
ibufBTBResp := io.imem.bits.btb.bits
ibufBTBResp.bridx := io.imem.bits.btb.bits.bridx - shamt
ibufBTBResp := io.imem.bits.btb
}
}
when (io.kill) {
@@ -81,12 +76,9 @@ class IBuf(implicit p: Parameters) extends CoreModule {
val xcpt = (0 until bufMask.getWidth).map(i => Mux(bufMask(i), buf.xcpt, io.imem.bits.xcpt))
val buf_replay = Mux(buf.replay, bufMask, UInt(0))
val ic_replay = buf_replay | Mux(io.imem.bits.replay, valid & ~bufMask, UInt(0))
val ibufBTBHitMask = Mux(ibufBTBHit, UIntToOH(ibufBTBResp.bridx), UInt(0))
assert(!io.imem.valid || !io.imem.bits.btb.valid || io.imem.bits.btb.bits.bridx >= pcWordBits)
val icBTBHitMask = Mux(io.imem.bits.btb.valid, UIntToOH(io.imem.bits.btb.bits.bridx +& nBufValid - pcWordBits), UInt(0))
val btbHitMask = ibufBTBHitMask & bufMask | icBTBHitMask & ~bufMask
assert(!io.imem.valid || !io.imem.bits.btb.taken || io.imem.bits.btb.bridx >= pcWordBits)
io.btb_resp := Mux((ibufBTBHitMask & bufMask).orR, ibufBTBResp, io.imem.bits.btb.bits)
io.btb_resp := io.imem.bits.btb
io.pc := Mux(nBufValid > 0, buf.pc, io.imem.bits.pc)
expand(0, 0, inst)
@@ -97,15 +89,16 @@ class IBuf(implicit p: Parameters) extends CoreModule {
io.inst(i).bits.raw := curInst
if (usingCompressed) {
val replay = ic_replay(j) || (!exp.io.rvc && (btbHitMask(j) || ic_replay(j+1)))
val replay = ic_replay(j) || (!exp.io.rvc && ic_replay(j+1))
val full_insn = exp.io.rvc || valid(j+1) || buf_replay(j)
io.inst(i).valid := valid(j) && full_insn
io.inst(i).bits.xcpt0 := xcpt(j)
io.inst(i).bits.xcpt1 := Mux(exp.io.rvc, 0.U, xcpt(j+1).asUInt).asTypeOf(new FrontendExceptions)
io.inst(i).bits.replay := replay
io.inst(i).bits.btb_hit := btbHitMask(j) || (!exp.io.rvc && btbHitMask(j+1))
io.inst(i).bits.rvc := exp.io.rvc
when ((bufMask(j) && exp.io.rvc) || bufMask(j+1)) { io.btb_resp := ibufBTBResp }
when (full_insn && (i == 0 || io.inst(i).ready)) { nReady := Mux(exp.io.rvc, j+1, j+2) }
expand(i+1, Mux(exp.io.rvc, j+1, j+2), Mux(exp.io.rvc, curInst >> 16, curInst >> 32))
@@ -116,7 +109,6 @@ class IBuf(implicit p: Parameters) extends CoreModule {
io.inst(i).bits.xcpt1 := 0.U.asTypeOf(new FrontendExceptions)
io.inst(i).bits.replay := ic_replay(i)
io.inst(i).bits.rvc := false
io.inst(i).bits.btb_hit := btbHitMask(i)
expand(i+1, null, curInst >> 32)
}