1
0
rocket-chip/src/main/scala/rocket/BTB.scala
Andrew Waterman d0c6cbba6b Improve frontend branch prediction
- Put correctness responsibility on Frontend, not IBuf, for improved
  separation of concerns.  Frontend must detect case that the BTB
  predicts a taken branch in the middle of an instruction.

- Pass BTB information down pipeline unconditionally, fixing case that
  screws up the branch history when the BTB misses and the instruction
  is misaligned.

- Remove jumpInFrontend option; it's now unconditional.

- Default to one-bit counters in the BHT.  For tiny BHTs like these, it's
  more resource efficient to have a larger index space than to have
  hysteresis.
2017-11-09 00:00:56 -08:00

326 lines
12 KiB
Scala

// See LICENSE.Berkeley for license details.
// See LICENSE.SiFive for license details.
package freechips.rocketchip.rocket
import Chisel._
import Chisel.ImplicitConversions._
import freechips.rocketchip.config.Parameters
import freechips.rocketchip.coreplex.CacheBlockBytes
import freechips.rocketchip.tile.HasCoreParameters
import freechips.rocketchip.util._
case class BHTParams(
nEntries: Int = 512,
counterLength: Int = 1,
historyLength: Int = 8,
historyBits: Int = 3)
case class BTBParams(
nEntries: Int = 28,
nMatchBits: Int = 14,
nPages: Int = 6,
nRAS: Int = 6,
bhtParams: Option[BHTParams] = Some(BHTParams()),
updatesOutOfOrder: Boolean = false)
trait HasBtbParameters extends HasCoreParameters {
val btbParams = tileParams.btb.getOrElse(BTBParams(nEntries = 0))
val matchBits = btbParams.nMatchBits max log2Ceil(p(CacheBlockBytes) * tileParams.icache.get.nSets)
val entries = btbParams.nEntries
val updatesOutOfOrder = btbParams.updatesOutOfOrder
val nPages = (btbParams.nPages + 1) / 2 * 2 // control logic assumes 2 divides pages
}
abstract class BtbModule(implicit val p: Parameters) extends Module with HasBtbParameters
abstract class BtbBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
with HasBtbParameters
class RAS(nras: Int) {
def push(addr: UInt): Unit = {
when (count < nras) { count := count + 1 }
val nextPos = Mux(Bool(isPow2(nras)) || pos < nras-1, pos+1, UInt(0))
stack(nextPos) := addr
pos := nextPos
}
def peek: UInt = stack(pos)
def pop(): Unit = when (!isEmpty) {
count := count - 1
pos := Mux(Bool(isPow2(nras)) || pos > 0, pos-1, UInt(nras-1))
}
def clear(): Unit = count := UInt(0)
def isEmpty: Bool = count === UInt(0)
private val count = Reg(UInt(width = log2Up(nras+1)))
private val pos = Reg(UInt(width = log2Up(nras)))
private val stack = Reg(Vec(nras, UInt()))
}
class BHTResp(implicit p: Parameters) extends BtbBundle()(p) {
val history = UInt(width = btbParams.bhtParams.map(_.historyLength).getOrElse(1))
val value = UInt(width = btbParams.bhtParams.map(_.counterLength).getOrElse(1))
def taken = value(0)
def strongly_taken = value === 1
}
// BHT contains table of 2-bit counters and a global history register.
// The BHT only predicts and updates when there is a BTB hit.
// The global history:
// - updated speculatively in fetch (if there's a BTB hit).
// - on a mispredict, the history register is reset (again, only if BTB hit).
// The counter table:
// - each counter corresponds with the address of the fetch packet ("fetch pc").
// - updated when a branch resolves (and BTB was a hit for that branch).
// The updating branch must provide its "fetch pc".
class BHT(params: BHTParams)(implicit val p: Parameters) extends HasCoreParameters {
def index(addr: UInt, history: UInt) = {
def hashHistory(hist: UInt) = if (params.historyLength == params.historyBits) hist else {
val k = math.sqrt(3)/2
val i = BigDecimal(k * math.pow(2, params.historyLength)).toBigInt
(i.U * hist)(params.historyLength-1, params.historyLength-params.historyBits)
}
def hashAddr(addr: UInt) = {
val hi = addr >> log2Ceil(fetchBytes)
hi(log2Ceil(params.nEntries)-1, 0) ^ (hi >> log2Ceil(params.nEntries))(1, 0)
}
hashAddr(addr) ^ (hashHistory(history) << (log2Up(params.nEntries) - params.historyBits))
}
def get(addr: UInt): BHTResp = {
val res = Wire(new BHTResp)
res.value := table(index(addr, history))
res.history := history
res
}
def updateTable(addr: UInt, d: BHTResp, taken: Bool): Unit = {
table(index(addr, d.history)) := (params.counterLength match {
case 1 => taken
case 2 => Cat(taken ^ d.value(0), d.value === 1 || d.value(1) && taken)
})
}
def resetHistory(d: BHTResp): Unit = {
history := d.history
}
def updateHistory(addr: UInt, d: BHTResp, taken: Bool): Unit = {
history := Cat(taken, d.history >> 1)
}
def advanceHistory(taken: Bool): Unit = {
history := Cat(taken, history >> 1)
}
private val table = Mem(params.nEntries, UInt(width = params.counterLength))
val history = Reg(UInt(width = params.historyLength))
}
object CFIType {
def SZ = 2
def apply() = UInt(width = SZ)
def branch = 0.U
def jump = 1.U
def call = 2.U
def ret = 3.U
}
// BTB update occurs during branch resolution (and only on a mispredict).
// - "pc" is what future fetch PCs will tag match against.
// - "br_pc" is the PC of the branch instruction.
class BTBUpdate(implicit p: Parameters) extends BtbBundle()(p) {
val prediction = new BTBResp
val pc = UInt(width = vaddrBits)
val target = UInt(width = vaddrBits)
val taken = Bool()
val isValid = Bool()
val br_pc = UInt(width = vaddrBits)
val cfiType = CFIType()
}
// BHT update occurs during branch resolution on all conditional branches.
// - "pc" is what future fetch PCs will tag match against.
class BHTUpdate(implicit p: Parameters) extends BtbBundle()(p) {
val prediction = new BHTResp
val pc = UInt(width = vaddrBits)
val branch = Bool()
val taken = Bool()
val mispredict = Bool()
}
class RASUpdate(implicit p: Parameters) extends BtbBundle()(p) {
val cfiType = CFIType()
val returnAddr = UInt(width = vaddrBits)
}
// - "bridx" is the low-order PC bits of the predicted branch (after
// shifting off the lowest log(inst_bytes) bits off).
// - "mask" provides a mask of valid instructions (instructions are
// masked off by the predicted taken branch from the BTB).
class BTBResp(implicit p: Parameters) extends BtbBundle()(p) {
val cfiType = CFIType()
val taken = Bool()
val mask = Bits(width = fetchWidth)
val bridx = Bits(width = log2Up(fetchWidth))
val target = UInt(width = vaddrBits)
val entry = UInt(width = log2Up(entries + 1))
val bht = new BHTResp
}
class BTBReq(implicit p: Parameters) extends BtbBundle()(p) {
val addr = UInt(width = vaddrBits)
}
// fully-associative branch target buffer
// Higher-performance processors may cause BTB updates to occur out-of-order,
// which requires an extra CAM port for updates (to ensure no duplicates get
// placed in BTB).
class BTB(implicit p: Parameters) extends BtbModule {
val io = new Bundle {
val req = Valid(new BTBReq).flip
val resp = Valid(new BTBResp)
val btb_update = Valid(new BTBUpdate).flip
val bht_update = Valid(new BHTUpdate).flip
val bht_advance = Valid(new BTBResp).flip
val ras_update = Valid(new RASUpdate).flip
val ras_head = Valid(UInt(width = vaddrBits))
val flush = Bool().asInput
}
val idxs = Reg(Vec(entries, UInt(width=matchBits - log2Up(coreInstBytes))))
val idxPages = Reg(Vec(entries, UInt(width=log2Up(nPages))))
val tgts = Reg(Vec(entries, UInt(width=matchBits - log2Up(coreInstBytes))))
val tgtPages = Reg(Vec(entries, UInt(width=log2Up(nPages))))
val pages = Reg(Vec(nPages, UInt(width=vaddrBits - matchBits)))
val pageValid = Reg(init = UInt(0, nPages))
val isValid = Reg(init = UInt(0, entries))
val cfiType = Reg(Vec(entries, CFIType()))
val brIdx = Reg(Vec(entries, UInt(width=log2Up(fetchWidth))))
private def page(addr: UInt) = addr >> matchBits
private def pageMatch(addr: UInt) = {
val p = page(addr)
pageValid & pages.map(_ === p).asUInt
}
private def idxMatch(addr: UInt) = {
val idx = addr(matchBits-1, log2Up(coreInstBytes))
idxs.map(_ === idx).asUInt & isValid
}
val r_btb_update = Pipe(io.btb_update)
val update_target = io.req.bits.addr
val pageHit = pageMatch(io.req.bits.addr)
val idxHit = idxMatch(io.req.bits.addr)
val updatePageHit = pageMatch(r_btb_update.bits.pc)
val (updateHit, updateHitAddr) =
if (updatesOutOfOrder) {
val updateHits = (pageHit << 1)(Mux1H(idxMatch(r_btb_update.bits.pc), idxPages))
(updateHits.orR, OHToUInt(updateHits))
} else (r_btb_update.bits.prediction.entry < entries, r_btb_update.bits.prediction.entry)
val useUpdatePageHit = updatePageHit.orR
val usePageHit = pageHit.orR
val doIdxPageRepl = !useUpdatePageHit
val nextPageRepl = Reg(UInt(width = log2Ceil(nPages)))
val idxPageRepl = Cat(pageHit(nPages-2,0), pageHit(nPages-1)) | Mux(usePageHit, UInt(0), UIntToOH(nextPageRepl))
val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl)
val idxPageUpdate = OHToUInt(idxPageUpdateOH)
val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0))
val samePage = page(r_btb_update.bits.pc) === page(update_target)
val doTgtPageRepl = !samePage && !usePageHit
val tgtPageRepl = Mux(samePage, idxPageUpdateOH, Cat(idxPageUpdateOH(nPages-2,0), idxPageUpdateOH(nPages-1)))
val tgtPageUpdate = OHToUInt(pageHit | Mux(usePageHit, UInt(0), tgtPageRepl))
val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0))
when (r_btb_update.valid && (doIdxPageRepl || doTgtPageRepl)) {
val both = doIdxPageRepl && doTgtPageRepl
val next = nextPageRepl + Mux[UInt](both, 2, 1)
nextPageRepl := Mux(next >= nPages, next(0), next)
}
val repl = new PseudoLRU(entries)
val waddr = Mux(updateHit, updateHitAddr, repl.replace)
val r_resp = Pipe(io.resp)
when (r_resp.valid && r_resp.bits.taken || r_btb_update.valid) {
repl.access(Mux(r_btb_update.valid, waddr, r_resp.bits.entry))
}
when (r_btb_update.valid) {
val mask = UIntToOH(waddr)
idxs(waddr) := r_btb_update.bits.pc(matchBits-1, log2Up(coreInstBytes))
tgts(waddr) := update_target(matchBits-1, log2Up(coreInstBytes))
idxPages(waddr) := idxPageUpdate +& 1 // the +1 corresponds to the <<1 on io.resp.valid
tgtPages(waddr) := tgtPageUpdate
cfiType(waddr) := r_btb_update.bits.cfiType
isValid := Mux(r_btb_update.bits.isValid, isValid | mask, isValid & ~mask)
if (fetchWidth > 1)
brIdx(waddr) := r_btb_update.bits.br_pc >> log2Up(coreInstBytes)
require(nPages % 2 == 0)
val idxWritesEven = !idxPageUpdate(0)
def writeBank(i: Int, mod: Int, en: UInt, data: UInt) =
for (i <- i until nPages by mod)
when (en(i)) { pages(i) := data }
writeBank(0, 2, Mux(idxWritesEven, idxPageReplEn, tgtPageReplEn),
Mux(idxWritesEven, page(r_btb_update.bits.pc), page(update_target)))
writeBank(1, 2, Mux(idxWritesEven, tgtPageReplEn, idxPageReplEn),
Mux(idxWritesEven, page(update_target), page(r_btb_update.bits.pc)))
pageValid := pageValid | tgtPageReplEn | idxPageReplEn
}
io.resp.valid := (pageHit << 1)(Mux1H(idxHit, idxPages))
io.resp.bits.taken := true
io.resp.bits.target := Cat(pages(Mux1H(idxHit, tgtPages)), Mux1H(idxHit, tgts) << log2Up(coreInstBytes))
io.resp.bits.entry := OHToUInt(idxHit)
io.resp.bits.bridx := (if (fetchWidth > 1) Mux1H(idxHit, brIdx) else UInt(0))
io.resp.bits.mask := Cat((UInt(1) << ~Mux(io.resp.bits.taken, ~io.resp.bits.bridx, UInt(0)))-1, UInt(1))
io.resp.bits.cfiType := Mux1H(idxHit, cfiType)
// if multiple entries for same PC land in BTB, zap them
when (PopCountAtLeast(idxHit, 2)) {
isValid := isValid & ~idxHit
}
when (io.flush) {
isValid := 0
}
if (btbParams.bhtParams.nonEmpty) {
val bht = new BHT(btbParams.bhtParams.get)
val isBranch = (idxHit & cfiType.map(_ === CFIType.branch).asUInt).orR
val res = bht.get(io.req.bits.addr)
when (io.bht_advance.valid) {
bht.advanceHistory(io.bht_advance.bits.bht.taken)
}
when (io.bht_update.valid) {
when (io.bht_update.bits.branch) {
bht.updateTable(io.bht_update.bits.pc, io.bht_update.bits.prediction, io.bht_update.bits.taken)
when (io.bht_update.bits.mispredict) {
bht.updateHistory(io.bht_update.bits.pc, io.bht_update.bits.prediction, io.bht_update.bits.taken)
}
}.elsewhen (io.bht_update.bits.mispredict) {
bht.resetHistory(io.bht_update.bits.prediction)
}
}
when (!res.taken && isBranch) { io.resp.bits.taken := false }
io.resp.bits.bht := res
}
if (btbParams.nRAS > 0) {
val ras = new RAS(btbParams.nRAS)
val doPeek = (idxHit & cfiType.map(_ === CFIType.ret).asUInt).orR
io.ras_head.valid := !ras.isEmpty
io.ras_head.bits := ras.peek
when (!ras.isEmpty && doPeek) {
io.resp.bits.target := ras.peek
}
when (io.ras_update.valid) {
when (io.ras_update.bits.cfiType === CFIType.call) {
ras.push(io.ras_update.bits.returnAddr)
}.elsewhen (io.ras_update.bits.cfiType === CFIType.ret) {
ras.pop()
}
}
}
}