// See LICENSE.Berkeley for license details. // See LICENSE.SiFive for license details. package freechips.rocketchip.rocket import Chisel._ import Chisel.ImplicitConversions._ import freechips.rocketchip.config.Parameters import freechips.rocketchip.coreplex.CacheBlockBytes import freechips.rocketchip.tile.HasCoreParameters import freechips.rocketchip.util._ case class BHTParams( nEntries: Int = 512, counterLength: Int = 1, historyLength: Int = 8, historyBits: Int = 3) case class BTBParams( nEntries: Int = 28, nMatchBits: Int = 14, nPages: Int = 6, nRAS: Int = 6, bhtParams: Option[BHTParams] = Some(BHTParams()), updatesOutOfOrder: Boolean = false) trait HasBtbParameters extends HasCoreParameters { val btbParams = tileParams.btb.getOrElse(BTBParams(nEntries = 0)) val matchBits = btbParams.nMatchBits max log2Ceil(p(CacheBlockBytes) * tileParams.icache.get.nSets) val entries = btbParams.nEntries val updatesOutOfOrder = btbParams.updatesOutOfOrder val nPages = (btbParams.nPages + 1) / 2 * 2 // control logic assumes 2 divides pages } abstract class BtbModule(implicit val p: Parameters) extends Module with HasBtbParameters abstract class BtbBundle(implicit val p: Parameters) extends ParameterizedBundle()(p) with HasBtbParameters class RAS(nras: Int) { def push(addr: UInt): Unit = { when (count < nras) { count := count + 1 } val nextPos = Mux(Bool(isPow2(nras)) || pos < nras-1, pos+1, UInt(0)) stack(nextPos) := addr pos := nextPos } def peek: UInt = stack(pos) def pop(): Unit = when (!isEmpty) { count := count - 1 pos := Mux(Bool(isPow2(nras)) || pos > 0, pos-1, UInt(nras-1)) } def clear(): Unit = count := UInt(0) def isEmpty: Bool = count === UInt(0) private val count = Reg(UInt(width = log2Up(nras+1))) private val pos = Reg(UInt(width = log2Up(nras))) private val stack = Reg(Vec(nras, UInt())) } class BHTResp(implicit p: Parameters) extends BtbBundle()(p) { val history = UInt(width = btbParams.bhtParams.map(_.historyLength).getOrElse(1)) val value = UInt(width = btbParams.bhtParams.map(_.counterLength).getOrElse(1)) def taken = value(0) def strongly_taken = value === 1 } // BHT contains table of 2-bit counters and a global history register. // The BHT only predicts and updates when there is a BTB hit. // The global history: // - updated speculatively in fetch (if there's a BTB hit). // - on a mispredict, the history register is reset (again, only if BTB hit). // The counter table: // - each counter corresponds with the address of the fetch packet ("fetch pc"). // - updated when a branch resolves (and BTB was a hit for that branch). // The updating branch must provide its "fetch pc". class BHT(params: BHTParams)(implicit val p: Parameters) extends HasCoreParameters { def index(addr: UInt, history: UInt) = { def hashHistory(hist: UInt) = if (params.historyLength == params.historyBits) hist else { val k = math.sqrt(3)/2 val i = BigDecimal(k * math.pow(2, params.historyLength)).toBigInt (i.U * hist)(params.historyLength-1, params.historyLength-params.historyBits) } def hashAddr(addr: UInt) = { val hi = addr >> log2Ceil(fetchBytes) hi(log2Ceil(params.nEntries)-1, 0) ^ (hi >> log2Ceil(params.nEntries))(1, 0) } hashAddr(addr) ^ (hashHistory(history) << (log2Up(params.nEntries) - params.historyBits)) } def get(addr: UInt): BHTResp = { val res = Wire(new BHTResp) res.value := table(index(addr, history)) res.history := history res } def updateTable(addr: UInt, d: BHTResp, taken: Bool): Unit = { table(index(addr, d.history)) := (params.counterLength match { case 1 => taken case 2 => Cat(taken ^ d.value(0), d.value === 1 || d.value(1) && taken) }) } def resetHistory(d: BHTResp): Unit = { history := d.history } def updateHistory(addr: UInt, d: BHTResp, taken: Bool): Unit = { history := Cat(taken, d.history >> 1) } def advanceHistory(taken: Bool): Unit = { history := Cat(taken, history >> 1) } private val table = Mem(params.nEntries, UInt(width = params.counterLength)) val history = Reg(UInt(width = params.historyLength)) } object CFIType { def SZ = 2 def apply() = UInt(width = SZ) def branch = 0.U def jump = 1.U def call = 2.U def ret = 3.U } // BTB update occurs during branch resolution (and only on a mispredict). // - "pc" is what future fetch PCs will tag match against. // - "br_pc" is the PC of the branch instruction. class BTBUpdate(implicit p: Parameters) extends BtbBundle()(p) { val prediction = new BTBResp val pc = UInt(width = vaddrBits) val target = UInt(width = vaddrBits) val taken = Bool() val isValid = Bool() val br_pc = UInt(width = vaddrBits) val cfiType = CFIType() } // BHT update occurs during branch resolution on all conditional branches. // - "pc" is what future fetch PCs will tag match against. class BHTUpdate(implicit p: Parameters) extends BtbBundle()(p) { val prediction = new BHTResp val pc = UInt(width = vaddrBits) val branch = Bool() val taken = Bool() val mispredict = Bool() } class RASUpdate(implicit p: Parameters) extends BtbBundle()(p) { val cfiType = CFIType() val returnAddr = UInt(width = vaddrBits) } // - "bridx" is the low-order PC bits of the predicted branch (after // shifting off the lowest log(inst_bytes) bits off). // - "mask" provides a mask of valid instructions (instructions are // masked off by the predicted taken branch from the BTB). class BTBResp(implicit p: Parameters) extends BtbBundle()(p) { val cfiType = CFIType() val taken = Bool() val mask = Bits(width = fetchWidth) val bridx = Bits(width = log2Up(fetchWidth)) val target = UInt(width = vaddrBits) val entry = UInt(width = log2Up(entries + 1)) val bht = new BHTResp } class BTBReq(implicit p: Parameters) extends BtbBundle()(p) { val addr = UInt(width = vaddrBits) } // fully-associative branch target buffer // Higher-performance processors may cause BTB updates to occur out-of-order, // which requires an extra CAM port for updates (to ensure no duplicates get // placed in BTB). class BTB(implicit p: Parameters) extends BtbModule { val io = new Bundle { val req = Valid(new BTBReq).flip val resp = Valid(new BTBResp) val btb_update = Valid(new BTBUpdate).flip val bht_update = Valid(new BHTUpdate).flip val bht_advance = Valid(new BTBResp).flip val ras_update = Valid(new RASUpdate).flip val ras_head = Valid(UInt(width = vaddrBits)) val flush = Bool().asInput } val idxs = Reg(Vec(entries, UInt(width=matchBits - log2Up(coreInstBytes)))) val idxPages = Reg(Vec(entries, UInt(width=log2Up(nPages)))) val tgts = Reg(Vec(entries, UInt(width=matchBits - log2Up(coreInstBytes)))) val tgtPages = Reg(Vec(entries, UInt(width=log2Up(nPages)))) val pages = Reg(Vec(nPages, UInt(width=vaddrBits - matchBits))) val pageValid = Reg(init = UInt(0, nPages)) val isValid = Reg(init = UInt(0, entries)) val cfiType = Reg(Vec(entries, CFIType())) val brIdx = Reg(Vec(entries, UInt(width=log2Up(fetchWidth)))) private def page(addr: UInt) = addr >> matchBits private def pageMatch(addr: UInt) = { val p = page(addr) pageValid & pages.map(_ === p).asUInt } private def idxMatch(addr: UInt) = { val idx = addr(matchBits-1, log2Up(coreInstBytes)) idxs.map(_ === idx).asUInt & isValid } val r_btb_update = Pipe(io.btb_update) val update_target = io.req.bits.addr val pageHit = pageMatch(io.req.bits.addr) val idxHit = idxMatch(io.req.bits.addr) val updatePageHit = pageMatch(r_btb_update.bits.pc) val (updateHit, updateHitAddr) = if (updatesOutOfOrder) { val updateHits = (pageHit << 1)(Mux1H(idxMatch(r_btb_update.bits.pc), idxPages)) (updateHits.orR, OHToUInt(updateHits)) } else (r_btb_update.bits.prediction.entry < entries, r_btb_update.bits.prediction.entry) val useUpdatePageHit = updatePageHit.orR val usePageHit = pageHit.orR val doIdxPageRepl = !useUpdatePageHit val nextPageRepl = Reg(UInt(width = log2Ceil(nPages))) val idxPageRepl = Cat(pageHit(nPages-2,0), pageHit(nPages-1)) | Mux(usePageHit, UInt(0), UIntToOH(nextPageRepl)) val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl) val idxPageUpdate = OHToUInt(idxPageUpdateOH) val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0)) val samePage = page(r_btb_update.bits.pc) === page(update_target) val doTgtPageRepl = !samePage && !usePageHit val tgtPageRepl = Mux(samePage, idxPageUpdateOH, Cat(idxPageUpdateOH(nPages-2,0), idxPageUpdateOH(nPages-1))) val tgtPageUpdate = OHToUInt(pageHit | Mux(usePageHit, UInt(0), tgtPageRepl)) val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0)) when (r_btb_update.valid && (doIdxPageRepl || doTgtPageRepl)) { val both = doIdxPageRepl && doTgtPageRepl val next = nextPageRepl + Mux[UInt](both, 2, 1) nextPageRepl := Mux(next >= nPages, next(0), next) } val repl = new PseudoLRU(entries) val waddr = Mux(updateHit, updateHitAddr, repl.replace) val r_resp = Pipe(io.resp) when (r_resp.valid && r_resp.bits.taken || r_btb_update.valid) { repl.access(Mux(r_btb_update.valid, waddr, r_resp.bits.entry)) } when (r_btb_update.valid) { val mask = UIntToOH(waddr) idxs(waddr) := r_btb_update.bits.pc(matchBits-1, log2Up(coreInstBytes)) tgts(waddr) := update_target(matchBits-1, log2Up(coreInstBytes)) idxPages(waddr) := idxPageUpdate +& 1 // the +1 corresponds to the <<1 on io.resp.valid tgtPages(waddr) := tgtPageUpdate cfiType(waddr) := r_btb_update.bits.cfiType isValid := Mux(r_btb_update.bits.isValid, isValid | mask, isValid & ~mask) if (fetchWidth > 1) brIdx(waddr) := r_btb_update.bits.br_pc >> log2Up(coreInstBytes) require(nPages % 2 == 0) val idxWritesEven = !idxPageUpdate(0) def writeBank(i: Int, mod: Int, en: UInt, data: UInt) = for (i <- i until nPages by mod) when (en(i)) { pages(i) := data } writeBank(0, 2, Mux(idxWritesEven, idxPageReplEn, tgtPageReplEn), Mux(idxWritesEven, page(r_btb_update.bits.pc), page(update_target))) writeBank(1, 2, Mux(idxWritesEven, tgtPageReplEn, idxPageReplEn), Mux(idxWritesEven, page(update_target), page(r_btb_update.bits.pc))) pageValid := pageValid | tgtPageReplEn | idxPageReplEn } io.resp.valid := (pageHit << 1)(Mux1H(idxHit, idxPages)) io.resp.bits.taken := true io.resp.bits.target := Cat(pages(Mux1H(idxHit, tgtPages)), Mux1H(idxHit, tgts) << log2Up(coreInstBytes)) io.resp.bits.entry := OHToUInt(idxHit) io.resp.bits.bridx := (if (fetchWidth > 1) Mux1H(idxHit, brIdx) else UInt(0)) io.resp.bits.mask := Cat((UInt(1) << ~Mux(io.resp.bits.taken, ~io.resp.bits.bridx, UInt(0)))-1, UInt(1)) io.resp.bits.cfiType := Mux1H(idxHit, cfiType) // if multiple entries for same PC land in BTB, zap them when (PopCountAtLeast(idxHit, 2)) { isValid := isValid & ~idxHit } when (io.flush) { isValid := 0 } if (btbParams.bhtParams.nonEmpty) { val bht = new BHT(btbParams.bhtParams.get) val isBranch = (idxHit & cfiType.map(_ === CFIType.branch).asUInt).orR val res = bht.get(io.req.bits.addr) when (io.bht_advance.valid) { bht.advanceHistory(io.bht_advance.bits.bht.taken) } when (io.bht_update.valid) { when (io.bht_update.bits.branch) { bht.updateTable(io.bht_update.bits.pc, io.bht_update.bits.prediction, io.bht_update.bits.taken) when (io.bht_update.bits.mispredict) { bht.updateHistory(io.bht_update.bits.pc, io.bht_update.bits.prediction, io.bht_update.bits.taken) } }.elsewhen (io.bht_update.bits.mispredict) { bht.resetHistory(io.bht_update.bits.prediction) } } when (!res.taken && isBranch) { io.resp.bits.taken := false } io.resp.bits.bht := res } if (btbParams.nRAS > 0) { val ras = new RAS(btbParams.nRAS) val doPeek = (idxHit & cfiType.map(_ === CFIType.ret).asUInt).orR io.ras_head.valid := !ras.isEmpty io.ras_head.bits := ras.peek when (!ras.isEmpty && doPeek) { io.resp.bits.target := ras.peek } when (io.ras_update.valid) { when (io.ras_update.bits.cfiType === CFIType.call) { ras.push(io.ras_update.bits.returnAddr) }.elsewhen (io.ras_update.bits.cfiType === CFIType.ret) { ras.pop() } } } }