From fea31d2167f00a4842827b9c06122c18d79eb9e4 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Tue, 11 Nov 2014 03:34:05 -0800 Subject: [PATCH] Significant changes and fixes to BTB for superscalar fetch. - BTBUpdate only occurs on mispredicts now. - RASUpdate broken out from BTBUpdate (allows RASUpdate to be performed in Decode). - Added optional 2nd CAM port to BTB for updates (for when updates to the BTB may occur out-of-order). - Fixed resp.mask bit logic. --- rocket/src/main/scala/btb.scala | 89 +++++++++++++++++------------- rocket/src/main/scala/ctrl.scala | 11 ++-- rocket/src/main/scala/dpath.scala | 2 +- rocket/src/main/scala/icache.scala | 6 +- 4 files changed, 63 insertions(+), 45 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index d0506601..5614561d 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -65,36 +65,40 @@ class BHT(nbht: Int) { when (update) { history := Cat(taken, history(nbhtbits-1,1)) } res } - def update(addr: UInt, d: BHTResp, taken: Bool, mispredict: Bool): Unit = { + def update(addr: UInt, d: BHTResp, taken: Bool): Unit = { val index = addr(nbhtbits+1,2) ^ d.history table(index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken)) - when (mispredict) { history := Cat(taken, d.history(nbhtbits-1,1)) } + history := Cat(taken, d.history(nbhtbits-1,1)) } private val table = Mem(UInt(width = 2), nbht) val history = Reg(UInt(width = nbhtbits)) } -// BTB update occurs during branch resolution. +// BTB update occurs during branch resolution (and only on a mispredict). // - "pc" is what future fetch PCs will tag match against. // - "br_pc" is the PC of the branch instruction. -// - "bridx" is the low-order PC bits of the predicted branch (after -// shifting off the lowest log(inst_bytes) bits off). -// - "resp.mask" provides a mask of valid instructions (instructions are -// masked off by the predicted taken branch). class BTBUpdate extends Bundle with BTBParameters { val prediction = Valid(new BTBResp) val pc = UInt(width = vaddrBits) val target = UInt(width = vaddrBits) - val returnAddr = UInt(width = vaddrBits) val taken = Bool() val isJump = Bool() - val isCall = Bool() val isReturn = Bool() val br_pc = UInt(width = vaddrBits) - val mispredict = Bool() } +class RASUpdate extends Bundle with BTBParameters { + val isCall = Bool() + val isReturn = Bool() + val returnAddr = UInt(width = vaddrBits) + val prediction = Valid(new BTBResp) +} + +// - "bridx" is the low-order PC bits of the predicted branch (after +// shifting off the lowest log(inst_bytes) bits off). +// - "resp.mask" provides a mask of valid instructions (instructions are +// masked off by the predicted taken branch). class BTBResp extends Bundle with BTBParameters { val taken = Bool() val mask = Bits(width = params(FetchWidth)) @@ -109,11 +113,15 @@ class BTBReq extends Bundle with BTBParameters { } // fully-associative branch target buffer -class BTB extends Module with BTBParameters { +// Higher-performance processors may cause BTB updates to occur out-of-order, +// which requires an extra CAM port for updates (to ensure no duplicates get +// placed in BTB). +class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParameters { val io = new Bundle { val req = Valid(new BTBReq).flip val resp = Valid(new BTBResp) val update = Valid(new BTBUpdate).flip + val ras_update = Valid(new RASUpdate).flip val invalidate = Bool(INPUT) } @@ -158,8 +166,7 @@ class BTB extends Module with BTBParameters { } val updateHit = r_update.bits.prediction.valid - val updateValid = r_update.bits.mispredict || updateHit && Bool(nBHT > 0) - val updateTarget = updateValid && r_update.bits.mispredict && r_update.bits.taken + val updateTarget = r_update.bits.taken val useUpdatePageHit = updatePageHit.orR val doIdxPageRepl = updateTarget && !useUpdatePageHit @@ -179,27 +186,32 @@ class BTB extends Module with BTBParameters { val pageReplEn = idxPageReplEn | tgtPageReplEn idxPageRepl := UIntToOH(Counter(r_update.valid && doPageRepl, nPages)._1) - when (r_update.valid && !(updateValid && !updateTarget)) { - val nextRepl = Counter(!updateHit && updateValid, entries)._1 - val waddr = Mux(updateHit, r_update.bits.prediction.bits.entry, nextRepl) + when (r_update.valid && updateTarget) { + assert(io.req.bits.addr === r_update.bits.target, "BTB request != I$ target") + + val nextRepl = Counter(!updateHit, entries)._1 + var waddr:UInt = null + if (!updates_out_of_order) { + waddr = Mux(updateHit, r_update.bits.prediction.bits.entry, nextRepl) + } else { + println(" BTB accepts out-of-order updates.") + waddr = Mux(updateHits.orR, OHToUInt(updateHits), nextRepl) + } // invalidate entries if we stomp on pages they depend upon idxValid := idxValid & ~Vec.tabulate(entries)(i => (pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR).toBits - idxValid(waddr) := updateValid - when (updateTarget) { - assert(io.req.bits.addr === r_update.bits.target, "BTB request != I$ target") - idxs(waddr) := r_update.bits.pc - tgts(waddr) := update_target - idxPages(waddr) := idxPageUpdate - tgtPages(waddr) := tgtPageUpdate - useRAS(waddr) := r_update.bits.isReturn - isJump(waddr) := r_update.bits.isJump - if (params(FetchWidth) == 1) { - brIdx(waddr) := UInt(0) - } else { - brIdx(waddr) := r_update.bits.br_pc >> log2Up(params(CoreInstBits)/8) - } + idxValid(waddr) := Bool(true) + idxs(waddr) := r_update.bits.pc + tgts(waddr) := update_target + idxPages(waddr) := idxPageUpdate + tgtPages(waddr) := tgtPageUpdate + useRAS(waddr) := r_update.bits.isReturn + isJump(waddr) := r_update.bits.isJump + if (params(FetchWidth) == 1) { + brIdx(waddr) := UInt(0) + } else { + brIdx(waddr) := r_update.bits.br_pc >> log2Up(params(CoreInstBits)/8) } require(nPages % 2 == 0) @@ -231,7 +243,9 @@ class BTB extends Module with BTBParameters { io.resp.bits.mask := UInt(1) } else { // note: btb_resp is clock gated, so the mask is only relevant for the io.resp.valid case - io.resp.bits.mask := Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)) + val all_ones = UInt((1 << (params(FetchWidth)+1))-1) + io.resp.bits.mask := Mux(io.resp.bits.taken, Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)), + all_ones) } if (nBHT > 0) { @@ -239,8 +253,7 @@ class BTB extends Module with BTBParameters { val res = bht.get(io.req.bits.addr, io.req.valid && hits.orR && !Mux1H(hits, isJump)) val update_btb_hit = io.update.bits.prediction.valid when (io.update.valid && update_btb_hit && !io.update.bits.isJump) { - bht.update(io.update.bits.pc, io.update.bits.prediction.bits.bht, - io.update.bits.taken, io.update.bits.mispredict) + bht.update(io.update.bits.pc, io.update.bits.prediction.bits.bht, io.update.bits.taken) } when (!res.value(0) && !Mux1H(hits, isJump)) { io.resp.bits.taken := false } io.resp.bits.bht := res @@ -252,13 +265,13 @@ class BTB extends Module with BTBParameters { when (!ras.isEmpty && doPeek) { io.resp.bits.target := ras.peek } - when (io.update.valid) { - when (io.update.bits.isCall) { - ras.push(io.update.bits.returnAddr) + when (io.ras_update.valid) { + when (io.ras_update.bits.isCall) { + ras.push(io.ras_update.bits.returnAddr) when (doPeek) { - io.resp.bits.target := io.update.bits.returnAddr + io.resp.bits.target := io.ras_update.bits.returnAddr } - }.elsewhen (io.update.bits.isReturn && io.update.bits.prediction.valid) { + }.elsewhen (io.ras_update.bits.isReturn && io.ras_update.bits.prediction.valid) { ras.pop } } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 6ea50107..f3aff610 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -652,15 +652,18 @@ class Control extends Module Mux(replay_wb, PC_WB, // replay PC_MEM))) - io.imem.btb_update.valid := (mem_reg_branch || io.imem.btb_update.bits.isJump) && !take_pc_wb + io.imem.btb_update.valid := take_pc_mem && !take_pc_wb io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp io.imem.btb_update.bits.taken := mem_reg_branch && io.dpath.mem_br_taken || io.imem.btb_update.bits.isJump - io.imem.btb_update.bits.mispredict := take_pc_mem io.imem.btb_update.bits.isJump := mem_reg_jal || mem_reg_jalr - io.imem.btb_update.bits.isCall := mem_reg_wen && io.dpath.mem_waddr(0) io.imem.btb_update.bits.isReturn := mem_reg_jalr && io.dpath.mem_rs1_ra - io.imem.req.valid := take_pc + io.imem.ras_update.valid := io.imem.btb_update.bits.isJump && !take_pc_wb + io.imem.ras_update.bits.isCall := mem_reg_wen && io.dpath.mem_waddr(0) + io.imem.ras_update.bits.isReturn := mem_reg_jalr && io.dpath.mem_rs1_ra + io.imem.ras_update.bits.prediction.valid := mem_reg_btb_hit + io.imem.ras_update.bits.prediction.bits := mem_reg_btb_resp + io.imem.req.valid := take_pc val bypassDst = Array(id_raddr1, id_raddr2) val bypassSrc = Array.fill(NBYP)((Bool(true), UInt(0))) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 08e9d53f..4e05f50c 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -286,8 +286,8 @@ class Datapath extends Module wb_reg_pc)).toUInt // PC_WB io.imem.btb_update.bits.pc := mem_reg_pc io.imem.btb_update.bits.target := io.imem.req.bits.pc - io.imem.btb_update.bits.returnAddr := mem_int_wdata io.imem.btb_update.bits.br_pc := mem_reg_pc + io.imem.ras_update.bits.returnAddr := mem_int_wdata // for hazard/bypass opportunity detection io.ctrl.ex_waddr := ex_reg_inst(11,7) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index cdf21bca..7f94a64f 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -33,18 +33,19 @@ class CPUFrontendIO extends CoreBundle { val resp = Decoupled(new FrontendResp).flip val btb_resp = Valid(new BTBResp).flip val btb_update = Valid(new BTBUpdate) + val ras_update = Valid(new RASUpdate) val ptw = new TLBPTWIO().flip val invalidate = Bool(OUTPUT) } -class Frontend extends FrontendModule +class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule { val io = new Bundle { val cpu = new CPUFrontendIO().flip val mem = new UncachedTileLinkIO } - val btb = Module(new BTB) + val btb = Module(new BTB(btb_updates_out_of_order)) val icache = Module(new ICache) val tlb = Module(new TLB(params(NITLBEntries))) @@ -88,6 +89,7 @@ class Frontend extends FrontendModule btb.io.req.valid := !stall && !icmiss btb.io.req.bits.addr := s1_pc & SInt(-coreInstBytes) btb.io.update := io.cpu.btb_update + btb.io.ras_update := io.cpu.ras_update btb.io.invalidate := io.cpu.invalidate || io.cpu.ptw.invalidate tlb.io.ptw <> io.cpu.ptw