From d0c6cbba6ba3e7e63db88823c22e5cf093771b8c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 8 Nov 2017 17:23:25 -0800 Subject: [PATCH] Improve frontend branch prediction - Put correctness responsibility on Frontend, not IBuf, for improved separation of concerns. Frontend must detect the case where the BTB predicts a taken branch in the middle of an instruction. - Pass BTB information down the pipeline unconditionally, fixing a case that screws up the branch history when the BTB misses and the instruction is misaligned. - Remove jumpInFrontend option; it's now unconditional. - Default to one-bit counters in the BHT. For tiny BHTs like these, it's more resource efficient to have a larger index space than to have hysteresis. --- src/main/scala/rocket/BTB.scala | 84 ++++++---- src/main/scala/rocket/Frontend.scala | 208 +++++++++++++------------ src/main/scala/rocket/IBuf.scala | 22 +-- src/main/scala/rocket/RocketCore.scala | 19 +-- src/main/scala/tile/Core.scala | 1 - 5 files changed, 177 insertions(+), 157 deletions(-) diff --git a/src/main/scala/rocket/BTB.scala b/src/main/scala/rocket/BTB.scala index d594fe6f..90413d0b 100644 --- a/src/main/scala/rocket/BTB.scala +++ b/src/main/scala/rocket/BTB.scala @@ -10,12 +10,18 @@ import freechips.rocketchip.coreplex.CacheBlockBytes import freechips.rocketchip.tile.HasCoreParameters import freechips.rocketchip.util._ +case class BHTParams( + nEntries: Int = 512, + counterLength: Int = 1, + historyLength: Int = 8, + historyBits: Int = 3) + case class BTBParams( nEntries: Int = 28, nMatchBits: Int = 14, nPages: Int = 6, nRAS: Int = 6, - nBHT: Int = 256, + bhtParams: Option[BHTParams] = Some(BHTParams()), updatesOutOfOrder: Boolean = false) trait HasBtbParameters extends HasCoreParameters { @@ -51,9 +57,10 @@ class RAS(nras: Int) { } class BHTResp(implicit p: Parameters) extends BtbBundle()(p) { - val history = UInt(width = log2Up(btbParams.nBHT).max(1)) - val value = UInt(width = 2) - val taken = Bool() + val history = 
UInt(width = btbParams.bhtParams.map(_.historyLength).getOrElse(1)) + val value = UInt(width = btbParams.bhtParams.map(_.counterLength).getOrElse(1)) + def taken = value(0) + def strongly_taken = value === 1 } // BHT contains table of 2-bit counters and a global history register. @@ -65,32 +72,43 @@ class BHTResp(implicit p: Parameters) extends BtbBundle()(p) { // - each counter corresponds with the address of the fetch packet ("fetch pc"). // - updated when a branch resolves (and BTB was a hit for that branch). // The updating branch must provide its "fetch pc". -class BHT(nbht: Int)(implicit val p: Parameters) extends HasCoreParameters { - val nbhtbits = log2Up(nbht) +class BHT(params: BHTParams)(implicit val p: Parameters) extends HasCoreParameters { + def index(addr: UInt, history: UInt) = { + def hashHistory(hist: UInt) = if (params.historyLength == params.historyBits) hist else { + val k = math.sqrt(3)/2 + val i = BigDecimal(k * math.pow(2, params.historyLength)).toBigInt + (i.U * hist)(params.historyLength-1, params.historyLength-params.historyBits) + } + def hashAddr(addr: UInt) = { + val hi = addr >> log2Ceil(fetchBytes) + hi(log2Ceil(params.nEntries)-1, 0) ^ (hi >> log2Ceil(params.nEntries))(1, 0) + } + hashAddr(addr) ^ (hashHistory(history) << (log2Up(params.nEntries) - params.historyBits)) + } def get(addr: UInt): BHTResp = { val res = Wire(new BHTResp) - val index = addr(nbhtbits+log2Up(coreInstBytes)-1, log2Up(coreInstBytes)) ^ history - res.value := table(index) + res.value := table(index(addr, history)) res.history := history - res.taken := res.value(0) res } def updateTable(addr: UInt, d: BHTResp, taken: Bool): Unit = { - val index = addr(nbhtbits+log2Up(coreInstBytes)-1, log2Up(coreInstBytes)) ^ d.history - table(index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken)) + table(index(addr, d.history)) := (params.counterLength match { + case 1 => taken + case 2 => Cat(taken ^ d.value(0), d.value === 1 || d.value(1) && 
taken) + }) } def resetHistory(d: BHTResp): Unit = { history := d.history } def updateHistory(addr: UInt, d: BHTResp, taken: Bool): Unit = { - history := Cat(taken, d.history(nbhtbits-1,1)) + history := Cat(taken, d.history >> 1) } def advanceHistory(taken: Bool): Unit = { - history := Cat(taken, history(nbhtbits-1,1)) + history := Cat(taken, history >> 1) } - private val table = Mem(nbht, UInt(width = 2)) - val history = Reg(UInt(width = nbhtbits)) + private val table = Mem(params.nEntries, UInt(width = params.counterLength)) + val history = Reg(UInt(width = params.historyLength)) } object CFIType { @@ -106,7 +124,7 @@ object CFIType { // - "pc" is what future fetch PCs will tag match against. // - "br_pc" is the PC of the branch instruction. class BTBUpdate(implicit p: Parameters) extends BtbBundle()(p) { - val prediction = Valid(new BTBResp) + val prediction = new BTBResp val pc = UInt(width = vaddrBits) val target = UInt(width = vaddrBits) val taken = Bool() @@ -118,8 +136,9 @@ class BTBUpdate(implicit p: Parameters) extends BtbBundle()(p) { // BHT update occurs during branch resolution on all conditional branches. // - "pc" is what future fetch PCs will tag match against. 
class BHTUpdate(implicit p: Parameters) extends BtbBundle()(p) { - val prediction = Valid(new BTBResp) + val prediction = new BHTResp val pc = UInt(width = vaddrBits) + val branch = Bool() val taken = Bool() val mispredict = Bool() } @@ -127,7 +146,6 @@ class BHTUpdate(implicit p: Parameters) extends BtbBundle()(p) { class RASUpdate(implicit p: Parameters) extends BtbBundle()(p) { val cfiType = CFIType() val returnAddr = UInt(width = vaddrBits) - val prediction = Valid(new BTBResp) } // - "bridx" is the low-order PC bits of the predicted branch (after @@ -161,6 +179,7 @@ class BTB(implicit p: Parameters) extends BtbModule { val bht_advance = Valid(new BTBResp).flip val ras_update = Valid(new RASUpdate).flip val ras_head = Valid(UInt(width = vaddrBits)) + val flush = Bool().asInput } val idxs = Reg(Vec(entries, UInt(width=matchBits - log2Up(coreInstBytes)))) @@ -195,7 +214,7 @@ class BTB(implicit p: Parameters) extends BtbModule { if (updatesOutOfOrder) { val updateHits = (pageHit << 1)(Mux1H(idxMatch(r_btb_update.bits.pc), idxPages)) (updateHits.orR, OHToUInt(updateHits)) - } else (r_btb_update.bits.prediction.valid && r_btb_update.bits.prediction.bits.entry < entries, r_btb_update.bits.prediction.bits.entry) + } else (r_btb_update.bits.prediction.entry < entries, r_btb_update.bits.prediction.entry) val useUpdatePageHit = updatePageHit.orR val usePageHit = pageHit.orR @@ -220,7 +239,7 @@ class BTB(implicit p: Parameters) extends BtbModule { val repl = new PseudoLRU(entries) val waddr = Mux(updateHit, updateHitAddr, repl.replace) - val r_resp = Pipe(io.req.valid && io.resp.valid, io.resp.bits) + val r_resp = Pipe(io.resp) when (r_resp.valid && r_resp.bits.taken || r_btb_update.valid) { repl.access(Mux(r_btb_update.valid, waddr, r_resp.bits.entry)) } @@ -262,24 +281,25 @@ class BTB(implicit p: Parameters) extends BtbModule { when (PopCountAtLeast(idxHit, 2)) { isValid := isValid & ~idxHit } + when (io.flush) { + isValid := 0 + } - if (btbParams.nBHT > 0) { - val bht 
= new BHT(btbParams.nBHT) + if (btbParams.bhtParams.nonEmpty) { + val bht = new BHT(btbParams.bhtParams.get) val isBranch = (idxHit & cfiType.map(_ === CFIType.branch).asUInt).orR val res = bht.get(io.req.bits.addr) - when (io.req.valid && io.resp.valid && isBranch) { - bht.advanceHistory(res.taken) - } when (io.bht_advance.valid) { bht.advanceHistory(io.bht_advance.bits.bht.taken) } - when (io.btb_update.valid) { - bht.resetHistory(io.btb_update.bits.prediction.bits.bht) - } when (io.bht_update.valid) { - bht.updateTable(io.bht_update.bits.pc, io.bht_update.bits.prediction.bits.bht, io.bht_update.bits.taken) - when (io.bht_update.bits.mispredict) { - bht.updateHistory(io.bht_update.bits.pc, io.bht_update.bits.prediction.bits.bht, io.bht_update.bits.taken) + when (io.bht_update.bits.branch) { + bht.updateTable(io.bht_update.bits.pc, io.bht_update.bits.prediction, io.bht_update.bits.taken) + when (io.bht_update.bits.mispredict) { + bht.updateHistory(io.bht_update.bits.pc, io.bht_update.bits.prediction, io.bht_update.bits.taken) + } + }.elsewhen (io.bht_update.bits.mispredict) { + bht.resetHistory(io.bht_update.bits.prediction) } } when (!res.taken && isBranch) { io.resp.bits.taken := false } @@ -297,7 +317,7 @@ class BTB(implicit p: Parameters) extends BtbModule { when (io.ras_update.valid) { when (io.ras_update.bits.cfiType === CFIType.call) { ras.push(io.ras_update.bits.returnAddr) - }.elsewhen (io.ras_update.bits.cfiType === CFIType.ret && io.ras_update.bits.prediction.valid) { + }.elsewhen (io.ras_update.bits.cfiType === CFIType.ret) { ras.pop() } } diff --git a/src/main/scala/rocket/Frontend.scala b/src/main/scala/rocket/Frontend.scala index 408088c3..85ecbb46 100644 --- a/src/main/scala/rocket/Frontend.scala +++ b/src/main/scala/rocket/Frontend.scala @@ -28,7 +28,7 @@ class FrontendExceptions extends Bundle { } class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) { - val btb = Valid(new BTBResp) + val btb = new BTBResp val pc = UInt(width = 
vaddrBitsExtended) // ID stage PC val data = UInt(width = fetchWidth * coreInstBits) val mask = Bits(width = fetchWidth) @@ -86,6 +86,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer) val s2_pc = RegInit(t = UInt(width = vaddrBitsExtended), alignPC(io.reset_vector)) val s2_btb_resp_valid = if (usingBTB) Reg(Bool()) else false.B val s2_btb_resp_bits = Reg(new BTBResp) + val s2_btb_taken = s2_btb_resp_valid && s2_btb_resp_bits.taken val s2_tlb_resp = Reg(tlb.io.resp) val s2_xcpt = s2_tlb_resp.ae.inst || s2_tlb_resp.pf.inst val s2_speculative = Reg(init=Bool(false)) @@ -143,13 +144,14 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer) fq.io.enq.bits.data := icache.io.resp.bits.data fq.io.enq.bits.mask := UInt((1 << fetchWidth)-1) << s2_pc.extract(log2Ceil(fetchWidth)+log2Ceil(coreInstBytes)-1, log2Ceil(coreInstBytes)) fq.io.enq.bits.replay := icache.io.resp.bits.replay || icache.io.s2_kill && !icache.io.resp.valid && !s2_xcpt - fq.io.enq.bits.btb.valid := s2_btb_resp_valid - fq.io.enq.bits.btb.bits := s2_btb_resp_bits + fq.io.enq.bits.btb := s2_btb_resp_bits + fq.io.enq.bits.btb.taken := s2_btb_taken fq.io.enq.bits.xcpt := s2_tlb_resp when (icache.io.resp.valid && icache.io.resp.bits.ae) { fq.io.enq.bits.xcpt.ae.inst := true } if (usingBTB) { val btb = Module(new BTB) + btb.io.flush := false btb.io.req.valid := false btb.io.req.bits.addr := s1_pc btb.io.btb_update := io.cpu.btb_update @@ -166,111 +168,123 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer) predicted_taken := Bool(true) } - if (!coreParams.jumpInFrontend) { - // push RAS speculatively - btb.io.ras_update.valid := btb.io.req.valid && btb.io.resp.valid && btb.io.resp.bits.cfiType.isOneOf(CFIType.call, CFIType.ret) - val returnAddrLSBs = btb.io.resp.bits.bridx +& 1 - btb.io.ras_update.bits.returnAddr := - Mux(returnAddrLSBs(log2Ceil(fetchWidth)), ntpc, s1_base_pc | ((returnAddrLSBs << log2Ceil(coreInstBytes)) & (fetchBytes - 1))) - 
btb.io.ras_update.bits.cfiType := btb.io.resp.bits.cfiType - btb.io.ras_update.bits.prediction.valid := true - } else { - val s2_btb_hit = s2_btb_resp_valid && s2_btb_resp_bits.taken - val s2_base_pc = ~(~s2_pc | (fetchBytes-1)) - val taken_idx = Wire(UInt()) - val after_idx = Wire(UInt()) - val useRAS = Wire(init=false.B) + val s2_base_pc = ~(~s2_pc | (fetchBytes-1)) + val taken_idx = Wire(UInt()) + val after_idx = Wire(UInt()) + val useRAS = Wire(init=false.B) + val updateBTB = Wire(init=false.B) - def scanInsns(idx: Int, prevValid: Bool, prevBits: UInt, prevTaken: Bool): Bool = { - val prevRVI = prevValid && prevBits(1,0) === 3 - val valid = fq.io.enq.bits.mask(idx) && !prevRVI - val bits = fq.io.enq.bits.data(coreInstBits*(idx+1)-1, coreInstBits*idx) - val rvc = bits(1,0) =/= 3 - val rviBits = Cat(bits, prevBits) - val rviBranch = rviBits(6,0) === Instructions.BEQ.value.asUInt()(6,0) - val rviJump = rviBits(6,0) === Instructions.JAL.value.asUInt()(6,0) - val rviJALR = rviBits(6,0) === Instructions.JALR.value.asUInt()(6,0) - val rviReturn = rviJALR && !rviBits(7) && BitPat("b00?01") === rviBits(19,15) - val rviCall = (rviJALR || rviJump) && rviBits(7) - val rvcBranch = bits === Instructions.C_BEQZ || bits === Instructions.C_BNEZ - val rvcJAL = Bool(xLen == 32) && bits === Instructions.C_JAL - val rvcJump = bits === Instructions.C_J || rvcJAL - val rvcImm = Mux(bits(14), new RVCDecoder(bits).bImm.asSInt, new RVCDecoder(bits).jImm.asSInt) - val rvcJR = bits === Instructions.C_MV && bits(6,2) === 0 - val rvcReturn = rvcJR && BitPat("b00?01") === bits(11,7) - val rvcJALR = bits === Instructions.C_ADD && bits(6,2) === 0 - val rvcCall = rvcJAL || rvcJALR - val rviImm = Mux(rviBits(3), ImmGen(IMM_UJ, rviBits), ImmGen(IMM_SB, rviBits)) - val taken = - prevRVI && (rviJump || rviJALR || rviBranch && s2_btb_resp_bits.bht.taken) || - valid && (rvcJump || rvcJALR || rvcJR || rvcBranch && s2_btb_resp_bits.bht.taken) - val predictReturn = btb.io.ras_head.valid && (prevRVI && 
rviReturn || valid && rvcReturn) - val predictBranch = - prevRVI && (rviJump || rviBranch && s2_btb_resp_bits.bht.taken) || - valid && (rvcJump || rvcBranch && s2_btb_resp_bits.bht.taken) + def scanInsns(idx: Int, prevValid: Bool, prevBits: UInt, prevTaken: Bool): Bool = { + def insnIsRVC(bits: UInt) = bits(1,0) =/= 3 + val prevRVI = prevValid && !insnIsRVC(prevBits) + val valid = fq.io.enq.bits.mask(idx) && !prevRVI + val bits = fq.io.enq.bits.data(coreInstBits*(idx+1)-1, coreInstBits*idx) + val rvc = insnIsRVC(bits) + val rviBits = Cat(bits, prevBits) + val rviBranch = rviBits(6,0) === Instructions.BEQ.value.asUInt()(6,0) + val rviJump = rviBits(6,0) === Instructions.JAL.value.asUInt()(6,0) + val rviJALR = rviBits(6,0) === Instructions.JALR.value.asUInt()(6,0) + val rviReturn = rviJALR && !rviBits(7) && BitPat("b00?01") === rviBits(19,15) + val rviCall = (rviJALR || rviJump) && rviBits(7) + val rvcBranch = bits === Instructions.C_BEQZ || bits === Instructions.C_BNEZ + val rvcJAL = Bool(xLen == 32) && bits === Instructions.C_JAL + val rvcJump = bits === Instructions.C_J || rvcJAL + val rvcImm = Mux(bits(14), new RVCDecoder(bits).bImm.asSInt, new RVCDecoder(bits).jImm.asSInt) + val rvcJR = bits === Instructions.C_MV && bits(6,2) === 0 + val rvcReturn = rvcJR && BitPat("b00?01") === bits(11,7) + val rvcJALR = bits === Instructions.C_ADD && bits(6,2) === 0 + val rvcCall = rvcJAL || rvcJALR + val rviImm = Mux(rviBits(3), ImmGen(IMM_UJ, rviBits), ImmGen(IMM_SB, rviBits)) + val taken = + prevRVI && (rviJump || rviJALR || rviBranch && s2_btb_resp_bits.bht.taken) || + valid && (rvcJump || rvcJALR || rvcJR || rvcBranch && s2_btb_resp_bits.bht.taken) + val predictReturn = btb.io.ras_head.valid && (prevRVI && rviReturn || valid && rvcReturn) + val predictJump = prevRVI && rviJump || valid && rvcJump + val predictBranch = s2_btb_resp_bits.bht.taken && (prevRVI && rviBranch || valid && rvcBranch) - when (!prevTaken) { - taken_idx := idx - after_idx := idx + 1 - 
btb.io.ras_update.valid := fq.io.enq.fire() && !wrong_path && (prevRVI && (rviCall || rviReturn) || valid && (rvcCall || rvcReturn)) - btb.io.ras_update.bits.prediction.valid := true - btb.io.ras_update.bits.cfiType := Mux(Mux(prevRVI, rviReturn, rvcReturn), CFIType.ret, CFIType.call) + when (s2_valid && s2_btb_resp_valid && s2_btb_resp_bits.bridx === idx && valid && !rvc) { + // The BTB has predicted that the middle of an RVI instruction is + // a branch! Flush the BTB and the pipeline. + btb.io.flush := true + fq.io.enq.bits.replay := true + } - when (!s2_btb_hit) { - when (fq.io.enq.fire() && taken && !predictBranch && !predictReturn) { - wrong_path := true - } - when (s2_valid && predictReturn) { - useRAS := true - } - when (s2_valid && predictBranch) { - val pc = s2_base_pc | (idx*coreInstBytes) - val npc = - if (idx == 0) pc.asSInt + Mux(prevRVI, rviImm -& 2.S, rvcImm) - else Mux(prevRVI, pc - coreInstBytes, pc).asSInt + Mux(prevRVI, rviImm, rvcImm) - predicted_npc := npc.asUInt - } + when (!prevTaken) { + taken_idx := idx + after_idx := idx + 1 + btb.io.ras_update.valid := fq.io.enq.fire() && !wrong_path && (prevRVI && (rviCall || rviReturn) || valid && (rvcCall || rvcReturn)) + btb.io.ras_update.bits.cfiType := Mux(Mux(prevRVI, rviReturn, rvcReturn), CFIType.ret, + Mux(Mux(prevRVI, rviCall, rvcCall), CFIType.call, + Mux(Mux(prevRVI, rviBranch, rvcBranch), CFIType.branch, + CFIType.jump))) - when (prevRVI && rviBranch || valid && rvcBranch) { - btb.io.bht_advance.valid := fq.io.enq.fire() && !wrong_path && !s2_btb_resp_valid - btb.io.bht_advance.bits := s2_btb_resp_bits - } + when (!s2_btb_taken) { + when (fq.io.enq.fire() && taken && !predictBranch && !predictJump && !predictReturn) { + wrong_path := true + } + when (s2_valid && predictReturn) { + useRAS := true + } + when (s2_valid && (predictBranch || predictJump)) { + val pc = s2_base_pc | (idx*coreInstBytes) + val npc = + if (idx == 0) pc.asSInt + Mux(prevRVI, rviImm -& 2.S, rvcImm) + else Mux(prevRVI, 
pc - coreInstBytes, pc).asSInt + Mux(prevRVI, rviImm, rvcImm) + predicted_npc := npc.asUInt } } + when (prevRVI && rviBranch || valid && rvcBranch) { + btb.io.bht_advance.valid := fq.io.enq.fire() && !wrong_path + btb.io.bht_advance.bits := s2_btb_resp_bits + } + when (!s2_btb_resp_valid && (predictBranch && s2_btb_resp_bits.bht.strongly_taken || predictJump || predictReturn)) { + updateBTB := true + } + } - if (idx == fetchWidth-1) { - when (fq.io.enq.fire()) { - s2_partial_insn_valid := false - when (valid && !prevTaken && !rvc) { - s2_partial_insn_valid := true - s2_partial_insn := bits | 0x3 - } + if (idx == fetchWidth-1) { + when (fq.io.enq.fire()) { + s2_partial_insn_valid := false + when (valid && !prevTaken && !rvc) { + s2_partial_insn_valid := true + s2_partial_insn := bits | 0x3 } - prevTaken || taken - } else { - scanInsns(idx + 1, valid, bits, prevTaken || taken) - } - } - - btb.io.ras_update.bits.returnAddr := s2_base_pc + (after_idx << log2Ceil(coreInstBytes)) - - val taken = scanInsns(0, s2_partial_insn_valid, s2_partial_insn, false.B) - when (useRAS) { - predicted_npc := btb.io.ras_head.bits - } - when (fq.io.enq.fire() && s2_btb_hit) { - s2_partial_insn_valid := false - } - when (!s2_btb_hit) { - fq.io.enq.bits.btb.bits.bridx := taken_idx - when (taken) { - fq.io.enq.bits.btb.valid := true - fq.io.enq.bits.btb.bits.taken := true - fq.io.enq.bits.btb.bits.entry := UInt(tileParams.btb.get.nEntries) - when (fq.io.enq.fire()) { s2_redirect := true } } + prevTaken || taken + } else { + scanInsns(idx + 1, valid, bits, prevTaken || taken) } } + + when (!io.cpu.btb_update.valid) { + val fetch_bubble_likely = !fq.io.mask(1) + btb.io.btb_update.valid := fq.io.enq.fire() && !wrong_path && fetch_bubble_likely && updateBTB + btb.io.btb_update.bits.prediction.entry := UInt(tileParams.btb.get.nEntries) + btb.io.btb_update.bits.isValid := true + btb.io.btb_update.bits.cfiType := btb.io.ras_update.bits.cfiType + btb.io.btb_update.bits.br_pc := s2_base_pc | 
(taken_idx << log2Ceil(coreInstBytes)) + btb.io.btb_update.bits.pc := s2_base_pc + } + + btb.io.ras_update.bits.returnAddr := s2_base_pc + (after_idx << log2Ceil(coreInstBytes)) + + val taken = scanInsns(0, s2_partial_insn_valid, s2_partial_insn, false.B) + when (useRAS) { + predicted_npc := btb.io.ras_head.bits + } + when (fq.io.enq.fire() && (s2_btb_taken || taken)) { + s2_partial_insn_valid := false + } + when (!s2_btb_taken) { + when (taken) { + fq.io.enq.bits.btb.bridx := taken_idx + fq.io.enq.bits.btb.taken := true + fq.io.enq.bits.btb.entry := UInt(tileParams.btb.get.nEntries) + when (fq.io.enq.fire()) { s2_redirect := true } + } + } + + assert(!s2_partial_insn_valid || fq.io.enq.bits.mask(0)) when (s2_redirect) { s2_partial_insn_valid := false } when (io.cpu.req.valid) { wrong_path := false } } diff --git a/src/main/scala/rocket/IBuf.scala b/src/main/scala/rocket/IBuf.scala index d43e0590..da7d180c 100644 --- a/src/main/scala/rocket/IBuf.scala +++ b/src/main/scala/rocket/IBuf.scala @@ -12,7 +12,6 @@ class Instruction(implicit val p: Parameters) extends ParameterizedBundle with H val xcpt0 = new FrontendExceptions // exceptions on first half of instruction val xcpt1 = new FrontendExceptions // exceptions on second half of instruction val replay = Bool() - val btb_hit = Bool() val rvc = Bool() val inst = new ExpandedInstruction val raw = UInt(width = 32) @@ -34,13 +33,12 @@ class IBuf(implicit p: Parameters) extends CoreModule { val n = fetchWidth - 1 val nBufValid = if (n == 0) UInt(0) else Reg(init=UInt(0, log2Ceil(fetchWidth))) val buf = Reg(io.imem.bits) - val ibufBTBHit = Reg(Bool()) val ibufBTBResp = Reg(new BTBResp) val pcWordMask = UInt(coreInstBytes*fetchWidth-1, vaddrBitsExtended) val pcWordBits = io.imem.bits.pc.extract(log2Ceil(fetchWidth*coreInstBytes)-1, log2Ceil(coreInstBytes)) val nReady = Wire(init = UInt(0, log2Ceil(fetchWidth+1))) - val nIC = Mux(io.imem.bits.btb.valid && io.imem.bits.btb.bits.taken, io.imem.bits.btb.bits.bridx +& 1, 
UInt(fetchWidth)) - pcWordBits + val nIC = Mux(io.imem.bits.btb.taken, io.imem.bits.btb.bridx +& 1, UInt(fetchWidth)) - pcWordBits val nICReady = nReady - nBufValid val nValid = Mux(io.imem.valid, nIC, UInt(0)) + nBufValid io.imem.ready := io.inst(0).ready && nReady >= nBufValid && (nICReady >= nIC || n >= nIC - nICReady) @@ -52,7 +50,6 @@ class IBuf(implicit p: Parameters) extends CoreModule { val shiftedBuf = shiftInsnRight(buf.data(n*coreInstBits-1, coreInstBits), (nReady-1)(log2Ceil(n-1)-1,0)) buf.data := Cat(buf.data(n*coreInstBits-1, (n-1)*coreInstBits), shiftedBuf((n-1)*coreInstBits-1, 0)) buf.pc := buf.pc & ~pcWordMask | (buf.pc + (nReady << log2Ceil(coreInstBytes))) & pcWordMask - ibufBTBResp.bridx := ibufBTBResp.bridx - nReady } when (io.imem.valid && nReady >= nBufValid && nICReady < nIC && n >= nIC - nICReady) { val shamt = pcWordBits + nICReady @@ -60,9 +57,7 @@ class IBuf(implicit p: Parameters) extends CoreModule { buf := io.imem.bits buf.data := shiftInsnRight(io.imem.bits.data, shamt)(n*coreInstBits-1,0) buf.pc := io.imem.bits.pc & ~pcWordMask | (io.imem.bits.pc + (nICReady << log2Ceil(coreInstBytes))) & pcWordMask - ibufBTBHit := io.imem.bits.btb.valid && io.imem.bits.btb.bits.bridx >= shamt - ibufBTBResp := io.imem.bits.btb.bits - ibufBTBResp.bridx := io.imem.bits.btb.bits.bridx - shamt + ibufBTBResp := io.imem.bits.btb } } when (io.kill) { @@ -81,12 +76,9 @@ class IBuf(implicit p: Parameters) extends CoreModule { val xcpt = (0 until bufMask.getWidth).map(i => Mux(bufMask(i), buf.xcpt, io.imem.bits.xcpt)) val buf_replay = Mux(buf.replay, bufMask, UInt(0)) val ic_replay = buf_replay | Mux(io.imem.bits.replay, valid & ~bufMask, UInt(0)) - val ibufBTBHitMask = Mux(ibufBTBHit, UIntToOH(ibufBTBResp.bridx), UInt(0)) - assert(!io.imem.valid || !io.imem.bits.btb.valid || io.imem.bits.btb.bits.bridx >= pcWordBits) - val icBTBHitMask = Mux(io.imem.bits.btb.valid, UIntToOH(io.imem.bits.btb.bits.bridx +& nBufValid - pcWordBits), UInt(0)) - val btbHitMask = 
ibufBTBHitMask & bufMask | icBTBHitMask & ~bufMask + assert(!io.imem.valid || !io.imem.bits.btb.taken || io.imem.bits.btb.bridx >= pcWordBits) - io.btb_resp := Mux((ibufBTBHitMask & bufMask).orR, ibufBTBResp, io.imem.bits.btb.bits) + io.btb_resp := io.imem.bits.btb io.pc := Mux(nBufValid > 0, buf.pc, io.imem.bits.pc) expand(0, 0, inst) @@ -97,15 +89,16 @@ class IBuf(implicit p: Parameters) extends CoreModule { io.inst(i).bits.raw := curInst if (usingCompressed) { - val replay = ic_replay(j) || (!exp.io.rvc && (btbHitMask(j) || ic_replay(j+1))) + val replay = ic_replay(j) || (!exp.io.rvc && ic_replay(j+1)) val full_insn = exp.io.rvc || valid(j+1) || buf_replay(j) io.inst(i).valid := valid(j) && full_insn io.inst(i).bits.xcpt0 := xcpt(j) io.inst(i).bits.xcpt1 := Mux(exp.io.rvc, 0.U, xcpt(j+1).asUInt).asTypeOf(new FrontendExceptions) io.inst(i).bits.replay := replay - io.inst(i).bits.btb_hit := btbHitMask(j) || (!exp.io.rvc && btbHitMask(j+1)) io.inst(i).bits.rvc := exp.io.rvc + when ((bufMask(j) && exp.io.rvc) || bufMask(j+1)) { io.btb_resp := ibufBTBResp } + when (full_insn && (i == 0 || io.inst(i).ready)) { nReady := Mux(exp.io.rvc, j+1, j+2) } expand(i+1, Mux(exp.io.rvc, j+1, j+2), Mux(exp.io.rvc, curInst >> 16, curInst >> 32)) @@ -116,7 +109,6 @@ class IBuf(implicit p: Parameters) extends CoreModule { io.inst(i).bits.xcpt1 := 0.U.asTypeOf(new FrontendExceptions) io.inst(i).bits.replay := ic_replay(i) io.inst(i).bits.rvc := false - io.inst(i).bits.btb_hit := btbHitMask(i) expand(i+1, null, curInst >> 32) } diff --git a/src/main/scala/rocket/RocketCore.scala b/src/main/scala/rocket/RocketCore.scala index ecf4c59c..e210cbb1 100644 --- a/src/main/scala/rocket/RocketCore.scala +++ b/src/main/scala/rocket/RocketCore.scala @@ -30,7 +30,6 @@ case class RocketCoreParams( mtvecWritable: Boolean = true, fastLoadWord: Boolean = true, fastLoadByte: Boolean = false, - jumpInFrontend: Boolean = true, tileControlAddr: Option[BigInt] = None, mulDiv: Option[MulDivParams] = 
Some(MulDivParams()), fpu: Option[FPUParams] = Some(FPUParams()) @@ -123,7 +122,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) val ex_reg_xcpt_interrupt = Reg(Bool()) val ex_reg_valid = Reg(Bool()) val ex_reg_rvc = Reg(Bool()) - val ex_reg_btb_hit = Reg(Bool()) val ex_reg_btb_resp = Reg(new BTBResp) val ex_reg_xcpt = Reg(Bool()) val ex_reg_flush_pipe = Reg(Bool()) @@ -137,7 +135,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) val mem_reg_xcpt_interrupt = Reg(Bool()) val mem_reg_valid = Reg(Bool()) val mem_reg_rvc = Reg(Bool()) - val mem_reg_btb_hit = Reg(Bool()) val mem_reg_btb_resp = Reg(new BTBResp) val mem_reg_xcpt = Reg(Bool()) val mem_reg_replay = Reg(Bool()) @@ -292,7 +289,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) ex_reg_replay := !take_pc && ibuf.io.inst(0).valid && ibuf.io.inst(0).bits.replay ex_reg_xcpt := !ctrl_killd && id_xcpt ex_reg_xcpt_interrupt := !take_pc && ibuf.io.inst(0).valid && csr.io.interrupt - ex_reg_btb_hit := ibuf.io.inst(0).bits.btb_hit when (!ctrl_killd) { ex_ctrl := id_ctrl @@ -374,7 +370,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) val mem_int_wdata = Mux(!mem_reg_xcpt && (mem_ctrl.jalr ^ mem_npc_misaligned), mem_br_target, mem_reg_wdata.asSInt).asUInt val mem_cfi = mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal val mem_cfi_taken = (mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal - val mem_direction_misprediction = (Bool(coreParams.jumpInFrontend) || mem_reg_btb_hit) && mem_ctrl.branch && mem_br_taken =/= mem_reg_btb_resp.taken + val mem_direction_misprediction = mem_ctrl.branch && mem_br_taken =/= (usingBTB && mem_reg_btb_resp.taken) val mem_misprediction = if (usingBTB) mem_wrong_npc else mem_cfi_taken take_pc_mem := mem_reg_valid && (mem_misprediction || mem_reg_sfence) @@ -393,7 +389,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) mem_reg_load := ex_ctrl.mem && isRead(ex_ctrl.mem_cmd) mem_reg_store := ex_ctrl.mem 
&& isWrite(ex_ctrl.mem_cmd) mem_reg_sfence := ex_sfence - mem_reg_btb_hit := ex_reg_btb_hit mem_reg_btb_resp := ex_reg_btb_resp mem_reg_flush_pipe := ex_reg_flush_pipe mem_reg_slow_bypass := ex_slow_bypass @@ -608,8 +603,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) ibuf.io.inst(0).ready := !ctrl_stalld - io.imem.btb_update.valid := (mem_reg_replay && mem_reg_btb_hit) || (mem_reg_valid && !take_pc_wb && mem_wrong_npc && (!mem_cfi || mem_cfi_taken)) - io.imem.btb_update.bits.isValid := !mem_reg_replay && mem_cfi + io.imem.btb_update.valid := mem_reg_valid && !take_pc_wb && mem_wrong_npc && (!mem_cfi || mem_cfi_taken) + io.imem.btb_update.bits.isValid := mem_cfi io.imem.btb_update.bits.cfiType := Mux((mem_ctrl.jal || mem_ctrl.jalr) && mem_waddr(0), CFIType.call, Mux(mem_ctrl.jalr && mem_reg_inst(19,15) === BitPat("b00?01"), CFIType.ret, @@ -618,14 +613,14 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) io.imem.btb_update.bits.target := io.imem.req.bits.pc io.imem.btb_update.bits.br_pc := (if (usingCompressed) mem_reg_pc + Mux(mem_reg_rvc, UInt(0), UInt(2)) else mem_reg_pc) io.imem.btb_update.bits.pc := ~(~io.imem.btb_update.bits.br_pc | (coreInstBytes*fetchWidth-1)) - io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit - io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp + io.imem.btb_update.bits.prediction := mem_reg_btb_resp - io.imem.bht_update.valid := mem_reg_valid && !take_pc_wb && mem_ctrl.branch + io.imem.bht_update.valid := mem_reg_valid && !take_pc_wb io.imem.bht_update.bits.pc := io.imem.btb_update.bits.pc io.imem.bht_update.bits.taken := mem_br_taken io.imem.bht_update.bits.mispredict := mem_wrong_npc - io.imem.bht_update.bits.prediction := io.imem.btb_update.bits.prediction + io.imem.bht_update.bits.branch := mem_ctrl.branch + io.imem.bht_update.bits.prediction := mem_reg_btb_resp.bht io.fpu.valid := !ctrl_killd && id_ctrl.fp io.fpu.killx := ctrl_killx diff --git a/src/main/scala/tile/Core.scala 
b/src/main/scala/tile/Core.scala index 26e20647..5a2cef6d 100644 --- a/src/main/scala/tile/Core.scala +++ b/src/main/scala/tile/Core.scala @@ -33,7 +33,6 @@ trait CoreParams { val nL2TLBEntries: Int val mtvecInit: Option[BigInt] val mtvecWritable: Boolean - val jumpInFrontend: Boolean val tileControlAddr: Option[BigInt] def instBytes: Int = instBits / 8