Perform some control-flow transfers within the Frontend
This commit is contained in:
parent
62c4080585
commit
15878d4691
@ -11,21 +11,20 @@ import freechips.rocketchip.tile.HasCoreParameters
|
|||||||
import freechips.rocketchip.util._
|
import freechips.rocketchip.util._
|
||||||
|
|
||||||
// Configuration record for the branch target buffer; each diff row below shows the old line first, then the new line.
// Changes visible in this hunk: the default nEntries goes from 40 to 32, the default nRAS goes from 2 to 6,
// and the new side adds an nBHT parameter (default 256).
case class BTBParams(
|
case class BTBParams(
|
||||||
nEntries: Int = 40,
|
nEntries: Int = 32,
|
||||||
nMatchBits: Int = 14,
|
nMatchBits: Int = 14,
|
||||||
nPages: Int = 6,
|
nPages: Int = 6,
|
||||||
nRAS: Int = 2,
|
nRAS: Int = 6,
|
||||||
|
nBHT: Int = 256,
|
||||||
updatesOutOfOrder: Boolean = false)
|
updatesOutOfOrder: Boolean = false)
|
||||||
|
|
||||||
// Derives BTB sizing values from the tile's optional BTB configuration.
// When tileParams.btb is empty, a BTBParams with nEntries = 0 is used instead.
// matchBits is the larger of nMatchBits and log2Ceil(CacheBlockBytes * number of icache sets).
// The old side also exposed nRAS and a derived nBHT (1 << log2Up(entries*2)); the new side removes both vals.
trait HasBtbParameters extends HasCoreParameters {
|
trait HasBtbParameters extends HasCoreParameters {
|
||||||
val btbParams = tileParams.btb.getOrElse(BTBParams(nEntries = 0))
|
val btbParams = tileParams.btb.getOrElse(BTBParams(nEntries = 0))
|
||||||
val matchBits = btbParams.nMatchBits max log2Ceil(p(CacheBlockBytes) * tileParams.icache.get.nSets)
|
val matchBits = btbParams.nMatchBits max log2Ceil(p(CacheBlockBytes) * tileParams.icache.get.nSets)
|
||||||
val entries = btbParams.nEntries
|
val entries = btbParams.nEntries
|
||||||
val nRAS = btbParams.nRAS
|
|
||||||
val updatesOutOfOrder = btbParams.updatesOutOfOrder
|
val updatesOutOfOrder = btbParams.updatesOutOfOrder
|
||||||
val nPages = (btbParams.nPages + 1) / 2 * 2 // control logic assumes 2 divides pages
|
val nPages = (btbParams.nPages + 1) / 2 * 2 // control logic assumes 2 divides pages
|
||||||
val opaqueBits = log2Up(entries)
|
val opaqueBits = log2Up(entries)
|
||||||
val nBHT = 1 << log2Up(entries*2)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Base class for BTB hardware: a Module that mixes in HasBtbParameters and exposes the implicit Parameters as p.
abstract class BtbModule(implicit val p: Parameters) extends Module with HasBtbParameters
|
abstract class BtbModule(implicit val p: Parameters) extends Module with HasBtbParameters
|
||||||
@ -53,8 +52,9 @@ class RAS(nras: Int) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Bundle returned by a BHT lookup: the history value used for the lookup and the 2-bit counter that was read.
// history is log2Up(nBHT) bits wide, but never narrower than 1 bit.
// The new side reads nBHT from btbParams and adds a taken field carrying the predicted direction.
class BHTResp(implicit p: Parameters) extends BtbBundle()(p) {
|
class BHTResp(implicit p: Parameters) extends BtbBundle()(p) {
|
||||||
val history = UInt(width = log2Up(nBHT).max(1))
|
val history = UInt(width = log2Up(btbParams.nBHT).max(1))
|
||||||
val value = UInt(width = 2)
|
val value = UInt(width = 2)
|
||||||
|
val taken = Bool()
|
||||||
}
|
}
|
||||||
|
|
||||||
// BHT contains table of 2-bit counters and a global history register.
|
// BHT contains table of 2-bit counters and a global history register.
|
||||||
@ -68,19 +68,26 @@ class BHTResp(implicit p: Parameters) extends BtbBundle()(p) {
|
|||||||
// The updating branch must provide its "fetch pc".
|
// The updating branch must provide its "fetch pc".
|
||||||
class BHT(nbht: Int)(implicit val p: Parameters) extends HasCoreParameters {
|
class BHT(nbht: Int)(implicit val p: Parameters) extends HasCoreParameters {
|
||||||
val nbhtbits = log2Up(nbht)
|
val nbhtbits = log2Up(nbht)
|
||||||
// Looks up the 2-bit counter for addr and returns it together with the history used for the lookup.
// The table index is a slice of addr XORed with the global history register.
// Old side: the slice is addr(nbhtbits+1, log2Up(coreInstBytes)), and when update is asserted the predicted
// direction (bit 0 of the counter) is shifted into the top of the history as part of get.
// New side: the slice is exactly nbhtbits wide starting at bit log2Up(coreInstBytes), the update argument is
// dropped, get no longer writes the history, and the prediction is exported as res.taken instead.
def get(addr: UInt, update: Bool): BHTResp = {
|
def get(addr: UInt): BHTResp = {
|
||||||
val res = Wire(new BHTResp)
|
val res = Wire(new BHTResp)
|
||||||
val index = addr(nbhtbits+1, log2Up(coreInstBytes)) ^ history
|
val index = addr(nbhtbits+log2Up(coreInstBytes)-1, log2Up(coreInstBytes)) ^ history
|
||||||
res.value := table(index)
|
res.value := table(index)
|
||||||
res.history := history
|
res.history := history
|
||||||
val taken = res.value(0)
|
res.taken := res.value(0)
|
||||||
when (update) { history := Cat(taken, history(nbhtbits-1,1)) }
|
|
||||||
res
|
res
|
||||||
}
|
}
|
||||||
// Old side: a single update method rewrites the counter and, on a mispredict, repairs the history.
// New side: that work is split into four methods so callers can drive the table and the history separately:
//   updateTable    - rewrites the counter selected by addr and the history captured in d
//   resetHistory   - restores the history register to the value captured in d
//   updateHistory  - sets the history to d.history with taken shifted in at the top (addr is not used in the body)
//   advanceHistory - shifts taken into the top of the current history register
// Counter encoding written by both sides: bit 1 is the latest outcome, and bit 0 is the majority vote of the
// two old counter bits and the latest outcome.
// Every history shift places taken in the most significant position and drops the least significant bit.
def update(addr: UInt, d: BHTResp, taken: Bool, mispredict: Bool): Unit = {
|
def updateTable(addr: UInt, d: BHTResp, taken: Bool): Unit = {
|
||||||
val index = addr(nbhtbits+1, log2Up(coreInstBytes)) ^ d.history
|
val index = addr(nbhtbits+log2Up(coreInstBytes)-1, log2Up(coreInstBytes)) ^ d.history
|
||||||
table(index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken))
|
table(index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken))
|
||||||
when (mispredict) { history := Cat(taken, d.history(nbhtbits-1,1)) }
|
}
|
||||||
|
def resetHistory(d: BHTResp): Unit = {
|
||||||
|
history := d.history
|
||||||
|
}
|
||||||
|
def updateHistory(addr: UInt, d: BHTResp, taken: Bool): Unit = {
|
||||||
|
history := Cat(taken, d.history(nbhtbits-1,1))
|
||||||
|
}
|
||||||
|
def advanceHistory(taken: Bool): Unit = {
|
||||||
|
history := Cat(taken, history(nbhtbits-1,1))
|
||||||
}
|
}
|
||||||
|
|
||||||
private val table = Mem(nbht, UInt(width = 2))
|
private val table = Mem(nbht, UInt(width = 2))
|
||||||
@ -152,7 +159,9 @@ class BTB(implicit p: Parameters) extends BtbModule {
|
|||||||
val resp = Valid(new BTBResp)
|
val resp = Valid(new BTBResp)
|
||||||
val btb_update = Valid(new BTBUpdate).flip
|
val btb_update = Valid(new BTBUpdate).flip
|
||||||
val bht_update = Valid(new BHTUpdate).flip
|
val bht_update = Valid(new BHTUpdate).flip
|
||||||
|
val bht_advance = Valid(new BTBResp).flip
|
||||||
val ras_update = Valid(new RASUpdate).flip
|
val ras_update = Valid(new RASUpdate).flip
|
||||||
|
val ras_head = Valid(UInt(width = vaddrBits))
|
||||||
}
|
}
|
||||||
|
|
||||||
val idxs = Reg(Vec(entries, UInt(width=matchBits - log2Up(coreInstBytes))))
|
val idxs = Reg(Vec(entries, UInt(width=matchBits - log2Up(coreInstBytes))))
|
||||||
@ -250,21 +259,34 @@ class BTB(implicit p: Parameters) extends BtbModule {
|
|||||||
isValid := isValid & ~idxHit
|
isValid := isValid & ~idxHit
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nBHT > 0) {
|
if (btbParams.nBHT > 0) {
|
||||||
val bht = new BHT(nBHT)
|
val bht = new BHT(btbParams.nBHT)
|
||||||
val isBranch = (idxHit & cfiType.map(_ === CFIType.branch).asUInt).orR
|
val isBranch = (idxHit & cfiType.map(_ === CFIType.branch).asUInt).orR
|
||||||
val res = bht.get(io.req.bits.addr, io.req.valid && io.resp.valid && isBranch)
|
val res = bht.get(io.req.bits.addr)
|
||||||
val update_btb_hit = io.bht_update.bits.prediction.valid
|
when (io.req.valid && io.resp.valid && isBranch) {
|
||||||
when (io.bht_update.valid && update_btb_hit) {
|
bht.advanceHistory(res.taken)
|
||||||
bht.update(io.bht_update.bits.pc, io.bht_update.bits.prediction.bits.bht, io.bht_update.bits.taken, io.bht_update.bits.mispredict)
|
|
||||||
}
|
}
|
||||||
when (!res.value(0) && isBranch) { io.resp.bits.taken := false }
|
when (io.bht_advance.valid) {
|
||||||
|
bht.advanceHistory(io.bht_advance.bits.bht.taken)
|
||||||
|
}
|
||||||
|
when (io.btb_update.valid) {
|
||||||
|
bht.resetHistory(io.btb_update.bits.prediction.bits.bht)
|
||||||
|
}
|
||||||
|
when (io.bht_update.valid) {
|
||||||
|
bht.updateTable(io.bht_update.bits.pc, io.bht_update.bits.prediction.bits.bht, io.bht_update.bits.taken)
|
||||||
|
when (io.bht_update.bits.mispredict) {
|
||||||
|
bht.updateHistory(io.bht_update.bits.pc, io.bht_update.bits.prediction.bits.bht, io.bht_update.bits.taken)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
when (!res.taken && isBranch) { io.resp.bits.taken := false }
|
||||||
io.resp.bits.bht := res
|
io.resp.bits.bht := res
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nRAS > 0) {
|
if (btbParams.nRAS > 0) {
|
||||||
val ras = new RAS(nRAS)
|
val ras = new RAS(btbParams.nRAS)
|
||||||
val doPeek = (idxHit & cfiType.map(_ === CFIType.ret).asUInt).orR
|
val doPeek = (idxHit & cfiType.map(_ === CFIType.ret).asUInt).orR
|
||||||
|
io.ras_head.valid := !ras.isEmpty
|
||||||
|
io.ras_head.bits := ras.peek
|
||||||
when (!ras.isEmpty && doPeek) {
|
when (!ras.isEmpty && doPeek) {
|
||||||
io.resp.bits.target := ras.peek
|
io.resp.bits.target := ras.peek
|
||||||
}
|
}
|
||||||
|
@ -81,21 +81,24 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
|||||||
val icache = outer.icache.module
|
val icache = outer.icache.module
|
||||||
require(fetchWidth*coreInstBytes == outer.icacheParams.fetchBytes)
|
require(fetchWidth*coreInstBytes == outer.icacheParams.fetchBytes)
|
||||||
|
|
||||||
val tlb = Module(new TLB(log2Ceil(coreInstBytes*fetchWidth), nTLBEntries))
|
val fetchBytes = coreInstBytes * fetchWidth
|
||||||
val fq = withReset(reset || io.cpu.req.valid) { Module(new ShiftQueue(new FrontendResp, 4, flow = true)) }
|
val tlb = Module(new TLB(log2Ceil(fetchBytes), nTLBEntries))
|
||||||
|
val fq = withReset(reset || io.cpu.req.valid) { Module(new ShiftQueue(new FrontendResp, 5, flow = true)) }
|
||||||
|
|
||||||
val s0_valid = io.cpu.req.valid || !fq.io.mask(fq.io.mask.getWidth-2)
|
val s0_valid = io.cpu.req.valid || !fq.io.mask(fq.io.mask.getWidth-3)
|
||||||
val s1_pc = Reg(UInt(width=vaddrBitsExtended))
|
val s1_pc = Reg(UInt(width=vaddrBitsExtended))
|
||||||
val s1_speculative = Reg(Bool())
|
val s1_speculative = Reg(Bool())
|
||||||
val s2_valid = RegInit(false.B)
|
val s2_valid = RegInit(false.B)
|
||||||
val s2_pc = Reg(init=io.resetVector)
|
val s2_pc = RegInit(alignPC(io.resetVector))
|
||||||
val s2_btb_resp_valid = Reg(init=Bool(false))
|
val s2_btb_resp_valid = if (usingBTB) Reg(Bool()) else false.B
|
||||||
val s2_btb_resp_bits = Reg(new BTBResp)
|
val s2_btb_resp_bits = Reg(new BTBResp)
|
||||||
val s2_tlb_resp = Reg(tlb.io.resp)
|
val s2_tlb_resp = Reg(tlb.io.resp)
|
||||||
val s2_xcpt = !s2_tlb_resp.miss && fq.io.enq.bits.xcpt.asUInt.orR
|
val s2_xcpt = !s2_tlb_resp.miss && fq.io.enq.bits.xcpt.asUInt.orR
|
||||||
val s2_speculative = Reg(init=Bool(false))
|
val s2_speculative = Reg(init=Bool(false))
|
||||||
|
val s2_partial_insn_valid = RegInit(false.B)
|
||||||
|
val s2_partial_insn = Reg(UInt(width = coreInstBits))
|
||||||
|
val s2_wrong_path = Reg(Bool())
|
||||||
|
|
||||||
val fetchBytes = coreInstBytes * fetchWidth
|
|
||||||
val s1_base_pc = ~(~s1_pc | (fetchBytes - 1))
|
val s1_base_pc = ~(~s1_pc | (fetchBytes - 1))
|
||||||
val ntpc = s1_base_pc + fetchBytes.U
|
val ntpc = s1_base_pc + fetchBytes.U
|
||||||
val predicted_npc = Wire(init = ntpc)
|
val predicted_npc = Wire(init = ntpc)
|
||||||
@ -113,40 +116,15 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
|||||||
else Bool(true)
|
else Bool(true)
|
||||||
s1_speculative := Mux(io.cpu.req.valid, io.cpu.req.bits.speculative, Mux(s2_replay, s2_speculative, s0_speculative))
|
s1_speculative := Mux(io.cpu.req.valid, io.cpu.req.bits.speculative, Mux(s2_replay, s2_speculative, s0_speculative))
|
||||||
|
|
||||||
|
val s2_redirect = Wire(init = io.cpu.req.valid)
|
||||||
s2_valid := false
|
s2_valid := false
|
||||||
when (!s2_replay && !io.cpu.req.valid) {
|
when (!s2_replay && !s2_redirect) {
|
||||||
s2_valid := true
|
s2_valid := true
|
||||||
s2_pc := s1_pc
|
s2_pc := s1_pc
|
||||||
s2_speculative := s1_speculative
|
s2_speculative := s1_speculative
|
||||||
s2_tlb_resp := tlb.io.resp
|
s2_tlb_resp := tlb.io.resp
|
||||||
}
|
}
|
||||||
|
|
||||||
if (usingBTB) {
|
|
||||||
val btb = Module(new BTB)
|
|
||||||
btb.io.req.valid := false
|
|
||||||
btb.io.req.bits.addr := s1_pc
|
|
||||||
btb.io.btb_update := io.cpu.btb_update
|
|
||||||
btb.io.bht_update := io.cpu.bht_update
|
|
||||||
btb.io.ras_update := io.cpu.ras_update
|
|
||||||
when (!s2_replay) {
|
|
||||||
btb.io.req.valid := true
|
|
||||||
s2_btb_resp_valid := btb.io.resp.valid
|
|
||||||
s2_btb_resp_bits := btb.io.resp.bits
|
|
||||||
}
|
|
||||||
when (btb.io.resp.valid && btb.io.resp.bits.taken) {
|
|
||||||
predicted_npc := btb.io.resp.bits.target.sextTo(vaddrBitsExtended)
|
|
||||||
predicted_taken := Bool(true)
|
|
||||||
}
|
|
||||||
|
|
||||||
// push RAS speculatively
|
|
||||||
btb.io.ras_update.valid := btb.io.req.valid && btb.io.resp.valid && btb.io.resp.bits.cfiType.isOneOf(CFIType.call, CFIType.ret)
|
|
||||||
val returnAddrLSBs = btb.io.resp.bits.bridx +& 1
|
|
||||||
btb.io.ras_update.bits.returnAddr :=
|
|
||||||
Mux(returnAddrLSBs(log2Ceil(fetchWidth)), ntpc, s1_base_pc | ((returnAddrLSBs << log2Ceil(coreInstBytes)) & (fetchBytes - 1)))
|
|
||||||
btb.io.ras_update.bits.cfiType := btb.io.resp.bits.cfiType
|
|
||||||
btb.io.ras_update.bits.prediction.valid := true
|
|
||||||
}
|
|
||||||
|
|
||||||
io.ptw <> tlb.io.ptw
|
io.ptw <> tlb.io.ptw
|
||||||
tlb.io.req.valid := !s2_replay
|
tlb.io.req.valid := !s2_replay
|
||||||
tlb.io.req.bits.vaddr := s1_pc
|
tlb.io.req.bits.vaddr := s1_pc
|
||||||
@ -161,12 +139,12 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
|||||||
icache.io.invalidate := io.cpu.flush_icache
|
icache.io.invalidate := io.cpu.flush_icache
|
||||||
icache.io.s1_paddr := tlb.io.resp.paddr
|
icache.io.s1_paddr := tlb.io.resp.paddr
|
||||||
icache.io.s2_vaddr := s2_pc
|
icache.io.s2_vaddr := s2_pc
|
||||||
icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || s2_replay
|
icache.io.s1_kill := s2_redirect || tlb.io.resp.miss || s2_replay
|
||||||
icache.io.s2_kill := s2_valid && (s2_speculative && !s2_tlb_resp.cacheable || s2_xcpt)
|
icache.io.s2_kill := s2_valid && (s2_speculative && !s2_tlb_resp.cacheable || s2_xcpt)
|
||||||
|
|
||||||
fq.io.enq.valid := s2_valid && (icache.io.resp.valid || icache.io.s2_kill)
|
fq.io.enq.valid := s2_valid && (icache.io.resp.valid || icache.io.s2_kill)
|
||||||
fq.io.enq.bits.pc := s2_pc
|
fq.io.enq.bits.pc := s2_pc
|
||||||
io.cpu.npc := ~(~Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) | (coreInstBytes-1)) // discard LSB(s)
|
io.cpu.npc := alignPC(Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc))
|
||||||
|
|
||||||
fq.io.enq.bits.data := icache.io.resp.bits.data
|
fq.io.enq.bits.data := icache.io.resp.bits.data
|
||||||
fq.io.enq.bits.mask := UInt((1 << fetchWidth)-1) << s2_pc.extract(log2Ceil(fetchWidth)+log2Ceil(coreInstBytes)-1, log2Ceil(coreInstBytes))
|
fq.io.enq.bits.mask := UInt((1 << fetchWidth)-1) << s2_pc.extract(log2Ceil(fetchWidth)+log2Ceil(coreInstBytes)-1, log2Ceil(coreInstBytes))
|
||||||
@ -176,11 +154,126 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
|||||||
fq.io.enq.bits.xcpt := s2_tlb_resp
|
fq.io.enq.bits.xcpt := s2_tlb_resp
|
||||||
when (icache.io.resp.valid && icache.io.resp.bits.ae) { fq.io.enq.bits.xcpt.ae.inst := true }
|
when (icache.io.resp.valid && icache.io.resp.bits.ae) { fq.io.enq.bits.xcpt.ae.inst := true }
|
||||||
|
|
||||||
|
if (usingBTB) {
|
||||||
|
val btb = Module(new BTB)
|
||||||
|
btb.io.req.valid := false
|
||||||
|
btb.io.req.bits.addr := s1_pc
|
||||||
|
btb.io.btb_update := io.cpu.btb_update
|
||||||
|
btb.io.bht_update := io.cpu.bht_update
|
||||||
|
btb.io.ras_update.valid := false
|
||||||
|
btb.io.bht_advance.valid := false
|
||||||
|
when (!s2_replay) {
|
||||||
|
btb.io.req.valid := !s2_redirect
|
||||||
|
s2_btb_resp_valid := btb.io.resp.valid
|
||||||
|
s2_btb_resp_bits := btb.io.resp.bits
|
||||||
|
}
|
||||||
|
when (btb.io.resp.valid && btb.io.resp.bits.taken) {
|
||||||
|
predicted_npc := btb.io.resp.bits.target.sextTo(vaddrBitsExtended)
|
||||||
|
predicted_taken := Bool(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!coreParams.jumpInFrontend) {
|
||||||
|
// push RAS speculatively
|
||||||
|
btb.io.ras_update.valid := btb.io.req.valid && btb.io.resp.valid && btb.io.resp.bits.cfiType.isOneOf(CFIType.call, CFIType.ret)
|
||||||
|
val returnAddrLSBs = btb.io.resp.bits.bridx +& 1
|
||||||
|
btb.io.ras_update.bits.returnAddr :=
|
||||||
|
Mux(returnAddrLSBs(log2Ceil(fetchWidth)), ntpc, s1_base_pc | ((returnAddrLSBs << log2Ceil(coreInstBytes)) & (fetchBytes - 1)))
|
||||||
|
btb.io.ras_update.bits.cfiType := btb.io.resp.bits.cfiType
|
||||||
|
btb.io.ras_update.bits.prediction.valid := true
|
||||||
|
} else when (fq.io.enq.fire()) {
|
||||||
|
val s2_btb_hit = s2_btb_resp_valid && s2_btb_resp_bits.taken
|
||||||
|
val s2_base_pc = ~(~s2_pc | (fetchBytes-1))
|
||||||
|
val taken_idx = Wire(UInt())
|
||||||
|
val after_idx = Wire(UInt())
|
||||||
|
|
||||||
|
// Scans the fetched packet one parcel at a time, looking for the first control-flow instruction that the
// frontend itself predicts taken. The recursion runs over the Scala Int idx while the hardware is being
// generated and stops at idx == fetchWidth-1.
//   idx       - parcel position inside the fetch packet
//   prevValid - whether the preceding parcel began an instruction
//   prevBits  - raw bits of the preceding parcel
//   prevTaken - whether an earlier parcel was already predicted taken
// Returns a Bool that is true when this parcel or any earlier one was predicted taken.
// Side effects: drives taken_idx, after_idx, the RAS update and BHT advance ports of the BTB, predicted_npc,
// s2_wrong_path, and the partial-instruction registers.
def scanInsns(idx: Int, prevValid: Bool, prevBits: UInt, prevTaken: Bool): Bool = {
|
||||||
|
// The preceding parcel had its two low bits set to 11, so this parcel is combined with it (see rviBits).
val prevRVI = prevValid && prevBits(1,0) === 3
|
||||||
|
// This parcel starts an instruction only if it is enabled in the mask and is not the tail of the previous one.
val valid = fq.io.enq.bits.mask(idx) && !prevRVI
|
||||||
|
val bits = fq.io.enq.bits.data(coreInstBits*(idx+1)-1, coreInstBits*idx)
|
||||||
|
val rvc = bits(1,0) =/= 3
|
||||||
|
// Wide instruction view: the previous parcel supplies the low bits and this parcel the high bits.
val rviBits = Cat(bits, prevBits)
|
||||||
|
// Only the 7-bit opcode field is compared for the wide branch, JAL and JALR checks.
val rviBranch = rviBits(6,0) === Instructions.BEQ.value.asUInt()(6,0)
|
||||||
|
val rviJump = rviBits(6,0) === Instructions.JAL.value.asUInt()(6,0)
|
||||||
|
val rviJALR = rviBits(6,0) === Instructions.JALR.value.asUInt()(6,0)
|
||||||
|
// Return: JALR with bit 7 clear and bits 19..15 matching 00?01.
val rviReturn = rviJALR && !rviBits(7) && BitPat("b00?01") === rviBits(19,15)
|
||||||
|
// Call: JAL or JALR with bit 7 set.
val rviCall = (rviJALR || rviJump) && rviBits(7)
|
||||||
|
val rvcBranch = bits === Instructions.C_BEQZ || bits === Instructions.C_BNEZ
|
||||||
|
// C_JAL is only recognised when xLen is 32.
val rvcJAL = Bool(xLen == 32) && bits === Instructions.C_JAL
|
||||||
|
val rvcJump = bits === Instructions.C_J || rvcJAL
|
||||||
|
// Selects the branch immediate when bit 14 is set and the jump immediate when bits 14..13 equal 01.
val rvcImm = Mux(bits(14), new RVCDecoder(bits).bImm.asSInt, 0.S) | Mux(bits(14,13) === 1, new RVCDecoder(bits).jImm.asSInt, 0.S)
|
||||||
|
val rvcJR = bits === Instructions.C_MV && bits(6,2) === 0
|
||||||
|
val rvcReturn = rvcJR && BitPat("b00?01") === bits(11,7)
|
||||||
|
val rvcJALR = bits === Instructions.C_ADD && bits(6,2) === 0
|
||||||
|
val rvcCall = rvcJAL || rvcJALR
|
||||||
|
// Selects the UJ-type immediate when bit 3 is set and the SB-type immediate when bit 2 is clear.
val rviImm = Mux(rviBits(3), ImmGen(IMM_UJ, rviBits), 0.S) | Mux(!rviBits(2), ImmGen(IMM_SB, rviBits), 0.S)
|
||||||
|
// Jumps count as taken unconditionally; branches follow the direction stored in the stage-2 BHT response.
val taken =
|
||||||
|
prevRVI && (rviJump || rviJALR || rviBranch && s2_btb_resp_bits.bht.taken) ||
|
||||||
|
valid && (rvcJump || rvcJALR || rvcJR || rvcBranch && s2_btb_resp_bits.bht.taken)
|
||||||
|
|
||||||
|
// Everything below applies only while no earlier parcel was predicted taken.
// NOTE(review): these assignments are repeated for every idx, so the value from the highest qualifying idx
// presumably wins under Chisel's last-connect rule - confirm.
when (!prevTaken) {
|
||||||
|
taken_idx := idx
|
||||||
|
after_idx := idx + 1
|
||||||
|
btb.io.ras_update.valid := !s2_wrong_path && (prevRVI && (rviCall || rviReturn) || valid && (rvcCall || rvcReturn))
|
||||||
|
btb.io.ras_update.bits.prediction.valid := true
|
||||||
|
btb.io.ras_update.bits.cfiType := Mux(Mux(prevRVI, rviReturn, rvcReturn), CFIType.ret, CFIType.call)
|
||||||
|
|
||||||
|
when (!s2_btb_hit) {
|
||||||
|
// Marks the fetch stream as wrong-path when a register jump is found without a usable RAS head.
// NOTE(review): the wide arm requires rviReturn before trusting the RAS head, but the compressed arm checks
// only ras_head.valid and not rvcReturn - confirm that this asymmetry is intended.
when (prevRVI && (rviJALR && !(rviReturn && btb.io.ras_head.valid)) ||
|
||||||
|
valid && (rvcJALR || (rvcJR && !btb.io.ras_head.valid))) {
|
||||||
|
s2_wrong_path := true
|
||||||
|
}
|
||||||
|
when (taken) {
|
||||||
|
val pc = s2_base_pc | (idx*coreInstBytes)
|
||||||
|
// For a wide instruction the base is the previous parcel's address; at idx 0 this is done by
// subtracting 2 from the immediate instead of adjusting pc.
val npc =
|
||||||
|
if (idx == 0) pc.asSInt + Mux(prevRVI, rviImm -& 2.S, rvcImm)
|
||||||
|
else Mux(prevRVI, pc - coreInstBytes, pc).asSInt + Mux(prevRVI, rviImm, rvcImm)
|
||||||
|
// Returns take their target from the RAS head; all other taken instructions use the computed npc.
predicted_npc := Mux(prevRVI && rviReturn || valid && rvcReturn, btb.io.ras_head.bits, npc.asUInt)
|
||||||
|
}
|
||||||
|
|
||||||
|
when (prevRVI && rviBranch || valid && rvcBranch) {
|
||||||
|
btb.io.bht_advance.valid := !s2_wrong_path && !s2_btb_resp_valid
|
||||||
|
btb.io.bht_advance.bits := s2_btb_resp_bits
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (idx == fetchWidth-1) {
|
||||||
|
s2_partial_insn_valid := false
|
||||||
|
// The last parcel starts a non-compressed instruction, so its bits are saved (with the two low bits forced
// to 11) for the next scan.
when (valid && !prevTaken && !rvc) {
|
||||||
|
s2_partial_insn_valid := true
|
||||||
|
s2_partial_insn := bits | 0x3
|
||||||
|
}
|
||||||
|
prevTaken || taken
|
||||||
|
} else {
|
||||||
|
scanInsns(idx + 1, valid, bits, prevTaken || taken)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
btb.io.ras_update.bits.returnAddr := s2_base_pc + (after_idx << log2Ceil(coreInstBytes))
|
||||||
|
|
||||||
|
val taken = scanInsns(0, s2_partial_insn_valid, s2_partial_insn, false.B)
|
||||||
|
when (s2_btb_hit) {
|
||||||
|
s2_partial_insn_valid := false
|
||||||
|
}.otherwise {
|
||||||
|
fq.io.enq.bits.btb.bits.bridx := taken_idx
|
||||||
|
when (taken) {
|
||||||
|
fq.io.enq.bits.btb.valid := true
|
||||||
|
fq.io.enq.bits.btb.bits.taken := true
|
||||||
|
s2_redirect := true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
when (s2_redirect) { s2_partial_insn_valid := false }
|
||||||
|
when (io.cpu.req.valid) { s2_wrong_path := false }
|
||||||
|
}
|
||||||
|
|
||||||
io.cpu.resp <> fq.io.deq
|
io.cpu.resp <> fq.io.deq
|
||||||
|
|
||||||
// performance events
|
// performance events
|
||||||
io.cpu.perf.acquire := edge.done(icache.io.tl_out(0).a)
|
io.cpu.perf.acquire := edge.done(icache.io.tl_out(0).a)
|
||||||
io.cpu.perf.tlbMiss := io.ptw.req.fire()
|
io.cpu.perf.tlbMiss := io.ptw.req.fire()
|
||||||
|
|
||||||
|
// Clears the bits of pc selected by (coreInstBytes - 1), i.e. pc AND NOT (coreInstBytes - 1).
// NOTE(review): this discards exactly the sub-instruction low bits only if coreInstBytes is a power of two - confirm.
def alignPC(pc: UInt) = ~(~pc | (coreInstBytes - 1))
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Mix-ins for constructing tiles that have an ICache-based pipeline frontend */
|
/** Mix-ins for constructing tiles that have an ICache-based pipeline frontend */
|
||||||
|
@ -29,7 +29,7 @@ case class RocketCoreParams(
|
|||||||
mtvecWritable: Boolean = true,
|
mtvecWritable: Boolean = true,
|
||||||
fastLoadWord: Boolean = true,
|
fastLoadWord: Boolean = true,
|
||||||
fastLoadByte: Boolean = false,
|
fastLoadByte: Boolean = false,
|
||||||
fastJAL: Boolean = false,
|
jumpInFrontend: Boolean = true,
|
||||||
mulDiv: Option[MulDivParams] = Some(MulDivParams()),
|
mulDiv: Option[MulDivParams] = Some(MulDivParams()),
|
||||||
fpu: Option[FPUParams] = Some(FPUParams())
|
fpu: Option[FPUParams] = Some(FPUParams())
|
||||||
) extends CoreParams {
|
) extends CoreParams {
|
||||||
@ -45,7 +45,6 @@ trait HasRocketCoreParameters extends HasCoreParameters {
|
|||||||
|
|
||||||
val fastLoadWord = rocketParams.fastLoadWord
|
val fastLoadWord = rocketParams.fastLoadWord
|
||||||
val fastLoadByte = rocketParams.fastLoadByte
|
val fastLoadByte = rocketParams.fastLoadByte
|
||||||
val fastJAL = rocketParams.fastJAL
|
|
||||||
val nBreakpoints = rocketParams.nBreakpoints
|
val nBreakpoints = rocketParams.nBreakpoints
|
||||||
val nPMPs = rocketParams.nPMPs
|
val nPMPs = rocketParams.nPMPs
|
||||||
val nPerfCounters = rocketParams.nPerfCounters
|
val nPerfCounters = rocketParams.nPerfCounters
|
||||||
@ -170,9 +169,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
val wb_reg_rs2 = Reg(Bits())
|
val wb_reg_rs2 = Reg(Bits())
|
||||||
val take_pc_wb = Wire(Bool())
|
val take_pc_wb = Wire(Bool())
|
||||||
|
|
||||||
val take_pc_id = Wire(Bool())
|
|
||||||
val take_pc_mem_wb = take_pc_wb || take_pc_mem
|
val take_pc_mem_wb = take_pc_wb || take_pc_mem
|
||||||
val take_pc = take_pc_mem_wb || take_pc_id
|
val take_pc = take_pc_mem_wb
|
||||||
|
|
||||||
// decode stage
|
// decode stage
|
||||||
val ibuf = Module(new IBuf)
|
val ibuf = Module(new IBuf)
|
||||||
@ -195,7 +193,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
val id_rs = id_raddr.map(rf.read _)
|
val id_rs = id_raddr.map(rf.read _)
|
||||||
val ctrl_killd = Wire(Bool())
|
val ctrl_killd = Wire(Bool())
|
||||||
val id_npc = (ibuf.io.pc.asSInt + ImmGen(IMM_UJ, id_inst(0))).asUInt
|
val id_npc = (ibuf.io.pc.asSInt + ImmGen(IMM_UJ, id_inst(0))).asUInt
|
||||||
take_pc_id := Bool(fastJAL) && !ctrl_killd && id_ctrl.jal
|
|
||||||
|
|
||||||
val csr = Module(new CSRFile(perfEvents))
|
val csr = Module(new CSRFile(perfEvents))
|
||||||
val id_csr_en = id_ctrl.csr.isOneOf(CSR.S, CSR.C, CSR.W)
|
val id_csr_en = id_ctrl.csr.isOneOf(CSR.S, CSR.C, CSR.W)
|
||||||
@ -296,7 +293,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
ex_reg_xcpt := !ctrl_killd && id_xcpt
|
ex_reg_xcpt := !ctrl_killd && id_xcpt
|
||||||
ex_reg_xcpt_interrupt := !take_pc && ibuf.io.inst(0).valid && csr.io.interrupt
|
ex_reg_xcpt_interrupt := !take_pc && ibuf.io.inst(0).valid && csr.io.interrupt
|
||||||
ex_reg_btb_hit := ibuf.io.inst(0).bits.btb_hit
|
ex_reg_btb_hit := ibuf.io.inst(0).bits.btb_hit
|
||||||
when (ibuf.io.inst(0).bits.btb_hit) { ex_reg_btb_resp := ibuf.io.btb_resp }
|
|
||||||
|
|
||||||
when (!ctrl_killd) {
|
when (!ctrl_killd) {
|
||||||
ex_ctrl := id_ctrl
|
ex_ctrl := id_ctrl
|
||||||
@ -345,6 +341,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
ex_reg_cause := id_cause
|
ex_reg_cause := id_cause
|
||||||
ex_reg_inst := id_inst(0)
|
ex_reg_inst := id_inst(0)
|
||||||
ex_reg_pc := ibuf.io.pc
|
ex_reg_pc := ibuf.io.pc
|
||||||
|
ex_reg_btb_resp := ibuf.io.btb_resp
|
||||||
}
|
}
|
||||||
|
|
||||||
// replay inst in ex stage?
|
// replay inst in ex stage?
|
||||||
@ -367,15 +364,17 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
val mem_br_taken = mem_reg_wdata(0)
|
val mem_br_taken = mem_reg_wdata(0)
|
||||||
val mem_br_target = mem_reg_pc.asSInt +
|
val mem_br_target = mem_reg_pc.asSInt +
|
||||||
Mux(mem_ctrl.branch && mem_br_taken, ImmGen(IMM_SB, mem_reg_inst),
|
Mux(mem_ctrl.branch && mem_br_taken, ImmGen(IMM_SB, mem_reg_inst),
|
||||||
Mux(Bool(!fastJAL) && mem_ctrl.jal, ImmGen(IMM_UJ, mem_reg_inst),
|
Mux(mem_ctrl.jal, ImmGen(IMM_UJ, mem_reg_inst),
|
||||||
Mux(mem_reg_rvc, SInt(2), SInt(4))))
|
Mux(mem_reg_rvc, SInt(2), SInt(4))))
|
||||||
val mem_npc = (Mux(mem_ctrl.jalr || mem_reg_sfence, encodeVirtualAddress(mem_reg_wdata, mem_reg_wdata).asSInt, mem_br_target) & SInt(-2)).asUInt
|
val mem_npc = (Mux(mem_ctrl.jalr || mem_reg_sfence, encodeVirtualAddress(mem_reg_wdata, mem_reg_wdata).asSInt, mem_br_target) & SInt(-2)).asUInt
|
||||||
val mem_wrong_npc = Mux(ex_pc_valid, mem_npc =/= ex_reg_pc, Mux(ibuf.io.inst(0).valid, mem_npc =/= ibuf.io.pc, Bool(true)))
|
val mem_wrong_npc =
|
||||||
|
Mux(ex_pc_valid, mem_npc =/= ex_reg_pc,
|
||||||
|
Mux(ibuf.io.inst(0).valid || ibuf.io.imem.valid, mem_npc =/= ibuf.io.pc, Bool(true)))
|
||||||
val mem_npc_misaligned = !csr.io.status.isa('c'-'a') && mem_npc(1) && !mem_reg_sfence
|
val mem_npc_misaligned = !csr.io.status.isa('c'-'a') && mem_npc(1) && !mem_reg_sfence
|
||||||
val mem_int_wdata = Mux(!mem_reg_xcpt && (mem_ctrl.jalr ^ mem_npc_misaligned), mem_br_target, mem_reg_wdata.asSInt).asUInt
|
val mem_int_wdata = Mux(!mem_reg_xcpt && (mem_ctrl.jalr ^ mem_npc_misaligned), mem_br_target, mem_reg_wdata.asSInt).asUInt
|
||||||
val mem_cfi = mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal
|
val mem_cfi = mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal
|
||||||
val mem_cfi_taken = (mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || (Bool(!fastJAL) && mem_ctrl.jal)
|
val mem_cfi_taken = (mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal
|
||||||
val mem_direction_misprediction = mem_reg_btb_hit && mem_ctrl.branch && mem_br_taken =/= mem_reg_btb_resp.taken
|
val mem_direction_misprediction = (Bool(coreParams.jumpInFrontend) || mem_reg_btb_hit) && mem_ctrl.branch && mem_br_taken =/= mem_reg_btb_resp.taken
|
||||||
val mem_misprediction = if (usingBTB) mem_wrong_npc else mem_cfi_taken
|
val mem_misprediction = if (usingBTB) mem_wrong_npc else mem_cfi_taken
|
||||||
take_pc_mem := mem_reg_valid && (mem_misprediction || mem_reg_sfence || (mem_ctrl.jalr && csr.io.status.debug))
|
take_pc_mem := mem_reg_valid && (mem_misprediction || mem_reg_sfence || (mem_ctrl.jalr && csr.io.status.debug))
|
||||||
|
|
||||||
@ -391,7 +390,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
mem_reg_store := ex_ctrl.mem && isWrite(ex_ctrl.mem_cmd)
|
mem_reg_store := ex_ctrl.mem && isWrite(ex_ctrl.mem_cmd)
|
||||||
mem_reg_sfence := ex_sfence
|
mem_reg_sfence := ex_sfence
|
||||||
mem_reg_btb_hit := ex_reg_btb_hit
|
mem_reg_btb_hit := ex_reg_btb_hit
|
||||||
when (ex_reg_btb_hit) { mem_reg_btb_resp := ex_reg_btb_resp }
|
mem_reg_btb_resp := ex_reg_btb_resp
|
||||||
mem_reg_flush_pipe := ex_reg_flush_pipe
|
mem_reg_flush_pipe := ex_reg_flush_pipe
|
||||||
mem_reg_slow_bypass := ex_slow_bypass
|
mem_reg_slow_bypass := ex_slow_bypass
|
||||||
|
|
||||||
@ -583,8 +582,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
io.imem.req.bits.pc :=
|
io.imem.req.bits.pc :=
|
||||||
Mux(wb_xcpt || csr.io.eret, csr.io.evec, // exception or [m|s]ret
|
Mux(wb_xcpt || csr.io.eret, csr.io.evec, // exception or [m|s]ret
|
||||||
Mux(replay_wb || wb_reg_flush_pipe, wb_npc, // replay or flush
|
Mux(replay_wb || wb_reg_flush_pipe, wb_npc, // replay or flush
|
||||||
Mux(take_pc_mem || Bool(!fastJAL), mem_npc, // branch misprediction
|
mem_npc)) // branch misprediction
|
||||||
id_npc))) // JAL
|
|
||||||
io.imem.flush_icache := wb_reg_valid && wb_ctrl.fence_i && !io.dmem.s2_nack
|
io.imem.flush_icache := wb_reg_valid && wb_ctrl.fence_i && !io.dmem.s2_nack
|
||||||
io.imem.sfence.valid := wb_reg_valid && wb_reg_sfence
|
io.imem.sfence.valid := wb_reg_valid && wb_reg_sfence
|
||||||
io.imem.sfence.bits.rs1 := wb_ctrl.mem_type(0)
|
io.imem.sfence.bits.rs1 := wb_ctrl.mem_type(0)
|
||||||
@ -595,7 +593,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
|
|
||||||
ibuf.io.inst(0).ready := !ctrl_stalld || csr.io.interrupt
|
ibuf.io.inst(0).ready := !ctrl_stalld || csr.io.interrupt
|
||||||
|
|
||||||
io.imem.btb_update.valid := (mem_reg_replay && mem_reg_btb_hit) || (mem_reg_valid && !take_pc_wb && (((mem_cfi_taken || !mem_cfi) && mem_wrong_npc) || (Bool(fastJAL) && mem_ctrl.jal && !mem_reg_btb_hit)))
|
io.imem.btb_update.valid := (mem_reg_replay && mem_reg_btb_hit) || (mem_reg_valid && !take_pc_wb && mem_misprediction)
|
||||||
io.imem.btb_update.bits.isValid := !mem_reg_replay && mem_cfi
|
io.imem.btb_update.bits.isValid := !mem_reg_replay && mem_cfi
|
||||||
io.imem.btb_update.bits.cfiType :=
|
io.imem.btb_update.bits.cfiType :=
|
||||||
Mux((mem_ctrl.jal || mem_ctrl.jalr) && mem_waddr(0), CFIType.call,
|
Mux((mem_ctrl.jal || mem_ctrl.jalr) && mem_waddr(0), CFIType.call,
|
||||||
|
@ -26,6 +26,7 @@ trait CoreParams {
|
|||||||
val instBits: Int
|
val instBits: Int
|
||||||
val nLocalInterrupts: Int
|
val nLocalInterrupts: Int
|
||||||
val nL2TLBEntries: Int
|
val nL2TLBEntries: Int
|
||||||
|
val jumpInFrontend: Boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
trait HasCoreParameters extends HasTileParameters {
|
trait HasCoreParameters extends HasTileParameters {
|
||||||
|
Loading…
Reference in New Issue
Block a user