Significant changes and fixes to BTB for superscalar fetch.
- BTBUpdate only occurs on mispredicts now. - RASUpdate broken out from BTBUpdate (allows RASUpdate to be performed in Decode). - Added optional 2nd CAM port to BTB for updates (for when updates to the BTB may occur out-of-order). - Fixed resp.mask bit logic.
This commit is contained in:
parent
3be3cd7731
commit
fea31d2167
@ -65,36 +65,40 @@ class BHT(nbht: Int) {
|
|||||||
when (update) { history := Cat(taken, history(nbhtbits-1,1)) }
|
when (update) { history := Cat(taken, history(nbhtbits-1,1)) }
|
||||||
res
|
res
|
||||||
}
|
}
|
||||||
def update(addr: UInt, d: BHTResp, taken: Bool, mispredict: Bool): Unit = {
|
def update(addr: UInt, d: BHTResp, taken: Bool): Unit = {
|
||||||
val index = addr(nbhtbits+1,2) ^ d.history
|
val index = addr(nbhtbits+1,2) ^ d.history
|
||||||
table(index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken))
|
table(index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken))
|
||||||
when (mispredict) { history := Cat(taken, d.history(nbhtbits-1,1)) }
|
history := Cat(taken, d.history(nbhtbits-1,1))
|
||||||
}
|
}
|
||||||
|
|
||||||
private val table = Mem(UInt(width = 2), nbht)
|
private val table = Mem(UInt(width = 2), nbht)
|
||||||
val history = Reg(UInt(width = nbhtbits))
|
val history = Reg(UInt(width = nbhtbits))
|
||||||
}
|
}
|
||||||
|
|
||||||
// BTB update occurs during branch resolution.
|
// BTB update occurs during branch resolution (and only on a mispredict).
|
||||||
// - "pc" is what future fetch PCs will tag match against.
|
// - "pc" is what future fetch PCs will tag match against.
|
||||||
// - "br_pc" is the PC of the branch instruction.
|
// - "br_pc" is the PC of the branch instruction.
|
||||||
// - "bridx" is the low-order PC bits of the predicted branch (after
|
|
||||||
// shifting off the lowest log(inst_bytes) bits).
|
|
||||||
// - "resp.mask" provides a mask of valid instructions (instructions are
|
|
||||||
// masked off by the predicted taken branch).
|
|
||||||
class BTBUpdate extends Bundle with BTBParameters {
|
class BTBUpdate extends Bundle with BTBParameters {
|
||||||
val prediction = Valid(new BTBResp)
|
val prediction = Valid(new BTBResp)
|
||||||
val pc = UInt(width = vaddrBits)
|
val pc = UInt(width = vaddrBits)
|
||||||
val target = UInt(width = vaddrBits)
|
val target = UInt(width = vaddrBits)
|
||||||
val returnAddr = UInt(width = vaddrBits)
|
|
||||||
val taken = Bool()
|
val taken = Bool()
|
||||||
val isJump = Bool()
|
val isJump = Bool()
|
||||||
val isCall = Bool()
|
|
||||||
val isReturn = Bool()
|
val isReturn = Bool()
|
||||||
val br_pc = UInt(width = vaddrBits)
|
val br_pc = UInt(width = vaddrBits)
|
||||||
val mispredict = Bool()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class RASUpdate extends Bundle with BTBParameters {
|
||||||
|
val isCall = Bool()
|
||||||
|
val isReturn = Bool()
|
||||||
|
val returnAddr = UInt(width = vaddrBits)
|
||||||
|
val prediction = Valid(new BTBResp)
|
||||||
|
}
|
||||||
|
|
||||||
|
// - "bridx" is the low-order PC bits of the predicted branch (after
|
||||||
|
// shifting off the lowest log(inst_bytes) bits).
|
||||||
|
// - "resp.mask" provides a mask of valid instructions (instructions are
|
||||||
|
// masked off by the predicted taken branch).
|
||||||
class BTBResp extends Bundle with BTBParameters {
|
class BTBResp extends Bundle with BTBParameters {
|
||||||
val taken = Bool()
|
val taken = Bool()
|
||||||
val mask = Bits(width = params(FetchWidth))
|
val mask = Bits(width = params(FetchWidth))
|
||||||
@ -109,11 +113,15 @@ class BTBReq extends Bundle with BTBParameters {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// fully-associative branch target buffer
|
// fully-associative branch target buffer
|
||||||
class BTB extends Module with BTBParameters {
|
// Higher-performance processors may cause BTB updates to occur out-of-order,
|
||||||
|
// which requires an extra CAM port for updates (to ensure no duplicates get
|
||||||
|
// placed in BTB).
|
||||||
|
class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParameters {
|
||||||
val io = new Bundle {
|
val io = new Bundle {
|
||||||
val req = Valid(new BTBReq).flip
|
val req = Valid(new BTBReq).flip
|
||||||
val resp = Valid(new BTBResp)
|
val resp = Valid(new BTBResp)
|
||||||
val update = Valid(new BTBUpdate).flip
|
val update = Valid(new BTBUpdate).flip
|
||||||
|
val ras_update = Valid(new RASUpdate).flip
|
||||||
val invalidate = Bool(INPUT)
|
val invalidate = Bool(INPUT)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -158,8 +166,7 @@ class BTB extends Module with BTBParameters {
|
|||||||
}
|
}
|
||||||
|
|
||||||
val updateHit = r_update.bits.prediction.valid
|
val updateHit = r_update.bits.prediction.valid
|
||||||
val updateValid = r_update.bits.mispredict || updateHit && Bool(nBHT > 0)
|
val updateTarget = r_update.bits.taken
|
||||||
val updateTarget = updateValid && r_update.bits.mispredict && r_update.bits.taken
|
|
||||||
|
|
||||||
val useUpdatePageHit = updatePageHit.orR
|
val useUpdatePageHit = updatePageHit.orR
|
||||||
val doIdxPageRepl = updateTarget && !useUpdatePageHit
|
val doIdxPageRepl = updateTarget && !useUpdatePageHit
|
||||||
@ -179,16 +186,22 @@ class BTB extends Module with BTBParameters {
|
|||||||
val pageReplEn = idxPageReplEn | tgtPageReplEn
|
val pageReplEn = idxPageReplEn | tgtPageReplEn
|
||||||
idxPageRepl := UIntToOH(Counter(r_update.valid && doPageRepl, nPages)._1)
|
idxPageRepl := UIntToOH(Counter(r_update.valid && doPageRepl, nPages)._1)
|
||||||
|
|
||||||
when (r_update.valid && !(updateValid && !updateTarget)) {
|
when (r_update.valid && updateTarget) {
|
||||||
val nextRepl = Counter(!updateHit && updateValid, entries)._1
|
assert(io.req.bits.addr === r_update.bits.target, "BTB request != I$ target")
|
||||||
val waddr = Mux(updateHit, r_update.bits.prediction.bits.entry, nextRepl)
|
|
||||||
|
val nextRepl = Counter(!updateHit, entries)._1
|
||||||
|
var waddr:UInt = null
|
||||||
|
if (!updates_out_of_order) {
|
||||||
|
waddr = Mux(updateHit, r_update.bits.prediction.bits.entry, nextRepl)
|
||||||
|
} else {
|
||||||
|
println(" BTB accepts out-of-order updates.")
|
||||||
|
waddr = Mux(updateHits.orR, OHToUInt(updateHits), nextRepl)
|
||||||
|
}
|
||||||
|
|
||||||
// invalidate entries if we stomp on pages they depend upon
|
// invalidate entries if we stomp on pages they depend upon
|
||||||
idxValid := idxValid & ~Vec.tabulate(entries)(i => (pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR).toBits
|
idxValid := idxValid & ~Vec.tabulate(entries)(i => (pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR).toBits
|
||||||
|
|
||||||
idxValid(waddr) := updateValid
|
idxValid(waddr) := Bool(true)
|
||||||
when (updateTarget) {
|
|
||||||
assert(io.req.bits.addr === r_update.bits.target, "BTB request != I$ target")
|
|
||||||
idxs(waddr) := r_update.bits.pc
|
idxs(waddr) := r_update.bits.pc
|
||||||
tgts(waddr) := update_target
|
tgts(waddr) := update_target
|
||||||
idxPages(waddr) := idxPageUpdate
|
idxPages(waddr) := idxPageUpdate
|
||||||
@ -200,7 +213,6 @@ class BTB extends Module with BTBParameters {
|
|||||||
} else {
|
} else {
|
||||||
brIdx(waddr) := r_update.bits.br_pc >> log2Up(params(CoreInstBits)/8)
|
brIdx(waddr) := r_update.bits.br_pc >> log2Up(params(CoreInstBits)/8)
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
require(nPages % 2 == 0)
|
require(nPages % 2 == 0)
|
||||||
val idxWritesEven = (idxPageUpdateOH & Fill(nPages/2, UInt(1,2))).orR
|
val idxWritesEven = (idxPageUpdateOH & Fill(nPages/2, UInt(1,2))).orR
|
||||||
@ -231,7 +243,9 @@ class BTB extends Module with BTBParameters {
|
|||||||
io.resp.bits.mask := UInt(1)
|
io.resp.bits.mask := UInt(1)
|
||||||
} else {
|
} else {
|
||||||
// note: btb_resp is clock gated, so the mask is only relevant for the io.resp.valid case
|
// note: btb_resp is clock gated, so the mask is only relevant for the io.resp.valid case
|
||||||
io.resp.bits.mask := Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1))
|
val all_ones = UInt((1 << (params(FetchWidth)+1))-1)
|
||||||
|
io.resp.bits.mask := Mux(io.resp.bits.taken, Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)),
|
||||||
|
all_ones)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nBHT > 0) {
|
if (nBHT > 0) {
|
||||||
@ -239,8 +253,7 @@ class BTB extends Module with BTBParameters {
|
|||||||
val res = bht.get(io.req.bits.addr, io.req.valid && hits.orR && !Mux1H(hits, isJump))
|
val res = bht.get(io.req.bits.addr, io.req.valid && hits.orR && !Mux1H(hits, isJump))
|
||||||
val update_btb_hit = io.update.bits.prediction.valid
|
val update_btb_hit = io.update.bits.prediction.valid
|
||||||
when (io.update.valid && update_btb_hit && !io.update.bits.isJump) {
|
when (io.update.valid && update_btb_hit && !io.update.bits.isJump) {
|
||||||
bht.update(io.update.bits.pc, io.update.bits.prediction.bits.bht,
|
bht.update(io.update.bits.pc, io.update.bits.prediction.bits.bht, io.update.bits.taken)
|
||||||
io.update.bits.taken, io.update.bits.mispredict)
|
|
||||||
}
|
}
|
||||||
when (!res.value(0) && !Mux1H(hits, isJump)) { io.resp.bits.taken := false }
|
when (!res.value(0) && !Mux1H(hits, isJump)) { io.resp.bits.taken := false }
|
||||||
io.resp.bits.bht := res
|
io.resp.bits.bht := res
|
||||||
@ -252,13 +265,13 @@ class BTB extends Module with BTBParameters {
|
|||||||
when (!ras.isEmpty && doPeek) {
|
when (!ras.isEmpty && doPeek) {
|
||||||
io.resp.bits.target := ras.peek
|
io.resp.bits.target := ras.peek
|
||||||
}
|
}
|
||||||
when (io.update.valid) {
|
when (io.ras_update.valid) {
|
||||||
when (io.update.bits.isCall) {
|
when (io.ras_update.bits.isCall) {
|
||||||
ras.push(io.update.bits.returnAddr)
|
ras.push(io.ras_update.bits.returnAddr)
|
||||||
when (doPeek) {
|
when (doPeek) {
|
||||||
io.resp.bits.target := io.update.bits.returnAddr
|
io.resp.bits.target := io.ras_update.bits.returnAddr
|
||||||
}
|
}
|
||||||
}.elsewhen (io.update.bits.isReturn && io.update.bits.prediction.valid) {
|
}.elsewhen (io.ras_update.bits.isReturn && io.ras_update.bits.prediction.valid) {
|
||||||
ras.pop
|
ras.pop
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -652,14 +652,17 @@ class Control extends Module
|
|||||||
Mux(replay_wb, PC_WB, // replay
|
Mux(replay_wb, PC_WB, // replay
|
||||||
PC_MEM)))
|
PC_MEM)))
|
||||||
|
|
||||||
io.imem.btb_update.valid := (mem_reg_branch || io.imem.btb_update.bits.isJump) && !take_pc_wb
|
io.imem.btb_update.valid := take_pc_mem && !take_pc_wb
|
||||||
io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit
|
io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit
|
||||||
io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp
|
io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp
|
||||||
io.imem.btb_update.bits.taken := mem_reg_branch && io.dpath.mem_br_taken || io.imem.btb_update.bits.isJump
|
io.imem.btb_update.bits.taken := mem_reg_branch && io.dpath.mem_br_taken || io.imem.btb_update.bits.isJump
|
||||||
io.imem.btb_update.bits.mispredict := take_pc_mem
|
|
||||||
io.imem.btb_update.bits.isJump := mem_reg_jal || mem_reg_jalr
|
io.imem.btb_update.bits.isJump := mem_reg_jal || mem_reg_jalr
|
||||||
io.imem.btb_update.bits.isCall := mem_reg_wen && io.dpath.mem_waddr(0)
|
|
||||||
io.imem.btb_update.bits.isReturn := mem_reg_jalr && io.dpath.mem_rs1_ra
|
io.imem.btb_update.bits.isReturn := mem_reg_jalr && io.dpath.mem_rs1_ra
|
||||||
|
io.imem.ras_update.valid := io.imem.btb_update.bits.isJump && !take_pc_wb
|
||||||
|
io.imem.ras_update.bits.isCall := mem_reg_wen && io.dpath.mem_waddr(0)
|
||||||
|
io.imem.ras_update.bits.isReturn := mem_reg_jalr && io.dpath.mem_rs1_ra
|
||||||
|
io.imem.ras_update.bits.prediction.valid := mem_reg_btb_hit
|
||||||
|
io.imem.ras_update.bits.prediction.bits := mem_reg_btb_resp
|
||||||
io.imem.req.valid := take_pc
|
io.imem.req.valid := take_pc
|
||||||
|
|
||||||
val bypassDst = Array(id_raddr1, id_raddr2)
|
val bypassDst = Array(id_raddr1, id_raddr2)
|
||||||
|
@ -286,8 +286,8 @@ class Datapath extends Module
|
|||||||
wb_reg_pc)).toUInt // PC_WB
|
wb_reg_pc)).toUInt // PC_WB
|
||||||
io.imem.btb_update.bits.pc := mem_reg_pc
|
io.imem.btb_update.bits.pc := mem_reg_pc
|
||||||
io.imem.btb_update.bits.target := io.imem.req.bits.pc
|
io.imem.btb_update.bits.target := io.imem.req.bits.pc
|
||||||
io.imem.btb_update.bits.returnAddr := mem_int_wdata
|
|
||||||
io.imem.btb_update.bits.br_pc := mem_reg_pc
|
io.imem.btb_update.bits.br_pc := mem_reg_pc
|
||||||
|
io.imem.ras_update.bits.returnAddr := mem_int_wdata
|
||||||
|
|
||||||
// for hazard/bypass opportunity detection
|
// for hazard/bypass opportunity detection
|
||||||
io.ctrl.ex_waddr := ex_reg_inst(11,7)
|
io.ctrl.ex_waddr := ex_reg_inst(11,7)
|
||||||
|
@ -33,18 +33,19 @@ class CPUFrontendIO extends CoreBundle {
|
|||||||
val resp = Decoupled(new FrontendResp).flip
|
val resp = Decoupled(new FrontendResp).flip
|
||||||
val btb_resp = Valid(new BTBResp).flip
|
val btb_resp = Valid(new BTBResp).flip
|
||||||
val btb_update = Valid(new BTBUpdate)
|
val btb_update = Valid(new BTBUpdate)
|
||||||
|
val ras_update = Valid(new RASUpdate)
|
||||||
val ptw = new TLBPTWIO().flip
|
val ptw = new TLBPTWIO().flip
|
||||||
val invalidate = Bool(OUTPUT)
|
val invalidate = Bool(OUTPUT)
|
||||||
}
|
}
|
||||||
|
|
||||||
class Frontend extends FrontendModule
|
class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule
|
||||||
{
|
{
|
||||||
val io = new Bundle {
|
val io = new Bundle {
|
||||||
val cpu = new CPUFrontendIO().flip
|
val cpu = new CPUFrontendIO().flip
|
||||||
val mem = new UncachedTileLinkIO
|
val mem = new UncachedTileLinkIO
|
||||||
}
|
}
|
||||||
|
|
||||||
val btb = Module(new BTB)
|
val btb = Module(new BTB(btb_updates_out_of_order))
|
||||||
val icache = Module(new ICache)
|
val icache = Module(new ICache)
|
||||||
val tlb = Module(new TLB(params(NITLBEntries)))
|
val tlb = Module(new TLB(params(NITLBEntries)))
|
||||||
|
|
||||||
@ -88,6 +89,7 @@ class Frontend extends FrontendModule
|
|||||||
btb.io.req.valid := !stall && !icmiss
|
btb.io.req.valid := !stall && !icmiss
|
||||||
btb.io.req.bits.addr := s1_pc & SInt(-coreInstBytes)
|
btb.io.req.bits.addr := s1_pc & SInt(-coreInstBytes)
|
||||||
btb.io.update := io.cpu.btb_update
|
btb.io.update := io.cpu.btb_update
|
||||||
|
btb.io.ras_update := io.cpu.ras_update
|
||||||
btb.io.invalidate := io.cpu.invalidate || io.cpu.ptw.invalidate
|
btb.io.invalidate := io.cpu.invalidate || io.cpu.ptw.invalidate
|
||||||
|
|
||||||
tlb.io.ptw <> io.cpu.ptw
|
tlb.io.ptw <> io.cpu.ptw
|
||||||
|
Loading…
Reference in New Issue
Block a user