1
0

Fixed BTB/I-cache bugs in the response mask, including the FetchWidth == 1 case

This commit is contained in:
Christopher Celio 2014-10-20 18:45:23 -07:00
parent 91efdc379b
commit 08d2c13330
2 changed files with 15 additions and 7 deletions

View File

@ -79,7 +79,7 @@ class BHT(nbht: Int) {
// - "pc" is what future fetch PCs will tag match against. // - "pc" is what future fetch PCs will tag match against.
// - "br_pc" is the PC of the branch instruction. // - "br_pc" is the PC of the branch instruction.
// - "bridx" is the low-order PC bits of the predicted branch (after // - "bridx" is the low-order PC bits of the predicted branch (after
// shifting off the lowest log(inst_bytes) bits). // shifting off the lowest log(inst_bytes) bits).
// - "resp.mask" provides a mask of valid instructions (instructions are // - "resp.mask" provides a mask of valid instructions (instructions are
// masked off by the predicted taken branch). // masked off by the predicted taken branch).
class BTBUpdate extends Bundle with BTBParameters { class BTBUpdate extends Bundle with BTBParameters {
@ -196,9 +196,9 @@ class BTB extends Module with BTBParameters {
useRAS(waddr) := r_update.bits.isReturn useRAS(waddr) := r_update.bits.isReturn
isJump(waddr) := r_update.bits.isJump isJump(waddr) := r_update.bits.isJump
if (params(FetchWidth) == 1) { if (params(FetchWidth) == 1) {
brIdx(waddr) := UInt(0) brIdx(waddr) := UInt(0)
} else { } else {
brIdx(waddr) := r_update.bits.br_pc >> log2Up(params(CoreInstBits)/8) brIdx(waddr) := r_update.bits.br_pc >> log2Up(params(CoreInstBits)/8)
} }
} }
@ -226,8 +226,14 @@ class BTB extends Module with BTBParameters {
io.resp.bits.taken := io.resp.valid io.resp.bits.taken := io.resp.valid
io.resp.bits.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts)) io.resp.bits.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts))
io.resp.bits.entry := OHToUInt(hits) io.resp.bits.entry := OHToUInt(hits)
io.resp.bits.mask := Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1))
io.resp.bits.bridx := brIdx(io.resp.bits.entry) io.resp.bits.bridx := brIdx(io.resp.bits.entry)
if (params(FetchWidth) == 1) {
io.resp.bits.mask := UInt(1)
} else {
io.resp.bits.mask := Mux(io.resp.valid, Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)),
((UInt(1) << UInt(params(FetchWidth)))-UInt(1)))
// val all_ones = UInt((1 << coreFetchWidth)-1)
}
if (nBHT > 0) { if (nBHT > 0) {
val bht = new BHT(nBHT) val bht = new BHT(nBHT)

View File

@ -58,9 +58,10 @@ class Frontend extends FrontendModule
val s2_xcpt_if = Reg(init=Bool(false)) val s2_xcpt_if = Reg(init=Bool(false))
val msb = vaddrBits-1 val msb = vaddrBits-1
val lsb = log2Up(coreFetchWidth*coreInstBytes)
val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target) val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target)
val ntpc_0 = s1_pc + UInt(coreInstBytes) val ntpc_0 = s1_pc + UInt(coreInstBytes*coreFetchWidth)
val ntpc = Cat(s1_pc(msb) & ntpc_0(msb), ntpc_0(msb,0)) val ntpc = Cat(s1_pc(msb) & ntpc_0(msb), ntpc_0(msb,lsb), Bits(0,lsb)) // unsure
val icmiss = s2_valid && !icache.io.resp.valid val icmiss = s2_valid && !icache.io.resp.valid
val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, ntpc) val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, ntpc)
val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt
@ -115,7 +116,8 @@ class Frontend extends FrontendModule
val all_ones = UInt((1 << coreFetchWidth)-1) val all_ones = UInt((1 << coreFetchWidth)-1)
val msk_pc = if (coreFetchWidth == 1) all_ones else all_ones << s2_pc(log2Up(coreFetchWidth) -1+2,2) val msk_pc = if (coreFetchWidth == 1) all_ones else all_ones << s2_pc(log2Up(coreFetchWidth) -1+2,2)
io.cpu.resp.bits.mask := msk_pc & btb.io.resp.bits.mask // TODO what is the best way to handle the clock-gating of s2_btb_resp_bits?
io.cpu.resp.bits.mask := Mux(s2_btb_resp_valid, msk_pc & s2_btb_resp_bits.mask, msk_pc)
io.cpu.resp.bits.xcpt_ma := s2_pc(log2Up(coreInstBytes)-1,0) != UInt(0) io.cpu.resp.bits.xcpt_ma := s2_pc(log2Up(coreInstBytes)-1,0) != UInt(0)
io.cpu.resp.bits.xcpt_if := s2_xcpt_if io.cpu.resp.bits.xcpt_if := s2_xcpt_if