From 180d3d365d21f93d91b02e257ee4a6fa4be62f04 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Wed, 17 Sep 2014 14:24:03 -0700 Subject: [PATCH 1/7] Expanded front-end to support superscalar fetch. --- rocket/src/main/scala/btb.scala | 13 +++++++++++++ rocket/src/main/scala/core.scala | 3 +++ rocket/src/main/scala/dpath.scala | 4 ++-- rocket/src/main/scala/icache.scala | 25 +++++++++++++++++-------- 4 files changed, 35 insertions(+), 10 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 02ca111e..e3762bfd 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -62,6 +62,14 @@ class BHT(nbht: Int) { val history = Reg(UInt(width = nbhtbits)) } +// BTB update occurs during branch resolution (i.e., PC redirection if a mispredict). +// - "pc" is what future fetch PCs will tag match against. +// - "br_pc" is the PC of the branch instruction. +// - "resp.mask" provides a mask of valid instructions (instructions are +// masked off by the predicted, taken branch). +// Assumption: superscalar commits are batched together into a single +// "taken" update ("history compression"), and correspond to the +// superscalar fetch 1:1. class BTBUpdate extends Bundle with BTBParameters { val prediction = Valid(new BTBResp) val pc = UInt(width = vaddrBits) @@ -71,11 +79,13 @@ class BTBUpdate extends Bundle with BTBParameters { val isJump = Bool() val isCall = Bool() val isReturn = Bool() + val br_pc = UInt(width = vaddrBits) val incorrectTarget = Bool() } class BTBResp extends Bundle with BTBParameters { val taken = Bool() + val mask = Bits(width = log2Up(params(FetchWidth))) val target = UInt(width = vaddrBits) val entry = UInt(width = opaqueBits) val bht = new BHTResp @@ -102,6 +112,7 @@ class BTB extends Module with BTBParameters { val useRAS = Reg(UInt(width = entries)) val isJump = Reg(UInt(width = entries)) + val brIdx = Mem(UInt(width=log2Up(params(FetchWidth))), entries) private def page(addr: UInt) = addr >> matchBits private def pageMatch(addr: UInt) = { @@ -167,6 +178,7 @@ class BTB extends Module with BTBParameters { tgtPages(waddr) := tgtPageUpdate useRAS(waddr) := update.bits.isReturn isJump(waddr) := update.bits.isJump + brIdx(waddr) := update.bits.br_pc } require(nPages % 2 == 0) @@ -193,6 +205,7 @@ class BTB extends Module with BTBParameters { io.resp.bits.taken := io.resp.valid io.resp.bits.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts)) io.resp.bits.entry := OHToUInt(hits) + io.resp.bits.mask := Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)) if (nBHT > 0) { val bht = new BHT(nBHT) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index f02f1424..43409b46 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -9,6 +9,7 @@ import uncore._ case object BuildFPU extends Field[Option[() => FPU]] case object XprLen extends Field[Int] case object NMultXpr extends Field[Int] +case object FetchWidth extends Field[Int] case object RetireWidth extends Field[Int] case object UseVM extends Field[Boolean] case object FastLoadWord extends Field[Boolean] @@ -20,6 +21,7 @@ case object CoreDCacheReqTagBits extends Field[Int] abstract trait CoreParameters extends UsesParameters { val xprLen = params(XprLen) + val coreFetchWidth = params(FetchWidth) val coreInstBits = params(CoreInstBits) val coreInstBytes = coreInstBits/8 val coreDataBits = xprLen @@ -28,6 +30,7 @@ abstract trait CoreParameters extends UsesParameters { val 
coreMaxAddrBits = math.max(params(PPNBits),params(VPNBits)+1) + params(PgIdxBits) if(params(FastLoadByte)) require(params(FastLoadWord)) + require(params(FetchWidth) == 1) // for now... require(params(RetireWidth) == 1) // for now... } abstract class CoreBundle extends Bundle with CoreParameters diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 7df1a8d3..ef4f353c 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -47,9 +47,9 @@ class Datapath extends Module val wb_reg_rs2 = Reg(Bits()) // instruction decode stage - val id_inst = io.imem.resp.bits.data + val id_inst = io.imem.resp.bits.data(0).toBits; require(params(FetchWidth) == 1) val id_pc = io.imem.resp.bits.pc - + class RegFile { private val rf = Mem(UInt(width = 64), 31) private val reads = collection.mutable.ArrayBuffer[(UInt,UInt)]() diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index fb2dbb0c..05c36902 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -1,5 +1,3 @@ -// See LICENSE for license details. - package rocket import Chisel._ @@ -24,7 +22,8 @@ class FrontendReq extends CoreBundle { class FrontendResp extends CoreBundle { val pc = UInt(width = params(VAddrBits)+1) // ID stage PC - val data = Bits(width = coreInstBits) + val data = Vec.fill(coreFetchWidth) (Bits(width = coreInstBits)) + val mask = Bits(width = coreFetchWidth) val xcpt_ma = Bool() val xcpt_if = Bool() } @@ -60,12 +59,12 @@ class Frontend extends FrontendModule val msb = vaddrBits-1 val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target) - val pcp4_0 = s1_pc + UInt(coreInstBytes) - val pcp4 = Cat(s1_pc(msb) & pcp4_0(msb), pcp4_0(msb,0)) + val ntpc_0 = s1_pc + UInt(coreInstBytes) + val ntpc = Cat(s1_pc(msb) & ntpc_0(msb), ntpc_0(msb,0)) val icmiss = s2_valid && !icache.io.resp.valid - val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, pcp4) + val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, ntpc) val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt - val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((pcp4 & rowBytes) === (s1_pc & rowBytes)) + val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((ntpc & rowBytes) === (s1_pc & rowBytes)) val stall = io.cpu.resp.valid && !io.cpu.resp.ready when (!stall) { @@ -106,7 +105,17 @@ class Frontend extends FrontendModule io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icache.io.resp.valid) io.cpu.resp.bits.pc := s2_pc & SInt(-coreInstBytes) // discard PC LSBs - io.cpu.resp.bits.data := icache.io.resp.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(coreInstBytes)) << log2Up(coreInstBits)) + + + val fetch_data = icache.io.resp.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(coreFetchWidth*coreInstBytes)) << log2Up(coreFetchWidth*coreInstBits)) + for (i <- 0 until coreFetchWidth) { + io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits) + } + + val all_ones = UInt((1 << coreFetchWidth)-1) + val msk_pc = all_ones << s2_pc(log2Up(coreFetchWidth)-1+2,2) + io.cpu.resp.bits.mask := msk_pc & btb.io.resp.bits.mask + io.cpu.resp.bits.xcpt_ma := s2_pc(log2Up(coreInstBytes)-1,0) != UInt(0) io.cpu.resp.bits.xcpt_if := s2_xcpt_if From 59eb7d194dfd7f004a8c8837adce7f83186e2cb4 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Fri, 3 Oct 2014 16:08:08 -0700 Subject: [PATCH 2/7] Finalize superscalar btb. 
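Note: the mask this patch finalizes is the one introduced in patch 1 — the BTB reports which slots of the fetch packet are valid (everything up to and including the predicted-taken branch at brIdx), and the frontend additionally clears the slots below the fetch PC's offset within the packet. A minimal software model of the two masks and how they combine (plain Scala rather than Chisel; the object name, fetch width, and slot numbers are illustrative):

// Models io.resp.bits.mask = Cat((UInt(1) << brIdx) - 1, UInt(1)) and the
// frontend's msk_pc = all_ones << pcOffset, as added in patch 1.
object FetchMaskModel {
  // slots [brIdx:0] stay valid; everything after the predicted-taken
  // branch is masked off
  def btbMask(brIdx: Int): Int = (1 << (brIdx + 1)) - 1

  // slots below the fetch PC's offset within the packet were never fetched
  def pcMask(pcOffset: Int, fetchWidth: Int): Int =
    (((1 << fetchWidth) - 1) << pcOffset) & ((1 << fetchWidth) - 1)

  def main(args: Array[String]): Unit = {
    val fetchWidth = 4
    // fetch packet entered at slot 1, predicted-taken branch in slot 2:
    val mask = pcMask(1, fetchWidth) & btbMask(2)
    println(mask.toBinaryString)   // prints "110": only slots 1 and 2 valid
  }
}

Patch 1 declared resp.mask with width log2Up(FetchWidth); this patch widens it to FetchWidth so the mask can actually cover every slot of the packet.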
--- rocket/src/main/scala/btb.scala | 41 +++++++++++++-------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 81a6d459..94f7a6ac 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -51,45 +51,34 @@ class BHTResp extends Bundle with BTBParameters { // - updated speculatively in fetch (if there's a BTB hit). // - on a mispredict, the history register is reset (again, only if BTB hit). // The counter table: -// - each PC has its own counter, updated when a branch resolves (and BTB hit). -// - the BTB provides the predicted branch PC, allowing us to properly index -// the counter table and provide the prediction for that specific branch. -// Critical path concerns may require only providing a single counter for -// the entire fetch packet, but that complicates how multiple branches -// update that line. -class BHT(nbht: Int, fetchwidth: Int) { +// - each counter corresponds with the "fetch pc" (not the PC of the branch). +// - updated when a branch resolves (and BTB was a hit for that branch). +// The updating branch must provide its "fetch pc" in addition to its own PC. +class BHT(nbht: Int) { val nbhtbits = log2Up(nbht) - private val logfw = if (fetchwidth == 1) 0 else log2Up(fetchwidth) - - def get(fetch_addr: UInt, bridx: UInt, update: Bool): BHTResp = { + def get(addr: UInt, bridx: UInt, update: Bool): BHTResp = { val res = new BHTResp - val aligned_addr = fetch_addr >> UInt(logfw + 2) - val index = aligned_addr ^ history - val counters = table(index) - res.value := (counters >> (bridx<<1)) & Bits(0x3) + val index = addr(nbhtbits+1,2) ^ history + res.value := table(index) res.history := history val taken = res.value(0) when (update) { history := Cat(taken, history(nbhtbits-1,1)) } res } def update(addr: UInt, d: BHTResp, taken: Bool, mispredict: Bool): Unit = { - val aligned_addr = addr >> UInt(logfw + 2) - val index = aligned_addr ^ d.history - val new_cntr = Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken)) - var bridx: UInt = null - if (logfw == 0) bridx = UInt(0) else bridx = addr(logfw+1,2) - val mask = Bits(0x3) << (bridx<<1) - table.write(index, new_cntr, mask) + val index = addr(nbhtbits+1,2) ^ d.history + table(index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken)) when (mispredict) { history := Cat(taken, d.history(nbhtbits-1,1)) } } - private val table = Mem(UInt(width = 2*fetchwidth), nbht) + private val table = Mem(UInt(width = 2), nbht) val history = Reg(UInt(width = nbhtbits)) } // BTB update occurs during branch resolution. // - "pc" is what future fetch PCs will tag match against. // - "br_pc" is the PC of the branch instruction. +// - "bridx" is the low-order PC bits of the predicted branch. // - "resp.mask" provides a mask of valid instructions (instructions are // masked off by the predicted taken branch). 
class BTBUpdate extends Bundle with BTBParameters { @@ -107,7 +96,8 @@ class BTBUpdate extends Bundle with BTBParameters { class BTBResp extends Bundle with BTBParameters { val taken = Bool() - val mask = Bits(width = log2Up(params(FetchWidth))) + val mask = Bits(width = params(FetchWidth)) + val bridx = Bits(width = log2Up(params(FetchWidth))) val target = UInt(width = vaddrBits) val entry = UInt(width = opaqueBits) val bht = new BHTResp @@ -232,13 +222,14 @@ class BTB extends Module with BTBParameters { io.resp.bits.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts)) io.resp.bits.entry := OHToUInt(hits) io.resp.bits.mask := Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)) + io.resp.bits.bridx := brIdx(io.resp.bits.entry) if (nBHT > 0) { - val bht = new BHT(nBHT, params(FetchWidth)) + val bht = new BHT(nBHT) val res = bht.get(io.req.bits.addr, brIdx(io.resp.bits.entry), io.req.valid && hits.orR && !Mux1H(hits, isJump)) val update_btb_hit = io.update.bits.prediction.valid when (io.update.valid && update_btb_hit && !io.update.bits.isJump) { - bht.update(io.update.bits.br_pc, io.update.bits.prediction.bits.bht, + bht.update(io.update.bits.pc, io.update.bits.prediction.bits.bht, io.update.bits.taken, io.update.bits.incorrectTarget) } when (!res.value(0) && !Mux1H(hits, isJump)) { io.resp.bits.taken := false } From 08d2c1333041712a65edd84b1291c1de196c0f08 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Mon, 20 Oct 2014 18:45:23 -0700 Subject: [PATCH 3/7] Fixed btb/icache bugs regarding resp mask, fw==1 --- rocket/src/main/scala/btb.scala | 14 ++++++++++---- rocket/src/main/scala/icache.scala | 8 +++++--- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index b323ef98..c5e7636d 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -79,7 +79,7 @@ class BHT(nbht: Int) { // - "pc" is what future fetch PCs will tag match against. // - "br_pc" is the PC of the branch instruction. // - "bridx" is the low-order PC bits of the predicted branch (after -// shifting off the lowest log(inst_bytes) bits off). +// shifting off the lowest log(inst_bytes) bits off). // - "resp.mask" provides a mask of valid instructions (instructions are // masked off by the predicted taken branch). 
class BTBUpdate extends Bundle with BTBParameters { @@ -196,9 +196,9 @@ class BTB extends Module with BTBParameters { useRAS(waddr) := r_update.bits.isReturn isJump(waddr) := r_update.bits.isJump if (params(FetchWidth) == 1) { - brIdx(waddr) := UInt(0) + brIdx(waddr) := UInt(0) } else { - brIdx(waddr) := r_update.bits.br_pc >> log2Up(params(CoreInstBits)/8) + brIdx(waddr) := r_update.bits.br_pc >> log2Up(params(CoreInstBits)/8) } } @@ -226,8 +226,14 @@ class BTB extends Module with BTBParameters { io.resp.bits.taken := io.resp.valid io.resp.bits.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts)) io.resp.bits.entry := OHToUInt(hits) - io.resp.bits.mask := Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)) io.resp.bits.bridx := brIdx(io.resp.bits.entry) + if (params(FetchWidth) == 1) { + io.resp.bits.mask := UInt(1) + } else { + io.resp.bits.mask := Mux(io.resp.valid, Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)), + ((UInt(1) << UInt(params(FetchWidth)))-UInt(1))) +// val all_ones = UInt((1 << coreFetchWidth)-1) + } if (nBHT > 0) { val bht = new BHT(nBHT) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 0c7fbaba..c854760b 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -58,9 +58,10 @@ class Frontend extends FrontendModule val s2_xcpt_if = Reg(init=Bool(false)) val msb = vaddrBits-1 + val lsb = log2Up(coreFetchWidth*coreInstBytes) val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target) - val ntpc_0 = s1_pc + UInt(coreInstBytes) - val ntpc = Cat(s1_pc(msb) & ntpc_0(msb), ntpc_0(msb,0)) + val ntpc_0 = s1_pc + UInt(coreInstBytes*coreFetchWidth) + val ntpc = Cat(s1_pc(msb) & ntpc_0(msb), ntpc_0(msb,lsb), Bits(0,lsb)) // unsure val icmiss = s2_valid && !icache.io.resp.valid val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, ntpc) val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt @@ -115,7 +116,8 @@ class Frontend extends FrontendModule val all_ones = UInt((1 << coreFetchWidth)-1) val msk_pc = if (coreFetchWidth == 1) all_ones else all_ones << s2_pc(log2Up(coreFetchWidth) -1+2,2) - io.cpu.resp.bits.mask := msk_pc & btb.io.resp.bits.mask + // TODO what is the best way to handle the clock-gating of s2_btb_resp_bits? + io.cpu.resp.bits.mask := Mux(s2_btb_resp_valid, msk_pc & s2_btb_resp_bits.mask, msk_pc) io.cpu.resp.bits.xcpt_ma := s2_pc(log2Up(coreInstBytes)-1,0) != UInt(0) io.cpu.resp.bits.xcpt_if := s2_xcpt_if From 3be3cd7731ec5bbc582d03545b8dfa45596ba1a8 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Mon, 3 Nov 2014 01:13:22 -0800 Subject: [PATCH 4/7] Fixed error with icache/btb resp mask. 
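Note: the masks adjusted here were introduced in patches 1 and 3. Patch 3 also changed the sequential next-fetch PC: instead of stepping by one instruction, s1_pc now advances by a whole fetch packet and is re-aligned to a packet boundary (the line the author marks "// unsure"). A small software model of that alignment, leaving out the sign-bit handling done by the Cat (plain Scala rather than Chisel; the object name, fetch width, and addresses are illustrative):

// Models ntpc = Cat(..., ntpc_0(msb,lsb), Bits(0,lsb)) with
// lsb = log2Up(coreFetchWidth*coreInstBytes), from patch 3.
object NextFetchPCModel {
  private def log2(x: Int): Int = Integer.numberOfTrailingZeros(x)

  def ntpc(s1_pc: Long, fetchWidth: Int, instBytes: Int): Long = {
    val packetBytes = fetchWidth * instBytes
    // advance by one packet, then zero the low-order bits so the next
    // fetch is packet-aligned even when s1_pc entered a packet mid-way
    ((s1_pc + packetBytes) >>> log2(packetBytes)) << log2(packetBytes)
  }

  def main(args: Array[String]): Unit = {
    // fetchWidth = 2, 4-byte instructions: a redirect into 0x1004 falls
    // through to the next aligned packet at 0x1008, not 0x100c
    println(ntpc(0x1004L, 2, 4).toHexString)   // prints "1008"
  }
}

Because a redirect can land mid-packet, the frontend's msk_pc shifts the all-ones mask by the PC's offset within the packet so the slots before the redirect target are dropped.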
--- rocket/src/main/scala/btb.scala | 5 ++--- rocket/src/main/scala/icache.scala | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index c5e7636d..d0506601 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -230,9 +230,8 @@ class BTB extends Module with BTBParameters { if (params(FetchWidth) == 1) { io.resp.bits.mask := UInt(1) } else { - io.resp.bits.mask := Mux(io.resp.valid, Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)), - ((UInt(1) << UInt(params(FetchWidth)))-UInt(1))) -// val all_ones = UInt((1 << coreFetchWidth)-1) + // note: btb_resp is clock gated, so the mask is only relevant for the io.resp.valid case + io.resp.bits.mask := Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)) } if (nBHT > 0) { diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index c854760b..cdf21bca 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -114,9 +114,8 @@ class Frontend extends FrontendModule io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits) } - val all_ones = UInt((1 << coreFetchWidth)-1) + val all_ones = UInt((1 << (coreFetchWidth+1))-1) val msk_pc = if (coreFetchWidth == 1) all_ones else all_ones << s2_pc(log2Up(coreFetchWidth) -1+2,2) - // TODO what is the best way to handle the clock-gating of s2_btb_resp_bits? io.cpu.resp.bits.mask := Mux(s2_btb_resp_valid, msk_pc & s2_btb_resp_bits.mask, msk_pc) io.cpu.resp.bits.xcpt_ma := s2_pc(log2Up(coreInstBytes)-1,0) != UInt(0) From fea31d2167f00a4842827b9c06122c18d79eb9e4 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Tue, 11 Nov 2014 03:34:05 -0800 Subject: [PATCH 5/7] Significant changes and fixes to BTB for superscalar fetch. - BTBUpdate only occurs on mispredicts now. - RASUpdate broken out from BTBUpdate (allows RASUpdate to be performed in Decode). - Added optional 2nd CAM port to BTB for updates (for when updates to the BTB may occur out-of-order). - Fixed resp.mask bit logic. --- rocket/src/main/scala/btb.scala | 89 +++++++++++++++++------------- rocket/src/main/scala/ctrl.scala | 11 ++-- rocket/src/main/scala/dpath.scala | 2 +- rocket/src/main/scala/icache.scala | 6 +- 4 files changed, 63 insertions(+), 45 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index d0506601..5614561d 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -65,36 +65,40 @@ class BHT(nbht: Int) { when (update) { history := Cat(taken, history(nbhtbits-1,1)) } res } - def update(addr: UInt, d: BHTResp, taken: Bool, mispredict: Bool): Unit = { + def update(addr: UInt, d: BHTResp, taken: Bool): Unit = { val index = addr(nbhtbits+1,2) ^ d.history table(index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken)) - when (mispredict) { history := Cat(taken, d.history(nbhtbits-1,1)) } + history := Cat(taken, d.history(nbhtbits-1,1)) } private val table = Mem(UInt(width = 2), nbht) val history = Reg(UInt(width = nbhtbits)) } -// BTB update occurs during branch resolution. +// BTB update occurs during branch resolution (and only on a mispredict). // - "pc" is what future fetch PCs will tag match against. // - "br_pc" is the PC of the branch instruction. -// - "bridx" is the low-order PC bits of the predicted branch (after -// shifting off the lowest log(inst_bytes) bits off). 
-// - "resp.mask" provides a mask of valid instructions (instructions are -// masked off by the predicted taken branch). class BTBUpdate extends Bundle with BTBParameters { val prediction = Valid(new BTBResp) val pc = UInt(width = vaddrBits) val target = UInt(width = vaddrBits) - val returnAddr = UInt(width = vaddrBits) val taken = Bool() val isJump = Bool() - val isCall = Bool() val isReturn = Bool() val br_pc = UInt(width = vaddrBits) - val mispredict = Bool() } +class RASUpdate extends Bundle with BTBParameters { + val isCall = Bool() + val isReturn = Bool() + val returnAddr = UInt(width = vaddrBits) + val prediction = Valid(new BTBResp) +} + +// - "bridx" is the low-order PC bits of the predicted branch (after +// shifting off the lowest log(inst_bytes) bits off). +// - "resp.mask" provides a mask of valid instructions (instructions are +// masked off by the predicted taken branch). class BTBResp extends Bundle with BTBParameters { val taken = Bool() val mask = Bits(width = params(FetchWidth)) @@ -109,11 +113,15 @@ class BTBReq extends Bundle with BTBParameters { } // fully-associative branch target buffer -class BTB extends Module with BTBParameters { +// Higher-performance processors may cause BTB updates to occur out-of-order, +// which requires an extra CAM port for updates (to ensure no duplicates get +// placed in BTB). +class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParameters { val io = new Bundle { val req = Valid(new BTBReq).flip val resp = Valid(new BTBResp) val update = Valid(new BTBUpdate).flip + val ras_update = Valid(new RASUpdate).flip val invalidate = Bool(INPUT) } @@ -158,8 +166,7 @@ class BTB extends Module with BTBParameters { } val updateHit = r_update.bits.prediction.valid - val updateValid = r_update.bits.mispredict || updateHit && Bool(nBHT > 0) - val updateTarget = updateValid && r_update.bits.mispredict && r_update.bits.taken + val updateTarget = r_update.bits.taken val useUpdatePageHit = updatePageHit.orR val doIdxPageRepl = updateTarget && !useUpdatePageHit @@ -179,27 +186,32 @@ class BTB extends Module with BTBParameters { val pageReplEn = idxPageReplEn | tgtPageReplEn idxPageRepl := UIntToOH(Counter(r_update.valid && doPageRepl, nPages)._1) - when (r_update.valid && !(updateValid && !updateTarget)) { - val nextRepl = Counter(!updateHit && updateValid, entries)._1 - val waddr = Mux(updateHit, r_update.bits.prediction.bits.entry, nextRepl) + when (r_update.valid && updateTarget) { + assert(io.req.bits.addr === r_update.bits.target, "BTB request != I$ target") + + val nextRepl = Counter(!updateHit, entries)._1 + var waddr:UInt = null + if (!updates_out_of_order) { + waddr = Mux(updateHit, r_update.bits.prediction.bits.entry, nextRepl) + } else { + println(" BTB accepts out-of-order updates.") + waddr = Mux(updateHits.orR, OHToUInt(updateHits), nextRepl) + } // invalidate entries if we stomp on pages they depend upon idxValid := idxValid & ~Vec.tabulate(entries)(i => (pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR).toBits - idxValid(waddr) := updateValid - when (updateTarget) { - assert(io.req.bits.addr === r_update.bits.target, "BTB request != I$ target") - idxs(waddr) := r_update.bits.pc - tgts(waddr) := update_target - idxPages(waddr) := idxPageUpdate - tgtPages(waddr) := tgtPageUpdate - useRAS(waddr) := r_update.bits.isReturn - isJump(waddr) := r_update.bits.isJump - if (params(FetchWidth) == 1) { - brIdx(waddr) := UInt(0) - } else { - brIdx(waddr) := r_update.bits.br_pc >> log2Up(params(CoreInstBits)/8) - } + idxValid(waddr) := 
Bool(true) + idxs(waddr) := r_update.bits.pc + tgts(waddr) := update_target + idxPages(waddr) := idxPageUpdate + tgtPages(waddr) := tgtPageUpdate + useRAS(waddr) := r_update.bits.isReturn + isJump(waddr) := r_update.bits.isJump + if (params(FetchWidth) == 1) { + brIdx(waddr) := UInt(0) + } else { + brIdx(waddr) := r_update.bits.br_pc >> log2Up(params(CoreInstBits)/8) } require(nPages % 2 == 0) @@ -231,7 +243,9 @@ class BTB extends Module with BTBParameters { io.resp.bits.mask := UInt(1) } else { // note: btb_resp is clock gated, so the mask is only relevant for the io.resp.valid case - io.resp.bits.mask := Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)) + val all_ones = UInt((1 << (params(FetchWidth)+1))-1) + io.resp.bits.mask := Mux(io.resp.bits.taken, Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)), + all_ones) } if (nBHT > 0) { @@ -239,8 +253,7 @@ class BTB extends Module with BTBParameters { val res = bht.get(io.req.bits.addr, io.req.valid && hits.orR && !Mux1H(hits, isJump)) val update_btb_hit = io.update.bits.prediction.valid when (io.update.valid && update_btb_hit && !io.update.bits.isJump) { - bht.update(io.update.bits.pc, io.update.bits.prediction.bits.bht, - io.update.bits.taken, io.update.bits.mispredict) + bht.update(io.update.bits.pc, io.update.bits.prediction.bits.bht, io.update.bits.taken) } when (!res.value(0) && !Mux1H(hits, isJump)) { io.resp.bits.taken := false } io.resp.bits.bht := res @@ -252,13 +265,13 @@ class BTB extends Module with BTBParameters { when (!ras.isEmpty && doPeek) { io.resp.bits.target := ras.peek } - when (io.update.valid) { - when (io.update.bits.isCall) { - ras.push(io.update.bits.returnAddr) + when (io.ras_update.valid) { + when (io.ras_update.bits.isCall) { + ras.push(io.ras_update.bits.returnAddr) when (doPeek) { - io.resp.bits.target := io.update.bits.returnAddr + io.resp.bits.target := io.ras_update.bits.returnAddr } - }.elsewhen (io.update.bits.isReturn && io.update.bits.prediction.valid) { + }.elsewhen (io.ras_update.bits.isReturn && io.ras_update.bits.prediction.valid) { ras.pop } } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 6ea50107..f3aff610 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -652,15 +652,18 @@ class Control extends Module Mux(replay_wb, PC_WB, // replay PC_MEM))) - io.imem.btb_update.valid := (mem_reg_branch || io.imem.btb_update.bits.isJump) && !take_pc_wb + io.imem.btb_update.valid := take_pc_mem && !take_pc_wb io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp io.imem.btb_update.bits.taken := mem_reg_branch && io.dpath.mem_br_taken || io.imem.btb_update.bits.isJump - io.imem.btb_update.bits.mispredict := take_pc_mem io.imem.btb_update.bits.isJump := mem_reg_jal || mem_reg_jalr - io.imem.btb_update.bits.isCall := mem_reg_wen && io.dpath.mem_waddr(0) io.imem.btb_update.bits.isReturn := mem_reg_jalr && io.dpath.mem_rs1_ra - io.imem.req.valid := take_pc + io.imem.ras_update.valid := io.imem.btb_update.bits.isJump && !take_pc_wb + io.imem.ras_update.bits.isCall := mem_reg_wen && io.dpath.mem_waddr(0) + io.imem.ras_update.bits.isReturn := mem_reg_jalr && io.dpath.mem_rs1_ra + io.imem.ras_update.bits.prediction.valid := mem_reg_btb_hit + io.imem.ras_update.bits.prediction.bits := mem_reg_btb_resp + io.imem.req.valid := take_pc val bypassDst = Array(id_raddr1, id_raddr2) val bypassSrc = Array.fill(NBYP)((Bool(true), UInt(0))) diff --git 
a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 08e9d53f..4e05f50c 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -286,8 +286,8 @@ class Datapath extends Module wb_reg_pc)).toUInt // PC_WB io.imem.btb_update.bits.pc := mem_reg_pc io.imem.btb_update.bits.target := io.imem.req.bits.pc - io.imem.btb_update.bits.returnAddr := mem_int_wdata io.imem.btb_update.bits.br_pc := mem_reg_pc + io.imem.ras_update.bits.returnAddr := mem_int_wdata // for hazard/bypass opportunity detection io.ctrl.ex_waddr := ex_reg_inst(11,7) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index cdf21bca..7f94a64f 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -33,18 +33,19 @@ class CPUFrontendIO extends CoreBundle { val resp = Decoupled(new FrontendResp).flip val btb_resp = Valid(new BTBResp).flip val btb_update = Valid(new BTBUpdate) + val ras_update = Valid(new RASUpdate) val ptw = new TLBPTWIO().flip val invalidate = Bool(OUTPUT) } -class Frontend extends FrontendModule +class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule { val io = new Bundle { val cpu = new CPUFrontendIO().flip val mem = new UncachedTileLinkIO } - val btb = Module(new BTB) + val btb = Module(new BTB(btb_updates_out_of_order)) val icache = Module(new ICache) val tlb = Module(new TLB(params(NITLBEntries))) @@ -88,6 +89,7 @@ class Frontend extends FrontendModule btb.io.req.valid := !stall && !icmiss btb.io.req.bits.addr := s1_pc & SInt(-coreInstBytes) btb.io.update := io.cpu.btb_update + btb.io.ras_update := io.cpu.ras_update btb.io.invalidate := io.cpu.invalidate || io.cpu.ptw.invalidate tlb.io.ptw <> io.cpu.ptw From 6749f67b7ff2b75665e3db277ab15b5e4d0c181e Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Sun, 16 Nov 2014 22:02:27 -0800 Subject: [PATCH 6/7] Fixed BHT update error. - separated out BTB/BHT update - BHT updates counters on every branch - BTB update only on mispredicted and taken branches --- rocket/src/main/scala/btb.scala | 78 ++++++++++++++++-------------- rocket/src/main/scala/ctrl.scala | 11 ++++- rocket/src/main/scala/dpath.scala | 1 + rocket/src/main/scala/icache.scala | 4 +- 4 files changed, 54 insertions(+), 40 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 5614561d..62a5daf8 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -51,7 +51,7 @@ class BHTResp extends Bundle with BTBParameters { // - updated speculatively in fetch (if there's a BTB hit). // - on a mispredict, the history register is reset (again, only if BTB hit). // The counter table: -// - each counter corresponds with the "fetch pc" (not the PC of the branch). +// - each counter corresponds with the address of the fetch packet ("fetch pc"). // - updated when a branch resolves (and BTB was a hit for that branch). // The updating branch must provide its "fetch pc". 
class BHT(nbht: Int) { @@ -65,10 +65,10 @@ class BHT(nbht: Int) { when (update) { history := Cat(taken, history(nbhtbits-1,1)) } res } - def update(addr: UInt, d: BHTResp, taken: Bool): Unit = { + def update(addr: UInt, d: BHTResp, taken: Bool, mispredict: Bool): Unit = { val index = addr(nbhtbits+1,2) ^ d.history table(index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken)) - history := Cat(taken, d.history(nbhtbits-1,1)) + when (mispredict) { history := Cat(taken, d.history(nbhtbits-1,1)) } } private val table = Mem(UInt(width = 2), nbht) @@ -88,6 +88,15 @@ class BTBUpdate extends Bundle with BTBParameters { val br_pc = UInt(width = vaddrBits) } +// BHT update occurs during branch resolution on all conditional branches. +// - "pc" is what future fetch PCs will tag match against. +class BHTUpdate extends Bundle with BTBParameters { + val prediction = Valid(new BTBResp) + val pc = UInt(width = vaddrBits) + val taken = Bool() + val mispredict = Bool() +} + class RASUpdate extends Bundle with BTBParameters { val isCall = Bool() val isReturn = Bool() @@ -96,9 +105,9 @@ class RASUpdate extends Bundle with BTBParameters { } // - "bridx" is the low-order PC bits of the predicted branch (after -// shifting off the lowest log(inst_bytes) bits off). +// shifting off the lowest log(inst_bytes) bits off). // - "resp.mask" provides a mask of valid instructions (instructions are -// masked off by the predicted taken branch). +// masked off by the predicted taken branch). class BTBResp extends Bundle with BTBParameters { val taken = Bool() val mask = Bits(width = params(FetchWidth)) @@ -120,7 +129,8 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete val io = new Bundle { val req = Valid(new BTBReq).flip val resp = Valid(new BTBResp) - val update = Valid(new BTBUpdate).flip + val btb_update = Valid(new BTBUpdate).flip + val bht_update = Valid(new BHTUpdate).flip val ras_update = Valid(new RASUpdate).flip val invalidate = Bool(INPUT) } @@ -151,67 +161,62 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete idxValid & idxMatch & idxPageMatch } - val r_update = Pipe(io.update) + val r_btb_update = Pipe(io.btb_update) val update_target = io.req.bits.addr val pageHit = pageMatch(io.req.bits.addr) val hits = tagMatch(io.req.bits.addr, pageHit) - val updatePageHit = pageMatch(r_update.bits.pc) - val updateHits = tagMatch(r_update.bits.pc, updatePageHit) + val updatePageHit = pageMatch(r_btb_update.bits.pc) + val updateHits = tagMatch(r_btb_update.bits.pc, updatePageHit) - private var lfsr = LFSR16(r_update.valid) + private var lfsr = LFSR16(r_btb_update.valid) def rand(width: Int) = { lfsr = lfsr(lfsr.getWidth-1,1) Random.oneHot(width, lfsr) } - val updateHit = r_update.bits.prediction.valid - val updateTarget = r_update.bits.taken + val updateHit = r_btb_update.bits.prediction.valid val useUpdatePageHit = updatePageHit.orR - val doIdxPageRepl = updateTarget && !useUpdatePageHit + val doIdxPageRepl = !useUpdatePageHit val idxPageRepl = UInt() val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl) val idxPageUpdate = OHToUInt(idxPageUpdateOH) val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0)) - val samePage = page(r_update.bits.pc) === page(update_target) + val samePage = page(r_btb_update.bits.pc) === page(update_target) val usePageHit = (pageHit & ~idxPageReplEn).orR - val doTgtPageRepl = updateTarget && !samePage && !usePageHit + val doTgtPageRepl = !samePage && !usePageHit val tgtPageRepl 
= Mux(samePage, idxPageUpdateOH, idxPageUpdateOH(nPages-2,0) << 1 | idxPageUpdateOH(nPages-1)) val tgtPageUpdate = OHToUInt(Mux(usePageHit, pageHit, tgtPageRepl)) val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0)) val doPageRepl = doIdxPageRepl || doTgtPageRepl val pageReplEn = idxPageReplEn | tgtPageReplEn - idxPageRepl := UIntToOH(Counter(r_update.valid && doPageRepl, nPages)._1) + idxPageRepl := UIntToOH(Counter(r_btb_update.valid && doPageRepl, nPages)._1) - when (r_update.valid && updateTarget) { - assert(io.req.bits.addr === r_update.bits.target, "BTB request != I$ target") + when (r_btb_update.valid) { + assert(io.req.bits.addr === r_btb_update.bits.target, "BTB request != I$ target") val nextRepl = Counter(!updateHit, entries)._1 - var waddr:UInt = null - if (!updates_out_of_order) { - waddr = Mux(updateHit, r_update.bits.prediction.bits.entry, nextRepl) - } else { - println(" BTB accepts out-of-order updates.") - waddr = Mux(updateHits.orR, OHToUInt(updateHits), nextRepl) - } + val waddr = + if (updates_out_of_order) Mux(updateHits.orR, OHToUInt(updateHits), nextRepl) + else Mux(updateHit, r_btb_update.bits.prediction.bits.entry, nextRepl) // invalidate entries if we stomp on pages they depend upon idxValid := idxValid & ~Vec.tabulate(entries)(i => (pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR).toBits - idxValid(waddr) := Bool(true) - idxs(waddr) := r_update.bits.pc + idxValid(waddr) := Bool(true) + idxs(waddr) := r_btb_update.bits.pc tgts(waddr) := update_target idxPages(waddr) := idxPageUpdate tgtPages(waddr) := tgtPageUpdate - useRAS(waddr) := r_update.bits.isReturn - isJump(waddr) := r_update.bits.isJump + useRAS(waddr) := r_btb_update.bits.isReturn + isJump(waddr) := r_btb_update.bits.isJump if (params(FetchWidth) == 1) { brIdx(waddr) := UInt(0) } else { - brIdx(waddr) := r_update.bits.br_pc >> log2Up(params(CoreInstBits)/8) + brIdx(waddr) := r_btb_update.bits.br_pc >> log2Up(params(CoreInstBits)/8) } require(nPages % 2 == 0) @@ -222,9 +227,9 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete when (en && pageReplEn(i)) { pages(i) := data } writeBank(0, 2, Mux(idxWritesEven, doIdxPageRepl, doTgtPageRepl), - Mux(idxWritesEven, page(r_update.bits.pc), page(update_target))) + Mux(idxWritesEven, page(r_btb_update.bits.pc), page(update_target))) writeBank(1, 2, Mux(idxWritesEven, doTgtPageRepl, doIdxPageRepl), - Mux(idxWritesEven, page(update_target), page(r_update.bits.pc))) + Mux(idxWritesEven, page(update_target), page(r_btb_update.bits.pc))) when (doPageRepl) { pageValid := pageValid | pageReplEn } } @@ -243,17 +248,16 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete io.resp.bits.mask := UInt(1) } else { // note: btb_resp is clock gated, so the mask is only relevant for the io.resp.valid case - val all_ones = UInt((1 << (params(FetchWidth)+1))-1) io.resp.bits.mask := Mux(io.resp.bits.taken, Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)), - all_ones) + SInt(-1)) } if (nBHT > 0) { val bht = new BHT(nBHT) val res = bht.get(io.req.bits.addr, io.req.valid && hits.orR && !Mux1H(hits, isJump)) - val update_btb_hit = io.update.bits.prediction.valid - when (io.update.valid && update_btb_hit && !io.update.bits.isJump) { - bht.update(io.update.bits.pc, io.update.bits.prediction.bits.bht, io.update.bits.taken) + val update_btb_hit = io.bht_update.bits.prediction.valid + when (io.bht_update.valid && update_btb_hit) { + bht.update(io.bht_update.bits.pc, io.bht_update.bits.prediction.bits.bht, 
io.bht_update.bits.taken, io.bht_update.bits.mispredict) } when (!res.value(0) && !Mux1H(hits, isJump)) { io.resp.bits.taken := false } io.resp.bits.bht := res diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index f3aff610..18ec1b25 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -652,17 +652,24 @@ class Control extends Module Mux(replay_wb, PC_WB, // replay PC_MEM))) - io.imem.btb_update.valid := take_pc_mem && !take_pc_wb + io.imem.btb_update.valid := io.dpath.mem_misprediction && ((mem_reg_branch && io.dpath.mem_br_taken) || mem_reg_jalr || mem_reg_jal) && !take_pc_wb io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp - io.imem.btb_update.bits.taken := mem_reg_branch && io.dpath.mem_br_taken || io.imem.btb_update.bits.isJump io.imem.btb_update.bits.isJump := mem_reg_jal || mem_reg_jalr io.imem.btb_update.bits.isReturn := mem_reg_jalr && io.dpath.mem_rs1_ra + + io.imem.bht_update.valid := mem_reg_branch && !take_pc_wb + io.imem.bht_update.bits.taken := io.dpath.mem_br_taken + io.imem.bht_update.bits.mispredict := io.dpath.mem_misprediction + io.imem.bht_update.bits.prediction.valid := mem_reg_btb_hit + io.imem.bht_update.bits.prediction.bits := mem_reg_btb_resp + io.imem.ras_update.valid := io.imem.btb_update.bits.isJump && !take_pc_wb io.imem.ras_update.bits.isCall := mem_reg_wen && io.dpath.mem_waddr(0) io.imem.ras_update.bits.isReturn := mem_reg_jalr && io.dpath.mem_rs1_ra io.imem.ras_update.bits.prediction.valid := mem_reg_btb_hit io.imem.ras_update.bits.prediction.bits := mem_reg_btb_resp + io.imem.req.valid := take_pc val bypassDst = Array(id_raddr1, id_raddr2) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 4e05f50c..beff52fb 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -287,6 +287,7 @@ class Datapath extends Module io.imem.btb_update.bits.pc := mem_reg_pc io.imem.btb_update.bits.target := io.imem.req.bits.pc io.imem.btb_update.bits.br_pc := mem_reg_pc + io.imem.bht_update.bits.pc := mem_reg_pc io.imem.ras_update.bits.returnAddr := mem_int_wdata // for hazard/bypass opportunity detection diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 7f94a64f..7b4cf57e 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -33,6 +33,7 @@ class CPUFrontendIO extends CoreBundle { val resp = Decoupled(new FrontendResp).flip val btb_resp = Valid(new BTBResp).flip val btb_update = Valid(new BTBUpdate) + val bht_update = Valid(new BHTUpdate) val ras_update = Valid(new RASUpdate) val ptw = new TLBPTWIO().flip val invalidate = Bool(OUTPUT) @@ -88,7 +89,8 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule btb.io.req.valid := !stall && !icmiss btb.io.req.bits.addr := s1_pc & SInt(-coreInstBytes) - btb.io.update := io.cpu.btb_update + btb.io.btb_update := io.cpu.btb_update + btb.io.bht_update := io.cpu.bht_update btb.io.ras_update := io.cpu.ras_update btb.io.invalidate := io.cpu.invalidate || io.cpu.ptw.invalidate From f19b3ca43e5375956bec18b8621749ad28d8d2b7 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Sun, 16 Nov 2014 22:04:33 -0800 Subject: [PATCH 7/7] Deleted extra spaces at EOL in ctrl.scala --- rocket/src/main/scala/ctrl.scala | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala 
b/rocket/src/main/scala/ctrl.scala index 18ec1b25..247a12b9 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -79,7 +79,7 @@ abstract trait DecodeConstants // | | | | | | | | | | | | | | | | | | | | | | | | | amo // | | | | | | | | | | | | | | | | | | | | | | | | | | List(N, X,X,X,X,X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,CSR.X,N,X,X,X,X,X) - + val table: Array[(UInt, List[UInt])] } @@ -162,7 +162,7 @@ object XDecode extends DecodeConstants SRL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), SRA-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - ADDIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + ADDIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), SLLIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), SRLIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), SRAIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), @@ -322,12 +322,12 @@ class Control extends Module if (!params(BuildRoCC).isEmpty) decode_table ++= RoCCDecode.table val cs = DecodeLogic(io.dpath.inst, XDecode.decode_default, decode_table) - + val (id_int_val: Bool) :: (id_fp_val: Bool) :: (id_rocc_val: Bool) :: (id_branch: Bool) :: (id_jal: Bool) :: (id_jalr: Bool) :: (id_renx2: Bool) :: (id_renx1: Bool) :: cs0 = cs val id_sel_alu2 :: id_sel_alu1 :: id_sel_imm :: (id_fn_dw: Bool) :: id_fn_alu :: cs1 = cs0 val (id_mem_val: Bool) :: id_mem_cmd :: id_mem_type :: (id_mul_val: Bool) :: (id_div_val: Bool) :: (id_wen: Bool) :: cs2 = cs1 val id_csr :: (id_fence_i: Bool) :: (id_sret: Bool) :: (id_syscall: Bool) :: (id_replay_next: Bool) :: (id_fence: Bool) :: (id_amo: Bool) :: Nil = cs2 - + val ex_reg_xcpt_interrupt = Reg(Bool()) val ex_reg_valid = Reg(Bool()) val ex_reg_branch = Reg(Bool()) @@ -470,14 +470,14 @@ class Control extends Module ex_reg_wen := Bool(false) ex_reg_fp_wen := Bool(false) ex_reg_sret := Bool(false) - ex_reg_flush_inst := Bool(false) + ex_reg_flush_inst := Bool(false) ex_reg_fp_val := Bool(false) ex_reg_rocc_val := Bool(false) ex_reg_replay_next := Bool(false) ex_reg_load_use := Bool(false) ex_reg_csr := CSR.N ex_reg_xcpt := Bool(false) - } + } .otherwise { ex_reg_branch := id_branch ex_reg_jal := id_jal @@ -514,7 +514,7 @@ class Control extends Module val (ex_xcpt, ex_cause) = checkExceptions(List( (ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause), (ex_reg_fp_val && io.fpu.illegal_rm, UInt(Causes.illegal_instruction)))) - + mem_reg_replay := !take_pc_mem_wb && replay_ex mem_reg_xcpt_interrupt := !take_pc_mem_wb && ex_reg_xcpt_interrupt && !mem_reg_replay_next when (ex_xcpt) { mem_reg_cause := ex_cause } @@ -599,7 +599,7 @@ class Control extends Module } val wb_set_sboard = wb_reg_div_mul_val || wb_dcache_miss || wb_reg_rocc_val - val replay_wb_common = + val replay_wb_common = io.dmem.resp.bits.nack || wb_reg_replay || io.dpath.csr_replay val wb_rocc_val = wb_reg_rocc_val && !replay_wb_common val replay_wb = replay_wb_common || wb_reg_rocc_val && !io.rocc.cmd.ready @@ -699,7 +699,7 @@ class Control extends Module io.fpu.dec.wen && id_waddr === io.dpath.ex_waddr) val id_ex_hazard = data_hazard_ex && (ex_reg_csr != CSR.N || ex_reg_jalr || ex_reg_mem_val || ex_reg_div_mul_val || 
ex_reg_fp_val || ex_reg_rocc_val) || fp_data_hazard_ex && (ex_reg_mem_val || ex_reg_fp_val) - + // stall for RAW/WAW hazards on PCRs, LB/LH, and mul/div in memory stage. val mem_mem_cmd_bh = if (params(FastLoadWord)) Bool(!params(FastLoadByte)) && mem_reg_slow_bypass
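Taken together, patches 5 and 6 leave three separate update paths into the front end: the BTB is written only when a taken branch, JAL, or JALR mispredicts; the BHT counters are written on every resolved conditional branch, with the speculative global history repaired only on a mispredict; and the RAS is pushed on calls and popped on returns that hit in the BTB. A small software model of the BHT half of that policy, using the same counter-update expression as btb.scala (plain Scala rather than Chisel; the class name is illustrative, and the index uses the live history rather than the checkpointed d.history carried in BHTResp):

// Software model of the BHT behaviour after patch 6: counters are written
// on every resolved conditional branch; the global history is only
// repaired on a mispredict.
class BhtModel(nbht: Int) {
  require(nbht >= 2 && (nbht & (nbht - 1)) == 0, "table size must be a power of two")
  private val nbhtbits = Integer.numberOfTrailingZeros(nbht)
  private val table = Array.fill(nbht)(0)   // 2-bit counters
  private var history = 0                   // nbhtbits of global history

  // index = addr(nbhtbits+1, 2) ^ history, i.e. indexed by the fetch pc
  private def index(fetchPC: Long): Int =
    ((fetchPC >> 2).toInt ^ history) & (nbht - 1)

  // bit 0 of the counter is the taken/not-taken hint consumed by the BTB;
  // the history is pushed speculatively with the predicted direction
  // (in hardware only on a BTB hit for a conditional branch)
  def predict(fetchPC: Long): Boolean = {
    val taken = (table(index(fetchPC)) & 1) == 1
    history = (((if (taken) 1 else 0) << (nbhtbits - 1)) | (history >> 1)) & (nbht - 1)
    taken
  }

  // new counter = Cat(taken, (v1 & v0) | ((v1 | v0) & taken)): bit 1
  // records the latest outcome, bit 0 flips only after it has disagreed
  // with the outcome twice (hysteresis)
  def update(fetchPC: Long, taken: Boolean, mispredict: Boolean): Unit = {
    val i  = index(fetchPC)
    val t  = if (taken) 1 else 0
    val v1 = (table(i) >> 1) & 1
    val v0 = table(i) & 1
    table(i) = (t << 1) | ((v1 & v0) | ((v1 | v0) & t))
    if (mispredict)
      history = ((t << (nbhtbits - 1)) | (history >> 1)) & (nbht - 1)
  }
}

A mispredicted taken branch would additionally drive a BTBUpdate in the same cycle, tagged with the branch's fetch pc and carrying br_pc so the BTB can record the branch's slot (brIdx) within the fetch packet.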