diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 02ca111e..e3762bfd 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -62,6 +62,14 @@ class BHT(nbht: Int) { val history = Reg(UInt(width = nbhtbits)) } +// BTB update occurs during branch resolution (i.e., PC redirection on a mispredict). +// - "pc" is what future fetch PCs will tag-match against. +// - "br_pc" is the PC of the branch instruction. +// - "resp.mask" provides a mask of valid instructions (instructions are +// masked off by the predicted, taken branch). +// Assumption: superscalar commits are batched together into a single +// "taken" update ("history compression"), and correspond to the +// superscalar fetch 1:1. class BTBUpdate extends Bundle with BTBParameters { val prediction = Valid(new BTBResp) val pc = UInt(width = vaddrBits) @@ -71,11 +79,13 @@ class BTBUpdate extends Bundle with BTBParameters { val isJump = Bool() val isCall = Bool() val isReturn = Bool() + val br_pc = UInt(width = vaddrBits) val incorrectTarget = Bool() } class BTBResp extends Bundle with BTBParameters { val taken = Bool() + val mask = Bits(width = log2Up(params(FetchWidth))) val target = UInt(width = vaddrBits) val entry = UInt(width = opaqueBits) val bht = new BHTResp @@ -102,6 +112,7 @@ class BTB extends Module with BTBParameters { val useRAS = Reg(UInt(width = entries)) val isJump = Reg(UInt(width = entries)) + val brIdx = Mem(UInt(width=log2Up(params(FetchWidth))), entries) private def page(addr: UInt) = addr >> matchBits private def pageMatch(addr: UInt) = { @@ -167,6 +178,7 @@ class BTB extends Module with BTBParameters { tgtPages(waddr) := tgtPageUpdate useRAS(waddr) := update.bits.isReturn isJump(waddr) := update.bits.isJump + brIdx(waddr) := update.bits.br_pc } require(nPages % 2 == 0) @@ -193,6 +205,7 @@ class BTB extends Module with BTBParameters { io.resp.bits.taken := io.resp.valid io.resp.bits.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), 
pages), Mux1H(hits, tgts)) io.resp.bits.entry := OHToUInt(hits) + io.resp.bits.mask := Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)) if (nBHT > 0) { val bht = new BHT(nBHT) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index f02f1424..43409b46 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -9,6 +9,7 @@ import uncore._ case object BuildFPU extends Field[Option[() => FPU]] case object XprLen extends Field[Int] case object NMultXpr extends Field[Int] +case object FetchWidth extends Field[Int] case object RetireWidth extends Field[Int] case object UseVM extends Field[Boolean] case object FastLoadWord extends Field[Boolean] @@ -20,6 +21,7 @@ case object CoreDCacheReqTagBits extends Field[Int] abstract trait CoreParameters extends UsesParameters { val xprLen = params(XprLen) + val coreFetchWidth = params(FetchWidth) val coreInstBits = params(CoreInstBits) val coreInstBytes = coreInstBits/8 val coreDataBits = xprLen @@ -28,6 +30,7 @@ abstract trait CoreParameters extends UsesParameters { val coreMaxAddrBits = math.max(params(PPNBits),params(VPNBits)+1) + params(PgIdxBits) if(params(FastLoadByte)) require(params(FastLoadWord)) + require(params(FetchWidth) == 1) // for now... require(params(RetireWidth) == 1) // for now... 
} abstract class CoreBundle extends Bundle with CoreParameters diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 7df1a8d3..ef4f353c 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -47,9 +47,9 @@ class Datapath extends Module val wb_reg_rs2 = Reg(Bits()) // instruction decode stage - val id_inst = io.imem.resp.bits.data + val id_inst = io.imem.resp.bits.data(0).toBits; require(params(FetchWidth) == 1) val id_pc = io.imem.resp.bits.pc - + class RegFile { private val rf = Mem(UInt(width = 64), 31) private val reads = collection.mutable.ArrayBuffer[(UInt,UInt)]() diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index fb2dbb0c..05c36902 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -1,5 +1,3 @@ -// See LICENSE for license details. - package rocket import Chisel._ @@ -24,7 +22,8 @@ class FrontendReq extends CoreBundle { class FrontendResp extends CoreBundle { val pc = UInt(width = params(VAddrBits)+1) // ID stage PC - val data = Bits(width = coreInstBits) + val data = Vec.fill(coreFetchWidth) (Bits(width = coreInstBits)) + val mask = Bits(width = coreFetchWidth) val xcpt_ma = Bool() val xcpt_if = Bool() } @@ -60,12 +59,12 @@ class Frontend extends FrontendModule val msb = vaddrBits-1 val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target) - val pcp4_0 = s1_pc + UInt(coreInstBytes) - val pcp4 = Cat(s1_pc(msb) & pcp4_0(msb), pcp4_0(msb,0)) + val ntpc_0 = s1_pc + UInt(coreInstBytes) + val ntpc = Cat(s1_pc(msb) & ntpc_0(msb), ntpc_0(msb,0)) val icmiss = s2_valid && !icache.io.resp.valid - val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, pcp4) + val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, ntpc) val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt - val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((pcp4 & rowBytes) === (s1_pc & rowBytes)) + val 
s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((ntpc & rowBytes) === (s1_pc & rowBytes)) val stall = io.cpu.resp.valid && !io.cpu.resp.ready when (!stall) { @@ -106,7 +105,17 @@ class Frontend extends FrontendModule io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icache.io.resp.valid) io.cpu.resp.bits.pc := s2_pc & SInt(-coreInstBytes) // discard PC LSBs - io.cpu.resp.bits.data := icache.io.resp.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(coreInstBytes)) << log2Up(coreInstBits)) + + + val fetch_data = icache.io.resp.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(coreFetchWidth*coreInstBytes)) << log2Up(coreFetchWidth*coreInstBits)) + for (i <- 0 until coreFetchWidth) { + io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits) + } + + val all_ones = UInt((1 << coreFetchWidth)-1) + val msk_pc = all_ones << s2_pc(log2Up(coreFetchWidth)-1+2,2) + io.cpu.resp.bits.mask := msk_pc & btb.io.resp.bits.mask + io.cpu.resp.bits.xcpt_ma := s2_pc(log2Up(coreInstBytes)-1,0) != UInt(0) io.cpu.resp.bits.xcpt_if := s2_xcpt_if