e8c8d2af71
Fundamental new features: * Added tile package: This package is intended to hold components re-usable across different types of tile. Will be the future location of TL2-RoCC accelerators and new diplomatic versions of intra-tile interfaces. * Adopted [ModuleName]Params convention: Code base was very inconsistent about what to name case classes that provide parameters to modules. Settled on calling them [ModuleName]Params to distinguish them from config.Parameters and config.Config. So far applied mostly only to case classes defined within rocket and tile. * Defined RocketTileParams: A nested case class containing case classes for all the components of a tile (L1 caches and core). Allows all such parameters to vary per-tile. * Defined RocketCoreParams: All the parameters that can be varied per-core. * Defined L1CacheParams: A trait defining the parameters common to L1 caches, made concrete in different derived case classes. * Defined RocketTilesKey: A sequence of RocketTileParams, one for every tile to be created. * Provided HeterogeneousDualCoreConfig: An example of making a heterogeneous chip with two cores, one big and one little. * Changes to legacy code: ReplacementPolicy moved to package util. L1Metadata moved to package tile. Legacy L2 cache agent removed because it can no longer share the metadata array implementation with the L1. Legacy GroundTests on life support. Additional changes that got rolled in along the way: * rocket: Fix critical path through BTB for I$ index bits > pgIdxBits * coreplex: tiles connected via :=* * groundtest: updated to use TileParams * tilelink: cache cork requirements are relaxed to allow more cacheless masters
276 lines
11 KiB
Scala
276 lines
11 KiB
Scala
// See LICENSE.Berkeley for license details.
|
|
// See LICENSE.SiFive for license details.
|
|
|
|
package rocket
|
|
|
|
import Chisel._
|
|
import Chisel.ImplicitConversions._
|
|
import config._
|
|
import tile.HasCoreParameters
|
|
import util._
|
|
|
|
/** Configuration knobs for a tile's branch target buffer.
  *
  * @param nEntries          number of BTB entries
  * @param nRAS              number of return-address-stack entries (0 disables the RAS)
  * @param updatesOutOfOrder when true, the BTB re-checks its tags on every update
  *                          instead of trusting the entry index carried with the update
  */
case class BTBParams(
  nEntries: Int = 40,
  nRAS: Int = 2,
  updatesOutOfOrder: Boolean = false)
|
|
|
|
/** Elaboration-time constants derived from the tile's BTB parameters.
  *
  * Mixed into every BTB module and bundle so that they agree on widths.
  */
trait HasBtbParameters extends HasCoreParameters {
  // A tile that does not configure a BTB is treated as having a zero-entry one.
  val btbParams = tileParams.btb.getOrElse(BTBParams(nEntries = 0))
  // Number of low-order address bits compared per entry: at least pgIdxBits,
  // widened to cover (cache block bytes * I$ sets) when that is larger.
  // NOTE(review): `.get` throws if the tile has no icache params - confirm every tile defines them.
  val matchBits = pgIdxBits max log2Ceil(p(coreplex.CacheBlockBytes) * tileParams.icache.get.nSets)
  val entries = btbParams.nEntries
  val nRAS = btbParams.nRAS
  val updatesOutOfOrder = btbParams.updatesOutOfOrder
  // max(1, log2Up(entries)) rounded up to the next even number.
  val nPages = ((1 max(log2Up(entries)))+1)/2*2 // control logic assumes 2 divides pages
  // Width of the entry index that travels with a prediction (BTBResp.entry).
  val opaqueBits = log2Up(entries)
  // Power of two derived from twice the entry count.
  val nBHT = 1 << log2Up(entries*2)
}
|
|
|
|
/** Base class for BTB hardware modules; brings the derived BTB constants into scope. */
abstract class BtbModule(implicit val p: Parameters)
  extends Module
  with HasBtbParameters
|
|
/** Base class for BTB bundles; brings the derived BTB constants into scope. */
abstract class BtbBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
  with HasBtbParameters
|
|
|
|
/** Return-address stack held in registers and addressed as a circular buffer.
  *
  * This is a plain Scala class, not a Module: its registers are created in
  * whichever module constructs it.
  *
  * @param nras number of stack slots
  */
class RAS(nras: Int) {
  /** Writes `addr` into the slot after the current top and makes it the new top. */
  def push(addr: UInt): Unit = {
    // Occupancy saturates at nras; pushing onto a full stack overwrites a slot.
    when (count < nras) { count := count + 1 }
    // The explicit wrap to 0 is only generated for non-power-of-two depths.
    val nextPos = Mux(Bool(isPow2(nras)) || pos < nras-1, pos+1, UInt(0))
    stack(nextPos) := addr
    pos := nextPos
  }
  /** Address currently on top of the stack (meaningful only when not empty). */
  def peek: UInt = stack(pos)
  /** Drops the top entry; does nothing when the stack is empty. */
  def pop(): Unit = when (!isEmpty) {
    count := count - 1
    // Mirror of push: wrap from 0 back to the last slot for non-power-of-two depths.
    pos := Mux(Bool(isPow2(nras)) || pos > 0, pos-1, UInt(nras-1))
  }
  /** Marks the stack empty by zeroing the occupancy count; `pos` is left as is. */
  def clear(): Unit = count := UInt(0)
  def isEmpty: Bool = count === UInt(0)

  // None of these registers is given a reset value here.
  private val count = Reg(UInt(width = log2Up(nras+1)))
  private val pos = Reg(UInt(width = log2Up(nras)))
  private val stack = Reg(Vec(nras, UInt()))
}
|
|
|
|
/** BHT lookup result that travels with a prediction so the update can reuse it. */
class BHTResp(implicit p: Parameters) extends BtbBundle()(p) {
  // Global history value that was used to index the table for this lookup.
  val history = UInt(width = log2Up(nBHT).max(1))
  // The 2-bit counter read from the table.
  val value = UInt(width = 2)
}
|
|
|
|
// BHT contains a table of 2-bit counters and a global history register.
// The BHT only predicts and updates when there is a BTB hit.
// The global history:
//    - is updated speculatively in fetch (if there's a BTB hit).
//    - on a mispredict, is rebuilt from the history recorded with the
//      prediction plus the actual outcome (again, only if BTB hit).
// The counter table:
//    - each counter corresponds with the address of the fetch packet ("fetch pc").
//    - is updated when a branch resolves (and BTB was a hit for that branch).
//      The updating branch must provide its "fetch pc".
|
|
/** Branch history table: 2-bit counters indexed by PC bits XORed with a
  * global history register.
  *
  * This is a plain Scala class, not a Module: its memory and history register
  * are created in whichever module constructs it.
  *
  * @param nbht number of counters in the table
  */
class BHT(nbht: Int)(implicit val p: Parameters) extends HasCoreParameters {
  val nbhtbits = log2Up(nbht)

  /** Reads the counter selected by `addr` and the current history.
    *
    * When `update` is asserted the predicted direction is shifted into the
    * global history.
    *
    * @return the counter value together with the history used for the lookup
    */
  def get(addr: UInt, update: Bool): BHTResp = {
    val res = Wire(new BHTResp)
    // NOTE(review): this slice is nbhtbits wide only when log2Up(coreInstBytes) == 2 - confirm for other sizes.
    val index = addr(nbhtbits+1, log2Up(coreInstBytes)) ^ history
    res.value := table(index)
    res.history := history
    // Bit 0 of the counter is the predicted direction.
    val taken = res.value(0)
    when (update) { history := Cat(taken, history(nbhtbits-1,1)) }
    res
  }

  /** Trains the counter that produced prediction `d` with the resolved outcome.
    *
    * @param addr       same address that was passed to `get` for this branch
    * @param d          lookup result returned by `get`
    * @param taken      resolved branch direction
    * @param mispredict when asserted, the global history is rebuilt from
    *                   `d.history` and the resolved direction
    */
  def update(addr: UInt, d: BHTResp, taken: Bool, mispredict: Bool): Unit = {
    val index = addr(nbhtbits+1, log2Up(coreInstBytes)) ^ d.history
    // New bit 1 is the latest outcome; new bit 0 is the majority of the two
    // old counter bits and the latest outcome.
    table(index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken))
    when (mispredict) { history := Cat(taken, d.history(nbhtbits-1,1)) }
  }

  private val table = Mem(nbht, UInt(width = 2))
  val history = Reg(UInt(width = nbhtbits))
}
|
|
|
|
// BTB update occurs during branch resolution (and only on a mispredict).
//  - "pc" is what future fetch PCs will tag match against.
//  - "br_pc" is the PC of the branch instruction.
|
|
/** Request to insert, refresh, or invalidate a BTB entry. */
class BTBUpdate(implicit p: Parameters) extends BtbBundle()(p) {
  // Prediction that was made for this branch; valid means the BTB had hit.
  val prediction = Valid(new BTBResp)
  val pc = UInt(width = vaddrBits)
  val target = UInt(width = vaddrBits)
  val taken = Bool()
  // Written to the entry's valid bit: set keeps/creates the entry, clear removes it.
  val isValid = Bool()
  val isJump = Bool()
  val isReturn = Bool()
  val br_pc = UInt(width = vaddrBits)
}
|
|
|
|
// BHT update occurs during branch resolution on all conditional branches.
//  - "pc" is what future fetch PCs will tag match against.
|
|
/** Resolved-branch information used to train the BHT. */
class BHTUpdate(implicit p: Parameters) extends BtbBundle()(p) {
  // Prediction that was made for this branch; its `bht` field is fed back on update.
  val prediction = Valid(new BTBResp)
  val pc = UInt(width = vaddrBits)
  val taken = Bool()
  val mispredict = Bool()
}
|
|
|
|
/** Call/return notification used to push or pop the return-address stack. */
class RASUpdate(implicit p: Parameters) extends BtbBundle()(p) {
  val isCall = Bool()
  val isReturn = Bool()
  // Address pushed onto the stack when `isCall` is set.
  val returnAddr = UInt(width = vaddrBits)
  // Prediction that was made for this instruction; a return only pops when it is valid.
  val prediction = Valid(new BTBResp)
}
|
|
|
|
//  - "bridx" is the low-order PC bits of the predicted branch (after
//     shifting off the lowest log(inst_bytes) bits).
//  - "mask" provides a mask of valid instructions (instructions are
//     masked off by the predicted taken branch from the BTB).
|
|
/** Prediction produced by the BTB for one fetch request. */
class BTBResp(implicit p: Parameters) extends BtbBundle()(p) {
  val taken = Bool()
  val mask = Bits(width = fetchWidth)
  val bridx = Bits(width = log2Up(fetchWidth))
  val target = UInt(width = vaddrBits)
  // Index of the hitting entry, handed back with later updates.
  val entry = UInt(width = opaqueBits)
  // BHT lookup result, handed back with later BHT updates.
  val bht = new BHTResp
}
|
|
|
|
/** BTB lookup request: the address to predict for. */
class BTBReq(implicit p: Parameters) extends BtbBundle()(p) {
  val addr = UInt(width = vaddrBits)
}
|
|
|
|
// Fully-associative branch target buffer.
// Higher-performance processors may cause BTB updates to occur out-of-order,
// which requires an extra CAM port for updates (to ensure no duplicates get
// placed in BTB).
|
|
/** Branch target buffer with an attached BHT and return-address stack.
  *
  * Each entry stores the low `matchBits` of a branch PC and of its target;
  * the upper address bits are kept in a small shared table of "pages" that
  * entries reference by index.
  *
  * Ports:
  *  - `req` / `resp`: lookup address in, prediction out (combinational).
  *  - `btb_update`: entry insertion/invalidation, applied after passing through a `Pipe`.
  *  - `bht_update`: direction training for the BHT.
  *  - `ras_update`: call/return notifications for the RAS.
  */
class BTB(implicit p: Parameters) extends BtbModule {
  val io = new Bundle {
    val req = Valid(new BTBReq).flip
    val resp = Valid(new BTBResp)
    val btb_update = Valid(new BTBUpdate).flip
    val bht_update = Valid(new BHTUpdate).flip
    val ras_update = Valid(new RASUpdate).flip
  }

  // Per-entry low-order PC / target bits and the page slot each one refers to.
  val idxs = Reg(Vec(entries, UInt(width=matchBits - log2Up(coreInstBytes))))
  val idxPages = Reg(Vec(entries, UInt(width=log2Up(nPages))))
  val tgts = Reg(Vec(entries, UInt(width=matchBits - log2Up(coreInstBytes))))
  val tgtPages = Reg(Vec(entries, UInt(width=log2Up(nPages))))
  // Shared table of upper address bits, with one valid bit per slot.
  val pages = Reg(Vec(nPages, UInt(width=vaddrBits - matchBits)))
  val pageValid = Reg(init = UInt(0, nPages))
  val idxPagesOH = idxPages.map(UIntToOH(_)(nPages-1,0))
  val tgtPagesOH = tgtPages.map(UIntToOH(_)(nPages-1,0))

  // Per-entry flags, one bit per entry. Only isValid has a reset value.
  val isValid = Reg(init = UInt(0, entries))
  val isReturn = Reg(UInt(width = entries))
  val isJump = Reg(UInt(width = entries))
  val brIdx = Reg(Vec(entries, UInt(width=log2Up(fetchWidth))))

  // Upper address bits, i.e. everything above the per-entry match bits.
  private def page(addr: UInt) = addr >> matchBits
  // Bit vector of valid page slots whose contents equal the page of `addr`.
  private def pageMatch(addr: UInt) = {
    val p = page(addr)
    pageValid & pages.map(_ === p).asUInt
  }
  // Bit vector of valid entries whose low bits match `addr` and whose page
  // slot is among those flagged in `pgMatch`.
  private def tagMatch(addr: UInt, pgMatch: UInt) = {
    val idxMatch = idxs.map(_ === addr(matchBits-1, log2Up(coreInstBytes))).asUInt
    val idxPageMatch = idxPagesOH.map(_ & pgMatch).map(_.orR).asUInt
    idxMatch & idxPageMatch & isValid
  }

  val r_btb_update = Pipe(io.btb_update)
  // The target written into the entry is the request address seen while the
  // piped update is applied, not `btb_update.bits.target`.
  // NOTE(review): presumably fetch has been redirected to the target by then - confirm.
  val update_target = io.req.bits.addr

  val pageHit = pageMatch(io.req.bits.addr)
  val hitsVec = tagMatch(io.req.bits.addr, pageHit)
  val hits = hitsVec.asUInt
  val updatePageHit = pageMatch(r_btb_update.bits.pc)

  // With out-of-order updates the tags are re-checked for the updating PC;
  // otherwise the hit flag and entry index carried with the update are trusted.
  val updateHits = tagMatch(r_btb_update.bits.pc, updatePageHit)
  val updateHit = if (updatesOutOfOrder) updateHits.orR else r_btb_update.bits.prediction.valid
  val updateHitAddr = if (updatesOutOfOrder) OHToUInt(updateHits) else r_btb_update.bits.prediction.bits.entry

  // Entry to allocate next; advances each time an update misses.
  val nextRepl = Counter(r_btb_update.valid && !updateHit, entries)._1

  val useUpdatePageHit = updatePageHit.orR
  val usePageHit = pageHit.orR
  // The branch PC's page needs a slot when no valid slot already holds it.
  val doIdxPageRepl = !useUpdatePageHit
  val nextPageRepl = Reg(UInt(width = log2Ceil(nPages)))
  // Slot for the PC page: the neighbour (rotate by one) of the slot the target
  // page hit in, else the slot at the replacement pointer.
  val idxPageRepl = Mux(usePageHit, Cat(pageHit(nPages-2,0), pageHit(nPages-1)), UIntToOH(nextPageRepl))
  val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl)
  val idxPageUpdate = OHToUInt(idxPageUpdateOH)
  val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0))

  val samePage = page(r_btb_update.bits.pc) === page(update_target)
  // The target page needs a slot only if it differs from the PC page and is
  // not already present.
  val doTgtPageRepl = !samePage && !usePageHit
  // Slot for the target page: the PC page's slot when they are the same page,
  // otherwise that slot's neighbour (rotate by one).
  val tgtPageRepl = Mux(samePage, idxPageUpdateOH, Cat(idxPageUpdateOH(nPages-2,0), idxPageUpdateOH(nPages-1)))
  val tgtPageUpdate = OHToUInt(Mux(usePageHit, pageHit, tgtPageRepl))
  val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0))

  // Advance the page replacement pointer by the number of slots just allocated.
  when (r_btb_update.valid && (doIdxPageRepl || doTgtPageRepl)) {
    val both = doIdxPageRepl && doTgtPageRepl
    val next = nextPageRepl + Mux[UInt](both, 2, 1)
    nextPageRepl := Mux(next >= nPages, next(0), next)
  }

  when (r_btb_update.valid) {
    // Overwrite the hitting entry if there is one, else the next victim.
    val waddr = Mux(updateHit, updateHitAddr, nextRepl)
    val mask = UIntToOH(waddr)
    idxs(waddr) := r_btb_update.bits.pc(matchBits-1, log2Up(coreInstBytes))
    tgts(waddr) := update_target(matchBits-1, log2Up(coreInstBytes))
    idxPages(waddr) := idxPageUpdate
    tgtPages(waddr) := tgtPageUpdate
    // Set or clear this entry's bit in each flag vector according to the update.
    isValid := Mux(r_btb_update.bits.isValid, isValid | mask, isValid & ~mask)
    isReturn := Mux(r_btb_update.bits.isReturn, isReturn | mask, isReturn & ~mask)
    isJump := Mux(r_btb_update.bits.isJump, isJump | mask, isJump & ~mask)
    if (fetchWidth > 1)
      brIdx(waddr) := r_btb_update.bits.br_pc >> log2Up(coreInstBytes)

    // Page slots are written as two banks (even and odd indices); whichever
    // bank does not receive the PC page receives the target page.
    require(nPages % 2 == 0)
    val idxWritesEven = !idxPageUpdate(0)

    // Writes `data` into every slot i, i+mod, i+2*mod, ... whose enable bit is set.
    def writeBank(i: Int, mod: Int, en: UInt, data: UInt) =
      for (i <- i until nPages by mod)
        when (en(i)) { pages(i) := data }

    writeBank(0, 2, Mux(idxWritesEven, idxPageReplEn, tgtPageReplEn),
      Mux(idxWritesEven, page(r_btb_update.bits.pc), page(update_target)))
    writeBank(1, 2, Mux(idxWritesEven, tgtPageReplEn, idxPageReplEn),
      Mux(idxWritesEven, page(update_target), page(r_btb_update.bits.pc)))
    pageValid := pageValid | tgtPageReplEn | idxPageReplEn
  }

  // Default prediction: taken, with the target rebuilt from the hitting
  // entry's page and low-order bits. The BHT and RAS sections below may
  // override `taken` and `target` (later connections win).
  io.resp.valid := hits.orR
  io.resp.bits.taken := true
  io.resp.bits.target := Cat(Mux1H(Mux1H(hitsVec, tgtPagesOH), pages), Mux1H(hitsVec, tgts) << log2Up(coreInstBytes))
  io.resp.bits.entry := OHToUInt(hits)
  io.resp.bits.bridx := (if (fetchWidth > 1) Mux1H(hitsVec, brIdx) else UInt(0))
  // When predicted taken, slots 0..bridx are enabled; otherwise the shift
  // amount is all ones, so no slot is masked off by the branch.
  io.resp.bits.mask := Cat((UInt(1) << ~Mux(io.resp.bits.taken, ~io.resp.bits.bridx, UInt(0)))-1, UInt(1))

  // if multiple entries for same PC land in BTB, zap them
  when (PopCountAtLeast(hits, 2)) {
    isValid := isValid & ~hits
  }

  if (nBHT > 0) {
    val bht = new BHT(nBHT)
    // A hit that is not flagged as a jump is treated as a conditional branch.
    val isBranch = !(hits & isJump).orR
    val res = bht.get(io.req.bits.addr, io.req.valid && io.resp.valid && isBranch)
    val update_btb_hit = io.bht_update.bits.prediction.valid
    when (io.bht_update.valid && update_btb_hit) {
      bht.update(io.bht_update.bits.pc, io.bht_update.bits.prediction.bits.bht, io.bht_update.bits.taken, io.bht_update.bits.mispredict)
    }
    // The counter's low bit decides the direction for conditional branches.
    when (!res.value(0) && isBranch) { io.resp.bits.taken := false }
    io.resp.bits.bht := res
  }

  if (nRAS > 0) {
    val ras = new RAS(nRAS)
    // A hit on an entry flagged as a return takes its target from the RAS.
    val doPeek = (hits & isReturn).orR
    when (!ras.isEmpty && doPeek) {
      io.resp.bits.target := ras.peek
    }
    when (io.ras_update.valid) {
      when (io.ras_update.bits.isCall) {
        ras.push(io.ras_update.bits.returnAddr)
        // Forward the address being pushed this cycle to a concurrent return prediction.
        when (doPeek) {
          io.resp.bits.target := io.ras_update.bits.returnAddr
        }
      }.elsewhen (io.ras_update.bits.isReturn && io.ras_update.bits.prediction.valid) {
        ras.pop()
      }
    }
  }
}
|