Make BTB more complexity-effective
BTB entries reference only a small number of unique pages, so we store the page numbers separately from the per-entry indices (the low-order address bits). This makes much larger BTBs feasible. Splitting the storage this way can easily lengthen the cycle time, so one-hot encoding is used where needed.
This commit is contained in:
parent
804b09c8c5
commit
e3b12e0b85
121
rocket/src/main/scala/btb.scala
Normal file
121
rocket/src/main/scala/btb.scala
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
package rocket
|
||||||
|
|
||||||
|
import Chisel._
|
||||||
|
import Util._
|
||||||
|
import Node._
|
||||||
|
import uncore.constants.AddressConstants._
|
||||||
|
|
||||||
|
// Sizing parameters for the BTB.
//   entries: number of BTB entries.
case class BTBConfig(entries: Int) {
  // Number of low-order address bits stored and compared per entry.
  val matchBits = PGIDX_BITS

  // Provisional number of page slots, derived from the entry count only.
  // Open question carried over from the author: is this sizing sensible, and
  // should it also depend on matchBits?
  val pages0 = log2Up(entries) + 1

  // Actual number of page slots: pages0 rounded up to the next even number,
  // because the control logic assumes that 2 divides pages.
  val pages = pages0 + pages0 % 2
}
|
||||||
|
|
||||||
|
// Fully-associative branch target buffer.
// Each entry keeps only the low conf.matchBits bits of the branch PC and of its
// target. The remaining upper address bits ("pages") are stored once in a small
// shared table of conf.pages slots, and every entry holds two indices into that
// table: one for the page of the PC and one for the page of the target.
|
||||||
|
class BTB(conf: BTBConfig) extends Module {
|
||||||
|
val io = new Bundle {
|
||||||
|
// PC to look up; compared against all entries to produce hit/target
val current_pc = UInt(INPUT, VADDR_BITS)
|
||||||
|
// high when at least one valid entry matches current_pc
val hit = Bool(OUTPUT)
|
||||||
|
// target address assembled from the hitting entry (see io.target below)
val target = UInt(OUTPUT, VADDR_BITS)
|
||||||
|
// update enable: gates every table write in the when (io.wen) block
val wen = Bool(INPUT)
|
||||||
|
// with wen: true writes the selected entry and marks it valid,
// false clears the valid bit of the selected entry instead
val taken = Bool(INPUT)
|
||||||
|
// clears the valid bit of every entry and of every page slot
val invalidate = Bool(INPUT)
|
||||||
|
// PC of the branch being updated
val correct_pc = UInt(INPUT, VADDR_BITS)
|
||||||
|
// target of the branch being updated
val correct_target = UInt(INPUT, VADDR_BITS)
|
||||||
|
}
|
||||||
|
|
||||||
|
// per-entry valid bits, reset to false
val idxValid = Vec.fill(conf.entries){Reg(init=Bool(false))}
|
||||||
|
// per-entry low matchBits bits of the branch PC
val idxs = Vec.fill(conf.entries){Reg(UInt(width=conf.matchBits))}
|
||||||
|
// per-entry binary index of the page slot holding the branch PC's page
val idxPages = Vec.fill(conf.entries){Reg(UInt(width=log2Up(conf.pages)))}
|
||||||
|
// one-hot decode of idxPages, truncated to conf.pages bits
val idxPagesOH = idxPages.map(UIntToOH(_)(conf.pages-1,0))
|
||||||
|
// per-entry low matchBits bits of the branch target
val tgts = Vec.fill(conf.entries){Reg(UInt(width=conf.matchBits))}
|
||||||
|
// per-entry binary index of the page slot holding the target's page
val tgtPages = Vec.fill(conf.entries){Reg(UInt(width=log2Up(conf.pages)))}
|
||||||
|
// one-hot decode of tgtPages, truncated to conf.pages bits
val tgtPagesOH = tgtPages.map(UIntToOH(_)(conf.pages-1,0))
|
||||||
|
// shared page table: upper (VADDR_BITS - matchBits) address bits per slot
val pages = Vec.fill(conf.pages){Reg(UInt(width=VADDR_BITS-conf.matchBits))}
|
||||||
|
// per-slot valid bits for the page table, reset to false
val pageValid = Vec.fill(conf.pages){Reg(init=Bool(false))}
|
||||||
|
|
||||||
|
// page number of an address: everything above the low matchBits bits
private def page(addr: UInt) = addr >> conf.matchBits
|
||||||
|
// bit vector over page slots: bit set where the slot is valid and holds addr's page
private def pageMatch(addr: UInt) = {
|
||||||
|
val p = page(addr)
|
||||||
|
Vec(pages.map(_ === p)).toBits & pageValid.toBits
|
||||||
|
}
|
||||||
|
// bit vector over entries matching addr, computing the page match internally
private def tagMatch(addr: UInt): UInt = tagMatch(addr, pageMatch(addr))
|
||||||
|
// bit vector over entries matching addr, given an already computed page match:
// an entry matches when it is valid, its stored low bits equal addr's low bits,
// and its PC page slot is one of the slots flagged in pgMatch
private def tagMatch(addr: UInt, pgMatch: UInt): UInt = {
|
||||||
|
val idx = addr(conf.matchBits-1,0)
|
||||||
|
val idxMatch = idxs.map(_ === idx).toBits
|
||||||
|
val idxPageMatch = idxPagesOH.map(_ & pgMatch).map(_.orR).toBits
|
||||||
|
idxValid.toBits & idxMatch & idxPageMatch
|
||||||
|
}
|
||||||
|
|
||||||
|
// lookup side: entries matching the PC being fetched
val hits = tagMatch(io.current_pc)
|
||||||
|
// update side: page slots already holding the page of correct_pc
val idxPageMatch = pageMatch(io.correct_pc)
|
||||||
|
// update side: page slots already holding the page of correct_target
val tgtPageMatch = pageMatch(io.correct_target)
|
||||||
|
// update side: entries that already describe the branch at correct_pc
val updates = tagMatch(io.correct_pc, idxPageMatch)
|
||||||
|
val anyUpdates = updates.orR
|
||||||
|
|
||||||
|
// Random source for replacement choices. LFSR16 is given io.wen, presumably
// as its advance enable -- confirm against Chisel's LFSR16.
private var lfsr = LFSR16(io.wen)
|
||||||
|
// One-hot random selection among `width` positions. Every call first drops the
// least-significant bit of lfsr, so successive calls are built from differently
// shifted slices of the LFSR value. The order in which rand() is called while
// this module is constructed therefore determines which bits each user sees.
def rand(width: Int) = {
|
||||||
|
lfsr = lfsr(lfsr.getWidth-1,1)
|
||||||
|
Random.oneHot(width, lfsr)
|
||||||
|
}
|
||||||
|
// One-hot victim selection: while some bit of `valid` is clear, select a clear
// position via PriorityEncoderOH(~valid); once all are set, select at random.
def randOrInvalid(valid: UInt) =
|
||||||
|
Mux(!valid.andR, PriorityEncoderOH(~valid), rand(valid.getWidth))
|
||||||
|
|
||||||
|
// entry chosen as victim when no existing entry matches correct_pc
val idxRepl = randOrInvalid(idxValid.toBits)
|
||||||
|
// Entries to write: every entry already matching correct_pc or, when there is
// none, the victim. `&` binds tighter than `|`. The 1-bit SInt is presumably
// sign-extended so that it masks all of idxRepl -- confirm.
val idxWen = updates.toBits | idxRepl & ~anyUpdates.toSInt
|
||||||
|
|
||||||
|
// true when the page of correct_pc is already present in the page table
val useIdxPageMatch = idxPageMatch.orR
|
||||||
|
// a page slot must be overwritten with the PC's page (only for taken updates)
val doIdxPageRepl = !useIdxPageMatch && io.taken
|
||||||
|
// randomly chosen page slot to overwrite with the PC's page
val idxPageRepl = rand(conf.pages)
|
||||||
|
// one-hot page slot the written entry will reference for its PC page
val idxPageUpdate = Mux(useIdxPageMatch, idxPageMatch, idxPageRepl)
|
||||||
|
// one-hot page slot actually overwritten for the PC page, or zero if none
val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0))
|
||||||
|
|
||||||
|
// PC and target lie in the same page
val samePage = page(io.correct_pc) === page(io.correct_target)
|
||||||
|
// the target's page is already present in a slot that is not being overwritten
// by the PC page replacement
val useTgtPageMatch = (tgtPageMatch & ~idxPageReplEn).orR
|
||||||
|
// a second page slot must be overwritten with the target's page
val doTgtPageRepl = !useTgtPageMatch && io.taken && !samePage
|
||||||
|
// Slot for the target page when it is not reused: the PC's slot if both
// addresses share a page, otherwise the PC's slot rotated left by one position.
val tgtPageRepl = Mux(samePage, idxPageUpdate, idxPageUpdate(conf.pages-2,0) << 1 | idxPageUpdate(conf.pages-1))
|
||||||
|
// one-hot page slot the written entry will reference for its target page
val tgtPageUpdate = Mux(useTgtPageMatch, tgtPageMatch, tgtPageRepl)
|
||||||
|
// one-hot page slot actually overwritten for the target page, or zero if none
val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0))
|
||||||
|
|
||||||
|
// all page slots overwritten by this update
val pageReplEn = idxPageReplEn | tgtPageReplEn
|
||||||
|
|
||||||
|
when (io.wen) {
|
||||||
|
for (i <- 0 until conf.entries) {
|
||||||
|
when (idxWen(i)) {
|
||||||
|
// a not-taken update leaves the entry invalid
idxValid(i) := io.taken
|
||||||
|
when (io.taken) {
|
||||||
|
// full-width address assigned to a matchBits-wide register; presumably only
// the low bits are kept -- confirm Chisel's truncation behaviour
idxs(i) := io.correct_pc
|
||||||
|
idxPages(i) := OHToUInt(idxPageUpdate)
|
||||||
|
// same remark as for idxs(i): presumably only the low bits are kept
tgts(i) := io.correct_target
|
||||||
|
tgtPages(i) := OHToUInt(tgtPageUpdate)
|
||||||
|
}
|
||||||
|
// any other entry that references a page slot being overwritten is invalidated
}.elsewhen ((pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR) {
|
||||||
|
idxValid(i) := false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// the even/odd split of the page table below needs an even number of slots
require(conf.pages % 2 == 0)
|
||||||
|
// Fill(conf.pages/2, UInt(1,2)) is the mask of even positions (...0101):
// true when the PC page slot is an even-numbered slot
val idxWritesEven = (idxPageUpdate & Fill(conf.pages/2, UInt(1,2))).orR
|
||||||
|
|
||||||
|
// Writes `data` into the page slots i, i+mod, i+2*mod, ... that are flagged in
// pageReplEn, when `en` is high, and marks them valid. The loop variable
// shadows the parameter i; the range start still refers to the parameter.
def writeBank(i: Int, mod: Int, en: Bool, data: UInt) = {
|
||||||
|
for (i <- i until conf.pages by mod) {
|
||||||
|
when (en && pageReplEn(i)) {
|
||||||
|
pages(i) := data
|
||||||
|
pageValid(i) := true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// even slots: take the PC page if the PC slot is even, else the target page
writeBank(0, 2, Mux(idxWritesEven, doIdxPageRepl, doTgtPageRepl),
|
||||||
|
Mux(idxWritesEven, page(io.correct_pc), page(io.correct_target)))
|
||||||
|
// odd slots: the opposite choice to the even slots
writeBank(1, 2, Mux(idxWritesEven, doTgtPageRepl, doIdxPageRepl),
|
||||||
|
Mux(idxWritesEven, page(io.correct_target), page(io.correct_pc)))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flush everything. This block comes after the update logic, so it presumably
// takes priority when wen and invalidate are both high -- confirm.
when (io.invalidate) {
|
||||||
|
idxValid.foreach(_ := false)
|
||||||
|
pageValid.foreach(_ := false)
|
||||||
|
}
|
||||||
|
|
||||||
|
io.hit := hits.toBits.orR
|
||||||
|
// upper bits: the page selected by the hitting entry's one-hot target page
// index; lower bits: that entry's stored target bits
io.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts))
|
||||||
|
}
|
@ -7,56 +7,6 @@ import uncore.HTIFIO
|
|||||||
import uncore.constants.AddressConstants._
|
import uncore.constants.AddressConstants._
|
||||||
import scala.math._
|
import scala.math._
|
||||||
|
|
||||||
class DpathBTBIO extends Bundle
|
|
||||||
{
|
|
||||||
val current_pc = UInt(INPUT, VADDR_BITS)
|
|
||||||
val hit = Bool(OUTPUT)
|
|
||||||
val target = UInt(OUTPUT, VADDR_BITS)
|
|
||||||
val wen = Bool(INPUT)
|
|
||||||
val clr = Bool(INPUT)
|
|
||||||
val invalidate = Bool(INPUT)
|
|
||||||
val correct_pc = UInt(INPUT, VADDR_BITS)
|
|
||||||
val correct_target = UInt(INPUT, VADDR_BITS)
|
|
||||||
}
|
|
||||||
|
|
||||||
// fully-associative branch target buffer
|
|
||||||
class rocketDpathBTB(entries: Int) extends Module
|
|
||||||
{
|
|
||||||
val io = new DpathBTBIO
|
|
||||||
|
|
||||||
var hit_reduction = Bool(false)
|
|
||||||
val hit = Bool()
|
|
||||||
val update = Bool()
|
|
||||||
var update_reduction = Bool(false)
|
|
||||||
val valid = Vec.fill(entries){Reg(init=Bool(false))}
|
|
||||||
val hits = Vec.fill(entries){Bool()}
|
|
||||||
val updates = Vec.fill(entries){Bool()}
|
|
||||||
val targets = Vec.fill(entries){Reg(UInt())}
|
|
||||||
val anyUpdate = updates.toBits.orR
|
|
||||||
|
|
||||||
val random_way = Random(entries, io.wen)
|
|
||||||
val invalid_way = valid.indexWhere((x: Bool) => !x)
|
|
||||||
val repl_way = Mux(valid.contains(Bool(false)), invalid_way, random_way)
|
|
||||||
|
|
||||||
for (i <- 0 until entries) {
|
|
||||||
val tag = Reg(UInt())
|
|
||||||
hits(i) := valid(i) && tag === io.current_pc
|
|
||||||
updates(i) := valid(i) && tag === io.correct_pc
|
|
||||||
|
|
||||||
when (io.wen && (updates(i) || !anyUpdate && UInt(i) === repl_way)) {
|
|
||||||
valid(i) := Bool(false)
|
|
||||||
when (!io.clr) {
|
|
||||||
valid(i) := Bool(true)
|
|
||||||
tag := io.correct_pc
|
|
||||||
targets(i) := io.correct_target
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
io.hit := hits.toBits.orR
|
|
||||||
io.target := Mux1H(hits, targets)
|
|
||||||
}
|
|
||||||
|
|
||||||
class Status extends Bundle {
|
class Status extends Bundle {
|
||||||
val ip = Bits(width = 8)
|
val ip = Bits(width = 8)
|
||||||
val im = Bits(width = 8)
|
val im = Bits(width = 8)
|
||||||
@ -294,16 +244,3 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module
|
|||||||
reg_status.ip := 0
|
reg_status.ip := 0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class ioReadPort(d: Int, w: Int) extends Bundle
|
|
||||||
{
|
|
||||||
override def clone = new ioReadPort(d, w).asInstanceOf[this.type]
|
|
||||||
}
|
|
||||||
|
|
||||||
class ioWritePort(d: Int, w: Int) extends Bundle
|
|
||||||
{
|
|
||||||
val addr = UInt(INPUT, log2Up(d))
|
|
||||||
val en = Bool(INPUT)
|
|
||||||
val data = Bits(INPUT, w)
|
|
||||||
override def clone = new ioWritePort(d, w).asInstanceOf[this.type]
|
|
||||||
}
|
|
@ -6,7 +6,7 @@ import Util._
|
|||||||
|
|
||||||
case class ICacheConfig(sets: Int, assoc: Int,
|
case class ICacheConfig(sets: Int, assoc: Int,
|
||||||
ibytes: Int = 4,
|
ibytes: Int = 4,
|
||||||
ntlb: Int = 8, nbtb: Int = 8,
|
ntlb: Int = 8, btb: BTBConfig = BTBConfig(8),
|
||||||
code: Code = new IdentityCode)
|
code: Code = new IdentityCode)
|
||||||
{
|
{
|
||||||
val w = 1
|
val w = 1
|
||||||
@ -55,7 +55,7 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Modu
|
|||||||
val mem = new UncachedTileLinkIO
|
val mem = new UncachedTileLinkIO
|
||||||
}
|
}
|
||||||
|
|
||||||
val btb = Module(new rocketDpathBTB(c.nbtb))
|
val btb = Module(new BTB(c.btb))
|
||||||
val icache = Module(new ICache)
|
val icache = Module(new ICache)
|
||||||
val tlb = Module(new TLB(c.ntlb))
|
val tlb = Module(new TLB(c.ntlb))
|
||||||
|
|
||||||
@ -94,7 +94,7 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Modu
|
|||||||
|
|
||||||
btb.io.current_pc := s1_pc
|
btb.io.current_pc := s1_pc
|
||||||
btb.io.wen := io.cpu.req.bits.mispredict
|
btb.io.wen := io.cpu.req.bits.mispredict
|
||||||
btb.io.clr := !io.cpu.req.bits.taken
|
btb.io.taken := io.cpu.req.bits.taken
|
||||||
btb.io.correct_pc := io.cpu.req.bits.currentpc
|
btb.io.correct_pc := io.cpu.req.bits.currentpc
|
||||||
btb.io.correct_target := io.cpu.req.bits.pc
|
btb.io.correct_target := io.cpu.req.bits.pc
|
||||||
btb.io.invalidate := io.cpu.invalidate || io.cpu.ptw.invalidate
|
btb.io.invalidate := io.cpu.invalidate || io.cpu.ptw.invalidate
|
||||||
|
@ -11,12 +11,15 @@ object Util {
|
|||||||
implicit def intToUInt(x: Int): UInt = UInt(x)
|
implicit def intToUInt(x: Int): UInt = UInt(x)
|
||||||
implicit def booleanToBool(x: Boolean): Bits = Bool(x)
|
implicit def booleanToBool(x: Boolean): Bits = Bool(x)
|
||||||
implicit def intSeqToUIntSeq(x: Iterable[Int]): Iterable[UInt] = x.map(UInt(_))
|
implicit def intSeqToUIntSeq(x: Iterable[Int]): Iterable[UInt] = x.map(UInt(_))
|
||||||
|
implicit def seqToVec[T <: Data](x: Iterable[T]): Vec[T] = Vec(x)
|
||||||
implicit def wcToUInt(c: WideCounter): UInt = c.value
|
implicit def wcToUInt(c: WideCounter): UInt = c.value
|
||||||
|
|
||||||
implicit def booleanToInt(x: Boolean): Int = if (x) 1 else 0
|
implicit def booleanToInt(x: Boolean): Int = if (x) 1 else 0
|
||||||
implicit def intToBooleanToInt(x: Int): BooleanToInt = new BooleanToInt(x)
|
implicit def intToBooleanToInt(x: Int): BooleanToInt = new BooleanToInt(x)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
import Util._
|
||||||
|
|
||||||
object AVec
|
object AVec
|
||||||
{
|
{
|
||||||
def apply[T <: Data](elts: Seq[T]): Vec[T] = Vec(elts)
|
def apply[T <: Data](elts: Seq[T]): Vec[T] = Vec(elts)
|
||||||
@ -141,18 +144,20 @@ case class WideCounter(width: Int, inc: UInt = UInt(1))
|
|||||||
|
|
||||||
object Random
|
object Random
|
||||||
{
|
{
|
||||||
def apply(mod: Int, inc: Bool = Bool(true)): UInt = {
|
def apply(mod: Int, random: UInt): UInt = {
|
||||||
if (isPow2(mod)) {
|
if (isPow2(mod)) random(log2Up(mod)-1,0)
|
||||||
require(mod <= 65536)
|
else PriorityEncoder(partition(apply(1 << log2Up(mod*8), random), mod))
|
||||||
LFSR16(inc)(log2Up(mod)-1,0).toUInt
|
}
|
||||||
} else {
|
def apply(mod: Int): UInt = apply(mod, randomizer)
|
||||||
val max = 1 << log2Up(mod*8)
|
def oneHot(mod: Int, random: UInt): UInt = {
|
||||||
val rand_pow2 = apply(max, inc)
|
if (isPow2(mod)) UIntToOH(random(log2Up(mod)-1,0))
|
||||||
|
else PriorityEncoderOH(partition(apply(1 << log2Up(mod*8), random), mod)).toBits
|
||||||
|
}
|
||||||
|
def oneHot(mod: Int): UInt = oneHot(mod, randomizer)
|
||||||
|
|
||||||
var res = UInt(mod-1)
|
private def randomizer = LFSR16()
|
||||||
for (i <- mod-1 to 1 by -1)
|
private def round(x: Double): Int =
|
||||||
res = Mux(rand_pow2 < UInt(i*max/mod), UInt(i-1), res)
|
if (x.toInt.toDouble == x) x.toInt else (x.toInt + 1) & -2
|
||||||
res
|
// Splits the 2^width range of `value` into `slices` near-equal intervals and
// returns one Bool per slice: element i is true when `value` lies below the
// upper bound of slice i, so a priority encoder over the result yields the
// slice that contains `value`.
// The bound must be the upper bound of slice i, hence (i + 1). With a bound
// of i, element 0 compares against zero and is never true, so slice 0 could
// never be selected and values in the top interval would set no element.
// With (i + 1) the last bound is 2^width, so some element is always true.
private def partition(value: UInt, slices: Int) =
|
||||||
}
|
Vec.tabulate(slices)(i => value < round(((i + 1) << value.getWidth).toDouble / slices))
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user