From e3b12e0b85de931c3452bd2368774ed2fadef820 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 25 Mar 2014 05:22:04 -0700 Subject: [PATCH] Make BTB more complexity-effective BTB entries reference a small number of unique pages, so we separate the storage of pages from indices. This makes much larger BTBs feasible. It's easy to exacerbate cycle time this way, so one-hot encoding is used as needed. --- rocket/src/main/scala/btb.scala | 121 ++++++++++++++++++ .../scala/{dpath_util.scala => csr.scala} | 63 --------- rocket/src/main/scala/icache.scala | 6 +- rocket/src/main/scala/util.scala | 31 +++-- 4 files changed, 142 insertions(+), 79 deletions(-) create mode 100644 rocket/src/main/scala/btb.scala rename rocket/src/main/scala/{dpath_util.scala => csr.scala} (82%) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala new file mode 100644 index 00000000..cdbc4733 --- /dev/null +++ b/rocket/src/main/scala/btb.scala @@ -0,0 +1,121 @@ +package rocket + +import Chisel._ +import Util._ +import Node._ +import uncore.constants.AddressConstants._ + +case class BTBConfig(entries: Int) { + val matchBits = PGIDX_BITS + val pages0 = 1 + log2Up(entries) // is this sensible? what about matchBits? 
+ val pages = (pages0+1)/2*2 // control logic assumes 2 divides pages +} + +// fully-associative branch target buffer +class BTB(conf: BTBConfig) extends Module { + val io = new Bundle { + val current_pc = UInt(INPUT, VADDR_BITS) + val hit = Bool(OUTPUT) + val target = UInt(OUTPUT, VADDR_BITS) + val wen = Bool(INPUT) + val taken = Bool(INPUT) + val invalidate = Bool(INPUT) + val correct_pc = UInt(INPUT, VADDR_BITS) + val correct_target = UInt(INPUT, VADDR_BITS) + } + + val idxValid = Vec.fill(conf.entries){Reg(init=Bool(false))} + val idxs = Vec.fill(conf.entries){Reg(UInt(width=conf.matchBits))} + val idxPages = Vec.fill(conf.entries){Reg(UInt(width=log2Up(conf.pages)))} + val idxPagesOH = idxPages.map(UIntToOH(_)(conf.pages-1,0)) + val tgts = Vec.fill(conf.entries){Reg(UInt(width=conf.matchBits))} + val tgtPages = Vec.fill(conf.entries){Reg(UInt(width=log2Up(conf.pages)))} + val tgtPagesOH = tgtPages.map(UIntToOH(_)(conf.pages-1,0)) + val pages = Vec.fill(conf.pages){Reg(UInt(width=VADDR_BITS-conf.matchBits))} + val pageValid = Vec.fill(conf.pages){Reg(init=Bool(false))} + + private def page(addr: UInt) = addr >> conf.matchBits + private def pageMatch(addr: UInt) = { + val p = page(addr) + Vec(pages.map(_ === p)).toBits & pageValid.toBits + } + private def tagMatch(addr: UInt): UInt = tagMatch(addr, pageMatch(addr)) + private def tagMatch(addr: UInt, pgMatch: UInt): UInt = { + val idx = addr(conf.matchBits-1,0) + val idxMatch = idxs.map(_ === idx).toBits + val idxPageMatch = idxPagesOH.map(_ & pgMatch).map(_.orR).toBits + idxValid.toBits & idxMatch & idxPageMatch + } + + val hits = tagMatch(io.current_pc) + val idxPageMatch = pageMatch(io.correct_pc) + val tgtPageMatch = pageMatch(io.correct_target) + val updates = tagMatch(io.correct_pc, idxPageMatch) + val anyUpdates = updates.orR + + private var lfsr = LFSR16(io.wen) + def rand(width: Int) = { + lfsr = lfsr(lfsr.getWidth-1,1) + Random.oneHot(width, lfsr) + } + def randOrInvalid(valid: UInt) = + 
Mux(!valid.andR, PriorityEncoderOH(~valid), rand(valid.getWidth)) + + val idxRepl = randOrInvalid(idxValid.toBits) + val idxWen = updates.toBits | idxRepl & ~anyUpdates.toSInt + + val useIdxPageMatch = idxPageMatch.orR + val doIdxPageRepl = !useIdxPageMatch && io.taken + val idxPageRepl = rand(conf.pages) + val idxPageUpdate = Mux(useIdxPageMatch, idxPageMatch, idxPageRepl) + val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0)) + + val samePage = page(io.correct_pc) === page(io.correct_target) + val useTgtPageMatch = (tgtPageMatch & ~idxPageReplEn).orR + val doTgtPageRepl = !useTgtPageMatch && io.taken && !samePage + val tgtPageRepl = Mux(samePage, idxPageUpdate, idxPageUpdate(conf.pages-2,0) << 1 | idxPageUpdate(conf.pages-1)) + val tgtPageUpdate = Mux(useTgtPageMatch, tgtPageMatch, tgtPageRepl) + val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0)) + + val pageReplEn = idxPageReplEn | tgtPageReplEn + + when (io.wen) { + for (i <- 0 until conf.entries) { + when (idxWen(i)) { + idxValid(i) := io.taken + when (io.taken) { + idxs(i) := io.correct_pc + idxPages(i) := OHToUInt(idxPageUpdate) + tgts(i) := io.correct_target + tgtPages(i) := OHToUInt(tgtPageUpdate) + } + }.elsewhen ((pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR) { + idxValid(i) := false + } + } + + require(conf.pages % 2 == 0) + val idxWritesEven = (idxPageUpdate & Fill(conf.pages/2, UInt(1,2))).orR + + def writeBank(i: Int, mod: Int, en: Bool, data: UInt) = { + for (i <- i until conf.pages by mod) { + when (en && pageReplEn(i)) { + pages(i) := data + pageValid(i) := true + } + } + } + writeBank(0, 2, Mux(idxWritesEven, doIdxPageRepl, doTgtPageRepl), + Mux(idxWritesEven, page(io.correct_pc), page(io.correct_target))) + writeBank(1, 2, Mux(idxWritesEven, doTgtPageRepl, doIdxPageRepl), + Mux(idxWritesEven, page(io.correct_target), page(io.correct_pc))) + } + + when (io.invalidate) { + idxValid.foreach(_ := false) + pageValid.foreach(_ := false) + } + + io.hit := hits.toBits.orR + 
io.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts)) +} diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/csr.scala similarity index 82% rename from rocket/src/main/scala/dpath_util.scala rename to rocket/src/main/scala/csr.scala index e539cbd2..421079a4 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/csr.scala @@ -7,56 +7,6 @@ import uncore.HTIFIO import uncore.constants.AddressConstants._ import scala.math._ -class DpathBTBIO extends Bundle -{ - val current_pc = UInt(INPUT, VADDR_BITS) - val hit = Bool(OUTPUT) - val target = UInt(OUTPUT, VADDR_BITS) - val wen = Bool(INPUT) - val clr = Bool(INPUT) - val invalidate = Bool(INPUT) - val correct_pc = UInt(INPUT, VADDR_BITS) - val correct_target = UInt(INPUT, VADDR_BITS) -} - -// fully-associative branch target buffer -class rocketDpathBTB(entries: Int) extends Module -{ - val io = new DpathBTBIO - - var hit_reduction = Bool(false) - val hit = Bool() - val update = Bool() - var update_reduction = Bool(false) - val valid = Vec.fill(entries){Reg(init=Bool(false))} - val hits = Vec.fill(entries){Bool()} - val updates = Vec.fill(entries){Bool()} - val targets = Vec.fill(entries){Reg(UInt())} - val anyUpdate = updates.toBits.orR - - val random_way = Random(entries, io.wen) - val invalid_way = valid.indexWhere((x: Bool) => !x) - val repl_way = Mux(valid.contains(Bool(false)), invalid_way, random_way) - - for (i <- 0 until entries) { - val tag = Reg(UInt()) - hits(i) := valid(i) && tag === io.current_pc - updates(i) := valid(i) && tag === io.correct_pc - - when (io.wen && (updates(i) || !anyUpdate && UInt(i) === repl_way)) { - valid(i) := Bool(false) - when (!io.clr) { - valid(i) := Bool(true) - tag := io.correct_pc - targets(i) := io.correct_target - } - } - } - - io.hit := hits.toBits.orR - io.target := Mux1H(hits, targets) -} - class Status extends Bundle { val ip = Bits(width = 8) val im = Bits(width = 8) @@ -294,16 +244,3 @@ class 
CSRFile(implicit conf: RocketConfiguration) extends Module reg_status.ip := 0 } } - -class ioReadPort(d: Int, w: Int) extends Bundle -{ - override def clone = new ioReadPort(d, w).asInstanceOf[this.type] -} - -class ioWritePort(d: Int, w: Int) extends Bundle -{ - val addr = UInt(INPUT, log2Up(d)) - val en = Bool(INPUT) - val data = Bits(INPUT, w) - override def clone = new ioWritePort(d, w).asInstanceOf[this.type] -} diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 61faf477..07ff1510 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -6,7 +6,7 @@ import Util._ case class ICacheConfig(sets: Int, assoc: Int, ibytes: Int = 4, - ntlb: Int = 8, nbtb: Int = 8, + ntlb: Int = 8, btb: BTBConfig = BTBConfig(8), code: Code = new IdentityCode) { val w = 1 @@ -55,7 +55,7 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Modu val mem = new UncachedTileLinkIO } - val btb = Module(new rocketDpathBTB(c.nbtb)) + val btb = Module(new BTB(c.btb)) val icache = Module(new ICache) val tlb = Module(new TLB(c.ntlb)) @@ -94,7 +94,7 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Modu btb.io.current_pc := s1_pc btb.io.wen := io.cpu.req.bits.mispredict - btb.io.clr := !io.cpu.req.bits.taken + btb.io.taken := io.cpu.req.bits.taken btb.io.correct_pc := io.cpu.req.bits.currentpc btb.io.correct_target := io.cpu.req.bits.pc btb.io.invalidate := io.cpu.invalidate || io.cpu.ptw.invalidate diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index d917a522..24016763 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -11,12 +11,15 @@ object Util { implicit def intToUInt(x: Int): UInt = UInt(x) implicit def booleanToBool(x: Boolean): Bits = Bool(x) implicit def intSeqToUIntSeq(x: Iterable[Int]): Iterable[UInt] = x.map(UInt(_)) + implicit def seqToVec[T <: Data](x: Iterable[T]): Vec[T] = Vec(x) 
implicit def wcToUInt(c: WideCounter): UInt = c.value implicit def booleanToInt(x: Boolean): Int = if (x) 1 else 0 implicit def intToBooleanToInt(x: Int): BooleanToInt = new BooleanToInt(x) } +import Util._ + object AVec { def apply[T <: Data](elts: Seq[T]): Vec[T] = Vec(elts) @@ -141,18 +144,20 @@ case class WideCounter(width: Int, inc: UInt = UInt(1)) object Random { - def apply(mod: Int, inc: Bool = Bool(true)): UInt = { - if (isPow2(mod)) { - require(mod <= 65536) - LFSR16(inc)(log2Up(mod)-1,0).toUInt - } else { - val max = 1 << log2Up(mod*8) - val rand_pow2 = apply(max, inc) - - var res = UInt(mod-1) - for (i <- mod-1 to 1 by -1) - res = Mux(rand_pow2 < UInt(i*max/mod), UInt(i-1), res) - res - } + def apply(mod: Int, random: UInt): UInt = { + if (isPow2(mod)) random(log2Up(mod)-1,0) + else PriorityEncoder(partition(apply(1 << log2Up(mod*8), random), mod)) } + def apply(mod: Int): UInt = apply(mod, randomizer) + def oneHot(mod: Int, random: UInt): UInt = { + if (isPow2(mod)) UIntToOH(random(log2Up(mod)-1,0)) + else PriorityEncoderOH(partition(apply(1 << log2Up(mod*8), random), mod)).toBits + } + def oneHot(mod: Int): UInt = oneHot(mod, randomizer) + + private def randomizer = LFSR16() + private def round(x: Double): Int = + if (x.toInt.toDouble == x) x.toInt else (x.toInt + 1) & -2 + private def partition(value: UInt, slices: Int) = + Vec.tabulate(slices)(i => value < round(((i + 1) << value.getWidth).toDouble / slices)) /* slice i is bounded by its UPPER edge, (i+1)/slices of the range: slice 0 is reachable and the last bound is 2^width, so some slice always matches */ }