Make BTB more complexity-effective
BTB entries reference only a small number of unique pages, so we store the pages separately from the per-entry indices. This makes much larger BTBs feasible. Splitting the storage this way can easily hurt cycle time, so one-hot encoding is used where needed.
This commit is contained in:
		
							
								
								
									
										121
									
								
								rocket/src/main/scala/btb.scala
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										121
									
								
								rocket/src/main/scala/btb.scala
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,121 @@ | ||||
| package rocket | ||||
|  | ||||
| import Chisel._ | ||||
| import Util._ | ||||
| import Node._ | ||||
| import uncore.constants.AddressConstants._ | ||||
|  | ||||
// Static sizing parameters for the branch target buffer.
//   entries - number of BTB entries; the BTB class sizes its per-entry
//             register vectors from this value.
| case class BTBConfig(entries: Int) { | ||||
  // Number of low-order address bits kept per entry. The BTB treats the
  // address bits above this boundary as the "page" and stores them separately.
|   val matchBits = PGIDX_BITS | ||||
  // Candidate number of shared page registers: grows with log2 of the entry
  // count. The original author left the sizing as an open question.
|   val pages0 = 1 + log2Up(entries) // is this sensible? what about matchBits? | ||||
  // pages0 rounded up to the next even number (integer division by 2, then *2).
|   val pages = (pages0+1)/2*2 // control logic assumes 2 divides pages | ||||
| } | ||||
|  | ||||
| // fully-associative branch target buffer | ||||
// Every entry keeps only the low conf.matchBits bits of a branch PC and of its
// target, together with the index of a shared page register (`pages`) that
// holds the remaining upper address bits. Lookups are driven by io.current_pc;
// updates are driven by io.correct_pc / io.correct_target while io.wen is set.
| class BTB(conf: BTBConfig) extends Module { | ||||
  // Ports:
  //   current_pc     - address looked up to produce hit/target
  //   hit            - set when at least one entry matches current_pc
  //   target         - predicted target reassembled from the matching entry
  //   wen            - enables the update logic below
  //   taken          - on an update, written into the entry's valid bit; the
  //                    entry payload and page allocation happen only if set
  //   invalidate     - clears every entry-valid and page-valid bit
  //   correct_pc     - PC of the branch being recorded
  //   correct_target - target address being recorded
|   val io = new Bundle { | ||||
|     val current_pc     = UInt(INPUT, VADDR_BITS) | ||||
|     val hit            = Bool(OUTPUT) | ||||
|     val target         = UInt(OUTPUT, VADDR_BITS) | ||||
|     val wen            = Bool(INPUT) | ||||
|     val taken          = Bool(INPUT) | ||||
|     val invalidate     = Bool(INPUT) | ||||
|     val correct_pc     = UInt(INPUT, VADDR_BITS) | ||||
|     val correct_target = UInt(INPUT, VADDR_BITS) | ||||
|   } | ||||
|  | ||||
  // Per-entry state: valid bit, low PC bits (idxs) and low target bits (tgts),
  // each with the index of the page register holding the upper bits.
  // The *OH values are one-hot decodes of those page indices, truncated to
  // conf.pages bits.
|   val idxValid = Vec.fill(conf.entries){Reg(init=Bool(false))} | ||||
|   val idxs = Vec.fill(conf.entries){Reg(UInt(width=conf.matchBits))} | ||||
|   val idxPages = Vec.fill(conf.entries){Reg(UInt(width=log2Up(conf.pages)))} | ||||
|   val idxPagesOH = idxPages.map(UIntToOH(_)(conf.pages-1,0)) | ||||
|   val tgts = Vec.fill(conf.entries){Reg(UInt(width=conf.matchBits))} | ||||
|   val tgtPages = Vec.fill(conf.entries){Reg(UInt(width=log2Up(conf.pages)))} | ||||
|   val tgtPagesOH = tgtPages.map(UIntToOH(_)(conf.pages-1,0)) | ||||
  // Shared page storage: upper address bits plus one valid bit per register.
|   val pages = Vec.fill(conf.pages){Reg(UInt(width=VADDR_BITS-conf.matchBits))} | ||||
|   val pageValid = Vec.fill(conf.pages){Reg(init=Bool(false))} | ||||
|  | ||||
  // Upper (page) part of an address: everything above the low matchBits bits.
|   private def page(addr: UInt) = addr >> conf.matchBits | ||||
  // Bit vector over page registers: bit i is set when pages(i) is valid and
  // equals the page of addr.
|   private def pageMatch(addr: UInt) = { | ||||
|     val p = page(addr) | ||||
|     Vec(pages.map(_ === p)).toBits & pageValid.toBits | ||||
|   } | ||||
  // Bit vector over entries: an entry matches when it is valid, its stored low
  // bits equal addr's low bits, and its page register is one of those flagged
  // in pgMatch. The one-argument form computes pgMatch from addr itself.
|   private def tagMatch(addr: UInt): UInt = tagMatch(addr, pageMatch(addr)) | ||||
|   private def tagMatch(addr: UInt, pgMatch: UInt): UInt = { | ||||
|     val idx = addr(conf.matchBits-1,0) | ||||
|     val idxMatch = idxs.map(_ === idx).toBits | ||||
|     val idxPageMatch = idxPagesOH.map(_ & pgMatch).map(_.orR).toBits | ||||
|     idxValid.toBits & idxMatch & idxPageMatch | ||||
|   } | ||||
|  | ||||
  // hits: entries matching the lookup PC.
  // updates: entries that already hold correct_pc (reuses idxPageMatch so the
  // page comparison for correct_pc is built once).
|   val hits = tagMatch(io.current_pc) | ||||
|   val idxPageMatch = pageMatch(io.correct_pc) | ||||
|   val tgtPageMatch = pageMatch(io.correct_target) | ||||
|   val updates = tagMatch(io.correct_pc, idxPageMatch) | ||||
|   val anyUpdates = updates.orR | ||||
|  | ||||
  // Pseudo-random source for replacement. NOTE(review): LFSR16(io.wen)
  // presumably advances only when io.wen is asserted - confirm.
  // rand() reassigns the Scala var at elaboration time, dropping the lowest
  // bit on every call, so successive calls draw from different bit slices.
  // The order in which rand() is called therefore matters.
|   private var lfsr = LFSR16(io.wen) | ||||
|   def rand(width: Int) = { | ||||
|     lfsr = lfsr(lfsr.getWidth-1,1) | ||||
|     Random.oneHot(width, lfsr) | ||||
|   } | ||||
  // One-hot replacement choice: a slot whose valid bit is clear if there is
  // one (presumably the lowest-numbered - confirm PriorityEncoderOH ordering),
  // otherwise a random slot.
|   def randOrInvalid(valid: UInt) = | ||||
|     Mux(!valid.andR, PriorityEncoderOH(~valid), rand(valid.getWidth)) | ||||
|  | ||||
  // Entries written on an update: every entry already matching correct_pc, or,
  // when none match, the replacement slot. `&` binds tighter than `|` here.
  // NOTE(review): ~anyUpdates.toSInt is presumably sign-extended into an
  // all-ones / all-zeros mask over idxRepl - confirm against Chisel width rules.
|   val idxRepl = randOrInvalid(idxValid.toBits) | ||||
|   val idxWen = updates.toBits | idxRepl & ~anyUpdates.toSInt | ||||
|  | ||||
  // Page slot for correct_pc: reuse a matching page register if one exists,
  // otherwise pick a random slot; the slot is only overwritten when taken.
  // idxPageReplEn is the one-hot of the slot being overwritten, or zero.
|   val useIdxPageMatch = idxPageMatch.orR | ||||
|   val doIdxPageRepl = !useIdxPageMatch && io.taken | ||||
|   val idxPageRepl = rand(conf.pages) | ||||
|   val idxPageUpdate = Mux(useIdxPageMatch, idxPageMatch, idxPageRepl) | ||||
|   val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0)) | ||||
|  | ||||
  // Page slot for correct_target. A matching page register only counts if it
  // is not the one about to be overwritten for correct_pc. When PC and target
  // share a page the PC's slot is reused; otherwise the fallback slot is
  // idxPageUpdate rotated left by one bit position, i.e. the neighbouring slot.
|   val samePage = page(io.correct_pc) === page(io.correct_target) | ||||
|   val useTgtPageMatch = (tgtPageMatch & ~idxPageReplEn).orR | ||||
|   val doTgtPageRepl = !useTgtPageMatch && io.taken && !samePage | ||||
|   val tgtPageRepl = Mux(samePage, idxPageUpdate, idxPageUpdate(conf.pages-2,0) << 1 | idxPageUpdate(conf.pages-1)) | ||||
|   val tgtPageUpdate = Mux(useTgtPageMatch, tgtPageMatch, tgtPageRepl) | ||||
|   val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0)) | ||||
|  | ||||
  // All page slots overwritten by this update (PC page and/or target page).
|   val pageReplEn = idxPageReplEn | tgtPageReplEn | ||||
|  | ||||
|   when (io.wen) { | ||||
|     for (i <- 0 until conf.entries) { | ||||
      // Selected entries: valid follows io.taken; payload is written only when
      // taken. NOTE(review): correct_pc / correct_target are VADDR_BITS wide
      // while idxs/tgts are matchBits wide - presumably truncated to the low
      // bits on assignment; confirm.
|       when (idxWen(i)) { | ||||
|         idxValid(i) := io.taken | ||||
|         when (io.taken) { | ||||
|           idxs(i) := io.correct_pc | ||||
|           idxPages(i) := OHToUInt(idxPageUpdate) | ||||
|           tgts(i) := io.correct_target | ||||
|           tgtPages(i) := OHToUInt(tgtPageUpdate) | ||||
|         } | ||||
      // Any other entry that points (for PC or target) at a page slot being
      // overwritten is invalidated, since its upper bits are about to change.
|       }.elsewhen ((pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR) { | ||||
|         idxValid(i) := false | ||||
|       } | ||||
|     } | ||||
|  | ||||
    // Page registers are written as two banks (even and odd slot numbers).
    // Fill(conf.pages/2, UInt(1,2)) builds the 0b...0101 mask of even slots, so
    // idxWritesEven is set when the PC's page slot is even-numbered.
|     require(conf.pages % 2 == 0) | ||||
|     val idxWritesEven = (idxPageUpdate & Fill(conf.pages/2, UInt(1,2))).orR | ||||
|  | ||||
    // Writes `data` into every slot i, i+mod, i+2*mod, ... that is flagged in
    // pageReplEn while `en` is set, and marks it valid. The loop variable
    // shadows the parameter `i`, which only supplies the starting slot.
|     def writeBank(i: Int, mod: Int, en: Bool, data: UInt) = { | ||||
|       for (i <- i until conf.pages by mod) { | ||||
|         when (en && pageReplEn(i)) { | ||||
|           pages(i) := data | ||||
|           pageValid(i) := true | ||||
|         } | ||||
|       } | ||||
|     } | ||||
    // Even bank takes the PC page when the PC's slot is even, else the target
    // page; the odd bank takes the other one. A freshly allocated target slot
    // is the PC slot rotated by one, which with an even page count appears to
    // guarantee the two land in opposite banks.
|     writeBank(0, 2, Mux(idxWritesEven, doIdxPageRepl, doTgtPageRepl), | ||||
|       Mux(idxWritesEven, page(io.correct_pc), page(io.correct_target))) | ||||
|     writeBank(1, 2, Mux(idxWritesEven, doTgtPageRepl, doIdxPageRepl), | ||||
|       Mux(idxWritesEven, page(io.correct_target), page(io.correct_pc))) | ||||
|   } | ||||
|  | ||||
  // Global flush of entry and page valid bits. NOTE(review): placed after the
  // update block, so it presumably wins over a simultaneous update - confirm.
|   when (io.invalidate) { | ||||
|     idxValid.foreach(_ := false) | ||||
|     pageValid.foreach(_ := false) | ||||
|   } | ||||
|  | ||||
  // hit: any entry matched. target: upper bits from the page register selected
  // by the hit entry's target-page one-hot, concatenated with that entry's
  // stored low target bits.
|   io.hit    := hits.toBits.orR | ||||
|   io.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts)) | ||||
| } | ||||
| @@ -7,56 +7,6 @@ import uncore.HTIFIO | ||||
| import uncore.constants.AddressConstants._ | ||||
| import scala.math._ | ||||
| 
 | ||||
// Port bundle used by rocketDpathBTB.
//   current_pc / hit / target  - lookup address and its result
//   wen                        - enables an update of the selected entry
//   clr                        - on an update, leaves the selected entry
//                                invalid instead of writing a new tag/target
//   invalidate                 - declared here; not read by rocketDpathBTB as
//                                shown in this view
//   correct_pc / correct_target - tag and target written on an update
| class DpathBTBIO extends Bundle | ||||
| { | ||||
|   val current_pc     = UInt(INPUT, VADDR_BITS) | ||||
|   val hit            = Bool(OUTPUT) | ||||
|   val target         = UInt(OUTPUT, VADDR_BITS) | ||||
|   val wen            = Bool(INPUT) | ||||
|   val clr            = Bool(INPUT) | ||||
|   val invalidate     = Bool(INPUT) | ||||
|   val correct_pc     = UInt(INPUT, VADDR_BITS) | ||||
|   val correct_target = UInt(INPUT, VADDR_BITS) | ||||
| } | ||||
| 
 | ||||
| // fully-associative branch target buffer | ||||
// Each entry holds a tag register compared against the whole PC and a target
// register. `entries` is the number of entries.
| class rocketDpathBTB(entries: Int) extends Module | ||||
| { | ||||
|   val io = new DpathBTBIO | ||||
| 
 | ||||
  // NOTE(review): hit_reduction, hit, update and update_reduction are not
  // referenced again anywhere in this class as shown.
|   var hit_reduction = Bool(false) | ||||
|   val hit = Bool() | ||||
|   val update = Bool() | ||||
|   var update_reduction = Bool(false) | ||||
  // Per-entry valid bits, lookup/update match wires, and target registers.
|   val valid = Vec.fill(entries){Reg(init=Bool(false))} | ||||
|   val hits = Vec.fill(entries){Bool()} | ||||
|   val updates = Vec.fill(entries){Bool()} | ||||
|   val targets = Vec.fill(entries){Reg(UInt())} | ||||
|   val anyUpdate = updates.toBits.orR | ||||
| 
 | ||||
  // Replacement choice: an entry whose valid bit is clear if one exists
  // (presumably the first such index - confirm indexWhere ordering),
  // otherwise a pseudo-random entry from Random(entries, io.wen).
|   val random_way = Random(entries, io.wen) | ||||
|   val invalid_way = valid.indexWhere((x: Bool) => !x) | ||||
|   val repl_way = Mux(valid.contains(Bool(false)), invalid_way, random_way) | ||||
| 
 | ||||
|   for (i <- 0 until entries) { | ||||
    // Tag register for entry i, compared against the lookup PC and the
    // update PC.
|     val tag = Reg(UInt()) | ||||
|     hits(i) := valid(i) && tag === io.current_pc | ||||
|     updates(i) := valid(i) && tag === io.correct_pc | ||||
| 
 | ||||
    // On an update, entry i is selected if it already holds correct_pc, or if
    // no entry does and i is the replacement way. The entry is invalidated
    // first; unless io.clr is set it is then re-validated and rewritten.
|     when (io.wen && (updates(i) || !anyUpdate && UInt(i) === repl_way)) { | ||||
|       valid(i) := Bool(false) | ||||
|       when (!io.clr) { | ||||
|         valid(i) := Bool(true) | ||||
|         tag := io.correct_pc | ||||
|         targets(i) := io.correct_target | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
  // hit: any entry matched the lookup PC; target: the matching entry's target.
|   io.hit    := hits.toBits.orR | ||||
|   io.target := Mux1H(hits, targets) | ||||
| } | ||||
| 
 | ||||
| class Status extends Bundle { | ||||
|   val ip = Bits(width = 8) | ||||
|   val im = Bits(width = 8) | ||||
| @@ -294,16 +244,3 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module | ||||
|     reg_status.ip := 0 | ||||
|   } | ||||
| } | ||||
| 
 | ||||
// Bundle parameterized by d and w. As shown here it declares no fields; only
// the clone override is visible.
// NOTE(review): clone builds a fresh instance with the same parameters,
// presumably so Chisel can copy this parameterized Bundle - confirm.
| class ioReadPort(d: Int, w: Int) extends Bundle | ||||
| { | ||||
|   override def clone = new ioReadPort(d, w).asInstanceOf[this.type] | ||||
| } | ||||
| 
 | ||||
// Write-port bundle parameterized by d and w.
//   addr - input, log2Up(d) bits wide
//   en   - input enable
//   data - input, w bits wide
// NOTE(review): d and w presumably mean depth and data width - confirm.
| class ioWritePort(d: Int, w: Int) extends Bundle | ||||
| { | ||||
|   val addr = UInt(INPUT, log2Up(d)) | ||||
|   val en   = Bool(INPUT) | ||||
|   val data = Bits(INPUT, w) | ||||
  // Fresh instance with the same parameters, cast to this.type.
|   override def clone = new ioWritePort(d, w).asInstanceOf[this.type] | ||||
| } | ||||
| @@ -6,7 +6,7 @@ import Util._ | ||||
|  | ||||
| case class ICacheConfig(sets: Int, assoc: Int, | ||||
|                         ibytes: Int = 4, | ||||
|                         ntlb: Int = 8, nbtb: Int = 8, | ||||
|                         ntlb: Int = 8, btb: BTBConfig = BTBConfig(8), | ||||
|                         code: Code = new IdentityCode) | ||||
| { | ||||
|   val w = 1 | ||||
| @@ -55,7 +55,7 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Modu | ||||
|     val mem = new UncachedTileLinkIO | ||||
|   } | ||||
|    | ||||
|   val btb = Module(new rocketDpathBTB(c.nbtb)) | ||||
|   val btb = Module(new BTB(c.btb)) | ||||
|   val icache = Module(new ICache) | ||||
|   val tlb = Module(new TLB(c.ntlb)) | ||||
|  | ||||
| @@ -94,7 +94,7 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Modu | ||||
|  | ||||
|   btb.io.current_pc := s1_pc | ||||
|   btb.io.wen := io.cpu.req.bits.mispredict | ||||
|   btb.io.clr := !io.cpu.req.bits.taken | ||||
|   btb.io.taken := io.cpu.req.bits.taken | ||||
|   btb.io.correct_pc := io.cpu.req.bits.currentpc | ||||
|   btb.io.correct_target := io.cpu.req.bits.pc | ||||
|   btb.io.invalidate := io.cpu.invalidate || io.cpu.ptw.invalidate | ||||
|   | ||||
| @@ -11,12 +11,15 @@ object Util { | ||||
|   implicit def intToUInt(x: Int): UInt = UInt(x) | ||||
|   implicit def booleanToBool(x: Boolean): Bits = Bool(x) | ||||
|   implicit def intSeqToUIntSeq(x: Iterable[Int]): Iterable[UInt] = x.map(UInt(_)) | ||||
|   implicit def seqToVec[T <: Data](x: Iterable[T]): Vec[T] = Vec(x) | ||||
|   implicit def wcToUInt(c: WideCounter): UInt = c.value | ||||
|  | ||||
|   implicit def booleanToInt(x: Boolean): Int = if (x) 1 else 0 | ||||
|   implicit def intToBooleanToInt(x: Int): BooleanToInt = new BooleanToInt(x) | ||||
| } | ||||
|  | ||||
| import Util._ | ||||
|  | ||||
| object AVec | ||||
| { | ||||
|   def apply[T <: Data](elts: Seq[T]): Vec[T] = Vec(elts) | ||||
| @@ -141,18 +144,20 @@ case class WideCounter(width: Int, inc: UInt = UInt(1)) | ||||
|  | ||||
| object Random | ||||
| { | ||||
|   def apply(mod: Int, inc: Bool = Bool(true)): UInt = { | ||||
|     if (isPow2(mod)) { | ||||
|       require(mod <= 65536) | ||||
|       LFSR16(inc)(log2Up(mod)-1,0).toUInt | ||||
|     } else { | ||||
|       val max = 1 << log2Up(mod*8) | ||||
|       val rand_pow2 = apply(max, inc) | ||||
|  | ||||
|       var res = UInt(mod-1) | ||||
|       for (i <- mod-1 to 1 by -1) | ||||
|         res = Mux(rand_pow2 < UInt(i*max/mod), UInt(i-1), res) | ||||
|       res | ||||
|     } | ||||
|   def apply(mod: Int, random: UInt): UInt = { | ||||
|     if (isPow2(mod)) random(log2Up(mod)-1,0) | ||||
|     else PriorityEncoder(partition(apply(1 << log2Up(mod*8), random), mod)) | ||||
|   } | ||||
|   def apply(mod: Int): UInt = apply(mod, randomizer) | ||||
|   def oneHot(mod: Int, random: UInt): UInt = { | ||||
|     if (isPow2(mod)) UIntToOH(random(log2Up(mod)-1,0)) | ||||
|     else PriorityEncoderOH(partition(apply(1 << log2Up(mod*8), random), mod)).toBits | ||||
|   } | ||||
|   def oneHot(mod: Int): UInt = oneHot(mod, randomizer) | ||||
|  | ||||
|   private def randomizer = LFSR16() | ||||
|   private def round(x: Double): Int = | ||||
|     if (x.toInt.toDouble == x) x.toInt else (x.toInt + 1) & -2 | ||||
|   private def partition(value: UInt, slices: Int) = | ||||
|     Vec.tabulate(slices)(i => value < round((i << value.getWidth).toDouble / slices)) | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user