From 7690de07e1af7d954241267a223041fa1fb7a103 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Wed, 2 Dec 2015 17:17:49 -0800 Subject: [PATCH] allow icache to configure which side of the way mux gets buffered --- rocket/src/main/scala/frontend.scala | 27 ++++++++++++++++++++------- rocket/src/main/scala/icache.scala | 14 ++++++++++++-- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala index 36a5a2d0..fd45d299 100644 --- a/rocket/src/main/scala/frontend.scala +++ b/rocket/src/main/scala/frontend.scala @@ -46,14 +46,15 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa val s2_btb_resp_valid = Reg(init=Bool(false)) val s2_btb_resp_bits = Reg(btb.io.resp.bits) val s2_xcpt_if = Reg(init=Bool(false)) - val icbuf = Module(new Queue(new ICacheResp, 1, pipe=true)) + val s2_resp_valid = Wire(init=Bool(false)) + val s2_resp_data = Wire(UInt(width = rowBits)) val msb = vaddrBits-1 val lsb = log2Up(fetchWidth*coreInstBytes) val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target) val ntpc_0 = s1_pc + UInt(coreInstBytes*fetchWidth) val ntpc = Cat(s1_pc(msb) & ntpc_0(msb), ntpc_0(msb,lsb), Bits(0,lsb)) // unsure - val icmiss = s2_valid && !icbuf.io.deq.valid + val icmiss = s2_valid && !s2_resp_valid val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, ntpc) val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((ntpc & rowBytes) === (s1_pc & rowBytes)) @@ -101,17 +102,29 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa icmiss || io.ptw.invalidate icache.io.resp.ready := !stall && !s1_same_block - io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icbuf.io.deq.valid) + io.cpu.resp.valid := s2_valid && (s2_xcpt_if || s2_resp_valid) io.cpu.resp.bits.pc := s2_pc io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) - icbuf.io.enq <> icache.io.resp - icbuf.io.deq.ready := !stall && !s1_same_block + // if the ways are buffered, we don't need to buffer again + if (p(ICacheBufferWays)) { + icache.io.resp.ready := !stall && !s1_same_block + + s2_resp_valid := icache.io.resp.valid + s2_resp_data := icache.io.resp.bits.datablock + } else { + val icbuf = Module(new Queue(new ICacheResp, 1, pipe=true)) + icbuf.io.enq <> icache.io.resp + icbuf.io.deq.ready := !stall && !s1_same_block + + s2_resp_valid := icbuf.io.deq.valid + s2_resp_data := icbuf.io.deq.bits.datablock + } require(fetchWidth * coreInstBytes <= rowBytes) val fetch_data = - if (fetchWidth * coreInstBytes == rowBytes) icbuf.io.deq.bits.datablock - else icbuf.io.deq.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(fetchWidth*coreInstBytes)) << log2Up(fetchWidth*coreInstBits)) + if (fetchWidth * coreInstBytes == rowBytes) s2_resp_data + else s2_resp_data >> (s2_pc(log2Up(rowBytes)-1,log2Up(fetchWidth*coreInstBytes)) << log2Up(fetchWidth*coreInstBits)) for (i <- 0 until fetchWidth) { io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 1b3cf067..e2ef913d 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -5,6 +5,8 @@ import uncore._ import Util._ import cde.{Parameters, Field} +case object ICacheBufferWays extends Field[Boolean] + trait HasL1CacheParameters extends HasCacheParameters with HasCoreParameters { val outerDataBeats = p(TLKey(p(TLId))).dataBeats val outerDataBits = p(TLKey(p(TLId))).dataBitsPerBeat @@ -124,10 +126,18 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara s1_dout(i) := 0 when (s1_valid && rdy && !stall && (Bool(isDM) || s1_tag_match(i))) { s1_dout(i) := s1_rdata } } - io.resp.bits.datablock := Mux1H(s1_tag_hit, s1_dout) // output signals - io.resp.valid := s1_hit + if (p(ICacheBufferWays)) { + val s2_hit = RegEnable(s1_hit, !stall) + val s2_tag_hit = RegEnable(s1_tag_hit, !stall) + val s2_dout = RegEnable(s1_dout, !stall) + io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout) + io.resp.valid := s2_hit + } else { + io.resp.bits.datablock := Mux1H(s1_tag_hit, s1_dout) + io.resp.valid := s1_hit + } io.mem.acquire.valid := (state === s_request) io.mem.acquire.bits := GetBlock(addr_block = refill_addr >> blockOffBits)