1
0

allow icache to configure which side of the way mux gets buffered

This commit is contained in:
Howard Mao 2015-12-02 17:17:49 -08:00
parent 369ee74a2c
commit 7690de07e1
2 changed files with 32 additions and 9 deletions

View File

@ -46,14 +46,15 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa
val s2_btb_resp_valid = Reg(init=Bool(false)) val s2_btb_resp_valid = Reg(init=Bool(false))
val s2_btb_resp_bits = Reg(btb.io.resp.bits) val s2_btb_resp_bits = Reg(btb.io.resp.bits)
val s2_xcpt_if = Reg(init=Bool(false)) val s2_xcpt_if = Reg(init=Bool(false))
val icbuf = Module(new Queue(new ICacheResp, 1, pipe=true)) val s2_resp_valid = Wire(init=Bool(false))
val s2_resp_data = Wire(UInt(width = rowBits))
val msb = vaddrBits-1 val msb = vaddrBits-1
val lsb = log2Up(fetchWidth*coreInstBytes) val lsb = log2Up(fetchWidth*coreInstBytes)
val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target) val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target)
val ntpc_0 = s1_pc + UInt(coreInstBytes*fetchWidth) val ntpc_0 = s1_pc + UInt(coreInstBytes*fetchWidth)
val ntpc = Cat(s1_pc(msb) & ntpc_0(msb), ntpc_0(msb,lsb), Bits(0,lsb)) // unsure val ntpc = Cat(s1_pc(msb) & ntpc_0(msb), ntpc_0(msb,lsb), Bits(0,lsb)) // unsure
val icmiss = s2_valid && !icbuf.io.deq.valid val icmiss = s2_valid && !s2_resp_valid
val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, ntpc) val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, ntpc)
val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt
val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((ntpc & rowBytes) === (s1_pc & rowBytes)) val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((ntpc & rowBytes) === (s1_pc & rowBytes))
@ -101,17 +102,29 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa
icmiss || io.ptw.invalidate icmiss || io.ptw.invalidate
icache.io.resp.ready := !stall && !s1_same_block icache.io.resp.ready := !stall && !s1_same_block
io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icbuf.io.deq.valid) io.cpu.resp.valid := s2_valid && (s2_xcpt_if || s2_resp_valid)
io.cpu.resp.bits.pc := s2_pc io.cpu.resp.bits.pc := s2_pc
io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc)
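// If the ICache already registers its per-way outputs ahead of the way mux (ICacheBufferWays),
// its response is buffered and is used directly; otherwise the muxed response is buffered here in a one-entry queue.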
if (p(ICacheBufferWays)) {
icache.io.resp.ready := !stall && !s1_same_block
s2_resp_valid := icache.io.resp.valid
s2_resp_data := icache.io.resp.bits.datablock
} else {
val icbuf = Module(new Queue(new ICacheResp, 1, pipe=true))
icbuf.io.enq <> icache.io.resp icbuf.io.enq <> icache.io.resp
icbuf.io.deq.ready := !stall && !s1_same_block icbuf.io.deq.ready := !stall && !s1_same_block
s2_resp_valid := icbuf.io.deq.valid
s2_resp_data := icbuf.io.deq.bits.datablock
}
require(fetchWidth * coreInstBytes <= rowBytes) require(fetchWidth * coreInstBytes <= rowBytes)
val fetch_data = val fetch_data =
if (fetchWidth * coreInstBytes == rowBytes) icbuf.io.deq.bits.datablock if (fetchWidth * coreInstBytes == rowBytes) s2_resp_data
else icbuf.io.deq.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(fetchWidth*coreInstBytes)) << log2Up(fetchWidth*coreInstBits)) else s2_resp_data >> (s2_pc(log2Up(rowBytes)-1,log2Up(fetchWidth*coreInstBytes)) << log2Up(fetchWidth*coreInstBits))
for (i <- 0 until fetchWidth) { for (i <- 0 until fetchWidth) {
io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits) io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits)

View File

@ -5,6 +5,8 @@ import uncore._
import Util._ import Util._
import cde.{Parameters, Field} import cde.{Parameters, Field}
case object ICacheBufferWays extends Field[Boolean]
trait HasL1CacheParameters extends HasCacheParameters with HasCoreParameters { trait HasL1CacheParameters extends HasCacheParameters with HasCoreParameters {
val outerDataBeats = p(TLKey(p(TLId))).dataBeats val outerDataBeats = p(TLKey(p(TLId))).dataBeats
val outerDataBits = p(TLKey(p(TLId))).dataBitsPerBeat val outerDataBits = p(TLKey(p(TLId))).dataBitsPerBeat
@ -124,10 +126,18 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara
s1_dout(i) := 0 s1_dout(i) := 0
when (s1_valid && rdy && !stall && (Bool(isDM) || s1_tag_match(i))) { s1_dout(i) := s1_rdata } when (s1_valid && rdy && !stall && (Bool(isDM) || s1_tag_match(i))) { s1_dout(i) := s1_rdata }
} }
io.resp.bits.datablock := Mux1H(s1_tag_hit, s1_dout)
// output signals // output signals
if (p(ICacheBufferWays)) {
val s2_hit = RegEnable(s1_hit, !stall)
val s2_tag_hit = RegEnable(s1_tag_hit, !stall)
val s2_dout = RegEnable(s1_dout, !stall)
io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout)
io.resp.valid := s2_hit
} else {
io.resp.bits.datablock := Mux1H(s1_tag_hit, s1_dout)
io.resp.valid := s1_hit io.resp.valid := s1_hit
}
io.mem.acquire.valid := (state === s_request) io.mem.acquire.valid := (state === s_request)
io.mem.acquire.bits := GetBlock(addr_block = refill_addr >> blockOffBits) io.mem.acquire.bits := GetBlock(addr_block = refill_addr >> blockOffBits)