Merge pull request #923 from freechipsproject/critical-paths

Critical paths
Yunsup Lee, 2017-08-05 17:02:22 -07:00 (committed by GitHub)
commit 6389120dbd
8 changed files with 100 additions and 82 deletions

regression/Makefile

@@ -58,7 +58,7 @@ endif
 ifeq ($(SUITE),UnittestSuite)
 PROJECT=freechips.rocketchip.unittest
-CONFIGS=AMBAUnitTestConfig TLSimpleUnitTestConfig TLWidthUnitTestConfig
+CONFIGS=TLSimpleUnitTestConfig TLWidthUnitTestConfig
 endif
 ifeq ($(SUITE), JtagDtmSuite)

src/main/scala/rocket/DCache.scala

@@ -6,7 +6,7 @@ import Chisel._
 import Chisel.ImplicitConversions._
 import freechips.rocketchip.config.Parameters
 import freechips.rocketchip.coreplex.{RationalCrossing, RocketCrossing, RocketTilesKey}
-import freechips.rocketchip.diplomacy.AddressSet
+import freechips.rocketchip.diplomacy.{AddressSet, RegionType}
 import freechips.rocketchip.tilelink._
 import freechips.rocketchip.util._
 import TLMessages._
@@ -50,7 +50,7 @@ class DCacheDataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) {
 class DCacheMetadataReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) {
   val write = Bool()
-  val idx = UInt(width = idxBits)
+  val addr = UInt(width = vaddrBitsExtended)
   val way_en = UInt(width = nWays)
   val data = new L1Metadata
 }
@@ -102,7 +102,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   val s1_req = Reg(io.cpu.req.bits)
   when (metaArb.io.out.valid && !metaArb.io.out.bits.write) {
     s1_req := io.cpu.req.bits
-    s1_req.addr := Cat(io.cpu.req.bits.addr >> untagBits, metaArb.io.out.bits.idx, io.cpu.req.bits.addr(blockOffBits-1,0))
+    s1_req.addr := Cat(metaArb.io.out.bits.addr >> blockOffBits, io.cpu.req.bits.addr(blockOffBits-1,0))
+    when (!metaArb.io.in(7).ready) { s1_req.phys := true }
   }
   val s1_read = isRead(s1_req.cmd)
   val s1_write = isWrite(s1_req.cmd)
@@ -137,7 +138,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   val s1_didntRead = RegEnable(s0_needsRead && !dataArb.io.in(3).ready, metaArb.io.out.valid && !metaArb.io.out.bits.write)
   metaArb.io.in(7).valid := io.cpu.req.valid
   metaArb.io.in(7).bits.write := false
-  metaArb.io.in(7).bits.idx := io.cpu.req.bits.addr(idxMSB, idxLSB)
+  metaArb.io.in(7).bits.addr := io.cpu.req.bits.addr
   metaArb.io.in(7).bits.way_en := ~UInt(0, nWays)
   metaArb.io.in(7).bits.data := metaArb.io.in(4).bits.data
   when (!metaArb.io.in(7).ready) { io.cpu.req.ready := false }
@@ -160,7 +161,6 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   when (s1_valid && s1_readwrite && tlb.io.resp.miss) { s1_nack := true }
   val s1_paddr = tlb.io.resp.paddr
-  val s1_tag = Mux(s1_probe, probe_bits.address, s1_paddr) >> untagBits
   val s1_victim_way = Wire(init = replacer.way)
   val (s1_hit_way, s1_hit_state, s1_meta, s1_victim_meta) =
     if (usingDataScratchpad) {
@@ -171,13 +171,15 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
       (inScratchpad, hitState, Seq(tECC.encode(dummyMeta.asUInt)), dummyMeta)
     } else {
       val metaReq = metaArb.io.out
+      val metaIdx = metaReq.bits.addr(idxMSB, idxLSB)
       when (metaReq.valid && metaReq.bits.write) {
         val wdata = tECC.encode(metaReq.bits.data.asUInt)
         val wmask = if (nWays == 1) Seq(true.B) else metaReq.bits.way_en.toBools
-        tag_array.write(metaReq.bits.idx, Vec.fill(nWays)(wdata), wmask)
+        tag_array.write(metaIdx, Vec.fill(nWays)(wdata), wmask)
       }
-      val s1_meta = tag_array.read(metaReq.bits.idx, metaReq.valid && !metaReq.bits.write)
+      val s1_meta = tag_array.read(metaIdx, metaReq.valid && !metaReq.bits.write)
       val s1_meta_uncorrected = s1_meta.map(tECC.decode(_).uncorrected.asTypeOf(new L1Metadata))
+      val s1_tag = s1_paddr >> untagBits
       val s1_meta_hit_way = s1_meta_uncorrected.map(r => r.coh.isValid() && r.tag === s1_tag).asUInt
       val s1_meta_hit_state = ClientMetadata.onReset.fromBits(
         s1_meta_uncorrected.map(r => Mux(r.tag === s1_tag, r.coh.asUInt, UInt(0)))
@@ -254,14 +256,14 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   metaArb.io.in(1).valid := s2_meta_error && (s2_valid_masked || s2_flush_valid_pre_tag_ecc || s2_probe)
   metaArb.io.in(1).bits.write := true
   metaArb.io.in(1).bits.way_en := PriorityEncoderOH(s2_meta_errors)
-  metaArb.io.in(1).bits.idx := Mux(s2_probe, probe_bits.address, s2_req.addr)(idxMSB, idxLSB)
+  metaArb.io.in(1).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, Mux(s2_probe, probe_bits.address, s2_req.addr)(idxMSB, 0))
   metaArb.io.in(1).bits.data := PriorityMux(s2_meta_errors, s2_meta_corrected)
   // tag updates on hit/miss
   metaArb.io.in(2).valid := (s2_valid_hit && s2_update_meta) || (s2_victimize && !s2_victim_dirty)
   metaArb.io.in(2).bits.write := true
   metaArb.io.in(2).bits.way_en := s2_victim_way
-  metaArb.io.in(2).bits.idx := s2_req.addr(idxMSB, idxLSB)
+  metaArb.io.in(2).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, s2_req.addr(idxMSB, 0))
   metaArb.io.in(2).bits.data.coh := Mux(s2_valid_hit, s2_new_hit_state, ClientMetadata.onReset)
   metaArb.io.in(2).bits.data.tag := s2_req.addr >> untagBits
@@ -463,7 +465,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   assert(!metaArb.io.in(3).valid || metaArb.io.in(3).ready)
   metaArb.io.in(3).bits.write := true
   metaArb.io.in(3).bits.way_en := s2_victim_way
-  metaArb.io.in(3).bits.idx := s2_req.addr(idxMSB, idxLSB)
+  metaArb.io.in(3).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, s2_req.addr(idxMSB, 0))
   metaArb.io.in(3).bits.data.coh := s2_hit_state.onGrant(s2_req.cmd, tl_out.d.bits.param)
   metaArb.io.in(3).bits.data.tag := s2_req.addr >> untagBits
   // don't accept uncached grants if there's a structural hazard on s2_data...
@@ -485,7 +487,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   metaArb.io.in(6).valid := tl_out.b.valid && !block_probe
   tl_out.b.ready := metaArb.io.in(6).ready && !block_probe && !s1_valid && (!s2_valid || s2_valid_hit)
   metaArb.io.in(6).bits.write := false
-  metaArb.io.in(6).bits.idx := tl_out.b.bits.address(idxMSB, idxLSB)
+  metaArb.io.in(6).bits.addr := Cat(io.cpu.req.bits.addr >> paddrBits, tl_out.b.bits.address)
   metaArb.io.in(6).bits.way_en := ~UInt(0, nWays)
   metaArb.io.in(6).bits.data := metaArb.io.in(4).bits.data
@@ -529,7 +531,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   }
   when (release_state === s_probe_retry) {
     metaArb.io.in(6).valid := true
-    metaArb.io.in(6).bits.idx := probe_bits.address(idxMSB, idxLSB)
+    metaArb.io.in(6).bits.addr := Cat(io.cpu.req.bits.addr >> paddrBits, probe_bits.address)
     when (metaArb.io.in(6).ready) {
       release_state := s_ready
       s1_probe := true
@@ -572,7 +574,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   metaArb.io.in(4).valid := release_state.isOneOf(s_voluntary_write_meta, s_probe_write_meta)
   metaArb.io.in(4).bits.write := true
   metaArb.io.in(4).bits.way_en := releaseWay
-  metaArb.io.in(4).bits.idx := tl_out.c.bits.address(idxMSB, idxLSB)
+  metaArb.io.in(4).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, tl_out.c.bits.address(idxMSB, 0))
   metaArb.io.in(4).bits.data.coh := newCoh
   metaArb.io.in(4).bits.data.tag := tl_out.c.bits.address >> untagBits
   when (metaArb.io.in(4).fire()) { release_state := s_ready }
@@ -638,7 +640,6 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   val flushCounterNext = flushCounter +& 1
   val flushDone = (flushCounterNext >> log2Ceil(nSets)) === nWays
   val flushCounterWrap = flushCounterNext(log2Ceil(nSets)-1, 0)
-  when (tl_out_a.fire() && !s2_uncached) { flushed := false }
   when (s2_valid_masked && s2_req.cmd === M_FLUSH_ALL) {
     io.cpu.s2_nack := !flushed
     when (!flushed) {
@@ -648,24 +649,28 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   s1_flush_valid := metaArb.io.in(5).fire() && !s1_flush_valid && !s2_flush_valid_pre_tag_ecc && release_state === s_ready && !release_ack_wait
   metaArb.io.in(5).valid := flushing
   metaArb.io.in(5).bits.write := false
-  metaArb.io.in(5).bits.idx := flushCounter
+  metaArb.io.in(5).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, flushCounter(idxBits-1, 0) << blockOffBits)
   metaArb.io.in(5).bits.way_en := ~UInt(0, nWays)
   metaArb.io.in(5).bits.data := metaArb.io.in(4).bits.data
-  when (flushing) {
-    s1_victim_way := flushCounter >> log2Up(nSets)
-    when (s2_flush_valid) {
-      flushCounter := flushCounterNext
-      when (flushDone) {
-        flushed := true
-        if (!isPow2(nWays)) flushCounter := flushCounterWrap
-      }
-    }
-    when (flushed && release_state === s_ready && !release_ack_wait) {
-      flushing := false
-    }
-  }
+  // Only flush D$ on FENCE.I if some cached executable regions are untracked.
+  if (!edge.manager.managers.forall(m => !m.supportsAcquireB || !m.executable || m.regionType >= RegionType.TRACKED)) {
+    when (tl_out_a.fire() && !s2_uncached) { flushed := false }
+    when (flushing) {
+      s1_victim_way := flushCounter >> log2Up(nSets)
+      when (s2_flush_valid) {
+        flushCounter := flushCounterNext
+        when (flushDone) {
+          flushed := true
+          if (!isPow2(nWays)) flushCounter := flushCounterWrap
+        }
+      }
+      when (flushed && release_state === s_ready && !release_ack_wait) {
+        flushing := false
+      }
+    }
+  }
   metaArb.io.in(0).valid := resetting
-  metaArb.io.in(0).bits.idx := flushCounter
+  metaArb.io.in(0).bits.addr := metaArb.io.in(5).bits.addr
   metaArb.io.in(0).bits.write := true
   metaArb.io.in(0).bits.way_en := ~UInt(0, nWays)
   metaArb.io.in(0).bits.data.coh := ClientMetadata.onReset
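
Note on the flush arithmetic above: flushCounter enumerates every (way, set) pair with the set index in the low bits, so shifting off the set bits yields the way being swept, and the sweep completes once that quotient reaches nWays. A minimal standalone sketch of the same arithmetic (module and port names are illustrative, not from this diff):

import Chisel._

// Hypothetical demo module; only the counter arithmetic mirrors DCache.scala.
class FlushCounterDemo(nSets: Int, nWays: Int) extends Module {
  val io = new Bundle {
    val done = Bool(OUTPUT)                 // asserted on the final (way, set) step
    val way  = UInt(OUTPUT, log2Up(nWays))  // way currently being swept
  }
  // Wide enough to count up to nSets * nWays, as the D$'s flushCounter must.
  val counter = Reg(init = UInt(0, log2Ceil(nSets * (nWays + 1))))
  val counterNext = counter +& UInt(1)
  counter := counterNext
  io.done := (counterNext >> log2Ceil(nSets)) === UInt(nWays)
  io.way  := counter >> log2Up(nSets)
}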

src/main/scala/rocket/Frontend.scala

@@ -86,6 +86,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
   val fq = withReset(reset || io.cpu.req.valid) { Module(new ShiftQueue(new FrontendResp, 5, flow = true)) }
   val s0_valid = io.cpu.req.valid || !fq.io.mask(fq.io.mask.getWidth-3)
+  val s1_valid = RegNext(s0_valid)
   val s1_pc = Reg(UInt(width=vaddrBitsExtended))
   val s1_speculative = Reg(Bool())
   val s2_valid = RegInit(false.B)
@@ -143,13 +144,13 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
   icache.io.s2_kill := s2_speculative && !s2_tlb_resp.cacheable || s2_xcpt
   icache.io.s2_prefetch := s2_tlb_resp.prefetchable
-  fq.io.enq.valid := s2_valid && (icache.io.resp.valid || !s2_tlb_resp.miss && icache.io.s2_kill)
+  fq.io.enq.valid := RegNext(s1_valid) && s2_valid && (icache.io.resp.valid || !s2_tlb_resp.miss && icache.io.s2_kill)
   fq.io.enq.bits.pc := s2_pc
   io.cpu.npc := alignPC(Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc))
   fq.io.enq.bits.data := icache.io.resp.bits.data
   fq.io.enq.bits.mask := UInt((1 << fetchWidth)-1) << s2_pc.extract(log2Ceil(fetchWidth)+log2Ceil(coreInstBytes)-1, log2Ceil(coreInstBytes))
-  fq.io.enq.bits.replay := icache.io.s2_kill && !icache.io.resp.valid && !s2_xcpt
+  fq.io.enq.bits.replay := icache.io.resp.bits.replay || icache.io.s2_kill && !icache.io.resp.valid && !s2_xcpt
   fq.io.enq.bits.btb.valid := s2_btb_resp_valid
   fq.io.enq.bits.btb.bits := s2_btb_resp_bits
   fq.io.enq.bits.xcpt := s2_tlb_resp

src/main/scala/rocket/IBuf.scala

@@ -43,24 +43,24 @@ class IBuf(implicit p: Parameters) extends CoreModule {
   val nIC = Mux(io.imem.bits.btb.valid && io.imem.bits.btb.bits.taken, io.imem.bits.btb.bits.bridx +& 1, UInt(fetchWidth)) - pcWordBits
   val nICReady = nReady - nBufValid
   val nValid = Mux(io.imem.valid, nIC, UInt(0)) + nBufValid
-  io.imem.ready := nReady >= nBufValid && (nICReady >= nIC || n >= nIC - nICReady)
+  io.imem.ready := io.inst(0).ready && nReady >= nBufValid && (nICReady >= nIC || n >= nIC - nICReady)
   if (n > 0) {
-    nBufValid := Mux(nReady >= nBufValid, UInt(0), nBufValid - nReady)
-    if (n > 1) when (nReady > 0 && nReady < nBufValid) {
-      val shiftedBuf = shiftInsnRight(buf.data(n*coreInstBits-1, coreInstBits), (nReady-1)(log2Ceil(n-1)-1,0))
-      buf.data := Cat(buf.data(n*coreInstBits-1, (n-1)*coreInstBits), shiftedBuf((n-1)*coreInstBits-1, 0))
-      buf.pc := buf.pc & ~pcWordMask | (buf.pc + (nReady << log2Ceil(coreInstBytes))) & pcWordMask
-      ibufBTBResp.bridx := ibufBTBResp.bridx - nReady
-    }
-    when (io.imem.valid && nReady >= nBufValid && nICReady < nIC && n >= nIC - nICReady) {
-      val shamt = pcWordBits + nICReady
-      nBufValid := nIC - nICReady
-      buf := io.imem.bits
-      buf.data := shiftInsnRight(io.imem.bits.data, shamt)(n*coreInstBits-1,0)
-      buf.pc := io.imem.bits.pc & ~pcWordMask | (io.imem.bits.pc + (nICReady << log2Ceil(coreInstBytes))) & pcWordMask
-      ibufBTBHit := io.imem.bits.btb.valid
-      when (io.imem.bits.btb.valid) {
-        ibufBTBResp := io.imem.bits.btb.bits
-        ibufBTBResp.bridx := io.imem.bits.btb.bits.bridx + nICReady
-      }
+    when (io.inst(0).ready) {
+      nBufValid := Mux(nReady >= nBufValid, UInt(0), nBufValid - nReady)
+      if (n > 1) when (nReady > 0 && nReady < nBufValid) {
+        val shiftedBuf = shiftInsnRight(buf.data(n*coreInstBits-1, coreInstBits), (nReady-1)(log2Ceil(n-1)-1,0))
+        buf.data := Cat(buf.data(n*coreInstBits-1, (n-1)*coreInstBits), shiftedBuf((n-1)*coreInstBits-1, 0))
+        buf.pc := buf.pc & ~pcWordMask | (buf.pc + (nReady << log2Ceil(coreInstBytes))) & pcWordMask
+        ibufBTBResp.bridx := ibufBTBResp.bridx - nReady
+      }
+      when (io.imem.valid && nReady >= nBufValid && nICReady < nIC && n >= nIC - nICReady) {
+        val shamt = pcWordBits + nICReady
+        nBufValid := nIC - nICReady
+        buf := io.imem.bits
+        buf.data := shiftInsnRight(io.imem.bits.data, shamt)(n*coreInstBits-1,0)
+        buf.pc := io.imem.bits.pc & ~pcWordMask | (io.imem.bits.pc + (nICReady << log2Ceil(coreInstBytes))) & pcWordMask
+        ibufBTBHit := io.imem.bits.btb.valid
+        when (io.imem.bits.btb.valid) {
+          ibufBTBResp := io.imem.bits.btb.bits
+          ibufBTBResp.bridx := io.imem.bits.btb.bits.bridx + nICReady
+        }
@@ -76,10 +76,11 @@ class IBuf(implicit p: Parameters) extends CoreModule {
   val icMask = (~UInt(0, fetchWidth*coreInstBits) << (nBufValid << log2Ceil(coreInstBits)))(fetchWidth*coreInstBits-1,0)
   val inst = icData & icMask | buf.data & ~icMask
-  val valid = (UIntToOH(nValid) - 1)(fetchWidth-1, 0)
-  val bufMask = UIntToOH(nBufValid) - 1
+  val valid = UIntToOH1(nValid, fetchWidth)
+  val bufMask = UIntToOH1(nBufValid, fetchWidth)
   val xcpt = (0 until bufMask.getWidth).map(i => Mux(bufMask(i), buf.xcpt, io.imem.bits.xcpt))
-  val ic_replay = valid & (Mux(buf.replay, bufMask, UInt(0)) | Mux(io.imem.bits.replay, ~bufMask, UInt(0)))
+  val buf_replay = Mux(buf.replay, bufMask, UInt(0))
+  val ic_replay = buf_replay | Mux(io.imem.bits.replay, valid & ~bufMask, UInt(0))
   val ibufBTBHitMask = Mux(ibufBTBHit, UIntToOH(ibufBTBResp.bridx), UInt(0))
   assert(!io.imem.valid || !io.imem.bits.btb.valid || io.imem.bits.btb.bits.bridx >= pcWordBits)
   val icBTBHitMask = Mux(io.imem.bits.btb.valid, UIntToOH(io.imem.bits.btb.bits.bridx +& nBufValid - pcWordBits), UInt(0))
@@ -97,18 +98,19 @@ class IBuf(implicit p: Parameters) extends CoreModule {
     if (usingCompressed) {
       val replay = ic_replay(j) || (!exp.io.rvc && (btbHitMask(j) || ic_replay(j+1)))
-      io.inst(i).valid := valid(j) && (exp.io.rvc || valid(j+1) || xcpt(j+1).asUInt.orR || replay)
+      val full_insn = exp.io.rvc || valid(j+1) || xcpt(j+1).asUInt.orR || buf_replay(j)
+      io.inst(i).valid := valid(j) && full_insn
       io.inst(i).bits.xcpt0 := xcpt(j)
       io.inst(i).bits.xcpt1 := Mux(exp.io.rvc, 0.U, xcpt(j+1).asUInt).asTypeOf(new FrontendExceptions)
       io.inst(i).bits.replay := replay
       io.inst(i).bits.btb_hit := btbHitMask(j) || (!exp.io.rvc && btbHitMask(j+1))
       io.inst(i).bits.rvc := exp.io.rvc
-      when (io.inst(i).fire()) { nReady := Mux(exp.io.rvc, j+1, j+2) }
+      when (full_insn && (i == 0 || io.inst(i).ready)) { nReady := Mux(exp.io.rvc, j+1, j+2) }
       expand(i+1, Mux(exp.io.rvc, j+1, j+2), Mux(exp.io.rvc, curInst >> 16, curInst >> 32))
     } else {
-      when (io.inst(i).ready) { nReady := i+1 }
+      when (i == 0 || io.inst(i).ready) { nReady := i+1 }
      io.inst(i).valid := valid(i)
      io.inst(i).bits.xcpt0 := xcpt(i)
      io.inst(i).bits.xcpt1 := 0.U.asTypeOf(new FrontendExceptions)
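
On the UIntToOH1 substitution above: both `(UIntToOH(nValid) - 1)` and `UIntToOH(nBufValid) - 1` build a mask whose n low bits are set, and the shared helper from freechips.rocketchip.util names that idiom while fixing the result width at fetchWidth. A sketch of the assumed semantics (the exact library definition may differ):

// Assumption: UIntToOH1(n, width) is a width-bit "thermometer" mask with the
// n low bits set, e.g. n = 3, width = 8 gives b00000111.
def uintToOH1(n: UInt, width: Int): UInt =
  (UIntToOH(n, width + 1) - UInt(1))(width - 1, 0)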

src/main/scala/rocket/ICache.scala

@@ -63,6 +63,7 @@ class ICache(val icacheParams: ICacheParams, val hartid: Int)(implicit p: Parameters)
 class ICacheResp(outer: ICache) extends Bundle {
   val data = UInt(width = outer.icacheParams.fetchBytes*8)
+  val replay = Bool()
   val ae = Bool()
   override def cloneType = new ICacheResp(outer).asInstanceOf[this.type]
@@ -259,7 +260,8 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
   io.resp.bits.data := s2_data_decoded.uncorrected
   io.resp.bits.ae := s2_tl_error
-  io.resp.valid := s2_valid && s2_hit && !s2_disparity
+  io.resp.bits.replay := s2_disparity
+  io.resp.valid := s2_valid && s2_hit
   tl_in.map { tl =>
     val respValid = RegInit(false.B)

src/main/scala/rocket/PTW.scala

@@ -135,7 +135,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(p)
   val l2_refill = RegNext(false.B)
   io.dpath.perf.l2miss := false
-  val (l2_hit, l2_pte) = if (coreParams.nL2TLBEntries == 0) (false.B, Wire(new PTE)) else {
+  val (l2_hit, l2_valid, l2_pte) = if (coreParams.nL2TLBEntries == 0) (false.B, false.B, Wire(new PTE)) else {
     val code = new ParityCode
     require(isPow2(coreParams.nL2TLBEntries))
     val idxBits = log2Ceil(coreParams.nL2TLBEntries)
@@ -176,23 +176,25 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(p)
     val s0_valid = !l2_refill && arb.io.out.fire()
     val s1_valid = RegNext(s0_valid)
-    val s2_valid = RegNext(s1_valid && valid(r_idx))
+    val s2_valid = RegNext(s1_valid)
     val s1_rdata = ram.read(arb.io.out.bits.addr(idxBits-1, 0), s0_valid)
     val s2_rdata = code.decode(RegEnable(s1_rdata, s1_valid))
-    when (s2_valid && s2_rdata.error) { valid := 0.U }
+    val s2_valid_bit = RegEnable(valid(r_idx), s1_valid)
+    val s2_g = RegEnable(g(r_idx), s1_valid)
+    when (s2_valid && s2_valid_bit && s2_rdata.error) { valid := 0.U }
     val s2_entry = s2_rdata.uncorrected.asTypeOf(new Entry)
-    val s2_hit = s2_valid && !s2_rdata.error && r_tag === s2_entry.tag
-    io.dpath.perf.l2miss := s2_valid && !(r_tag === s2_entry.tag)
+    val s2_hit = s2_valid && s2_valid_bit && !s2_rdata.error && r_tag === s2_entry.tag
+    io.dpath.perf.l2miss := s2_valid && !(s2_valid_bit && r_tag === s2_entry.tag)
     val s2_pte = Wire(new PTE)
     s2_pte := s2_entry
-    s2_pte.g := g(r_idx)
+    s2_pte.g := s2_g
     s2_pte.v := true
-    (s2_hit, s2_pte)
+    (s2_hit, s2_valid && s2_valid_bit, s2_pte)
   }
-  io.mem.req.valid := state === s_req && !l2_hit
+  io.mem.req.valid := state === s_req && !l2_valid
   io.mem.req.bits.phys := Bool(true)
   io.mem.req.bits.cmd := M_XRD
   io.mem.req.bits.typ := log2Ceil(xLen/8)
@@ -231,7 +233,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(p)
       s1_kill := true
       count := count + 1
       r_pte.ppn := pte_cache_data
-    }.elsewhen (io.mem.req.ready) {
+    }.elsewhen (io.mem.req.fire()) {
       state := s_wait1
     }
   }
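
The s_req arm above now advances on `io.mem.req.fire()` instead of `io.mem.req.ready`. On a Chisel Decoupled interface the two differ exactly when valid is low:

// ReadyValidIO.fire() in Chisel: a beat transfers only when both sides agree
// in the same cycle.
def fire(): Bool = ready && valid

Since `io.mem.req.valid` is now `state === s_req && !l2_valid`, gating the transition on fire() keeps the walker from slipping into s_wait1 in a cycle where no request was actually issued (for example, when the L2 TLB supplies the PTE instead).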

src/main/scala/rocket/RocketCore.scala

@@ -376,14 +376,18 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
   val mem_cfi_taken = (mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal
   val mem_direction_misprediction = (Bool(coreParams.jumpInFrontend) || mem_reg_btb_hit) && mem_ctrl.branch && mem_br_taken =/= mem_reg_btb_resp.taken
   val mem_misprediction = if (usingBTB) mem_wrong_npc else mem_cfi_taken
-  take_pc_mem := mem_reg_valid && (mem_misprediction || mem_reg_sfence || (mem_ctrl.jalr && csr.io.status.debug))
+  take_pc_mem := mem_reg_valid && (mem_misprediction || mem_reg_sfence)
   mem_reg_valid := !ctrl_killx
   mem_reg_replay := !take_pc_mem_wb && replay_ex
   mem_reg_xcpt := !ctrl_killx && ex_xcpt
   mem_reg_xcpt_interrupt := !take_pc_mem_wb && ex_reg_xcpt_interrupt
-  when (ex_pc_valid) {
+  // on pipeline flushes, cause mem_npc to hold the sequential npc, which
+  // will drive the W-stage npc mux
+  when (mem_reg_valid && mem_reg_flush_pipe) {
+    mem_reg_sfence := false
+  }.elsewhen (ex_pc_valid) {
     mem_ctrl := ex_ctrl
     mem_reg_rvc := ex_reg_rvc
     mem_reg_load := ex_ctrl.mem && isRead(ex_ctrl.mem_cmd)
@@ -398,10 +402,16 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
     mem_reg_inst := ex_reg_inst
     mem_reg_pc := ex_reg_pc
     mem_reg_wdata := alu.io.out
     when (ex_ctrl.rxs2 && (ex_ctrl.mem || ex_ctrl.rocc || ex_sfence)) {
       val typ = Mux(ex_ctrl.rocc, log2Ceil(xLen/8).U, ex_ctrl.mem_type)
       mem_reg_rs2 := new StoreGen(typ, 0.U, ex_rs(1), coreDataBytes).data
     }
+    when (ex_ctrl.jalr && csr.io.status.debug) {
+      // flush I$ on D-mode JALR to effect uncached fetch without D$ flush
+      mem_ctrl.fence_i := true
+      mem_reg_flush_pipe := true
+    }
   }
   val mem_breakpoint = (mem_reg_load && bpu.io.xcpt_ld) || (mem_reg_store && bpu.io.xcpt_st)
@@ -438,9 +448,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
     wb_reg_cause := mem_cause
     wb_reg_inst := mem_reg_inst
     wb_reg_pc := mem_reg_pc
-    when (mem_ctrl.jalr && csr.io.status.debug) {
-      wb_ctrl.fence_i := true
-    }
   }
   val (wb_xcpt, wb_cause) = checkExceptions(List(
@@ -458,7 +465,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
   val replay_wb_common = io.dmem.s2_nack || wb_reg_replay
   val replay_wb_rocc = wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready
   val replay_wb = replay_wb_common || replay_wb_rocc
-  val wb_npc = encodeVirtualAddress(wb_reg_pc, wb_reg_pc + Mux(replay_wb, 0.U, Mux(wb_reg_rvc, 2.U, 4.U)))
   take_pc_wb := replay_wb || wb_xcpt || csr.io.eret || wb_reg_flush_pipe
   // writeback arbitration
@@ -585,9 +591,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
   io.imem.req.valid := take_pc
   io.imem.req.bits.speculative := !take_pc_wb
   io.imem.req.bits.pc :=
     Mux(wb_xcpt || csr.io.eret, csr.io.evec,       // exception or [m|s]ret
-    Mux(replay_wb || wb_reg_flush_pipe, wb_npc,    // replay or flush
-        mem_npc))                                  // branch misprediction
+    Mux(replay_wb,              wb_reg_pc,         // replay
+                                mem_npc))          // flush or branch misprediction
   io.imem.flush_icache := wb_reg_valid && wb_ctrl.fence_i && !io.dmem.s2_nack
   io.imem.sfence.valid := wb_reg_valid && wb_reg_sfence
   io.imem.sfence.bits.rs1 := wb_ctrl.mem_type(0)
@@ -596,7 +602,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
   io.imem.sfence.bits.asid := wb_reg_rs2
   io.ptw.sfence := io.imem.sfence
-  ibuf.io.inst(0).ready := !ctrl_stalld || csr.io.interrupt
+  ibuf.io.inst(0).ready := !ctrl_stalld
   io.imem.btb_update.valid := (mem_reg_replay && mem_reg_btb_hit) || (mem_reg_valid && !take_pc_wb && mem_wrong_npc && (!mem_cfi || mem_cfi_taken))
   io.imem.btb_update.bits.isValid := !mem_reg_replay && mem_cfi
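
Tracing the debug-mode JALR change across this file's hunks: the special-case redirect term is dropped from take_pc_mem, and the fence.i is tagged in the M stage rather than patched into wb_ctrl in the W stage, riding the ordinary flush machinery. Schematically, with both pieces taken verbatim from hunks above:

// M stage: a JALR in debug mode is treated as a fence.i plus pipeline flush,
// flushing the I$ (for the uncached re-fetch) without touching the D$.
when (ex_ctrl.jalr && csr.io.status.debug) {
  mem_ctrl.fence_i := true
  mem_reg_flush_pipe := true
}

// W stage (pre-existing path): the fence.i bit is what actually flushes the I$.
io.imem.flush_icache := wb_reg_valid && wb_ctrl.fence_i && !io.dmem.s2_nack

With mem_reg_flush_pipe set, mem_npc holds the sequential npc (per the new comment above), which is why the simplified W-stage pc mux can serve flushes and branch mispredictions through the same mem_npc leg.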

src/main/scala/util/ShiftQueue.scala

@@ -20,20 +20,20 @@ class ShiftQueue[T <: Data](gen: T,
   private val valid = RegInit(Vec.fill(entries) { Bool(false) })
   private val elts = Reg(Vec(entries, gen))
-  private val do_enq = io.enq.fire()
-  private val do_deq = io.deq.fire()
   for (i <- 0 until entries) {
+    def paddedValid(i: Int) = if (i == -1) true.B else if (i == entries) false.B else valid(i)
     val wdata = if (i == entries-1) io.enq.bits else Mux(valid(i+1), elts(i+1), io.enq.bits)
-    val shiftDown = if (i == entries-1) false.B else io.deq.ready && valid(i+1)
-    val enqNew = io.enq.fire() && Mux(io.deq.ready, valid(i), !valid(i) && (if (i == 0) true.B else valid(i-1)))
-    when (shiftDown || enqNew) { elts(i) := wdata }
-  }
-
-  val padded = Seq(true.B) ++ valid ++ Seq(false.B)
-  for (i <- 0 until entries) {
-    when ( do_enq && !do_deq && padded(i+0)) { valid(i) := true.B }
-    when (!do_enq && do_deq && !padded(i+2)) { valid(i) := false.B }
+    val wen =
+      Mux(io.deq.ready,
+          paddedValid(i+1) || io.enq.fire() && valid(i),
+          io.enq.fire() && paddedValid(i-1) && !valid(i))
+    when (wen) { elts(i) := wdata }
+    valid(i) :=
+      Mux(io.deq.ready,
+          paddedValid(i+1) || io.enq.fire() && (Bool(i == 0 && !flow) || valid(i)),
+          io.enq.fire() && paddedValid(i-1) || valid(i))
   }
   io.enq.ready := !valid(entries-1)
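
For context on how the rewritten queue is exercised: Frontend.scala above instantiates a 5-entry flow-through ShiftQueue that is reset on a CPU redirect; because entries shift toward index 0, the valid bits form a dense mask that the frontend reads back (fq.io.mask) for back-pressure. Repeating the instantiation from the Frontend hunk:

// flow = true lets an enqueued beat pass straight through to deq in the same
// cycle when the queue is empty.
val fq = withReset(reset || io.cpu.req.valid) {
  Module(new ShiftQueue(new FrontendResp, 5, flow = true))
}

The rewrite computes each elts(i)/valid(i) update from its immediate neighbors via paddedValid instead of from the global do_enq/do_deq terms, in keeping with this PR's critical-path theme.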