Merge pull request #923 from freechipsproject/critical-paths
Critical paths

commit 6389120dbd

@@ -58,7 +58,7 @@ endif
 
 ifeq ($(SUITE),UnittestSuite)
 PROJECT=freechips.rocketchip.unittest
-CONFIGS=AMBAUnitTestConfig TLSimpleUnitTestConfig TLWidthUnitTestConfig
+CONFIGS=TLSimpleUnitTestConfig TLWidthUnitTestConfig
 endif
 
 ifeq ($(SUITE), JtagDtmSuite)

@@ -6,7 +6,7 @@ import Chisel._
 import Chisel.ImplicitConversions._
 import freechips.rocketchip.config.Parameters
 import freechips.rocketchip.coreplex.{RationalCrossing, RocketCrossing, RocketTilesKey}
-import freechips.rocketchip.diplomacy.AddressSet
+import freechips.rocketchip.diplomacy.{AddressSet, RegionType}
 import freechips.rocketchip.tilelink._
 import freechips.rocketchip.util._
 import TLMessages._

@@ -50,7 +50,7 @@ class DCacheDataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) {
 
 class DCacheMetadataReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) {
   val write = Bool()
-  val idx = UInt(width = idxBits)
+  val addr = UInt(width = vaddrBitsExtended)
   val way_en = UInt(width = nWays)
   val data = new L1Metadata
 }
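
Note: the DCacheMetadataReq hunk above widens the request from a set index to a full virtual address, so every metadata-arbiter client drives one wide addr field and each consumer re-derives the index locally (see the metaIdx hunk below). A minimal sketch of that pattern, with illustrative names and bit positions that are not taken from the source:

    import Chisel._

    // Producer passes the whole address through the arbiter; the consumer
    // extracts the set index. idxLSB/idxMSB stand in for the source's
    // blockOffBits and blockOffBits + idxBits - 1.
    class MetaReqSketch(vaddrBits: Int, idxMSB: Int, idxLSB: Int) extends Module {
      val io = new Bundle {
        val addr = UInt(INPUT, vaddrBits)
        val idx  = UInt(OUTPUT, idxMSB - idxLSB + 1)
      }
      io.idx := io.addr(idxMSB, idxLSB) // consumer-side index extraction
    }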

@@ -102,7 +102,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   val s1_req = Reg(io.cpu.req.bits)
   when (metaArb.io.out.valid && !metaArb.io.out.bits.write) {
     s1_req := io.cpu.req.bits
-    s1_req.addr := Cat(io.cpu.req.bits.addr >> untagBits, metaArb.io.out.bits.idx, io.cpu.req.bits.addr(blockOffBits-1,0))
+    s1_req.addr := Cat(metaArb.io.out.bits.addr >> blockOffBits, io.cpu.req.bits.addr(blockOffBits-1,0))
+    when (!metaArb.io.in(7).ready) { s1_req.phys := true }
   }
   val s1_read = isRead(s1_req.cmd)
   val s1_write = isWrite(s1_req.cmd)

@@ -137,7 +138,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   val s1_didntRead = RegEnable(s0_needsRead && !dataArb.io.in(3).ready, metaArb.io.out.valid && !metaArb.io.out.bits.write)
   metaArb.io.in(7).valid := io.cpu.req.valid
   metaArb.io.in(7).bits.write := false
-  metaArb.io.in(7).bits.idx := io.cpu.req.bits.addr(idxMSB, idxLSB)
+  metaArb.io.in(7).bits.addr := io.cpu.req.bits.addr
   metaArb.io.in(7).bits.way_en := ~UInt(0, nWays)
   metaArb.io.in(7).bits.data := metaArb.io.in(4).bits.data
   when (!metaArb.io.in(7).ready) { io.cpu.req.ready := false }

@@ -160,7 +161,6 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   when (s1_valid && s1_readwrite && tlb.io.resp.miss) { s1_nack := true }
 
   val s1_paddr = tlb.io.resp.paddr
-  val s1_tag = Mux(s1_probe, probe_bits.address, s1_paddr) >> untagBits
   val s1_victim_way = Wire(init = replacer.way)
   val (s1_hit_way, s1_hit_state, s1_meta, s1_victim_meta) =
     if (usingDataScratchpad) {

@@ -171,13 +171,15 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
       (inScratchpad, hitState, Seq(tECC.encode(dummyMeta.asUInt)), dummyMeta)
     } else {
       val metaReq = metaArb.io.out
+      val metaIdx = metaReq.bits.addr(idxMSB, idxLSB)
       when (metaReq.valid && metaReq.bits.write) {
         val wdata = tECC.encode(metaReq.bits.data.asUInt)
         val wmask = if (nWays == 1) Seq(true.B) else metaReq.bits.way_en.toBools
-        tag_array.write(metaReq.bits.idx, Vec.fill(nWays)(wdata), wmask)
+        tag_array.write(metaIdx, Vec.fill(nWays)(wdata), wmask)
       }
-      val s1_meta = tag_array.read(metaReq.bits.idx, metaReq.valid && !metaReq.bits.write)
+      val s1_meta = tag_array.read(metaIdx, metaReq.valid && !metaReq.bits.write)
       val s1_meta_uncorrected = s1_meta.map(tECC.decode(_).uncorrected.asTypeOf(new L1Metadata))
+      val s1_tag = s1_paddr >> untagBits
       val s1_meta_hit_way = s1_meta_uncorrected.map(r => r.coh.isValid() && r.tag === s1_tag).asUInt
       val s1_meta_hit_state = ClientMetadata.onReset.fromBits(
         s1_meta_uncorrected.map(r => Mux(r.tag === s1_tag, r.coh.asUInt, UInt(0)))

@@ -254,14 +256,14 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   metaArb.io.in(1).valid := s2_meta_error && (s2_valid_masked || s2_flush_valid_pre_tag_ecc || s2_probe)
   metaArb.io.in(1).bits.write := true
   metaArb.io.in(1).bits.way_en := PriorityEncoderOH(s2_meta_errors)
-  metaArb.io.in(1).bits.idx := Mux(s2_probe, probe_bits.address, s2_req.addr)(idxMSB, idxLSB)
+  metaArb.io.in(1).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, Mux(s2_probe, probe_bits.address, s2_req.addr)(idxMSB, 0))
   metaArb.io.in(1).bits.data := PriorityMux(s2_meta_errors, s2_meta_corrected)
 
   // tag updates on hit/miss
   metaArb.io.in(2).valid := (s2_valid_hit && s2_update_meta) || (s2_victimize && !s2_victim_dirty)
   metaArb.io.in(2).bits.write := true
   metaArb.io.in(2).bits.way_en := s2_victim_way
-  metaArb.io.in(2).bits.idx := s2_req.addr(idxMSB, idxLSB)
+  metaArb.io.in(2).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, s2_req.addr(idxMSB, 0))
   metaArb.io.in(2).bits.data.coh := Mux(s2_valid_hit, s2_new_hit_state, ClientMetadata.onReset)
   metaArb.io.in(2).bits.data.tag := s2_req.addr >> untagBits
 

@@ -463,7 +465,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   assert(!metaArb.io.in(3).valid || metaArb.io.in(3).ready)
   metaArb.io.in(3).bits.write := true
   metaArb.io.in(3).bits.way_en := s2_victim_way
-  metaArb.io.in(3).bits.idx := s2_req.addr(idxMSB, idxLSB)
+  metaArb.io.in(3).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, s2_req.addr(idxMSB, 0))
   metaArb.io.in(3).bits.data.coh := s2_hit_state.onGrant(s2_req.cmd, tl_out.d.bits.param)
   metaArb.io.in(3).bits.data.tag := s2_req.addr >> untagBits
   // don't accept uncached grants if there's a structural hazard on s2_data...

@@ -485,7 +487,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   metaArb.io.in(6).valid := tl_out.b.valid && !block_probe
   tl_out.b.ready := metaArb.io.in(6).ready && !block_probe && !s1_valid && (!s2_valid || s2_valid_hit)
   metaArb.io.in(6).bits.write := false
-  metaArb.io.in(6).bits.idx := tl_out.b.bits.address(idxMSB, idxLSB)
+  metaArb.io.in(6).bits.addr := Cat(io.cpu.req.bits.addr >> paddrBits, tl_out.b.bits.address)
   metaArb.io.in(6).bits.way_en := ~UInt(0, nWays)
   metaArb.io.in(6).bits.data := metaArb.io.in(4).bits.data
 

@@ -529,7 +531,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   }
   when (release_state === s_probe_retry) {
     metaArb.io.in(6).valid := true
-    metaArb.io.in(6).bits.idx := probe_bits.address(idxMSB, idxLSB)
+    metaArb.io.in(6).bits.addr := Cat(io.cpu.req.bits.addr >> paddrBits, probe_bits.address)
     when (metaArb.io.in(6).ready) {
       release_state := s_ready
       s1_probe := true

@@ -572,7 +574,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   metaArb.io.in(4).valid := release_state.isOneOf(s_voluntary_write_meta, s_probe_write_meta)
   metaArb.io.in(4).bits.write := true
   metaArb.io.in(4).bits.way_en := releaseWay
-  metaArb.io.in(4).bits.idx := tl_out.c.bits.address(idxMSB, idxLSB)
+  metaArb.io.in(4).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, tl_out.c.bits.address(idxMSB, 0))
   metaArb.io.in(4).bits.data.coh := newCoh
   metaArb.io.in(4).bits.data.tag := tl_out.c.bits.address >> untagBits
   when (metaArb.io.in(4).fire()) { release_state := s_ready }

@@ -638,7 +640,6 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   val flushCounterNext = flushCounter +& 1
   val flushDone = (flushCounterNext >> log2Ceil(nSets)) === nWays
   val flushCounterWrap = flushCounterNext(log2Ceil(nSets)-1, 0)
-  when (tl_out_a.fire() && !s2_uncached) { flushed := false }
   when (s2_valid_masked && s2_req.cmd === M_FLUSH_ALL) {
     io.cpu.s2_nack := !flushed
     when (!flushed) {

@@ -648,24 +649,28 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   s1_flush_valid := metaArb.io.in(5).fire() && !s1_flush_valid && !s2_flush_valid_pre_tag_ecc && release_state === s_ready && !release_ack_wait
   metaArb.io.in(5).valid := flushing
   metaArb.io.in(5).bits.write := false
-  metaArb.io.in(5).bits.idx := flushCounter
+  metaArb.io.in(5).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, flushCounter(idxBits-1, 0) << blockOffBits)
   metaArb.io.in(5).bits.way_en := ~UInt(0, nWays)
   metaArb.io.in(5).bits.data := metaArb.io.in(4).bits.data
-  when (flushing) {
-    s1_victim_way := flushCounter >> log2Up(nSets)
-    when (s2_flush_valid) {
-      flushCounter := flushCounterNext
-      when (flushDone) {
-        flushed := true
-        if (!isPow2(nWays)) flushCounter := flushCounterWrap
+  // Only flush D$ on FENCE.I if some cached executable regions are untracked.
+  if (!edge.manager.managers.forall(m => !m.supportsAcquireB || !m.executable || m.regionType >= RegionType.TRACKED)) {
+    when (tl_out_a.fire() && !s2_uncached) { flushed := false }
+    when (flushing) {
+      s1_victim_way := flushCounter >> log2Up(nSets)
+      when (s2_flush_valid) {
+        flushCounter := flushCounterNext
+        when (flushDone) {
+          flushed := true
+          if (!isPow2(nWays)) flushCounter := flushCounterWrap
+        }
+      }
+      when (flushed && release_state === s_ready && !release_ack_wait) {
+        flushing := false
       }
     }
-    when (flushed && release_state === s_ready && !release_ack_wait) {
-      flushing := false
-    }
   }
   metaArb.io.in(0).valid := resetting
-  metaArb.io.in(0).bits.idx := flushCounter
+  metaArb.io.in(0).bits.addr := metaArb.io.in(5).bits.addr
   metaArb.io.in(0).bits.write := true
   metaArb.io.in(0).bits.way_en := ~UInt(0, nWays)
   metaArb.io.in(0).bits.data.coh := ClientMetadata.onReset
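
Note: the flush logic above is now wrapped in a Scala-level if over the TileLink managers, so a design whose executable regions are all coherence-tracked never elaborates the FENCE.I flush hardware at all. A generic sketch of that elaboration-versus-runtime split, assuming an illustrative module that is not from the source:

    import Chisel._

    // A Scala `if` is resolved when the circuit is elaborated, so the guarded
    // logic is simply absent when the predicate is statically false; a Chisel
    // `when` would always generate the logic plus a mux.
    class ElabTimeGuardSketch(generateFlush: Boolean) extends Module {
      val io = new Bundle {
        val trigger = Bool(INPUT)
        val busy    = Bool(OUTPUT)
      }
      val flushing = Reg(init = Bool(false))
      if (generateFlush) {                           // elaboration-time choice
        when (io.trigger) { flushing := Bool(true) } // runtime condition
      }
      io.busy := flushing
    }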

@@ -86,6 +86,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
   val fq = withReset(reset || io.cpu.req.valid) { Module(new ShiftQueue(new FrontendResp, 5, flow = true)) }
 
   val s0_valid = io.cpu.req.valid || !fq.io.mask(fq.io.mask.getWidth-3)
+  val s1_valid = RegNext(s0_valid)
   val s1_pc = Reg(UInt(width=vaddrBitsExtended))
   val s1_speculative = Reg(Bool())
   val s2_valid = RegInit(false.B)

@@ -143,13 +144,13 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
   icache.io.s2_kill := s2_speculative && !s2_tlb_resp.cacheable || s2_xcpt
   icache.io.s2_prefetch := s2_tlb_resp.prefetchable
 
-  fq.io.enq.valid := s2_valid && (icache.io.resp.valid || !s2_tlb_resp.miss && icache.io.s2_kill)
+  fq.io.enq.valid := RegNext(s1_valid) && s2_valid && (icache.io.resp.valid || !s2_tlb_resp.miss && icache.io.s2_kill)
   fq.io.enq.bits.pc := s2_pc
   io.cpu.npc := alignPC(Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc))
 
   fq.io.enq.bits.data := icache.io.resp.bits.data
   fq.io.enq.bits.mask := UInt((1 << fetchWidth)-1) << s2_pc.extract(log2Ceil(fetchWidth)+log2Ceil(coreInstBytes)-1, log2Ceil(coreInstBytes))
-  fq.io.enq.bits.replay := icache.io.s2_kill && !icache.io.resp.valid && !s2_xcpt
+  fq.io.enq.bits.replay := icache.io.resp.bits.replay || icache.io.s2_kill && !icache.io.resp.valid && !s2_xcpt
   fq.io.enq.bits.btb.valid := s2_btb_resp_valid
   fq.io.enq.bits.btb.bits := s2_btb_resp_bits
   fq.io.enq.bits.xcpt := s2_tlb_resp

@@ -43,24 +43,24 @@ class IBuf(implicit p: Parameters) extends CoreModule {
   val nIC = Mux(io.imem.bits.btb.valid && io.imem.bits.btb.bits.taken, io.imem.bits.btb.bits.bridx +& 1, UInt(fetchWidth)) - pcWordBits
   val nICReady = nReady - nBufValid
   val nValid = Mux(io.imem.valid, nIC, UInt(0)) + nBufValid
-  io.imem.ready := nReady >= nBufValid && (nICReady >= nIC || n >= nIC - nICReady)
+  io.imem.ready := io.inst(0).ready && nReady >= nBufValid && (nICReady >= nIC || n >= nIC - nICReady)
 
   if (n > 0) {
-    nBufValid := Mux(nReady >= nBufValid, UInt(0), nBufValid - nReady)
-    if (n > 1) when (nReady > 0 && nReady < nBufValid) {
-      val shiftedBuf = shiftInsnRight(buf.data(n*coreInstBits-1, coreInstBits), (nReady-1)(log2Ceil(n-1)-1,0))
-      buf.data := Cat(buf.data(n*coreInstBits-1, (n-1)*coreInstBits), shiftedBuf((n-1)*coreInstBits-1, 0))
-      buf.pc := buf.pc & ~pcWordMask | (buf.pc + (nReady << log2Ceil(coreInstBytes))) & pcWordMask
-      ibufBTBResp.bridx := ibufBTBResp.bridx - nReady
-    }
-    when (io.imem.valid && nReady >= nBufValid && nICReady < nIC && n >= nIC - nICReady) {
-      val shamt = pcWordBits + nICReady
-      nBufValid := nIC - nICReady
-      buf := io.imem.bits
-      buf.data := shiftInsnRight(io.imem.bits.data, shamt)(n*coreInstBits-1,0)
-      buf.pc := io.imem.bits.pc & ~pcWordMask | (io.imem.bits.pc + (nICReady << log2Ceil(coreInstBytes))) & pcWordMask
-      ibufBTBHit := io.imem.bits.btb.valid
-      when (io.imem.bits.btb.valid) {
+    when (io.inst(0).ready) {
+      nBufValid := Mux(nReady >= nBufValid, UInt(0), nBufValid - nReady)
+      if (n > 1) when (nReady > 0 && nReady < nBufValid) {
+        val shiftedBuf = shiftInsnRight(buf.data(n*coreInstBits-1, coreInstBits), (nReady-1)(log2Ceil(n-1)-1,0))
+        buf.data := Cat(buf.data(n*coreInstBits-1, (n-1)*coreInstBits), shiftedBuf((n-1)*coreInstBits-1, 0))
+        buf.pc := buf.pc & ~pcWordMask | (buf.pc + (nReady << log2Ceil(coreInstBytes))) & pcWordMask
+        ibufBTBResp.bridx := ibufBTBResp.bridx - nReady
+      }
+      when (io.imem.valid && nReady >= nBufValid && nICReady < nIC && n >= nIC - nICReady) {
+        val shamt = pcWordBits + nICReady
+        nBufValid := nIC - nICReady
+        buf := io.imem.bits
+        buf.data := shiftInsnRight(io.imem.bits.data, shamt)(n*coreInstBits-1,0)
+        buf.pc := io.imem.bits.pc & ~pcWordMask | (io.imem.bits.pc + (nICReady << log2Ceil(coreInstBytes))) & pcWordMask
+        ibufBTBHit := io.imem.bits.btb.valid
        ibufBTBResp := io.imem.bits.btb.bits
        ibufBTBResp.bridx := io.imem.bits.btb.bits.bridx + nICReady
      }

@@ -76,10 +76,11 @@ class IBuf(implicit p: Parameters) extends CoreModule {
   val icMask = (~UInt(0, fetchWidth*coreInstBits) << (nBufValid << log2Ceil(coreInstBits)))(fetchWidth*coreInstBits-1,0)
   val inst = icData & icMask | buf.data & ~icMask
 
-  val valid = (UIntToOH(nValid) - 1)(fetchWidth-1, 0)
-  val bufMask = UIntToOH(nBufValid) - 1
+  val valid = UIntToOH1(nValid, fetchWidth)
+  val bufMask = UIntToOH1(nBufValid, fetchWidth)
   val xcpt = (0 until bufMask.getWidth).map(i => Mux(bufMask(i), buf.xcpt, io.imem.bits.xcpt))
-  val ic_replay = valid & (Mux(buf.replay, bufMask, UInt(0)) | Mux(io.imem.bits.replay, ~bufMask, UInt(0)))
+  val buf_replay = Mux(buf.replay, bufMask, UInt(0))
+  val ic_replay = buf_replay | Mux(io.imem.bits.replay, valid & ~bufMask, UInt(0))
   val ibufBTBHitMask = Mux(ibufBTBHit, UIntToOH(ibufBTBResp.bridx), UInt(0))
   assert(!io.imem.valid || !io.imem.bits.btb.valid || io.imem.bits.btb.bits.bridx >= pcWordBits)
   val icBTBHitMask = Mux(io.imem.bits.btb.valid, UIntToOH(io.imem.bits.btb.bits.bridx +& nBufValid - pcWordBits), UInt(0))
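
Note: UIntToOH1(n, w) above stands in for (UIntToOH(n) - 1)(w-1, 0), a mask with the low n bits set; computing it as a shift of a constant drops the decoder-plus-subtractor from the mask path. A hedged sketch of the helper (the real one lives in freechips.rocketchip.util; this is the usual formulation, not copied from this commit):

    import Chisel._

    // Mask of the `n` low-order bits: ~(-1 << n), truncated to `width` bits.
    object UIntToOH1Sketch {
      def apply(n: UInt, width: Int): UInt = ~(SInt(-1, width).asUInt << n)(width-1, 0)
    }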

@@ -97,18 +98,19 @@ class IBuf(implicit p: Parameters) extends CoreModule {
 
     if (usingCompressed) {
       val replay = ic_replay(j) || (!exp.io.rvc && (btbHitMask(j) || ic_replay(j+1)))
-      io.inst(i).valid := valid(j) && (exp.io.rvc || valid(j+1) || xcpt(j+1).asUInt.orR || replay)
+      val full_insn = exp.io.rvc || valid(j+1) || xcpt(j+1).asUInt.orR || buf_replay(j)
+      io.inst(i).valid := valid(j) && full_insn
       io.inst(i).bits.xcpt0 := xcpt(j)
       io.inst(i).bits.xcpt1 := Mux(exp.io.rvc, 0.U, xcpt(j+1).asUInt).asTypeOf(new FrontendExceptions)
       io.inst(i).bits.replay := replay
       io.inst(i).bits.btb_hit := btbHitMask(j) || (!exp.io.rvc && btbHitMask(j+1))
       io.inst(i).bits.rvc := exp.io.rvc
 
-      when (io.inst(i).fire()) { nReady := Mux(exp.io.rvc, j+1, j+2) }
+      when (full_insn && (i == 0 || io.inst(i).ready)) { nReady := Mux(exp.io.rvc, j+1, j+2) }
 
       expand(i+1, Mux(exp.io.rvc, j+1, j+2), Mux(exp.io.rvc, curInst >> 16, curInst >> 32))
     } else {
-      when (io.inst(i).ready) { nReady := i+1 }
+      when (i == 0 || io.inst(i).ready) { nReady := i+1 }
       io.inst(i).valid := valid(i)
       io.inst(i).bits.xcpt0 := xcpt(i)
       io.inst(i).bits.xcpt1 := 0.U.asTypeOf(new FrontendExceptions)

@@ -63,6 +63,7 @@ class ICache(val icacheParams: ICacheParams, val hartid: Int)(implicit p: Parameters)
 
 class ICacheResp(outer: ICache) extends Bundle {
   val data = UInt(width = outer.icacheParams.fetchBytes*8)
+  val replay = Bool()
   val ae = Bool()
 
   override def cloneType = new ICacheResp(outer).asInstanceOf[this.type]

@@ -259,7 +260,8 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
 
   io.resp.bits.data := s2_data_decoded.uncorrected
   io.resp.bits.ae := s2_tl_error
-  io.resp.valid := s2_valid && s2_hit && !s2_disparity
+  io.resp.bits.replay := s2_disparity
+  io.resp.valid := s2_valid && s2_hit
 
   tl_in.map { tl =>
     val respValid = RegInit(false.B)
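
Note: the ICache now reports an ECC disparity through the new replay bit instead of gating io.resp.valid on it, and the frontend refetches (see the fq.io.enq.bits.replay hunk earlier). A minimal sketch of that handshake, with illustrative names:

    import Chisel._

    // Present the response on every hit and report corrections out of band,
    // rather than gating valid on the ECC check, which lengthens the hit path.
    class RespWithReplaySketch extends Module {
      val io = new Bundle {
        val hit       = Bool(INPUT)
        val disparity = Bool(INPUT)  // an ECC correction happened this cycle
        val valid     = Bool(OUTPUT)
        val replay    = Bool(OUTPUT) // consumer should retry the access
      }
      io.valid := io.hit             // no longer qualified by !disparity
      io.replay := io.disparity
    }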

@@ -135,7 +135,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(p)
 
   val l2_refill = RegNext(false.B)
   io.dpath.perf.l2miss := false
-  val (l2_hit, l2_pte) = if (coreParams.nL2TLBEntries == 0) (false.B, Wire(new PTE)) else {
+  val (l2_hit, l2_valid, l2_pte) = if (coreParams.nL2TLBEntries == 0) (false.B, false.B, Wire(new PTE)) else {
    val code = new ParityCode
    require(isPow2(coreParams.nL2TLBEntries))
    val idxBits = log2Ceil(coreParams.nL2TLBEntries)

@@ -176,23 +176,25 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(p)
 
    val s0_valid = !l2_refill && arb.io.out.fire()
    val s1_valid = RegNext(s0_valid)
-   val s2_valid = RegNext(s1_valid && valid(r_idx))
+   val s2_valid = RegNext(s1_valid)
    val s1_rdata = ram.read(arb.io.out.bits.addr(idxBits-1, 0), s0_valid)
    val s2_rdata = code.decode(RegEnable(s1_rdata, s1_valid))
-   when (s2_valid && s2_rdata.error) { valid := 0.U }
+   val s2_valid_bit = RegEnable(valid(r_idx), s1_valid)
+   val s2_g = RegEnable(g(r_idx), s1_valid)
+   when (s2_valid && s2_valid_bit && s2_rdata.error) { valid := 0.U }
 
    val s2_entry = s2_rdata.uncorrected.asTypeOf(new Entry)
-   val s2_hit = s2_valid && !s2_rdata.error && r_tag === s2_entry.tag
-   io.dpath.perf.l2miss := s2_valid && !(r_tag === s2_entry.tag)
+   val s2_hit = s2_valid && s2_valid_bit && !s2_rdata.error && r_tag === s2_entry.tag
+   io.dpath.perf.l2miss := s2_valid && !(s2_valid_bit && r_tag === s2_entry.tag)
    val s2_pte = Wire(new PTE)
    s2_pte := s2_entry
-   s2_pte.g := g(r_idx)
+   s2_pte.g := s2_g
    s2_pte.v := true
 
-   (s2_hit, s2_pte)
+   (s2_hit, s2_valid && s2_valid_bit, s2_pte)
   }
 
-  io.mem.req.valid := state === s_req && !l2_hit
+  io.mem.req.valid := state === s_req && !l2_valid
   io.mem.req.bits.phys := Bool(true)
   io.mem.req.bits.cmd := M_XRD
   io.mem.req.bits.typ := log2Ceil(xLen/8)
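
Note: the PTW hunk above retimes the L2 TLB lookup: valid(r_idx) and g(r_idx) are sampled into one-bit registers when stage 1 fires instead of being re-indexed combinationally in stage 2. A generic sketch of that RegEnable retiming pattern, with illustrative names:

    import Chisel._

    // Read a wide state vector in the first stage and carry one registered bit
    // into the second, keeping the index decode off the later critical path.
    class PipelinedBitReadSketch(n: Int) extends Module {
      val io = new Bundle {
        val idx     = UInt(INPUT, log2Up(n))
        val s1_fire = Bool(INPUT)
        val s2_bit  = Bool(OUTPUT)
      }
      val bits = Reg(init = UInt(0, n))                // the wide vector
      io.s2_bit := RegEnable(bits(io.idx), io.s1_fire) // sampled at end of S1
    }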

@@ -231,7 +233,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(p)
      s1_kill := true
      count := count + 1
      r_pte.ppn := pte_cache_data
-    }.elsewhen (io.mem.req.ready) {
+    }.elsewhen (io.mem.req.fire()) {
      state := s_wait1
    }
  }

@@ -376,14 +376,18 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
   val mem_cfi_taken = (mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal
   val mem_direction_misprediction = (Bool(coreParams.jumpInFrontend) || mem_reg_btb_hit) && mem_ctrl.branch && mem_br_taken =/= mem_reg_btb_resp.taken
   val mem_misprediction = if (usingBTB) mem_wrong_npc else mem_cfi_taken
-  take_pc_mem := mem_reg_valid && (mem_misprediction || mem_reg_sfence || (mem_ctrl.jalr && csr.io.status.debug))
+  take_pc_mem := mem_reg_valid && (mem_misprediction || mem_reg_sfence)
 
   mem_reg_valid := !ctrl_killx
   mem_reg_replay := !take_pc_mem_wb && replay_ex
   mem_reg_xcpt := !ctrl_killx && ex_xcpt
   mem_reg_xcpt_interrupt := !take_pc_mem_wb && ex_reg_xcpt_interrupt
 
-  when (ex_pc_valid) {
+  // on pipeline flushes, cause mem_npc to hold the sequential npc, which
+  // will drive the W-stage npc mux
+  when (mem_reg_valid && mem_reg_flush_pipe) {
+    mem_reg_sfence := false
+  }.elsewhen (ex_pc_valid) {
     mem_ctrl := ex_ctrl
     mem_reg_rvc := ex_reg_rvc
     mem_reg_load := ex_ctrl.mem && isRead(ex_ctrl.mem_cmd)

@@ -398,10 +402,16 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
     mem_reg_inst := ex_reg_inst
     mem_reg_pc := ex_reg_pc
     mem_reg_wdata := alu.io.out
+
     when (ex_ctrl.rxs2 && (ex_ctrl.mem || ex_ctrl.rocc || ex_sfence)) {
       val typ = Mux(ex_ctrl.rocc, log2Ceil(xLen/8).U, ex_ctrl.mem_type)
       mem_reg_rs2 := new StoreGen(typ, 0.U, ex_rs(1), coreDataBytes).data
     }
+    when (ex_ctrl.jalr && csr.io.status.debug) {
+      // flush I$ on D-mode JALR to effect uncached fetch without D$ flush
+      mem_ctrl.fence_i := true
+      mem_reg_flush_pipe := true
+    }
   }
 
   val mem_breakpoint = (mem_reg_load && bpu.io.xcpt_ld) || (mem_reg_store && bpu.io.xcpt_st)

@@ -438,9 +448,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
     wb_reg_cause := mem_cause
     wb_reg_inst := mem_reg_inst
     wb_reg_pc := mem_reg_pc
-    when (mem_ctrl.jalr && csr.io.status.debug) {
-      wb_ctrl.fence_i := true
-    }
   }
 
   val (wb_xcpt, wb_cause) = checkExceptions(List(

@@ -458,7 +465,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
   val replay_wb_common = io.dmem.s2_nack || wb_reg_replay
   val replay_wb_rocc = wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready
   val replay_wb = replay_wb_common || replay_wb_rocc
-  val wb_npc = encodeVirtualAddress(wb_reg_pc, wb_reg_pc + Mux(replay_wb, 0.U, Mux(wb_reg_rvc, 2.U, 4.U)))
   take_pc_wb := replay_wb || wb_xcpt || csr.io.eret || wb_reg_flush_pipe
 
   // writeback arbitration

@@ -585,9 +591,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
   io.imem.req.valid := take_pc
   io.imem.req.bits.speculative := !take_pc_wb
   io.imem.req.bits.pc :=
     Mux(wb_xcpt || csr.io.eret, csr.io.evec, // exception or [m|s]ret
-    Mux(replay_wb || wb_reg_flush_pipe, wb_npc, // replay or flush
-    mem_npc)) // branch misprediction
+    Mux(replay_wb, wb_reg_pc, // replay
+    mem_npc)) // flush or branch misprediction
   io.imem.flush_icache := wb_reg_valid && wb_ctrl.fence_i && !io.dmem.s2_nack
   io.imem.sfence.valid := wb_reg_valid && wb_reg_sfence
   io.imem.sfence.bits.rs1 := wb_ctrl.mem_type(0)

@@ -596,7 +602,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
   io.imem.sfence.bits.asid := wb_reg_rs2
   io.ptw.sfence := io.imem.sfence
 
-  ibuf.io.inst(0).ready := !ctrl_stalld || csr.io.interrupt
+  ibuf.io.inst(0).ready := !ctrl_stalld
 
   io.imem.btb_update.valid := (mem_reg_replay && mem_reg_btb_hit) || (mem_reg_valid && !take_pc_wb && mem_wrong_npc && (!mem_cfi || mem_cfi_taken))
   io.imem.btb_update.bits.isValid := !mem_reg_replay && mem_cfi

@@ -20,20 +20,20 @@ class ShiftQueue[T <: Data](gen: T,
   private val valid = RegInit(Vec.fill(entries) { Bool(false) })
   private val elts = Reg(Vec(entries, gen))
 
-  private val do_enq = io.enq.fire()
-  private val do_deq = io.deq.fire()
-
   for (i <- 0 until entries) {
+    def paddedValid(i: Int) = if (i == -1) true.B else if (i == entries) false.B else valid(i)
+
     val wdata = if (i == entries-1) io.enq.bits else Mux(valid(i+1), elts(i+1), io.enq.bits)
-    val shiftDown = if (i == entries-1) false.B else io.deq.ready && valid(i+1)
-    val enqNew = io.enq.fire() && Mux(io.deq.ready, valid(i), !valid(i) && (if (i == 0) true.B else valid(i-1)))
-    when (shiftDown || enqNew) { elts(i) := wdata }
-  }
+    val wen =
+      Mux(io.deq.ready,
+        paddedValid(i+1) || io.enq.fire() && valid(i),
+        io.enq.fire() && paddedValid(i-1) && !valid(i))
+    when (wen) { elts(i) := wdata }
 
-  val padded = Seq(true.B) ++ valid ++ Seq(false.B)
-  for (i <- 0 until entries) {
-    when ( do_enq && !do_deq && padded(i+0)) { valid(i) := true.B }
-    when (!do_enq && do_deq && !padded(i+2)) { valid(i) := false.B }
+    valid(i) :=
+      Mux(io.deq.ready,
+        paddedValid(i+1) || io.enq.fire() && (Bool(i == 0 && !flow) || valid(i)),
+        io.enq.fire() && paddedValid(i-1) || valid(i))
   }
 
   io.enq.ready := !valid(entries-1)
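
Note: the rewritten ShiftQueue computes each elts(i)/valid(i) update as a single Mux on io.deq.ready over neighboring valid bits, replacing the old do_enq/do_deq product terms. As a reference for the intended semantics, here is a small pure-Scala model (illustrative only, and ignoring the flow-through case): entries shift toward index 0 on a dequeue, and an enqueue lands in the lowest free slot.

    // Software model: one step = optional dequeue from the head, then enqueue
    // into the lowest free slot, mirroring valid(i) := Mux(io.deq.ready, ...).
    final case class ShiftQueueModel(entries: Int, q: Vector[Int] = Vector()) {
      def step(enq: Option[Int], deqReady: Boolean): (Option[Int], ShiftQueueModel) = {
        val deq = if (deqReady) q.headOption else None
        val afterDeq = if (deqReady) q.drop(1) else q
        val afterEnq = enq match {
          case Some(x) if afterDeq.size < entries => afterDeq :+ x
          case _                                  => afterDeq
        }
        (deq, copy(q = afterEnq))
      }
    }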