Merge pull request #914 from freechipsproject/critical-paths
Fix some critical paths
This commit is contained in:
commit
d66e8f8e80
@ -89,7 +89,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
||||
|
||||
val q_depth = if (rational) (2 min maxUncachedInFlight-1) else 0
|
||||
val tl_out_a = Wire(tl_out.a)
|
||||
tl_out.a <> (if (q_depth == 0) tl_out_a else Queue(tl_out_a, q_depth, flow = true, pipe = true))
|
||||
tl_out.a <> (if (q_depth == 0) tl_out_a else Queue(tl_out_a, q_depth, flow = true))
|
||||
val tl_out_c = Wire(tl_out.c)
|
||||
tl_out.c <> (if (cacheParams.acquireBeforeRelease) Queue(tl_out_c, cacheDataBeats, flow = true) else tl_out_c)
|
||||
|
||||
@ -184,8 +184,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
||||
.reduce (_|_))
|
||||
(s1_meta_hit_way, s1_meta_hit_state, s1_meta, s1_meta_uncorrected(s1_victim_way))
|
||||
}
|
||||
val s1_data_way = Mux(inWriteback, releaseWay, s1_hit_way)
|
||||
val s1_data = Mux1H(s1_data_way, data.io.resp) // retime into s2 if critical
|
||||
val s1_data_way = Wire(init = Mux(inWriteback, releaseWay, s1_hit_way))
|
||||
val s1_all_data_ways = Vec(data.io.resp :+ dummyEncodeData(tl_out.d.bits.data))
|
||||
val s1_mask = Mux(s1_req.cmd === M_PWR, io.cpu.s1_data.mask, new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).mask)
|
||||
|
||||
val s2_valid = Reg(next=s1_valid_masked && !s1_sfence, init=Bool(false)) && !io.cpu.s2_xcpt.asUInt.orR
|
||||
@ -210,7 +210,16 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
||||
val s2_meta_errors = s2_meta.map(_.error).asUInt
|
||||
val s2_meta_error = s2_meta_errors.orR
|
||||
val s2_flush_valid = s2_flush_valid_pre_tag_ecc && !s2_meta_error
|
||||
val s2_data = RegEnable(s1_data, s1_valid || inWriteback)
|
||||
// s2_data: stage-2 copy of the data selected by the one-hot s1_data_way from
// s1_all_data_ways (the data array responses plus one extra appended entry,
// dummyEncodeData(tl_out.d.bits.data)).
//
// Two structurally different implementations are generated, chosen at
// elaboration time by cacheParams.pipelineWayMux (and only when nWays > 1):
//   * pipelined: register the way-select and every candidate separately, then
//     perform the Mux1H on the registered values, i.e. after the flop;
//   * default:   perform the Mux1H first and register only its result.
// NOTE(review): both forms are presumably meant to yield the same s2 value, with
// the pipelined form moving the way mux out of the s1 path -- confirm.
val s2_data = {
|
||||
// Capture enable: a valid s1 access, an in-progress writeback, or a beat
// accepted on the TileLink D channel.
val en = s1_valid || inWriteback || tl_out.d.fire()
|
||||
if (cacheParams.pipelineWayMux && nWays > 1) {
|
||||
val s2_data_way = RegEnable(s1_data_way, en)
|
||||
// `0 to nWays` is inclusive, so nWays + 1 registers are created; index nWays
// would be the appended D-channel entry, assuming data.io.resp has nWays
// elements -- TODO confirm. Each register is enabled only when its own
// select bit is set, so unselected candidates keep their previous contents.
val s2_all_data_ways = (0 to nWays).map(i => RegEnable(s1_all_data_ways(i), en && s1_data_way(i)))
|
||||
Mux1H(s2_data_way, s2_all_data_ways)
|
||||
} else {
|
||||
// Way mux evaluated in s1; only the selected value is registered.
RegEnable(Mux1H(s1_data_way, s1_all_data_ways), en)
|
||||
}
|
||||
}
|
||||
val s2_probe_way = RegEnable(s1_hit_way, s1_probe)
|
||||
val s2_probe_state = RegEnable(s1_hit_state, s1_probe)
|
||||
val s2_hit_way = RegEnable(s1_hit_way, s1_valid_not_nacked)
|
||||
@ -417,7 +426,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
||||
}
|
||||
}
|
||||
when (grantIsUncachedData) {
|
||||
s2_data := dummyEncodeData(tl_out.d.bits.data)
|
||||
s1_data_way := 1.U << nWays
|
||||
s2_req.cmd := M_XRD
|
||||
s2_req.typ := req.typ
|
||||
s2_req.tag := req.tag
|
||||
|
@ -97,7 +97,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
||||
val s2_speculative = Reg(init=Bool(false))
|
||||
val s2_partial_insn_valid = RegInit(false.B)
|
||||
val s2_partial_insn = Reg(UInt(width = coreInstBits))
|
||||
val s2_wrong_path = Reg(Bool())
|
||||
val wrong_path = Reg(Bool())
|
||||
|
||||
val s1_base_pc = ~(~s1_pc | (fetchBytes - 1))
|
||||
val ntpc = s1_base_pc + fetchBytes.U
|
||||
@ -180,11 +180,12 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
||||
Mux(returnAddrLSBs(log2Ceil(fetchWidth)), ntpc, s1_base_pc | ((returnAddrLSBs << log2Ceil(coreInstBytes)) & (fetchBytes - 1)))
|
||||
btb.io.ras_update.bits.cfiType := btb.io.resp.bits.cfiType
|
||||
btb.io.ras_update.bits.prediction.valid := true
|
||||
} else when (fq.io.enq.fire()) {
|
||||
} else {
|
||||
val s2_btb_hit = s2_btb_resp_valid && s2_btb_resp_bits.taken
|
||||
val s2_base_pc = ~(~s2_pc | (fetchBytes-1))
|
||||
val taken_idx = Wire(UInt())
|
||||
val after_idx = Wire(UInt())
|
||||
val useRAS = Wire(init=false.B)
|
||||
|
||||
def scanInsns(idx: Int, prevValid: Bool, prevBits: UInt, prevTaken: Bool): Bool = {
|
||||
val prevRVI = prevValid && prevBits(1,0) === 3
|
||||
@ -200,49 +201,57 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
||||
val rvcBranch = bits === Instructions.C_BEQZ || bits === Instructions.C_BNEZ
|
||||
val rvcJAL = Bool(xLen == 32) && bits === Instructions.C_JAL
|
||||
val rvcJump = bits === Instructions.C_J || rvcJAL
|
||||
val rvcImm = Mux(bits(14), new RVCDecoder(bits).bImm.asSInt, 0.S) | Mux(bits(14,13) === 1, new RVCDecoder(bits).jImm.asSInt, 0.S)
|
||||
val rvcImm = Mux(bits(14), new RVCDecoder(bits).bImm.asSInt, new RVCDecoder(bits).jImm.asSInt)
|
||||
val rvcJR = bits === Instructions.C_MV && bits(6,2) === 0
|
||||
val rvcReturn = rvcJR && BitPat("b00?01") === bits(11,7)
|
||||
val rvcJALR = bits === Instructions.C_ADD && bits(6,2) === 0
|
||||
val rvcCall = rvcJAL || rvcJALR
|
||||
val rviImm = Mux(rviBits(3), ImmGen(IMM_UJ, rviBits), 0.S) | Mux(!rviBits(2), ImmGen(IMM_SB, rviBits), 0.S)
|
||||
val rviImm = Mux(rviBits(3), ImmGen(IMM_UJ, rviBits), ImmGen(IMM_SB, rviBits))
|
||||
val taken =
|
||||
prevRVI && (rviJump || rviJALR || rviBranch && s2_btb_resp_bits.bht.taken) ||
|
||||
valid && (rvcJump || rvcJALR || rvcJR || rvcBranch && s2_btb_resp_bits.bht.taken)
|
||||
val predictReturn = btb.io.ras_head.valid && (prevRVI && rviReturn || valid && rvcReturn)
|
||||
val predictBranch =
|
||||
prevRVI && (rviJump || rviBranch && s2_btb_resp_bits.bht.taken) ||
|
||||
valid && (rvcJump || rvcBranch && s2_btb_resp_bits.bht.taken)
|
||||
|
||||
when (!prevTaken) {
|
||||
taken_idx := idx
|
||||
after_idx := idx + 1
|
||||
btb.io.ras_update.valid := !s2_wrong_path && (prevRVI && (rviCall || rviReturn) || valid && (rvcCall || rvcReturn))
|
||||
btb.io.ras_update.valid := fq.io.enq.fire() && !wrong_path && (prevRVI && (rviCall || rviReturn) || valid && (rvcCall || rvcReturn))
|
||||
btb.io.ras_update.bits.prediction.valid := true
|
||||
btb.io.ras_update.bits.cfiType := Mux(Mux(prevRVI, rviReturn, rvcReturn), CFIType.ret, CFIType.call)
|
||||
|
||||
when (!s2_btb_hit) {
|
||||
when (prevRVI && (rviJALR && !(rviReturn && btb.io.ras_head.valid)) ||
|
||||
valid && (rvcJALR || (rvcJR && !btb.io.ras_head.valid))) {
|
||||
s2_wrong_path := true
|
||||
when (fq.io.enq.fire() && taken && !predictBranch && !predictReturn) {
|
||||
wrong_path := true
|
||||
}
|
||||
when (taken) {
|
||||
when (s2_valid && predictReturn) {
|
||||
useRAS := true
|
||||
}
|
||||
when (s2_valid && predictBranch) {
|
||||
val pc = s2_base_pc | (idx*coreInstBytes)
|
||||
val npc =
|
||||
if (idx == 0) pc.asSInt + Mux(prevRVI, rviImm -& 2.S, rvcImm)
|
||||
else Mux(prevRVI, pc - coreInstBytes, pc).asSInt + Mux(prevRVI, rviImm, rvcImm)
|
||||
predicted_npc := Mux(prevRVI && rviReturn || valid && rvcReturn, btb.io.ras_head.bits, npc.asUInt)
|
||||
predicted_npc := npc.asUInt
|
||||
}
|
||||
|
||||
when (prevRVI && rviBranch || valid && rvcBranch) {
|
||||
btb.io.bht_advance.valid := !s2_wrong_path && !s2_btb_resp_valid
|
||||
btb.io.bht_advance.valid := fq.io.enq.fire() && !wrong_path && !s2_btb_resp_valid
|
||||
btb.io.bht_advance.bits := s2_btb_resp_bits
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (idx == fetchWidth-1) {
|
||||
when (fq.io.enq.fire()) {
|
||||
s2_partial_insn_valid := false
|
||||
when (valid && !prevTaken && !rvc) {
|
||||
s2_partial_insn_valid := true
|
||||
s2_partial_insn := bits | 0x3
|
||||
}
|
||||
}
|
||||
prevTaken || taken
|
||||
} else {
|
||||
scanInsns(idx + 1, valid, bits, prevTaken || taken)
|
||||
@ -252,20 +261,24 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
||||
btb.io.ras_update.bits.returnAddr := s2_base_pc + (after_idx << log2Ceil(coreInstBytes))
|
||||
|
||||
val taken = scanInsns(0, s2_partial_insn_valid, s2_partial_insn, false.B)
|
||||
when (s2_btb_hit) {
|
||||
when (useRAS) {
|
||||
predicted_npc := btb.io.ras_head.bits
|
||||
}
|
||||
when (fq.io.enq.fire() && s2_btb_hit) {
|
||||
s2_partial_insn_valid := false
|
||||
}.otherwise {
|
||||
}
|
||||
when (!s2_btb_hit) {
|
||||
fq.io.enq.bits.btb.bits.bridx := taken_idx
|
||||
when (taken) {
|
||||
fq.io.enq.bits.btb.valid := true
|
||||
fq.io.enq.bits.btb.bits.taken := true
|
||||
fq.io.enq.bits.btb.bits.entry := UInt(tileParams.btb.get.nEntries)
|
||||
s2_redirect := true
|
||||
when (fq.io.enq.fire()) { s2_redirect := true }
|
||||
}
|
||||
}
|
||||
}
|
||||
when (s2_redirect) { s2_partial_insn_valid := false }
|
||||
when (io.cpu.req.valid) { s2_wrong_path := false }
|
||||
when (io.cpu.req.valid) { wrong_path := false }
|
||||
}
|
||||
|
||||
io.cpu.resp <> fq.io.deq
|
||||
|
@ -27,6 +27,7 @@ case class DCacheParams(
|
||||
nMMIOs: Int = 1,
|
||||
blockBytes: Int = 64,
|
||||
acquireBeforeRelease: Boolean = false,
|
||||
pipelineWayMux: Boolean = false,
|
||||
scratch: Option[BigInt] = None) extends L1CacheParams {
|
||||
|
||||
def dataScratchpadBytes: Int = scratch.map(_ => nSets*blockBytes).getOrElse(0)
|
||||
|
@ -528,7 +528,12 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
||||
|
||||
val sboard = new Scoreboard(32, true)
|
||||
sboard.clear(ll_wen, ll_waddr)
|
||||
val id_sboard_hazard = checkHazards(hazard_targets, rd => sboard.read(rd) && !(ll_wen && ll_waddr === rd))
|
||||
// Builds the "scoreboard entry for register r is being cleared right now"
// condition. The id_sboard_hazard check uses it as
// `sboard.read(rd) && !id_sboard_clear_bypass(rd)`.
//
// r: register index to compare against the pending write-back address.
// Returns a Bool-like hardware expression; which of the two forms below is
// elaborated depends on the configured D$ data ECC code.
def id_sboard_clear_bypass(r: UInt) = {
|
||||
// ll_waddr arrives late when D$ has ECC, so reshuffle the hazard check
|
||||
// No data ECC (IdentityCode): compare directly against ll_wen / ll_waddr.
if (tileParams.dcache.get.dataECC.isInstanceOf[IdentityCode]) ll_wen && ll_waddr === r
|
||||
// With ECC: test the divider response and the D$ replay response separately
// rather than going through ll_waddr. Parses as
// (div fire && tag === r) || (replay && xpu && waddr === r).
// NOTE(review): presumably equivalent to the ll_wen/ll_waddr form -- confirm
// against how ll_wen and ll_waddr are derived elsewhere in Rocket.
else div.io.resp.fire() && div.io.resp.bits.tag === r || dmem_resp_replay && dmem_resp_xpu && dmem_resp_waddr === r
|
||||
}
|
||||
val id_sboard_hazard = checkHazards(hazard_targets, rd => sboard.read(rd) && !id_sboard_clear_bypass(rd))
|
||||
sboard.set(wb_set_sboard && wb_wen, wb_waddr)
|
||||
|
||||
// stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage.
|
||||
|
Loading…
Reference in New Issue
Block a user