Merge pull request #914 from freechipsproject/critical-paths
Fix some critical paths
Commit d66e8f8e80
@@ -89,7 +89,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   val q_depth = if (rational) (2 min maxUncachedInFlight-1) else 0
   val tl_out_a = Wire(tl_out.a)
-  tl_out.a <> (if (q_depth == 0) tl_out_a else Queue(tl_out_a, q_depth, flow = true, pipe = true))
+  tl_out.a <> (if (q_depth == 0) tl_out_a else Queue(tl_out_a, q_depth, flow = true))
   val tl_out_c = Wire(tl_out.c)
   tl_out.c <> (if (cacheParams.acquireBeforeRelease) Queue(tl_out_c, cacheDataBeats, flow = true) else tl_out_c)

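Dropping pipe = true on the A-channel queue removes the combinational ready path from the queue's output back to the cache when the queue is full, at the cost of one bubble in that corner case. Below is a minimal chisel3 sketch of the tradeoff; AQueueDemo and its ports are invented for illustration and only Queue and its flow/pipe options come from the patch.

import chisel3._
import chisel3.util._

class AQueueDemo extends Module {
  val io = IO(new Bundle {
    val in  = Flipped(Decoupled(UInt(64.W)))
    val out = Decoupled(UInt(64.W))
  })
  // flow = true: when the queue is empty, in.valid/in.bits pass through combinationally,
  // so the common case keeps zero added latency.
  // pipe = true (the option the patch drops): when the queue is full, in.ready depends
  // combinationally on out.ready, i.e. downstream backpressure reaches the producer in
  // the same cycle -- exactly the kind of ready path the change removes.
  io.out <> Queue(io.in, entries = 2, pipe = false, flow = true)
}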
@@ -184,8 +184,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
       .reduce (_|_))
     (s1_meta_hit_way, s1_meta_hit_state, s1_meta, s1_meta_uncorrected(s1_victim_way))
   }
-  val s1_data_way = Mux(inWriteback, releaseWay, s1_hit_way)
-  val s1_data = Mux1H(s1_data_way, data.io.resp) // retime into s2 if critical
+  val s1_data_way = Wire(init = Mux(inWriteback, releaseWay, s1_hit_way))
+  val s1_all_data_ways = Vec(data.io.resp :+ dummyEncodeData(tl_out.d.bits.data))
   val s1_mask = Mux(s1_req.cmd === M_PWR, io.cpu.s1_data.mask, new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).mask)

   val s2_valid = Reg(next=s1_valid_masked && !s1_sfence, init=Bool(false)) && !io.cpu.s2_xcpt.asUInt.orR
@@ -210,7 +210,16 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   val s2_meta_errors = s2_meta.map(_.error).asUInt
   val s2_meta_error = s2_meta_errors.orR
   val s2_flush_valid = s2_flush_valid_pre_tag_ecc && !s2_meta_error
-  val s2_data = RegEnable(s1_data, s1_valid || inWriteback)
+  val s2_data = {
+    val en = s1_valid || inWriteback || tl_out.d.fire()
+    if (cacheParams.pipelineWayMux && nWays > 1) {
+      val s2_data_way = RegEnable(s1_data_way, en)
+      val s2_all_data_ways = (0 to nWays).map(i => RegEnable(s1_all_data_ways(i), en && s1_data_way(i)))
+      Mux1H(s2_data_way, s2_all_data_ways)
+    } else {
+      RegEnable(Mux1H(s1_data_way, s1_all_data_ways), en)
+    }
+  }
   val s2_probe_way = RegEnable(s1_hit_way, s1_probe)
   val s2_probe_state = RegEnable(s1_hit_state, s1_probe)
   val s2_hit_way = RegEnable(s1_hit_way, s1_valid_not_nacked)
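The new s2_data block is the core of the D$ change: with pipelineWayMux set, the one-hot way select and the per-way data are registered separately and the way mux happens in stage 2, instead of muxing in stage 1 and registering the result; the uncached grant data rides along as an extra pseudo-way, which is why a later hunk drives s1_data_way := 1.U << nWays. A standalone chisel3 sketch of the retiming pattern follows; the module, port names, and widths are assumptions for illustration, not the DCache's actual interface.

import chisel3._
import chisel3.util._

class WayMuxRetime(nWays: Int, w: Int) extends Module {
  val io = IO(new Bundle {
    val wayData = Input(Vec(nWays, UInt(w.W)))   // per-way read data (stage 1)
    val waySel  = Input(UInt(nWays.W))           // one-hot way select (stage 1)
    val en      = Input(Bool())
    val data    = Output(UInt(w.W))              // selected data (stage 2)
  })
  // Baseline: select first, then register -- the whole way mux sits in stage 1.
  //   val s2 = RegEnable(Mux1H(io.waySel, io.wayData), io.en)

  // Retimed: register the select and each way's data (each gated by its own select bit),
  // then mux in stage 2, moving the Mux1H off the stage-1 critical path.
  val s2Sel  = RegEnable(io.waySel, io.en)
  val s2Ways = Seq.tabulate(nWays)(i => RegEnable(io.wayData(i), io.en && io.waySel(i)))
  io.data := Mux1H(s2Sel, s2Ways)
}

Because each per-way register is enabled only when its way is selected, the extra flops cost little activity, and the stage-1 path now ends at a register enable rather than an nWays-wide mux.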
@@ -417,7 +426,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
       }
     }
     when (grantIsUncachedData) {
-      s2_data := dummyEncodeData(tl_out.d.bits.data)
+      s1_data_way := 1.U << nWays
       s2_req.cmd := M_XRD
       s2_req.typ := req.typ
       s2_req.tag := req.tag
@@ -97,7 +97,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
   val s2_speculative = Reg(init=Bool(false))
   val s2_partial_insn_valid = RegInit(false.B)
   val s2_partial_insn = Reg(UInt(width = coreInstBits))
-  val s2_wrong_path = Reg(Bool())
+  val wrong_path = Reg(Bool())

   val s1_base_pc = ~(~s1_pc | (fetchBytes - 1))
   val ntpc = s1_base_pc + fetchBytes.U
@@ -180,11 +180,12 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
       Mux(returnAddrLSBs(log2Ceil(fetchWidth)), ntpc, s1_base_pc | ((returnAddrLSBs << log2Ceil(coreInstBytes)) & (fetchBytes - 1)))
     btb.io.ras_update.bits.cfiType := btb.io.resp.bits.cfiType
     btb.io.ras_update.bits.prediction.valid := true
-  } else when (fq.io.enq.fire()) {
+  } else {
     val s2_btb_hit = s2_btb_resp_valid && s2_btb_resp_bits.taken
     val s2_base_pc = ~(~s2_pc | (fetchBytes-1))
     val taken_idx = Wire(UInt())
     val after_idx = Wire(UInt())
+    val useRAS = Wire(init=false.B)

     def scanInsns(idx: Int, prevValid: Bool, prevBits: UInt, prevTaken: Bool): Bool = {
       val prevRVI = prevValid && prevBits(1,0) === 3
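Changing `} else when (fq.io.enq.fire()) {` to `} else {` (together with the fire() gating added to individual assignments in the hunks below) keeps fq.io.enq.ready, which reflects downstream backpressure, out of the next-PC logic: the prediction is computed every cycle and only the state updates wait for the handshake. A small illustrative chisel3 sketch of that structure follows; the module and signal names are invented.

import chisel3._
import chisel3.util._

class GateOnFire extends Module {
  val io = IO(new Bundle {
    val enq      = Decoupled(UInt(32.W))  // enqueue port into a downstream queue
    val predNpc  = Input(UInt(32.W))
    val redirect = Output(Bool())
  })
  // The prediction itself is driven unconditionally, so it does not depend on enq.ready...
  io.enq.valid := true.B
  io.enq.bits  := io.predNpc
  // ...and only the architectural state update is gated on the completed handshake.
  val s2_redirect = RegInit(false.B)
  s2_redirect := false.B
  when (io.enq.valid && io.enq.ready) { s2_redirect := true.B }
  io.redirect := s2_redirect
}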
@@ -200,49 +201,57 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
       val rvcBranch = bits === Instructions.C_BEQZ || bits === Instructions.C_BNEZ
       val rvcJAL = Bool(xLen == 32) && bits === Instructions.C_JAL
       val rvcJump = bits === Instructions.C_J || rvcJAL
-      val rvcImm = Mux(bits(14), new RVCDecoder(bits).bImm.asSInt, 0.S) | Mux(bits(14,13) === 1, new RVCDecoder(bits).jImm.asSInt, 0.S)
+      val rvcImm = Mux(bits(14), new RVCDecoder(bits).bImm.asSInt, new RVCDecoder(bits).jImm.asSInt)
       val rvcJR = bits === Instructions.C_MV && bits(6,2) === 0
       val rvcReturn = rvcJR && BitPat("b00?01") === bits(11,7)
       val rvcJALR = bits === Instructions.C_ADD && bits(6,2) === 0
       val rvcCall = rvcJAL || rvcJALR
-      val rviImm = Mux(rviBits(3), ImmGen(IMM_UJ, rviBits), 0.S) | Mux(!rviBits(2), ImmGen(IMM_SB, rviBits), 0.S)
+      val rviImm = Mux(rviBits(3), ImmGen(IMM_UJ, rviBits), ImmGen(IMM_SB, rviBits))
       val taken =
         prevRVI && (rviJump || rviJALR || rviBranch && s2_btb_resp_bits.bht.taken) ||
         valid && (rvcJump || rvcJALR || rvcJR || rvcBranch && s2_btb_resp_bits.bht.taken)
+      val predictReturn = btb.io.ras_head.valid && (prevRVI && rviReturn || valid && rvcReturn)
+      val predictBranch =
+        prevRVI && (rviJump || rviBranch && s2_btb_resp_bits.bht.taken) ||
+        valid && (rvcJump || rvcBranch && s2_btb_resp_bits.bht.taken)

       when (!prevTaken) {
         taken_idx := idx
         after_idx := idx + 1
-        btb.io.ras_update.valid := !s2_wrong_path && (prevRVI && (rviCall || rviReturn) || valid && (rvcCall || rvcReturn))
+        btb.io.ras_update.valid := fq.io.enq.fire() && !wrong_path && (prevRVI && (rviCall || rviReturn) || valid && (rvcCall || rvcReturn))
         btb.io.ras_update.bits.prediction.valid := true
         btb.io.ras_update.bits.cfiType := Mux(Mux(prevRVI, rviReturn, rvcReturn), CFIType.ret, CFIType.call)

         when (!s2_btb_hit) {
-          when (prevRVI && (rviJALR && !(rviReturn && btb.io.ras_head.valid)) ||
-                valid && (rvcJALR || (rvcJR && !btb.io.ras_head.valid))) {
-            s2_wrong_path := true
+          when (fq.io.enq.fire() && taken && !predictBranch && !predictReturn) {
+            wrong_path := true
           }
-          when (taken) {
+          when (s2_valid && predictReturn) {
+            useRAS := true
+          }
+          when (s2_valid && predictBranch) {
            val pc = s2_base_pc | (idx*coreInstBytes)
            val npc =
              if (idx == 0) pc.asSInt + Mux(prevRVI, rviImm -& 2.S, rvcImm)
              else Mux(prevRVI, pc - coreInstBytes, pc).asSInt + Mux(prevRVI, rviImm, rvcImm)
-            predicted_npc := Mux(prevRVI && rviReturn || valid && rvcReturn, btb.io.ras_head.bits, npc.asUInt)
+            predicted_npc := npc.asUInt
           }

           when (prevRVI && rviBranch || valid && rvcBranch) {
-            btb.io.bht_advance.valid := !s2_wrong_path && !s2_btb_resp_valid
+            btb.io.bht_advance.valid := fq.io.enq.fire() && !wrong_path && !s2_btb_resp_valid
            btb.io.bht_advance.bits := s2_btb_resp_bits
          }
        }
      }

      if (idx == fetchWidth-1) {
+        when (fq.io.enq.fire()) {
          s2_partial_insn_valid := false
          when (valid && !prevTaken && !rvc) {
            s2_partial_insn_valid := true
            s2_partial_insn := bits | 0x3
          }
+        }
        prevTaken || taken
      } else {
        scanInsns(idx + 1, valid, bits, prevTaken || taken)
@@ -252,20 +261,24 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
     btb.io.ras_update.bits.returnAddr := s2_base_pc + (after_idx << log2Ceil(coreInstBytes))

     val taken = scanInsns(0, s2_partial_insn_valid, s2_partial_insn, false.B)
-    when (s2_btb_hit) {
+    when (useRAS) {
+      predicted_npc := btb.io.ras_head.bits
+    }
+    when (fq.io.enq.fire() && s2_btb_hit) {
       s2_partial_insn_valid := false
-    }.otherwise {
+    }
+    when (!s2_btb_hit) {
       fq.io.enq.bits.btb.bits.bridx := taken_idx
       when (taken) {
         fq.io.enq.bits.btb.valid := true
         fq.io.enq.bits.btb.bits.taken := true
         fq.io.enq.bits.btb.bits.entry := UInt(tileParams.btb.get.nEntries)
-        s2_redirect := true
+        when (fq.io.enq.fire()) { s2_redirect := true }
       }
     }
   }
   when (s2_redirect) { s2_partial_insn_valid := false }
-  when (io.cpu.req.valid) { s2_wrong_path := false }
+  when (io.cpu.req.valid) { wrong_path := false }
   }

   io.cpu.resp <> fq.io.deq
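Across the two frontend hunks, the RAS target is no longer muxed into predicted_npc at every instruction slot of the scan; scanInsns only raises the useRAS flag (plus the cheaper predictBranch/predictReturn terms), and the single mux on the late btb.io.ras_head.bits happens once after the scan returns. A simplified chisel3 sketch of that hoisting idea follows; the names are invented and it omits the per-slot priority handling the real scan does.

import chisel3._

class HoistLateSelect(n: Int, w: Int) extends Module {
  val io = IO(new Bundle {
    val isReturn  = Input(Vec(n, Bool()))  // per-slot "this is a predicted return"
    val seqNpc    = Input(UInt(w.W))       // next PC chosen without the RAS
    val rasTarget = Input(UInt(w.W))       // late-arriving RAS head
    val npc       = Output(UInt(w.W))
  })
  // Instead of muxing rasTarget into the next PC separately at every slot (n muxes,
  // each waiting on the late RAS value), the scan reduces to one flag and the late
  // value is selected exactly once, at the very end.
  val useRAS = io.isReturn.asUInt.orR
  io.npc := Mux(useRAS, io.rasTarget, io.seqNpc)
}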
@@ -27,6 +27,7 @@ case class DCacheParams(
     nMMIOs: Int = 1,
     blockBytes: Int = 64,
     acquireBeforeRelease: Boolean = false,
+    pipelineWayMux: Boolean = false,
     scratch: Option[BigInt] = None) extends L1CacheParams {

   def dataScratchpadBytes: Int = scratch.map(_ => nSets*blockBytes).getOrElse(0)
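pipelineWayMux defaults to false, so existing configs keep their current timing. A hypothetical opt-in might look like the sketch below, assuming the usual freechips.rocketchip.rocket package; the nSets/nWays values are illustrative, and how this DCacheParams reaches a tile's configuration is elided.

import freechips.rocketchip.rocket.DCacheParams

object PipelinedWayMuxExample {
  // Hypothetical opt-in, not a recommendation from the patch.
  val dcacheCfg = DCacheParams(
    nSets = 64,
    nWays = 4,
    pipelineWayMux = true  // mux the data ways in stage 2 instead of stage 1
  )
}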
@@ -528,7 +528,12 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)

   val sboard = new Scoreboard(32, true)
   sboard.clear(ll_wen, ll_waddr)
-  val id_sboard_hazard = checkHazards(hazard_targets, rd => sboard.read(rd) && !(ll_wen && ll_waddr === rd))
+  def id_sboard_clear_bypass(r: UInt) = {
+    // ll_waddr arrives late when D$ has ECC, so reshuffle the hazard check
+    if (tileParams.dcache.get.dataECC.isInstanceOf[IdentityCode]) ll_wen && ll_waddr === r
+    else div.io.resp.fire() && div.io.resp.bits.tag === r || dmem_resp_replay && dmem_resp_xpu && dmem_resp_waddr === r
+  }
+  val id_sboard_hazard = checkHazards(hazard_targets, rd => sboard.read(rd) && !id_sboard_clear_bypass(rd))
   sboard.set(wb_set_sboard && wb_wen, wb_waddr)

   // stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage.
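The Rocket change keeps the scoreboard hazard check off the late ll_waddr: with an ECC-protected D$, ll_waddr is only known after correction, so the clear-bypass term is rebuilt from the earlier divider-response and D$-replay sources instead. A generic chisel3 sketch of that reshuffle follows; all names are invented, and the real logic lives inline in the Rocket pipeline rather than in a separate module.

import chisel3._

class LateBypassCheck extends Module {
  val io = IO(new Bundle {
    val rd         = Input(UInt(5.W))
    // Late, merged view of the long-latency writeback (known only after ECC correction).
    val llWen      = Input(Bool())
    val llWaddr    = Input(UInt(5.W))
    // Earlier constituent sources of the same writeback.
    val divWen     = Input(Bool())
    val divWaddr   = Input(UInt(5.W))
    val dmemReplay = Input(Bool())
    val dmemWaddr  = Input(UInt(5.W))
    val sboardBusy = Input(Bool())
    val hazard     = Output(Bool())
  })
  val lateClear  = io.llWen && io.llWaddr === io.rd
  val earlyClear = io.divWen && io.divWaddr === io.rd ||
                   io.dmemReplay && io.dmemWaddr === io.rd
  // The two forms agree functionally; the early form just avoids waiting for llWaddr,
  // trading a little duplicated comparison logic for a shorter path into the check.
  io.hazard := io.sboardBusy && !earlyClear
}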