1
0

Merge pull request #914 from freechipsproject/critical-paths

Fix some critical paths
This commit is contained in:
Andrew Waterman 2017-08-02 19:05:31 -07:00 committed by GitHub
commit d66e8f8e80
4 changed files with 53 additions and 25 deletions

View File

@ -89,7 +89,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
val q_depth = if (rational) (2 min maxUncachedInFlight-1) else 0
val tl_out_a = Wire(tl_out.a)
tl_out.a <> (if (q_depth == 0) tl_out_a else Queue(tl_out_a, q_depth, flow = true, pipe = true))
tl_out.a <> (if (q_depth == 0) tl_out_a else Queue(tl_out_a, q_depth, flow = true))
val tl_out_c = Wire(tl_out.c)
tl_out.c <> (if (cacheParams.acquireBeforeRelease) Queue(tl_out_c, cacheDataBeats, flow = true) else tl_out_c)
@ -184,8 +184,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
.reduce (_|_))
(s1_meta_hit_way, s1_meta_hit_state, s1_meta, s1_meta_uncorrected(s1_victim_way))
}
val s1_data_way = Mux(inWriteback, releaseWay, s1_hit_way)
val s1_data = Mux1H(s1_data_way, data.io.resp) // retime into s2 if critical
val s1_data_way = Wire(init = Mux(inWriteback, releaseWay, s1_hit_way))
val s1_all_data_ways = Vec(data.io.resp :+ dummyEncodeData(tl_out.d.bits.data))
val s1_mask = Mux(s1_req.cmd === M_PWR, io.cpu.s1_data.mask, new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).mask)
val s2_valid = Reg(next=s1_valid_masked && !s1_sfence, init=Bool(false)) && !io.cpu.s2_xcpt.asUInt.orR
@ -210,7 +210,16 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
val s2_meta_errors = s2_meta.map(_.error).asUInt
val s2_meta_error = s2_meta_errors.orR
val s2_flush_valid = s2_flush_valid_pre_tag_ecc && !s2_meta_error
val s2_data = RegEnable(s1_data, s1_valid || inWriteback)
val s2_data = {
  // Stage-2 data is captured whenever s1 is valid, a writeback is in
  // progress, or a beat fires on tl_out.d.
  val capture = s1_valid || inWriteback || tl_out.d.fire()
  val muxInS2 = cacheParams.pipelineWayMux && nWays > 1
  if (!muxInS2) {
    // Select among the ways first, then register the single selected value.
    RegEnable(Mux1H(s1_data_way, s1_all_data_ways), capture)
  } else {
    // Register the one-hot select and every candidate separately, and do the
    // Mux1H on the registered copies. Each candidate register is only
    // enabled when its own select bit is set.
    val s2_data_way = RegEnable(s1_data_way, capture)
    // Inclusive range: indices 0..nWays of s1_all_data_ways.
    // NOTE(review): presumably nWays array ways plus one extra entry - confirm
    // against the definition of s1_all_data_ways.
    val s2_all_data_ways =
      for (i <- 0 to nWays) yield RegEnable(s1_all_data_ways(i), capture && s1_data_way(i))
    Mux1H(s2_data_way, s2_all_data_ways)
  }
}
val s2_probe_way = RegEnable(s1_hit_way, s1_probe)
val s2_probe_state = RegEnable(s1_hit_state, s1_probe)
val s2_hit_way = RegEnable(s1_hit_way, s1_valid_not_nacked)
@ -417,7 +426,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
}
}
when (grantIsUncachedData) {
s2_data := dummyEncodeData(tl_out.d.bits.data)
s1_data_way := 1.U << nWays
s2_req.cmd := M_XRD
s2_req.typ := req.typ
s2_req.tag := req.tag

View File

@ -97,7 +97,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
val s2_speculative = Reg(init=Bool(false))
val s2_partial_insn_valid = RegInit(false.B)
val s2_partial_insn = Reg(UInt(width = coreInstBits))
val s2_wrong_path = Reg(Bool())
val wrong_path = Reg(Bool())
val s1_base_pc = ~(~s1_pc | (fetchBytes - 1))
val ntpc = s1_base_pc + fetchBytes.U
@ -180,11 +180,12 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
Mux(returnAddrLSBs(log2Ceil(fetchWidth)), ntpc, s1_base_pc | ((returnAddrLSBs << log2Ceil(coreInstBytes)) & (fetchBytes - 1)))
btb.io.ras_update.bits.cfiType := btb.io.resp.bits.cfiType
btb.io.ras_update.bits.prediction.valid := true
} else when (fq.io.enq.fire()) {
} else {
val s2_btb_hit = s2_btb_resp_valid && s2_btb_resp_bits.taken
val s2_base_pc = ~(~s2_pc | (fetchBytes-1))
val taken_idx = Wire(UInt())
val after_idx = Wire(UInt())
val useRAS = Wire(init=false.B)
def scanInsns(idx: Int, prevValid: Bool, prevBits: UInt, prevTaken: Bool): Bool = {
val prevRVI = prevValid && prevBits(1,0) === 3
@ -200,48 +201,56 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
val rvcBranch = bits === Instructions.C_BEQZ || bits === Instructions.C_BNEZ
val rvcJAL = Bool(xLen == 32) && bits === Instructions.C_JAL
val rvcJump = bits === Instructions.C_J || rvcJAL
val rvcImm = Mux(bits(14), new RVCDecoder(bits).bImm.asSInt, 0.S) | Mux(bits(14,13) === 1, new RVCDecoder(bits).jImm.asSInt, 0.S)
val rvcImm = Mux(bits(14), new RVCDecoder(bits).bImm.asSInt, new RVCDecoder(bits).jImm.asSInt)
val rvcJR = bits === Instructions.C_MV && bits(6,2) === 0
val rvcReturn = rvcJR && BitPat("b00?01") === bits(11,7)
val rvcJALR = bits === Instructions.C_ADD && bits(6,2) === 0
val rvcCall = rvcJAL || rvcJALR
val rviImm = Mux(rviBits(3), ImmGen(IMM_UJ, rviBits), 0.S) | Mux(!rviBits(2), ImmGen(IMM_SB, rviBits), 0.S)
val rviImm = Mux(rviBits(3), ImmGen(IMM_UJ, rviBits), ImmGen(IMM_SB, rviBits))
val taken =
prevRVI && (rviJump || rviJALR || rviBranch && s2_btb_resp_bits.bht.taken) ||
valid && (rvcJump || rvcJALR || rvcJR || rvcBranch && s2_btb_resp_bits.bht.taken)
val predictReturn = btb.io.ras_head.valid && (prevRVI && rviReturn || valid && rvcReturn)
val predictBranch =
prevRVI && (rviJump || rviBranch && s2_btb_resp_bits.bht.taken) ||
valid && (rvcJump || rvcBranch && s2_btb_resp_bits.bht.taken)
when (!prevTaken) {
taken_idx := idx
after_idx := idx + 1
btb.io.ras_update.valid := !s2_wrong_path && (prevRVI && (rviCall || rviReturn) || valid && (rvcCall || rvcReturn))
btb.io.ras_update.valid := fq.io.enq.fire() && !wrong_path && (prevRVI && (rviCall || rviReturn) || valid && (rvcCall || rvcReturn))
btb.io.ras_update.bits.prediction.valid := true
btb.io.ras_update.bits.cfiType := Mux(Mux(prevRVI, rviReturn, rvcReturn), CFIType.ret, CFIType.call)
when (!s2_btb_hit) {
when (prevRVI && (rviJALR && !(rviReturn && btb.io.ras_head.valid)) ||
valid && (rvcJALR || (rvcJR && !btb.io.ras_head.valid))) {
s2_wrong_path := true
when (fq.io.enq.fire() && taken && !predictBranch && !predictReturn) {
wrong_path := true
}
when (taken) {
when (s2_valid && predictReturn) {
useRAS := true
}
when (s2_valid && predictBranch) {
val pc = s2_base_pc | (idx*coreInstBytes)
val npc =
if (idx == 0) pc.asSInt + Mux(prevRVI, rviImm -& 2.S, rvcImm)
else Mux(prevRVI, pc - coreInstBytes, pc).asSInt + Mux(prevRVI, rviImm, rvcImm)
predicted_npc := Mux(prevRVI && rviReturn || valid && rvcReturn, btb.io.ras_head.bits, npc.asUInt)
predicted_npc := npc.asUInt
}
when (prevRVI && rviBranch || valid && rvcBranch) {
btb.io.bht_advance.valid := !s2_wrong_path && !s2_btb_resp_valid
btb.io.bht_advance.valid := fq.io.enq.fire() && !wrong_path && !s2_btb_resp_valid
btb.io.bht_advance.bits := s2_btb_resp_bits
}
}
}
if (idx == fetchWidth-1) {
s2_partial_insn_valid := false
when (valid && !prevTaken && !rvc) {
s2_partial_insn_valid := true
s2_partial_insn := bits | 0x3
when (fq.io.enq.fire()) {
s2_partial_insn_valid := false
when (valid && !prevTaken && !rvc) {
s2_partial_insn_valid := true
s2_partial_insn := bits | 0x3
}
}
prevTaken || taken
} else {
@ -252,20 +261,24 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
btb.io.ras_update.bits.returnAddr := s2_base_pc + (after_idx << log2Ceil(coreInstBytes))
val taken = scanInsns(0, s2_partial_insn_valid, s2_partial_insn, false.B)
when (s2_btb_hit) {
when (useRAS) {
predicted_npc := btb.io.ras_head.bits
}
when (fq.io.enq.fire() && s2_btb_hit) {
s2_partial_insn_valid := false
}.otherwise {
}
when (!s2_btb_hit) {
fq.io.enq.bits.btb.bits.bridx := taken_idx
when (taken) {
fq.io.enq.bits.btb.valid := true
fq.io.enq.bits.btb.bits.taken := true
fq.io.enq.bits.btb.bits.entry := UInt(tileParams.btb.get.nEntries)
s2_redirect := true
when (fq.io.enq.fire()) { s2_redirect := true }
}
}
}
when (s2_redirect) { s2_partial_insn_valid := false }
when (io.cpu.req.valid) { s2_wrong_path := false }
when (io.cpu.req.valid) { wrong_path := false }
}
io.cpu.resp <> fq.io.deq

View File

@ -27,6 +27,7 @@ case class DCacheParams(
nMMIOs: Int = 1,
blockBytes: Int = 64,
acquireBeforeRelease: Boolean = false,
pipelineWayMux: Boolean = false,
scratch: Option[BigInt] = None) extends L1CacheParams {
def dataScratchpadBytes: Int = scratch.map(_ => nSets*blockBytes).getOrElse(0)

View File

@ -528,7 +528,12 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
val sboard = new Scoreboard(32, true)
sboard.clear(ll_wen, ll_waddr)
val id_sboard_hazard = checkHazards(hazard_targets, rd => sboard.read(rd) && !(ll_wen && ll_waddr === rd))
/** Bypass term for the scoreboard hazard check on register `r`.
  *
  * The result is true when the condition selected by the D$ data ECC
  * configuration holds for `r`.
  */
def id_sboard_clear_bypass(r: UInt) = {
  // ll_waddr arrives late when D$ has ECC, so reshuffle the hazard check
  tileParams.dcache.get.dataECC match {
    case _: IdentityCode =>
      // No data ECC: compare directly against ll_wen / ll_waddr.
      ll_wen && ll_waddr === r
    case _ =>
      // With ECC: compare against the divider response and the D$ replay
      // response instead of ll_waddr.
      val divMatches = div.io.resp.fire() && div.io.resp.bits.tag === r
      val dmemMatches = dmem_resp_replay && dmem_resp_xpu && dmem_resp_waddr === r
      divMatches || dmemMatches
  }
}
val id_sboard_hazard = checkHazards(hazard_targets, rd => sboard.read(rd) && !id_sboard_clear_bypass(rd))
sboard.set(wb_set_sboard && wb_wen, wb_waddr)
// stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage.