Merge pull request #902 from freechipsproject/perf-improvements
Perf improvements
This commit is contained in:
commit
140086e2c5
@ -104,11 +104,12 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
|||||||
s1_req := io.cpu.req.bits
|
s1_req := io.cpu.req.bits
|
||||||
s1_req.addr := Cat(io.cpu.req.bits.addr >> untagBits, metaArb.io.out.bits.idx, io.cpu.req.bits.addr(blockOffBits-1,0))
|
s1_req.addr := Cat(io.cpu.req.bits.addr >> untagBits, metaArb.io.out.bits.idx, io.cpu.req.bits.addr(blockOffBits-1,0))
|
||||||
}
|
}
|
||||||
val s1_read = needsRead(s1_req)
|
val s1_read = isRead(s1_req.cmd)
|
||||||
val s1_write = isWrite(s1_req.cmd)
|
val s1_write = isWrite(s1_req.cmd)
|
||||||
val s1_readwrite = s1_read || s1_write
|
val s1_readwrite = s1_read || s1_write
|
||||||
val s1_sfence = s1_req.cmd === M_SFENCE
|
val s1_sfence = s1_req.cmd === M_SFENCE
|
||||||
val s1_flush_valid = Reg(Bool())
|
val s1_flush_valid = Reg(Bool())
|
||||||
|
val s1_waw_hazard = Wire(Bool())
|
||||||
|
|
||||||
val s_ready :: s_voluntary_writeback :: s_probe_rep_dirty :: s_probe_rep_clean :: s_probe_retry :: s_probe_rep_miss :: s_voluntary_write_meta :: s_probe_write_meta :: Nil = Enum(UInt(), 8)
|
val s_ready :: s_voluntary_writeback :: s_probe_rep_dirty :: s_probe_rep_clean :: s_probe_retry :: s_probe_rep_miss :: s_voluntary_write_meta :: s_probe_write_meta :: Nil = Enum(UInt(), 8)
|
||||||
val cached_grant_wait = Reg(init=Bool(false))
|
val cached_grant_wait = Reg(init=Bool(false))
|
||||||
@ -125,13 +126,15 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
|||||||
val uncachedReqs = Seq.fill(maxUncachedInFlight) { Reg(new HellaCacheReq) }
|
val uncachedReqs = Seq.fill(maxUncachedInFlight) { Reg(new HellaCacheReq) }
|
||||||
|
|
||||||
// hit initiation path
|
// hit initiation path
|
||||||
val s0_read = needsRead(io.cpu.req.bits)
|
val s0_needsRead = needsRead(io.cpu.req.bits)
|
||||||
dataArb.io.in(3).valid := io.cpu.req.valid && s0_read
|
val s0_read = isRead(io.cpu.req.bits.cmd)
|
||||||
|
dataArb.io.in(3).valid := io.cpu.req.valid && s0_needsRead
|
||||||
dataArb.io.in(3).bits.write := false
|
dataArb.io.in(3).bits.write := false
|
||||||
dataArb.io.in(3).bits.addr := io.cpu.req.bits.addr
|
dataArb.io.in(3).bits.addr := io.cpu.req.bits.addr
|
||||||
dataArb.io.in(3).bits.wordMask := UIntToOH(io.cpu.req.bits.addr.extract(rowOffBits-1,offsetlsb))
|
dataArb.io.in(3).bits.wordMask := UIntToOH(io.cpu.req.bits.addr.extract(rowOffBits-1,offsetlsb))
|
||||||
dataArb.io.in(3).bits.way_en := ~UInt(0, nWays)
|
dataArb.io.in(3).bits.way_en := ~UInt(0, nWays)
|
||||||
when (!dataArb.io.in(3).ready && s0_read) { io.cpu.req.ready := false }
|
when (!dataArb.io.in(3).ready && s0_read) { io.cpu.req.ready := false }
|
||||||
|
val s1_didntRead = RegEnable(s0_needsRead && !dataArb.io.in(3).ready, metaArb.io.out.valid && !metaArb.io.out.bits.write)
|
||||||
metaArb.io.in(7).valid := io.cpu.req.valid
|
metaArb.io.in(7).valid := io.cpu.req.valid
|
||||||
metaArb.io.in(7).bits.write := false
|
metaArb.io.in(7).bits.write := false
|
||||||
metaArb.io.in(7).bits.idx := io.cpu.req.bits.addr(idxMSB, idxLSB)
|
metaArb.io.in(7).bits.idx := io.cpu.req.bits.addr(idxMSB, idxLSB)
|
||||||
@ -153,7 +156,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
|||||||
tlb.io.req.bits.instruction := false
|
tlb.io.req.bits.instruction := false
|
||||||
tlb.io.req.bits.size := s1_req.typ
|
tlb.io.req.bits.size := s1_req.typ
|
||||||
tlb.io.req.bits.cmd := s1_req.cmd
|
tlb.io.req.bits.cmd := s1_req.cmd
|
||||||
when (!tlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := false }
|
when (!tlb.io.req.ready && !tlb.io.ptw.resp.valid && !io.cpu.req.bits.phys) { io.cpu.req.ready := false }
|
||||||
when (s1_valid && s1_readwrite && tlb.io.resp.miss) { s1_nack := true }
|
when (s1_valid && s1_readwrite && tlb.io.resp.miss) { s1_nack := true }
|
||||||
|
|
||||||
val s1_paddr = tlb.io.resp.paddr
|
val s1_paddr = tlb.io.resp.paddr
|
||||||
@ -212,6 +215,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
|||||||
val s2_probe_state = RegEnable(s1_hit_state, s1_probe)
|
val s2_probe_state = RegEnable(s1_hit_state, s1_probe)
|
||||||
val s2_hit_way = RegEnable(s1_hit_way, s1_valid_not_nacked)
|
val s2_hit_way = RegEnable(s1_hit_way, s1_valid_not_nacked)
|
||||||
val s2_hit_state = RegEnable(s1_hit_state, s1_valid_not_nacked)
|
val s2_hit_state = RegEnable(s1_hit_state, s1_valid_not_nacked)
|
||||||
|
val s2_waw_hazard = RegEnable(s1_waw_hazard, s1_valid_not_nacked)
|
||||||
|
val s2_store_merge = Wire(Bool())
|
||||||
val s2_hit_valid = s2_hit_state.isValid()
|
val s2_hit_valid = s2_hit_state.isValid()
|
||||||
val (s2_hit, s2_grow_param, s2_new_hit_state) = s2_hit_state.onAccess(s2_req.cmd)
|
val (s2_hit, s2_grow_param, s2_new_hit_state) = s2_hit_state.onAccess(s2_req.cmd)
|
||||||
val s2_data_decoded = decodeData(s2_data)
|
val s2_data_decoded = decodeData(s2_data)
|
||||||
@ -221,8 +226,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
|||||||
val s2_data_uncorrected = (s2_data_decoded.map(_.uncorrected): Seq[UInt]).asUInt
|
val s2_data_uncorrected = (s2_data_decoded.map(_.uncorrected): Seq[UInt]).asUInt
|
||||||
val s2_valid_hit_pre_data_ecc = s2_valid_masked && s2_readwrite && !s2_meta_error && s2_hit
|
val s2_valid_hit_pre_data_ecc = s2_valid_masked && s2_readwrite && !s2_meta_error && s2_hit
|
||||||
val s2_valid_data_error = s2_valid_hit_pre_data_ecc && s2_data_error
|
val s2_valid_data_error = s2_valid_hit_pre_data_ecc && s2_data_error
|
||||||
val s2_valid_hit = s2_valid_hit_pre_data_ecc && !s2_data_error
|
val s2_valid_hit = s2_valid_hit_pre_data_ecc && !s2_data_error && (!s2_waw_hazard || s2_store_merge)
|
||||||
val s2_valid_miss = s2_valid_masked && s2_readwrite && !s2_meta_error && !s2_hit && !any_pstore_valid && !release_ack_wait
|
val s2_valid_miss = s2_valid_masked && s2_readwrite && !s2_meta_error && !s2_hit && !release_ack_wait
|
||||||
val s2_valid_cached_miss = s2_valid_miss && !s2_uncached && !uncachedInFlight.asUInt.orR
|
val s2_valid_cached_miss = s2_valid_miss && !s2_uncached && !uncachedInFlight.asUInt.orR
|
||||||
val s2_victimize = Bool(!usingDataScratchpad) && (s2_valid_cached_miss || s2_valid_data_error || s2_flush_valid)
|
val s2_victimize = Bool(!usingDataScratchpad) && (s2_valid_cached_miss || s2_valid_data_error || s2_flush_valid)
|
||||||
val s2_valid_uncached = s2_valid_miss && s2_uncached
|
val s2_valid_uncached = s2_valid_miss && s2_uncached
|
||||||
@ -274,29 +279,45 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
|||||||
val pstore1_way = RegEnable(s1_hit_way, s1_valid_not_nacked && s1_write)
|
val pstore1_way = RegEnable(s1_hit_way, s1_valid_not_nacked && s1_write)
|
||||||
val pstore1_mask = RegEnable(s1_mask, s1_valid_not_nacked && s1_write)
|
val pstore1_mask = RegEnable(s1_mask, s1_valid_not_nacked && s1_write)
|
||||||
val pstore1_storegen_data = Wire(init = pstore1_data)
|
val pstore1_storegen_data = Wire(init = pstore1_data)
|
||||||
val pstore1_rmw = Bool(usingRMW) && RegEnable(s1_read, s1_valid_not_nacked && s1_write)
|
val pstore1_rmw = Bool(usingRMW) && RegEnable(needsRead(s1_req), s1_valid_not_nacked && s1_write)
|
||||||
val pstore1_valid = Wire(Bool())
|
val pstore1_valid = Wire(Bool())
|
||||||
|
val pstore1_merge = pstore1_valid && s2_store_merge
|
||||||
val pstore2_valid = Reg(Bool())
|
val pstore2_valid = Reg(Bool())
|
||||||
any_pstore_valid := pstore1_valid || pstore2_valid
|
any_pstore_valid := pstore1_valid || pstore2_valid
|
||||||
val pstore_drain_structural = pstore1_valid && pstore2_valid && ((s1_valid && s1_write) || pstore1_rmw)
|
val pstore_drain_structural = pstore1_valid && pstore2_valid && ((s1_valid && s1_write) || pstore1_rmw)
|
||||||
val pstore_drain_opportunistic = !(io.cpu.req.valid && s0_read)
|
val pstore_drain_opportunistic = !(io.cpu.req.valid && s0_needsRead)
|
||||||
val pstore_drain_on_miss = releaseInFlight || io.cpu.s2_nack
|
val pstore_drain_on_miss = releaseInFlight || io.cpu.s2_nack
|
||||||
val pstore_drain =
|
val pstore_drain = !pstore1_merge &&
|
||||||
Bool(usingRMW) && pstore_drain_structural ||
|
(Bool(usingRMW) && pstore_drain_structural ||
|
||||||
(((pstore1_valid && !pstore1_rmw) || pstore2_valid) && (pstore_drain_opportunistic || pstore_drain_on_miss))
|
(((pstore1_valid && !pstore1_rmw) || pstore2_valid) && (pstore_drain_opportunistic || pstore_drain_on_miss)))
|
||||||
pstore1_valid := {
|
pstore1_valid := {
|
||||||
val s2_store_valid = s2_valid_hit && s2_write && !s2_sc_fail
|
val s2_store_valid = s2_valid_hit && s2_write && !s2_sc_fail
|
||||||
val pstore1_held = Reg(Bool())
|
val pstore1_held = Reg(Bool())
|
||||||
assert(!s2_store_valid || !pstore1_held)
|
assert(!s2_store_valid || !pstore1_held)
|
||||||
pstore1_held := (s2_store_valid || pstore1_held) && pstore2_valid && !pstore_drain
|
pstore1_held := (s2_store_valid && !s2_store_merge || pstore1_held) && pstore2_valid && !pstore_drain
|
||||||
s2_store_valid || pstore1_held
|
s2_store_valid || pstore1_held
|
||||||
}
|
}
|
||||||
val advance_pstore1 = (pstore1_valid || s2_valid_correct) && (pstore2_valid === pstore_drain)
|
val advance_pstore1 = (pstore1_valid || s2_valid_correct) && (pstore2_valid === pstore_drain)
|
||||||
pstore2_valid := pstore2_valid && !pstore_drain || advance_pstore1
|
pstore2_valid := pstore2_valid && !pstore_drain || advance_pstore1
|
||||||
val pstore2_addr = RegEnable(Mux(s2_correct, s2_req.addr, pstore1_addr), advance_pstore1)
|
val pstore2_addr = RegEnable(Mux(s2_correct, s2_req.addr, pstore1_addr), advance_pstore1)
|
||||||
val pstore2_way = RegEnable(Mux(s2_correct, s2_hit_way, pstore1_way), advance_pstore1)
|
val pstore2_way = RegEnable(Mux(s2_correct, s2_hit_way, pstore1_way), advance_pstore1)
|
||||||
val pstore2_storegen_data = RegEnable(pstore1_storegen_data, advance_pstore1)
|
s2_store_merge := {
|
||||||
val pstore2_storegen_mask = RegEnable(~Mux(s2_correct, 0.U, ~pstore1_mask), advance_pstore1)
|
val idxMatch = s2_req.addr(untagBits-1, log2Ceil(wordBytes)) === pstore2_addr(untagBits-1, log2Ceil(wordBytes))
|
||||||
|
val tagMatch = (s2_hit_way & pstore2_way).orR
|
||||||
|
Bool(eccBytes > 1) && pstore2_valid && idxMatch && tagMatch
|
||||||
|
}
|
||||||
|
val pstore2_storegen_data = {
|
||||||
|
for (i <- 0 until wordBytes)
|
||||||
|
yield RegEnable(pstore1_storegen_data(8*(i+1)-1, 8*i), advance_pstore1 || pstore1_merge && pstore1_mask(i))
|
||||||
|
}.asUInt
|
||||||
|
val pstore2_storegen_mask = {
|
||||||
|
val mask = Reg(UInt(width = wordBytes))
|
||||||
|
when (advance_pstore1 || pstore1_merge) {
|
||||||
|
val mergedMask = pstore1_mask | Mux(pstore1_merge, mask, 0.U)
|
||||||
|
mask := ~Mux(s2_correct, 0.U, ~mergedMask)
|
||||||
|
}
|
||||||
|
mask
|
||||||
|
}
|
||||||
dataArb.io.in(0).valid := pstore_drain
|
dataArb.io.in(0).valid := pstore_drain
|
||||||
dataArb.io.in(0).bits.write := true
|
dataArb.io.in(0).bits.write := true
|
||||||
dataArb.io.in(0).bits.addr := Mux(pstore2_valid, pstore2_addr, pstore1_addr)
|
dataArb.io.in(0).bits.addr := Mux(pstore2_valid, pstore2_addr, pstore1_addr)
|
||||||
@ -309,9 +330,11 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
|||||||
def s1Depends(addr: UInt, mask: UInt) =
|
def s1Depends(addr: UInt, mask: UInt) =
|
||||||
addr(idxMSB, wordOffBits) === s1_req.addr(idxMSB, wordOffBits) &&
|
addr(idxMSB, wordOffBits) === s1_req.addr(idxMSB, wordOffBits) &&
|
||||||
Mux(s1_write, (eccByteMask(mask) & eccByteMask(s1_mask)).orR, (mask & s1_mask).orR)
|
Mux(s1_write, (eccByteMask(mask) & eccByteMask(s1_mask)).orR, (mask & s1_mask).orR)
|
||||||
val s1_raw_hazard = s1_read &&
|
val s1_hazard =
|
||||||
((pstore1_valid && s1Depends(pstore1_addr, pstore1_mask)) ||
|
(pstore1_valid && s1Depends(pstore1_addr, pstore1_mask)) ||
|
||||||
(pstore2_valid && s1Depends(pstore2_addr, pstore2_storegen_mask)))
|
(pstore2_valid && s1Depends(pstore2_addr, pstore2_storegen_mask))
|
||||||
|
val s1_raw_hazard = s1_read && s1_hazard
|
||||||
|
s1_waw_hazard := Bool(eccBytes > 1) && s1_write && (s1_hazard || s1_didntRead)
|
||||||
when (s1_valid && s1_raw_hazard) { s1_nack := true }
|
when (s1_valid && s1_raw_hazard) { s1_nack := true }
|
||||||
|
|
||||||
// Prepare a TileLink request message that initiates a transaction
|
// Prepare a TileLink request message that initiates a transaction
|
||||||
|
@ -90,10 +90,10 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
("load-use interlock", () => id_ex_hazard && ex_ctrl.mem || id_mem_hazard && mem_ctrl.mem || id_wb_hazard && wb_ctrl.mem),
|
("load-use interlock", () => id_ex_hazard && ex_ctrl.mem || id_mem_hazard && mem_ctrl.mem || id_wb_hazard && wb_ctrl.mem),
|
||||||
("long-latency interlock", () => id_sboard_hazard),
|
("long-latency interlock", () => id_sboard_hazard),
|
||||||
("csr interlock", () => id_ex_hazard && ex_ctrl.csr =/= CSR.N || id_mem_hazard && mem_ctrl.csr =/= CSR.N || id_wb_hazard && wb_ctrl.csr =/= CSR.N),
|
("csr interlock", () => id_ex_hazard && ex_ctrl.csr =/= CSR.N || id_mem_hazard && mem_ctrl.csr =/= CSR.N || id_wb_hazard && wb_ctrl.csr =/= CSR.N),
|
||||||
("I$ blocked", () => !(ibuf.io.inst(0).valid || Reg(next = take_pc))),
|
("I$ blocked", () => icache_blocked),
|
||||||
("D$ blocked", () => id_ctrl.mem && dcache_blocked),
|
("D$ blocked", () => id_ctrl.mem && dcache_blocked),
|
||||||
("branch misprediction", () => take_pc_mem && mem_direction_misprediction),
|
("branch misprediction", () => take_pc_mem && mem_direction_misprediction),
|
||||||
("control-flow target misprediction", () => take_pc_mem && mem_misprediction && !mem_direction_misprediction),
|
("control-flow target misprediction", () => take_pc_mem && mem_misprediction && mem_cfi && !mem_direction_misprediction && !icache_blocked),
|
||||||
("flush", () => wb_reg_flush_pipe),
|
("flush", () => wb_reg_flush_pipe),
|
||||||
("replay", () => replay_wb))
|
("replay", () => replay_wb))
|
||||||
++ (if (!usingMulDiv) Seq() else Seq(
|
++ (if (!usingMulDiv) Seq() else Seq(
|
||||||
@ -593,7 +593,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
|
|
||||||
ibuf.io.inst(0).ready := !ctrl_stalld || csr.io.interrupt
|
ibuf.io.inst(0).ready := !ctrl_stalld || csr.io.interrupt
|
||||||
|
|
||||||
io.imem.btb_update.valid := (mem_reg_replay && mem_reg_btb_hit) || (mem_reg_valid && !take_pc_wb && mem_misprediction)
|
io.imem.btb_update.valid := (mem_reg_replay && mem_reg_btb_hit) || (mem_reg_valid && !take_pc_wb && mem_wrong_npc && (!mem_cfi || mem_cfi_taken))
|
||||||
io.imem.btb_update.bits.isValid := !mem_reg_replay && mem_cfi
|
io.imem.btb_update.bits.isValid := !mem_reg_replay && mem_cfi
|
||||||
io.imem.btb_update.bits.cfiType :=
|
io.imem.btb_update.bits.cfiType :=
|
||||||
Mux((mem_ctrl.jal || mem_ctrl.jalr) && mem_waddr(0), CFIType.call,
|
Mux((mem_ctrl.jal || mem_ctrl.jalr) && mem_waddr(0), CFIType.call,
|
||||||
@ -642,6 +642,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
io.rocc.cmd.bits.rs2 := wb_reg_rs2
|
io.rocc.cmd.bits.rs2 := wb_reg_rs2
|
||||||
|
|
||||||
// evaluate performance counters
|
// evaluate performance counters
|
||||||
|
val icache_blocked = !(io.imem.resp.valid || RegNext(io.imem.resp.valid))
|
||||||
csr.io.counters foreach { c => c.inc := RegNext(perfEvents.evaluate(c.eventSel)) }
|
csr.io.counters foreach { c => c.inc := RegNext(perfEvents.evaluate(c.eventSel)) }
|
||||||
|
|
||||||
if (enableCommitLog) {
|
if (enableCommitLog) {
|
||||||
|
Loading…
Reference in New Issue
Block a user