Add TLB miss performance counters (#762)
This commit is contained in:
parent
b2b4c1abcd
commit
dbc5e7c494
@ -56,8 +56,7 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module
|
|||||||
resp.valid := io.mem.resp.valid && tag_hit
|
resp.valid := io.mem.resp.valid && tag_hit
|
||||||
io.requestor(i).s2_xcpt := io.mem.s2_xcpt
|
io.requestor(i).s2_xcpt := io.mem.s2_xcpt
|
||||||
io.requestor(i).ordered := io.mem.ordered
|
io.requestor(i).ordered := io.mem.ordered
|
||||||
io.requestor(i).acquire := io.mem.acquire
|
io.requestor(i).perf := io.mem.perf
|
||||||
io.requestor(i).release := io.mem.release
|
|
||||||
io.requestor(i).s2_nack := io.mem.s2_nack && s2_id === UInt(i)
|
io.requestor(i).s2_nack := io.mem.s2_nack && s2_id === UInt(i)
|
||||||
resp.bits := io.mem.resp.bits
|
resp.bits := io.mem.resp.bits
|
||||||
resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n)
|
resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n)
|
||||||
|
@ -142,6 +142,7 @@ object CSR
|
|||||||
val firstHPM = 3
|
val firstHPM = 3
|
||||||
val nCtr = 32
|
val nCtr = 32
|
||||||
val nHPM = nCtr - firstHPM
|
val nHPM = nCtr - firstHPM
|
||||||
|
val hpmWidth = 40
|
||||||
|
|
||||||
val maxPMPs = 16
|
val maxPMPs = 16
|
||||||
}
|
}
|
||||||
@ -290,7 +291,7 @@ class CSRFile(perfEventSets: EventSets = new EventSets(Seq()))(implicit p: Param
|
|||||||
val reg_cycle = if (enableCommitLog) reg_instret else WideCounter(64)
|
val reg_cycle = if (enableCommitLog) reg_instret else WideCounter(64)
|
||||||
val reg_hpmevent = io.counters.map(c => Reg(init = UInt(0, xLen)))
|
val reg_hpmevent = io.counters.map(c => Reg(init = UInt(0, xLen)))
|
||||||
(io.counters zip reg_hpmevent) foreach { case (c, e) => c.eventSel := e }
|
(io.counters zip reg_hpmevent) foreach { case (c, e) => c.eventSel := e }
|
||||||
val reg_hpmcounter = io.counters.map(c => WideCounter(40, c.inc, reset = false))
|
val reg_hpmcounter = io.counters.map(c => WideCounter(CSR.hpmWidth, c.inc, reset = false))
|
||||||
val hpm_mask = reg_mcounteren & Mux((!usingVM).B || reg_mstatus.prv === PRV.S, delegable_counters.U, reg_scounteren)
|
val hpm_mask = reg_mcounteren & Mux((!usingVM).B || reg_mstatus.prv === PRV.S, delegable_counters.U, reg_scounteren)
|
||||||
|
|
||||||
val mip = Wire(init=reg_mip)
|
val mip = Wire(init=reg_mip)
|
||||||
|
@ -562,6 +562,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// performance events
|
// performance events
|
||||||
io.cpu.acquire := edge.done(tl_out_a)
|
io.cpu.perf.acquire := edge.done(tl_out_a)
|
||||||
io.cpu.release := edge.done(tl_out.c)
|
io.cpu.perf.release := edge.done(tl_out.c)
|
||||||
|
io.cpu.perf.tlbMiss := io.ptw.req.fire()
|
||||||
}
|
}
|
||||||
|
@ -28,6 +28,11 @@ class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) {
|
|||||||
val replay = Bool()
|
val replay = Bool()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class FrontendPerfEvents extends Bundle {
|
||||||
|
val acquire = Bool()
|
||||||
|
val tlbMiss = Bool()
|
||||||
|
}
|
||||||
|
|
||||||
class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) {
|
class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) {
|
||||||
val req = Valid(new FrontendReq)
|
val req = Valid(new FrontendReq)
|
||||||
val sfence = Valid(new SFenceReq)
|
val sfence = Valid(new SFenceReq)
|
||||||
@ -37,9 +42,7 @@ class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) {
|
|||||||
val ras_update = Valid(new RASUpdate)
|
val ras_update = Valid(new RASUpdate)
|
||||||
val flush_icache = Bool(OUTPUT)
|
val flush_icache = Bool(OUTPUT)
|
||||||
val npc = UInt(INPUT, width = vaddrBitsExtended)
|
val npc = UInt(INPUT, width = vaddrBitsExtended)
|
||||||
|
val perf = new FrontendPerfEvents().asInput
|
||||||
// performance events
|
|
||||||
val acquire = Bool(INPUT)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
class Frontend(hartid: Int)(implicit p: Parameters) extends LazyModule {
|
class Frontend(hartid: Int)(implicit p: Parameters) extends LazyModule {
|
||||||
@ -175,7 +178,8 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
|||||||
io.cpu.resp <> fq.io.deq
|
io.cpu.resp <> fq.io.deq
|
||||||
|
|
||||||
// performance events
|
// performance events
|
||||||
io.cpu.acquire := edge.done(icache.io.tl_out(0).a)
|
io.cpu.perf.acquire := edge.done(icache.io.tl_out(0).a)
|
||||||
|
io.cpu.perf.tlbMiss := io.ptw.req.fire()
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Mix-ins for constructing tiles that have an ICache-based pipeline frontend */
|
/** Mix-ins for constructing tiles that have an ICache-based pipeline frontend */
|
||||||
|
@ -124,6 +124,12 @@ class HellaCacheWriteData(implicit p: Parameters) extends CoreBundle()(p) {
|
|||||||
val mask = UInt(width = coreDataBytes)
|
val mask = UInt(width = coreDataBytes)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class HellaCachePerfEvents extends Bundle {
|
||||||
|
val acquire = Bool()
|
||||||
|
val release = Bool()
|
||||||
|
val tlbMiss = Bool()
|
||||||
|
}
|
||||||
|
|
||||||
// interface between D$ and processor/DTLB
|
// interface between D$ and processor/DTLB
|
||||||
class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) {
|
class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) {
|
||||||
val req = Decoupled(new HellaCacheReq)
|
val req = Decoupled(new HellaCacheReq)
|
||||||
@ -131,15 +137,12 @@ class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) {
|
|||||||
val s1_data = new HellaCacheWriteData().asOutput // data for previous cycle's req
|
val s1_data = new HellaCacheWriteData().asOutput // data for previous cycle's req
|
||||||
val s2_nack = Bool(INPUT) // req from two cycles ago is rejected
|
val s2_nack = Bool(INPUT) // req from two cycles ago is rejected
|
||||||
|
|
||||||
// performance events
|
|
||||||
val acquire = Bool(INPUT)
|
|
||||||
val release = Bool(INPUT)
|
|
||||||
|
|
||||||
val resp = Valid(new HellaCacheResp).flip
|
val resp = Valid(new HellaCacheResp).flip
|
||||||
val replay_next = Bool(INPUT)
|
val replay_next = Bool(INPUT)
|
||||||
val s2_xcpt = (new HellaCacheExceptions).asInput
|
val s2_xcpt = (new HellaCacheExceptions).asInput
|
||||||
val invalidate_lr = Bool(OUTPUT)
|
val invalidate_lr = Bool(OUTPUT)
|
||||||
val ordered = Bool(INPUT)
|
val ordered = Bool(INPUT)
|
||||||
|
val perf = new HellaCachePerfEvents().asInput
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Base classes for Diplomatic TL2 HellaCaches */
|
/** Base classes for Diplomatic TL2 HellaCaches */
|
||||||
|
@ -985,6 +985,7 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule
|
|||||||
io.cpu.s2_xcpt := Mux(RegNext(s1_xcpt_valid), RegEnable(s1_xcpt, s1_clk_en), 0.U.asTypeOf(s1_xcpt))
|
io.cpu.s2_xcpt := Mux(RegNext(s1_xcpt_valid), RegEnable(s1_xcpt, s1_clk_en), 0.U.asTypeOf(s1_xcpt))
|
||||||
|
|
||||||
// performance events
|
// performance events
|
||||||
io.cpu.acquire := edge.done(tl_out.a)
|
io.cpu.perf.acquire := edge.done(tl_out.a)
|
||||||
io.cpu.release := edge.done(tl_out.c)
|
io.cpu.perf.release := edge.done(tl_out.c)
|
||||||
|
io.cpu.perf.tlbMiss := io.ptw.req.fire()
|
||||||
}
|
}
|
||||||
|
@ -102,9 +102,11 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
++ (if (!usingFPU) Seq() else Seq(
|
++ (if (!usingFPU) Seq() else Seq(
|
||||||
("fp interlock", () => id_ex_hazard && ex_ctrl.fp || id_mem_hazard && mem_ctrl.fp || id_wb_hazard && wb_ctrl.fp || id_ctrl.fp && id_stall_fpu)))),
|
("fp interlock", () => id_ex_hazard && ex_ctrl.fp || id_mem_hazard && mem_ctrl.fp || id_wb_hazard && wb_ctrl.fp || id_ctrl.fp && id_stall_fpu)))),
|
||||||
new EventSet((mask, hits) => (mask & hits).orR, Seq(
|
new EventSet((mask, hits) => (mask & hits).orR, Seq(
|
||||||
("I$ miss", () => io.imem.acquire),
|
("I$ miss", () => io.imem.perf.acquire),
|
||||||
("D$ miss", () => io.dmem.acquire),
|
("D$ miss", () => io.dmem.perf.acquire),
|
||||||
("D$ release", () => io.dmem.release)))))
|
("D$ release", () => io.dmem.perf.release),
|
||||||
|
("ITLB miss", () => io.imem.perf.tlbMiss),
|
||||||
|
("DTLB miss", () => io.dmem.perf.tlbMiss)))))
|
||||||
|
|
||||||
val decode_table = {
|
val decode_table = {
|
||||||
(if (usingMulDiv) new MDecode +: (xLen > 32).option(new M64Decode).toSeq else Nil) ++:
|
(if (usingMulDiv) new MDecode +: (xLen > 32).option(new M64Decode).toSeq else Nil) ++:
|
||||||
|
Loading…
Reference in New Issue
Block a user