1
0

Add TLB miss performance counters (#762)

This commit is contained in:
Andrew Waterman 2017-05-23 12:52:25 -07:00 committed by Henry Cook
parent b2b4c1abcd
commit dbc5e7c494
7 changed files with 29 additions and 18 deletions

View File

@ -56,8 +56,7 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module
resp.valid := io.mem.resp.valid && tag_hit resp.valid := io.mem.resp.valid && tag_hit
io.requestor(i).s2_xcpt := io.mem.s2_xcpt io.requestor(i).s2_xcpt := io.mem.s2_xcpt
io.requestor(i).ordered := io.mem.ordered io.requestor(i).ordered := io.mem.ordered
io.requestor(i).acquire := io.mem.acquire io.requestor(i).perf := io.mem.perf
io.requestor(i).release := io.mem.release
io.requestor(i).s2_nack := io.mem.s2_nack && s2_id === UInt(i) io.requestor(i).s2_nack := io.mem.s2_nack && s2_id === UInt(i)
resp.bits := io.mem.resp.bits resp.bits := io.mem.resp.bits
resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n) resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n)

View File

@ -142,6 +142,7 @@ object CSR
val firstHPM = 3 val firstHPM = 3
val nCtr = 32 val nCtr = 32
val nHPM = nCtr - firstHPM val nHPM = nCtr - firstHPM
val hpmWidth = 40
val maxPMPs = 16 val maxPMPs = 16
} }
@ -290,7 +291,7 @@ class CSRFile(perfEventSets: EventSets = new EventSets(Seq()))(implicit p: Param
val reg_cycle = if (enableCommitLog) reg_instret else WideCounter(64) val reg_cycle = if (enableCommitLog) reg_instret else WideCounter(64)
val reg_hpmevent = io.counters.map(c => Reg(init = UInt(0, xLen))) val reg_hpmevent = io.counters.map(c => Reg(init = UInt(0, xLen)))
(io.counters zip reg_hpmevent) foreach { case (c, e) => c.eventSel := e } (io.counters zip reg_hpmevent) foreach { case (c, e) => c.eventSel := e }
val reg_hpmcounter = io.counters.map(c => WideCounter(40, c.inc, reset = false)) val reg_hpmcounter = io.counters.map(c => WideCounter(CSR.hpmWidth, c.inc, reset = false))
val hpm_mask = reg_mcounteren & Mux((!usingVM).B || reg_mstatus.prv === PRV.S, delegable_counters.U, reg_scounteren) val hpm_mask = reg_mcounteren & Mux((!usingVM).B || reg_mstatus.prv === PRV.S, delegable_counters.U, reg_scounteren)
val mip = Wire(init=reg_mip) val mip = Wire(init=reg_mip)

View File

@ -562,6 +562,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
} }
// performance events // performance events
io.cpu.acquire := edge.done(tl_out_a) io.cpu.perf.acquire := edge.done(tl_out_a)
io.cpu.release := edge.done(tl_out.c) io.cpu.perf.release := edge.done(tl_out.c)
io.cpu.perf.tlbMiss := io.ptw.req.fire()
} }

View File

@ -28,6 +28,11 @@ class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) {
val replay = Bool() val replay = Bool()
} }
/** Performance-event signals exposed through the `perf` field of FrontendIO
  * (declared there as `new FrontendPerfEvents().asInput`).
  * Each field is a single Bool event flag.
  */
class FrontendPerfEvents extends Bundle {
// Assigned from edge.done(icache.io.tl_out(0).a) in FrontendModule; Rocket registers
// io.imem.perf.acquire as its "I$ miss" event (presumably io.imem is a FrontendIO -- confirm).
val acquire = Bool()
// Assigned from io.ptw.req.fire() in FrontendModule; Rocket registers
// io.imem.perf.tlbMiss as its "ITLB miss" event.
val tlbMiss = Bool()
}
class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) { class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) {
val req = Valid(new FrontendReq) val req = Valid(new FrontendReq)
val sfence = Valid(new SFenceReq) val sfence = Valid(new SFenceReq)
@ -37,9 +42,7 @@ class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) {
val ras_update = Valid(new RASUpdate) val ras_update = Valid(new RASUpdate)
val flush_icache = Bool(OUTPUT) val flush_icache = Bool(OUTPUT)
val npc = UInt(INPUT, width = vaddrBitsExtended) val npc = UInt(INPUT, width = vaddrBitsExtended)
val perf = new FrontendPerfEvents().asInput
// performance events
val acquire = Bool(INPUT)
} }
class Frontend(hartid: Int)(implicit p: Parameters) extends LazyModule { class Frontend(hartid: Int)(implicit p: Parameters) extends LazyModule {
@ -175,7 +178,8 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
io.cpu.resp <> fq.io.deq io.cpu.resp <> fq.io.deq
// performance events // performance events
io.cpu.acquire := edge.done(icache.io.tl_out(0).a) io.cpu.perf.acquire := edge.done(icache.io.tl_out(0).a)
io.cpu.perf.tlbMiss := io.ptw.req.fire()
} }
/** Mix-ins for constructing tiles that have an ICache-based pipeline frontend */ /** Mix-ins for constructing tiles that have an ICache-based pipeline frontend */

View File

@ -124,6 +124,12 @@ class HellaCacheWriteData(implicit p: Parameters) extends CoreBundle()(p) {
val mask = UInt(width = coreDataBytes) val mask = UInt(width = coreDataBytes)
} }
/** Performance-event signals exposed through the `perf` field of HellaCacheIO
  * (declared there as `new HellaCachePerfEvents().asInput`).
  * Each field is a single Bool event flag; HellaCacheArbiter forwards the whole
  * bundle from io.mem.perf to every requestor.
  */
class HellaCachePerfEvents extends Bundle {
// Assigned from edge.done(...) on the cache's outgoing A channel (tl_out_a in DCacheModule,
// tl_out.a in NonBlockingDCacheModule); Rocket registers io.dmem.perf.acquire as "D$ miss".
val acquire = Bool()
// Assigned from edge.done(tl_out.c) in both data-cache modules; Rocket registers
// io.dmem.perf.release as "D$ release".
val release = Bool()
// Assigned from io.ptw.req.fire() in both data-cache modules; Rocket registers
// io.dmem.perf.tlbMiss as "DTLB miss".
val tlbMiss = Bool()
}
// interface between D$ and processor/DTLB // interface between D$ and processor/DTLB
class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) { class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) {
val req = Decoupled(new HellaCacheReq) val req = Decoupled(new HellaCacheReq)
@ -131,15 +137,12 @@ class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) {
val s1_data = new HellaCacheWriteData().asOutput // data for previous cycle's req val s1_data = new HellaCacheWriteData().asOutput // data for previous cycle's req
val s2_nack = Bool(INPUT) // req from two cycles ago is rejected val s2_nack = Bool(INPUT) // req from two cycles ago is rejected
// performance events
val acquire = Bool(INPUT)
val release = Bool(INPUT)
val resp = Valid(new HellaCacheResp).flip val resp = Valid(new HellaCacheResp).flip
val replay_next = Bool(INPUT) val replay_next = Bool(INPUT)
val s2_xcpt = (new HellaCacheExceptions).asInput val s2_xcpt = (new HellaCacheExceptions).asInput
val invalidate_lr = Bool(OUTPUT) val invalidate_lr = Bool(OUTPUT)
val ordered = Bool(INPUT) val ordered = Bool(INPUT)
val perf = new HellaCachePerfEvents().asInput
} }
/** Base classes for Diplomatic TL2 HellaCaches */ /** Base classes for Diplomatic TL2 HellaCaches */

View File

@ -985,6 +985,7 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule
io.cpu.s2_xcpt := Mux(RegNext(s1_xcpt_valid), RegEnable(s1_xcpt, s1_clk_en), 0.U.asTypeOf(s1_xcpt)) io.cpu.s2_xcpt := Mux(RegNext(s1_xcpt_valid), RegEnable(s1_xcpt, s1_clk_en), 0.U.asTypeOf(s1_xcpt))
// performance events // performance events
io.cpu.acquire := edge.done(tl_out.a) io.cpu.perf.acquire := edge.done(tl_out.a)
io.cpu.release := edge.done(tl_out.c) io.cpu.perf.release := edge.done(tl_out.c)
io.cpu.perf.tlbMiss := io.ptw.req.fire()
} }

View File

@ -102,9 +102,11 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
++ (if (!usingFPU) Seq() else Seq( ++ (if (!usingFPU) Seq() else Seq(
("fp interlock", () => id_ex_hazard && ex_ctrl.fp || id_mem_hazard && mem_ctrl.fp || id_wb_hazard && wb_ctrl.fp || id_ctrl.fp && id_stall_fpu)))), ("fp interlock", () => id_ex_hazard && ex_ctrl.fp || id_mem_hazard && mem_ctrl.fp || id_wb_hazard && wb_ctrl.fp || id_ctrl.fp && id_stall_fpu)))),
new EventSet((mask, hits) => (mask & hits).orR, Seq( new EventSet((mask, hits) => (mask & hits).orR, Seq(
("I$ miss", () => io.imem.acquire), ("I$ miss", () => io.imem.perf.acquire),
("D$ miss", () => io.dmem.acquire), ("D$ miss", () => io.dmem.perf.acquire),
("D$ release", () => io.dmem.release))))) ("D$ release", () => io.dmem.perf.release),
("ITLB miss", () => io.imem.perf.tlbMiss),
("DTLB miss", () => io.dmem.perf.tlbMiss)))))
val decode_table = { val decode_table = {
(if (usingMulDiv) new MDecode +: (xLen > 32).option(new M64Decode).toSeq else Nil) ++: (if (usingMulDiv) new MDecode +: (xLen > 32).option(new M64Decode).toSeq else Nil) ++: