From dbc5e7c49460f99259f89fc11d979b906a307a4a Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 23 May 2017 12:52:25 -0700 Subject: [PATCH] Add TLB miss performance counters (#762) --- src/main/scala/rocket/Arbiter.scala | 3 +-- src/main/scala/rocket/CSR.scala | 3 ++- src/main/scala/rocket/DCache.scala | 5 +++-- src/main/scala/rocket/Frontend.scala | 12 ++++++++---- src/main/scala/rocket/HellaCache.scala | 11 +++++++---- src/main/scala/rocket/NBDcache.scala | 5 +++-- src/main/scala/rocket/Rocket.scala | 8 +++++--- 7 files changed, 29 insertions(+), 18 deletions(-) diff --git a/src/main/scala/rocket/Arbiter.scala b/src/main/scala/rocket/Arbiter.scala index ebbafb5c..e203bf97 100644 --- a/src/main/scala/rocket/Arbiter.scala +++ b/src/main/scala/rocket/Arbiter.scala @@ -56,8 +56,7 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module resp.valid := io.mem.resp.valid && tag_hit io.requestor(i).s2_xcpt := io.mem.s2_xcpt io.requestor(i).ordered := io.mem.ordered - io.requestor(i).acquire := io.mem.acquire - io.requestor(i).release := io.mem.release + io.requestor(i).perf := io.mem.perf io.requestor(i).s2_nack := io.mem.s2_nack && s2_id === UInt(i) resp.bits := io.mem.resp.bits resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n) diff --git a/src/main/scala/rocket/CSR.scala b/src/main/scala/rocket/CSR.scala index 1df0b1cd..ae5c1f04 100644 --- a/src/main/scala/rocket/CSR.scala +++ b/src/main/scala/rocket/CSR.scala @@ -142,6 +142,7 @@ object CSR val firstHPM = 3 val nCtr = 32 val nHPM = nCtr - firstHPM + val hpmWidth = 40 val maxPMPs = 16 } @@ -290,7 +291,7 @@ class CSRFile(perfEventSets: EventSets = new EventSets(Seq()))(implicit p: Param val reg_cycle = if (enableCommitLog) reg_instret else WideCounter(64) val reg_hpmevent = io.counters.map(c => Reg(init = UInt(0, xLen))) (io.counters zip reg_hpmevent) foreach { case (c, e) => c.eventSel := e } - val reg_hpmcounter = io.counters.map(c => WideCounter(40, c.inc, reset = false)) + val reg_hpmcounter = io.counters.map(c => WideCounter(CSR.hpmWidth, c.inc, reset = false)) val hpm_mask = reg_mcounteren & Mux((!usingVM).B || reg_mstatus.prv === PRV.S, delegable_counters.U, reg_scounteren) val mip = Wire(init=reg_mip) diff --git a/src/main/scala/rocket/DCache.scala b/src/main/scala/rocket/DCache.scala index 8d8e0cb4..a0800937 100644 --- a/src/main/scala/rocket/DCache.scala +++ b/src/main/scala/rocket/DCache.scala @@ -562,6 +562,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { } // performance events - io.cpu.acquire := edge.done(tl_out_a) - io.cpu.release := edge.done(tl_out.c) + io.cpu.perf.acquire := edge.done(tl_out_a) + io.cpu.perf.release := edge.done(tl_out.c) + io.cpu.perf.tlbMiss := io.ptw.req.fire() } diff --git a/src/main/scala/rocket/Frontend.scala b/src/main/scala/rocket/Frontend.scala index 94ef4654..a9845812 100644 --- a/src/main/scala/rocket/Frontend.scala +++ b/src/main/scala/rocket/Frontend.scala @@ -28,6 +28,11 @@ class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) { val replay = Bool() } +class FrontendPerfEvents extends Bundle { + val acquire = Bool() + val tlbMiss = Bool() +} + class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) { val req = Valid(new FrontendReq) val sfence = Valid(new SFenceReq) @@ -37,9 +42,7 @@ class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) { val ras_update = Valid(new RASUpdate) val flush_icache = Bool(OUTPUT) val npc = UInt(INPUT, width = vaddrBitsExtended) - - // performance events - val acquire = Bool(INPUT) + val perf = new FrontendPerfEvents().asInput } class Frontend(hartid: Int)(implicit p: Parameters) extends LazyModule { @@ -175,7 +178,8 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer) io.cpu.resp <> fq.io.deq // performance events - io.cpu.acquire := edge.done(icache.io.tl_out(0).a) + io.cpu.perf.acquire := edge.done(icache.io.tl_out(0).a) + io.cpu.perf.tlbMiss := io.ptw.req.fire() } /** Mix-ins for constructing tiles that have an ICache-based pipeline frontend */ diff --git a/src/main/scala/rocket/HellaCache.scala b/src/main/scala/rocket/HellaCache.scala index e27ec11a..f571f209 100644 --- a/src/main/scala/rocket/HellaCache.scala +++ b/src/main/scala/rocket/HellaCache.scala @@ -124,6 +124,12 @@ class HellaCacheWriteData(implicit p: Parameters) extends CoreBundle()(p) { val mask = UInt(width = coreDataBytes) } +class HellaCachePerfEvents extends Bundle { + val acquire = Bool() + val release = Bool() + val tlbMiss = Bool() +} + // interface between D$ and processor/DTLB class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) { val req = Decoupled(new HellaCacheReq) @@ -131,15 +137,12 @@ class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) { val s1_data = new HellaCacheWriteData().asOutput // data for previous cycle's req val s2_nack = Bool(INPUT) // req from two cycles ago is rejected - // performance events - val acquire = Bool(INPUT) - val release = Bool(INPUT) - val resp = Valid(new HellaCacheResp).flip val replay_next = Bool(INPUT) val s2_xcpt = (new HellaCacheExceptions).asInput val invalidate_lr = Bool(OUTPUT) val ordered = Bool(INPUT) + val perf = new HellaCachePerfEvents().asInput } /** Base classes for Diplomatic TL2 HellaCaches */ diff --git a/src/main/scala/rocket/NBDcache.scala b/src/main/scala/rocket/NBDcache.scala index 30a4e672..0e7422ca 100644 --- a/src/main/scala/rocket/NBDcache.scala +++ b/src/main/scala/rocket/NBDcache.scala @@ -985,6 +985,7 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule io.cpu.s2_xcpt := Mux(RegNext(s1_xcpt_valid), RegEnable(s1_xcpt, s1_clk_en), 0.U.asTypeOf(s1_xcpt)) // performance events - io.cpu.acquire := edge.done(tl_out.a) - io.cpu.release := edge.done(tl_out.c) + io.cpu.perf.acquire := edge.done(tl_out.a) + io.cpu.perf.release := edge.done(tl_out.c) + io.cpu.perf.tlbMiss := io.ptw.req.fire() } diff --git a/src/main/scala/rocket/Rocket.scala b/src/main/scala/rocket/Rocket.scala index 27999f14..57a93acf 100644 --- a/src/main/scala/rocket/Rocket.scala +++ b/src/main/scala/rocket/Rocket.scala @@ -102,9 +102,11 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) ++ (if (!usingFPU) Seq() else Seq( ("fp interlock", () => id_ex_hazard && ex_ctrl.fp || id_mem_hazard && mem_ctrl.fp || id_wb_hazard && wb_ctrl.fp || id_ctrl.fp && id_stall_fpu)))), new EventSet((mask, hits) => (mask & hits).orR, Seq( - ("I$ miss", () => io.imem.acquire), - ("D$ miss", () => io.dmem.acquire), - ("D$ release", () => io.dmem.release))))) + ("I$ miss", () => io.imem.perf.acquire), + ("D$ miss", () => io.dmem.perf.acquire), + ("D$ release", () => io.dmem.perf.release), + ("ITLB miss", () => io.imem.perf.tlbMiss), + ("DTLB miss", () => io.dmem.perf.tlbMiss))))) val decode_table = { (if (usingMulDiv) new MDecode +: (xLen > 32).option(new M64Decode).toSeq else Nil) ++: