1
0

Add performance counter facility

This commit is contained in:
Andrew Waterman 2017-03-09 00:28:19 -08:00
parent e57ee2692d
commit 4f8f05d635
10 changed files with 132 additions and 15 deletions

View File

@ -56,6 +56,8 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module
resp.valid := io.mem.resp.valid && tag_hit
io.requestor(i).xcpt := io.mem.xcpt
io.requestor(i).ordered := io.mem.ordered
io.requestor(i).acquire := io.mem.acquire
io.requestor(i).release := io.mem.release
io.requestor(i).s2_nack := io.mem.s2_nack && s2_id === UInt(i)
resp.bits := io.mem.resp.bits
resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n)

View File

@ -124,14 +124,23 @@ object CSR
}
val firstCtr = CSRs.cycle
val firstCtrH = CSRs.cycleh
val firstHPC = CSRs.hpmcounter3
val firstHPCH = CSRs.hpmcounter3h
val firstHPE = CSRs.mhpmevent3
val firstMHPC = CSRs.mhpmcounter3
val firstMHPCH = CSRs.mhpmcounter3h
val firstHPM = 3
val nCtr = 32
val nHPM = nCtr - firstHPM
}
class PerfCounterIO(implicit p: Parameters) extends CoreBundle
with HasRocketCoreParameters {
val eventSel = UInt(OUTPUT, xLen)
val inc = UInt(INPUT, log2Ceil(1+retireWidth))
}
class CSRFileIO(implicit p: Parameters) extends CoreBundle
with HasRocketCoreParameters {
val interrupts = new TileInterrupts().asInput
@ -174,10 +183,10 @@ class CSRFileIO(implicit p: Parameters) extends CoreBundle
val interrupt = Bool(OUTPUT)
val interrupt_cause = UInt(OUTPUT, xLen)
val bp = Vec(nBreakpoints, new BP).asOutput
val events = Vec(nPerfEvents, Bool()).asInput
val counters = Vec(nPerfCounters, new PerfCounterIO)
}
class CSRFile(implicit p: Parameters) extends CoreModule()(p)
class CSRFile(perfEventSets: EventSets = new EventSets(Seq()))(implicit p: Parameters) extends CoreModule()(p)
with HasRocketCoreParameters {
val io = new CSRFileIO
@ -258,8 +267,9 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p)
val reg_instret = WideCounter(64, io.retire)
val reg_cycle = if (enableCommitLog) reg_instret else WideCounter(64)
val reg_hpmevent = Seq.fill(nPerfCounters)(if (nPerfEvents > 1) Reg(UInt(width = log2Ceil(nPerfEvents))) else UInt(0))
val reg_hpmcounter = reg_hpmevent.map(e => WideCounter(64, ((UInt(0) +: io.events): Seq[UInt])(e)))
val reg_hpmevent = io.counters.map(c => Reg(init = UInt(0, xLen)))
(io.counters zip reg_hpmevent) foreach { case (c, e) => c.eventSel := e }
val reg_hpmcounter = io.counters.map(c => WideCounter(40, c.inc, reset = false))
val hpm_mask = reg_mcounteren & Mux((!usingVM).B || reg_mstatus.prv === PRV.S, delegable_counters.U, reg_scounteren)
val mip = Wire(init=reg_mip)
@ -339,6 +349,10 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p)
read_mapping += (i + CSR.firstHPE) -> e // mhpmeventN
read_mapping += (i + CSR.firstMHPC) -> c // mhpmcounterN
if (usingUser) read_mapping += (i + CSR.firstHPC) -> c // hpmcounterN
if (xLen == 32) {
read_mapping += (i + CSR.firstMHPCH) -> c // mhpmcounterNh
if (usingUser) read_mapping += (i + CSR.firstHPCH) -> c // hpmcounterNh
}
}
if (usingVM) {
@ -407,7 +421,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p)
io.decode.read_illegal := effective_prv < io.decode.csr(9,8) ||
!read_mapping.keys.map(io.decode.csr === _).reduce(_||_) ||
io.decode.csr === CSRs.sptbr && !allow_sfence_vma ||
io.decode.csr >= CSR.firstCtr && io.decode.csr < CSR.firstCtr + CSR.nCtr && effective_prv <= PRV.S && hpm_mask(io.decode.csr(log2Ceil(CSR.firstCtr)-1,0)) ||
(io.decode.csr.inRange(CSR.firstCtr, CSR.firstCtr + CSR.nCtr) || io.decode.csr.inRange(CSR.firstCtrH, CSR.firstCtrH + CSR.nCtr)) && effective_prv <= PRV.S && hpm_mask(io.decode.csr(log2Ceil(CSR.firstCtr)-1,0)) ||
Bool(usingDebug) && !reg_debug && debug_csrs.keys.map(io.decode.csr === _).reduce(_||_) ||
Bool(usingFPU) && fp_csrs.keys.map(io.decode.csr === _).reduce(_||_) && io.decode.fp_illegal
io.decode.write_illegal := io.decode.csr(11,10).andR
@ -561,8 +575,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p)
for (((e, c), i) <- (reg_hpmevent zip reg_hpmcounter) zipWithIndex) {
writeCounter(i + CSR.firstMHPC, c, wdata)
if (nPerfEvents > 1)
when (decoded_addr(i + CSR.firstHPE)) { e := wdata }
when (decoded_addr(i + CSR.firstHPE)) { e := perfEventSets.maskEventSelector(wdata) }
}
writeCounter(CSRs.mcycle, reg_cycle, wdata)
writeCounter(CSRs.minstret, reg_instret, wdata)
@ -688,10 +701,10 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p)
def writeCounter(lo: Int, ctr: WideCounter, wdata: UInt) = {
if (xLen == 32) {
val hi = lo + CSRs.mcycleh - CSRs.mcycle
when (decoded_addr(lo)) { ctr := Cat(ctr(63, 32), wdata) }
when (decoded_addr(hi)) { ctr := Cat(wdata, ctr(31, 0)) }
when (decoded_addr(lo)) { ctr := Cat(ctr(ctr.getWidth-1, 32), wdata) }
when (decoded_addr(hi)) { ctr := Cat(wdata(ctr.getWidth-33, 0), ctr(31, 0)) }
} else {
when (decoded_addr(lo)) { ctr := wdata }
when (decoded_addr(lo)) { ctr := wdata(ctr.getWidth-1, 0) }
}
}
def formEPC(x: UInt) = ~(~x | Cat(!reg_misa('c'-'a'), UInt(1)))

View File

@ -503,4 +503,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
flushing := false
}
}
// performance events
io.cpu.acquire := tl_out.a.fire()
io.cpu.release := tl_out.c.fire()
}

View File

@ -0,0 +1,35 @@
// See LICENSE.Berkeley for license details.
// See LICENSE.SiFive for license details.
package rocket
import util._
import Chisel._
class EventSet(gate: (UInt, UInt) => Bool, events: Seq[(String, () => Bool)]) {
def size = events.size
def hits = events.map(_._2()).asUInt
def check(mask: UInt) = gate(mask, hits)
}
class EventSets(eventSets: Seq[EventSet]) {
def maskEventSelector(eventSel: UInt): UInt = {
// allow full associativity between counters and event sets (for now?)
val setMask = (BigInt(1) << log2Ceil(eventSets.size)) - 1
val maskMask = ((BigInt(1) << eventSets.map(_.size).max) - 1) << eventSetIdBits
eventSel & (setMask | maskMask).U
}
private def decode(counter: UInt): (UInt, UInt) = {
require(eventSets.size <= (1 << eventSetIdBits))
(counter(log2Ceil(eventSets.size)-1, 0), counter >> eventSetIdBits)
}
def evaluate(eventSel: UInt): Bool = {
val (set, mask) = decode(eventSel)
val sets = eventSets map (_ check mask)
sets(set)
}
private def eventSetIdBits = 8
}

View File

@ -35,6 +35,9 @@ class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) {
val flush_icache = Bool(OUTPUT)
val flush_tlb = Bool(OUTPUT)
val npc = UInt(INPUT, width = vaddrBitsExtended)
// performance events
val acquire = Bool(INPUT)
}
class Frontend(implicit p: Parameters) extends LazyModule {
@ -150,6 +153,9 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
io.cpu.resp.bits.replay := icache.io.s2_kill && !icache.io.resp.valid && !s2_xcpt_if
io.cpu.resp.bits.btb.valid := s2_btb_resp_valid
io.cpu.resp.bits.btb.bits := s2_btb_resp_bits
// performance events
io.cpu.acquire := icache.io.mem(0).a.fire()
}
/** Mix-ins for constructing tiles that have an ICache-based pipeline frontend */

View File

@ -121,6 +121,10 @@ class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) {
val s1_data = Bits(OUTPUT, coreDataBits) // data for previous cycle's req
val s2_nack = Bool(INPUT) // req from two cycles ago is rejected
// performance events
val acquire = Bool(INPUT)
val release = Bool(INPUT)
val resp = Valid(new HellaCacheResp).flip
val replay_next = Bool(INPUT)
val xcpt = (new HellaCacheExceptions).asInput

View File

@ -973,4 +973,8 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule
io.cpu.resp.bits.data_word_bypass := loadgen.wordData
io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid
io.cpu.replay_next := (s1_replay && s1_read) || mshrs.io.replay_next
// performance events
io.cpu.acquire := tl_out.a.fire()
io.cpu.release := tl_out.c.fire()
}

View File

@ -19,7 +19,6 @@ case class RocketCoreParams(
useCompressed: Boolean = true,
nBreakpoints: Int = 1,
nPerfCounters: Int = 0,
nPerfEvents: Int = 0,
nCustomMRWCSRs: Int = 0,
mtvecInit: Option[BigInt] = Some(BigInt(0)),
mtvecWritable: Boolean = true,
@ -44,7 +43,6 @@ trait HasRocketCoreParameters extends HasCoreParameters {
val fastJAL = rocketParams.fastJAL
val nBreakpoints = rocketParams.nBreakpoints
val nPerfCounters = rocketParams.nPerfCounters
val nPerfEvents = rocketParams.nPerfEvents
val nCustomMrwCsrs = rocketParams.nCustomMRWCSRs
val mtvecInit = rocketParams.mtvecInit
val mtvecWritable = rocketParams.mtvecWritable
@ -58,6 +56,50 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
with HasRocketCoreParameters
with HasCoreIO {
// performance counters
def pipelineIDToWB[T <: Data](x: T): T =
RegEnable(RegEnable(RegEnable(x, !ctrl_killd), ex_pc_valid), mem_pc_valid)
val perfEvents = new EventSets(Seq(
new EventSet((mask, hits) => Mux(mask(0), wb_xcpt, wb_valid && pipelineIDToWB((mask & hits).orR)), Seq(
("exception", () => false.B),
("load", () => id_ctrl.mem && id_ctrl.mem_cmd === M_XRD && !id_ctrl.fp),
("store", () => id_ctrl.mem && id_ctrl.mem_cmd === M_XWR && !id_ctrl.fp),
("amo", () => Bool(usingAtomics) && id_ctrl.mem && (isAMO(id_ctrl.mem_cmd) || id_ctrl.mem_cmd.isOneOf(M_XLR, M_XSC))),
("system", () => id_ctrl.csr =/= CSR.N),
("arith", () => id_ctrl.wxd && !(id_ctrl.jal || id_ctrl.jalr || id_ctrl.mem || id_ctrl.fp || id_ctrl.div || id_ctrl.csr =/= CSR.N)),
("branch", () => id_ctrl.branch),
("jal", () => id_ctrl.jal),
("jalr", () => id_ctrl.jalr))
++ (if (!usingMulDiv) Seq() else Seq(
("mul", () => id_ctrl.div && (id_ctrl.alu_fn & ALU.FN_DIV) =/= ALU.FN_DIV),
("div", () => id_ctrl.div && (id_ctrl.alu_fn & ALU.FN_DIV) === ALU.FN_DIV)))
++ (if (!usingFPU) Seq() else Seq(
("fp load", () => id_ctrl.fp && io.fpu.dec.ldst && io.fpu.dec.wen),
("fp store", () => id_ctrl.fp && io.fpu.dec.ldst && !io.fpu.dec.wen),
("fp add", () => id_ctrl.fp && io.fpu.dec.fma && io.fpu.dec.swap23),
("fp mul", () => id_ctrl.fp && io.fpu.dec.fma && !io.fpu.dec.swap23 && !io.fpu.dec.ren3),
("fp mul-add", () => id_ctrl.fp && io.fpu.dec.fma && io.fpu.dec.ren3),
("fp div/sqrt", () => id_ctrl.fp && (io.fpu.dec.div || io.fpu.dec.sqrt)),
("fp other", () => id_ctrl.fp && !(io.fpu.dec.ldst || io.fpu.dec.fma || io.fpu.dec.div || io.fpu.dec.sqrt))))),
new EventSet((mask, hits) => (mask & hits).orR, Seq(
("load-use interlock", () => id_ex_hazard && ex_ctrl.mem || id_mem_hazard && mem_ctrl.mem || id_wb_hazard && wb_ctrl.mem),
("long-latency interlock", () => id_sboard_hazard),
("csr interlock", () => id_ex_hazard && ex_ctrl.csr =/= CSR.N || id_mem_hazard && mem_ctrl.csr =/= CSR.N || id_wb_hazard && wb_ctrl.csr =/= CSR.N),
("I$ blocked", () => !(ibuf.io.inst(0).valid || Reg(next = take_pc))),
("D$ blocked", () => id_ctrl.mem && dcache_blocked),
("branch misprediction", () => take_pc_mem && mem_direction_misprediction),
("control-flow target misprediction", () => take_pc_mem && mem_misprediction && !mem_direction_misprediction),
("flush", () => take_pc_mem && mem_reg_flush_pipe),
("replay", () => replay_wb))
++ (if (!usingMulDiv) Seq() else Seq(
("mul/div interlock", () => id_ex_hazard && ex_ctrl.div || id_mem_hazard && mem_ctrl.div || id_wb_hazard && wb_ctrl.div)))
++ (if (!usingFPU) Seq() else Seq(
("fp interlock", () => id_ex_hazard && ex_ctrl.fp || id_mem_hazard && mem_ctrl.fp || id_wb_hazard && wb_ctrl.fp || id_ctrl.fp && id_stall_fpu)))),
new EventSet((mask, hits) => (mask & hits).orR, Seq(
("I$ miss", () => io.imem.acquire),
("D$ miss", () => io.dmem.acquire),
("D$ release", () => io.dmem.release)))))
val decode_table = {
(if (usingMulDiv) new MDecode +: (xLen > 32).option(new M64Decode).toSeq else Nil) ++:
(if (usingAtomics) new ADecode +: (xLen > 32).option(new A64Decode).toSeq else Nil) ++:
@ -142,7 +184,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
val id_npc = (ibuf.io.pc.asSInt + ImmGen(IMM_UJ, id_inst(0))).asUInt
take_pc_id := Bool(fastJAL) && !ctrl_killd && id_ctrl.jal
val csr = Module(new CSRFile)
val csr = Module(new CSRFile(perfEvents))
val id_csr_en = id_ctrl.csr.isOneOf(CSR.S, CSR.C, CSR.W)
val id_system_insn = id_ctrl.csr >= CSR.I
val id_csr_ren = id_ctrl.csr.isOneOf(CSR.S, CSR.C) && id_raddr1 === UInt(0)
@ -292,6 +334,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
(ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause)))
// memory stage
val mem_pc_valid = mem_reg_valid || mem_reg_replay || mem_reg_xcpt_interrupt
val mem_br_taken = mem_reg_wdata(0)
val mem_br_target = mem_reg_pc.asSInt +
Mux(mem_ctrl.branch && mem_br_taken, ImmGen(IMM_SB, mem_reg_inst),
@ -303,6 +346,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
val mem_int_wdata = Mux(!mem_reg_xcpt && (mem_ctrl.jalr ^ mem_npc_misaligned), mem_br_target, mem_reg_wdata.asSInt).asUInt
val mem_cfi = mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal
val mem_cfi_taken = (mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || (Bool(!fastJAL) && mem_ctrl.jal)
val mem_direction_misprediction = mem_reg_btb_hit && mem_ctrl.branch && mem_br_taken =/= mem_reg_btb_resp.taken
val mem_misprediction = if (usingBTB) mem_wrong_npc else mem_cfi_taken
take_pc_mem := mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe)
@ -357,7 +401,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
wb_reg_replay := replay_mem && !take_pc_wb
wb_reg_xcpt := mem_xcpt && !take_pc_wb
when (mem_xcpt) { wb_reg_cause := mem_cause }
when (mem_reg_valid || mem_reg_replay || mem_reg_xcpt_interrupt) {
when (mem_pc_valid) {
wb_ctrl := mem_ctrl
wb_reg_wdata := Mux(!mem_reg_xcpt && mem_ctrl.fp && mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata)
when (mem_ctrl.rocc) {
@ -556,6 +600,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
io.rocc.cmd.bits.rs1 := wb_reg_wdata
io.rocc.cmd.bits.rs2 := wb_reg_rs2
// evaluate performance counters
csr.io.counters foreach { c => c.inc := RegNext(perfEvents.evaluate(c.eventSel)) }
if (enableCommitLog) {
val pc = Wire(SInt(width=xLen))
pc := wb_reg_pc

View File

@ -50,7 +50,7 @@ case class WideCounter(width: Int, inc: UInt = UInt(1), reset: Boolean = true)
private val large = if (isWide) {
val r = if (reset) Reg(init=UInt(0, width - smallWidth)) else Reg(UInt(width = width - smallWidth))
when (nextSmall(smallWidth)) { r := r +& UInt(1) }
when (nextSmall(smallWidth)) { r := r + UInt(1) }
r
} else null

View File

@ -43,6 +43,8 @@ package object util {
if (hi == lo-1) UInt(0)
else x(hi, lo)
}
def inRange(base: UInt, bounds: UInt) = x >= base && x < bounds
}
implicit class BooleanToAugmentedBoolean(val x: Boolean) extends AnyVal {