Add performance counter facility
This commit is contained in:
parent
e57ee2692d
commit
4f8f05d635
@ -56,6 +56,8 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module
|
||||
resp.valid := io.mem.resp.valid && tag_hit
|
||||
io.requestor(i).xcpt := io.mem.xcpt
|
||||
io.requestor(i).ordered := io.mem.ordered
|
||||
io.requestor(i).acquire := io.mem.acquire
|
||||
io.requestor(i).release := io.mem.release
|
||||
io.requestor(i).s2_nack := io.mem.s2_nack && s2_id === UInt(i)
|
||||
resp.bits := io.mem.resp.bits
|
||||
resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n)
|
||||
|
@ -124,14 +124,23 @@ object CSR
|
||||
}
|
||||
|
||||
val firstCtr = CSRs.cycle
|
||||
val firstCtrH = CSRs.cycleh
|
||||
val firstHPC = CSRs.hpmcounter3
|
||||
val firstHPCH = CSRs.hpmcounter3h
|
||||
val firstHPE = CSRs.mhpmevent3
|
||||
val firstMHPC = CSRs.mhpmcounter3
|
||||
val firstMHPCH = CSRs.mhpmcounter3h
|
||||
val firstHPM = 3
|
||||
val nCtr = 32
|
||||
val nHPM = nCtr - firstHPM
|
||||
}
|
||||
|
||||
/** Per-counter port bundle carrying an event selector one way and an
  * increment amount the other way.
  */
class PerfCounterIO(implicit p: Parameters) extends CoreBundle
    with HasRocketCoreParameters {
  // Event-selector value, xLen bits wide.
  val eventSel = UInt(OUTPUT, width = xLen)
  // Amount to add to the counter; wide enough to hold 0 through retireWidth.
  val inc = UInt(INPUT, width = log2Ceil(retireWidth + 1))
}
|
||||
|
||||
class CSRFileIO(implicit p: Parameters) extends CoreBundle
|
||||
with HasRocketCoreParameters {
|
||||
val interrupts = new TileInterrupts().asInput
|
||||
@ -174,10 +183,10 @@ class CSRFileIO(implicit p: Parameters) extends CoreBundle
|
||||
val interrupt = Bool(OUTPUT)
|
||||
val interrupt_cause = UInt(OUTPUT, xLen)
|
||||
val bp = Vec(nBreakpoints, new BP).asOutput
|
||||
val events = Vec(nPerfEvents, Bool()).asInput
|
||||
val counters = Vec(nPerfCounters, new PerfCounterIO)
|
||||
}
|
||||
|
||||
class CSRFile(implicit p: Parameters) extends CoreModule()(p)
|
||||
class CSRFile(perfEventSets: EventSets = new EventSets(Seq()))(implicit p: Parameters) extends CoreModule()(p)
|
||||
with HasRocketCoreParameters {
|
||||
val io = new CSRFileIO
|
||||
|
||||
@ -258,8 +267,9 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p)
|
||||
|
||||
val reg_instret = WideCounter(64, io.retire)
|
||||
val reg_cycle = if (enableCommitLog) reg_instret else WideCounter(64)
|
||||
val reg_hpmevent = Seq.fill(nPerfCounters)(if (nPerfEvents > 1) Reg(UInt(width = log2Ceil(nPerfEvents))) else UInt(0))
|
||||
val reg_hpmcounter = reg_hpmevent.map(e => WideCounter(64, ((UInt(0) +: io.events): Seq[UInt])(e)))
|
||||
val reg_hpmevent = io.counters.map(c => Reg(init = UInt(0, xLen)))
|
||||
(io.counters zip reg_hpmevent) foreach { case (c, e) => c.eventSel := e }
|
||||
val reg_hpmcounter = io.counters.map(c => WideCounter(40, c.inc, reset = false))
|
||||
val hpm_mask = reg_mcounteren & Mux((!usingVM).B || reg_mstatus.prv === PRV.S, delegable_counters.U, reg_scounteren)
|
||||
|
||||
val mip = Wire(init=reg_mip)
|
||||
@ -339,6 +349,10 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p)
|
||||
read_mapping += (i + CSR.firstHPE) -> e // mhpmeventN
|
||||
read_mapping += (i + CSR.firstMHPC) -> c // mhpmcounterN
|
||||
if (usingUser) read_mapping += (i + CSR.firstHPC) -> c // hpmcounterN
|
||||
// With xLen == 32 each counter is visible through two CSRs. The "h" alias
// must expose the bits above bit 31, so shift the counter down by 32 rather
// than mapping the whole counter again (which would read back the same low
// word that mhpmcounterN / hpmcounterN already return).
if (xLen == 32) {
  read_mapping += (i + CSR.firstMHPCH) -> (c >> 32) // mhpmcounterNh
  if (usingUser) read_mapping += (i + CSR.firstHPCH) -> (c >> 32) // hpmcounterNh
}
|
||||
}
|
||||
|
||||
if (usingVM) {
|
||||
@ -407,7 +421,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p)
|
||||
io.decode.read_illegal := effective_prv < io.decode.csr(9,8) ||
|
||||
!read_mapping.keys.map(io.decode.csr === _).reduce(_||_) ||
|
||||
io.decode.csr === CSRs.sptbr && !allow_sfence_vma ||
|
||||
io.decode.csr >= CSR.firstCtr && io.decode.csr < CSR.firstCtr + CSR.nCtr && effective_prv <= PRV.S && hpm_mask(io.decode.csr(log2Ceil(CSR.firstCtr)-1,0)) ||
|
||||
(io.decode.csr.inRange(CSR.firstCtr, CSR.firstCtr + CSR.nCtr) || io.decode.csr.inRange(CSR.firstCtrH, CSR.firstCtrH + CSR.nCtr)) && effective_prv <= PRV.S && hpm_mask(io.decode.csr(log2Ceil(CSR.firstCtr)-1,0)) ||
|
||||
Bool(usingDebug) && !reg_debug && debug_csrs.keys.map(io.decode.csr === _).reduce(_||_) ||
|
||||
Bool(usingFPU) && fp_csrs.keys.map(io.decode.csr === _).reduce(_||_) && io.decode.fp_illegal
|
||||
io.decode.write_illegal := io.decode.csr(11,10).andR
|
||||
@ -561,8 +575,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p)
|
||||
|
||||
for (((e, c), i) <- (reg_hpmevent zip reg_hpmcounter) zipWithIndex) {
|
||||
writeCounter(i + CSR.firstMHPC, c, wdata)
|
||||
if (nPerfEvents > 1)
|
||||
when (decoded_addr(i + CSR.firstHPE)) { e := wdata }
|
||||
when (decoded_addr(i + CSR.firstHPE)) { e := perfEventSets.maskEventSelector(wdata) }
|
||||
}
|
||||
writeCounter(CSRs.mcycle, reg_cycle, wdata)
|
||||
writeCounter(CSRs.minstret, reg_instret, wdata)
|
||||
@ -688,10 +701,10 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p)
|
||||
/** Connects CSR writes to a counter whose width is taken from the counter itself.
  *
  * Only the width-aware assignments are kept. The earlier fixed-width forms
  * (`ctr(63, 32)` and an unsliced `wdata`) assumed a 64-bit counter and would
  * index past the top of a narrower one.
  *
  * @param lo    address of the CSR that holds the counter's low word
  * @param ctr   counter to be written
  * @param wdata value being written to the CSR
  */
def writeCounter(lo: Int, ctr: WideCounter, wdata: UInt) = {
  if (xLen == 32) {
    // The high-word CSR sits at the same distance from `lo` as mcycleh does from mcycle.
    val hi = lo + CSRs.mcycleh - CSRs.mcycle
    // Low-word write: keep the counter's current upper bits.
    when (decoded_addr(lo)) { ctr := Cat(ctr(ctr.getWidth-1, 32), wdata) }
    // High-word write: take only as many bits of wdata as the counter has above bit 31.
    when (decoded_addr(hi)) { ctr := Cat(wdata(ctr.getWidth-33, 0), ctr(31, 0)) }
  } else {
    // Single-CSR write: truncate wdata to the counter's width.
    when (decoded_addr(lo)) { ctr := wdata(ctr.getWidth-1, 0) }
  }
}
|
||||
// Clears the low bits of x: bit 0 always, and bit 1 as well when bit
// ('c' - 'a') of reg_misa is 0. Equivalent to x & ~mask with a 2-bit mask;
// NOTE(review): the ~(~x | mask) form presumably exists so the narrow mask
// cannot clear x's upper bits through width extension -- confirm before
// simplifying.
def formEPC(x: UInt) = ~(~x | Cat(!reg_misa('c'-'a'), UInt(1)))
|
||||
|
@ -503,4 +503,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
||||
flushing := false
|
||||
}
|
||||
}
|
||||
|
||||
// performance events
|
||||
io.cpu.acquire := tl_out.a.fire()
|
||||
io.cpu.release := tl_out.c.fire()
|
||||
}
|
||||
|
35
src/main/scala/rocket/Events.scala
Normal file
35
src/main/scala/rocket/Events.scala
Normal file
@ -0,0 +1,35 @@
|
||||
// See LICENSE.Berkeley for license details.
|
||||
// See LICENSE.SiFive for license details.
|
||||
|
||||
package rocket
|
||||
|
||||
import util._
|
||||
import Chisel._
|
||||
|
||||
/** A list of named event signals together with the rule that turns a
  * selection mask and the event hits into a single count-enable.
  *
  * @param gate   combines (mask, hits) into one Bool
  * @param events (name, generator) pairs; every generator is invoked each time
  *               `hits` is evaluated
  */
class EventSet(gate: (UInt, UInt) => Bool, events: Seq[(String, () => Bool)]) {
  /** Number of events in this set. */
  def size: Int = events.size

  /** Invokes every event generator and packs the results into a UInt. */
  def hits: UInt = events.map { case (_, generate) => generate() }.asUInt

  /** Applies this set's gate to `mask` and the current hits. */
  def check(mask: UInt): Bool = gate(mask, hits)
}
|
||||
|
||||
/** A group of [[EventSet]]s addressed by a performance-counter event selector.
  *
  * Selector layout, as decoded below: the low bits hold the index of the
  * event set, and the bits from `eventSetIdBits` upward hold the mask that is
  * passed to the selected set's gate.
  *
  * An empty or single-element `eventSets` is handled explicitly. Otherwise an
  * empty list makes `max` and `log2Ceil` fail, and a single set leads to a
  * zero-width bit extraction.
  */
class EventSets(eventSets: Seq[EventSet]) {
  // Bit position at which the event mask starts inside a selector.
  private def eventSetIdBits = 8

  // Selector bits needed to index a set; 0 when there is nothing to choose
  // between (log2Ceil must not be called with 0).
  private def setIdWidth = if (eventSets.isEmpty) 0 else log2Ceil(eventSets.size)

  // Size of the largest set, or 0 when there are no sets (max on an empty
  // collection throws).
  private def maxSetSize = eventSets.foldLeft(0)((widest, s) => widest max s.size)

  /** Clears every selector bit that is neither a set index nor a mask bit. */
  def maskEventSelector(eventSel: UInt): UInt = {
    // allow full associativity between counters and event sets (for now?)
    val setMask = (BigInt(1) << setIdWidth) - 1
    val maskMask = ((BigInt(1) << maxSetSize) - 1) << eventSetIdBits
    eventSel & (setMask | maskMask).U
  }

  /** Splits a selector into (set index, event mask). */
  private def decode(counter: UInt): (UInt, UInt) = {
    require(eventSets.size <= (1 << eventSetIdBits))
    // With zero index bits there is no valid (hi, lo) range to extract.
    val set = if (setIdWidth == 0) UInt(0) else counter(setIdWidth - 1, 0)
    (set, counter >> eventSetIdBits)
  }

  /** Count-enable for a counter programmed with `eventSel`; false when no
    * event sets exist.
    */
  def evaluate(eventSel: UInt): Bool = {
    val (set, mask) = decode(eventSel)
    eventSets.map(_ check mask) match {
      case Seq() => false.B
      case Seq(only) => only
      case sets => sets(set)
    }
  }
}
|
@ -35,6 +35,9 @@ class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) {
|
||||
val flush_icache = Bool(OUTPUT)
|
||||
val flush_tlb = Bool(OUTPUT)
|
||||
val npc = UInt(INPUT, width = vaddrBitsExtended)
|
||||
|
||||
// performance events
|
||||
val acquire = Bool(INPUT)
|
||||
}
|
||||
|
||||
class Frontend(implicit p: Parameters) extends LazyModule {
|
||||
@ -150,6 +153,9 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
||||
io.cpu.resp.bits.replay := icache.io.s2_kill && !icache.io.resp.valid && !s2_xcpt_if
|
||||
io.cpu.resp.bits.btb.valid := s2_btb_resp_valid
|
||||
io.cpu.resp.bits.btb.bits := s2_btb_resp_bits
|
||||
|
||||
// performance events
|
||||
io.cpu.acquire := icache.io.mem(0).a.fire()
|
||||
}
|
||||
|
||||
/** Mix-ins for constructing tiles that have an ICache-based pipeline frontend */
|
||||
|
@ -121,6 +121,10 @@ class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) {
|
||||
val s1_data = Bits(OUTPUT, coreDataBits) // data for previous cycle's req
|
||||
val s2_nack = Bool(INPUT) // req from two cycles ago is rejected
|
||||
|
||||
// performance events
|
||||
val acquire = Bool(INPUT)
|
||||
val release = Bool(INPUT)
|
||||
|
||||
val resp = Valid(new HellaCacheResp).flip
|
||||
val replay_next = Bool(INPUT)
|
||||
val xcpt = (new HellaCacheExceptions).asInput
|
||||
|
@ -973,4 +973,8 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule
|
||||
io.cpu.resp.bits.data_word_bypass := loadgen.wordData
|
||||
io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid
|
||||
io.cpu.replay_next := (s1_replay && s1_read) || mshrs.io.replay_next
|
||||
|
||||
// performance events
|
||||
io.cpu.acquire := tl_out.a.fire()
|
||||
io.cpu.release := tl_out.c.fire()
|
||||
}
|
||||
|
@ -19,7 +19,6 @@ case class RocketCoreParams(
|
||||
useCompressed: Boolean = true,
|
||||
nBreakpoints: Int = 1,
|
||||
nPerfCounters: Int = 0,
|
||||
nPerfEvents: Int = 0,
|
||||
nCustomMRWCSRs: Int = 0,
|
||||
mtvecInit: Option[BigInt] = Some(BigInt(0)),
|
||||
mtvecWritable: Boolean = true,
|
||||
@ -44,7 +43,6 @@ trait HasRocketCoreParameters extends HasCoreParameters {
|
||||
val fastJAL = rocketParams.fastJAL
|
||||
val nBreakpoints = rocketParams.nBreakpoints
|
||||
val nPerfCounters = rocketParams.nPerfCounters
|
||||
val nPerfEvents = rocketParams.nPerfEvents
|
||||
val nCustomMrwCsrs = rocketParams.nCustomMRWCSRs
|
||||
val mtvecInit = rocketParams.mtvecInit
|
||||
val mtvecWritable = rocketParams.mtvecWritable
|
||||
@ -58,6 +56,50 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
||||
with HasRocketCoreParameters
|
||||
with HasCoreIO {
|
||||
|
||||
// performance counters
|
||||
// Delays x through three enabled registers in series; the enables are
// !ctrl_killd, ex_pc_valid and mem_pc_valid, in that order.
def pipelineIDToWB[T <: Data](x: T): T = {
  val afterDecode = RegEnable(x, !ctrl_killd)
  val afterExecute = RegEnable(afterDecode, ex_pc_valid)
  RegEnable(afterExecute, mem_pc_valid)
}
|
||||
val perfEvents = {
  type Event = (String, () => Bool)

  // Gate used by the sets that count whenever any selected event is hit.
  val anySelected = (mask: UInt, hits: UInt) => (mask & hits).orR

  // Set 0: instruction classes, matched on the decode-stage control signals.
  // Entry 0 is a constant-false placeholder: mask bit 0 is consumed by the
  // gate below, which then reports wb_xcpt instead of an instruction match.
  val coreInsns: Seq[Event] = Seq(
    ("exception", () => false.B),
    ("load", () => id_ctrl.mem && id_ctrl.mem_cmd === M_XRD && !id_ctrl.fp),
    ("store", () => id_ctrl.mem && id_ctrl.mem_cmd === M_XWR && !id_ctrl.fp),
    ("amo", () => Bool(usingAtomics) && id_ctrl.mem && (isAMO(id_ctrl.mem_cmd) || id_ctrl.mem_cmd.isOneOf(M_XLR, M_XSC))),
    ("system", () => id_ctrl.csr =/= CSR.N),
    ("arith", () => id_ctrl.wxd && !(id_ctrl.jal || id_ctrl.jalr || id_ctrl.mem || id_ctrl.fp || id_ctrl.div || id_ctrl.csr =/= CSR.N)),
    ("branch", () => id_ctrl.branch),
    ("jal", () => id_ctrl.jal),
    ("jalr", () => id_ctrl.jalr))
  val mulDivInsns: Seq[Event] =
    if (usingMulDiv) Seq(
      ("mul", () => id_ctrl.div && (id_ctrl.alu_fn & ALU.FN_DIV) =/= ALU.FN_DIV),
      ("div", () => id_ctrl.div && (id_ctrl.alu_fn & ALU.FN_DIV) === ALU.FN_DIV))
    else Seq()
  val fpInsns: Seq[Event] =
    if (usingFPU) Seq(
      ("fp load", () => id_ctrl.fp && io.fpu.dec.ldst && io.fpu.dec.wen),
      ("fp store", () => id_ctrl.fp && io.fpu.dec.ldst && !io.fpu.dec.wen),
      ("fp add", () => id_ctrl.fp && io.fpu.dec.fma && io.fpu.dec.swap23),
      ("fp mul", () => id_ctrl.fp && io.fpu.dec.fma && !io.fpu.dec.swap23 && !io.fpu.dec.ren3),
      ("fp mul-add", () => id_ctrl.fp && io.fpu.dec.fma && io.fpu.dec.ren3),
      ("fp div/sqrt", () => id_ctrl.fp && (io.fpu.dec.div || io.fpu.dec.sqrt)),
      ("fp other", () => id_ctrl.fp && !(io.fpu.dec.ldst || io.fpu.dec.fma || io.fpu.dec.div || io.fpu.dec.sqrt)))
    else Seq()
  val instructionSet = new EventSet(
    (mask, hits) => Mux(mask(0), wb_xcpt, wb_valid && pipelineIDToWB((mask & hits).orR)),
    coreInsns ++ mulDivInsns ++ fpInsns)

  // Set 1: interlocks, blocked caches, mispredictions, flushes and replays.
  val coreStalls: Seq[Event] = Seq(
    ("load-use interlock", () => id_ex_hazard && ex_ctrl.mem || id_mem_hazard && mem_ctrl.mem || id_wb_hazard && wb_ctrl.mem),
    ("long-latency interlock", () => id_sboard_hazard),
    ("csr interlock", () => id_ex_hazard && ex_ctrl.csr =/= CSR.N || id_mem_hazard && mem_ctrl.csr =/= CSR.N || id_wb_hazard && wb_ctrl.csr =/= CSR.N),
    ("I$ blocked", () => !(ibuf.io.inst(0).valid || Reg(next = take_pc))),
    ("D$ blocked", () => id_ctrl.mem && dcache_blocked),
    ("branch misprediction", () => take_pc_mem && mem_direction_misprediction),
    ("control-flow target misprediction", () => take_pc_mem && mem_misprediction && !mem_direction_misprediction),
    ("flush", () => take_pc_mem && mem_reg_flush_pipe),
    ("replay", () => replay_wb))
  val mulDivStalls: Seq[Event] =
    if (usingMulDiv) Seq(
      ("mul/div interlock", () => id_ex_hazard && ex_ctrl.div || id_mem_hazard && mem_ctrl.div || id_wb_hazard && wb_ctrl.div))
    else Seq()
  val fpStalls: Seq[Event] =
    if (usingFPU) Seq(
      ("fp interlock", () => id_ex_hazard && ex_ctrl.fp || id_mem_hazard && mem_ctrl.fp || id_wb_hazard && wb_ctrl.fp || id_ctrl.fp && id_stall_fpu))
    else Seq()
  val stallSet = new EventSet(anySelected, coreStalls ++ mulDivStalls ++ fpStalls)

  // Set 2: acquire/release activity reported by the instruction and data memory ports.
  val memorySet = new EventSet(anySelected, Seq[Event](
    ("I$ miss", () => io.imem.acquire),
    ("D$ miss", () => io.dmem.acquire),
    ("D$ release", () => io.dmem.release)))

  new EventSets(Seq(instructionSet, stallSet, memorySet))
}
|
||||
|
||||
val decode_table = {
|
||||
(if (usingMulDiv) new MDecode +: (xLen > 32).option(new M64Decode).toSeq else Nil) ++:
|
||||
(if (usingAtomics) new ADecode +: (xLen > 32).option(new A64Decode).toSeq else Nil) ++:
|
||||
@ -142,7 +184,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
||||
val id_npc = (ibuf.io.pc.asSInt + ImmGen(IMM_UJ, id_inst(0))).asUInt
|
||||
take_pc_id := Bool(fastJAL) && !ctrl_killd && id_ctrl.jal
|
||||
|
||||
val csr = Module(new CSRFile)
|
||||
val csr = Module(new CSRFile(perfEvents))
|
||||
val id_csr_en = id_ctrl.csr.isOneOf(CSR.S, CSR.C, CSR.W)
|
||||
val id_system_insn = id_ctrl.csr >= CSR.I
|
||||
val id_csr_ren = id_ctrl.csr.isOneOf(CSR.S, CSR.C) && id_raddr1 === UInt(0)
|
||||
@ -292,6 +334,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
||||
(ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause)))
|
||||
|
||||
// memory stage
|
||||
val mem_pc_valid = mem_reg_valid || mem_reg_replay || mem_reg_xcpt_interrupt
|
||||
val mem_br_taken = mem_reg_wdata(0)
|
||||
val mem_br_target = mem_reg_pc.asSInt +
|
||||
Mux(mem_ctrl.branch && mem_br_taken, ImmGen(IMM_SB, mem_reg_inst),
|
||||
@ -303,6 +346,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
||||
val mem_int_wdata = Mux(!mem_reg_xcpt && (mem_ctrl.jalr ^ mem_npc_misaligned), mem_br_target, mem_reg_wdata.asSInt).asUInt
|
||||
val mem_cfi = mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal
|
||||
val mem_cfi_taken = (mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || (Bool(!fastJAL) && mem_ctrl.jal)
|
||||
val mem_direction_misprediction = mem_reg_btb_hit && mem_ctrl.branch && mem_br_taken =/= mem_reg_btb_resp.taken
|
||||
val mem_misprediction = if (usingBTB) mem_wrong_npc else mem_cfi_taken
|
||||
take_pc_mem := mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe)
|
||||
|
||||
@ -357,7 +401,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
||||
wb_reg_replay := replay_mem && !take_pc_wb
|
||||
wb_reg_xcpt := mem_xcpt && !take_pc_wb
|
||||
when (mem_xcpt) { wb_reg_cause := mem_cause }
|
||||
when (mem_reg_valid || mem_reg_replay || mem_reg_xcpt_interrupt) {
|
||||
when (mem_pc_valid) {
|
||||
wb_ctrl := mem_ctrl
|
||||
wb_reg_wdata := Mux(!mem_reg_xcpt && mem_ctrl.fp && mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata)
|
||||
when (mem_ctrl.rocc) {
|
||||
@ -556,6 +600,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
||||
io.rocc.cmd.bits.rs1 := wb_reg_wdata
|
||||
io.rocc.cmd.bits.rs2 := wb_reg_rs2
|
||||
|
||||
// evaluate performance counters
|
||||
csr.io.counters foreach { c => c.inc := RegNext(perfEvents.evaluate(c.eventSel)) }
|
||||
|
||||
if (enableCommitLog) {
|
||||
val pc = Wire(SInt(width=xLen))
|
||||
pc := wb_reg_pc
|
||||
|
@ -50,7 +50,7 @@ case class WideCounter(width: Int, inc: UInt = UInt(1), reset: Boolean = true)
|
||||
|
||||
private val large = if (isWide) {
|
||||
val r = if (reset) Reg(init=UInt(0, width - smallWidth)) else Reg(UInt(width = width - smallWidth))
|
||||
when (nextSmall(smallWidth)) { r := r +& UInt(1) }
|
||||
when (nextSmall(smallWidth)) { r := r + UInt(1) }
|
||||
r
|
||||
} else null
|
||||
|
||||
|
@ -43,6 +43,8 @@ package object util {
|
||||
if (hi == lo-1) UInt(0)
|
||||
else x(hi, lo)
|
||||
}
|
||||
|
||||
/** True when x lies in the half-open interval [base, bounds). */
def inRange(base: UInt, bounds: UInt): Bool = (x >= base) && (x < bounds)
|
||||
}
|
||||
|
||||
implicit class BooleanToAugmentedBoolean(val x: Boolean) extends AnyVal {
|
||||
|
Loading…
Reference in New Issue
Block a user