1
0
rocket-chip/src/main/scala/rocket/RocketCore.scala
2017-08-13 15:47:14 -07:00

789 lines
34 KiB
Scala

// See LICENSE.Berkeley for license details.
// See LICENSE.SiFive for license details.
package freechips.rocketchip.rocket
import Chisel._
import Chisel.ImplicitConversions._
import chisel3.core.withReset
import freechips.rocketchip.config.Parameters
import freechips.rocketchip.tile._
import freechips.rocketchip.util._
import scala.collection.immutable.ListMap
import scala.collection.mutable.ArrayBuffer
case class RocketCoreParams(
bootFreqHz: BigInt = 0,
useVM: Boolean = true,
useUser: Boolean = false,
useDebug: Boolean = true,
useAtomics: Boolean = true,
useCompressed: Boolean = true,
nLocalInterrupts: Int = 0,
nBreakpoints: Int = 1,
nPMPs: Int = 8,
nPerfCounters: Int = 0,
nCustomMRWCSRs: Int = 0,
nL2TLBEntries: Int = 0,
mtvecInit: Option[BigInt] = Some(BigInt(0)),
mtvecWritable: Boolean = true,
fastLoadWord: Boolean = true,
fastLoadByte: Boolean = false,
jumpInFrontend: Boolean = true,
mulDiv: Option[MulDivParams] = Some(MulDivParams()),
fpu: Option[FPUParams] = Some(FPUParams())
) extends CoreParams {
val fetchWidth: Int = if (useCompressed) 2 else 1
// fetchWidth doubled, but coreInstBytes halved, for RVC:
val decodeWidth: Int = fetchWidth / (if (useCompressed) 2 else 1)
val retireWidth: Int = 1
val instBits: Int = if (useCompressed) 16 else 32
}
trait HasRocketCoreParameters extends HasCoreParameters {
val rocketParams: RocketCoreParams = tileParams.core.asInstanceOf[RocketCoreParams]
val fastLoadWord = rocketParams.fastLoadWord
val fastLoadByte = rocketParams.fastLoadByte
val nBreakpoints = rocketParams.nBreakpoints
val nPMPs = rocketParams.nPMPs
val nPerfCounters = rocketParams.nPerfCounters
val nCustomMrwCsrs = rocketParams.nCustomMRWCSRs
val mtvecInit = rocketParams.mtvecInit
val mtvecWritable = rocketParams.mtvecWritable
val mulDivParams = rocketParams.mulDiv.getOrElse(MulDivParams()) // TODO ask andrew about this
require(!fastLoadByte || fastLoadWord)
}
class Rocket(implicit p: Parameters) extends CoreModule()(p)
with HasRocketCoreParameters
with HasCoreIO {
// performance counters
def pipelineIDToWB[T <: Data](x: T): T =
RegEnable(RegEnable(RegEnable(x, !ctrl_killd), ex_pc_valid), mem_pc_valid)
val perfEvents = new EventSets(Seq(
new EventSet((mask, hits) => Mux(mask(0), wb_xcpt, wb_valid && pipelineIDToWB((mask & hits).orR)), Seq(
("exception", () => false.B),
("load", () => id_ctrl.mem && id_ctrl.mem_cmd === M_XRD && !id_ctrl.fp),
("store", () => id_ctrl.mem && id_ctrl.mem_cmd === M_XWR && !id_ctrl.fp),
("amo", () => Bool(usingAtomics) && id_ctrl.mem && (isAMO(id_ctrl.mem_cmd) || id_ctrl.mem_cmd.isOneOf(M_XLR, M_XSC))),
("system", () => id_ctrl.csr =/= CSR.N),
("arith", () => id_ctrl.wxd && !(id_ctrl.jal || id_ctrl.jalr || id_ctrl.mem || id_ctrl.fp || id_ctrl.div || id_ctrl.csr =/= CSR.N)),
("branch", () => id_ctrl.branch),
("jal", () => id_ctrl.jal),
("jalr", () => id_ctrl.jalr))
++ (if (!usingMulDiv) Seq() else Seq(
("mul", () => id_ctrl.div && (id_ctrl.alu_fn & ALU.FN_DIV) =/= ALU.FN_DIV),
("div", () => id_ctrl.div && (id_ctrl.alu_fn & ALU.FN_DIV) === ALU.FN_DIV)))
++ (if (!usingFPU) Seq() else Seq(
("fp load", () => id_ctrl.fp && io.fpu.dec.ldst && io.fpu.dec.wen),
("fp store", () => id_ctrl.fp && io.fpu.dec.ldst && !io.fpu.dec.wen),
("fp add", () => id_ctrl.fp && io.fpu.dec.fma && io.fpu.dec.swap23),
("fp mul", () => id_ctrl.fp && io.fpu.dec.fma && !io.fpu.dec.swap23 && !io.fpu.dec.ren3),
("fp mul-add", () => id_ctrl.fp && io.fpu.dec.fma && io.fpu.dec.ren3),
("fp div/sqrt", () => id_ctrl.fp && (io.fpu.dec.div || io.fpu.dec.sqrt)),
("fp other", () => id_ctrl.fp && !(io.fpu.dec.ldst || io.fpu.dec.fma || io.fpu.dec.div || io.fpu.dec.sqrt))))),
new EventSet((mask, hits) => (mask & hits).orR, Seq(
("load-use interlock", () => id_ex_hazard && ex_ctrl.mem || id_mem_hazard && mem_ctrl.mem || id_wb_hazard && wb_ctrl.mem),
("long-latency interlock", () => id_sboard_hazard),
("csr interlock", () => id_ex_hazard && ex_ctrl.csr =/= CSR.N || id_mem_hazard && mem_ctrl.csr =/= CSR.N || id_wb_hazard && wb_ctrl.csr =/= CSR.N),
("I$ blocked", () => icache_blocked),
("D$ blocked", () => id_ctrl.mem && dcache_blocked),
("branch misprediction", () => take_pc_mem && mem_direction_misprediction),
("control-flow target misprediction", () => take_pc_mem && mem_misprediction && mem_cfi && !mem_direction_misprediction && !icache_blocked),
("flush", () => wb_reg_flush_pipe),
("replay", () => replay_wb))
++ (if (!usingMulDiv) Seq() else Seq(
("mul/div interlock", () => id_ex_hazard && ex_ctrl.div || id_mem_hazard && mem_ctrl.div || id_wb_hazard && wb_ctrl.div)))
++ (if (!usingFPU) Seq() else Seq(
("fp interlock", () => id_ex_hazard && ex_ctrl.fp || id_mem_hazard && mem_ctrl.fp || id_wb_hazard && wb_ctrl.fp || id_ctrl.fp && id_stall_fpu)))),
new EventSet((mask, hits) => (mask & hits).orR, Seq(
("I$ miss", () => io.imem.perf.acquire),
("D$ miss", () => io.dmem.perf.acquire),
("D$ release", () => io.dmem.perf.release),
("ITLB miss", () => io.imem.perf.tlbMiss),
("DTLB miss", () => io.dmem.perf.tlbMiss),
("L2 TLB miss", () => io.ptw.perf.l2miss)))))
val decode_table = {
(if (usingMulDiv) new MDecode +: (xLen > 32).option(new M64Decode).toSeq else Nil) ++:
(if (usingAtomics) new ADecode +: (xLen > 32).option(new A64Decode).toSeq else Nil) ++:
(if (usingFPU) new FDecode +: (xLen > 32).option(new F64Decode).toSeq else Nil) ++:
(if (usingFPU && xLen > 32) Seq(new DDecode, new D64Decode) else Nil) ++:
(usingRoCC.option(new RoCCDecode)) ++:
((xLen > 32).option(new I64Decode)) ++:
(usingVM.option(new SDecode)) ++:
(usingDebug.option(new DebugDecode)) ++:
Seq(new IDecode)
} flatMap(_.table)
val ex_ctrl = Reg(new IntCtrlSigs)
val mem_ctrl = Reg(new IntCtrlSigs)
val wb_ctrl = Reg(new IntCtrlSigs)
val ex_reg_xcpt_interrupt = Reg(Bool())
val ex_reg_valid = Reg(Bool())
val ex_reg_rvc = Reg(Bool())
val ex_reg_btb_hit = Reg(Bool())
val ex_reg_btb_resp = Reg(new BTBResp)
val ex_reg_xcpt = Reg(Bool())
val ex_reg_flush_pipe = Reg(Bool())
val ex_reg_load_use = Reg(Bool())
val ex_reg_cause = Reg(UInt())
val ex_reg_replay = Reg(Bool())
val ex_reg_pc = Reg(UInt())
val ex_reg_inst = Reg(Bits())
val ex_reg_cinst = Reg(Bits())
val mem_reg_xcpt_interrupt = Reg(Bool())
val mem_reg_valid = Reg(Bool())
val mem_reg_rvc = Reg(Bool())
val mem_reg_btb_hit = Reg(Bool())
val mem_reg_btb_resp = Reg(new BTBResp)
val mem_reg_xcpt = Reg(Bool())
val mem_reg_replay = Reg(Bool())
val mem_reg_flush_pipe = Reg(Bool())
val mem_reg_cause = Reg(UInt())
val mem_reg_slow_bypass = Reg(Bool())
val mem_reg_load = Reg(Bool())
val mem_reg_store = Reg(Bool())
val mem_reg_sfence = Reg(Bool())
val mem_reg_pc = Reg(UInt())
val mem_reg_inst = Reg(Bits())
val mem_reg_cinst = Reg(Bits())
val mem_reg_wdata = Reg(Bits())
val mem_reg_rs2 = Reg(Bits())
val take_pc_mem = Wire(Bool())
val wb_reg_valid = Reg(Bool())
val wb_reg_rvc = Reg(Bool())
val wb_reg_xcpt = Reg(Bool())
val wb_reg_replay = Reg(Bool())
val wb_reg_flush_pipe = Reg(Bool())
val wb_reg_cause = Reg(UInt())
val wb_reg_sfence = Reg(Bool())
val wb_reg_pc = Reg(UInt())
val wb_reg_inst = Reg(Bits())
val wb_reg_cinst = Reg(Bits())
val wb_reg_wdata = Reg(Bits())
val wb_reg_rs2 = Reg(Bits())
val take_pc_wb = Wire(Bool())
val take_pc_mem_wb = take_pc_wb || take_pc_mem
val take_pc = take_pc_mem_wb
// decode stage
val ibuf = Module(new IBuf)
val id_expanded_inst = ibuf.io.inst.map(_.bits.inst)
val id_nonexpanded_inst = ibuf.io.inst.map(_.bits.cinst)
val id_inst = id_expanded_inst.map(_.bits)
ibuf.io.imem <> io.imem.resp
ibuf.io.kill := take_pc
require(decodeWidth == 1 /* TODO */ && retireWidth == decodeWidth)
val id_ctrl = Wire(new IntCtrlSigs()).decode(id_inst(0), decode_table)
val id_raddr3 = id_expanded_inst(0).rs3
val id_raddr2 = id_expanded_inst(0).rs2
val id_raddr1 = id_expanded_inst(0).rs1
val id_waddr = id_expanded_inst(0).rd
val id_load_use = Wire(Bool())
val id_reg_fence = Reg(init=Bool(false))
val id_ren = IndexedSeq(id_ctrl.rxs1, id_ctrl.rxs2)
val id_raddr = IndexedSeq(id_raddr1, id_raddr2)
val rf = new RegFile(31, xLen)
val id_rs = id_raddr.map(rf.read _)
val ctrl_killd = Wire(Bool())
val id_npc = (ibuf.io.pc.asSInt + ImmGen(IMM_UJ, id_inst(0))).asUInt
val csr = Module(new CSRFile(perfEvents))
val id_csr_en = id_ctrl.csr.isOneOf(CSR.S, CSR.C, CSR.W)
val id_system_insn = id_ctrl.csr >= CSR.I
val id_csr_ren = id_ctrl.csr.isOneOf(CSR.S, CSR.C) && id_raddr1 === UInt(0)
val id_csr = Mux(id_csr_ren, CSR.R, id_ctrl.csr)
val id_sfence = id_ctrl.mem && id_ctrl.mem_cmd === M_SFENCE
val id_csr_flush = id_sfence || id_system_insn || (id_csr_en && !id_csr_ren && csr.io.decode.write_flush)
val id_illegal_insn = !id_ctrl.legal ||
id_ctrl.div && !csr.io.status.isa('m'-'a') ||
id_ctrl.amo && !csr.io.status.isa('a'-'a') ||
id_ctrl.fp && (csr.io.decode.fp_illegal || io.fpu.illegal_rm) ||
id_ctrl.dp && !csr.io.status.isa('d'-'a') ||
ibuf.io.inst(0).bits.rvc && !csr.io.status.isa('c'-'a') ||
id_ctrl.rocc && csr.io.decode.rocc_illegal ||
id_csr_en && (csr.io.decode.read_illegal || !id_csr_ren && csr.io.decode.write_illegal) ||
!ibuf.io.inst(0).bits.rvc && ((id_sfence || id_system_insn) && csr.io.decode.system_illegal)
// stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE)
val id_amo_aq = id_inst(0)(26)
val id_amo_rl = id_inst(0)(25)
val id_fence_next = id_ctrl.fence || id_ctrl.amo && id_amo_rl
val id_mem_busy = !io.dmem.ordered || io.dmem.req.valid
when (!id_mem_busy) { id_reg_fence := false }
val id_rocc_busy = Bool(usingRoCC) &&
(io.rocc.busy || ex_reg_valid && ex_ctrl.rocc ||
mem_reg_valid && mem_ctrl.rocc || wb_reg_valid && wb_ctrl.rocc)
val id_do_fence = Wire(init = id_rocc_busy && id_ctrl.fence ||
id_mem_busy && (id_ctrl.amo && id_amo_aq || id_ctrl.fence_i || id_reg_fence && (id_ctrl.mem || id_ctrl.rocc)))
val bpu = Module(new BreakpointUnit(nBreakpoints))
bpu.io.status := csr.io.status
bpu.io.bp := csr.io.bp
bpu.io.pc := ibuf.io.pc
bpu.io.ea := mem_reg_wdata
val id_xcpt0 = ibuf.io.inst(0).bits.xcpt0
val id_xcpt1 = ibuf.io.inst(0).bits.xcpt1
val (id_xcpt, id_cause) = checkExceptions(List(
(csr.io.interrupt, csr.io.interrupt_cause),
(bpu.io.debug_if, UInt(CSR.debugTriggerCause)),
(bpu.io.xcpt_if, UInt(Causes.breakpoint)),
(id_xcpt0.pf.inst, UInt(Causes.fetch_page_fault)),
(id_xcpt0.ae.inst, UInt(Causes.fetch_access)),
(id_xcpt1.pf.inst, UInt(Causes.fetch_page_fault)),
(id_xcpt1.ae.inst, UInt(Causes.fetch_access)),
(id_illegal_insn, UInt(Causes.illegal_instruction))))
val dcache_bypass_data =
if (fastLoadByte) io.dmem.resp.bits.data
else if (fastLoadWord) io.dmem.resp.bits.data_word_bypass
else wb_reg_wdata
// detect bypass opportunities
val ex_waddr = ex_reg_inst(11,7)
val mem_waddr = mem_reg_inst(11,7)
val wb_waddr = wb_reg_inst(11,7)
val bypass_sources = IndexedSeq(
(Bool(true), UInt(0), UInt(0)), // treat reading x0 as a bypass
(ex_reg_valid && ex_ctrl.wxd, ex_waddr, mem_reg_wdata),
(mem_reg_valid && mem_ctrl.wxd && !mem_ctrl.mem, mem_waddr, wb_reg_wdata),
(mem_reg_valid && mem_ctrl.wxd, mem_waddr, dcache_bypass_data))
val id_bypass_src = id_raddr.map(raddr => bypass_sources.map(s => s._1 && s._2 === raddr))
// execute stage
val bypass_mux = bypass_sources.map(_._3)
val ex_reg_rs_bypass = Reg(Vec(id_raddr.size, Bool()))
val ex_reg_rs_lsb = Reg(Vec(id_raddr.size, UInt(width = log2Ceil(bypass_sources.size))))
val ex_reg_rs_msb = Reg(Vec(id_raddr.size, UInt()))
val ex_rs = for (i <- 0 until id_raddr.size)
yield Mux(ex_reg_rs_bypass(i), bypass_mux(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i)))
val ex_imm = ImmGen(ex_ctrl.sel_imm, ex_reg_inst)
val ex_op1 = MuxLookup(ex_ctrl.sel_alu1, SInt(0), Seq(
A1_RS1 -> ex_rs(0).asSInt,
A1_PC -> ex_reg_pc.asSInt))
val ex_op2 = MuxLookup(ex_ctrl.sel_alu2, SInt(0), Seq(
A2_RS2 -> ex_rs(1).asSInt,
A2_IMM -> ex_imm,
A2_SIZE -> Mux(ex_reg_rvc, SInt(2), SInt(4))))
val alu = Module(new ALU)
alu.io.dw := ex_ctrl.alu_dw
alu.io.fn := ex_ctrl.alu_fn
alu.io.in2 := ex_op2.asUInt
alu.io.in1 := ex_op1.asUInt
// multiplier and divider
val div = Module(new MulDiv(mulDivParams, width = xLen))
div.io.req.valid := ex_reg_valid && ex_ctrl.div
div.io.req.bits.dw := ex_ctrl.alu_dw
div.io.req.bits.fn := ex_ctrl.alu_fn
div.io.req.bits.in1 := ex_rs(0)
div.io.req.bits.in2 := ex_rs(1)
div.io.req.bits.tag := ex_waddr
ex_reg_valid := !ctrl_killd
ex_reg_replay := !take_pc && ibuf.io.inst(0).valid && ibuf.io.inst(0).bits.replay
ex_reg_xcpt := !ctrl_killd && id_xcpt
ex_reg_xcpt_interrupt := !take_pc && ibuf.io.inst(0).valid && csr.io.interrupt
ex_reg_btb_hit := ibuf.io.inst(0).bits.btb_hit
when (!ctrl_killd) {
ex_ctrl := id_ctrl
ex_reg_rvc := ibuf.io.inst(0).bits.rvc
ex_ctrl.csr := id_csr
when (id_fence_next) { id_reg_fence := true }
when (id_xcpt) { // pass PC down ALU writeback pipeline for badaddr
ex_ctrl.alu_fn := ALU.FN_ADD
ex_ctrl.alu_dw := DW_XPR
ex_ctrl.sel_alu1 := A1_RS1 // badaddr := instruction
ex_ctrl.sel_alu2 := A2_ZERO
when (id_xcpt1.asUInt.orR) { // badaddr := PC+2
ex_ctrl.sel_alu1 := A1_PC
ex_ctrl.sel_alu2 := A2_SIZE
ex_reg_rvc := true
}
when (bpu.io.xcpt_if || id_xcpt0.asUInt.orR) { // badaddr := PC
ex_ctrl.sel_alu1 := A1_PC
ex_ctrl.sel_alu2 := A2_ZERO
}
}
ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush
ex_reg_load_use := id_load_use
when (id_sfence) {
ex_ctrl.mem_type := Cat(id_raddr2 =/= UInt(0), id_raddr1 =/= UInt(0))
}
for (i <- 0 until id_raddr.size) {
val do_bypass = id_bypass_src(i).reduce(_||_)
val bypass_src = PriorityEncoder(id_bypass_src(i))
ex_reg_rs_bypass(i) := do_bypass
ex_reg_rs_lsb(i) := bypass_src
when (id_ren(i) && !do_bypass) {
ex_reg_rs_lsb(i) := id_rs(i)(log2Ceil(bypass_sources.size)-1, 0)
ex_reg_rs_msb(i) := id_rs(i) >> log2Ceil(bypass_sources.size)
}
}
when (id_illegal_insn) {
val inst = Mux(ibuf.io.inst(0).bits.rvc, ibuf.io.inst(0).bits.raw(15, 0), ibuf.io.inst(0).bits.raw)
ex_reg_rs_bypass(0) := false
ex_reg_rs_lsb(0) := inst(log2Ceil(bypass_sources.size)-1, 0)
ex_reg_rs_msb(0) := inst >> log2Ceil(bypass_sources.size)
}
}
when (!ctrl_killd || csr.io.interrupt || ibuf.io.inst(0).bits.replay) {
ex_reg_cause := id_cause
ex_reg_inst := id_inst(0)
ex_reg_cinst := id_nonexpanded_inst(0)
ex_reg_pc := ibuf.io.pc
ex_reg_btb_resp := ibuf.io.btb_resp
}
// replay inst in ex stage?
val ex_pc_valid = ex_reg_valid || ex_reg_replay || ex_reg_xcpt_interrupt
val wb_dcache_miss = wb_ctrl.mem && !io.dmem.resp.valid
val replay_ex_structural = ex_ctrl.mem && !io.dmem.req.ready ||
ex_ctrl.div && !div.io.req.ready
val replay_ex_load_use = wb_dcache_miss && ex_reg_load_use
val replay_ex = ex_reg_replay || (ex_reg_valid && (replay_ex_structural || replay_ex_load_use))
val ctrl_killx = take_pc_mem_wb || replay_ex || !ex_reg_valid
// detect 2-cycle load-use delay for LB/LH/SC
val ex_slow_bypass = ex_ctrl.mem_cmd === M_XSC || Vec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_ctrl.mem_type)
val ex_sfence = Bool(usingVM) && ex_ctrl.mem && ex_ctrl.mem_cmd === M_SFENCE
val (ex_xcpt, ex_cause) = checkExceptions(List(
(ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause)))
// memory stage
val mem_pc_valid = mem_reg_valid || mem_reg_replay || mem_reg_xcpt_interrupt
val mem_br_taken = mem_reg_wdata(0)
val mem_br_target = mem_reg_pc.asSInt +
Mux(mem_ctrl.branch && mem_br_taken, ImmGen(IMM_SB, mem_reg_inst),
Mux(mem_ctrl.jal, ImmGen(IMM_UJ, mem_reg_inst),
Mux(mem_reg_rvc, SInt(2), SInt(4))))
val mem_npc = (Mux(mem_ctrl.jalr || mem_reg_sfence, encodeVirtualAddress(mem_reg_wdata, mem_reg_wdata).asSInt, mem_br_target) & SInt(-2)).asUInt
val mem_wrong_npc =
Mux(ex_pc_valid, mem_npc =/= ex_reg_pc,
Mux(ibuf.io.inst(0).valid || ibuf.io.imem.valid, mem_npc =/= ibuf.io.pc, Bool(true)))
val mem_npc_misaligned = !csr.io.status.isa('c'-'a') && mem_npc(1) && !mem_reg_sfence
val mem_int_wdata = Mux(!mem_reg_xcpt && (mem_ctrl.jalr ^ mem_npc_misaligned), mem_br_target, mem_reg_wdata.asSInt).asUInt
val mem_cfi = mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal
val mem_cfi_taken = (mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal
val mem_direction_misprediction = (Bool(coreParams.jumpInFrontend) || mem_reg_btb_hit) && mem_ctrl.branch && mem_br_taken =/= mem_reg_btb_resp.taken
val mem_misprediction = if (usingBTB) mem_wrong_npc else mem_cfi_taken
take_pc_mem := mem_reg_valid && (mem_misprediction || mem_reg_sfence)
mem_reg_valid := !ctrl_killx
mem_reg_replay := !take_pc_mem_wb && replay_ex
mem_reg_xcpt := !ctrl_killx && ex_xcpt
mem_reg_xcpt_interrupt := !take_pc_mem_wb && ex_reg_xcpt_interrupt
// on pipeline flushes, cause mem_npc to hold the sequential npc, which
// will drive the W-stage npc mux
when (mem_reg_valid && mem_reg_flush_pipe) {
mem_reg_sfence := false
}.elsewhen (ex_pc_valid) {
mem_ctrl := ex_ctrl
mem_reg_rvc := ex_reg_rvc
mem_reg_load := ex_ctrl.mem && isRead(ex_ctrl.mem_cmd)
mem_reg_store := ex_ctrl.mem && isWrite(ex_ctrl.mem_cmd)
mem_reg_sfence := ex_sfence
mem_reg_btb_hit := ex_reg_btb_hit
mem_reg_btb_resp := ex_reg_btb_resp
mem_reg_flush_pipe := ex_reg_flush_pipe
mem_reg_slow_bypass := ex_slow_bypass
mem_reg_cause := ex_cause
mem_reg_inst := ex_reg_inst
mem_reg_cinst := ex_reg_cinst
mem_reg_pc := ex_reg_pc
mem_reg_wdata := alu.io.out
when (ex_ctrl.rxs2 && (ex_ctrl.mem || ex_ctrl.rocc || ex_sfence)) {
val typ = Mux(ex_ctrl.rocc, log2Ceil(xLen/8).U, ex_ctrl.mem_type)
mem_reg_rs2 := new StoreGen(typ, 0.U, ex_rs(1), coreDataBytes).data
}
when (ex_ctrl.jalr && csr.io.status.debug) {
// flush I$ on D-mode JALR to effect uncached fetch without D$ flush
mem_ctrl.fence_i := true
mem_reg_flush_pipe := true
}
}
val mem_breakpoint = (mem_reg_load && bpu.io.xcpt_ld) || (mem_reg_store && bpu.io.xcpt_st)
val mem_debug_breakpoint = (mem_reg_load && bpu.io.debug_ld) || (mem_reg_store && bpu.io.debug_st)
val (mem_new_xcpt, mem_new_cause) = checkExceptions(List(
(mem_debug_breakpoint, UInt(CSR.debugTriggerCause)),
(mem_breakpoint, UInt(Causes.breakpoint)),
(mem_npc_misaligned, UInt(Causes.misaligned_fetch))))
val (mem_xcpt, mem_cause) = checkExceptions(List(
(mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause),
(mem_reg_valid && mem_new_xcpt, mem_new_cause)))
val dcache_kill_mem = mem_reg_valid && mem_ctrl.wxd && io.dmem.replay_next // structural hazard on writeback port
val fpu_kill_mem = mem_reg_valid && mem_ctrl.fp && io.fpu.nack_mem
val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem
val killm_common = dcache_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid
div.io.kill := killm_common && Reg(next = div.io.req.fire())
val ctrl_killm = killm_common || mem_xcpt || fpu_kill_mem
// writeback stage
wb_reg_valid := !ctrl_killm
wb_reg_replay := replay_mem && !take_pc_wb
wb_reg_xcpt := mem_xcpt && !take_pc_wb
wb_reg_flush_pipe := !ctrl_killm && mem_reg_flush_pipe
when (mem_pc_valid) {
wb_ctrl := mem_ctrl
wb_reg_rvc := mem_reg_rvc
wb_reg_sfence := mem_reg_sfence
wb_reg_wdata := Mux(!mem_reg_xcpt && mem_ctrl.fp && mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata)
when (mem_ctrl.rocc || mem_reg_sfence) {
wb_reg_rs2 := mem_reg_rs2
}
wb_reg_cause := mem_cause
wb_reg_inst := mem_reg_inst
wb_reg_cinst := mem_reg_cinst
wb_reg_pc := mem_reg_pc
}
val (wb_xcpt, wb_cause) = checkExceptions(List(
(wb_reg_xcpt, wb_reg_cause),
(wb_reg_valid && wb_ctrl.mem && io.dmem.s2_xcpt.ma.st, UInt(Causes.misaligned_store)),
(wb_reg_valid && wb_ctrl.mem && io.dmem.s2_xcpt.ma.ld, UInt(Causes.misaligned_load)),
(wb_reg_valid && wb_ctrl.mem && io.dmem.s2_xcpt.pf.st, UInt(Causes.store_page_fault)),
(wb_reg_valid && wb_ctrl.mem && io.dmem.s2_xcpt.pf.ld, UInt(Causes.load_page_fault)),
(wb_reg_valid && wb_ctrl.mem && io.dmem.s2_xcpt.ae.st, UInt(Causes.store_access)),
(wb_reg_valid && wb_ctrl.mem && io.dmem.s2_xcpt.ae.ld, UInt(Causes.load_access))
))
val wb_wxd = wb_reg_valid && wb_ctrl.wxd
val wb_set_sboard = wb_ctrl.div || wb_dcache_miss || wb_ctrl.rocc
val replay_wb_common = io.dmem.s2_nack || wb_reg_replay
val replay_wb_rocc = wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready
val replay_wb = replay_wb_common || replay_wb_rocc
take_pc_wb := replay_wb || wb_xcpt || csr.io.eret || wb_reg_flush_pipe
// writeback arbitration
val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool
val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool
val dmem_resp_waddr = io.dmem.resp.bits.tag(5, 1)
val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data
val dmem_resp_replay = dmem_resp_valid && io.dmem.resp.bits.replay
div.io.resp.ready := !wb_wxd
val ll_wdata = Wire(init = div.io.resp.bits.data)
val ll_waddr = Wire(init = div.io.resp.bits.tag)
val ll_wen = Wire(init = div.io.resp.fire())
if (usingRoCC) {
io.rocc.resp.ready := !wb_wxd
when (io.rocc.resp.fire()) {
div.io.resp.ready := Bool(false)
ll_wdata := io.rocc.resp.bits.data
ll_waddr := io.rocc.resp.bits.rd
ll_wen := Bool(true)
}
}
when (dmem_resp_replay && dmem_resp_xpu) {
div.io.resp.ready := Bool(false)
if (usingRoCC)
io.rocc.resp.ready := Bool(false)
ll_waddr := dmem_resp_waddr
ll_wen := Bool(true)
}
val wb_valid = wb_reg_valid && !replay_wb && !wb_xcpt
val wb_wen = wb_valid && wb_ctrl.wxd
val rf_wen = wb_wen || ll_wen
val rf_waddr = Mux(ll_wen, ll_waddr, wb_waddr)
val rf_wdata = Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data,
Mux(ll_wen, ll_wdata,
Mux(wb_ctrl.csr =/= CSR.N, csr.io.rw.rdata,
wb_reg_wdata)))
when (rf_wen) { rf.write(rf_waddr, rf_wdata) }
// hook up control/status regfile
csr.io.decode.csr := ibuf.io.inst(0).bits.raw(31,20)
csr.io.exception := wb_xcpt
csr.io.cause := wb_cause
csr.io.retire := wb_valid
csr.io.interrupts := io.interrupts
csr.io.hartid := io.hartid
io.fpu.fcsr_rm := csr.io.fcsr_rm
csr.io.fcsr_flags := io.fpu.fcsr_flags
csr.io.rocc_interrupt := io.rocc.interrupt
csr.io.pc := wb_reg_pc
csr.io.badaddr := encodeVirtualAddress(wb_reg_wdata, wb_reg_wdata)
io.ptw.ptbr := csr.io.ptbr
io.ptw.status := csr.io.status
io.ptw.pmp := csr.io.pmp
csr.io.rw.addr := wb_reg_inst(31,20)
csr.io.rw.cmd := Mux(wb_reg_valid, wb_ctrl.csr, CSR.N)
csr.io.rw.wdata := wb_reg_wdata
val hazard_targets = Seq((id_ctrl.rxs1 && id_raddr1 =/= UInt(0), id_raddr1),
(id_ctrl.rxs2 && id_raddr2 =/= UInt(0), id_raddr2),
(id_ctrl.wxd && id_waddr =/= UInt(0), id_waddr))
val fp_hazard_targets = Seq((io.fpu.dec.ren1, id_raddr1),
(io.fpu.dec.ren2, id_raddr2),
(io.fpu.dec.ren3, id_raddr3),
(io.fpu.dec.wen, id_waddr))
val sboard = new Scoreboard(32, true)
sboard.clear(ll_wen, ll_waddr)
def id_sboard_clear_bypass(r: UInt) = {
// ll_waddr arrives late when D$ has ECC, so reshuffle the hazard check
if (tileParams.dcache.get.dataECC.isInstanceOf[IdentityCode]) ll_wen && ll_waddr === r
else div.io.resp.fire() && div.io.resp.bits.tag === r || dmem_resp_replay && dmem_resp_xpu && dmem_resp_waddr === r
}
val id_sboard_hazard = checkHazards(hazard_targets, rd => sboard.read(rd) && !id_sboard_clear_bypass(rd))
sboard.set(wb_set_sboard && wb_wen, wb_waddr)
// stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage.
val ex_cannot_bypass = ex_ctrl.csr =/= CSR.N || ex_ctrl.jalr || ex_ctrl.mem || ex_ctrl.div || ex_ctrl.fp || ex_ctrl.rocc
val data_hazard_ex = ex_ctrl.wxd && checkHazards(hazard_targets, _ === ex_waddr)
val fp_data_hazard_ex = ex_ctrl.wfd && checkHazards(fp_hazard_targets, _ === ex_waddr)
val id_ex_hazard = ex_reg_valid && (data_hazard_ex && ex_cannot_bypass || fp_data_hazard_ex)
// stall for RAW/WAW hazards on CSRs, LB/LH, and mul/div in memory stage.
val mem_mem_cmd_bh =
if (fastLoadWord) Bool(!fastLoadByte) && mem_reg_slow_bypass
else Bool(true)
val mem_cannot_bypass = mem_ctrl.csr =/= CSR.N || mem_ctrl.mem && mem_mem_cmd_bh || mem_ctrl.div || mem_ctrl.fp || mem_ctrl.rocc
val data_hazard_mem = mem_ctrl.wxd && checkHazards(hazard_targets, _ === mem_waddr)
val fp_data_hazard_mem = mem_ctrl.wfd && checkHazards(fp_hazard_targets, _ === mem_waddr)
val id_mem_hazard = mem_reg_valid && (data_hazard_mem && mem_cannot_bypass || fp_data_hazard_mem)
id_load_use := mem_reg_valid && data_hazard_mem && mem_ctrl.mem
// stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback.
val data_hazard_wb = wb_ctrl.wxd && checkHazards(hazard_targets, _ === wb_waddr)
val fp_data_hazard_wb = wb_ctrl.wfd && checkHazards(fp_hazard_targets, _ === wb_waddr)
val id_wb_hazard = wb_reg_valid && (data_hazard_wb && wb_set_sboard || fp_data_hazard_wb)
val id_stall_fpu = if (usingFPU) {
val fp_sboard = new Scoreboard(32)
fp_sboard.set((wb_dcache_miss && wb_ctrl.wfd || io.fpu.sboard_set) && wb_valid, wb_waddr)
fp_sboard.clear(dmem_resp_replay && dmem_resp_fpu, dmem_resp_waddr)
fp_sboard.clear(io.fpu.sboard_clr, io.fpu.sboard_clra)
id_csr_en && !io.fpu.fcsr_rdy || checkHazards(fp_hazard_targets, fp_sboard.read _)
} else Bool(false)
val dcache_blocked = Reg(Bool())
dcache_blocked := !io.dmem.req.ready && (io.dmem.req.valid || dcache_blocked)
val rocc_blocked = Reg(Bool())
rocc_blocked := !wb_xcpt && !io.rocc.cmd.ready && (io.rocc.cmd.valid || rocc_blocked)
val ctrl_stalld =
id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard ||
csr.io.singleStep && (ex_reg_valid || mem_reg_valid || wb_reg_valid) ||
id_ctrl.fp && id_stall_fpu ||
id_ctrl.mem && dcache_blocked || // reduce activity during D$ misses
id_ctrl.rocc && rocc_blocked || // reduce activity while RoCC is busy
id_ctrl.div && (!(div.io.req.ready || (div.io.resp.valid && !wb_wxd)) || div.io.req.valid) || // reduce odds of replay
id_do_fence ||
csr.io.csr_stall
ctrl_killd := !ibuf.io.inst(0).valid || ibuf.io.inst(0).bits.replay || take_pc_mem_wb || ctrl_stalld || csr.io.interrupt
io.imem.req.valid := take_pc
io.imem.req.bits.speculative := !take_pc_wb
io.imem.req.bits.pc :=
Mux(wb_xcpt || csr.io.eret, csr.io.evec, // exception or [m|s]ret
Mux(replay_wb, wb_reg_pc, // replay
mem_npc)) // flush or branch misprediction
io.imem.flush_icache := wb_reg_valid && wb_ctrl.fence_i && !io.dmem.s2_nack
io.imem.sfence.valid := wb_reg_valid && wb_reg_sfence
io.imem.sfence.bits.rs1 := wb_ctrl.mem_type(0)
io.imem.sfence.bits.rs2 := wb_ctrl.mem_type(1)
io.imem.sfence.bits.addr := wb_reg_wdata
io.imem.sfence.bits.asid := wb_reg_rs2
io.ptw.sfence := io.imem.sfence
ibuf.io.inst(0).ready := !ctrl_stalld
io.imem.btb_update.valid := (mem_reg_replay && mem_reg_btb_hit) || (mem_reg_valid && !take_pc_wb && mem_wrong_npc && (!mem_cfi || mem_cfi_taken))
io.imem.btb_update.bits.isValid := !mem_reg_replay && mem_cfi
io.imem.btb_update.bits.cfiType :=
Mux((mem_ctrl.jal || mem_ctrl.jalr) && mem_waddr(0), CFIType.call,
Mux(mem_ctrl.jalr && mem_reg_inst(19,15) === BitPat("b00?01"), CFIType.ret,
Mux(mem_ctrl.jal || mem_ctrl.jalr, CFIType.jump,
CFIType.branch)))
io.imem.btb_update.bits.target := io.imem.req.bits.pc
io.imem.btb_update.bits.br_pc := (if (usingCompressed) mem_reg_pc + Mux(mem_reg_rvc, UInt(0), UInt(2)) else mem_reg_pc)
io.imem.btb_update.bits.pc := ~(~io.imem.btb_update.bits.br_pc | (coreInstBytes*fetchWidth-1))
io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit
io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp
io.imem.bht_update.valid := mem_reg_valid && !take_pc_wb && mem_ctrl.branch
io.imem.bht_update.bits.pc := io.imem.btb_update.bits.pc
io.imem.bht_update.bits.taken := mem_br_taken
io.imem.bht_update.bits.mispredict := mem_wrong_npc
io.imem.bht_update.bits.prediction := io.imem.btb_update.bits.prediction
io.fpu.valid := !ctrl_killd && id_ctrl.fp
io.fpu.killx := ctrl_killx
io.fpu.killm := killm_common
io.fpu.inst := id_inst(0)
io.fpu.fromint_data := ex_rs(0)
io.fpu.dmem_resp_val := dmem_resp_valid && dmem_resp_fpu
io.fpu.dmem_resp_data := io.dmem.resp.bits.data_word_bypass
io.fpu.dmem_resp_type := io.dmem.resp.bits.typ
io.fpu.dmem_resp_tag := dmem_resp_waddr
io.dmem.req.valid := ex_reg_valid && ex_ctrl.mem
val ex_dcache_tag = Cat(ex_waddr, ex_ctrl.fp)
require(coreDCacheReqTagBits >= ex_dcache_tag.getWidth)
io.dmem.req.bits.tag := ex_dcache_tag
io.dmem.req.bits.cmd := ex_ctrl.mem_cmd
io.dmem.req.bits.typ := ex_ctrl.mem_type
io.dmem.req.bits.phys := Bool(false)
io.dmem.req.bits.addr := encodeVirtualAddress(ex_rs(0), alu.io.adder_out)
io.dmem.invalidate_lr := wb_xcpt
io.dmem.s1_data.data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2)
io.dmem.s1_kill := killm_common || mem_breakpoint
io.rocc.cmd.valid := wb_reg_valid && wb_ctrl.rocc && !replay_wb_common
io.rocc.exception := wb_xcpt && csr.io.status.xs.orR
io.rocc.cmd.bits.status := csr.io.status
io.rocc.cmd.bits.inst := new RoCCInstruction().fromBits(wb_reg_inst)
io.rocc.cmd.bits.rs1 := wb_reg_wdata
io.rocc.cmd.bits.rs2 := wb_reg_rs2
// evaluate performance counters
val icache_blocked = !(io.imem.resp.valid || RegNext(io.imem.resp.valid))
csr.io.counters foreach { c => c.inc := RegNext(perfEvents.evaluate(c.eventSel)) }
if (enableCommitLog) {
val pc = Wire(SInt(width=xLen))
pc := wb_reg_pc.asSInt
val inst = wb_reg_inst
val cinst = wb_reg_cinst
val rd = RegNext(RegNext(RegNext(id_waddr)))
val wfd = wb_ctrl.wfd
val wxd = wb_ctrl.wxd
val has_data = wb_wen && !wb_set_sboard
val priv = csr.io.status.prv
when (wb_valid) {
when (wfd) {
printf ("%d 0x%x (0x%x) f%d p%d 0xXXXXXXXXXXXXXXXX\n", priv, pc, cinst, rd, rd+UInt(32))
}
.elsewhen (wxd && rd =/= UInt(0) && has_data) {
printf ("%d 0x%x (0x%x) x%d 0x%x\n", priv, pc, cinst, rd, rf_wdata)
}
.elsewhen (wxd && rd =/= UInt(0) && !has_data) {
printf ("%d 0x%x (0x%x) x%d p%d 0xXXXXXXXXXXXXXXXX\n", priv, pc, cinst, rd, rd)
}
.otherwise {
printf ("%d 0x%x (0x%x)\n", priv, pc, cinst)
}
}
when (ll_wen && rf_waddr =/= UInt(0)) {
printf ("x%d p%d 0x%x\n", rf_waddr, rf_waddr, rf_wdata)
}
}
else {
printf("C%d: %d [%d] pc=[%x] W[r%d=%x][%d] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n",
io.hartid, csr.io.time(31,0), wb_valid, wb_reg_pc,
Mux(rf_wen && !(wb_set_sboard && wb_wen), rf_waddr, UInt(0)), rf_wdata, rf_wen,
wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs(0))),
wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs(1))),
wb_reg_cinst, wb_reg_cinst)
}
val max_core_cycles = PlusArg("max-core-cycles",
default = 0,
docstring = "Maximum Core Clock cycles simulation may run before timeout. Ignored if 0 (Default).")
when (max_core_cycles > UInt(0)) {
assert (csr.io.time < max_core_cycles, "Maximum Core Cycles reached.")
}
def checkExceptions(x: Seq[(Bool, UInt)]) =
(x.map(_._1).reduce(_||_), PriorityMux(x))
def checkHazards(targets: Seq[(Bool, UInt)], cond: UInt => Bool) =
targets.map(h => h._1 && cond(h._2)).reduce(_||_)
def encodeVirtualAddress(a0: UInt, ea: UInt) = if (vaddrBitsExtended == vaddrBits) ea else {
// efficient means to compress 64-bit VA into vaddrBits+1 bits
// (VA is bad if VA(vaddrBits) != VA(vaddrBits-1))
val a = a0 >> vaddrBits-1
val e = ea(vaddrBits,vaddrBits-1).asSInt
val msb =
Mux(a === UInt(0) || a === UInt(1), e =/= SInt(0),
Mux(a.asSInt === SInt(-1) || a.asSInt === SInt(-2), e === SInt(-1), e(0)))
Cat(msb, ea(vaddrBits-1,0))
}
class Scoreboard(n: Int, zero: Boolean = false)
{
def set(en: Bool, addr: UInt): Unit = update(en, _next | mask(en, addr))
def clear(en: Bool, addr: UInt): Unit = update(en, _next & ~mask(en, addr))
def read(addr: UInt): Bool = r(addr)
def readBypassed(addr: UInt): Bool = _next(addr)
private val _r = Reg(init=Bits(0, n))
private val r = if (zero) (_r >> 1 << 1) else _r
private var _next = r
private var ens = Bool(false)
private def mask(en: Bool, addr: UInt) = Mux(en, UInt(1) << addr, UInt(0))
private def update(en: Bool, update: UInt) = {
_next = update
ens = ens || en
when (ens) { _r := _next }
}
}
}
class RegFile(n: Int, w: Int, zero: Boolean = false) {
private val rf = Mem(n, UInt(width = w))
private def access(addr: UInt) = rf(~addr(log2Up(n)-1,0))
private val reads = ArrayBuffer[(UInt,UInt)]()
private var canRead = true
def read(addr: UInt) = {
require(canRead)
reads += addr -> Wire(UInt())
reads.last._2 := Mux(Bool(zero) && addr === UInt(0), UInt(0), access(addr))
reads.last._2
}
def write(addr: UInt, data: UInt) = {
canRead = false
when (addr =/= UInt(0)) {
access(addr) := data
for ((raddr, rdata) <- reads)
when (addr === raddr) { rdata := data }
}
}
}
object ImmGen {
def apply(sel: UInt, inst: UInt) = {
val sign = Mux(sel === IMM_Z, SInt(0), inst(31).asSInt)
val b30_20 = Mux(sel === IMM_U, inst(30,20).asSInt, sign)
val b19_12 = Mux(sel =/= IMM_U && sel =/= IMM_UJ, sign, inst(19,12).asSInt)
val b11 = Mux(sel === IMM_U || sel === IMM_Z, SInt(0),
Mux(sel === IMM_UJ, inst(20).asSInt,
Mux(sel === IMM_SB, inst(7).asSInt, sign)))
val b10_5 = Mux(sel === IMM_U || sel === IMM_Z, Bits(0), inst(30,25))
val b4_1 = Mux(sel === IMM_U, Bits(0),
Mux(sel === IMM_S || sel === IMM_SB, inst(11,8),
Mux(sel === IMM_Z, inst(19,16), inst(24,21))))
val b0 = Mux(sel === IMM_S, inst(7),
Mux(sel === IMM_I, inst(20),
Mux(sel === IMM_Z, inst(15), Bits(0))))
Cat(sign, b30_20, b19_12, b11, b10_5, b4_1, b0).asSInt
}
}