1
0

Refactor pipeline RTL (merge ctrl + dpath into rocket)

This commit is contained in:
Andrew Waterman 2015-07-21 17:10:56 -07:00
parent ac6e73e317
commit cc447c8110
7 changed files with 662 additions and 839 deletions

View File

@ -18,11 +18,6 @@ trait ScalarOpConstants {
val BR_LTU = Bits(6, 3) val BR_LTU = Bits(6, 3)
val BR_GEU = Bits(7, 3) val BR_GEU = Bits(7, 3)
val PC_EX = UInt(0, 2)
val PC_MEM = UInt(1, 2)
val PC_WB = UInt(2, 2)
val PC_CSR = UInt(3, 2)
val A1_X = Bits("b??", 2) val A1_X = Bits("b??", 2)
val A1_ZERO = UInt(0, 2) val A1_ZERO = UInt(0, 2)
val A1_RS1 = UInt(1, 2) val A1_RS1 = UInt(1, 2)
@ -46,13 +41,6 @@ trait ScalarOpConstants {
val N = Bool(false) val N = Bool(false)
val Y = Bool(true) val Y = Bool(true)
val NBYP = 4
val SZ_BYP = log2Up(NBYP)
val BYP_0 = 0
val BYP_EX = 1
val BYP_MEM = 2
val BYP_DC = 3
val SZ_DW = 1 val SZ_DW = 1
val DW_X = X val DW_X = X
val DW_32 = N val DW_32 = N

View File

@ -1,94 +0,0 @@
// See LICENSE for license details.
package rocket
import Chisel._
import Util._
import uncore._
case object BuildFPU extends Field[Option[() => FPU]]
case object FDivSqrt extends Field[Boolean]
case object XLen extends Field[Int]
case object NMultXpr extends Field[Int]
case object FetchWidth extends Field[Int]
case object RetireWidth extends Field[Int]
case object UseVM extends Field[Boolean]
case object FastLoadWord extends Field[Boolean]
case object FastLoadByte extends Field[Boolean]
case object FastMulDiv extends Field[Boolean]
case object CoreInstBits extends Field[Int]
case object CoreDataBits extends Field[Int]
case object CoreDCacheReqTagBits extends Field[Int]
case object NCustomMRWCSRs extends Field[Int]
abstract trait CoreParameters extends UsesParameters {
val xLen = params(XLen)
val paddrBits = params(PAddrBits)
val vaddrBits = params(VAddrBits)
val pgIdxBits = params(PgIdxBits)
val ppnBits = params(PPNBits)
val vpnBits = params(VPNBits)
val pgLevels = params(PgLevels)
val pgLevelBits = params(PgLevelBits)
val asIdBits = params(ASIdBits)
val retireWidth = params(RetireWidth)
val coreFetchWidth = params(FetchWidth)
val coreInstBits = params(CoreInstBits)
val coreInstBytes = coreInstBits/8
val coreDataBits = xLen
val coreDataBytes = coreDataBits/8
val coreDCacheReqTagBits = params(CoreDCacheReqTagBits)
val coreMaxAddrBits = math.max(ppnBits,vpnBits+1) + pgIdxBits
val vaddrBitsExtended = vaddrBits + (vaddrBits < xLen).toInt
if(params(FastLoadByte)) require(params(FastLoadWord))
}
abstract trait RocketCoreParameters extends CoreParameters
{
require(params(FetchWidth) == 1) // for now...
require(params(RetireWidth) == 1) // for now...
}
abstract class CoreBundle extends Bundle with CoreParameters
abstract class CoreModule extends Module with CoreParameters
class RocketIO extends Bundle
{
val host = new HTIFIO
val imem = new CPUFrontendIO
val dmem = new HellaCacheIO
val ptw = new DatapathPTWIO().flip
val rocc = new RoCCInterface().flip
}
class Core extends Module with CoreParameters
{
val io = new RocketIO
val ctrl = Module(new Control)
val dpath = Module(new Datapath)
//If so specified, build an FPU module and wire it in
params(BuildFPU)
.map { bf => bf() }
.foreach { fpu =>
dpath.io.fpu <> fpu.io.dpath
ctrl.io.fpu <> fpu.io.ctrl
}
ctrl.io.dpath <> dpath.io.ctrl
dpath.io.host <> io.host
ctrl.io.imem <> io.imem
dpath.io.imem <> io.imem
ctrl.io.dmem <> io.dmem
dpath.io.dmem <> io.dmem
dpath.io.ptw <> io.ptw
ctrl.io.rocc <> io.rocc
dpath.io.rocc <> io.rocc
}

View File

@ -1,291 +0,0 @@
// See LICENSE for license details.
package rocket
import Chisel._
import Instructions._
import Util._
import uncore._
class Datapath extends CoreModule
{
val io = new Bundle {
val host = new HTIFIO
val ctrl = new CtrlDpathIO().flip
val dmem = new HellaCacheIO
val ptw = new DatapathPTWIO().flip
val imem = new CPUFrontendIO
val fpu = new DpathFPUIO
val rocc = new RoCCInterface().flip
}
// execute definitions
val ex_reg_pc = Reg(UInt())
val ex_reg_inst = Reg(Bits())
val ex_reg_kill = Reg(Bool())
val ex_reg_rs_bypass = Reg(Vec.fill(2)(Bool()))
val ex_reg_rs_lsb = Reg(Vec.fill(2)(Bits()))
val ex_reg_rs_msb = Reg(Vec.fill(2)(Bits()))
// memory definitions
val mem_reg_pc = Reg(UInt())
val mem_reg_inst = Reg(Bits())
val mem_reg_wdata = Reg(Bits())
val mem_reg_kill = Reg(Bool())
val mem_reg_rs2 = Reg(Bits())
// writeback definitions
val wb_reg_pc = Reg(UInt())
val wb_reg_inst = Reg(Bits())
val wb_reg_wdata = Reg(Bits())
val wb_wdata = Wire(Bits())
val wb_reg_rs2 = Reg(Bits())
// instruction decode stage
val id_inst = io.imem.resp.bits.data(0).toBits; require(params(FetchWidth) == 1)
val id_pc = io.imem.resp.bits.pc
class RegFile {
private val rf = Mem(UInt(width = 64), 31)
private val reads = collection.mutable.ArrayBuffer[(UInt,UInt)]()
private var canRead = true
def read(addr: UInt) = {
require(canRead)
reads += addr -> Wire(UInt())
reads.last._2 := rf(~addr)
reads.last._2
}
def write(addr: UInt, data: UInt) = {
canRead = false
when (addr != UInt(0)) {
rf(~addr) := data
for ((raddr, rdata) <- reads)
when (addr === raddr) { rdata := data }
}
}
}
val rf = new RegFile
// RF read ports + bypass from WB stage
val id_raddr = Vec(id_inst(19,15), id_inst(24,20))
val id_rs = id_raddr.map(rf.read _)
// immediate generation
def imm(sel: Bits, inst: Bits) = {
val sign = Mux(sel === IMM_Z, SInt(0), inst(31).toSInt)
val b30_20 = Mux(sel === IMM_U, inst(30,20).toSInt, sign)
val b19_12 = Mux(sel != IMM_U && sel != IMM_UJ, sign, inst(19,12).toSInt)
val b11 = Mux(sel === IMM_U || sel === IMM_Z, SInt(0),
Mux(sel === IMM_UJ, inst(20).toSInt,
Mux(sel === IMM_SB, inst(7).toSInt, sign)))
val b10_5 = Mux(sel === IMM_U || sel === IMM_Z, Bits(0), inst(30,25))
val b4_1 = Mux(sel === IMM_U, Bits(0),
Mux(sel === IMM_S || sel === IMM_SB, inst(11,8),
Mux(sel === IMM_Z, inst(19,16), inst(24,21))))
val b0 = Mux(sel === IMM_S, inst(7),
Mux(sel === IMM_I, inst(20),
Mux(sel === IMM_Z, inst(15), Bits(0))))
Cat(sign, b30_20, b19_12, b11, b10_5, b4_1, b0).toSInt
}
io.ctrl.inst := id_inst
io.fpu.inst := id_inst
// execute stage
ex_reg_kill := io.ctrl.killd
when (!io.ctrl.killd) {
ex_reg_pc := id_pc
ex_reg_inst := id_inst
ex_reg_rs_bypass := io.ctrl.bypass
for (i <- 0 until id_rs.size) {
when (io.ctrl.ren(i)) {
ex_reg_rs_lsb(i) := id_rs(i)(SZ_BYP-1,0)
when (!io.ctrl.bypass(i)) {
ex_reg_rs_msb(i) := id_rs(i) >> SZ_BYP
}
}
when (io.ctrl.bypass(i)) { ex_reg_rs_lsb(i) := io.ctrl.bypass_src(i) }
}
}
val bypass = Wire(Vec(Bits(), NBYP))
bypass(BYP_0) := Bits(0)
bypass(BYP_EX) := mem_reg_wdata
bypass(BYP_MEM) := wb_reg_wdata
bypass(BYP_DC) := (if(params(FastLoadByte)) io.dmem.resp.bits.data_subword
else if(params(FastLoadWord)) io.dmem.resp.bits.data
else wb_reg_wdata)
val ex_rs = for (i <- 0 until id_rs.size)
yield Mux(ex_reg_rs_bypass(i), bypass(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i)))
val ex_imm = imm(io.ctrl.ex_ctrl.sel_imm, ex_reg_inst)
val ex_op1 = MuxLookup(io.ctrl.ex_ctrl.sel_alu1, SInt(0), Seq(
A1_RS1 -> ex_rs(0).toSInt,
A1_PC -> ex_reg_pc.toSInt))
val ex_op2 = MuxLookup(io.ctrl.ex_ctrl.sel_alu2, SInt(0), Seq(
A2_RS2 -> ex_rs(1).toSInt,
A2_IMM -> ex_imm,
A2_FOUR -> SInt(4)))
val alu = Module(new ALU)
alu.io.dw := io.ctrl.ex_ctrl.alu_dw
alu.io.fn := io.ctrl.ex_ctrl.alu_fn
alu.io.in2 := ex_op2.toUInt
alu.io.in1 := ex_op1
// multiplier and divider
val div = Module(new MulDiv(mulUnroll = if(params(FastMulDiv)) 8 else 1,
earlyOut = params(FastMulDiv)))
div.io.req.valid := io.ctrl.ex_valid && io.ctrl.ex_ctrl.div
div.io.req.bits.dw := io.ctrl.ex_ctrl.alu_dw
div.io.req.bits.fn := io.ctrl.ex_ctrl.alu_fn
div.io.req.bits.in1 := ex_rs(0)
div.io.req.bits.in2 := ex_rs(1)
div.io.req.bits.tag := io.ctrl.ex_waddr
div.io.kill := io.ctrl.killm && Reg(next = div.io.req.fire())
io.ctrl.div_mul_rdy := div.io.req.ready
io.fpu.fromint_data := ex_rs(0)
def vaSign(a0: UInt, ea: Bits) = {
// efficient means to compress 64-bit VA into vaddrBits+1 bits
// (VA is bad if VA(vaddrBits) != VA(vaddrBits-1))
val a = a0 >> vaddrBits-1
val e = ea(vaddrBits,vaddrBits-1)
Mux(a === UInt(0) || a === UInt(1), e != UInt(0),
Mux(a === SInt(-1) || a === SInt(-2), e === SInt(-1),
e(0)))
}
// D$ request interface (registered inside D$ module)
// other signals (req_val, req_rdy) connect to control module
io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(vaddrBits-1,0)).toUInt
io.dmem.req.bits.tag := Cat(io.ctrl.ex_waddr, io.ctrl.ex_ctrl.fp)
require(io.dmem.req.bits.tag.getWidth >= 6)
require(params(CoreDCacheReqTagBits) >= 6)
// processor control regfile read
val csr = Module(new CSRFile)
csr.io.host <> io.host
csr.io <> io.ctrl
csr.io <> io.fpu
csr.io.rocc <> io.rocc
csr.io.pc := wb_reg_pc
csr.io.uarch_counters.foreach(_ := Bool(false))
io.ptw.ptbr := csr.io.ptbr
io.ptw.invalidate := csr.io.fatc
io.ptw.status := csr.io.status
// memory stage
mem_reg_kill := ex_reg_kill
when (!ex_reg_kill) {
mem_reg_pc := ex_reg_pc
mem_reg_inst := ex_reg_inst
mem_reg_wdata := alu.io.out
when (io.ctrl.ex_ctrl.rxs2 && (io.ctrl.ex_ctrl.mem || io.ctrl.ex_ctrl.rocc)) {
mem_reg_rs2 := ex_rs(1)
}
}
io.dmem.req.bits.data := Mux(io.ctrl.mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2)
// writeback arbitration
val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool
val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool
val dmem_resp_waddr = io.dmem.resp.bits.tag.toUInt >> UInt(1)
val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data
val dmem_resp_replay = io.dmem.resp.bits.replay && io.dmem.resp.bits.has_data
val ll_wdata = Wire(Bits())
div.io.resp.ready := io.ctrl.ll_ready
ll_wdata := div.io.resp.bits.data
io.ctrl.ll_waddr := div.io.resp.bits.tag
io.ctrl.ll_wen := div.io.resp.fire()
if (!params(BuildRoCC).isEmpty) {
io.rocc.resp.ready := io.ctrl.ll_ready
when (io.rocc.resp.fire()) {
div.io.resp.ready := Bool(false)
ll_wdata := io.rocc.resp.bits.data
io.ctrl.ll_waddr := io.rocc.resp.bits.rd
io.ctrl.ll_wen := Bool(true)
}
}
when (dmem_resp_replay && dmem_resp_xpu) {
div.io.resp.ready := Bool(false)
if (!params(BuildRoCC).isEmpty)
io.rocc.resp.ready := Bool(false)
io.ctrl.ll_waddr := dmem_resp_waddr
io.ctrl.ll_wen := Bool(true)
}
io.fpu.dmem_resp_val := dmem_resp_valid && dmem_resp_fpu
io.fpu.dmem_resp_data := io.dmem.resp.bits.data
io.fpu.dmem_resp_type := io.dmem.resp.bits.typ
io.fpu.dmem_resp_tag := dmem_resp_waddr
io.ctrl.mem_br_taken := mem_reg_wdata(0)
val mem_br_target = mem_reg_pc.toSInt +
Mux(io.ctrl.mem_ctrl.branch && io.ctrl.mem_br_taken, imm(IMM_SB, mem_reg_inst),
Mux(io.ctrl.mem_ctrl.jal, imm(IMM_UJ, mem_reg_inst), SInt(4)))
val mem_npc = (Mux(io.ctrl.mem_ctrl.jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(vaddrBits-1,0)).toSInt, mem_br_target) & SInt(-2)).toUInt
io.ctrl.mem_misprediction := mem_npc != ex_reg_pc || !io.ctrl.ex_valid
io.ctrl.mem_npc_misaligned := mem_npc(1)
io.ctrl.mem_rs1_ra := mem_reg_inst(19,15) === 1
val mem_int_wdata = Mux(io.ctrl.mem_ctrl.jalr, mem_br_target, mem_reg_wdata.toSInt).toUInt
// writeback stage
when (!mem_reg_kill) {
wb_reg_pc := mem_reg_pc
wb_reg_inst := mem_reg_inst
wb_reg_wdata := Mux(io.ctrl.mem_ctrl.fp && io.ctrl.mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata)
when (io.ctrl.mem_ctrl.rocc) {
wb_reg_rs2 := mem_reg_rs2
}
}
wb_wdata := Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data_subword,
Mux(io.ctrl.ll_wen, ll_wdata,
Mux(io.ctrl.csr_cmd != CSR.N, csr.io.rw.rdata,
wb_reg_wdata)))
val wb_wen = io.ctrl.ll_wen || io.ctrl.wb_wen
val wb_waddr = Mux(io.ctrl.ll_wen, io.ctrl.ll_waddr, io.ctrl.wb_waddr)
when (wb_wen) { rf.write(wb_waddr, wb_wdata) }
// scoreboard clear (for div/mul and D$ load miss writebacks)
io.ctrl.fp_sboard_clr := dmem_resp_replay && dmem_resp_fpu
io.ctrl.fp_sboard_clra := dmem_resp_waddr
// processor control regfile write
csr.io.rw.addr := wb_reg_inst(31,20)
csr.io.rw.cmd := io.ctrl.csr_cmd
csr.io.rw.wdata := wb_reg_wdata
io.rocc.cmd.bits.inst := new RoCCInstruction().fromBits(wb_reg_inst)
io.rocc.cmd.bits.rs1 := wb_reg_wdata
io.rocc.cmd.bits.rs2 := wb_reg_rs2
// hook up I$
io.imem.req.bits.pc :=
Mux(io.ctrl.sel_pc === PC_MEM, mem_npc,
Mux(io.ctrl.sel_pc === PC_CSR, csr.io.evec,
wb_reg_pc)).toUInt // PC_WB
io.imem.btb_update.bits.pc := mem_reg_pc
io.imem.btb_update.bits.target := io.imem.req.bits.pc
io.imem.btb_update.bits.br_pc := mem_reg_pc
io.imem.bht_update.bits.pc := mem_reg_pc
io.imem.ras_update.bits.returnAddr := mem_int_wdata
// for hazard/bypass opportunity detection
io.ctrl.ex_waddr := ex_reg_inst(11,7)
io.ctrl.mem_waddr := mem_reg_inst(11,7)
io.ctrl.wb_waddr := wb_reg_inst(11,7)
printf("C%d: %d [%d] pc=[%x] W[r%d=%x][%d] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n",
io.host.id, csr.io.time(32,0), io.ctrl.retire, wb_reg_pc,
Mux(wb_wen, wb_waddr, UInt(0)), wb_wdata, wb_wen,
wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs(0))),
wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs(1))),
wb_reg_inst, wb_reg_inst)
}

View File

@ -138,33 +138,34 @@ class FPUDecoder extends Module
s.toint, s.fastpipe, s.fma, s.div, s.sqrt, s.round, s.wflags) := decoder s.toint, s.fastpipe, s.fma, s.div, s.sqrt, s.round, s.wflags) := decoder
} }
class DpathFPUIO extends Bundle { class FPUIO extends Bundle {
val inst = Bits(OUTPUT, 32) val inst = Bits(INPUT, 32)
val fromint_data = Bits(OUTPUT, 64) val fromint_data = Bits(INPUT, 64)
val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ) val fcsr_rm = Bits(INPUT, FPConstants.RM_SZ)
val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ)).flip val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ))
val store_data = Bits(INPUT, 64) val store_data = Bits(OUTPUT, 64)
val toint_data = Bits(INPUT, 64) val toint_data = Bits(OUTPUT, 64)
val dmem_resp_val = Bool(OUTPUT) val dmem_resp_val = Bool(INPUT)
val dmem_resp_type = Bits(OUTPUT, 3) val dmem_resp_type = Bits(INPUT, 3)
val dmem_resp_tag = UInt(OUTPUT, 5) val dmem_resp_tag = UInt(INPUT, 5)
val dmem_resp_data = Bits(OUTPUT, 64) val dmem_resp_data = Bits(INPUT, 64)
val valid = Bool(INPUT)
val fcsr_rdy = Bool(OUTPUT)
val nack_mem = Bool(OUTPUT)
val illegal_rm = Bool(OUTPUT)
val killx = Bool(INPUT)
val killm = Bool(INPUT)
val dec = new FPUCtrlSigs().asOutput
val sboard_set = Bool(OUTPUT)
val sboard_clr = Bool(OUTPUT)
val sboard_clra = UInt(OUTPUT, 5)
} }
class CtrlFPUIO extends Bundle { class CtrlFPUIO extends Bundle {
val valid = Bool(OUTPUT)
val fcsr_rdy = Bool(INPUT)
val nack_mem = Bool(INPUT)
val illegal_rm = Bool(INPUT)
val killx = Bool(OUTPUT)
val killm = Bool(OUTPUT)
val dec = new FPUCtrlSigs().asInput
val sboard_set = Bool(INPUT)
val sboard_clr = Bool(INPUT)
val sboard_clra = UInt(INPUT, 5)
} }
class FPResult extends Bundle class FPResult extends Bundle
@ -355,31 +356,28 @@ class FPUFMAPipe(val latency: Int, sigWidth: Int, expWidth: Int) extends Module
class FPU extends Module class FPU extends Module
{ {
val io = new Bundle { val io = new FPUIO
val ctrl = (new CtrlFPUIO).flip
val dpath = (new DpathFPUIO).flip
}
val ex_reg_valid = Reg(next=io.ctrl.valid, init=Bool(false)) val ex_reg_valid = Reg(next=io.valid, init=Bool(false))
val ex_reg_inst = RegEnable(io.dpath.inst, io.ctrl.valid) val ex_reg_inst = RegEnable(io.inst, io.valid)
val mem_reg_valid = Reg(next=ex_reg_valid && !io.ctrl.killx, init=Bool(false)) val mem_reg_valid = Reg(next=ex_reg_valid && !io.killx, init=Bool(false))
val mem_reg_inst = RegEnable(ex_reg_inst, ex_reg_valid) val mem_reg_inst = RegEnable(ex_reg_inst, ex_reg_valid)
val killm = io.ctrl.killm || io.ctrl.nack_mem val killm = io.killm || io.nack_mem
val wb_reg_valid = Reg(next=mem_reg_valid && !killm, init=Bool(false)) val wb_reg_valid = Reg(next=mem_reg_valid && !killm, init=Bool(false))
val fp_decoder = Module(new FPUDecoder) val fp_decoder = Module(new FPUDecoder)
fp_decoder.io.inst := io.dpath.inst fp_decoder.io.inst := io.inst
val id_ctrl = fp_decoder.io.sigs val id_ctrl = fp_decoder.io.sigs
val ex_ctrl = RegEnable(id_ctrl, io.ctrl.valid) val ex_ctrl = RegEnable(id_ctrl, io.valid)
val mem_ctrl = RegEnable(ex_ctrl, ex_reg_valid) val mem_ctrl = RegEnable(ex_ctrl, ex_reg_valid)
val wb_ctrl = RegEnable(mem_ctrl, mem_reg_valid) val wb_ctrl = RegEnable(mem_ctrl, mem_reg_valid)
// load response // load response
val load_wb = Reg(next=io.dpath.dmem_resp_val) val load_wb = Reg(next=io.dmem_resp_val)
val load_wb_single = RegEnable(io.dpath.dmem_resp_type === MT_W || io.dpath.dmem_resp_type === MT_WU, io.dpath.dmem_resp_val) val load_wb_single = RegEnable(io.dmem_resp_type === MT_W || io.dmem_resp_type === MT_WU, io.dmem_resp_val)
val load_wb_data = RegEnable(io.dpath.dmem_resp_data, io.dpath.dmem_resp_val) val load_wb_data = RegEnable(io.dmem_resp_data, io.dmem_resp_val)
val load_wb_tag = RegEnable(io.dpath.dmem_resp_tag, io.dpath.dmem_resp_val) val load_wb_tag = RegEnable(io.dmem_resp_tag, io.dmem_resp_val)
val rec_s = hardfloat.floatNToRecodedFloatN(load_wb_data, 23, 9) val rec_s = hardfloat.floatNToRecodedFloatN(load_wb_data, 23, 9)
val rec_d = hardfloat.floatNToRecodedFloatN(load_wb_data, 52, 12) val rec_d = hardfloat.floatNToRecodedFloatN(load_wb_data, 52, 12)
val load_wb_data_recoded = Mux(load_wb_single, Cat(SInt(-1, 32), rec_s), rec_d) val load_wb_data_recoded = Mux(load_wb_single, Cat(SInt(-1, 32), rec_s), rec_d)
@ -389,20 +387,20 @@ class FPU extends Module
when (load_wb) { regfile(load_wb_tag) := load_wb_data_recoded } when (load_wb) { regfile(load_wb_tag) := load_wb_data_recoded }
val ex_ra1::ex_ra2::ex_ra3::Nil = List.fill(3)(Reg(UInt())) val ex_ra1::ex_ra2::ex_ra3::Nil = List.fill(3)(Reg(UInt()))
when (io.ctrl.valid) { when (io.valid) {
when (id_ctrl.ren1) { when (id_ctrl.ren1) {
when (!id_ctrl.swap12) { ex_ra1 := io.dpath.inst(19,15) } when (!id_ctrl.swap12) { ex_ra1 := io.inst(19,15) }
when (id_ctrl.swap12) { ex_ra2 := io.dpath.inst(19,15) } when (id_ctrl.swap12) { ex_ra2 := io.inst(19,15) }
} }
when (id_ctrl.ren2) { when (id_ctrl.ren2) {
when (id_ctrl.swap12) { ex_ra1 := io.dpath.inst(24,20) } when (id_ctrl.swap12) { ex_ra1 := io.inst(24,20) }
when (id_ctrl.swap23) { ex_ra3 := io.dpath.inst(24,20) } when (id_ctrl.swap23) { ex_ra3 := io.inst(24,20) }
when (!id_ctrl.swap12 && !id_ctrl.swap23) { ex_ra2 := io.dpath.inst(24,20) } when (!id_ctrl.swap12 && !id_ctrl.swap23) { ex_ra2 := io.inst(24,20) }
} }
when (id_ctrl.ren3) { ex_ra3 := io.dpath.inst(31,27) } when (id_ctrl.ren3) { ex_ra3 := io.inst(31,27) }
} }
val ex_rs1::ex_rs2::ex_rs3::Nil = Seq(ex_ra1, ex_ra2, ex_ra3).map(regfile(_)) val ex_rs1::ex_rs2::ex_rs3::Nil = Seq(ex_ra1, ex_ra2, ex_ra3).map(regfile(_))
val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.dpath.fcsr_rm, ex_reg_inst(14,12)) val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.fcsr_rm, ex_reg_inst(14,12))
val req = Wire(new FPInput) val req = Wire(new FPInput)
req := ex_ctrl req := ex_ctrl
@ -423,13 +421,13 @@ class FPU extends Module
val fpiu = Module(new FPToInt) val fpiu = Module(new FPToInt)
fpiu.io.in.valid := ex_reg_valid && (ex_ctrl.toint || ex_ctrl.div || ex_ctrl.sqrt || ex_ctrl.cmd === FCMD_MINMAX) fpiu.io.in.valid := ex_reg_valid && (ex_ctrl.toint || ex_ctrl.div || ex_ctrl.sqrt || ex_ctrl.cmd === FCMD_MINMAX)
fpiu.io.in.bits := req fpiu.io.in.bits := req
io.dpath.store_data := fpiu.io.out.bits.store io.store_data := fpiu.io.out.bits.store
io.dpath.toint_data := fpiu.io.out.bits.toint io.toint_data := fpiu.io.out.bits.toint
val ifpu = Module(new IntToFP(3)) val ifpu = Module(new IntToFP(3))
ifpu.io.in.valid := ex_reg_valid && ex_ctrl.fromint ifpu.io.in.valid := ex_reg_valid && ex_ctrl.fromint
ifpu.io.in.bits := req ifpu.io.in.bits := req
ifpu.io.in.bits.in1 := io.dpath.fromint_data ifpu.io.in.bits.in1 := io.fromint_data
val fpmu = Module(new FPToFP(2)) val fpmu = Module(new FPToFP(2))
fpmu.io.in.valid := ex_reg_valid && ex_ctrl.fastpipe fpmu.io.in.valid := ex_reg_valid && ex_ctrl.fastpipe
@ -489,22 +487,22 @@ class FPU extends Module
val wb_toint_valid = wb_reg_valid && wb_ctrl.toint val wb_toint_valid = wb_reg_valid && wb_ctrl.toint
val wb_toint_exc = RegEnable(fpiu.io.out.bits.exc, mem_ctrl.toint) val wb_toint_exc = RegEnable(fpiu.io.out.bits.exc, mem_ctrl.toint)
io.dpath.fcsr_flags.valid := wb_toint_valid || divSqrt_wen || wen(0) io.fcsr_flags.valid := wb_toint_valid || divSqrt_wen || wen(0)
io.dpath.fcsr_flags.bits := io.fcsr_flags.bits :=
Mux(wb_toint_valid, wb_toint_exc, UInt(0)) | Mux(wb_toint_valid, wb_toint_exc, UInt(0)) |
Mux(divSqrt_wen, divSqrt_flags, UInt(0)) | Mux(divSqrt_wen, divSqrt_flags, UInt(0)) |
Mux(wen(0), wexc, UInt(0)) Mux(wen(0), wexc, UInt(0))
val units_busy = mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && (!divSqrt_inReady || wen.orR) // || mem_reg_valid && mem_ctrl.fma && Reg(next=Mux(ex_ctrl.single, io.sfma.valid, io.dfma.valid)) val units_busy = mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && (!divSqrt_inReady || wen.orR) // || mem_reg_valid && mem_ctrl.fma && Reg(next=Mux(ex_ctrl.single, io.sfma.valid, io.dfma.valid))
io.ctrl.fcsr_rdy := !(ex_reg_valid && ex_ctrl.wflags || mem_reg_valid && mem_ctrl.wflags || wb_reg_valid && wb_ctrl.toint || wen.orR || divSqrt_in_flight) io.fcsr_rdy := !(ex_reg_valid && ex_ctrl.wflags || mem_reg_valid && mem_ctrl.wflags || wb_reg_valid && wb_ctrl.toint || wen.orR || divSqrt_in_flight)
io.ctrl.nack_mem := units_busy || write_port_busy || divSqrt_in_flight io.nack_mem := units_busy || write_port_busy || divSqrt_in_flight
io.ctrl.dec <> fp_decoder.io.sigs io.dec <> fp_decoder.io.sigs
def useScoreboard(f: ((Pipe, Int)) => Bool) = pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(Bool(false))(_||_) def useScoreboard(f: ((Pipe, Int)) => Bool) = pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(Bool(false))(_||_)
io.ctrl.sboard_set := wb_reg_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl)) || mem_ctrl.div || mem_ctrl.sqrt) io.sboard_set := wb_reg_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl)) || mem_ctrl.div || mem_ctrl.sqrt)
io.ctrl.sboard_clr := divSqrt_wen || (wen(0) && useScoreboard(x => wsrc === UInt(x._2))) io.sboard_clr := divSqrt_wen || (wen(0) && useScoreboard(x => wsrc === UInt(x._2)))
io.ctrl.sboard_clra := waddr io.sboard_clra := waddr
// we don't currently support round-max-magnitude (rm=4) // we don't currently support round-max-magnitude (rm=4)
io.ctrl.illegal_rm := ex_rm(2) && ex_ctrl.round io.illegal_rm := ex_rm(2) && ex_ctrl.round
divSqrt_wdata := 0 divSqrt_wdata := 0
divSqrt_flags := 0 divSqrt_flags := 0
@ -516,7 +514,7 @@ class FPU extends Module
def upconvert(x: UInt) = hardfloat.recodedFloatNToRecodedFloatM(x, Bits(0), 23, 9, 52, 12)._1 def upconvert(x: UInt) = hardfloat.recodedFloatNToRecodedFloatM(x, Bits(0), 23, 9, 52, 12)._1
val divSqrt_wb_hazard = wen.orR val divSqrt_wb_hazard = wen.orR
divSqrt.io.inValid := mem_reg_valid && !divSqrt_wb_hazard && !divSqrt_in_flight && !io.ctrl.killm && (mem_ctrl.div || mem_ctrl.sqrt) divSqrt.io.inValid := mem_reg_valid && !divSqrt_wb_hazard && !divSqrt_in_flight && !io.killm && (mem_ctrl.div || mem_ctrl.sqrt)
divSqrt.io.sqrtOp := mem_ctrl.sqrt divSqrt.io.sqrtOp := mem_ctrl.sqrt
divSqrt.io.a := fpiu.io.as_double.in1 divSqrt.io.a := fpiu.io.as_double.in1
divSqrt.io.b := fpiu.io.as_double.in2 divSqrt.io.b := fpiu.io.as_double.in2

View File

@ -1,4 +1,4 @@
// See LICENSE for license details. // See LICENSE for license details
package rocket package rocket
@ -6,50 +6,6 @@ import Chisel._
import Instructions._ import Instructions._
import uncore.constants.MemoryOpConstants._ import uncore.constants.MemoryOpConstants._
import ALU._ import ALU._
import Util._
class CtrlDpathIO extends CoreBundle
{
// outputs to datapath
val sel_pc = UInt(OUTPUT, 3)
val killd = Bool(OUTPUT)
val killm = Bool(OUTPUT)
val ren = Vec.fill(2)(Bool(OUTPUT))
val ex_ctrl = new IntCtrlSigs().asOutput
val mem_ctrl = new IntCtrlSigs().asOutput
val csr_cmd = UInt(OUTPUT, CSR.SZ)
val ex_valid = Bool(OUTPUT)
val wb_wen = Bool(OUTPUT)
val bypass = Vec.fill(2)(Bool(OUTPUT))
val bypass_src = Vec.fill(2)(Bits(OUTPUT, SZ_BYP))
val ll_ready = Bool(OUTPUT)
// exception handling
val retire = Bool(OUTPUT)
val exception = Bool(OUTPUT)
val cause = UInt(OUTPUT, xLen)
// inputs from datapath
val inst = Bits(INPUT, 32)
val mem_br_taken = Bool(INPUT)
val mem_misprediction = Bool(INPUT)
val mem_npc_misaligned = Bool(INPUT)
val div_mul_rdy = Bool(INPUT)
val ll_wen = Bool(INPUT)
val ll_waddr = UInt(INPUT, 5)
val ex_waddr = UInt(INPUT, 5)
val mem_rs1_ra = Bool(INPUT)
val mem_waddr = UInt(INPUT, 5)
val wb_waddr = UInt(INPUT, 5)
val status = new MStatus().asInput
val fp_sboard_clr = Bool(INPUT)
val fp_sboard_clra = UInt(INPUT, 5)
// inputs from csr file
val csr_replay = Bool(INPUT)
val csr_stall = Bool(INPUT)
val csr_xcpt = Bool(INPUT)
val eret = Bool(INPUT)
val interrupt = Bool(INPUT)
val interrupt_cause = UInt(INPUT, xLen)
}
abstract trait DecodeConstants abstract trait DecodeConstants
{ {
@ -343,343 +299,3 @@ object RoCCDecode extends DecodeConstants
CUSTOM3_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), CUSTOM3_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
CUSTOM3_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N)) CUSTOM3_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N))
} }
class Control extends CoreModule
{
val io = new Bundle {
val dpath = new CtrlDpathIO
val imem = new CPUFrontendIO
val dmem = new HellaCacheIO
val fpu = new CtrlFPUIO
val rocc = new RoCCInterface().flip
}
var decode_table = XDecode.table
if (!params(BuildFPU).isEmpty) decode_table ++= FDecode.table
if (!params(BuildFPU).isEmpty && params(FDivSqrt)) decode_table ++= FDivSqrtDecode.table
if (!params(BuildRoCC).isEmpty) decode_table ++= RoCCDecode.table
val id_ctrl = Wire(new IntCtrlSigs()).decode(io.dpath.inst, decode_table)
val ex_ctrl = Reg(new IntCtrlSigs)
val mem_ctrl = Reg(new IntCtrlSigs)
val wb_ctrl = Reg(new IntCtrlSigs)
val ex_reg_xcpt_interrupt = Reg(Bool())
val ex_reg_valid = Reg(Bool())
val ex_reg_btb_hit = Reg(Bool())
val ex_reg_btb_resp = Reg(io.imem.btb_resp.bits)
val ex_reg_xcpt = Reg(Bool())
val ex_reg_flush_pipe = Reg(Bool())
val ex_reg_load_use = Reg(Bool())
val ex_reg_cause = Reg(UInt())
val mem_reg_xcpt_interrupt = Reg(Bool())
val mem_reg_valid = Reg(Bool())
val mem_reg_btb_hit = Reg(Bool())
val mem_reg_btb_resp = Reg(io.imem.btb_resp.bits)
val mem_reg_xcpt = Reg(Bool())
val mem_reg_replay = Reg(Bool())
val mem_reg_flush_pipe = Reg(Bool())
val mem_reg_cause = Reg(UInt())
val mem_reg_slow_bypass = Reg(Bool())
val wb_reg_valid = Reg(Bool())
val wb_reg_xcpt = Reg(Bool())
val wb_reg_replay = Reg(Bool())
val wb_reg_cause = Reg(UInt())
val wb_reg_rocc_pending = Reg(init=Bool(false))
val take_pc_wb = Wire(Bool())
val mem_misprediction = io.dpath.mem_misprediction && mem_reg_valid && (mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal)
val want_take_pc_mem = mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe)
val take_pc_mem = want_take_pc_mem && !io.dpath.mem_npc_misaligned
val take_pc_mem_wb = take_pc_wb || take_pc_mem
val take_pc = take_pc_mem_wb
val ctrl_killd = Wire(Bool())
val ctrl_killx = Wire(Bool())
val ctrl_killm = Wire(Bool())
val id_raddr3 = io.dpath.inst(31,27)
val id_raddr2 = io.dpath.inst(24,20)
val id_raddr1 = io.dpath.inst(19,15)
val id_waddr = io.dpath.inst(11,7)
val id_load_use = Wire(Bool())
val id_reg_fence = Reg(init=Bool(false))
val id_csr_en = id_ctrl.csr != CSR.N
val id_system_insn = id_ctrl.csr === CSR.I
val id_csr_ren = (id_ctrl.csr === CSR.S || id_ctrl.csr === CSR.C) && id_raddr1 === UInt(0)
val id_csr = Mux(id_csr_ren, CSR.R, id_ctrl.csr)
val id_csr_addr = io.dpath.inst(31,20)
// this is overly conservative
val safe_csrs = CSRs.sscratch :: CSRs.sepc :: CSRs.mscratch :: CSRs.mepc :: CSRs.mcause :: CSRs.mbadaddr :: Nil
val legal_csrs = collection.mutable.LinkedHashSet(CSRs.all:_*)
val id_csr_flush = id_system_insn || (id_csr_en && !id_csr_ren && !DecodeLogic(id_csr_addr, safe_csrs, legal_csrs -- safe_csrs))
val id_illegal_insn = !id_ctrl.legal ||
id_ctrl.fp && !io.dpath.status.fs.orR ||
id_ctrl.rocc && !io.dpath.status.xs.orR
// stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE)
val id_amo_aq = io.dpath.inst(26)
val id_amo_rl = io.dpath.inst(25)
val id_fence_next = id_ctrl.fence || id_ctrl.amo && id_amo_rl
val id_mem_busy = !io.dmem.ordered || io.dmem.req.valid
val id_rocc_busy = Bool(!params(BuildRoCC).isEmpty) &&
(io.rocc.busy || ex_reg_valid && ex_ctrl.rocc ||
mem_reg_valid && mem_ctrl.rocc || wb_reg_valid && wb_ctrl.rocc)
id_reg_fence := id_fence_next || id_reg_fence && id_mem_busy
val id_do_fence = id_rocc_busy && id_ctrl.fence ||
id_mem_busy && (id_ctrl.amo && id_amo_aq || id_ctrl.fence_i || id_reg_fence && (id_ctrl.mem || id_ctrl.rocc) || id_csr_en)
def checkExceptions(x: Seq[(Bool, UInt)]) =
(x.map(_._1).reduce(_||_), PriorityMux(x))
val (id_xcpt, id_cause) = checkExceptions(List(
(io.dpath.interrupt, io.dpath.interrupt_cause),
(io.imem.resp.bits.xcpt_if, UInt(Causes.fault_fetch)),
(id_illegal_insn, UInt(Causes.illegal_instruction))))
ex_reg_valid := !ctrl_killd
ex_reg_xcpt := !ctrl_killd && id_xcpt
ex_reg_xcpt_interrupt := io.dpath.interrupt && !take_pc && io.imem.resp.valid
when (id_xcpt) { ex_reg_cause := id_cause }
when (!ctrl_killd) {
ex_ctrl := id_ctrl
ex_ctrl.csr := id_csr
ex_reg_btb_hit := io.imem.btb_resp.valid
when (io.imem.btb_resp.valid) { ex_reg_btb_resp := io.imem.btb_resp.bits }
ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush
ex_reg_load_use := id_load_use
ex_reg_xcpt := id_xcpt
}
// replay inst in ex stage
val wb_dcache_miss = wb_ctrl.mem && !io.dmem.resp.valid
val replay_ex_structural = ex_ctrl.mem && !io.dmem.req.ready ||
ex_ctrl.div && !io.dpath.div_mul_rdy
val replay_ex_load_use = wb_dcache_miss && ex_reg_load_use
val replay_ex = ex_reg_valid && (replay_ex_structural || replay_ex_load_use)
ctrl_killx := take_pc_mem_wb || replay_ex || !ex_reg_valid
// detect 2-cycle load-use delay for LB/LH/SC
val ex_slow_bypass = ex_ctrl.mem_cmd === M_XSC || Vec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_ctrl.mem_type)
val (ex_xcpt, ex_cause) = checkExceptions(List(
(ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause),
(ex_ctrl.fp && io.fpu.illegal_rm, UInt(Causes.illegal_instruction))))
mem_reg_valid := !ctrl_killx
mem_reg_replay := !take_pc_mem_wb && replay_ex
mem_reg_xcpt := !ctrl_killx && ex_xcpt
mem_reg_xcpt_interrupt := !take_pc_mem_wb && ex_reg_xcpt_interrupt
when (ex_xcpt) { mem_reg_cause := ex_cause }
when (!ctrl_killx) {
mem_ctrl := ex_ctrl
mem_reg_btb_hit := ex_reg_btb_hit
when (ex_reg_btb_hit) { mem_reg_btb_resp := ex_reg_btb_resp }
mem_reg_flush_pipe := ex_reg_flush_pipe
mem_reg_slow_bypass := ex_slow_bypass
mem_reg_xcpt := ex_xcpt
}
val (mem_xcpt, mem_cause) = checkExceptions(List(
(mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause),
(want_take_pc_mem && io.dpath.mem_npc_misaligned, UInt(Causes.misaligned_fetch)),
(mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.ma.st, UInt(Causes.misaligned_store)),
(mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.ma.ld, UInt(Causes.misaligned_load)),
(mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.pf.st, UInt(Causes.fault_store)),
(mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.pf.ld, UInt(Causes.fault_load))))
val dcache_kill_mem = mem_reg_valid && mem_ctrl.wxd && io.dmem.replay_next.valid // structural hazard on writeback port
val fpu_kill_mem = mem_reg_valid && mem_ctrl.fp && io.fpu.nack_mem
val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem
val killm_common = dcache_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid
ctrl_killm := killm_common || mem_xcpt || fpu_kill_mem
wb_reg_valid := !ctrl_killm
when (!ctrl_killm) { wb_ctrl := mem_ctrl }
wb_reg_replay := replay_mem && !take_pc_wb
wb_reg_xcpt := mem_xcpt && !take_pc_wb
when (mem_xcpt) { wb_reg_cause := mem_cause }
val wb_set_sboard = wb_ctrl.div || wb_dcache_miss || wb_ctrl.rocc
val replay_wb_common =
io.dmem.resp.bits.nack || wb_reg_replay || io.dpath.csr_replay
val wb_rocc_val = wb_reg_valid && wb_ctrl.rocc && !replay_wb_common
val replay_wb = replay_wb_common || wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready
when (wb_rocc_val) { wb_reg_rocc_pending := !io.rocc.cmd.ready }
when (wb_reg_xcpt) { wb_reg_rocc_pending := Bool(false) }
class Scoreboard(n: Int)
{
def set(en: Bool, addr: UInt): Unit = update(en, _next | mask(en, addr))
def clear(en: Bool, addr: UInt): Unit = update(en, _next & ~mask(en, addr))
def read(addr: UInt): Bool = r(addr)
def readBypassed(addr: UInt): Bool = _next(addr)
private val r = Reg(init=Bits(0, n))
private var _next = r
private var ens = Bool(false)
private def mask(en: Bool, addr: UInt) = Mux(en, UInt(1) << addr, UInt(0))
private def update(en: Bool, update: UInt) = {
_next = update
ens = ens || en
when (ens) { r := _next }
}
}
val sboard = new Scoreboard(32)
sboard.clear(io.dpath.ll_wen, io.dpath.ll_waddr)
val id_stall_fpu = if (!params(BuildFPU).isEmpty) {
val fp_sboard = new Scoreboard(32)
fp_sboard.set((wb_dcache_miss && wb_ctrl.wfd || io.fpu.sboard_set) && io.dpath.retire, io.dpath.wb_waddr)
fp_sboard.clear(io.dpath.fp_sboard_clr, io.dpath.fp_sboard_clra)
fp_sboard.clear(io.fpu.sboard_clr, io.fpu.sboard_clra)
id_csr_en && !io.fpu.fcsr_rdy ||
io.fpu.dec.ren1 && fp_sboard.read(id_raddr1) ||
io.fpu.dec.ren2 && fp_sboard.read(id_raddr2) ||
io.fpu.dec.ren3 && fp_sboard.read(id_raddr3) ||
io.fpu.dec.wen && fp_sboard.read(id_waddr)
} else Bool(false)
// write CAUSE CSR on an exception
io.dpath.exception := wb_reg_xcpt
io.dpath.cause := wb_reg_cause
val wb_xcpt = wb_reg_xcpt || io.dpath.csr_xcpt
// control transfer from ex/wb
take_pc_wb := replay_wb || wb_xcpt || io.dpath.eret
io.dpath.sel_pc :=
Mux(wb_xcpt || io.dpath.eret, PC_CSR, // exception or [m|s]ret
Mux(replay_wb, PC_WB, // replay
PC_MEM))
io.imem.btb_update.valid := mem_reg_valid && !io.dpath.mem_npc_misaligned && io.dpath.mem_misprediction && ((mem_ctrl.branch && io.dpath.mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal) && !take_pc_wb
io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit
io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp
io.imem.btb_update.bits.isJump := mem_ctrl.jal || mem_ctrl.jalr
io.imem.btb_update.bits.isReturn := mem_ctrl.jalr && io.dpath.mem_rs1_ra
io.imem.bht_update.valid := mem_reg_valid && mem_ctrl.branch && !take_pc_wb
io.imem.bht_update.bits.taken := io.dpath.mem_br_taken
io.imem.bht_update.bits.mispredict := io.dpath.mem_misprediction
io.imem.bht_update.bits.prediction.valid := mem_reg_btb_hit
io.imem.bht_update.bits.prediction.bits := mem_reg_btb_resp
io.imem.ras_update.valid := io.imem.btb_update.bits.isJump && !io.dpath.mem_npc_misaligned && !take_pc_wb
io.imem.ras_update.bits.isCall := mem_ctrl.wxd && io.dpath.mem_waddr(0)
io.imem.ras_update.bits.isReturn := mem_ctrl.jalr && io.dpath.mem_rs1_ra
io.imem.ras_update.bits.prediction.valid := mem_reg_btb_hit
io.imem.ras_update.bits.prediction.bits := mem_reg_btb_resp
io.imem.req.valid := take_pc
val bypassDst = Array(id_raddr1, id_raddr2)
val bypassSrc = Array.fill(NBYP)((Bool(true), UInt(0)))
bypassSrc(BYP_EX) = (ex_reg_valid && ex_ctrl.wxd, io.dpath.ex_waddr)
bypassSrc(BYP_MEM) = (mem_reg_valid && mem_ctrl.wxd && !mem_ctrl.mem, io.dpath.mem_waddr)
bypassSrc(BYP_DC) = (mem_reg_valid && mem_ctrl.wxd, io.dpath.mem_waddr)
val doBypass = bypassDst.map(d => bypassSrc.map(s => s._1 && s._2 === d))
for (i <- 0 until io.dpath.bypass.size) {
io.dpath.bypass(i) := doBypass(i).reduce(_||_)
io.dpath.bypass_src(i) := PriorityEncoder(doBypass(i))
}
// stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage.
val id_renx1_not0 = id_ctrl.rxs1 && id_raddr1 != UInt(0)
val id_renx2_not0 = id_ctrl.rxs2 && id_raddr2 != UInt(0)
val id_wen_not0 = id_ctrl.wxd && id_waddr != UInt(0)
val ex_cannot_bypass = ex_ctrl.csr != CSR.N || ex_ctrl.jalr || ex_ctrl.mem || ex_ctrl.div || ex_ctrl.fp || ex_ctrl.rocc
val data_hazard_ex = ex_ctrl.wxd &&
(id_renx1_not0 && id_raddr1 === io.dpath.ex_waddr ||
id_renx2_not0 && id_raddr2 === io.dpath.ex_waddr ||
id_wen_not0 && id_waddr === io.dpath.ex_waddr)
val fp_data_hazard_ex = ex_ctrl.wfd &&
(io.fpu.dec.ren1 && id_raddr1 === io.dpath.ex_waddr ||
io.fpu.dec.ren2 && id_raddr2 === io.dpath.ex_waddr ||
io.fpu.dec.ren3 && id_raddr3 === io.dpath.ex_waddr ||
io.fpu.dec.wen && id_waddr === io.dpath.ex_waddr)
val id_ex_hazard = ex_reg_valid && (data_hazard_ex && ex_cannot_bypass || fp_data_hazard_ex)
// stall for RAW/WAW hazards on CSRs, LB/LH, and mul/div in memory stage.
val mem_mem_cmd_bh =
if (params(FastLoadWord)) Bool(!params(FastLoadByte)) && mem_reg_slow_bypass
else Bool(true)
val mem_cannot_bypass = mem_ctrl.csr != CSR.N || mem_ctrl.mem && mem_mem_cmd_bh || mem_ctrl.div || mem_ctrl.fp || mem_ctrl.rocc
val data_hazard_mem = mem_ctrl.wxd &&
(id_renx1_not0 && id_raddr1 === io.dpath.mem_waddr ||
id_renx2_not0 && id_raddr2 === io.dpath.mem_waddr ||
id_wen_not0 && id_waddr === io.dpath.mem_waddr)
val fp_data_hazard_mem = mem_ctrl.wfd &&
(io.fpu.dec.ren1 && id_raddr1 === io.dpath.mem_waddr ||
io.fpu.dec.ren2 && id_raddr2 === io.dpath.mem_waddr ||
io.fpu.dec.ren3 && id_raddr3 === io.dpath.mem_waddr ||
io.fpu.dec.wen && id_waddr === io.dpath.mem_waddr)
val id_mem_hazard = mem_reg_valid && (data_hazard_mem && mem_cannot_bypass || fp_data_hazard_mem)
id_load_use := mem_reg_valid && data_hazard_mem && mem_ctrl.mem
// stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback.
val data_hazard_wb = wb_ctrl.wxd &&
(id_renx1_not0 && id_raddr1 === io.dpath.wb_waddr ||
id_renx2_not0 && id_raddr2 === io.dpath.wb_waddr ||
id_wen_not0 && id_waddr === io.dpath.wb_waddr)
val fp_data_hazard_wb = wb_ctrl.wfd &&
(io.fpu.dec.ren1 && id_raddr1 === io.dpath.wb_waddr ||
io.fpu.dec.ren2 && id_raddr2 === io.dpath.wb_waddr ||
io.fpu.dec.ren3 && id_raddr3 === io.dpath.wb_waddr ||
io.fpu.dec.wen && id_waddr === io.dpath.wb_waddr)
val id_wb_hazard = wb_reg_valid && (data_hazard_wb && wb_set_sboard || fp_data_hazard_wb)
val id_sboard_hazard =
(id_renx1_not0 && sboard.readBypassed(id_raddr1) ||
id_renx2_not0 && sboard.readBypassed(id_raddr2) ||
id_wen_not0 && sboard.readBypassed(id_waddr))
sboard.set(wb_set_sboard && io.dpath.wb_wen, io.dpath.wb_waddr)
val ctrl_stalld =
id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard ||
id_ctrl.fp && id_stall_fpu ||
id_ctrl.mem && !io.dmem.req.ready ||
Bool(!params(BuildRoCC).isEmpty) && wb_reg_rocc_pending && id_ctrl.rocc && !io.rocc.cmd.ready ||
id_do_fence ||
io.dpath.csr_stall
val ctrl_draind = io.dpath.interrupt
ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || ctrl_draind
io.dpath.killd := take_pc || ctrl_stalld && !ctrl_draind
io.imem.resp.ready := !ctrl_stalld || ctrl_draind
io.imem.invalidate := wb_reg_valid && wb_ctrl.fence_i
io.dpath.ren(1) := id_ctrl.rxs2
io.dpath.ren(0) := id_ctrl.rxs1
io.dpath.ex_ctrl := ex_ctrl
io.dpath.mem_ctrl := mem_ctrl
io.dpath.ex_valid := ex_reg_valid
io.dpath.ll_ready := !(wb_reg_valid && wb_ctrl.wxd)
io.dpath.retire := wb_reg_valid && !replay_wb && !io.dpath.csr_xcpt
io.dpath.wb_wen := io.dpath.retire && wb_ctrl.wxd
io.dpath.csr_cmd := Mux(wb_reg_valid, wb_ctrl.csr, CSR.N)
io.dpath.killm := killm_common
io.fpu.valid := !ctrl_killd && id_ctrl.fp
io.fpu.killx := ctrl_killx
io.fpu.killm := killm_common
io.dmem.req.valid := ex_reg_valid && ex_ctrl.mem
io.dmem.req.bits.kill := killm_common || mem_xcpt
io.dmem.req.bits.cmd := ex_ctrl.mem_cmd
io.dmem.req.bits.typ := ex_ctrl.mem_type
io.dmem.req.bits.phys := Bool(false)
io.dmem.invalidate_lr := wb_xcpt
io.rocc.cmd.valid := wb_rocc_val
io.rocc.exception := wb_xcpt && io.dpath.status.xs.orR
io.rocc.s := io.dpath.status.prv.orR // should we just pass all of mstatus?
}

View File

@ -0,0 +1,599 @@
// See LICENSE for license details.
package rocket
import Chisel._
import uncore._
import Util._
case object BuildFPU extends Field[Option[() => FPU]]
case object FDivSqrt extends Field[Boolean]
case object XLen extends Field[Int]
case object NMultXpr extends Field[Int]
case object FetchWidth extends Field[Int]
case object RetireWidth extends Field[Int]
case object UseVM extends Field[Boolean]
case object FastLoadWord extends Field[Boolean]
case object FastLoadByte extends Field[Boolean]
case object FastMulDiv extends Field[Boolean]
case object CoreInstBits extends Field[Int]
case object CoreDataBits extends Field[Int]
case object CoreDCacheReqTagBits extends Field[Int]
case object NCustomMRWCSRs extends Field[Int]
abstract trait CoreParameters extends UsesParameters {
val xLen = params(XLen)
val paddrBits = params(PAddrBits)
val vaddrBits = params(VAddrBits)
val pgIdxBits = params(PgIdxBits)
val ppnBits = params(PPNBits)
val vpnBits = params(VPNBits)
val pgLevels = params(PgLevels)
val pgLevelBits = params(PgLevelBits)
val asIdBits = params(ASIdBits)
val retireWidth = params(RetireWidth)
val coreFetchWidth = params(FetchWidth)
val coreInstBits = params(CoreInstBits)
val coreInstBytes = coreInstBits/8
val coreDataBits = xLen
val coreDataBytes = coreDataBits/8
val coreDCacheReqTagBits = params(CoreDCacheReqTagBits)
val coreMaxAddrBits = math.max(ppnBits,vpnBits+1) + pgIdxBits
val vaddrBitsExtended = vaddrBits + (vaddrBits < xLen).toInt
if(params(FastLoadByte)) require(params(FastLoadWord))
}
abstract trait RocketCoreParameters extends CoreParameters
{
require(params(FetchWidth) == 1) // for now...
require(params(RetireWidth) == 1) // for now...
}
abstract class CoreBundle extends Bundle with CoreParameters
abstract class CoreModule extends Module with CoreParameters
class Rocket extends CoreModule
{
val io = new Bundle {
val host = new HTIFIO
val imem = new CPUFrontendIO
val dmem = new HellaCacheIO
val ptw = new DatapathPTWIO().flip
val fpu = new FPUIO().flip
val rocc = new RoCCInterface().flip
}
var decode_table = XDecode.table
if (!params(BuildFPU).isEmpty) decode_table ++= FDecode.table
if (!params(BuildFPU).isEmpty && params(FDivSqrt)) decode_table ++= FDivSqrtDecode.table
if (!params(BuildRoCC).isEmpty) decode_table ++= RoCCDecode.table
val ex_ctrl = Reg(new IntCtrlSigs)
val mem_ctrl = Reg(new IntCtrlSigs)
val wb_ctrl = Reg(new IntCtrlSigs)
val ex_reg_xcpt_interrupt = Reg(Bool())
val ex_reg_valid = Reg(Bool())
val ex_reg_btb_hit = Reg(Bool())
val ex_reg_btb_resp = Reg(io.imem.btb_resp.bits)
val ex_reg_xcpt = Reg(Bool())
val ex_reg_flush_pipe = Reg(Bool())
val ex_reg_load_use = Reg(Bool())
val ex_reg_cause = Reg(UInt())
val ex_reg_pc = Reg(UInt())
val ex_reg_inst = Reg(Bits())
val mem_reg_xcpt_interrupt = Reg(Bool())
val mem_reg_valid = Reg(Bool())
val mem_reg_btb_hit = Reg(Bool())
val mem_reg_btb_resp = Reg(io.imem.btb_resp.bits)
val mem_reg_xcpt = Reg(Bool())
val mem_reg_replay = Reg(Bool())
val mem_reg_flush_pipe = Reg(Bool())
val mem_reg_cause = Reg(UInt())
val mem_reg_slow_bypass = Reg(Bool())
val mem_reg_pc = Reg(UInt())
val mem_reg_inst = Reg(Bits())
val mem_reg_wdata = Reg(Bits())
val mem_reg_rs2 = Reg(Bits())
val take_pc_mem = Wire(Bool())
val wb_reg_valid = Reg(Bool())
val wb_reg_xcpt = Reg(Bool())
val wb_reg_replay = Reg(Bool())
val wb_reg_cause = Reg(UInt())
val wb_reg_rocc_pending = Reg(init=Bool(false))
val wb_reg_pc = Reg(UInt())
val wb_reg_inst = Reg(Bits())
val wb_reg_wdata = Reg(Bits())
val wb_reg_rs2 = Reg(Bits())
val take_pc_wb = Wire(Bool())
val take_pc_mem_wb = take_pc_wb || take_pc_mem
val take_pc = take_pc_mem_wb
// decode stage
val id_pc = io.imem.resp.bits.pc
val id_inst = io.imem.resp.bits.data(0).toBits; require(params(FetchWidth) == 1)
val id_ctrl = Wire(new IntCtrlSigs()).decode(id_inst, decode_table)
val id_raddr3 = id_inst(31,27)
val id_raddr2 = id_inst(24,20)
val id_raddr1 = id_inst(19,15)
val id_waddr = id_inst(11,7)
val id_load_use = Wire(Bool())
val id_reg_fence = Reg(init=Bool(false))
val id_ren = IndexedSeq(id_ctrl.rxs1, id_ctrl.rxs2)
val id_raddr = IndexedSeq(id_raddr1, id_raddr2)
val rf = new RegFile
val id_rs = id_raddr.map(rf.read _)
val ctrl_killd = Wire(Bool())
val csr = Module(new CSRFile)
val id_csr_en = id_ctrl.csr != CSR.N
val id_system_insn = id_ctrl.csr === CSR.I
val id_csr_ren = (id_ctrl.csr === CSR.S || id_ctrl.csr === CSR.C) && id_raddr1 === UInt(0)
val id_csr = Mux(id_csr_ren, CSR.R, id_ctrl.csr)
val id_csr_addr = id_inst(31,20)
// this is overly conservative
val safe_csrs = CSRs.sscratch :: CSRs.sepc :: CSRs.mscratch :: CSRs.mepc :: CSRs.mcause :: CSRs.mbadaddr :: Nil
val legal_csrs = collection.mutable.LinkedHashSet(CSRs.all:_*)
val id_csr_flush = id_system_insn || (id_csr_en && !id_csr_ren && !DecodeLogic(id_csr_addr, safe_csrs, legal_csrs -- safe_csrs))
val id_illegal_insn = !id_ctrl.legal ||
id_ctrl.fp && !csr.io.status.fs.orR ||
id_ctrl.rocc && !csr.io.status.xs.orR
// stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE)
val id_amo_aq = id_inst(26)
val id_amo_rl = id_inst(25)
val id_fence_next = id_ctrl.fence || id_ctrl.amo && id_amo_rl
val id_mem_busy = !io.dmem.ordered || io.dmem.req.valid
val id_rocc_busy = Bool(!params(BuildRoCC).isEmpty) &&
(io.rocc.busy || ex_reg_valid && ex_ctrl.rocc ||
mem_reg_valid && mem_ctrl.rocc || wb_reg_valid && wb_ctrl.rocc)
id_reg_fence := id_fence_next || id_reg_fence && id_mem_busy
val id_do_fence = id_rocc_busy && id_ctrl.fence ||
id_mem_busy && (id_ctrl.amo && id_amo_aq || id_ctrl.fence_i || id_reg_fence && (id_ctrl.mem || id_ctrl.rocc) || id_csr_en)
val (id_xcpt, id_cause) = checkExceptions(List(
(csr.io.interrupt, csr.io.interrupt_cause),
(io.imem.resp.bits.xcpt_if, UInt(Causes.fault_fetch)),
(id_illegal_insn, UInt(Causes.illegal_instruction))))
val dcache_bypass_data =
if(params(FastLoadByte)) io.dmem.resp.bits.data_subword
else if(params(FastLoadWord)) io.dmem.resp.bits.data
else wb_reg_wdata
// detect bypass opportunities
val ex_waddr = ex_reg_inst(11,7)
val mem_waddr = mem_reg_inst(11,7)
val wb_waddr = wb_reg_inst(11,7)
val bypass_sources = IndexedSeq(
(Bool(true), UInt(0), UInt(0)), // treat reading x0 as a bypass
(ex_reg_valid && ex_ctrl.wxd, ex_waddr, mem_reg_wdata),
(mem_reg_valid && mem_ctrl.wxd && !mem_ctrl.mem, mem_waddr, wb_reg_wdata),
(mem_reg_valid && mem_ctrl.wxd, mem_waddr, dcache_bypass_data))
val id_bypass_src = id_raddr.map(raddr => bypass_sources.map(s => s._1 && s._2 === raddr))
// execute stage
val bypass_mux = Vec(bypass_sources.map(_._3))
val ex_reg_rs_bypass = Reg(Vec.fill(id_raddr.size)(Bool()))
val ex_reg_rs_lsb = Reg(Vec.fill(id_raddr.size)(Bits()))
val ex_reg_rs_msb = Reg(Vec.fill(id_raddr.size)(Bits()))
val ex_rs = for (i <- 0 until id_raddr.size)
yield Mux(ex_reg_rs_bypass(i), bypass_mux(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i)))
val ex_imm = imm(ex_ctrl.sel_imm, ex_reg_inst)
val ex_op1 = MuxLookup(ex_ctrl.sel_alu1, SInt(0), Seq(
A1_RS1 -> ex_rs(0).toSInt,
A1_PC -> ex_reg_pc.toSInt))
val ex_op2 = MuxLookup(ex_ctrl.sel_alu2, SInt(0), Seq(
A2_RS2 -> ex_rs(1).toSInt,
A2_IMM -> ex_imm,
A2_FOUR -> SInt(4)))
val alu = Module(new ALU)
alu.io.dw := ex_ctrl.alu_dw
alu.io.fn := ex_ctrl.alu_fn
alu.io.in2 := ex_op2.toUInt
alu.io.in1 := ex_op1
// multiplier and divider
val div = Module(new MulDiv(mulUnroll = if(params(FastMulDiv)) 8 else 1,
earlyOut = params(FastMulDiv)))
div.io.req.valid := ex_reg_valid && ex_ctrl.div
div.io.req.bits.dw := ex_ctrl.alu_dw
div.io.req.bits.fn := ex_ctrl.alu_fn
div.io.req.bits.in1 := ex_rs(0)
div.io.req.bits.in2 := ex_rs(1)
div.io.req.bits.tag := ex_waddr
ex_reg_valid := !ctrl_killd
ex_reg_xcpt := !ctrl_killd && id_xcpt
ex_reg_xcpt_interrupt := csr.io.interrupt && !take_pc && io.imem.resp.valid
when (id_xcpt) { ex_reg_cause := id_cause }
when (!ctrl_killd) {
ex_ctrl := id_ctrl
ex_ctrl.csr := id_csr
ex_reg_btb_hit := io.imem.btb_resp.valid
when (io.imem.btb_resp.valid) { ex_reg_btb_resp := io.imem.btb_resp.bits }
ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush
ex_reg_load_use := id_load_use
for (i <- 0 until id_raddr.size) {
val do_bypass = id_bypass_src(i).reduce(_||_)
val bypass_src = PriorityEncoder(id_bypass_src(i))
ex_reg_rs_bypass(i) := do_bypass
ex_reg_rs_lsb(i) := bypass_src
when (id_ren(i) && !do_bypass) {
ex_reg_rs_lsb(i) := id_rs(i)(bypass_src.getWidth-1,0)
ex_reg_rs_msb(i) := id_rs(i) >> bypass_src.getWidth
}
}
}
when (!ctrl_killd || csr.io.interrupt) {
ex_reg_inst := id_inst
ex_reg_pc := id_pc
}
// replay inst in ex stage?
val wb_dcache_miss = wb_ctrl.mem && !io.dmem.resp.valid
val replay_ex_structural = ex_ctrl.mem && !io.dmem.req.ready ||
ex_ctrl.div && !div.io.req.ready
val replay_ex_load_use = wb_dcache_miss && ex_reg_load_use
val replay_ex = ex_reg_valid && (replay_ex_structural || replay_ex_load_use)
val ctrl_killx = take_pc_mem_wb || replay_ex || !ex_reg_valid
// detect 2-cycle load-use delay for LB/LH/SC
val ex_slow_bypass = ex_ctrl.mem_cmd === M_XSC || Vec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_ctrl.mem_type)
val (ex_xcpt, ex_cause) = checkExceptions(List(
(ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause),
(ex_ctrl.fp && io.fpu.illegal_rm, UInt(Causes.illegal_instruction))))
// memory stage
val mem_br_taken = mem_reg_wdata(0)
val mem_br_target = mem_reg_pc.toSInt +
Mux(mem_ctrl.branch && mem_br_taken, imm(IMM_SB, mem_reg_inst),
Mux(mem_ctrl.jal, imm(IMM_UJ, mem_reg_inst), SInt(4)))
val mem_int_wdata = Mux(mem_ctrl.jalr, mem_br_target, mem_reg_wdata.toSInt).toUInt
val mem_npc = (Mux(mem_ctrl.jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(vaddrBits-1,0)).toSInt, mem_br_target) & SInt(-2)).toUInt
val mem_wrong_npc = mem_npc != ex_reg_pc || !ex_reg_valid
val mem_npc_misaligned = mem_npc(1)
val mem_misprediction = mem_wrong_npc && mem_reg_valid && (mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal)
val want_take_pc_mem = mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe)
take_pc_mem := want_take_pc_mem && !mem_npc_misaligned
mem_reg_valid := !ctrl_killx
mem_reg_replay := !take_pc_mem_wb && replay_ex
mem_reg_xcpt := !ctrl_killx && ex_xcpt
mem_reg_xcpt_interrupt := !take_pc_mem_wb && ex_reg_xcpt_interrupt
when (ex_xcpt) { mem_reg_cause := ex_cause }
when (ex_reg_valid || ex_reg_xcpt_interrupt) {
mem_ctrl := ex_ctrl
mem_reg_btb_hit := ex_reg_btb_hit
when (ex_reg_btb_hit) { mem_reg_btb_resp := ex_reg_btb_resp }
mem_reg_flush_pipe := ex_reg_flush_pipe
mem_reg_slow_bypass := ex_slow_bypass
mem_reg_inst := ex_reg_inst
mem_reg_pc := ex_reg_pc
mem_reg_wdata := alu.io.out
when (ex_ctrl.rxs2 && (ex_ctrl.mem || ex_ctrl.rocc)) {
mem_reg_rs2 := ex_rs(1)
}
}
val (mem_xcpt, mem_cause) = checkExceptions(List(
(mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause),
(want_take_pc_mem && mem_npc_misaligned, UInt(Causes.misaligned_fetch)),
(mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.ma.st, UInt(Causes.misaligned_store)),
(mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.ma.ld, UInt(Causes.misaligned_load)),
(mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.pf.st, UInt(Causes.fault_store)),
(mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.pf.ld, UInt(Causes.fault_load))))
val dcache_kill_mem = mem_reg_valid && mem_ctrl.wxd && io.dmem.replay_next.valid // structural hazard on writeback port
val fpu_kill_mem = mem_reg_valid && mem_ctrl.fp && io.fpu.nack_mem
val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem
val killm_common = dcache_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid
div.io.kill := killm_common && Reg(next = div.io.req.fire())
val ctrl_killm = killm_common || mem_xcpt || fpu_kill_mem
wb_reg_valid := !ctrl_killm
wb_reg_replay := replay_mem && !take_pc_wb
wb_reg_xcpt := mem_xcpt && !take_pc_wb
when (mem_xcpt) { wb_reg_cause := mem_cause }
when (mem_reg_valid || mem_reg_replay || mem_reg_xcpt_interrupt) {
wb_ctrl := mem_ctrl
wb_reg_wdata := Mux(mem_ctrl.fp && mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata)
when (mem_ctrl.rocc) {
wb_reg_rs2 := mem_reg_rs2
}
wb_reg_inst := mem_reg_inst
wb_reg_pc := mem_reg_pc
}
val wb_set_sboard = wb_ctrl.div || wb_dcache_miss || wb_ctrl.rocc
val replay_wb_common =
io.dmem.resp.bits.nack || wb_reg_replay || csr.io.csr_replay
val wb_rocc_val = wb_reg_valid && wb_ctrl.rocc && !replay_wb_common
val replay_wb = replay_wb_common || wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready
when (wb_rocc_val) { wb_reg_rocc_pending := !io.rocc.cmd.ready }
when (wb_reg_xcpt) { wb_reg_rocc_pending := Bool(false) }
// writeback arbitration
val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool
val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool
val dmem_resp_waddr = io.dmem.resp.bits.tag.toUInt >> UInt(1)
val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data
val dmem_resp_replay = io.dmem.resp.bits.replay && io.dmem.resp.bits.has_data
div.io.resp.ready := !(wb_reg_valid && wb_ctrl.wxd)
val ll_wdata = Wire(init = div.io.resp.bits.data)
val ll_waddr = Wire(init = div.io.resp.bits.tag)
val ll_wen = Wire(init = div.io.resp.fire())
if (!params(BuildRoCC).isEmpty) {
io.rocc.resp.ready := !(wb_reg_valid && wb_ctrl.wxd)
when (io.rocc.resp.fire()) {
div.io.resp.ready := Bool(false)
ll_wdata := io.rocc.resp.bits.data
ll_waddr := io.rocc.resp.bits.rd
ll_wen := Bool(true)
}
}
when (dmem_resp_replay && dmem_resp_xpu) {
div.io.resp.ready := Bool(false)
if (!params(BuildRoCC).isEmpty)
io.rocc.resp.ready := Bool(false)
ll_waddr := dmem_resp_waddr
ll_wen := Bool(true)
}
val wb_valid = wb_reg_valid && !replay_wb && !csr.io.csr_xcpt
val wb_wen = wb_valid && wb_ctrl.wxd
val rf_wen = wb_wen || ll_wen
val rf_waddr = Mux(ll_wen, ll_waddr, wb_waddr)
val rf_wdata = Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data_subword,
Mux(ll_wen, ll_wdata,
Mux(wb_ctrl.csr != CSR.N, csr.io.rw.rdata,
wb_reg_wdata)))
when (rf_wen) { rf.write(rf_waddr, rf_wdata) }
// hook up control/status regfile
csr.io.exception := wb_reg_xcpt
csr.io.cause := wb_reg_cause
csr.io.retire := wb_valid
csr.io.host <> io.host
io.fpu.fcsr_rm := csr.io.fcsr_rm
csr.io.fcsr_flags := io.fpu.fcsr_flags
csr.io.rocc <> io.rocc
csr.io.pc := wb_reg_pc
csr.io.uarch_counters.foreach(_ := Bool(false))
io.ptw.ptbr := csr.io.ptbr
io.ptw.invalidate := csr.io.fatc
io.ptw.status := csr.io.status
csr.io.rw.addr := wb_reg_inst(31,20)
csr.io.rw.cmd := Mux(wb_reg_valid, wb_ctrl.csr, CSR.N)
csr.io.rw.wdata := wb_reg_wdata
val sboard = new Scoreboard(32)
sboard.clear(ll_wen, ll_waddr)
// control transfer from ex/wb
val wb_xcpt = wb_reg_xcpt || csr.io.csr_xcpt
take_pc_wb := replay_wb || wb_xcpt || csr.io.eret
io.imem.req.bits.pc :=
Mux(wb_xcpt || csr.io.eret, csr.io.evec, // exception or [m|s]ret
Mux(replay_wb, wb_reg_pc, // replay
mem_npc)).toUInt // mispredicted branch
io.imem.btb_update.valid := mem_reg_valid && !mem_npc_misaligned && mem_wrong_npc && ((mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal) && !take_pc_wb
io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit
io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp
io.imem.btb_update.bits.isJump := mem_ctrl.jal || mem_ctrl.jalr
io.imem.btb_update.bits.isReturn := mem_ctrl.jalr && mem_reg_inst(19,15) === Bits("b00??1")
io.imem.bht_update.valid := mem_reg_valid && mem_ctrl.branch && !take_pc_wb
io.imem.bht_update.bits.taken := mem_br_taken
io.imem.bht_update.bits.mispredict := mem_wrong_npc
io.imem.bht_update.bits.prediction.valid := mem_reg_btb_hit
io.imem.bht_update.bits.prediction.bits := mem_reg_btb_resp
io.imem.ras_update.valid := mem_reg_valid && io.imem.btb_update.bits.isJump && !mem_npc_misaligned && !take_pc_wb
io.imem.ras_update.bits.isCall := mem_ctrl.wxd && mem_waddr(0)
io.imem.ras_update.bits.isReturn := io.imem.btb_update.bits.isReturn
io.imem.ras_update.bits.prediction.valid := mem_reg_btb_hit
io.imem.ras_update.bits.prediction.bits := mem_reg_btb_resp
io.imem.req.valid := take_pc
io.imem.btb_update.bits.pc := mem_reg_pc
io.imem.btb_update.bits.target := io.imem.req.bits.pc
io.imem.btb_update.bits.br_pc := mem_reg_pc
io.imem.bht_update.bits.pc := mem_reg_pc
io.imem.ras_update.bits.returnAddr := mem_int_wdata
// stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage.
val id_renx1_not0 = id_ctrl.rxs1 && id_raddr1 != UInt(0)
val id_renx2_not0 = id_ctrl.rxs2 && id_raddr2 != UInt(0)
val id_wen_not0 = id_ctrl.wxd && id_waddr != UInt(0)
val ex_cannot_bypass = ex_ctrl.csr != CSR.N || ex_ctrl.jalr || ex_ctrl.mem || ex_ctrl.div || ex_ctrl.fp || ex_ctrl.rocc
val data_hazard_ex = ex_ctrl.wxd &&
(id_renx1_not0 && id_raddr1 === ex_waddr ||
id_renx2_not0 && id_raddr2 === ex_waddr ||
id_wen_not0 && id_waddr === ex_waddr)
val fp_data_hazard_ex = ex_ctrl.wfd &&
(io.fpu.dec.ren1 && id_raddr1 === ex_waddr ||
io.fpu.dec.ren2 && id_raddr2 === ex_waddr ||
io.fpu.dec.ren3 && id_raddr3 === ex_waddr ||
io.fpu.dec.wen && id_waddr === ex_waddr)
val id_ex_hazard = ex_reg_valid && (data_hazard_ex && ex_cannot_bypass || fp_data_hazard_ex)
// stall for RAW/WAW hazards on CSRs, LB/LH, and mul/div in memory stage.
val mem_mem_cmd_bh =
if (params(FastLoadWord)) Bool(!params(FastLoadByte)) && mem_reg_slow_bypass
else Bool(true)
val mem_cannot_bypass = mem_ctrl.csr != CSR.N || mem_ctrl.mem && mem_mem_cmd_bh || mem_ctrl.div || mem_ctrl.fp || mem_ctrl.rocc
val data_hazard_mem = mem_ctrl.wxd &&
(id_renx1_not0 && id_raddr1 === mem_waddr ||
id_renx2_not0 && id_raddr2 === mem_waddr ||
id_wen_not0 && id_waddr === mem_waddr)
val fp_data_hazard_mem = mem_ctrl.wfd &&
(io.fpu.dec.ren1 && id_raddr1 === mem_waddr ||
io.fpu.dec.ren2 && id_raddr2 === mem_waddr ||
io.fpu.dec.ren3 && id_raddr3 === mem_waddr ||
io.fpu.dec.wen && id_waddr === mem_waddr)
val id_mem_hazard = mem_reg_valid && (data_hazard_mem && mem_cannot_bypass || fp_data_hazard_mem)
id_load_use := mem_reg_valid && data_hazard_mem && mem_ctrl.mem
// stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback.
val data_hazard_wb = wb_ctrl.wxd &&
(id_renx1_not0 && id_raddr1 === wb_waddr ||
id_renx2_not0 && id_raddr2 === wb_waddr ||
id_wen_not0 && id_waddr === wb_waddr)
val fp_data_hazard_wb = wb_ctrl.wfd &&
(io.fpu.dec.ren1 && id_raddr1 === wb_waddr ||
io.fpu.dec.ren2 && id_raddr2 === wb_waddr ||
io.fpu.dec.ren3 && id_raddr3 === wb_waddr ||
io.fpu.dec.wen && id_waddr === wb_waddr)
val id_wb_hazard = wb_reg_valid && (data_hazard_wb && wb_set_sboard || fp_data_hazard_wb)
val id_sboard_hazard =
(id_renx1_not0 && sboard.readBypassed(id_raddr1) ||
id_renx2_not0 && sboard.readBypassed(id_raddr2) ||
id_wen_not0 && sboard.readBypassed(id_waddr))
sboard.set(wb_set_sboard && wb_wen, wb_waddr)
val id_stall_fpu = if (!params(BuildFPU).isEmpty) {
val fp_sboard = new Scoreboard(32)
fp_sboard.set((wb_dcache_miss && wb_ctrl.wfd || io.fpu.sboard_set) && wb_valid, wb_waddr)
fp_sboard.clear(dmem_resp_replay && dmem_resp_fpu, dmem_resp_waddr)
fp_sboard.clear(io.fpu.sboard_clr, io.fpu.sboard_clra)
id_csr_en && !io.fpu.fcsr_rdy ||
io.fpu.dec.ren1 && fp_sboard.read(id_raddr1) ||
io.fpu.dec.ren2 && fp_sboard.read(id_raddr2) ||
io.fpu.dec.ren3 && fp_sboard.read(id_raddr3) ||
io.fpu.dec.wen && fp_sboard.read(id_waddr)
} else Bool(false)
val ctrl_stalld =
id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard ||
id_ctrl.fp && id_stall_fpu ||
id_ctrl.mem && !io.dmem.req.ready ||
Bool(!params(BuildRoCC).isEmpty) && wb_reg_rocc_pending && id_ctrl.rocc && !io.rocc.cmd.ready ||
id_do_fence ||
csr.io.csr_stall
ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || csr.io.interrupt
io.imem.resp.ready := !ctrl_stalld || csr.io.interrupt
io.imem.invalidate := wb_reg_valid && wb_ctrl.fence_i
io.fpu.valid := !ctrl_killd && id_ctrl.fp
io.fpu.killx := ctrl_killx
io.fpu.killm := killm_common
io.fpu.inst := id_inst
io.fpu.fromint_data := ex_rs(0)
io.fpu.dmem_resp_val := dmem_resp_valid && dmem_resp_fpu
io.fpu.dmem_resp_data := io.dmem.resp.bits.data
io.fpu.dmem_resp_type := io.dmem.resp.bits.typ
io.fpu.dmem_resp_tag := dmem_resp_waddr
io.dmem.req.valid := ex_reg_valid && ex_ctrl.mem
io.dmem.req.bits.kill := killm_common || mem_xcpt
io.dmem.req.bits.cmd := ex_ctrl.mem_cmd
io.dmem.req.bits.typ := ex_ctrl.mem_type
io.dmem.req.bits.phys := Bool(false)
io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(vaddrBits-1,0)).toUInt
io.dmem.req.bits.tag := Cat(ex_waddr, ex_ctrl.fp)
io.dmem.req.bits.data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2)
require(params(CoreDCacheReqTagBits) >= 6)
io.dmem.invalidate_lr := wb_xcpt
io.rocc.cmd.valid := wb_rocc_val
io.rocc.exception := wb_xcpt && csr.io.status.xs.orR
io.rocc.s := csr.io.status.prv.orR // should we just pass all of mstatus?
io.rocc.cmd.bits.inst := new RoCCInstruction().fromBits(wb_reg_inst)
io.rocc.cmd.bits.rs1 := wb_reg_wdata
io.rocc.cmd.bits.rs2 := wb_reg_rs2
printf("C%d: %d [%d] pc=[%x] W[r%d=%x][%d] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n",
io.host.id, csr.io.time(32,0), wb_valid, wb_reg_pc,
Mux(rf_wen, rf_waddr, UInt(0)), rf_wdata, rf_wen,
wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs(0))),
wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs(1))),
wb_reg_inst, wb_reg_inst)
def checkExceptions(x: Seq[(Bool, UInt)]) =
(x.map(_._1).reduce(_||_), PriorityMux(x))
def imm(sel: Bits, inst: Bits) = {
val sign = Mux(sel === IMM_Z, SInt(0), inst(31).toSInt)
val b30_20 = Mux(sel === IMM_U, inst(30,20).toSInt, sign)
val b19_12 = Mux(sel != IMM_U && sel != IMM_UJ, sign, inst(19,12).toSInt)
val b11 = Mux(sel === IMM_U || sel === IMM_Z, SInt(0),
Mux(sel === IMM_UJ, inst(20).toSInt,
Mux(sel === IMM_SB, inst(7).toSInt, sign)))
val b10_5 = Mux(sel === IMM_U || sel === IMM_Z, Bits(0), inst(30,25))
val b4_1 = Mux(sel === IMM_U, Bits(0),
Mux(sel === IMM_S || sel === IMM_SB, inst(11,8),
Mux(sel === IMM_Z, inst(19,16), inst(24,21))))
val b0 = Mux(sel === IMM_S, inst(7),
Mux(sel === IMM_I, inst(20),
Mux(sel === IMM_Z, inst(15), Bits(0))))
Cat(sign, b30_20, b19_12, b11, b10_5, b4_1, b0).toSInt
}
def vaSign(a0: UInt, ea: Bits) = {
// efficient means to compress 64-bit VA into vaddrBits+1 bits
// (VA is bad if VA(vaddrBits) != VA(vaddrBits-1))
val a = a0 >> vaddrBits-1
val e = ea(vaddrBits,vaddrBits-1)
Mux(a === UInt(0) || a === UInt(1), e != UInt(0),
Mux(a === SInt(-1) || a === SInt(-2), e === SInt(-1),
e(0)))
}
class RegFile {
private val rf = Mem(UInt(width = 64), 31)
private val reads = collection.mutable.ArrayBuffer[(UInt,UInt)]()
private var canRead = true
def read(addr: UInt) = {
require(canRead)
reads += addr -> Wire(UInt())
reads.last._2 := rf(~addr)
reads.last._2
}
def write(addr: UInt, data: UInt) = {
canRead = false
when (addr != UInt(0)) {
rf(~addr) := data
for ((raddr, rdata) <- reads)
when (addr === raddr) { rdata := data }
}
}
}
class Scoreboard(n: Int)
{
def set(en: Bool, addr: UInt): Unit = update(en, _next | mask(en, addr))
def clear(en: Bool, addr: UInt): Unit = update(en, _next & ~mask(en, addr))
def read(addr: UInt): Bool = r(addr)
def readBypassed(addr: UInt): Bool = _next(addr)
private val r = Reg(init=Bits(0, n))
private var _next = r
private var ens = Bool(false)
private def mask(en: Bool, addr: UInt) = Mux(en, UInt(1) << addr, UInt(0))
private def update(en: Bool, update: UInt) = {
_next = update
ens = ens || en
when (ens) { r := _next }
}
}
}

View File

@ -23,7 +23,7 @@ class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) {
val icache = Module(new Frontend, { case CacheName => "L1I"; case CoreName => "Rocket" }) val icache = Module(new Frontend, { case CacheName => "L1I"; case CoreName => "Rocket" })
val dcache = Module(new HellaCache, { case CacheName => "L1D" }) val dcache = Module(new HellaCache, { case CacheName => "L1D" })
val ptw = Module(new PTW(params(NPTWPorts))) val ptw = Module(new PTW(params(NPTWPorts)))
val core = Module(new Core, { case CoreName => "Rocket" }) val core = Module(new Rocket, { case CoreName => "Rocket" })
dcache.io.cpu.invalidate_lr := core.io.dmem.invalidate_lr // Bypass signal to dcache dcache.io.cpu.invalidate_lr := core.io.dmem.invalidate_lr // Bypass signal to dcache
val dcArb = Module(new HellaCacheArbiter(params(NDCachePorts))) val dcArb = Module(new HellaCacheArbiter(params(NDCachePorts)))
@ -38,6 +38,13 @@ class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) {
core.io.imem <> icache.io.cpu core.io.imem <> icache.io.cpu
core.io.ptw <> ptw.io.dpath core.io.ptw <> ptw.io.dpath
//If so specified, build an FPU module and wire it in
params(BuildFPU)
.map { bf => bf() }
.foreach { fpu =>
fpu.io <> core.io.fpu
}
// Connect the caches and ROCC to the outer memory system // Connect the caches and ROCC to the outer memory system
io.cached <> dcache.io.mem io.cached <> dcache.io.mem
// If so specified, build an RoCC module and wire it in // If so specified, build an RoCC module and wire it in