1
0

Merge master into rocc-fpu-port

ebb33f2f4b658211960a4c6c023c139420c67212
This commit is contained in:
Colin Schmidt 2015-08-06 08:03:10 -07:00
commit cab12635f8
23 changed files with 1045 additions and 1293 deletions

View File

@ -5,3 +5,6 @@ version := "1.2"
name := "rocket"
scalaVersion := "2.10.2"
libraryDependencies ++= (Seq("chisel", "hardfloat", "uncore", "junctions").map {
dep: String => sys.props.get(dep + "Version") map { "edu.berkeley.cs" %% dep % _ }}).flatten

View File

@ -1,10 +0,0 @@
// Optionally add a managed dependency on chisel: the dependency is only
// declared when -DchiselVersion=<version> is supplied on the command line.
// The "_r" suffix is a temporary workaround until sbt 0.13.6, see
// https://github.com/sbt/sbt/issues/1465
val chiselVersion_r = System.getProperty("chiselVersion", "None")

libraryDependencies ++= {
  if (chiselVersion_r == "None") Nil
  else ("edu.berkeley.cs" %% "chisel" % chiselVersion_r) :: Nil
}

View File

@ -1,8 +0,0 @@
// Optionally add a managed dependency on hardfloat: the dependency is only
// declared when -DhardfloatVersion=<version> is supplied on the command line.
val hardfloatVersion = System.getProperty("hardfloatVersion", "None")

libraryDependencies ++= {
  if (hardfloatVersion == "None") Nil
  else ("edu.berkeley.cs" %% "hardfloat" % hardfloatVersion) :: Nil
}

View File

@ -3,9 +3,8 @@
package rocket
import Chisel._
import junctions._
import Util._
import Node._
import uncore._
case object NBTBEntries extends Field[Int]
case object NRAS extends Field[Int]
@ -36,7 +35,7 @@ class RAS(nras: Int) {
private val count = Reg(init=UInt(0,log2Up(nras+1)))
private val pos = Reg(init=UInt(0,log2Up(nras)))
private val stack = Vec.fill(nras){Reg(UInt())}
private val stack = Reg(Vec.fill(nras){UInt()})
}
class BHTResp extends Bundle with BTBParameters {
@ -56,7 +55,7 @@ class BHTResp extends Bundle with BTBParameters {
class BHT(nbht: Int) {
val nbhtbits = log2Up(nbht)
def get(addr: UInt, update: Bool): BHTResp = {
val res = new BHTResp
val res = Wire(new BHTResp)
val index = addr(nbhtbits+1,2) ^ history
res.value := table(index)
res.history := history
@ -144,8 +143,8 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete
val idxPagesOH = idxPages.map(UIntToOH(_)(nPages-1,0))
val tgtPagesOH = tgtPages.map(UIntToOH(_)(nPages-1,0))
val useRAS = Reg(UInt(width = entries))
val isJump = Reg(UInt(width = entries))
val useRAS = Reg(Vec(Bool(), entries))
val isJump = Reg(Vec(Bool(), entries))
val brIdx = Mem(UInt(width=log2Up(params(FetchWidth))), entries)
private def page(addr: UInt) = addr >> matchBits
@ -153,7 +152,7 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete
val p = page(addr)
Vec(pages.map(_ === p)).toBits & pageValid
}
private def tagMatch(addr: UInt, pgMatch: UInt): UInt = {
private def tagMatch(addr: UInt, pgMatch: UInt) = {
val idx = addr(matchBits-1,0)
val idxMatch = idxs.map(_ === idx).toBits
val idxPageMatch = idxPagesOH.map(_ & pgMatch).map(_.orR).toBits
@ -178,7 +177,7 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete
val useUpdatePageHit = updatePageHit.orR
val doIdxPageRepl = !useUpdatePageHit
val idxPageRepl = UInt()
val idxPageRepl = Wire(UInt(width = nPages))
val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl)
val idxPageUpdate = OHToUInt(idxPageUpdateOH)
val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0))
@ -203,9 +202,10 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete
else Mux(updateHit, r_btb_update.bits.prediction.bits.entry, nextRepl)
// invalidate entries if we stomp on pages they depend upon
idxValid := idxValid & ~Vec.tabulate(entries)(i => (pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR).toBits
val invalidateMask = Vec.tabulate(entries)(i => (pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR).toBits
val validateMask = UIntToOH(waddr)
idxValid := (idxValid & ~invalidateMask) | validateMask
idxValid(waddr) := Bool(true)
idxs(waddr) := r_btb_update.bits.pc
tgts(waddr) := update_target
idxPages(waddr) := idxPageUpdate
@ -253,12 +253,13 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete
if (nBHT > 0) {
val bht = new BHT(nBHT)
val res = bht.get(io.req.bits.addr, io.req.valid && hits.orR && !Mux1H(hits, isJump))
val isBranch = !Mux1H(hits, isJump)
val res = bht.get(io.req.bits.addr, io.req.valid && io.resp.valid && isBranch)
val update_btb_hit = io.bht_update.bits.prediction.valid
when (io.bht_update.valid && update_btb_hit) {
bht.update(io.bht_update.bits.pc, io.bht_update.bits.prediction.bits.bht, io.bht_update.bits.taken, io.bht_update.bits.mispredict)
}
when (!res.value(0) && !Mux1H(hits, isJump)) { io.resp.bits.taken := false }
when (!res.value(0) && isBranch) { io.resp.bits.taken := false }
io.resp.bits.bht := res
}

View File

@ -8,27 +8,22 @@ import scala.math._
trait ScalarOpConstants {
val SZ_BR = 3
val BR_X = Bits("b???", 3)
val BR_EQ = Bits(0, 3)
val BR_NE = Bits(1, 3)
val BR_J = Bits(2, 3)
val BR_N = Bits(3, 3)
val BR_LT = Bits(4, 3)
val BR_GE = Bits(5, 3)
val BR_LTU = Bits(6, 3)
val BR_GEU = Bits(7, 3)
val BR_X = BitPat("b???")
val BR_EQ = UInt(0, 3)
val BR_NE = UInt(1, 3)
val BR_J = UInt(2, 3)
val BR_N = UInt(3, 3)
val BR_LT = UInt(4, 3)
val BR_GE = UInt(5, 3)
val BR_LTU = UInt(6, 3)
val BR_GEU = UInt(7, 3)
val PC_EX = UInt(0, 2)
val PC_MEM = UInt(1, 2)
val PC_WB = UInt(2, 2)
val PC_CSR = UInt(3, 2)
val A1_X = Bits("b??", 2)
val A1_X = BitPat("b??")
val A1_ZERO = UInt(0, 2)
val A1_RS1 = UInt(1, 2)
val A1_PC = UInt(2, 2)
val IMM_X = Bits("b???", 3)
val IMM_X = BitPat("b???")
val IMM_S = UInt(0, 3)
val IMM_SB = UInt(1, 3)
val IMM_U = UInt(2, 3)
@ -36,22 +31,15 @@ trait ScalarOpConstants {
val IMM_I = UInt(4, 3)
val IMM_Z = UInt(5, 3)
val A2_X = Bits("b??", 2)
val A2_X = BitPat("b??")
val A2_ZERO = UInt(0, 2)
val A2_FOUR = UInt(1, 2)
val A2_RS2 = UInt(2, 2)
val A2_IMM = UInt(3, 2)
val X = Bool.DC
val N = Bool(false)
val Y = Bool(true)
val NBYP = 4
val SZ_BYP = log2Up(NBYP)
val BYP_0 = 0
val BYP_EX = 1
val BYP_MEM = 2
val BYP_DC = 3
val X = BitPat("b?")
val N = BitPat("b0")
val Y = BitPat("b1")
val SZ_DW = 1
val DW_X = X
@ -64,6 +52,4 @@ trait ScalarOpConstants {
val PRV_S = 1
val PRV_H = 2
val PRV_M = 3
val RA = UInt(1, 5)
}

View File

@ -1,100 +0,0 @@
// See LICENSE for license details.
package rocket
import Chisel._
import Util._
import uncore._
// Configuration keys for the core. Each key is a typed Field whose value is
// looked up with params(<key>), e.g. params(XLen) in CoreParameters below.

// Optional FPU factory; Core only builds and wires an FPU when it is defined.
case object BuildFPU extends Field[Option[() => FPU]]
case object FDivSqrt extends Field[Boolean]
case object XLen extends Field[Int]
case object NMultXpr extends Field[Int]
case object FetchWidth extends Field[Int]
case object RetireWidth extends Field[Int]
case object UseVM extends Field[Boolean]
// FastLoadByte requires FastLoadWord (checked in CoreParameters).
case object FastLoadWord extends Field[Boolean]
case object FastLoadByte extends Field[Boolean]
case object FastMulDiv extends Field[Boolean]
case object CoreInstBits extends Field[Int]
case object CoreDataBits extends Field[Int]
case object CoreDCacheReqTagBits extends Field[Int]
case object NCustomMRWCSRs extends Field[Int]
/** Core-wide constants resolved from the configuration through `params`.
  *
  * Mixed into bundles and modules so they share one set of derived widths.
  * Construction fails if FastLoadByte is enabled without FastLoadWord.
  */
trait CoreParameters extends UsesParameters {
  // Register width and address-translation geometry.
  val xLen = params(XLen)
  val paddrBits = params(PAddrBits)
  val vaddrBits = params(VAddrBits)
  val pgIdxBits = params(PgIdxBits)
  val ppnBits = params(PPNBits)
  val vpnBits = params(VPNBits)
  val pgLevels = params(PgLevels)
  val pgLevelBits = params(PgLevelBits)
  val asIdBits = params(ASIdBits)

  // Pipeline widths and instruction/data sizes (byte counts derived from bits).
  val retireWidth = params(RetireWidth)
  val coreFetchWidth = params(FetchWidth)
  val coreInstBits = params(CoreInstBits)
  val coreInstBytes = coreInstBits / 8
  val coreDataBits = xLen
  val coreDataBytes = coreDataBits / 8
  val coreDCacheReqTagBits = params(CoreDCacheReqTagBits)

  // Widest address the core handles: the larger of the physical page number
  // and (virtual page number + 1 bit), plus the page offset.
  val coreMaxAddrBits = math.max(ppnBits, vpnBits + 1) + pgIdxBits
  // One extra bit is kept only when virtual addresses are narrower than xLen.
  val vaddrBitsExtended = vaddrBits + (vaddrBits < xLen).toInt

  // FastLoadByte implies FastLoadWord.
  require(!params(FastLoadByte) || params(FastLoadWord))
}
/** CoreParameters plus the restrictions of this pipeline: exactly one
  * instruction fetched and one retired per cycle (for now).
  */
trait RocketCoreParameters extends CoreParameters {
  require(params(FetchWidth) == 1) // for now...
  require(params(RetireWidth) == 1) // for now...
}
// Base classes for bundles and modules that need the CoreParameters
// constants (xLen, vaddrBits, ...) in scope.
abstract class CoreBundle extends Bundle with CoreParameters
abstract class CoreModule extends Module with CoreParameters
/** External ports of the core: host interface, instruction and data memory
  * ports, plus the page-table-walker and RoCC interfaces, which are declared
  * with their directions flipped.
  */
class RocketIO extends Bundle {
  val host = new HTIFIO
  val imem = new CPUFrontendIO
  val dmem = new HellaCacheIO
  val ptw = new DatapathPTWIO().flip
  val rocc = new RoCCInterface().flip
}
/** Top-level core: instantiates the control unit and the datapath, optionally
  * an FPU, and connects them to each other and to the RocketIO ports.
  */
class Core extends Module with CoreParameters {
  val io = new RocketIO

  val ctrl = Module(new Control)
  val dpath = Module(new Datapath)

  // Build an FPU and wire it in only when the configuration supplies a factory.
  for (buildFpu <- params(BuildFPU)) {
    val fpu = buildFpu()
    dpath.io.fpu <> fpu.io.dpath
    ctrl.io.fpu <> fpu.io.ctrl
    if (params(BuildRoCC).isEmpty) {
      // No RoCC accelerator configured: never issue coprocessor FP requests.
      fpu.io.cp_req.valid := Bool(false)
    } else {
      // Route the accelerator's FP request/response channels to the FPU.
      io.rocc.fpu_req <> fpu.io.cp_req
      io.rocc.fpu_resp <> fpu.io.cp_resp
    }
  }

  // Control <-> datapath.
  ctrl.io.dpath <> dpath.io.ctrl

  // External ports; imem, dmem and rocc are connected to both ctrl and dpath.
  dpath.io.host <> io.host

  ctrl.io.imem <> io.imem
  dpath.io.imem <> io.imem

  ctrl.io.dmem <> io.dmem
  dpath.io.dmem <> io.dmem

  dpath.io.ptw <> io.ptw

  ctrl.io.rocc <> io.rocc
  dpath.io.rocc <> io.rocc
}

View File

@ -5,7 +5,6 @@ package rocket
import Chisel._
import Util._
import Instructions._
import Node._
import uncore._
import scala.math._
@ -58,7 +57,7 @@ object CSR
{
// commands
val SZ = 3
val X = UInt.DC(SZ)
val X = BitPat.DC(SZ)
val N = UInt(0,SZ)
val W = UInt(1,SZ)
val S = UInt(2,SZ)
@ -134,7 +133,7 @@ class CSRFile extends CoreModule
io.interrupt_cause := 0
io.interrupt := io.interrupt_cause(xLen-1)
val some_interrupt_pending = Bool(); some_interrupt_pending := false
val some_interrupt_pending = Wire(init=Bool(false))
def checkInterrupt(max_priv: UInt, cond: Bool, num: Int) = {
when (cond && (reg_mstatus.prv < max_priv || reg_mstatus.prv === max_priv && reg_mstatus.ie)) {
io.interrupt_cause := UInt((BigInt(1) << (xLen-1)) + num)
@ -216,20 +215,17 @@ class CSRFile extends CoreModule
CSRs.mfromhost -> reg_fromhost)
if (params(UseVM)) {
val read_sstatus = new SStatus
read_sstatus := new SStatus().fromBits(read_mstatus) // sstatus mostly overlaps mstatus
val read_sstatus = Wire(init=new SStatus().fromBits(read_mstatus))
read_sstatus.zero1 := 0
read_sstatus.zero2 := 0
read_sstatus.zero3 := 0
read_sstatus.zero4 := 0
val read_sip = new MIP
read_sip := new MIP().fromBits(0)
val read_sip = Wire(init=new MIP().fromBits(0))
read_sip.ssip := reg_mip.ssip
read_sip.stip := reg_mip.stip
val read_sie = new MIP
read_sie := new MIP().fromBits(0)
val read_sie = Wire(init=new MIP().fromBits(0))
read_sie.ssip := reg_mie.ssip
read_sie.stip := reg_mie.stip
@ -295,9 +291,9 @@ class CSRFile extends CoreModule
io.csr_xcpt := csr_xcpt
io.eret := insn_ret || insn_redirect_trap
io.status := reg_mstatus
io.status.fs := reg_mstatus.fs.orR.toSInt // either off or dirty (no clean/initial support yet)
io.status.xs := reg_mstatus.xs.orR.toSInt // either off or dirty (no clean/initial support yet)
io.status.sd := reg_mstatus.xs.orR || reg_mstatus.fs.orR
io.status.fs := Fill(2, reg_mstatus.fs.orR) // either off or dirty (no clean/initial support yet)
io.status.xs := Fill(2, reg_mstatus.xs.orR) // either off or dirty (no clean/initial support yet)
io.status.sd := io.status.fs.andR || io.status.xs.andR
if (xLen == 32)
io.status.sd_rv32 := io.status.sd
@ -310,7 +306,7 @@ class CSRFile extends CoreModule
reg_mstatus.prv2 := reg_mstatus.prv1
reg_mstatus.ie2 := reg_mstatus.ie1
reg_mepc := io.pc & SInt(-coreInstBytes)
reg_mepc := ~(~io.pc | (coreInstBytes-1))
reg_mcause := io.cause
when (csr_xcpt) {
reg_mcause := Causes.illegal_instruction
@ -409,7 +405,7 @@ class CSRFile extends CoreModule
when (decoded_addr(CSRs.fflags)) { reg_fflags := wdata }
when (decoded_addr(CSRs.frm)) { reg_frm := wdata }
when (decoded_addr(CSRs.fcsr)) { reg_fflags := wdata; reg_frm := wdata >> reg_fflags.getWidth }
when (decoded_addr(CSRs.mepc)) { reg_mepc := wdata(vaddrBitsExtended-1,0).toSInt & SInt(-coreInstBytes) }
when (decoded_addr(CSRs.mepc)) { reg_mepc := ~(~wdata | (coreInstBytes-1)) }
when (decoded_addr(CSRs.mscratch)) { reg_mscratch := wdata }
when (decoded_addr(CSRs.mcause)) { reg_mcause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ }
when (decoded_addr(CSRs.mbadaddr)) { reg_mbadaddr := wdata(vaddrBitsExtended-1,0) }
@ -424,7 +420,7 @@ class CSRFile extends CoreModule
val new_sstatus = new SStatus().fromBits(wdata)
reg_mstatus.ie := new_sstatus.ie
reg_mstatus.ie1 := new_sstatus.pie
reg_mstatus.prv1 := Mux(new_sstatus.ps, PRV_S, PRV_U)
reg_mstatus.prv1 := Mux[UInt](new_sstatus.ps, PRV_S, PRV_U)
reg_mstatus.mprv := new_sstatus.mprv
reg_mstatus.fs := new_sstatus.fs // even without an FPU
if (!params(BuildRoCC).isEmpty) reg_mstatus.xs := new_sstatus.xs
@ -440,8 +436,8 @@ class CSRFile extends CoreModule
}
when (decoded_addr(CSRs.sscratch)) { reg_sscratch := wdata }
when (decoded_addr(CSRs.sptbr)) { reg_sptbr := Cat(wdata(paddrBits-1, pgIdxBits), Bits(0, pgIdxBits)) }
when (decoded_addr(CSRs.sepc)) { reg_sepc := wdata(vaddrBitsExtended-1,0).toSInt & SInt(-coreInstBytes) }
when (decoded_addr(CSRs.stvec)) { reg_stvec := wdata(vaddrBits-1,0).toSInt & SInt(-coreInstBytes) }
when (decoded_addr(CSRs.sepc)) { reg_sepc := ~(~wdata | (coreInstBytes-1)) }
when (decoded_addr(CSRs.stvec)) { reg_stvec := ~(~wdata | (coreInstBytes-1)) }
}
}

View File

@ -3,25 +3,17 @@
package rocket
import Chisel._
import Node._
object DecodeLogic
{
def term(b: Bits) = {
val lit = b.litOf
if (lit.isZ) {
var (bits, mask, swidth) = Literal.parseLit(lit.toString)
new Term(BigInt(bits, 2), BigInt(2).pow(lit.getWidth)-(BigInt(mask, 2)+1))
} else {
new Term(lit.value)
}
}
def term(lit: BitPat) =
new Term(lit.value, BigInt(2).pow(lit.getWidth)-(lit.mask+1))
def logic(addr: UInt, addrWidth: Int, cache: scala.collection.mutable.Map[Term,Bool], terms: Seq[Term]) = {
terms.map { t =>
cache.getOrElseUpdate(t, (if (t.mask == 0) addr else addr & Bits(BigInt(2).pow(addrWidth)-(t.mask+1), addrWidth)) === Bits(t.value, addrWidth))
}.foldLeft(Bool(false))(_||_)
}
def apply[T <: Bits](addr: UInt, default: T, mapping: Iterable[(UInt, T)]): T = {
def apply(addr: UInt, default: BitPat, mapping: Iterable[(BitPat, BitPat)]): UInt = {
val cache = caches.getOrElseUpdate(addr, collection.mutable.Map[Term,Bool]())
val dterm = term(default)
val (keys, values) = mapping.unzip
@ -33,7 +25,7 @@ object DecodeLogic
for (u <- t.tail)
assert(!t.head._2.intersects(u._2), "DecodeLogic: keys " + t.head + " and " + u + " overlap")
val result = (0 until default.litOf.getWidth.max(values.map(_.litOf.getWidth).max)).map({ case (i: Int) =>
(0 until default.getWidth.max(values.map(_.getWidth).max)).map({ case (i: Int) =>
val mint = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 1 }.map(_._1)
val maxt = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 0 }.map(_._1)
val dc = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 1 }.map(_._1)
@ -47,20 +39,19 @@ object DecodeLogic
if (defbit == 0) bit else ~bit
}
}).reverse.reduceRight(Cat(_,_))
default.fromBits(result)
}
def apply[T <: Bits](addr: UInt, default: Iterable[T], mappingIn: Iterable[(UInt, Iterable[T])]): Iterable[T] = {
val mapping = collection.mutable.ArrayBuffer.fill(default.size)(collection.mutable.ArrayBuffer[(UInt, T)]())
def apply(addr: UInt, default: Seq[BitPat], mappingIn: Iterable[(BitPat, Seq[BitPat])]): Seq[UInt] = {
val mapping = collection.mutable.ArrayBuffer.fill(default.size)(collection.mutable.ArrayBuffer[(BitPat, BitPat)]())
for ((key, values) <- mappingIn)
for ((value, i) <- values zipWithIndex)
mapping(i) += key -> value
for ((thisDefault, thisMapping) <- default zip mapping)
yield apply(addr, thisDefault, thisMapping)
}
def apply(addr: UInt, default: Seq[BitPat], mappingIn: List[(UInt, Seq[BitPat])]): Seq[UInt] =
apply(addr, default, mappingIn.map(m => (BitPat(m._1), m._2)).asInstanceOf[Iterable[(BitPat, Seq[BitPat])]])
def apply(addr: UInt, trues: Iterable[UInt], falses: Iterable[UInt]): Bool =
apply(addr, Bool.DC, trues.map(_ -> Bool(true)) ++ falses.map(_ -> Bool(false)))
def apply(addr: UInt, tru: UInt, fals: UInt): Bool =
apply(addr, Seq(tru), Seq(fals))
apply(addr, BitPat.DC(1), trues.map(BitPat(_) -> BitPat("b1")) ++ falses.map(BitPat(_) -> BitPat("b0"))).toBool
private val caches = collection.mutable.Map[UInt,collection.mutable.Map[Term,Bool]]()
}

View File

@ -1,291 +0,0 @@
// See LICENSE for license details.
package rocket
import Chisel._
import Instructions._
import Util._
import uncore._
class Datapath extends CoreModule
{
val io = new Bundle {
val host = new HTIFIO
val ctrl = new CtrlDpathIO().flip
val dmem = new HellaCacheIO
val ptw = new DatapathPTWIO().flip
val imem = new CPUFrontendIO
val fpu = new DpathFPUIO
val rocc = new RoCCInterface().flip
}
// execute definitions
val ex_reg_pc = Reg(UInt())
val ex_reg_inst = Reg(Bits())
val ex_reg_kill = Reg(Bool())
val ex_reg_rs_bypass = Vec.fill(2)(Reg(Bool()))
val ex_reg_rs_lsb = Vec.fill(2)(Reg(Bits()))
val ex_reg_rs_msb = Vec.fill(2)(Reg(Bits()))
// memory definitions
val mem_reg_pc = Reg(UInt())
val mem_reg_inst = Reg(Bits())
val mem_reg_wdata = Reg(Bits())
val mem_reg_kill = Reg(Bool())
val mem_reg_rs2 = Reg(Bits())
// writeback definitions
val wb_reg_pc = Reg(UInt())
val wb_reg_inst = Reg(Bits())
val wb_reg_wdata = Reg(Bits())
val wb_wdata = Bits()
val wb_reg_rs2 = Reg(Bits())
// instruction decode stage
val id_inst = io.imem.resp.bits.data(0).toBits; require(params(FetchWidth) == 1)
val id_pc = io.imem.resp.bits.pc
// Integer register file: a 31-entry Mem of 64-bit words indexed by the
// inverted register address. Every read port is recorded so that write() can
// forward the written data to any read port with a matching address.
// All read() calls must be made before the first write() call.
class RegFile {
private val rf = Mem(UInt(width = 64), 31)
// (address, data) pair of every read port created so far
private val reads = collection.mutable.ArrayBuffer[(UInt,UInt)]()
// cleared by the first write(); read() is not allowed afterwards
private var canRead = true
// Creates a read port for addr and returns its data node.
def read(addr: UInt) = {
require(canRead)
reads += addr -> UInt()
reads.last._2 := rf(~addr)
reads.last._2
}
// Writes data to addr; writes to address 0 are dropped. While the write is
// active, read ports whose address equals addr are overridden with data.
def write(addr: UInt, data: UInt) = {
canRead = false
when (addr != UInt(0)) {
rf(~addr) := data
for ((raddr, rdata) <- reads)
when (addr === raddr) { rdata := data }
}
}
}
val rf = new RegFile
// RF read ports + bypass from WB stage
val id_raddr = Vec(id_inst(19,15), id_inst(24,20))
val id_rs = id_raddr.map(rf.read _)
// immediate generation
// Builds the immediate operand for instruction `inst` according to the
// immediate format `sel` (one of the IMM_* constants). The result is the
// concatenation of the bit fields below, returned as an SInt.
def imm(sel: Bits, inst: Bits) = {
// sign: inst(31), except IMM_Z which uses zero instead
val sign = Mux(sel === IMM_Z, SInt(0), inst(31).toSInt)
// bits 30..20: taken from the instruction for IMM_U, otherwise the sign
val b30_20 = Mux(sel === IMM_U, inst(30,20).toSInt, sign)
// bits 19..12: taken from the instruction for IMM_U and IMM_UJ, otherwise the sign
val b19_12 = Mux(sel != IMM_U && sel != IMM_UJ, sign, inst(19,12).toSInt)
// bit 11: zero for IMM_U/IMM_Z, inst(20) for IMM_UJ, inst(7) for IMM_SB, else the sign
val b11 = Mux(sel === IMM_U || sel === IMM_Z, SInt(0),
Mux(sel === IMM_UJ, inst(20).toSInt,
Mux(sel === IMM_SB, inst(7).toSInt, sign)))
// bits 10..5: zero for IMM_U/IMM_Z, otherwise inst(30,25)
val b10_5 = Mux(sel === IMM_U || sel === IMM_Z, Bits(0), inst(30,25))
// bits 4..1: zero for IMM_U, inst(11,8) for IMM_S/IMM_SB, inst(19,16) for IMM_Z, else inst(24,21)
val b4_1 = Mux(sel === IMM_U, Bits(0),
Mux(sel === IMM_S || sel === IMM_SB, inst(11,8),
Mux(sel === IMM_Z, inst(19,16), inst(24,21))))
// bit 0: inst(7) for IMM_S, inst(20) for IMM_I, inst(15) for IMM_Z, else zero
val b0 = Mux(sel === IMM_S, inst(7),
Mux(sel === IMM_I, inst(20),
Mux(sel === IMM_Z, inst(15), Bits(0))))
Cat(sign, b30_20, b19_12, b11, b10_5, b4_1, b0).toSInt
}
io.ctrl.inst := id_inst
io.fpu.inst := id_inst
// execute stage
ex_reg_kill := io.ctrl.killd
when (!io.ctrl.killd) {
ex_reg_pc := id_pc
ex_reg_inst := id_inst
ex_reg_rs_bypass := io.ctrl.bypass
for (i <- 0 until id_rs.size) {
when (io.ctrl.ren(i)) {
ex_reg_rs_lsb(i) := id_rs(i)(SZ_BYP-1,0)
when (!io.ctrl.bypass(i)) {
ex_reg_rs_msb(i) := id_rs(i) >> SZ_BYP
}
}
when (io.ctrl.bypass(i)) { ex_reg_rs_lsb(i) := io.ctrl.bypass_src(i) }
}
}
val bypass = Vec.fill(NBYP)(Bits())
bypass(BYP_0) := Bits(0)
bypass(BYP_EX) := mem_reg_wdata
bypass(BYP_MEM) := wb_reg_wdata
bypass(BYP_DC) := (if(params(FastLoadByte)) io.dmem.resp.bits.data_subword
else if(params(FastLoadWord)) io.dmem.resp.bits.data
else wb_reg_wdata)
val ex_rs = for (i <- 0 until id_rs.size)
yield Mux(ex_reg_rs_bypass(i), bypass(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i)))
val ex_imm = imm(io.ctrl.ex_ctrl.sel_imm, ex_reg_inst)
val ex_op1 = MuxLookup(io.ctrl.ex_ctrl.sel_alu1, SInt(0), Seq(
A1_RS1 -> ex_rs(0).toSInt,
A1_PC -> ex_reg_pc.toSInt))
val ex_op2 = MuxLookup(io.ctrl.ex_ctrl.sel_alu2, SInt(0), Seq(
A2_RS2 -> ex_rs(1).toSInt,
A2_IMM -> ex_imm,
A2_FOUR -> SInt(4)))
val alu = Module(new ALU)
alu.io.dw := io.ctrl.ex_ctrl.alu_dw
alu.io.fn := io.ctrl.ex_ctrl.alu_fn
alu.io.in2 := ex_op2.toUInt
alu.io.in1 := ex_op1
// multiplier and divider
val div = Module(new MulDiv(mulUnroll = if(params(FastMulDiv)) 8 else 1,
earlyOut = params(FastMulDiv)))
div.io.req.valid := io.ctrl.ex_valid && io.ctrl.ex_ctrl.div
div.io.req.bits.dw := io.ctrl.ex_ctrl.alu_dw
div.io.req.bits.fn := io.ctrl.ex_ctrl.alu_fn
div.io.req.bits.in1 := ex_rs(0)
div.io.req.bits.in2 := ex_rs(1)
div.io.req.bits.tag := io.ctrl.ex_waddr
div.io.kill := io.ctrl.killm && Reg(next = div.io.req.fire())
io.ctrl.div_mul_rdy := div.io.req.ready
io.fpu.fromint_data := ex_rs(0)
// Computes the single bit that is placed above bits vaddrBits-1..0 of an
// address (see its use with Cat(...) for io.dmem.req.bits.addr).
// a0 is the value the upper bits are taken from and ea the address being
// compressed; the D$ request passes the base register and the adder output.
def vaSign(a0: UInt, ea: Bits) = {
// efficient means to compress 64-bit VA into vaddrBits+1 bits
// (VA is bad if VA(vaddrBits) != VA(vaddrBits-1))
// a: a0 shifted right by (vaddrBits-1); `-` binds tighter than `>>`
val a = a0 >> vaddrBits-1
// e: the two bits of ea at positions vaddrBits and vaddrBits-1
val e = ea(vaddrBits,vaddrBits-1)
// a is 0 or 1: result is set iff e is non-zero
// a is -1 or -2: result is set iff both bits of e are set
// otherwise: result is the low bit of e
Mux(a === UInt(0) || a === UInt(1), e != UInt(0),
Mux(a === SInt(-1) || a === SInt(-2), e === SInt(-1),
e(0)))
}
// D$ request interface (registered inside D$ module)
// other signals (req_val, req_rdy) connect to control module
io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(vaddrBits-1,0)).toUInt
io.dmem.req.bits.tag := Cat(io.ctrl.ex_waddr, io.ctrl.ex_ctrl.fp)
require(io.dmem.req.bits.tag.getWidth >= 6)
require(params(CoreDCacheReqTagBits) >= 6)
// processor control regfile read
val csr = Module(new CSRFile)
csr.io.host <> io.host
csr.io <> io.ctrl
csr.io <> io.fpu
csr.io.rocc <> io.rocc
csr.io.pc := wb_reg_pc
csr.io.uarch_counters.foreach(_ := Bool(false))
io.ptw.ptbr := csr.io.ptbr
io.ptw.invalidate := csr.io.fatc
io.ptw.status := csr.io.status
// memory stage
mem_reg_kill := ex_reg_kill
when (!ex_reg_kill) {
mem_reg_pc := ex_reg_pc
mem_reg_inst := ex_reg_inst
mem_reg_wdata := alu.io.out
when (io.ctrl.ex_ctrl.rxs2 && (io.ctrl.ex_ctrl.mem || io.ctrl.ex_ctrl.rocc)) {
mem_reg_rs2 := ex_rs(1)
}
}
io.dmem.req.bits.data := Mux(io.ctrl.mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2)
// writeback arbitration
val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool
val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool
val dmem_resp_waddr = io.dmem.resp.bits.tag.toUInt >> UInt(1)
val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data
val dmem_resp_replay = io.dmem.resp.bits.replay && io.dmem.resp.bits.has_data
val ll_wdata = Bits()
div.io.resp.ready := io.ctrl.ll_ready
ll_wdata := div.io.resp.bits.data
io.ctrl.ll_waddr := div.io.resp.bits.tag
io.ctrl.ll_wen := div.io.resp.fire()
if (!params(BuildRoCC).isEmpty) {
io.rocc.resp.ready := io.ctrl.ll_ready
when (io.rocc.resp.fire()) {
div.io.resp.ready := Bool(false)
ll_wdata := io.rocc.resp.bits.data
io.ctrl.ll_waddr := io.rocc.resp.bits.rd
io.ctrl.ll_wen := Bool(true)
}
}
when (dmem_resp_replay && dmem_resp_xpu) {
div.io.resp.ready := Bool(false)
if (!params(BuildRoCC).isEmpty)
io.rocc.resp.ready := Bool(false)
io.ctrl.ll_waddr := dmem_resp_waddr
io.ctrl.ll_wen := Bool(true)
}
io.fpu.dmem_resp_val := dmem_resp_valid && dmem_resp_fpu
io.fpu.dmem_resp_data := io.dmem.resp.bits.data
io.fpu.dmem_resp_type := io.dmem.resp.bits.typ
io.fpu.dmem_resp_tag := dmem_resp_waddr
io.ctrl.mem_br_taken := mem_reg_wdata(0)
val mem_br_target = mem_reg_pc.toSInt +
Mux(io.ctrl.mem_ctrl.branch && io.ctrl.mem_br_taken, imm(IMM_SB, mem_reg_inst),
Mux(io.ctrl.mem_ctrl.jal, imm(IMM_UJ, mem_reg_inst), SInt(4)))
val mem_npc = (Mux(io.ctrl.mem_ctrl.jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(vaddrBits-1,0)), mem_br_target) & SInt(-2)).toUInt
io.ctrl.mem_misprediction := mem_npc != ex_reg_pc || !io.ctrl.ex_valid
io.ctrl.mem_npc_misaligned := mem_npc(1)
io.ctrl.mem_rs1_ra := mem_reg_inst(19,15) === 1
val mem_int_wdata = Mux(io.ctrl.mem_ctrl.jalr, mem_br_target, mem_reg_wdata).toUInt
// writeback stage
when (!mem_reg_kill) {
wb_reg_pc := mem_reg_pc
wb_reg_inst := mem_reg_inst
wb_reg_wdata := Mux(io.ctrl.mem_ctrl.fp && io.ctrl.mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata)
when (io.ctrl.mem_ctrl.rocc) {
wb_reg_rs2 := mem_reg_rs2
}
}
wb_wdata := Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data_subword,
Mux(io.ctrl.ll_wen, ll_wdata,
Mux(io.ctrl.csr_cmd != CSR.N, csr.io.rw.rdata,
wb_reg_wdata)))
val wb_wen = io.ctrl.ll_wen || io.ctrl.wb_wen
val wb_waddr = Mux(io.ctrl.ll_wen, io.ctrl.ll_waddr, io.ctrl.wb_waddr)
when (wb_wen) { rf.write(wb_waddr, wb_wdata) }
// scoreboard clear (for div/mul and D$ load miss writebacks)
io.ctrl.fp_sboard_clr := dmem_resp_replay && dmem_resp_fpu
io.ctrl.fp_sboard_clra := dmem_resp_waddr
// processor control regfile write
csr.io.rw.addr := wb_reg_inst(31,20)
csr.io.rw.cmd := io.ctrl.csr_cmd
csr.io.rw.wdata := wb_reg_wdata
io.rocc.cmd.bits.inst := new RoCCInstruction().fromBits(wb_reg_inst)
io.rocc.cmd.bits.rs1 := wb_reg_wdata
io.rocc.cmd.bits.rs2 := wb_reg_rs2
// hook up I$
io.imem.req.bits.pc :=
Mux(io.ctrl.sel_pc === PC_MEM, mem_npc,
Mux(io.ctrl.sel_pc === PC_CSR, csr.io.evec,
wb_reg_pc)).toUInt // PC_WB
io.imem.btb_update.bits.pc := mem_reg_pc
io.imem.btb_update.bits.target := io.imem.req.bits.pc
io.imem.btb_update.bits.br_pc := mem_reg_pc
io.imem.bht_update.bits.pc := mem_reg_pc
io.imem.ras_update.bits.returnAddr := mem_int_wdata
// for hazard/bypass opportunity detection
io.ctrl.ex_waddr := ex_reg_inst(11,7)
io.ctrl.mem_waddr := mem_reg_inst(11,7)
io.ctrl.wb_waddr := wb_reg_inst(11,7)
printf("C%d: %d [%d] pc=[%x] W[r%d=%x][%d] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n",
io.host.id, csr.io.time(32,0), io.ctrl.retire, wb_reg_pc,
Mux(wb_wen, wb_waddr, UInt(0)), wb_wdata, wb_wen,
wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs(0))),
wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs(1))),
wb_reg_inst, wb_reg_inst)
}

View File

@ -3,27 +3,26 @@
package rocket
import Chisel._
import Node._
import Instructions._
object ALU
{
val SZ_ALU_FN = 4
val FN_X = Bits("b????")
val FN_ADD = Bits(0)
val FN_SL = Bits(1)
val FN_XOR = Bits(4)
val FN_OR = Bits(6)
val FN_AND = Bits(7)
val FN_SR = Bits(5)
val FN_SEQ = Bits(8)
val FN_SNE = Bits(9)
val FN_SUB = Bits(10)
val FN_SRA = Bits(11)
val FN_SLT = Bits(12)
val FN_SGE = Bits(13)
val FN_SLTU = Bits(14)
val FN_SGEU = Bits(15)
val FN_X = BitPat("b????")
val FN_ADD = UInt(0)
val FN_SL = UInt(1)
val FN_XOR = UInt(4)
val FN_OR = UInt(6)
val FN_AND = UInt(7)
val FN_SR = UInt(5)
val FN_SEQ = UInt(8)
val FN_SNE = UInt(9)
val FN_SUB = UInt(10)
val FN_SRA = UInt(11)
val FN_SLT = UInt(12)
val FN_SGE = UInt(13)
val FN_SLTU = UInt(14)
val FN_SGEU = UInt(15)
val FN_DIV = FN_XOR
val FN_DIVU = FN_SR

View File

@ -13,24 +13,24 @@ case object DFMALatency
object FPConstants
{
val FCMD_ADD = Bits("b0??00")
val FCMD_SUB = Bits("b0??01")
val FCMD_MUL = Bits("b0??10")
val FCMD_MADD = Bits("b1??00")
val FCMD_MSUB = Bits("b1??01")
val FCMD_NMSUB = Bits("b1??10")
val FCMD_NMADD = Bits("b1??11")
val FCMD_DIV = Bits("b?0011")
val FCMD_SQRT = Bits("b?1011")
val FCMD_SGNJ = Bits("b??1?0")
val FCMD_MINMAX = Bits("b?01?1")
val FCMD_CVT_FF = Bits("b??0??")
val FCMD_CVT_IF = Bits("b?10??")
val FCMD_CMP = Bits("b?01??")
val FCMD_MV_XF = Bits("b?11??")
val FCMD_CVT_FI = Bits("b??0??")
val FCMD_MV_FX = Bits("b??1??")
val FCMD_X = Bits("b?????")
val FCMD_ADD = BitPat("b0??00")
val FCMD_SUB = BitPat("b0??01")
val FCMD_MUL = BitPat("b0??10")
val FCMD_MADD = BitPat("b1??00")
val FCMD_MSUB = BitPat("b1??01")
val FCMD_NMSUB = BitPat("b1??10")
val FCMD_NMADD = BitPat("b1??11")
val FCMD_DIV = BitPat("b?0011")
val FCMD_SQRT = BitPat("b?1011")
val FCMD_SGNJ = BitPat("b??1?0")
val FCMD_MINMAX = BitPat("b?01?1")
val FCMD_CVT_FF = BitPat("b??0??")
val FCMD_CVT_IF = BitPat("b?10??")
val FCMD_CMP = BitPat("b?01??")
val FCMD_MV_XF = BitPat("b?11??")
val FCMD_CVT_FI = BitPat("b??0??")
val FCMD_MV_FX = BitPat("b??1??")
val FCMD_X = BitPat("b?????")
val FCMD_WIDTH = 5
val RM_SZ = 3
@ -65,9 +65,6 @@ class FPUDecoder extends Module
val sigs = new FPUCtrlSigs().asOutput
}
val N = Bool(false)
val Y = Bool(true)
val X = Bool(false)
val decoder = DecodeLogic(io.inst,
List (FCMD_X, X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X),
Array(FLW -> List(FCMD_X, Y,Y,N,N,N,X,X,Y,N,N,N,N,N,N,N,N),
@ -138,33 +135,34 @@ class FPUDecoder extends Module
s.toint, s.fastpipe, s.fma, s.div, s.sqrt, s.round, s.wflags) := decoder
}
class DpathFPUIO extends Bundle {
val inst = Bits(OUTPUT, 32)
val fromint_data = Bits(OUTPUT, 64)
class FPUIO extends Bundle {
val inst = Bits(INPUT, 32)
val fromint_data = Bits(INPUT, 64)
val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ)
val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ)).flip
val fcsr_rm = Bits(INPUT, FPConstants.RM_SZ)
val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ))
val store_data = Bits(INPUT, 64)
val toint_data = Bits(INPUT, 64)
val store_data = Bits(OUTPUT, 64)
val toint_data = Bits(OUTPUT, 64)
val dmem_resp_val = Bool(OUTPUT)
val dmem_resp_type = Bits(OUTPUT, 3)
val dmem_resp_tag = UInt(OUTPUT, 5)
val dmem_resp_data = Bits(OUTPUT, 64)
}
val dmem_resp_val = Bool(INPUT)
val dmem_resp_type = Bits(INPUT, 3)
val dmem_resp_tag = UInt(INPUT, 5)
val dmem_resp_data = Bits(INPUT, 64)
class CtrlFPUIO extends Bundle {
val valid = Bool(OUTPUT)
val fcsr_rdy = Bool(INPUT)
val nack_mem = Bool(INPUT)
val illegal_rm = Bool(INPUT)
val killx = Bool(OUTPUT)
val killm = Bool(OUTPUT)
val dec = new FPUCtrlSigs().asInput
val sboard_set = Bool(INPUT)
val sboard_clr = Bool(INPUT)
val sboard_clra = UInt(INPUT, 5)
val valid = Bool(INPUT)
val fcsr_rdy = Bool(OUTPUT)
val nack_mem = Bool(OUTPUT)
val illegal_rm = Bool(OUTPUT)
val killx = Bool(INPUT)
val killm = Bool(INPUT)
val dec = new FPUCtrlSigs().asOutput
val sboard_set = Bool(OUTPUT)
val sboard_clr = Bool(OUTPUT)
val sboard_clra = UInt(OUTPUT, 5)
val cp_req = Decoupled(new FPInput()).flip //cp doesn't pay attn to kill sigs
val cp_resp = Decoupled(new FPResult())
}
class FPResult extends Bundle
@ -230,7 +228,7 @@ class FPToInt extends Module
io.out.bits.exc := dcmp_exc
}
when (in.cmd === FCMD_CVT_IF) {
io.out.bits.toint := Mux(in.typ(1), d2i._1, d2i._1(31,0).toSInt)
io.out.bits.toint := Mux(in.typ(1), d2i._1, d2i._1(31,0).toSInt).toUInt
io.out.bits.exc := d2i._2
}
@ -248,7 +246,7 @@ class IntToFP(val latency: Int) extends Module
val in = Pipe(io.in)
val mux = new FPResult
val mux = Wire(new FPResult)
mux.exc := Bits(0)
mux.data := hardfloat.floatNToRecodedFloatN(in.bits.in1, 52, 12)
when (in.bits.single) {
@ -299,7 +297,7 @@ class FPToFP(val latency: Int) extends Module
val isMax = in.bits.rm(0)
val isLHS = isnan2 || isMax != io.lt && !isnan1
val mux = new FPResult
val mux = Wire(new FPResult)
mux.exc := minmax_exc
mux.data := in.bits.in2
@ -347,7 +345,7 @@ class FPUFMAPipe(val latency: Int, sigWidth: Int, expWidth: Int) extends Module
fma.io.b := in.in2
fma.io.c := in.in3
val res = new FPResult
val res = Wire(new FPResult)
res.data := fma.io.out
res.exc := fma.io.exceptionFlags
io.out := Pipe(valid, res, latency-1)
@ -355,26 +353,21 @@ class FPUFMAPipe(val latency: Int, sigWidth: Int, expWidth: Int) extends Module
class FPU extends Module
{
val io = new Bundle {
val ctrl = (new CtrlFPUIO).flip
val dpath = (new DpathFPUIO).flip
val cp_req = Decoupled(new FPInput()).flip //cp doesn't pay attn to kill sigs
val cp_resp = Decoupled(new FPResult())
}
val io = new FPUIO
val ex_reg_valid = Reg(next=io.ctrl.valid, init=Bool(false))
val ex_reg_valid = Reg(next=io.valid, init=Bool(false))
val req_valid = ex_reg_valid || io.cp_req.valid
val ex_reg_inst = RegEnable(io.dpath.inst, io.ctrl.valid)
val ex_reg_inst = RegEnable(io.inst, io.valid)
val ex_cp_valid = io.cp_req.valid && !ex_reg_valid
val mem_reg_valid = Reg(next=ex_reg_valid && !io.ctrl.killx || ex_cp_valid, init=Bool(false))
val mem_reg_valid = Reg(next=ex_reg_valid && !io.killx || ex_cp_valid, init=Bool(false))
val mem_reg_inst = RegEnable(ex_reg_inst, ex_reg_valid)
val mem_cp_valid = Reg(next=ex_cp_valid, init=Bool(false))
val killm = (io.ctrl.killm || io.ctrl.nack_mem) && !mem_cp_valid
val killm = (io.killm || io.nack_mem) && !mem_cp_valid
val wb_reg_valid = Reg(next=mem_reg_valid && (!killm || mem_cp_valid), init=Bool(false))
val wb_cp_valid = Reg(next=mem_cp_valid, init=Bool(false))
val fp_decoder = Module(new FPUDecoder)
fp_decoder.io.inst := io.dpath.inst
fp_decoder.io.inst := io.inst
val cp_ctrl = new FPUCtrlSigs
cp_ctrl <> io.cp_req.bits
@ -382,15 +375,15 @@ class FPU extends Module
io.cp_resp.bits.data := UInt(0)
val id_ctrl = fp_decoder.io.sigs
val ex_ctrl = Mux(ex_reg_valid, RegEnable(id_ctrl, io.ctrl.valid), cp_ctrl)
val ex_ctrl = Mux(ex_reg_valid, RegEnable(id_ctrl, io.valid), cp_ctrl)
val mem_ctrl = RegEnable(ex_ctrl, req_valid)
val wb_ctrl = RegEnable(mem_ctrl, mem_reg_valid)
// load response
val load_wb = Reg(next=io.dpath.dmem_resp_val)
val load_wb_single = RegEnable(io.dpath.dmem_resp_type === MT_W || io.dpath.dmem_resp_type === MT_WU, io.dpath.dmem_resp_val)
val load_wb_data = RegEnable(io.dpath.dmem_resp_data, io.dpath.dmem_resp_val)
val load_wb_tag = RegEnable(io.dpath.dmem_resp_tag, io.dpath.dmem_resp_val)
val load_wb = Reg(next=io.dmem_resp_val)
val load_wb_single = RegEnable(io.dmem_resp_type === MT_W || io.dmem_resp_type === MT_WU, io.dmem_resp_val)
val load_wb_data = RegEnable(io.dmem_resp_data, io.dmem_resp_val)
val load_wb_tag = RegEnable(io.dmem_resp_tag, io.dmem_resp_val)
val rec_s = hardfloat.floatNToRecodedFloatN(load_wb_data, 23, 9)
val rec_d = hardfloat.floatNToRecodedFloatN(load_wb_data, 52, 12)
val load_wb_data_recoded = Mux(load_wb_single, Cat(SInt(-1, 32), rec_s), rec_d)
@ -400,26 +393,26 @@ class FPU extends Module
when (load_wb) { regfile(load_wb_tag) := load_wb_data_recoded }
val ex_ra1::ex_ra2::ex_ra3::Nil = List.fill(3)(Reg(UInt()))
when (io.ctrl.valid) {
when (io.valid) {
when (id_ctrl.ren1) {
when (!id_ctrl.swap12) { ex_ra1 := io.dpath.inst(19,15) }
when (id_ctrl.swap12) { ex_ra2 := io.dpath.inst(19,15) }
when (!id_ctrl.swap12) { ex_ra1 := io.inst(19,15) }
when (id_ctrl.swap12) { ex_ra2 := io.inst(19,15) }
}
when (id_ctrl.ren2) {
when (id_ctrl.swap12) { ex_ra1 := io.dpath.inst(24,20) }
when (id_ctrl.swap23) { ex_ra3 := io.dpath.inst(24,20) }
when (!id_ctrl.swap12 && !id_ctrl.swap23) { ex_ra2 := io.dpath.inst(24,20) }
when (id_ctrl.swap12) { ex_ra1 := io.inst(24,20) }
when (id_ctrl.swap23) { ex_ra3 := io.inst(24,20) }
when (!id_ctrl.swap12 && !id_ctrl.swap23) { ex_ra2 := io.inst(24,20) }
}
when (id_ctrl.ren3) { ex_ra3 := io.dpath.inst(31,27) }
when (id_ctrl.ren3) { ex_ra3 := io.inst(31,27) }
}
val ex_rs1::ex_rs2::ex_rs3::Nil = Seq(ex_ra1, ex_ra2, ex_ra3).map(regfile(_))
val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.dpath.fcsr_rm, ex_reg_inst(14,12))
val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.fcsr_rm, ex_reg_inst(14,12))
val cp_rs1 = io.cp_req.bits.in1
val cp_rs2 = Mux(io.cp_req.bits.swap23, io.cp_req.bits.in3, io.cp_req.bits.in2)
val cp_rs3 = Mux(io.cp_req.bits.swap23, io.cp_req.bits.in2, io.cp_req.bits.in3)
val req = new FPInput
val req = Wire(new FPInput)
req := ex_ctrl
req.rm := Mux(ex_reg_valid, ex_rm, io.cp_req.bits.rm)
req.in1 := Mux(ex_reg_valid, ex_rs1, cp_rs1)
@ -438,8 +431,8 @@ class FPU extends Module
val fpiu = Module(new FPToInt)
fpiu.io.in.valid := req_valid && (ex_ctrl.toint || ex_ctrl.div || ex_ctrl.sqrt || ex_ctrl.cmd === FCMD_MINMAX)
fpiu.io.in.bits := req
io.dpath.store_data := fpiu.io.out.bits.store
io.dpath.toint_data := fpiu.io.out.bits.toint
io.store_data := fpiu.io.out.bits.store
io.toint_data := fpiu.io.out.bits.toint
when(fpiu.io.out.valid && mem_cp_valid && mem_ctrl.toint){
io.cp_resp.bits.data := fpiu.io.out.bits.toint
io.cp_resp.valid := Bool(true)
@ -448,25 +441,23 @@ class FPU extends Module
val ifpu = Module(new IntToFP(3))
ifpu.io.in.valid := req_valid && ex_ctrl.fromint
ifpu.io.in.bits := req
ifpu.io.in.bits.in1 := Mux(ex_reg_valid, io.dpath.fromint_data, cp_rs1)
ifpu.io.in.bits.in1 := Mux(ex_reg_valid, io.fromint_data, cp_rs1)
val fpmu = Module(new FPToFP(2))
fpmu.io.in.valid := req_valid && ex_ctrl.fastpipe
fpmu.io.in.bits := req
fpmu.io.lt := fpiu.io.out.bits.lt
val divSqrt = Module(new hardfloat.divSqrtRecodedFloat64)
val divSqrt_inReady = Mux(divSqrt.io.sqrtOp, divSqrt.io.inReady_sqrt, divSqrt.io.inReady_div)
val divSqrt_outValid = divSqrt.io.outValid_div || divSqrt.io.outValid_sqrt
val divSqrt_wen = Reg(next=Bool(false))
val divSqrt_inReady = Wire(init=Bool(false))
val divSqrt_waddr = Reg(Bits())
val divSqrt_wdata = Bits()
val divSqrt_flags = Bits()
val divSqrt_wdata = Wire(Bits())
val divSqrt_flags = Wire(Bits())
val divSqrt_in_flight = Reg(init=Bool(false))
val divSqrt_cp = Reg(init=Bool(false))
// writeback arbitration
case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, wdata: Bits, wexc: Bits)
case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, wdata: UInt, wexc: UInt)
val pipes = List(
Pipe(fpmu, fpmu.latency, (c: FPUCtrlSigs) => c.fastpipe, fpmu.io.out.bits.data, fpmu.io.out.bits.exc),
Pipe(ifpu, ifpu.latency, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits.data, ifpu.io.out.bits.exc),
@ -481,7 +472,7 @@ class FPU extends Module
val memLatencyMask = latencyMask(mem_ctrl, 2)
val wen = Reg(init=Bits(0, maxLatency-1))
val winfo = Vec.fill(maxLatency-1){Reg(Bits())}
val winfo = Reg(Vec.fill(maxLatency-1){Bits()})
val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint)
val write_port_busy = RegEnable(mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, req_valid)
val mem_winfo = Cat(mem_cp_valid, pipeid(mem_ctrl), mem_reg_inst(11,7))
@ -515,22 +506,22 @@ class FPU extends Module
val wb_toint_valid = wb_reg_valid && wb_ctrl.toint
val wb_toint_exc = RegEnable(fpiu.io.out.bits.exc, mem_ctrl.toint)
io.dpath.fcsr_flags.valid := wb_toint_valid || divSqrt_wen || wen(0)
io.dpath.fcsr_flags.bits :=
io.fcsr_flags.valid := wb_toint_valid || divSqrt_wen || wen(0)
io.fcsr_flags.bits :=
Mux(wb_toint_valid, wb_toint_exc, UInt(0)) |
Mux(divSqrt_wen, divSqrt_flags, UInt(0)) |
Mux(wen(0), wexc, UInt(0))
val units_busy = mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && (!divSqrt_inReady || wen.orR) // || mem_reg_valid && mem_ctrl.fma && Reg(next=Mux(ex_ctrl.single, io.sfma.valid, io.dfma.valid))
io.ctrl.fcsr_rdy := !(ex_reg_valid && ex_ctrl.wflags || mem_reg_valid && mem_ctrl.wflags || wb_reg_valid && wb_ctrl.toint || wen.orR || divSqrt_in_flight)
io.ctrl.nack_mem := units_busy || write_port_busy || divSqrt_in_flight
io.ctrl.dec <> fp_decoder.io.sigs
io.fcsr_rdy := !(ex_reg_valid && ex_ctrl.wflags || mem_reg_valid && mem_ctrl.wflags || wb_reg_valid && wb_ctrl.toint || wen.orR || divSqrt_in_flight)
io.nack_mem := units_busy || write_port_busy || divSqrt_in_flight
io.dec <> fp_decoder.io.sigs
def useScoreboard(f: ((Pipe, Int)) => Bool) = pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(Bool(false))(_||_)
io.ctrl.sboard_set := wb_reg_valid && !wb_cp_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl)) || mem_ctrl.div || mem_ctrl.sqrt)
io.ctrl.sboard_clr := !wb_cp_valid && (divSqrt_wen || (wen(0) && useScoreboard(x => wsrc === UInt(x._2))))
io.ctrl.sboard_clra := waddr
io.sboard_set := wb_reg_valid && !wb_cp_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl)) || mem_ctrl.div || mem_ctrl.sqrt)
io.sboard_clr := !wb_cp_valid && (divSqrt_wen || (wen(0) && useScoreboard(x => wsrc === UInt(x._2))))
io.sboard_clra := waddr
// we don't currently support round-max-magnitude (rm=4)
io.ctrl.illegal_rm := ex_rm(2) && ex_ctrl.round
io.illegal_rm := ex_rm(2) && ex_ctrl.round
divSqrt_wdata := 0
divSqrt_flags := 0
@ -540,9 +531,11 @@ class FPU extends Module
val divSqrt_flags_double = Reg(Bits())
val divSqrt_wdata_double = Reg(Bits())
def upconvert(x: UInt) = hardfloat.recodedFloatNToRecodedFloatM(x, Bits(0), 23, 9, 52, 12)._1
val divSqrt = Module(new hardfloat.divSqrtRecodedFloat64)
divSqrt_inReady := Mux(divSqrt.io.sqrtOp, divSqrt.io.inReady_sqrt, divSqrt.io.inReady_div)
val divSqrt_outValid = divSqrt.io.outValid_div || divSqrt.io.outValid_sqrt
val divSqrt_wb_hazard = wen.orR
divSqrt.io.inValid := mem_reg_valid && !divSqrt_wb_hazard && !divSqrt_in_flight && (!io.ctrl.killm || mem_cp_valid) && (mem_ctrl.div || mem_ctrl.sqrt)
divSqrt.io.inValid := mem_reg_valid && !divSqrt_wb_hazard && !divSqrt_in_flight && (!io.killm || mem_cp_valid) && (mem_ctrl.div || mem_ctrl.sqrt)
divSqrt.io.sqrtOp := mem_ctrl.sqrt
divSqrt.io.a := fpiu.io.as_double.in1
divSqrt.io.b := fpiu.io.as_double.in2

View File

@ -50,12 +50,12 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule
val tlb = Module(new TLB)
val s1_pc_ = Reg(UInt())
val s1_pc = s1_pc_ & SInt(-coreInstBytes) // discard PC LSBS (this propagates down the pipeline)
val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBS (this propagates down the pipeline)
val s1_same_block = Reg(Bool())
val s2_valid = Reg(init=Bool(true))
val s2_pc = Reg(init=UInt(START_ADDR))
val s2_btb_resp_valid = Reg(init=Bool(false))
val s2_btb_resp_bits = Reg(btb.io.resp.bits.clone)
val s2_btb_resp_bits = Reg(btb.io.resp.bits)
val s2_xcpt_if = Reg(init=Bool(false))
val icbuf = Module(new Queue(new ICacheResp, 1, pipe=true))
@ -94,7 +94,7 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule
btb.io.ras_update := io.cpu.ras_update
btb.io.invalidate := io.cpu.invalidate || io.ptw.invalidate
tlb.io.ptw <> io.ptw
io.ptw <> tlb.io.ptw
tlb.io.req.valid := !stall && !icmiss
tlb.io.req.bits.vpn := s1_pc >> UInt(pgIdxBits)
tlb.io.req.bits.asid := UInt(0)
@ -102,7 +102,7 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule
tlb.io.req.bits.instruction := Bool(true)
tlb.io.req.bits.store := Bool(false)
icache.io.mem <> io.mem
io.mem <> icache.io.mem
icache.io.req.valid := !stall && !s0_same_block
icache.io.req.bits.idx := io.cpu.npc
icache.io.invalidate := io.cpu.invalidate
@ -160,7 +160,7 @@ class ICache extends FrontendModule
val state = Reg(init=s_ready)
val invalidated = Reg(Bool())
val stall = !io.resp.ready
val rdy = Bool()
val rdy = Wire(Bool())
val refill_addr = Reg(UInt(width = paddrBits))
val s1_any_tag_hit = Bool()
@ -197,17 +197,13 @@ class ICache extends FrontendModule
val repl_way = if (isDM) UInt(0) else LFSR16(s1_miss)(log2Up(nWays)-1,0)
val entagbits = code.width(tagBits)
val tag_array = Mem(Bits(width = entagbits*nWays), nSets, seqRead = true)
val tag_raddr = Reg(UInt())
val tag_array = SeqMem(Bits(width = entagbits*nWays), nSets)
val tag_rdata = tag_array.read(s0_pgoff(untagBits-1,blockOffBits), !refill_done && s0_valid)
when (refill_done) {
val wmask = FillInterleaved(entagbits, if (isDM) Bits(1) else UIntToOH(repl_way))
val tag = code.encode(refill_tag).toUInt
tag_array.write(s1_idx, Fill(nWays, tag), wmask)
}
// /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM
.elsewhen (s0_valid) {
tag_raddr := s0_pgoff(untagBits-1,blockOffBits)
}
val vb_array = Reg(init=Bits(0, nSets*nWays))
when (refill_done && !invalidated) {
@ -227,7 +223,7 @@ class ICache extends FrontendModule
for (i <- 0 until nWays) {
val s1_vb = !io.invalidate && vb_array(Cat(UInt(i), s1_pgoff(untagBits-1,blockOffBits))).toBool
val tag_out = tag_array(tag_raddr)(entagbits*(i+1)-1, entagbits*i)
val tag_out = tag_rdata(entagbits*(i+1)-1, entagbits*i)
val s1_tag_disparity = code.decode(tag_out).error
when (s1_valid && rdy && !stall) {
}
@ -238,20 +234,18 @@ class ICache extends FrontendModule
s1_any_tag_hit := s1_tag_hit.reduceLeft(_||_) && !s1_disparity.reduceLeft(_||_)
for (i <- 0 until nWays) {
val data_array = Mem(Bits(width = code.width(rowBits)), nSets*refillCycles, seqRead = true)
val s1_raddr = Reg(UInt())
when (narrow_grant.valid && repl_way === UInt(i)) {
val e_d = code.encode(narrow_grant.bits.data)
if(refillCycles > 1) data_array(Cat(s1_idx, refill_cnt)) := e_d
else data_array(s1_idx) := e_d
}
// /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM
.elsewhen (s0_valid) {
s1_raddr := s0_pgoff(untagBits-1,blockOffBits-(if(refillCycles > 1) refill_cnt.getWidth else 0))
val data_array = SeqMem(Bits(width = code.width(rowBits)), nSets*refillCycles)
val wen = narrow_grant.valid && repl_way === UInt(i)
when (wen) {
val e_d = code.encode(narrow_grant.bits.data).toUInt
if(refillCycles > 1) data_array.write(Cat(s1_idx, refill_cnt), e_d)
else data_array.write(s1_idx, e_d)
}
val s0_raddr = s0_pgoff(untagBits-1,blockOffBits-(if(refillCycles > 1) refill_cnt.getWidth else 0))
val s1_rdata = data_array.read(s0_raddr, !wen && s0_valid)
// if s1_tag_match is critical, replace with partial tag check
s1_dout(i) := 0
when (s1_valid && rdy && !stall && (Bool(isDM) || s1_tag_match(i))) { s1_dout(i) := data_array(s1_raddr) }
when (s1_valid && rdy && !stall && (Bool(isDM) || s1_tag_match(i))) { s1_dout(i) := s1_rdata }
}
io.resp.bits.datablock := Mux1H(s1_tag_hit, s1_dout)

View File

@ -1,4 +1,4 @@
// See LICENSE for license details.
// See LICENSE for license details
package rocket
@ -6,56 +6,12 @@ import Chisel._
import Instructions._
import uncore.constants.MemoryOpConstants._
import ALU._
import Util._
// Signal bundle connecting the pipeline control logic to the datapath.
// Directions are written from the control side: OUTPUT fields are driven by
// control, INPUT fields are consumed by control.
class CtrlDpathIO extends CoreBundle
{
// outputs to datapath
// next-PC source select
val sel_pc = UInt(OUTPUT, 3)
val killd = Bool(OUTPUT)
val killm = Bool(OUTPUT)
// integer register-file read enables, one per read port (two ports)
val ren = Vec.fill(2)(Bool(OUTPUT))
// decoded control signals for the instruction in the EX and MEM stages
val ex_ctrl = new IntCtrlSigs().asOutput
val mem_ctrl = new IntCtrlSigs().asOutput
val csr_cmd = UInt(OUTPUT, CSR.SZ)
val ex_valid = Bool(OUTPUT)
val wb_wen = Bool(OUTPUT)
// per-operand bypass enable and bypass-source index (two operands)
val bypass = Vec.fill(2)(Bool(OUTPUT))
val bypass_src = Vec.fill(2)(Bits(OUTPUT, SZ_BYP))
// NOTE(review): "ll" presumably means long-latency writeback -- confirm
val ll_ready = Bool(OUTPUT)
// exception handling
val retire = Bool(OUTPUT)
val exception = Bool(OUTPUT)
val cause = UInt(OUTPUT, xLen)
// inputs from datapath
val inst = Bits(INPUT, 32)
val mem_br_taken = Bool(INPUT)
val mem_misprediction = Bool(INPUT)
val mem_npc_misaligned = Bool(INPUT)
val div_mul_rdy = Bool(INPUT)
val ll_wen = Bool(INPUT)
val ll_waddr = UInt(INPUT, 5)
// destination register addresses of the instructions in EX, MEM and WB
val ex_waddr = UInt(INPUT, 5)
val mem_rs1_ra = Bool(INPUT)
val mem_waddr = UInt(INPUT, 5)
val wb_waddr = UInt(INPUT, 5)
val status = new MStatus().asInput
// FP scoreboard clear request and the register address to clear
val fp_sboard_clr = Bool(INPUT)
val fp_sboard_clra = UInt(INPUT, 5)
// inputs from csr file
val csr_replay = Bool(INPUT)
val csr_stall = Bool(INPUT)
val csr_xcpt = Bool(INPUT)
val eret = Bool(INPUT)
val interrupt = Bool(INPUT)
val interrupt_cause = UInt(INPUT, xLen)
}
abstract trait DecodeConstants
{
val xpr64 = Y
val decode_default =
val decode_default: List[BitPat] =
// jal renf1 fence.i
// | jalr | renf2 |
// fp_val| | renx2 | | renf3 |
@ -66,7 +22,7 @@ abstract trait DecodeConstants
// | | | | | | | | | | | | | | | | | | | | | | | | | |
List(N, X,X,X,X,X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,X,X,X,CSR.X,X,X,X)
val table: Array[(UInt, List[UInt])]
val table: Array[(BitPat, List[BitPat])]
}
class IntCtrlSigs extends Bundle {
@ -97,18 +53,19 @@ class IntCtrlSigs extends Bundle {
val fence = Bool()
val amo = Bool()
def decode(inst: UInt, table: Iterable[(UInt, List[UInt])]) = {
def decode(inst: UInt, table: Iterable[(BitPat, List[BitPat])]) = {
val decoder = DecodeLogic(inst, XDecode.decode_default, table)
Vec(legal, fp, rocc, branch, jal, jalr, rxs2, rxs1, sel_alu2, sel_alu1,
sel_imm, alu_dw, alu_fn, mem, mem_cmd, mem_type,
rfs1, rfs2, rfs3, wfd, div, wxd, csr, fence_i, fence, amo) := decoder
val sigs = Seq(legal, fp, rocc, branch, jal, jalr, rxs2, rxs1, sel_alu2,
sel_alu1, sel_imm, alu_dw, alu_fn, mem, mem_cmd, mem_type,
rfs1, rfs2, rfs3, wfd, div, wxd, csr, fence_i, fence, amo)
sigs zip decoder map {case(s,d) => s := d}
this
}
}
object XDecode extends DecodeConstants
{
val table = Array(
val table: Array[(BitPat, List[BitPat])] = Array(
// jal renf1 fence.i
// | jalr | renf2 |
// fp_val| | renx2 | | renf3 |
@ -229,7 +186,7 @@ object XDecode extends DecodeConstants
object FDecode extends DecodeConstants
{
val table = Array(
val table: Array[(BitPat, List[BitPat])] = Array(
// jal renf1 fence.i
// | jalr | renf2 |
// fp_val| | renx2 | | renf3 |
@ -300,7 +257,7 @@ object FDecode extends DecodeConstants
object FDivSqrtDecode extends DecodeConstants
{
val table = Array(
val table: Array[(BitPat, List[BitPat])] = Array(
FDIV_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FDIV_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FSQRT_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
@ -309,7 +266,7 @@ object FDivSqrtDecode extends DecodeConstants
object RoCCDecode extends DecodeConstants
{
val table = Array(
val table: Array[(BitPat, List[BitPat])] = Array(
// jal renf1 fence.i
// | jalr | renf2 |
// fp_val| | renx2 | | renf3 |
@ -343,343 +300,3 @@ object RoCCDecode extends DecodeConstants
CUSTOM3_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
CUSTOM3_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N))
}
// Pipeline control logic. Decodes the instruction presented by the datapath,
// carries the decoded control signals through EX/MEM/WB registers, detects
// exceptions, replays and data hazards, and drives the control-side signals of
// the datapath, frontend (imem), data cache (dmem), FPU and RoCC interfaces.
class Control extends CoreModule
{
val io = new Bundle {
val dpath = new CtrlDpathIO
val imem = new CPUFrontendIO
val dmem = new HellaCacheIO
val fpu = new CtrlFPUIO
val rocc = new RoCCInterface().flip
}
// Start from the base integer decode table and append the FP, FP div/sqrt and
// RoCC tables only when the corresponding configuration parameters enable them.
var decode_table = XDecode.table
if (!params(BuildFPU).isEmpty) decode_table ++= FDecode.table
if (!params(BuildFPU).isEmpty && params(FDivSqrt)) decode_table ++= FDivSqrtDecode.table
if (!params(BuildRoCC).isEmpty) decode_table ++= RoCCDecode.table
// Decoded signals for the instruction in ID, then one registered copy per stage.
val id_ctrl = new IntCtrlSigs().decode(io.dpath.inst, decode_table)
val ex_ctrl = Reg(new IntCtrlSigs)
val mem_ctrl = Reg(new IntCtrlSigs)
val wb_ctrl = Reg(new IntCtrlSigs)
// EX-stage state
val ex_reg_xcpt_interrupt = Reg(Bool())
val ex_reg_valid = Reg(Bool())
val ex_reg_btb_hit = Reg(Bool())
val ex_reg_btb_resp = Reg(io.imem.btb_resp.bits.clone)
val ex_reg_xcpt = Reg(Bool())
val ex_reg_flush_pipe = Reg(Bool())
val ex_reg_load_use = Reg(Bool())
val ex_reg_cause = Reg(UInt())
// MEM-stage state
val mem_reg_xcpt_interrupt = Reg(Bool())
val mem_reg_valid = Reg(Bool())
val mem_reg_btb_hit = Reg(Bool())
val mem_reg_btb_resp = Reg(io.imem.btb_resp.bits.clone)
val mem_reg_xcpt = Reg(Bool())
val mem_reg_replay = Reg(Bool())
val mem_reg_flush_pipe = Reg(Bool())
val mem_reg_cause = Reg(UInt())
val mem_reg_slow_bypass = Reg(Bool())
// WB-stage state
val wb_reg_valid = Reg(Bool())
val wb_reg_xcpt = Reg(Bool())
val wb_reg_replay = Reg(Bool())
val wb_reg_cause = Reg(UInt())
val wb_reg_rocc_pending = Reg(init=Bool(false))
// take_pc_wb is declared here and assigned further down ("control transfer from ex/wb").
val take_pc_wb = Bool()
// A MEM-stage redirect is wanted on a misprediction of a branch/jump or a
// pipeline flush; it is only taken when the target is not misaligned (the
// misaligned case raises an exception in mem_xcpt instead).
val mem_misprediction = io.dpath.mem_misprediction && mem_reg_valid && (mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal)
val want_take_pc_mem = mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe)
val take_pc_mem = want_take_pc_mem && !io.dpath.mem_npc_misaligned
val take_pc_mem_wb = take_pc_wb || take_pc_mem
val take_pc = take_pc_mem_wb
// Per-stage kill signals; declared here, assigned below.
val ctrl_killd = Bool()
val ctrl_killx = Bool()
val ctrl_killm = Bool()
// Register address fields extracted from the instruction in ID.
val id_raddr3 = io.dpath.inst(31,27)
val id_raddr2 = io.dpath.inst(24,20)
val id_raddr1 = io.dpath.inst(19,15)
val id_waddr = io.dpath.inst(11,7)
val id_load_use = Bool()
val id_reg_fence = Reg(init=Bool(false))
val id_csr_en = id_ctrl.csr != CSR.N
val id_system_insn = id_ctrl.csr === CSR.I
// A CSR set/clear whose rs1 field is 0 is issued as a plain read (CSR.R).
val id_csr_ren = (id_ctrl.csr === CSR.S || id_ctrl.csr === CSR.C) && id_raddr1 === UInt(0)
val id_csr = Mux(id_csr_ren, CSR.R, id_ctrl.csr)
val id_csr_addr = io.dpath.inst(31,20)
// this is overly conservative
// System instructions, and CSR writes to any address not in safe_csrs,
// request a pipeline flush.
val safe_csrs = CSRs.sscratch :: CSRs.sepc :: CSRs.mscratch :: CSRs.mepc :: CSRs.mcause :: CSRs.mbadaddr :: Nil
val legal_csrs = collection.mutable.LinkedHashSet(CSRs.all:_*)
val id_csr_flush = id_system_insn || (id_csr_en && !id_csr_ren && !DecodeLogic(id_csr_addr, safe_csrs, legal_csrs -- safe_csrs))
// Illegal if not decoded as legal, or if it is an FP / RoCC instruction while
// the corresponding status field (fs / xs) is all zeros.
val id_illegal_insn = !id_ctrl.legal ||
id_ctrl.fp && !io.dpath.status.fs.orR ||
id_ctrl.rocc && !io.dpath.status.xs.orR
// stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE)
val id_amo_aq = io.dpath.inst(26)
val id_amo_rl = io.dpath.inst(25)
val id_fence_next = id_ctrl.fence || id_ctrl.amo && id_amo_rl
val id_mem_busy = !io.dmem.ordered || io.dmem.req.valid
val id_rocc_busy = Bool(!params(BuildRoCC).isEmpty) &&
(io.rocc.busy || ex_reg_valid && ex_ctrl.rocc ||
mem_reg_valid && mem_ctrl.rocc || wb_reg_valid && wb_ctrl.rocc)
// id_reg_fence stays set after a fence / AMO.rl for as long as memory is busy.
id_reg_fence := id_fence_next || id_reg_fence && id_mem_busy
val id_do_fence = id_rocc_busy && id_ctrl.fence ||
id_mem_busy && (id_ctrl.amo && id_amo_aq || id_ctrl.fence_i || id_reg_fence && (id_ctrl.mem || id_ctrl.rocc) || id_csr_en)
// Given (condition, cause) pairs in priority order, returns
// (any condition asserted, cause selected by PriorityMux over the pairs).
def checkExceptions(x: Seq[(Bool, UInt)]) =
(x.map(_._1).reduce(_||_), PriorityMux(x))
val (id_xcpt, id_cause) = checkExceptions(List(
(io.dpath.interrupt, io.dpath.interrupt_cause),
(io.imem.resp.bits.xcpt_if, UInt(Causes.fault_fetch)),
(id_illegal_insn, UInt(Causes.illegal_instruction))))
// ID -> EX: the EX registers only capture a new instruction when ID is not killed.
ex_reg_valid := !ctrl_killd
ex_reg_xcpt := !ctrl_killd && id_xcpt
ex_reg_xcpt_interrupt := io.dpath.interrupt && !take_pc && io.imem.resp.valid
when (id_xcpt) { ex_reg_cause := id_cause }
when (!ctrl_killd) {
ex_ctrl := id_ctrl
// later connect overrides the csr field copied by the line above
ex_ctrl.csr := id_csr
ex_reg_btb_hit := io.imem.btb_resp.valid
when (io.imem.btb_resp.valid) { ex_reg_btb_resp := io.imem.btb_resp.bits }
ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush
ex_reg_load_use := id_load_use
ex_reg_xcpt := id_xcpt
}
// replay inst in ex stage
val wb_dcache_miss = wb_ctrl.mem && !io.dmem.resp.valid
val replay_ex_structural = ex_ctrl.mem && !io.dmem.req.ready ||
ex_ctrl.div && !io.dpath.div_mul_rdy
val replay_ex_load_use = wb_dcache_miss && ex_reg_load_use
val replay_ex = ex_reg_valid && (replay_ex_structural || replay_ex_load_use)
ctrl_killx := take_pc_mem_wb || replay_ex || !ex_reg_valid
// detect 2-cycle load-use delay for LB/LH/SC
val ex_slow_bypass = ex_ctrl.mem_cmd === M_XSC || Vec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_ctrl.mem_type)
val (ex_xcpt, ex_cause) = checkExceptions(List(
(ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause),
(ex_ctrl.fp && io.fpu.illegal_rm, UInt(Causes.illegal_instruction))))
// EX -> MEM: same capture pattern as ID -> EX, gated by ctrl_killx.
mem_reg_valid := !ctrl_killx
mem_reg_replay := !take_pc_mem_wb && replay_ex
mem_reg_xcpt := !ctrl_killx && ex_xcpt
mem_reg_xcpt_interrupt := !take_pc_mem_wb && ex_reg_xcpt_interrupt
when (ex_xcpt) { mem_reg_cause := ex_cause }
when (!ctrl_killx) {
mem_ctrl := ex_ctrl
mem_reg_btb_hit := ex_reg_btb_hit
when (ex_reg_btb_hit) { mem_reg_btb_resp := ex_reg_btb_resp }
mem_reg_flush_pipe := ex_reg_flush_pipe
mem_reg_slow_bypass := ex_slow_bypass
mem_reg_xcpt := ex_xcpt
}
// MEM-stage exceptions, listed in priority order (earlier entries win).
val (mem_xcpt, mem_cause) = checkExceptions(List(
(mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause),
(want_take_pc_mem && io.dpath.mem_npc_misaligned, UInt(Causes.misaligned_fetch)),
(mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.ma.st, UInt(Causes.misaligned_store)),
(mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.ma.ld, UInt(Causes.misaligned_load)),
(mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.pf.st, UInt(Causes.fault_store)),
(mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.pf.ld, UInt(Causes.fault_load))))
val dcache_kill_mem = mem_reg_valid && mem_ctrl.wxd && io.dmem.replay_next.valid // structural hazard on writeback port
val fpu_kill_mem = mem_reg_valid && mem_ctrl.fp && io.fpu.nack_mem
val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem
// killm_common excludes mem_xcpt and fpu_kill_mem; it is the version exported
// to the datapath and FPU below, while ctrl_killm gates the WB registers.
val killm_common = dcache_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid
ctrl_killm := killm_common || mem_xcpt || fpu_kill_mem
// MEM -> WB
wb_reg_valid := !ctrl_killm
when (!ctrl_killm) { wb_ctrl := mem_ctrl }
wb_reg_replay := replay_mem && !take_pc_wb
wb_reg_xcpt := mem_xcpt && !take_pc_wb
when (mem_xcpt) { wb_reg_cause := mem_cause }
val wb_set_sboard = wb_ctrl.div || wb_dcache_miss || wb_ctrl.rocc
val replay_wb_common =
io.dmem.resp.bits.nack || wb_reg_replay || io.dpath.csr_replay
val wb_rocc_val = wb_reg_valid && wb_ctrl.rocc && !replay_wb_common
val replay_wb = replay_wb_common || wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready
when (wb_rocc_val) { wb_reg_rocc_pending := !io.rocc.cmd.ready }
when (wb_reg_xcpt) { wb_reg_rocc_pending := Bool(false) }
// n-bit scoreboard, one bit per register. set/clear are elaboration-time
// builders: each call folds its update into _next, so later calls (and
// readBypassed) see the effect of earlier calls, in source order.
// read() returns the registered value only.
class Scoreboard(n: Int)
{
def set(en: Bool, addr: UInt): Unit = update(en, _next | mask(en, addr))
def clear(en: Bool, addr: UInt): Unit = update(en, _next & ~mask(en, addr))
def read(addr: UInt): Bool = r(addr)
def readBypassed(addr: UInt): Bool = _next(addr)
private val r = Reg(init=Bits(0, n))
private var _next = r
private var ens = Bool(false)
// one-hot mask for addr when en is asserted, otherwise zero
private def mask(en: Bool, addr: UInt) = Mux(en, UInt(1) << addr, UInt(0))
private def update(en: Bool, update: UInt) = {
_next = update
ens = ens || en
// the register is written whenever any update seen so far is enabled
when (ens) { r := _next }
}
}
// Integer scoreboard. The clear is registered first so that the
// readBypassed calls below observe it; the matching set is issued after them.
val sboard = new Scoreboard(32)
sboard.clear(io.dpath.ll_wen, io.dpath.ll_waddr)
// FP stall condition; only elaborated (with its own scoreboard) when an FPU is configured.
val id_stall_fpu = if (!params(BuildFPU).isEmpty) {
val fp_sboard = new Scoreboard(32)
fp_sboard.set((wb_dcache_miss && wb_ctrl.wfd || io.fpu.sboard_set) && io.dpath.retire, io.dpath.wb_waddr)
fp_sboard.clear(io.dpath.fp_sboard_clr, io.dpath.fp_sboard_clra)
fp_sboard.clear(io.fpu.sboard_clr, io.fpu.sboard_clra)
id_csr_en && !io.fpu.fcsr_rdy ||
io.fpu.dec.ren1 && fp_sboard.read(id_raddr1) ||
io.fpu.dec.ren2 && fp_sboard.read(id_raddr2) ||
io.fpu.dec.ren3 && fp_sboard.read(id_raddr3) ||
io.fpu.dec.wen && fp_sboard.read(id_waddr)
} else Bool(false)
// write CAUSE CSR on an exception
io.dpath.exception := wb_reg_xcpt
io.dpath.cause := wb_reg_cause
val wb_xcpt = wb_reg_xcpt || io.dpath.csr_xcpt
// control transfer from ex/wb
take_pc_wb := replay_wb || wb_xcpt || io.dpath.eret
io.dpath.sel_pc :=
Mux(wb_xcpt || io.dpath.eret, PC_CSR, // exception or [m|s]ret
Mux(replay_wb, PC_WB, // replay
PC_MEM))
// Branch-predictor updates (BTB, BHT, RAS), all sourced from the MEM stage
// and suppressed when WB is redirecting the PC.
io.imem.btb_update.valid := mem_reg_valid && !io.dpath.mem_npc_misaligned && io.dpath.mem_misprediction && ((mem_ctrl.branch && io.dpath.mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal) && !take_pc_wb
io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit
io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp
io.imem.btb_update.bits.isJump := mem_ctrl.jal || mem_ctrl.jalr
io.imem.btb_update.bits.isReturn := mem_ctrl.jalr && io.dpath.mem_rs1_ra
io.imem.bht_update.valid := mem_reg_valid && mem_ctrl.branch && !take_pc_wb
io.imem.bht_update.bits.taken := io.dpath.mem_br_taken
io.imem.bht_update.bits.mispredict := io.dpath.mem_misprediction
io.imem.bht_update.bits.prediction.valid := mem_reg_btb_hit
io.imem.bht_update.bits.prediction.bits := mem_reg_btb_resp
io.imem.ras_update.valid := io.imem.btb_update.bits.isJump && !io.dpath.mem_npc_misaligned && !take_pc_wb
io.imem.ras_update.bits.isCall := mem_ctrl.wxd && io.dpath.mem_waddr(0)
io.imem.ras_update.bits.isReturn := mem_ctrl.jalr && io.dpath.mem_rs1_ra
io.imem.ras_update.bits.prediction.valid := mem_reg_btb_hit
io.imem.ras_update.bits.prediction.bits := mem_reg_btb_resp
io.imem.req.valid := take_pc
// Bypass network control. Every source slot defaults to (valid = true, addr = 0)
// and the EX/MEM/DC slots are then overwritten. doBypass(i)(j) is true when
// source j is valid and its address equals read address i; the selected
// source index comes from PriorityEncoder over that row.
val bypassDst = Array(id_raddr1, id_raddr2)
val bypassSrc = Array.fill(NBYP)((Bool(true), UInt(0)))
bypassSrc(BYP_EX) = (ex_reg_valid && ex_ctrl.wxd, io.dpath.ex_waddr)
bypassSrc(BYP_MEM) = (mem_reg_valid && mem_ctrl.wxd && !mem_ctrl.mem, io.dpath.mem_waddr)
bypassSrc(BYP_DC) = (mem_reg_valid && mem_ctrl.wxd, io.dpath.mem_waddr)
val doBypass = bypassDst.map(d => bypassSrc.map(s => s._1 && s._2 === d))
for (i <- 0 until io.dpath.bypass.size) {
io.dpath.bypass(i) := doBypass(i).reduce(_||_)
io.dpath.bypass_src(i) := PriorityEncoder(doBypass(i))
}
// stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage.
// Integer hazards ignore register address 0 (the *_not0 qualifiers).
val id_renx1_not0 = id_ctrl.rxs1 && id_raddr1 != UInt(0)
val id_renx2_not0 = id_ctrl.rxs2 && id_raddr2 != UInt(0)
val id_wen_not0 = id_ctrl.wxd && id_waddr != UInt(0)
val ex_cannot_bypass = ex_ctrl.csr != CSR.N || ex_ctrl.jalr || ex_ctrl.mem || ex_ctrl.div || ex_ctrl.fp || ex_ctrl.rocc
val data_hazard_ex = ex_ctrl.wxd &&
(id_renx1_not0 && id_raddr1 === io.dpath.ex_waddr ||
id_renx2_not0 && id_raddr2 === io.dpath.ex_waddr ||
id_wen_not0 && id_waddr === io.dpath.ex_waddr)
val fp_data_hazard_ex = ex_ctrl.wfd &&
(io.fpu.dec.ren1 && id_raddr1 === io.dpath.ex_waddr ||
io.fpu.dec.ren2 && id_raddr2 === io.dpath.ex_waddr ||
io.fpu.dec.ren3 && id_raddr3 === io.dpath.ex_waddr ||
io.fpu.dec.wen && id_waddr === io.dpath.ex_waddr)
val id_ex_hazard = ex_reg_valid && (data_hazard_ex && ex_cannot_bypass || fp_data_hazard_ex)
// stall for RAW/WAW hazards on CSRs, LB/LH, and mul/div in memory stage.
// mem_mem_cmd_bh is an elaboration-time choice based on the FastLoadWord /
// FastLoadByte parameters: without FastLoadWord every MEM-stage load blocks bypassing.
val mem_mem_cmd_bh =
if (params(FastLoadWord)) Bool(!params(FastLoadByte)) && mem_reg_slow_bypass
else Bool(true)
val mem_cannot_bypass = mem_ctrl.csr != CSR.N || mem_ctrl.mem && mem_mem_cmd_bh || mem_ctrl.div || mem_ctrl.fp || mem_ctrl.rocc
val data_hazard_mem = mem_ctrl.wxd &&
(id_renx1_not0 && id_raddr1 === io.dpath.mem_waddr ||
id_renx2_not0 && id_raddr2 === io.dpath.mem_waddr ||
id_wen_not0 && id_waddr === io.dpath.mem_waddr)
val fp_data_hazard_mem = mem_ctrl.wfd &&
(io.fpu.dec.ren1 && id_raddr1 === io.dpath.mem_waddr ||
io.fpu.dec.ren2 && id_raddr2 === io.dpath.mem_waddr ||
io.fpu.dec.ren3 && id_raddr3 === io.dpath.mem_waddr ||
io.fpu.dec.wen && id_waddr === io.dpath.mem_waddr)
val id_mem_hazard = mem_reg_valid && (data_hazard_mem && mem_cannot_bypass || fp_data_hazard_mem)
id_load_use := mem_reg_valid && data_hazard_mem && mem_ctrl.mem
// stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback.
val data_hazard_wb = wb_ctrl.wxd &&
(id_renx1_not0 && id_raddr1 === io.dpath.wb_waddr ||
id_renx2_not0 && id_raddr2 === io.dpath.wb_waddr ||
id_wen_not0 && id_waddr === io.dpath.wb_waddr)
val fp_data_hazard_wb = wb_ctrl.wfd &&
(io.fpu.dec.ren1 && id_raddr1 === io.dpath.wb_waddr ||
io.fpu.dec.ren2 && id_raddr2 === io.dpath.wb_waddr ||
io.fpu.dec.ren3 && id_raddr3 === io.dpath.wb_waddr ||
io.fpu.dec.wen && id_waddr === io.dpath.wb_waddr)
val id_wb_hazard = wb_reg_valid && (data_hazard_wb && wb_set_sboard || fp_data_hazard_wb)
// These readBypassed calls see the ll_wen clear registered above but not the
// set on the next statement, because the set is added to the scoreboard afterwards.
val id_sboard_hazard =
(id_renx1_not0 && sboard.readBypassed(id_raddr1) ||
id_renx2_not0 && sboard.readBypassed(id_raddr2) ||
id_wen_not0 && sboard.readBypassed(id_waddr))
sboard.set(wb_set_sboard && io.dpath.wb_wen, io.dpath.wb_waddr)
// Decode-stage stall: any hazard, FPU/dmem/RoCC not ready, a pending fence, or a CSR stall.
val ctrl_stalld =
id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard ||
id_ctrl.fp && id_stall_fpu ||
id_ctrl.mem && !io.dmem.req.ready ||
Bool(!params(BuildRoCC).isEmpty) && wb_reg_rocc_pending && id_ctrl.rocc && !io.rocc.cmd.ready ||
id_do_fence ||
io.dpath.csr_stall
// A pending interrupt drains decode: the instruction is killed, but the
// frontend response is still accepted even if decode would otherwise stall.
val ctrl_draind = io.dpath.interrupt
ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || ctrl_draind
io.dpath.killd := take_pc || ctrl_stalld && !ctrl_draind
io.imem.resp.ready := !ctrl_stalld || ctrl_draind
io.imem.invalidate := wb_reg_valid && wb_ctrl.fence_i
// Datapath outputs
io.dpath.ren(1) := id_ctrl.rxs2
io.dpath.ren(0) := id_ctrl.rxs1
io.dpath.ex_ctrl := ex_ctrl
io.dpath.mem_ctrl := mem_ctrl
io.dpath.ex_valid := ex_reg_valid
io.dpath.ll_ready := !(wb_reg_valid && wb_ctrl.wxd)
io.dpath.retire := wb_reg_valid && !replay_wb && !io.dpath.csr_xcpt
io.dpath.wb_wen := io.dpath.retire && wb_ctrl.wxd
io.dpath.csr_cmd := Mux(wb_reg_valid, wb_ctrl.csr, CSR.N)
io.dpath.killm := killm_common
// FPU outputs
io.fpu.valid := !ctrl_killd && id_ctrl.fp
io.fpu.killx := ctrl_killx
io.fpu.killm := killm_common
// Data-cache request: issued from EX, killed from MEM.
io.dmem.req.valid := ex_reg_valid && ex_ctrl.mem
io.dmem.req.bits.kill := killm_common || mem_xcpt
io.dmem.req.bits.cmd := ex_ctrl.mem_cmd
io.dmem.req.bits.typ := ex_ctrl.mem_type
io.dmem.req.bits.phys := Bool(false)
io.dmem.invalidate_lr := wb_xcpt
// RoCC outputs
io.rocc.cmd.valid := wb_rocc_val
io.rocc.exception := wb_xcpt && io.dpath.status.xs.orR
io.rocc.s := io.dpath.status.prv.orR // should we just pass all of mstatus?
}

View File

@ -3,196 +3,195 @@
package rocket
import Chisel._
import Node._
/* Automatically generated by parse-opcodes */
object Instructions {
def BEQ = Bits("b?????????????????000?????1100011")
def BNE = Bits("b?????????????????001?????1100011")
def BLT = Bits("b?????????????????100?????1100011")
def BGE = Bits("b?????????????????101?????1100011")
def BLTU = Bits("b?????????????????110?????1100011")
def BGEU = Bits("b?????????????????111?????1100011")
def JALR = Bits("b?????????????????000?????1100111")
def JAL = Bits("b?????????????????????????1101111")
def LUI = Bits("b?????????????????????????0110111")
def AUIPC = Bits("b?????????????????????????0010111")
def ADDI = Bits("b?????????????????000?????0010011")
def SLLI = Bits("b000000???????????001?????0010011")
def SLTI = Bits("b?????????????????010?????0010011")
def SLTIU = Bits("b?????????????????011?????0010011")
def XORI = Bits("b?????????????????100?????0010011")
def SRLI = Bits("b000000???????????101?????0010011")
def SRAI = Bits("b010000???????????101?????0010011")
def ORI = Bits("b?????????????????110?????0010011")
def ANDI = Bits("b?????????????????111?????0010011")
def ADD = Bits("b0000000??????????000?????0110011")
def SUB = Bits("b0100000??????????000?????0110011")
def SLL = Bits("b0000000??????????001?????0110011")
def SLT = Bits("b0000000??????????010?????0110011")
def SLTU = Bits("b0000000??????????011?????0110011")
def XOR = Bits("b0000000??????????100?????0110011")
def SRL = Bits("b0000000??????????101?????0110011")
def SRA = Bits("b0100000??????????101?????0110011")
def OR = Bits("b0000000??????????110?????0110011")
def AND = Bits("b0000000??????????111?????0110011")
def ADDIW = Bits("b?????????????????000?????0011011")
def SLLIW = Bits("b0000000??????????001?????0011011")
def SRLIW = Bits("b0000000??????????101?????0011011")
def SRAIW = Bits("b0100000??????????101?????0011011")
def ADDW = Bits("b0000000??????????000?????0111011")
def SUBW = Bits("b0100000??????????000?????0111011")
def SLLW = Bits("b0000000??????????001?????0111011")
def SRLW = Bits("b0000000??????????101?????0111011")
def SRAW = Bits("b0100000??????????101?????0111011")
def LB = Bits("b?????????????????000?????0000011")
def LH = Bits("b?????????????????001?????0000011")
def LW = Bits("b?????????????????010?????0000011")
def LD = Bits("b?????????????????011?????0000011")
def LBU = Bits("b?????????????????100?????0000011")
def LHU = Bits("b?????????????????101?????0000011")
def LWU = Bits("b?????????????????110?????0000011")
def SB = Bits("b?????????????????000?????0100011")
def SH = Bits("b?????????????????001?????0100011")
def SW = Bits("b?????????????????010?????0100011")
def SD = Bits("b?????????????????011?????0100011")
def FENCE = Bits("b?????????????????000?????0001111")
def FENCE_I = Bits("b?????????????????001?????0001111")
def MUL = Bits("b0000001??????????000?????0110011")
def MULH = Bits("b0000001??????????001?????0110011")
def MULHSU = Bits("b0000001??????????010?????0110011")
def MULHU = Bits("b0000001??????????011?????0110011")
def DIV = Bits("b0000001??????????100?????0110011")
def DIVU = Bits("b0000001??????????101?????0110011")
def REM = Bits("b0000001??????????110?????0110011")
def REMU = Bits("b0000001??????????111?????0110011")
def MULW = Bits("b0000001??????????000?????0111011")
def DIVW = Bits("b0000001??????????100?????0111011")
def DIVUW = Bits("b0000001??????????101?????0111011")
def REMW = Bits("b0000001??????????110?????0111011")
def REMUW = Bits("b0000001??????????111?????0111011")
def AMOADD_W = Bits("b00000????????????010?????0101111")
def AMOXOR_W = Bits("b00100????????????010?????0101111")
def AMOOR_W = Bits("b01000????????????010?????0101111")
def AMOAND_W = Bits("b01100????????????010?????0101111")
def AMOMIN_W = Bits("b10000????????????010?????0101111")
def AMOMAX_W = Bits("b10100????????????010?????0101111")
def AMOMINU_W = Bits("b11000????????????010?????0101111")
def AMOMAXU_W = Bits("b11100????????????010?????0101111")
def AMOSWAP_W = Bits("b00001????????????010?????0101111")
def LR_W = Bits("b00010??00000?????010?????0101111")
def SC_W = Bits("b00011????????????010?????0101111")
def AMOADD_D = Bits("b00000????????????011?????0101111")
def AMOXOR_D = Bits("b00100????????????011?????0101111")
def AMOOR_D = Bits("b01000????????????011?????0101111")
def AMOAND_D = Bits("b01100????????????011?????0101111")
def AMOMIN_D = Bits("b10000????????????011?????0101111")
def AMOMAX_D = Bits("b10100????????????011?????0101111")
def AMOMINU_D = Bits("b11000????????????011?????0101111")
def AMOMAXU_D = Bits("b11100????????????011?????0101111")
def AMOSWAP_D = Bits("b00001????????????011?????0101111")
def LR_D = Bits("b00010??00000?????011?????0101111")
def SC_D = Bits("b00011????????????011?????0101111")
def SCALL = Bits("b00000000000000000000000001110011")
def SBREAK = Bits("b00000000000100000000000001110011")
def SRET = Bits("b00010000000000000000000001110011")
def SFENCE_VM = Bits("b000100000001?????000000001110011")
def WFI = Bits("b00010000001000000000000001110011")
def MRTH = Bits("b00110000011000000000000001110011")
def MRTS = Bits("b00110000010100000000000001110011")
def HRTS = Bits("b00100000010100000000000001110011")
def CSRRW = Bits("b?????????????????001?????1110011")
def CSRRS = Bits("b?????????????????010?????1110011")
def CSRRC = Bits("b?????????????????011?????1110011")
def CSRRWI = Bits("b?????????????????101?????1110011")
def CSRRSI = Bits("b?????????????????110?????1110011")
def CSRRCI = Bits("b?????????????????111?????1110011")
def FADD_S = Bits("b0000000??????????????????1010011")
def FSUB_S = Bits("b0000100??????????????????1010011")
def FMUL_S = Bits("b0001000??????????????????1010011")
def FDIV_S = Bits("b0001100??????????????????1010011")
def FSGNJ_S = Bits("b0010000??????????000?????1010011")
def FSGNJN_S = Bits("b0010000??????????001?????1010011")
def FSGNJX_S = Bits("b0010000??????????010?????1010011")
def FMIN_S = Bits("b0010100??????????000?????1010011")
def FMAX_S = Bits("b0010100??????????001?????1010011")
def FSQRT_S = Bits("b010110000000?????????????1010011")
def FADD_D = Bits("b0000001??????????????????1010011")
def FSUB_D = Bits("b0000101??????????????????1010011")
def FMUL_D = Bits("b0001001??????????????????1010011")
def FDIV_D = Bits("b0001101??????????????????1010011")
def FSGNJ_D = Bits("b0010001??????????000?????1010011")
def FSGNJN_D = Bits("b0010001??????????001?????1010011")
def FSGNJX_D = Bits("b0010001??????????010?????1010011")
def FMIN_D = Bits("b0010101??????????000?????1010011")
def FMAX_D = Bits("b0010101??????????001?????1010011")
def FCVT_S_D = Bits("b010000000001?????????????1010011")
def FCVT_D_S = Bits("b010000100000?????????????1010011")
def FSQRT_D = Bits("b010110100000?????????????1010011")
def FLE_S = Bits("b1010000??????????000?????1010011")
def FLT_S = Bits("b1010000??????????001?????1010011")
def FEQ_S = Bits("b1010000??????????010?????1010011")
def FLE_D = Bits("b1010001??????????000?????1010011")
def FLT_D = Bits("b1010001??????????001?????1010011")
def FEQ_D = Bits("b1010001??????????010?????1010011")
def FCVT_W_S = Bits("b110000000000?????????????1010011")
def FCVT_WU_S = Bits("b110000000001?????????????1010011")
def FCVT_L_S = Bits("b110000000010?????????????1010011")
def FCVT_LU_S = Bits("b110000000011?????????????1010011")
def FMV_X_S = Bits("b111000000000?????000?????1010011")
def FCLASS_S = Bits("b111000000000?????001?????1010011")
def FCVT_W_D = Bits("b110000100000?????????????1010011")
def FCVT_WU_D = Bits("b110000100001?????????????1010011")
def FCVT_L_D = Bits("b110000100010?????????????1010011")
def FCVT_LU_D = Bits("b110000100011?????????????1010011")
def FMV_X_D = Bits("b111000100000?????000?????1010011")
def FCLASS_D = Bits("b111000100000?????001?????1010011")
def FCVT_S_W = Bits("b110100000000?????????????1010011")
def FCVT_S_WU = Bits("b110100000001?????????????1010011")
def FCVT_S_L = Bits("b110100000010?????????????1010011")
def FCVT_S_LU = Bits("b110100000011?????????????1010011")
def FMV_S_X = Bits("b111100000000?????000?????1010011")
def FCVT_D_W = Bits("b110100100000?????????????1010011")
def FCVT_D_WU = Bits("b110100100001?????????????1010011")
def FCVT_D_L = Bits("b110100100010?????????????1010011")
def FCVT_D_LU = Bits("b110100100011?????????????1010011")
def FMV_D_X = Bits("b111100100000?????000?????1010011")
def FLW = Bits("b?????????????????010?????0000111")
def FLD = Bits("b?????????????????011?????0000111")
def FSW = Bits("b?????????????????010?????0100111")
def FSD = Bits("b?????????????????011?????0100111")
def FMADD_S = Bits("b?????00??????????????????1000011")
def FMSUB_S = Bits("b?????00??????????????????1000111")
def FNMSUB_S = Bits("b?????00??????????????????1001011")
def FNMADD_S = Bits("b?????00??????????????????1001111")
def FMADD_D = Bits("b?????01??????????????????1000011")
def FMSUB_D = Bits("b?????01??????????????????1000111")
def FNMSUB_D = Bits("b?????01??????????????????1001011")
def FNMADD_D = Bits("b?????01??????????????????1001111")
def CUSTOM0 = Bits("b?????????????????000?????0001011")
def CUSTOM0_RS1 = Bits("b?????????????????010?????0001011")
def CUSTOM0_RS1_RS2 = Bits("b?????????????????011?????0001011")
def CUSTOM0_RD = Bits("b?????????????????100?????0001011")
def CUSTOM0_RD_RS1 = Bits("b?????????????????110?????0001011")
def CUSTOM0_RD_RS1_RS2 = Bits("b?????????????????111?????0001011")
def CUSTOM1 = Bits("b?????????????????000?????0101011")
def CUSTOM1_RS1 = Bits("b?????????????????010?????0101011")
def CUSTOM1_RS1_RS2 = Bits("b?????????????????011?????0101011")
def CUSTOM1_RD = Bits("b?????????????????100?????0101011")
def CUSTOM1_RD_RS1 = Bits("b?????????????????110?????0101011")
def CUSTOM1_RD_RS1_RS2 = Bits("b?????????????????111?????0101011")
def CUSTOM2 = Bits("b?????????????????000?????1011011")
def CUSTOM2_RS1 = Bits("b?????????????????010?????1011011")
def CUSTOM2_RS1_RS2 = Bits("b?????????????????011?????1011011")
def CUSTOM2_RD = Bits("b?????????????????100?????1011011")
def CUSTOM2_RD_RS1 = Bits("b?????????????????110?????1011011")
def CUSTOM2_RD_RS1_RS2 = Bits("b?????????????????111?????1011011")
def CUSTOM3 = Bits("b?????????????????000?????1111011")
def CUSTOM3_RS1 = Bits("b?????????????????010?????1111011")
def CUSTOM3_RS1_RS2 = Bits("b?????????????????011?????1111011")
def CUSTOM3_RD = Bits("b?????????????????100?????1111011")
def CUSTOM3_RD_RS1 = Bits("b?????????????????110?????1111011")
def CUSTOM3_RD_RS1_RS2 = Bits("b?????????????????111?????1111011")
def BEQ = BitPat("b?????????????????000?????1100011")
def BNE = BitPat("b?????????????????001?????1100011")
def BLT = BitPat("b?????????????????100?????1100011")
def BGE = BitPat("b?????????????????101?????1100011")
def BLTU = BitPat("b?????????????????110?????1100011")
def BGEU = BitPat("b?????????????????111?????1100011")
def JALR = BitPat("b?????????????????000?????1100111")
def JAL = BitPat("b?????????????????????????1101111")
def LUI = BitPat("b?????????????????????????0110111")
def AUIPC = BitPat("b?????????????????????????0010111")
def ADDI = BitPat("b?????????????????000?????0010011")
def SLLI = BitPat("b000000???????????001?????0010011")
def SLTI = BitPat("b?????????????????010?????0010011")
def SLTIU = BitPat("b?????????????????011?????0010011")
def XORI = BitPat("b?????????????????100?????0010011")
def SRLI = BitPat("b000000???????????101?????0010011")
def SRAI = BitPat("b010000???????????101?????0010011")
def ORI = BitPat("b?????????????????110?????0010011")
def ANDI = BitPat("b?????????????????111?????0010011")
def ADD = BitPat("b0000000??????????000?????0110011")
def SUB = BitPat("b0100000??????????000?????0110011")
def SLL = BitPat("b0000000??????????001?????0110011")
def SLT = BitPat("b0000000??????????010?????0110011")
def SLTU = BitPat("b0000000??????????011?????0110011")
def XOR = BitPat("b0000000??????????100?????0110011")
def SRL = BitPat("b0000000??????????101?????0110011")
def SRA = BitPat("b0100000??????????101?????0110011")
def OR = BitPat("b0000000??????????110?????0110011")
def AND = BitPat("b0000000??????????111?????0110011")
def ADDIW = BitPat("b?????????????????000?????0011011")
def SLLIW = BitPat("b0000000??????????001?????0011011")
def SRLIW = BitPat("b0000000??????????101?????0011011")
def SRAIW = BitPat("b0100000??????????101?????0011011")
def ADDW = BitPat("b0000000??????????000?????0111011")
def SUBW = BitPat("b0100000??????????000?????0111011")
def SLLW = BitPat("b0000000??????????001?????0111011")
def SRLW = BitPat("b0000000??????????101?????0111011")
def SRAW = BitPat("b0100000??????????101?????0111011")
def LB = BitPat("b?????????????????000?????0000011")
def LH = BitPat("b?????????????????001?????0000011")
def LW = BitPat("b?????????????????010?????0000011")
def LD = BitPat("b?????????????????011?????0000011")
def LBU = BitPat("b?????????????????100?????0000011")
def LHU = BitPat("b?????????????????101?????0000011")
def LWU = BitPat("b?????????????????110?????0000011")
def SB = BitPat("b?????????????????000?????0100011")
def SH = BitPat("b?????????????????001?????0100011")
def SW = BitPat("b?????????????????010?????0100011")
def SD = BitPat("b?????????????????011?????0100011")
def FENCE = BitPat("b?????????????????000?????0001111")
def FENCE_I = BitPat("b?????????????????001?????0001111")
def MUL = BitPat("b0000001??????????000?????0110011")
def MULH = BitPat("b0000001??????????001?????0110011")
def MULHSU = BitPat("b0000001??????????010?????0110011")
def MULHU = BitPat("b0000001??????????011?????0110011")
def DIV = BitPat("b0000001??????????100?????0110011")
def DIVU = BitPat("b0000001??????????101?????0110011")
def REM = BitPat("b0000001??????????110?????0110011")
def REMU = BitPat("b0000001??????????111?????0110011")
def MULW = BitPat("b0000001??????????000?????0111011")
def DIVW = BitPat("b0000001??????????100?????0111011")
def DIVUW = BitPat("b0000001??????????101?????0111011")
def REMW = BitPat("b0000001??????????110?????0111011")
def REMUW = BitPat("b0000001??????????111?????0111011")
def AMOADD_W = BitPat("b00000????????????010?????0101111")
def AMOXOR_W = BitPat("b00100????????????010?????0101111")
def AMOOR_W = BitPat("b01000????????????010?????0101111")
def AMOAND_W = BitPat("b01100????????????010?????0101111")
def AMOMIN_W = BitPat("b10000????????????010?????0101111")
def AMOMAX_W = BitPat("b10100????????????010?????0101111")
def AMOMINU_W = BitPat("b11000????????????010?????0101111")
def AMOMAXU_W = BitPat("b11100????????????010?????0101111")
def AMOSWAP_W = BitPat("b00001????????????010?????0101111")
def LR_W = BitPat("b00010??00000?????010?????0101111")
def SC_W = BitPat("b00011????????????010?????0101111")
def AMOADD_D = BitPat("b00000????????????011?????0101111")
def AMOXOR_D = BitPat("b00100????????????011?????0101111")
def AMOOR_D = BitPat("b01000????????????011?????0101111")
def AMOAND_D = BitPat("b01100????????????011?????0101111")
def AMOMIN_D = BitPat("b10000????????????011?????0101111")
def AMOMAX_D = BitPat("b10100????????????011?????0101111")
def AMOMINU_D = BitPat("b11000????????????011?????0101111")
def AMOMAXU_D = BitPat("b11100????????????011?????0101111")
def AMOSWAP_D = BitPat("b00001????????????011?????0101111")
def LR_D = BitPat("b00010??00000?????011?????0101111")
def SC_D = BitPat("b00011????????????011?????0101111")
def SCALL = BitPat("b00000000000000000000000001110011")
def SBREAK = BitPat("b00000000000100000000000001110011")
def SRET = BitPat("b00010000000000000000000001110011")
def SFENCE_VM = BitPat("b000100000001?????000000001110011")
def WFI = BitPat("b00010000001000000000000001110011")
def MRTH = BitPat("b00110000011000000000000001110011")
def MRTS = BitPat("b00110000010100000000000001110011")
def HRTS = BitPat("b00100000010100000000000001110011")
def CSRRW = BitPat("b?????????????????001?????1110011")
def CSRRS = BitPat("b?????????????????010?????1110011")
def CSRRC = BitPat("b?????????????????011?????1110011")
def CSRRWI = BitPat("b?????????????????101?????1110011")
def CSRRSI = BitPat("b?????????????????110?????1110011")
def CSRRCI = BitPat("b?????????????????111?????1110011")
def FADD_S = BitPat("b0000000??????????????????1010011")
def FSUB_S = BitPat("b0000100??????????????????1010011")
def FMUL_S = BitPat("b0001000??????????????????1010011")
def FDIV_S = BitPat("b0001100??????????????????1010011")
def FSGNJ_S = BitPat("b0010000??????????000?????1010011")
def FSGNJN_S = BitPat("b0010000??????????001?????1010011")
def FSGNJX_S = BitPat("b0010000??????????010?????1010011")
def FMIN_S = BitPat("b0010100??????????000?????1010011")
def FMAX_S = BitPat("b0010100??????????001?????1010011")
def FSQRT_S = BitPat("b010110000000?????????????1010011")
def FADD_D = BitPat("b0000001??????????????????1010011")
def FSUB_D = BitPat("b0000101??????????????????1010011")
def FMUL_D = BitPat("b0001001??????????????????1010011")
def FDIV_D = BitPat("b0001101??????????????????1010011")
def FSGNJ_D = BitPat("b0010001??????????000?????1010011")
def FSGNJN_D = BitPat("b0010001??????????001?????1010011")
def FSGNJX_D = BitPat("b0010001??????????010?????1010011")
def FMIN_D = BitPat("b0010101??????????000?????1010011")
def FMAX_D = BitPat("b0010101??????????001?????1010011")
def FCVT_S_D = BitPat("b010000000001?????????????1010011")
def FCVT_D_S = BitPat("b010000100000?????????????1010011")
def FSQRT_D = BitPat("b010110100000?????????????1010011")
def FLE_S = BitPat("b1010000??????????000?????1010011")
def FLT_S = BitPat("b1010000??????????001?????1010011")
def FEQ_S = BitPat("b1010000??????????010?????1010011")
def FLE_D = BitPat("b1010001??????????000?????1010011")
def FLT_D = BitPat("b1010001??????????001?????1010011")
def FEQ_D = BitPat("b1010001??????????010?????1010011")
def FCVT_W_S = BitPat("b110000000000?????????????1010011")
def FCVT_WU_S = BitPat("b110000000001?????????????1010011")
def FCVT_L_S = BitPat("b110000000010?????????????1010011")
def FCVT_LU_S = BitPat("b110000000011?????????????1010011")
def FMV_X_S = BitPat("b111000000000?????000?????1010011")
def FCLASS_S = BitPat("b111000000000?????001?????1010011")
def FCVT_W_D = BitPat("b110000100000?????????????1010011")
def FCVT_WU_D = BitPat("b110000100001?????????????1010011")
def FCVT_L_D = BitPat("b110000100010?????????????1010011")
def FCVT_LU_D = BitPat("b110000100011?????????????1010011")
def FMV_X_D = BitPat("b111000100000?????000?????1010011")
def FCLASS_D = BitPat("b111000100000?????001?????1010011")
def FCVT_S_W = BitPat("b110100000000?????????????1010011")
def FCVT_S_WU = BitPat("b110100000001?????????????1010011")
def FCVT_S_L = BitPat("b110100000010?????????????1010011")
def FCVT_S_LU = BitPat("b110100000011?????????????1010011")
def FMV_S_X = BitPat("b111100000000?????000?????1010011")
def FCVT_D_W = BitPat("b110100100000?????????????1010011")
def FCVT_D_WU = BitPat("b110100100001?????????????1010011")
def FCVT_D_L = BitPat("b110100100010?????????????1010011")
def FCVT_D_LU = BitPat("b110100100011?????????????1010011")
def FMV_D_X = BitPat("b111100100000?????000?????1010011")
def FLW = BitPat("b?????????????????010?????0000111")
def FLD = BitPat("b?????????????????011?????0000111")
def FSW = BitPat("b?????????????????010?????0100111")
def FSD = BitPat("b?????????????????011?????0100111")
def FMADD_S = BitPat("b?????00??????????????????1000011")
def FMSUB_S = BitPat("b?????00??????????????????1000111")
def FNMSUB_S = BitPat("b?????00??????????????????1001011")
def FNMADD_S = BitPat("b?????00??????????????????1001111")
def FMADD_D = BitPat("b?????01??????????????????1000011")
def FMSUB_D = BitPat("b?????01??????????????????1000111")
def FNMSUB_D = BitPat("b?????01??????????????????1001011")
def FNMADD_D = BitPat("b?????01??????????????????1001111")
def CUSTOM0 = BitPat("b?????????????????000?????0001011")
def CUSTOM0_RS1 = BitPat("b?????????????????010?????0001011")
def CUSTOM0_RS1_RS2 = BitPat("b?????????????????011?????0001011")
def CUSTOM0_RD = BitPat("b?????????????????100?????0001011")
def CUSTOM0_RD_RS1 = BitPat("b?????????????????110?????0001011")
def CUSTOM0_RD_RS1_RS2 = BitPat("b?????????????????111?????0001011")
def CUSTOM1 = BitPat("b?????????????????000?????0101011")
def CUSTOM1_RS1 = BitPat("b?????????????????010?????0101011")
def CUSTOM1_RS1_RS2 = BitPat("b?????????????????011?????0101011")
def CUSTOM1_RD = BitPat("b?????????????????100?????0101011")
def CUSTOM1_RD_RS1 = BitPat("b?????????????????110?????0101011")
def CUSTOM1_RD_RS1_RS2 = BitPat("b?????????????????111?????0101011")
def CUSTOM2 = BitPat("b?????????????????000?????1011011")
def CUSTOM2_RS1 = BitPat("b?????????????????010?????1011011")
def CUSTOM2_RS1_RS2 = BitPat("b?????????????????011?????1011011")
def CUSTOM2_RD = BitPat("b?????????????????100?????1011011")
def CUSTOM2_RD_RS1 = BitPat("b?????????????????110?????1011011")
def CUSTOM2_RD_RS1_RS2 = BitPat("b?????????????????111?????1011011")
def CUSTOM3 = BitPat("b?????????????????000?????1111011")
def CUSTOM3_RS1 = BitPat("b?????????????????010?????1111011")
def CUSTOM3_RS1_RS2 = BitPat("b?????????????????011?????1111011")
def CUSTOM3_RD = BitPat("b?????????????????100?????1111011")
def CUSTOM3_RD_RS1 = BitPat("b?????????????????110?????1111011")
def CUSTOM3_RD_RS1_RS2 = BitPat("b?????????????????111?????1111011")
}
object Causes {
val misaligned_fetch = 0x0

View File

@ -50,7 +50,7 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false) extends Module {
FN_MUL -> List(Y, N, X, X),
FN_MULH -> List(Y, Y, Y, Y),
FN_MULHU -> List(Y, Y, N, N),
FN_MULHSU -> List(Y, Y, Y, N)))
FN_MULHSU -> List(Y, Y, Y, N))).map(_ toBool)
def sext(x: Bits, signed: Bool) = {
val sign = signed && Mux(io.req.bits.dw === DW_64, x(w-1), x(w/2-1))
@ -95,7 +95,7 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false) extends Module {
!isHi && (mplier & ~eOutMask) === UInt(0)
val eOutRes = (mulReg >> (mulw - count * mulUnroll)(log2Up(mulw)-1,0))
val nextMulReg1 = Cat(nextMulReg(2*mulw,mulw), Mux(eOut, eOutRes, nextMulReg)(mulw-1,0))
remainder := Cat(nextMulReg1 >> w, Bool(false), nextMulReg1(w-1,0)).toSInt
remainder := Cat(nextMulReg1 >> w, Bool(false), nextMulReg1(w-1,0))
count := count + 1
when (eOut || count === mulw/mulUnroll-1) {

View File

@ -108,7 +108,7 @@ class L1MetaWriteReq extends
object L1Metadata {
def apply(tag: Bits, coh: ClientMetadata) = {
val meta = new L1Metadata
val meta = Wire(new L1Metadata)
meta.tag := tag
meta.coh := coh
meta
@ -303,12 +303,12 @@ class MSHRFile extends L1HellaCacheModule {
val sdq = Mem(io.req.bits.data, sdqDepth)
when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data }
val idxMatch = Vec.fill(nMSHRs){Bool()}
val tagList = Vec.fill(nMSHRs){Bits()}
val idxMatch = Wire(Vec(Bool(), nMSHRs))
val tagList = Wire(Vec(Bits(width = tagBits), nMSHRs))
val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.addr >> untagBits
val wbTagList = Vec.fill(nMSHRs){Bits()}
val refillMux = Vec.fill(nMSHRs){new L1RefillReq}
val wbTagList = Wire(Vec(Bits(), nMSHRs))
val refillMux = Wire(Vec(new L1RefillReq, nMSHRs))
val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, nMSHRs))
val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, nMSHRs))
val mem_req_arb = Module(new LockingArbiter(
@ -341,11 +341,11 @@ class MSHRFile extends L1HellaCacheModule {
mshr.io.req_bits := io.req.bits
mshr.io.req_bits.sdq_id := sdq_alloc_id
mshr.io.meta_read <> meta_read_arb.io.in(i)
mshr.io.meta_write <> meta_write_arb.io.in(i)
mshr.io.mem_req <> mem_req_arb.io.in(i)
mshr.io.wb_req <> wb_req_arb.io.in(i)
mshr.io.replay <> replay_arb.io.in(i)
meta_read_arb.io.in(i) <> mshr.io.meta_read
meta_write_arb.io.in(i) <> mshr.io.meta_write
mem_req_arb.io.in(i) <> mshr.io.mem_req
wb_req_arb.io.in(i) <> mshr.io.wb_req
replay_arb.io.in(i) <> mshr.io.replay
mshr.io.mem_grant.valid := io.mem_grant.valid &&
io.mem_grant.bits.client_xact_id === UInt(i)
@ -362,10 +362,10 @@ class MSHRFile extends L1HellaCacheModule {
alloc_arb.io.out.ready := io.req.valid && sdq_rdy && !idx_match
meta_read_arb.io.out <> io.meta_read
meta_write_arb.io.out <> io.meta_write
mem_req_arb.io.out <> io.mem_req
wb_req_arb.io.out <> io.wb_req
io.meta_read <> meta_read_arb.io.out
io.meta_write <> meta_write_arb.io.out
io.mem_req <> mem_req_arb.io.out
io.wb_req <> wb_req_arb.io.out
io.req.ready := Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy
io.secondary_miss := idx_match
@ -409,11 +409,13 @@ class WritebackUnit extends L1HellaCacheModule {
}
when (r2_data_req_fired) {
io.release.valid := beat_done
when(!io.release.ready) {
r1_data_req_fired := false
r2_data_req_fired := false
data_req_cnt := data_req_cnt - Mux[UInt](Bool(refillCycles > 1) && r1_data_req_fired, 2, 1)
} .elsewhen(beat_done) { if(refillCyclesPerBeat > 1) buf_v := 0 }
when(beat_done) {
when(!io.release.ready) {
r1_data_req_fired := false
r2_data_req_fired := false
data_req_cnt := data_req_cnt - Mux[UInt](Bool(refillCycles > 1) && r1_data_req_fired, 2, 1)
} .otherwise { if(refillCyclesPerBeat > 1) buf_v := 0 }
}
when(!r1_data_req_fired) {
// We're done if this is the final data request and the Release can be sent
active := data_req_cnt < UInt(refillCycles) || !io.release.ready
@ -546,16 +548,16 @@ class DataArray extends L1HellaCacheModule {
for (w <- 0 until nWays by rowWords) {
val wway_en = io.write.bits.way_en(w+rowWords-1,w)
val rway_en = io.read.bits.way_en(w+rowWords-1,w)
val resp = Vec.fill(rowWords){Bits(width = encRowBits)}
val resp = Wire(Vec(Bits(width = encRowBits), rowWords))
val r_raddr = RegEnable(io.read.bits.addr, io.read.valid)
for (p <- 0 until resp.size) {
val array = Mem(Bits(width=encRowBits), nSets*refillCycles, seqRead = true)
val array = SeqMem(Bits(width=encRowBits), nSets*refillCycles)
when (wway_en.orR && io.write.valid && io.write.bits.wmask(p)) {
val data = Fill(rowWords, io.write.bits.data(encDataBits*(p+1)-1,encDataBits*p))
val mask = FillInterleaved(encDataBits, wway_en)
array.write(waddr, data, mask)
}
resp(p) := array(RegEnable(raddr, rway_en.orR && io.read.valid))
resp(p) := array.read(raddr, rway_en.orR && io.read.valid)
}
for (dw <- 0 until rowWords) {
val r = Vec(resp.map(_(encDataBits*(dw+1)-1,encDataBits*dw)))
@ -568,11 +570,11 @@ class DataArray extends L1HellaCacheModule {
} else {
val wmask = FillInterleaved(encDataBits, io.write.bits.wmask)
for (w <- 0 until nWays) {
val array = Mem(Bits(width=encRowBits), nSets*refillCycles, seqRead = true)
val array = SeqMem(Bits(width=encRowBits), nSets*refillCycles)
when (io.write.bits.way_en(w) && io.write.valid) {
array.write(waddr, io.write.bits.data, wmask)
}
io.resp(w) := array(RegEnable(raddr, io.read.bits.way_en(w) && io.read.valid))
io.resp(w) := array.read(raddr, io.read.bits.way_en(w) && io.read.valid)
}
}
@ -600,19 +602,19 @@ class HellaCache extends L1HellaCacheModule {
io.cpu.req.ready := Bool(true)
val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false))
val s1_req = Reg(io.cpu.req.bits.clone)
val s1_req = Reg(io.cpu.req.bits)
val s1_valid_masked = s1_valid && !io.cpu.req.bits.kill
val s1_replay = Reg(init=Bool(false))
val s1_clk_en = Reg(Bool())
val s2_valid = Reg(next=s1_valid_masked, init=Bool(false))
val s2_req = Reg(io.cpu.req.bits.clone)
val s2_req = Reg(io.cpu.req.bits)
val s2_replay = Reg(next=s1_replay, init=Bool(false)) && s2_req.cmd != M_NOP
val s2_recycle = Bool()
val s2_valid_masked = Bool()
val s2_recycle = Wire(Bool())
val s2_valid_masked = Wire(Bool())
val s3_valid = Reg(init=Bool(false))
val s3_req = Reg(io.cpu.req.bits.clone)
val s3_req = Reg(io.cpu.req.bits)
val s3_way = Reg(Bits())
val s1_recycled = RegEnable(s2_recycle, Bool(false), s1_clk_en)
@ -622,7 +624,7 @@ class HellaCache extends L1HellaCacheModule {
val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd)
val dtlb = Module(new TLB)
dtlb.io.ptw <> io.ptw
io.ptw <> dtlb.io.ptw
dtlb.io.req.valid := s1_valid_masked && s1_readwrite && !s1_req.phys
dtlb.io.req.bits.passthrough := s1_req.phys
dtlb.io.req.bits.asid := UInt(0)
@ -682,8 +684,8 @@ class HellaCache extends L1HellaCacheModule {
val meta = Module(new MetadataArray(onReset _))
val metaReadArb = Module(new Arbiter(new MetaReadReq, 5))
val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, 2))
metaReadArb.io.out <> meta.io.read
metaWriteArb.io.out <> meta.io.write
meta.io.read <> metaReadArb.io.out
meta.io.write <> metaWriteArb.io.out
// data
val data = Module(new DataArray)
@ -703,7 +705,7 @@ class HellaCache extends L1HellaCacheModule {
// data read for new requests
readArb.io.in(3).valid := io.cpu.req.valid
readArb.io.in(3).bits.addr := io.cpu.req.bits.addr
readArb.io.in(3).bits.way_en := SInt(-1)
readArb.io.in(3).bits.way_en := ~UInt(0, nWays)
when (!readArb.io.in(3).ready) { io.cpu.req.ready := Bool(false) }
// recycled requests
@ -711,7 +713,7 @@ class HellaCache extends L1HellaCacheModule {
metaReadArb.io.in(0).bits.idx := s2_req.addr >> blockOffBits
readArb.io.in(0).valid := s2_recycle
readArb.io.in(0).bits.addr := s2_req.addr
readArb.io.in(0).bits.way_en := SInt(-1)
readArb.io.in(0).bits.way_en := ~UInt(0, nWays)
// tag check and way muxing
def wayMap[T <: Data](f: Int => T) = Vec((0 until nWays).map(f))
@ -745,9 +747,9 @@ class HellaCache extends L1HellaCacheModule {
}
when (io.cpu.invalidate_lr) { lrsc_count := 0 }
val s2_data = Vec.fill(nWays){Bits(width = encRowBits)}
val s2_data = Wire(Vec(Bits(width=encRowBits), nWays))
for (w <- 0 until nWays) {
val regs = Vec.fill(rowWords){Reg(Bits(width = encDataBits))}
val regs = Reg(Vec.fill(rowWords){Bits(width = encDataBits)})
val en1 = s1_clk_en && s1_tag_eq_way(w)
for (i <- 0 until regs.size) {
val en = en1 && ((Bool(i == 0) || !Bool(doNarrowRead)) || s1_writeback)
@ -761,7 +763,7 @@ class HellaCache extends L1HellaCacheModule {
val s2_data_uncorrected = Vec(s2_data_decoded.map(_.uncorrected)).toBits
val s2_word_idx = if(doNarrowRead) UInt(0) else s2_req.addr(log2Up(rowWords*coreDataBytes)-1,3)
val s2_data_correctable = Vec(s2_data_decoded.map(_.correctable)).toBits()(s2_word_idx)
// store/amo hits
s3_valid := (s2_valid_masked && s2_hit || s2_replay) && !s2_sc_fail && isWrite(s2_req.cmd)
val amoalu = Module(new AMOALU)
@ -798,7 +800,7 @@ class HellaCache extends L1HellaCacheModule {
// replays
readArb.io.in(1).valid := mshrs.io.replay.valid
readArb.io.in(1).bits := mshrs.io.replay.bits
readArb.io.in(1).bits.way_en := SInt(-1)
readArb.io.in(1).bits.way_en := ~UInt(0, nWays)
mshrs.io.replay.ready := readArb.io.in(1).ready
s1_replay := mshrs.io.replay.valid && readArb.io.in(1).ready
metaReadArb.io.in(1) <> mshrs.io.meta_read
@ -806,16 +808,16 @@ class HellaCache extends L1HellaCacheModule {
// probes and releases
val releaseArb = Module(new LockingArbiter(new Release, 2, outerDataBeats, (r: Release) => r.hasMultibeatData()))
releaseArb.io.out <> io.mem.release
io.mem.release <> releaseArb.io.out
prober.io.req.valid := io.mem.probe.valid && !lrsc_valid
io.mem.probe.ready := prober.io.req.ready && !lrsc_valid
prober.io.req.bits := io.mem.probe.bits
prober.io.rep <> releaseArb.io.in(1)
releaseArb.io.in(1) <> prober.io.rep
prober.io.way_en := s2_tag_match_way
prober.io.block_state := s2_hit_state
prober.io.meta_read <> metaReadArb.io.in(2)
prober.io.meta_write <> metaWriteArb.io.in(1)
metaReadArb.io.in(2) <> prober.io.meta_read
metaWriteArb.io.in(1) <> prober.io.meta_write
prober.io.mshr_rdy := mshrs.io.probe_rdy
// refills
@ -826,18 +828,18 @@ class HellaCache extends L1HellaCacheModule {
writeArb.io.in(1).valid := narrow_grant.valid && narrow_grant.bits.hasData()
writeArb.io.in(1).bits.addr := mshrs.io.refill.addr
writeArb.io.in(1).bits.way_en := mshrs.io.refill.way_en
writeArb.io.in(1).bits.wmask := SInt(-1)
// Refill writes update every word of the row, so the mask must be all ones.
// wmask holds one bit per row word: DataArray indexes it with 0 until rowWords and
// expands it with FillInterleaved(encDataBits, _). Its width is therefore rowWords,
// not nWays (nWays is the width of way_en); an nWays-wide mask does not cover every
// word when nWays < rowWords.
writeArb.io.in(1).bits.wmask := ~UInt(0, rowWords)
writeArb.io.in(1).bits.data := narrow_grant.bits.data(encRowBits-1,0)
data.io.read <> readArb.io.out
readArb.io.out.ready := !narrow_grant.valid || narrow_grant.ready // insert bubble if refill gets blocked
readArb.io.out <> data.io.read
// writebacks
val wbArb = Module(new Arbiter(new WritebackReq, 2))
prober.io.wb_req <> wbArb.io.in(0)
mshrs.io.wb_req <> wbArb.io.in(1)
wbArb.io.out <> wb.io.req
wb.io.meta_read <> metaReadArb.io.in(3)
wb.io.data_req <> readArb.io.in(2)
wbArb.io.in(0) <> prober.io.wb_req
wbArb.io.in(1) <> mshrs.io.wb_req
wb.io.req <> wbArb.io.out
metaReadArb.io.in(3) <> wb.io.meta_read
readArb.io.in(2) <> wb.io.data_req
wb.io.data_resp := s2_data_corrected
releaseArb.io.in(0) <> wb.io.release
@ -863,8 +865,10 @@ class HellaCache extends L1HellaCacheModule {
val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(coreDataBits)))
val s2_data_word = Mux(s2_store_bypass, s2_store_bypass_data, s2_data_word_prebypass)
val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word, s2_sc)
amoalu.io := s2_req
amoalu.io.addr := s2_req.addr
amoalu.io.cmd := s2_req.cmd
amoalu.io.typ := s2_req.typ
amoalu.io.lhs := s2_data_word
amoalu.io.rhs := s2_req.data
@ -880,7 +884,7 @@ class HellaCache extends L1HellaCacheModule {
val s2_recycle_ecc = (s2_valid || s2_replay) && s2_hit && s2_data_correctable
val s2_recycle_next = Reg(init=Bool(false))
when (s1_valid || s1_replay) { s2_recycle_next := (s1_valid || s1_replay) && s2_recycle_ecc }
when (s1_valid || s1_replay) { s2_recycle_next := s2_recycle_ecc }
s2_recycle := s2_recycle_ecc || s2_recycle_next
// after a nack, block until nack condition resolves to save energy

View File

@ -85,20 +85,21 @@ class PTW(n: Int) extends CoreModule
val (pte_cache_hit, pte_cache_data) = {
val size = log2Up(pgLevels * 2)
val plru = new PseudoLRU(size)
val valid = Reg(init = Bits(0, size))
val valid = Reg(Vec(Bool(), size))
val validBits = valid.toBits
val tags = Mem(UInt(width = paddrBits), size)
val data = Mem(UInt(width = ppnBits), size)
val hits = Vec(tags.map(_ === pte_addr)).toBits & valid
val hits = Vec(tags.map(_ === pte_addr)).toBits & validBits
val hit = hits.orR
when (io.mem.resp.valid && pte.table() && !hit) {
val r = Mux(valid.andR, plru.replace, PriorityEncoder(~valid))
val r = Mux(validBits.andR, plru.replace, PriorityEncoder(~validBits))
valid(r) := true
tags(r) := pte_addr
data(r) := pte.ppn
}
when (hit && state === s_req) { plru.access(OHToUInt(hits)) }
when (io.dpath.invalidate) { valid := 0 }
when (reset || io.dpath.invalidate) { valid.foreach(_ := false) }
(hit, Mux1H(hits, data))
}
@ -109,8 +110,7 @@ class PTW(n: Int) extends CoreModule
r_pte := pte
}
val pte_wdata = new PTE
pte_wdata := new PTE().fromBits(0)
val pte_wdata = Wire(init=new PTE().fromBits(0))
pte_wdata.r := true
pte_wdata.d := r_req.store

View File

@ -3,7 +3,6 @@
package rocket
import Chisel._
import Node._
import uncore._
import Util._
@ -64,7 +63,7 @@ class AccumulatorExample extends RoCC
{
val n = 4
val regfile = Mem(UInt(width = xLen), n)
val busy = Vec.fill(n){Reg(init=Bool(false))}
val busy = Reg(init=Vec(Bool(false), n))
val cmd = Queue(io.cmd)
val funct = cmd.bits.inst.funct

View File

@ -0,0 +1,580 @@
// See LICENSE for license details.
package rocket
import Chisel._
import junctions._
import uncore._
import Util._
// Configuration keys looked up through params(...) by the core.
// Optional FPU constructor; FP decode tables are only added when it is defined.
case object BuildFPU extends Field[Option[() => FPU]]
// When set (and an FPU is built), the FDivSqrt decode table is added as well.
case object FDivSqrt extends Field[Boolean]
// Read into CoreParameters.xLen, which also defines coreDataBits.
case object XLen extends Field[Int]
case object NMultXpr extends Field[Int]
// FetchWidth and RetireWidth are both required to be 1 by RocketCoreParameters.
case object FetchWidth extends Field[Int]
case object RetireWidth extends Field[Int]
case object UseVM extends Field[Boolean]
// FastLoadWord / FastLoadByte select which D$ response signal is used as bypass data;
// CoreParameters requires FastLoadWord whenever FastLoadByte is set.
case object FastLoadWord extends Field[Boolean]
case object FastLoadByte extends Field[Boolean]
// Selects the MulDiv configuration (mulUnroll = 8 and earlyOut when true).
case object FastMulDiv extends Field[Boolean]
case object CoreInstBits extends Field[Int]
case object CoreDataBits extends Field[Int]
// Rocket requires this to be at least 6.
case object CoreDCacheReqTagBits extends Field[Int]
case object NCustomMRWCSRs extends Field[Int]
/** Derived sizing constants shared by core modules and bundles.
  *
  * Every value is read from the surrounding parameter environment or derived
  * from values read earlier in this trait.
  */
abstract trait CoreParameters extends UsesParameters {
  // Register width and address-translation geometry.
  val xLen = params(XLen)
  val paddrBits = params(PAddrBits)
  val vaddrBits = params(VAddrBits)
  val pgIdxBits = params(PgIdxBits)
  val ppnBits = params(PPNBits)
  val vpnBits = params(VPNBits)
  val pgLevels = params(PgLevels)
  val pgLevelBits = params(PgLevelBits)
  val asIdBits = params(ASIdBits)

  // Pipeline widths.
  val retireWidth = params(RetireWidth)
  val coreFetchWidth = params(FetchWidth)

  // Instruction and data sizes; the data width follows xLen.
  val coreInstBits = params(CoreInstBits)
  val coreInstBytes = coreInstBits/8
  val coreDataBits = xLen
  val coreDataBytes = coreDataBits/8

  // Data-cache request tag and address widths.
  val coreDCacheReqTagBits = params(CoreDCacheReqTagBits)
  val coreMaxAddrBits = math.max(ppnBits,vpnBits+1) + pgIdxBits
  // One extra bit is added only when virtual addresses are narrower than xLen.
  val vaddrBitsExtended = vaddrBits + (vaddrBits < xLen).toInt

  // Fast byte loads may only be enabled together with fast word loads.
  require(!params(FastLoadByte) || params(FastLoadWord))
}
/** Core parameters restricted to what the Rocket pipeline currently supports. */
abstract trait RocketCoreParameters extends CoreParameters {
  // For now, both the fetch width and the retire width must be exactly 1.
  for (widthField <- Seq[Field[Int]](FetchWidth, RetireWidth))
    require(params(widthField) == 1)
}
// Base classes that mix the CoreParameters constants into Bundles and Modules.
abstract class CoreBundle extends Bundle with CoreParameters
abstract class CoreModule extends Module with CoreParameters
class Rocket extends CoreModule
{
// Core-level I/O. The ptw, fpu and rocc bundles are instantiated with .flip,
// i.e. with their declared directions reversed.
val io = new Bundle {
val host = new HTIFIO
val imem = new CPUFrontendIO
val dmem = new HellaCacheIO
val ptw = new DatapathPTWIO().flip
val fpu = new FPUIO().flip
val rocc = new RoCCInterface().flip
}
// Build the decode table: the integer table is always present; the FP,
// FP div/sqrt and RoCC tables are appended only when the matching unit is configured.
var decode_table = XDecode.table
if (!params(BuildFPU).isEmpty) decode_table ++= FDecode.table
if (!params(BuildFPU).isEmpty && params(FDivSqrt)) decode_table ++= FDivSqrtDecode.table
if (!params(BuildRoCC).isEmpty) decode_table ++= RoCCDecode.table
// Decoded control signals carried along with the instruction in EX, MEM and WB.
val ex_ctrl = Reg(new IntCtrlSigs)
val mem_ctrl = Reg(new IntCtrlSigs)
val wb_ctrl = Reg(new IntCtrlSigs)
// EX-stage pipeline registers.
val ex_reg_xcpt_interrupt = Reg(Bool())
val ex_reg_valid = Reg(Bool())
val ex_reg_btb_hit = Reg(Bool())
val ex_reg_btb_resp = Reg(io.imem.btb_resp.bits)
val ex_reg_xcpt = Reg(Bool())
val ex_reg_flush_pipe = Reg(Bool())
val ex_reg_load_use = Reg(Bool())
val ex_reg_cause = Reg(UInt())
val ex_reg_pc = Reg(UInt())
val ex_reg_inst = Reg(Bits())
// MEM-stage pipeline registers.
val mem_reg_xcpt_interrupt = Reg(Bool())
val mem_reg_valid = Reg(Bool())
val mem_reg_btb_hit = Reg(Bool())
val mem_reg_btb_resp = Reg(io.imem.btb_resp.bits)
val mem_reg_xcpt = Reg(Bool())
val mem_reg_replay = Reg(Bool())
val mem_reg_flush_pipe = Reg(Bool())
val mem_reg_cause = Reg(UInt())
val mem_reg_slow_bypass = Reg(Bool())
val mem_reg_pc = Reg(UInt())
val mem_reg_inst = Reg(Bits())
val mem_reg_wdata = Reg(Bits())
val mem_reg_rs2 = Reg(Bits())
// Forward-declared wire; driven in the memory stage below.
val take_pc_mem = Wire(Bool())
// WB-stage pipeline registers. wb_reg_rocc_pending is the only one with a reset value.
val wb_reg_valid = Reg(Bool())
val wb_reg_xcpt = Reg(Bool())
val wb_reg_replay = Reg(Bool())
val wb_reg_cause = Reg(UInt())
val wb_reg_rocc_pending = Reg(init=Bool(false))
val wb_reg_pc = Reg(UInt())
val wb_reg_inst = Reg(Bits())
val wb_reg_wdata = Reg(Bits())
val wb_reg_rs2 = Reg(Bits())
// Forward-declared wire; driven in the writeback stage below.
val take_pc_wb = Wire(Bool())
// A PC redirect is requested by either the MEM or the WB stage.
val take_pc_mem_wb = take_pc_wb || take_pc_mem
val take_pc = take_pc_mem_wb
// decode stage
val id_pc = io.imem.resp.bits.pc
val id_inst = io.imem.resp.bits.data(0).toBits; require(params(FetchWidth) == 1)
val id_ctrl = Wire(new IntCtrlSigs()).decode(id_inst, decode_table)
// Register specifier fields taken directly from the instruction bits.
val id_raddr3 = id_inst(31,27)
val id_raddr2 = id_inst(24,20)
val id_raddr1 = id_inst(19,15)
val id_waddr = id_inst(11,7)
// Forward-declared wire; driven with the MEM-stage hazard logic further below.
val id_load_use = Wire(Bool())
val id_reg_fence = Reg(init=Bool(false))
val id_ren = IndexedSeq(id_ctrl.rxs1, id_ctrl.rxs2)
val id_raddr = IndexedSeq(id_raddr1, id_raddr2)
// Integer register file; one read port is created per entry of id_raddr.
val rf = new RegFile
val id_rs = id_raddr.map(rf.read _)
// Forward-declared wire; driven after the stall conditions are computed.
val ctrl_killd = Wire(Bool())
val csr = Module(new CSRFile)
val id_csr_en = id_ctrl.csr != CSR.N
val id_system_insn = id_ctrl.csr === CSR.I
// A CSR.S / CSR.C command whose rs1 field is x0 is passed on as CSR.R instead.
val id_csr_ren = (id_ctrl.csr === CSR.S || id_ctrl.csr === CSR.C) && id_raddr1 === UInt(0)
val id_csr = Mux(id_csr_ren, CSR.R, id_ctrl.csr)
val id_csr_addr = id_inst(31,20)
// this is overly conservative
// Flush the pipeline for system instructions and for CSR accesses (other than
// the read-only form above) whose address is not classified as one of safe_csrs.
val safe_csrs = CSRs.sscratch :: CSRs.sepc :: CSRs.mscratch :: CSRs.mepc :: CSRs.mcause :: CSRs.mbadaddr :: Nil
val legal_csrs = collection.mutable.LinkedHashSet(CSRs.all:_*)
val id_csr_flush = id_system_insn || (id_csr_en && !id_csr_ren && !DecodeLogic(id_csr_addr, safe_csrs.map(UInt(_)), (legal_csrs -- safe_csrs).toList.map(UInt(_))))
// Illegal if not decodable, or if an FP / RoCC instruction is issued while the
// corresponding status field (fs / xs) is all zero.
val id_illegal_insn = !id_ctrl.legal ||
id_ctrl.fp && !csr.io.status.fs.orR ||
id_ctrl.rocc && !csr.io.status.xs.orR
// stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE)
val id_amo_aq = id_inst(26)
val id_amo_rl = id_inst(25)
val id_fence_next = id_ctrl.fence || id_ctrl.amo && id_amo_rl
val id_mem_busy = !io.dmem.ordered || io.dmem.req.valid
// RoCC counts as busy while it reports busy or any later stage holds a RoCC instruction.
val id_rocc_busy = Bool(!params(BuildRoCC).isEmpty) &&
(io.rocc.busy || ex_reg_valid && ex_ctrl.rocc ||
mem_reg_valid && mem_ctrl.rocc || wb_reg_valid && wb_ctrl.rocc)
// The fence flag stays set for as long as memory remains busy.
id_reg_fence := id_fence_next || id_reg_fence && id_mem_busy
val id_do_fence = id_rocc_busy && id_ctrl.fence ||
id_mem_busy && (id_ctrl.amo && id_amo_aq || id_ctrl.fence_i || id_reg_fence && (id_ctrl.mem || id_ctrl.rocc) || id_csr_en)
// Decode-stage exceptions; checkExceptions picks the cause with PriorityMux,
// so entries earlier in the list take precedence.
val (id_xcpt, id_cause) = checkExceptions(List(
(csr.io.interrupt, csr.io.interrupt_cause),
(io.imem.resp.bits.xcpt_if, UInt(Causes.fault_fetch)),
(id_illegal_insn, UInt(Causes.illegal_instruction))))
// Data used for the load bypass source, chosen at elaboration time
// from the FastLoadByte / FastLoadWord settings.
val dcache_bypass_data =
if(params(FastLoadByte)) io.dmem.resp.bits.data_subword
else if(params(FastLoadWord)) io.dmem.resp.bits.data
else wb_reg_wdata
// detect bypass opportunities
val ex_waddr = ex_reg_inst(11,7)
val mem_waddr = mem_reg_inst(11,7)
val wb_waddr = wb_reg_inst(11,7)
// Each entry is (source enabled, destination register, data signal used when selected).
val bypass_sources = IndexedSeq(
(Bool(true), UInt(0), UInt(0)), // treat reading x0 as a bypass
(ex_reg_valid && ex_ctrl.wxd, ex_waddr, mem_reg_wdata),
(mem_reg_valid && mem_ctrl.wxd && !mem_ctrl.mem, mem_waddr, wb_reg_wdata),
(mem_reg_valid && mem_ctrl.wxd, mem_waddr, dcache_bypass_data))
// Per read port: one match bit per bypass source.
val id_bypass_src = id_raddr.map(raddr => bypass_sources.map(s => s._1 && s._2 === raddr))
// execute stage
val bypass_mux = Vec(bypass_sources.map(_._3))
// Operand registers. When ex_reg_rs_bypass(i) is set, ex_reg_rs_lsb(i) holds the
// index of the bypass source; otherwise msb/lsb together hold the register value.
val ex_reg_rs_bypass = Reg(Vec.fill(id_raddr.size)(Bool()))
val ex_reg_rs_lsb = Reg(Vec.fill(id_raddr.size)(Bits()))
val ex_reg_rs_msb = Reg(Vec.fill(id_raddr.size)(Bits()))
val ex_rs = for (i <- 0 until id_raddr.size)
yield Mux(ex_reg_rs_bypass(i), bypass_mux(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i)))
val ex_imm = imm(ex_ctrl.sel_imm, ex_reg_inst)
// ALU operand selection; both lookups default to zero.
val ex_op1 = MuxLookup(ex_ctrl.sel_alu1, SInt(0), Seq(
A1_RS1 -> ex_rs(0).toSInt,
A1_PC -> ex_reg_pc.toSInt))
val ex_op2 = MuxLookup(ex_ctrl.sel_alu2, SInt(0), Seq(
A2_RS2 -> ex_rs(1).toSInt,
A2_IMM -> ex_imm,
A2_FOUR -> SInt(4)))
val alu = Module(new ALU)
alu.io.dw := ex_ctrl.alu_dw
alu.io.fn := ex_ctrl.alu_fn
alu.io.in2 := ex_op2.toUInt
alu.io.in1 := ex_op1.toUInt
// multiplier and divider
val div = Module(new MulDiv(mulUnroll = if(params(FastMulDiv)) 8 else 1,
earlyOut = params(FastMulDiv)))
div.io.req.valid := ex_reg_valid && ex_ctrl.div
div.io.req.bits.dw := ex_ctrl.alu_dw
div.io.req.bits.fn := ex_ctrl.alu_fn
div.io.req.bits.in1 := ex_rs(0)
div.io.req.bits.in2 := ex_rs(1)
// The destination register is used as the request tag (read back as ll_waddr on response).
div.io.req.bits.tag := ex_waddr
// Advance decode -> execute.
ex_reg_valid := !ctrl_killd
ex_reg_xcpt := !ctrl_killd && id_xcpt
ex_reg_xcpt_interrupt := csr.io.interrupt && !take_pc && io.imem.resp.valid
when (id_xcpt) { ex_reg_cause := id_cause }
when (!ctrl_killd) {
ex_ctrl := id_ctrl
// Overrides the csr field with the possibly-rewritten command (see id_csr).
ex_ctrl.csr := id_csr
ex_reg_btb_hit := io.imem.btb_resp.valid
when (io.imem.btb_resp.valid) { ex_reg_btb_resp := io.imem.btb_resp.bits }
ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush
ex_reg_load_use := id_load_use
for (i <- 0 until id_raddr.size) {
val do_bypass = id_bypass_src(i).reduce(_||_)
val bypass_src = PriorityEncoder(id_bypass_src(i))
ex_reg_rs_bypass(i) := do_bypass
ex_reg_rs_lsb(i) := bypass_src
// Not bypassed: store the register-file value split across the lsb and msb registers.
when (id_ren(i) && !do_bypass) {
ex_reg_rs_lsb(i) := id_rs(i)(bypass_src.getWidth-1,0)
ex_reg_rs_msb(i) := id_rs(i) >> bypass_src.getWidth
}
}
}
// Instruction and PC are also captured when an interrupt is pending,
// even if the instruction itself is killed.
when (!ctrl_killd || csr.io.interrupt) {
ex_reg_inst := id_inst
ex_reg_pc := id_pc
}
// replay inst in ex stage?
val wb_dcache_miss = wb_ctrl.mem && !io.dmem.resp.valid
val replay_ex_structural = ex_ctrl.mem && !io.dmem.req.ready ||
ex_ctrl.div && !div.io.req.ready
val replay_ex_load_use = wb_dcache_miss && ex_reg_load_use
val replay_ex = ex_reg_valid && (replay_ex_structural || replay_ex_load_use)
val ctrl_killx = take_pc_mem_wb || replay_ex || !ex_reg_valid
// detect 2-cycle load-use delay for LB/LH/SC
val ex_slow_bypass = ex_ctrl.mem_cmd === M_XSC || Vec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_ctrl.mem_type)
// EX-stage exceptions: anything carried from decode, then an illegal FP rounding mode.
val (ex_xcpt, ex_cause) = checkExceptions(List(
(ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause),
(ex_ctrl.fp && io.fpu.illegal_rm, UInt(Causes.illegal_instruction))))
// memory stage
// Bit 0 of the ALU result is used as the branch-taken indication.
val mem_br_taken = mem_reg_wdata(0)
// PC plus the branch offset (taken branch), the jump offset (jal), or 4.
val mem_br_target = mem_reg_pc.toSInt +
Mux(mem_ctrl.branch && mem_br_taken, imm(IMM_SB, mem_reg_inst),
Mux(mem_ctrl.jal, imm(IMM_UJ, mem_reg_inst), SInt(4)))
// For jalr the value written back is mem_br_target rather than the ALU result.
val mem_int_wdata = Mux(mem_ctrl.jalr, mem_br_target, mem_reg_wdata.toSInt).toUInt
// Next PC: for jalr the ALU result (compressed with vaSign), else mem_br_target; bit 0 is cleared.
val mem_npc = (Mux(mem_ctrl.jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(vaddrBits-1,0)).toSInt, mem_br_target) & SInt(-2)).toUInt
// The instruction following in EX is wrong if its PC differs from mem_npc or EX is not valid.
val mem_wrong_npc = mem_npc != ex_reg_pc || !ex_reg_valid
val mem_npc_misaligned = mem_npc(1)
val mem_misprediction = mem_wrong_npc && mem_reg_valid && (mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal)
val want_take_pc_mem = mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe)
// A misaligned target raises an exception below instead of redirecting the PC.
take_pc_mem := want_take_pc_mem && !mem_npc_misaligned
// Advance execute -> memory.
mem_reg_valid := !ctrl_killx
mem_reg_replay := !take_pc_mem_wb && replay_ex
mem_reg_xcpt := !ctrl_killx && ex_xcpt
mem_reg_xcpt_interrupt := !take_pc_mem_wb && ex_reg_xcpt_interrupt
when (ex_xcpt) { mem_reg_cause := ex_cause }
when (ex_reg_valid || ex_reg_xcpt_interrupt) {
mem_ctrl := ex_ctrl
mem_reg_btb_hit := ex_reg_btb_hit
when (ex_reg_btb_hit) { mem_reg_btb_resp := ex_reg_btb_resp }
mem_reg_flush_pipe := ex_reg_flush_pipe
mem_reg_slow_bypass := ex_slow_bypass
mem_reg_inst := ex_reg_inst
mem_reg_pc := ex_reg_pc
mem_reg_wdata := alu.io.out
// rs2 is only captured for memory and RoCC instructions that read it.
when (ex_ctrl.rxs2 && (ex_ctrl.mem || ex_ctrl.rocc)) {
mem_reg_rs2 := ex_rs(1)
}
}
// MEM-stage exceptions; earlier entries take precedence (PriorityMux in checkExceptions).
val (mem_xcpt, mem_cause) = checkExceptions(List(
(mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause),
(want_take_pc_mem && mem_npc_misaligned, UInt(Causes.misaligned_fetch)),
(mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.ma.st, UInt(Causes.misaligned_store)),
(mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.ma.ld, UInt(Causes.misaligned_load)),
(mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.pf.st, UInt(Causes.fault_store)),
(mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.pf.ld, UInt(Causes.fault_load))))
val dcache_kill_mem = mem_reg_valid && mem_ctrl.wxd && io.dmem.replay_next.valid // structural hazard on writeback port
val fpu_kill_mem = mem_reg_valid && mem_ctrl.fp && io.fpu.nack_mem
val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem
val killm_common = dcache_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid
// Kill the mul/div request that fired in the previous cycle if its instruction is killed in MEM.
div.io.kill := killm_common && Reg(next = div.io.req.fire())
val ctrl_killm = killm_common || mem_xcpt || fpu_kill_mem
// writeback stage
// Advance memory -> writeback.
wb_reg_valid := !ctrl_killm
wb_reg_replay := replay_mem && !take_pc_wb
wb_reg_xcpt := mem_xcpt && !take_pc_wb
when (mem_xcpt) { wb_reg_cause := mem_cause }
when (mem_reg_valid || mem_reg_replay || mem_reg_xcpt_interrupt) {
wb_ctrl := mem_ctrl
// FP instructions that write an integer register take their result from the FPU.
wb_reg_wdata := Mux(mem_ctrl.fp && mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata)
when (mem_ctrl.rocc) {
wb_reg_rs2 := mem_reg_rs2
}
wb_reg_inst := mem_reg_inst
wb_reg_pc := mem_reg_pc
}
// Instructions whose result arrives later (div, D$ miss, RoCC) mark the scoreboard.
val wb_set_sboard = wb_ctrl.div || wb_dcache_miss || wb_ctrl.rocc
val replay_wb_common =
io.dmem.resp.bits.nack || wb_reg_replay || csr.io.csr_replay
val wb_rocc_val = wb_reg_valid && wb_ctrl.rocc && !replay_wb_common
// A RoCC command that cannot be accepted is replayed as well.
val replay_wb = replay_wb_common || wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready
val wb_xcpt = wb_reg_xcpt || csr.io.csr_xcpt
take_pc_wb := replay_wb || wb_xcpt || csr.io.eret
// The second when has priority: an exception in WB clears the pending flag.
when (wb_rocc_val) { wb_reg_rocc_pending := !io.rocc.cmd.ready }
when (wb_reg_xcpt) { wb_reg_rocc_pending := Bool(false) }
// writeback arbitration
// Bit 0 of the D$ response tag separates integer (0) from FP (1) responses;
// the remaining bits carry the destination register (see io.dmem.req.bits.tag).
val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool
val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool
val dmem_resp_waddr = io.dmem.resp.bits.tag.toUInt >> UInt(1)
val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data
val dmem_resp_replay = io.dmem.resp.bits.replay && io.dmem.resp.bits.has_data
// Long-latency ("ll") write port: defaults to the mul/div response, which is only
// accepted when WB is not itself writing an integer register.
div.io.resp.ready := !(wb_reg_valid && wb_ctrl.wxd)
val ll_wdata = Wire(init = div.io.resp.bits.data)
val ll_waddr = Wire(init = div.io.resp.bits.tag)
val ll_wen = Wire(init = div.io.resp.fire())
// A firing RoCC response overrides the mul/div response.
if (!params(BuildRoCC).isEmpty) {
io.rocc.resp.ready := !(wb_reg_valid && wb_ctrl.wxd)
when (io.rocc.resp.fire()) {
div.io.resp.ready := Bool(false)
ll_wdata := io.rocc.resp.bits.data
ll_waddr := io.rocc.resp.bits.rd
ll_wen := Bool(true)
}
}
// A replayed integer D$ response overrides both; its data is selected in rf_wdata below.
when (dmem_resp_replay && dmem_resp_xpu) {
div.io.resp.ready := Bool(false)
if (!params(BuildRoCC).isEmpty)
io.rocc.resp.ready := Bool(false)
ll_waddr := dmem_resp_waddr
ll_wen := Bool(true)
}
val wb_valid = wb_reg_valid && !replay_wb && !csr.io.csr_xcpt
val wb_wen = wb_valid && wb_ctrl.wxd
val rf_wen = wb_wen || ll_wen
val rf_waddr = Mux(ll_wen, ll_waddr, wb_waddr)
// Write-data priority: D$ integer response, long-latency result, CSR read data, WB data.
val rf_wdata = Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data_subword,
Mux(ll_wen, ll_wdata,
Mux(wb_ctrl.csr != CSR.N, csr.io.rw.rdata,
wb_reg_wdata)))
when (rf_wen) { rf.write(rf_waddr, rf_wdata) }
// hook up control/status regfile
csr.io.exception := wb_reg_xcpt
csr.io.cause := wb_reg_cause
csr.io.retire := wb_valid
io.host <> csr.io.host
io.fpu.fcsr_rm := csr.io.fcsr_rm
csr.io.fcsr_flags := io.fpu.fcsr_flags
csr.io.rocc <> io.rocc
csr.io.pc := wb_reg_pc
csr.io.uarch_counters.foreach(_ := Bool(false))
io.ptw.ptbr := csr.io.ptbr
io.ptw.invalidate := csr.io.fatc
io.ptw.status := csr.io.status
// CSR accesses are performed from the WB stage; the command is CSR.N unless WB is valid.
csr.io.rw.addr := wb_reg_inst(31,20)
csr.io.rw.cmd := Mux(wb_reg_valid, wb_ctrl.csr, CSR.N)
csr.io.rw.wdata := wb_reg_wdata
// (enable, register) pairs the decode-stage instruction depends on.
// Integer targets exclude x0; the destination is included as well as the sources.
val hazard_targets = Seq((id_ctrl.rxs1 && id_raddr1 != UInt(0), id_raddr1),
(id_ctrl.rxs2 && id_raddr2 != UInt(0), id_raddr2),
(id_ctrl.wxd && id_waddr != UInt(0), id_waddr))
val fp_hazard_targets = Seq((io.fpu.dec.ren1, id_raddr1),
(io.fpu.dec.ren2, id_raddr2),
(io.fpu.dec.ren3, id_raddr3),
(io.fpu.dec.wen, id_waddr))
// Integer scoreboard. The clear is applied before the hazard check reads the
// bypassed value, and the set is applied after it.
val sboard = new Scoreboard(32)
sboard.clear(ll_wen, ll_waddr)
val id_sboard_hazard = checkHazards(hazard_targets, sboard.readBypassed _)
sboard.set(wb_set_sboard && wb_wen, wb_waddr)
// stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage.
val ex_cannot_bypass = ex_ctrl.csr != CSR.N || ex_ctrl.jalr || ex_ctrl.mem || ex_ctrl.div || ex_ctrl.fp || ex_ctrl.rocc
val data_hazard_ex = ex_ctrl.wxd && checkHazards(hazard_targets, _ === ex_waddr)
val fp_data_hazard_ex = ex_ctrl.wfd && checkHazards(fp_hazard_targets, _ === ex_waddr)
val id_ex_hazard = ex_reg_valid && (data_hazard_ex && ex_cannot_bypass || fp_data_hazard_ex)
// stall for RAW/WAW hazards on CSRs, LB/LH, and mul/div in memory stage.
// Without FastLoadWord every load in MEM blocks bypassing; with FastLoadWord but
// not FastLoadByte only the slow-bypass cases do; with both, none do.
val mem_mem_cmd_bh =
if (params(FastLoadWord)) Bool(!params(FastLoadByte)) && mem_reg_slow_bypass
else Bool(true)
val mem_cannot_bypass = mem_ctrl.csr != CSR.N || mem_ctrl.mem && mem_mem_cmd_bh || mem_ctrl.div || mem_ctrl.fp || mem_ctrl.rocc
val data_hazard_mem = mem_ctrl.wxd && checkHazards(hazard_targets, _ === mem_waddr)
val fp_data_hazard_mem = mem_ctrl.wfd && checkHazards(fp_hazard_targets, _ === mem_waddr)
val id_mem_hazard = mem_reg_valid && (data_hazard_mem && mem_cannot_bypass || fp_data_hazard_mem)
// Drives the wire declared in the decode stage; latched into ex_reg_load_use.
id_load_use := mem_reg_valid && data_hazard_mem && mem_ctrl.mem
// stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback.
val data_hazard_wb = wb_ctrl.wxd && checkHazards(hazard_targets, _ === wb_waddr)
val fp_data_hazard_wb = wb_ctrl.wfd && checkHazards(fp_hazard_targets, _ === wb_waddr)
val id_wb_hazard = wb_reg_valid && (data_hazard_wb && wb_set_sboard || fp_data_hazard_wb)
// FP stall condition; constant false (and no FP scoreboard) when no FPU is configured.
val id_stall_fpu = if (!params(BuildFPU).isEmpty) {
val fp_sboard = new Scoreboard(32)
fp_sboard.set((wb_dcache_miss && wb_ctrl.wfd || io.fpu.sboard_set) && wb_valid, wb_waddr)
fp_sboard.clear(dmem_resp_replay && dmem_resp_fpu, dmem_resp_waddr)
fp_sboard.clear(io.fpu.sboard_clr, io.fpu.sboard_clra)
id_csr_en && !io.fpu.fcsr_rdy || checkHazards(fp_hazard_targets, fp_sboard.read _)
} else Bool(false)
// All reasons to hold the instruction in decode.
val ctrl_stalld =
id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard ||
id_ctrl.fp && id_stall_fpu ||
id_ctrl.mem && !io.dmem.req.ready ||
Bool(!params(BuildRoCC).isEmpty) && wb_reg_rocc_pending && id_ctrl.rocc && !io.rocc.cmd.ready ||
id_do_fence ||
csr.io.csr_stall
// Drives the wire declared in the decode stage.
ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || csr.io.interrupt
// Front-end redirect: issued whenever MEM or WB requests a new PC.
io.imem.req.valid := take_pc
io.imem.req.bits.pc :=
Mux(wb_xcpt || csr.io.eret, csr.io.evec, // exception or [m|s]ret
Mux(replay_wb, wb_reg_pc, // replay
mem_npc)).toUInt // mispredicted branch
io.imem.invalidate := wb_reg_valid && wb_ctrl.fence_i
io.imem.resp.ready := !ctrl_stalld || csr.io.interrupt
// Branch target buffer update from the MEM stage.
io.imem.btb_update.valid := mem_reg_valid && !mem_npc_misaligned && mem_wrong_npc && ((mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal) && !take_pc_wb
io.imem.btb_update.bits.isJump := mem_ctrl.jal || mem_ctrl.jalr
// A jalr whose rs1 field matches the pattern b00??1 is flagged as a return.
io.imem.btb_update.bits.isReturn := mem_ctrl.jalr && mem_reg_inst(19,15) === BitPat("b00??1")
io.imem.btb_update.bits.pc := mem_reg_pc
io.imem.btb_update.bits.target := io.imem.req.bits.pc
io.imem.btb_update.bits.br_pc := mem_reg_pc
io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit
io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp
// Branch history table update.
io.imem.bht_update.valid := mem_reg_valid && mem_ctrl.branch && !take_pc_wb
io.imem.bht_update.bits.pc := mem_reg_pc
io.imem.bht_update.bits.taken := mem_br_taken
io.imem.bht_update.bits.mispredict := mem_wrong_npc
io.imem.bht_update.bits.prediction := io.imem.btb_update.bits.prediction
// Return address stack update.
io.imem.ras_update.valid := mem_reg_valid && io.imem.btb_update.bits.isJump && !mem_npc_misaligned && !take_pc_wb
io.imem.ras_update.bits.returnAddr := mem_int_wdata
// A jump that writes a destination register with bit 0 set is flagged as a call.
io.imem.ras_update.bits.isCall := mem_ctrl.wxd && mem_waddr(0)
io.imem.ras_update.bits.isReturn := io.imem.btb_update.bits.isReturn
io.imem.ras_update.bits.prediction := io.imem.btb_update.bits.prediction
// FPU interface.
io.fpu.valid := !ctrl_killd && id_ctrl.fp
io.fpu.killx := ctrl_killx
io.fpu.killm := killm_common
io.fpu.inst := id_inst
io.fpu.fromint_data := ex_rs(0)
io.fpu.dmem_resp_val := dmem_resp_valid && dmem_resp_fpu
io.fpu.dmem_resp_data := io.dmem.resp.bits.data
io.fpu.dmem_resp_type := io.dmem.resp.bits.typ
io.fpu.dmem_resp_tag := dmem_resp_waddr
// Data cache request: issued from EX; kill and store data are driven from MEM-stage signals.
io.dmem.req.valid := ex_reg_valid && ex_ctrl.mem
io.dmem.req.bits.kill := killm_common || mem_xcpt
io.dmem.req.bits.cmd := ex_ctrl.mem_cmd
io.dmem.req.bits.typ := ex_ctrl.mem_type
io.dmem.req.bits.phys := Bool(false)
io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(vaddrBits-1,0)).toUInt
// Tag = destination register with the FP flag as bit 0 (decoded again in writeback arbitration).
io.dmem.req.bits.tag := Cat(ex_waddr, ex_ctrl.fp)
io.dmem.req.bits.data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2)
require(params(CoreDCacheReqTagBits) >= 6)
io.dmem.invalidate_lr := wb_xcpt
// RoCC command interface, driven from the WB stage.
io.rocc.cmd.valid := wb_rocc_val
io.rocc.exception := wb_xcpt && csr.io.status.xs.orR
io.rocc.s := csr.io.status.prv.orR // should we just pass all of mstatus?
io.rocc.cmd.bits.inst := new RoCCInstruction().fromBits(wb_reg_inst)
io.rocc.cmd.bits.rs1 := wb_reg_wdata
io.rocc.cmd.bits.rs2 := wb_reg_rs2
// Connect the RoCC FPU request/response ports to the FPU when one is built;
// otherwise hold the FPU coprocessor request invalid.
if (!params(BuildFPU).isEmpty) {
io.fpu.cp_req <> io.rocc.fpu_req
io.fpu.cp_resp <> io.rocc.fpu_resp
} else {
io.fpu.cp_req.valid := Bool(false)
}
// Per-cycle trace line: core id, time, WB valid, PC, register write, the two
// source register specifiers with their EX operands delayed by two cycles, and the instruction.
printf("C%d: %d [%d] pc=[%x] W[r%d=%x][%d] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n",
io.host.id, csr.io.time(32,0), wb_valid, wb_reg_pc,
Mux(rf_wen, rf_waddr, UInt(0)), rf_wdata, rf_wen,
wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs(0))),
wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs(1))),
wb_reg_inst, wb_reg_inst)
/** Combines a prioritized list of (raised, cause) pairs.
  *
  * @param x exception conditions with their cause codes, highest priority first
  * @return a pair of (any condition raised, cause selected by PriorityMux over x)
  */
def checkExceptions(x: Seq[(Bool, UInt)]) = {
  val anyRaised = x.map { case (raised, _) => raised }.reduce(_ || _)
  val selectedCause = PriorityMux(x)
  (anyRaised, selectedCause)
}
/** Reports whether any enabled target register satisfies the given condition.
  *
  * @param targets (enable, register address) pairs to examine
  * @param cond    predicate applied to each register address
  * @return OR over all targets of (enable && cond(address))
  */
def checkHazards(targets: Seq[(Bool, UInt)], cond: UInt => Bool) = {
  val conflicts = targets.map { case (enabled, regAddr) => enabled && cond(regAddr) }
  conflicts.reduce(_ || _)
}
/** Assembles the immediate operand of an instruction.
  *
  * The result is built field by field; which instruction bits (or sign/zero
  * fill) feed each field depends on the immediate format selector.
  *
  * @param sel  immediate format selector (compared against the IMM_* constants)
  * @param inst instruction bits
  * @return the concatenated fields, as an SInt
  */
def imm(sel: UInt, inst: UInt) = {
  // Decode the format selector once.
  val isI  = sel === IMM_I
  val isS  = sel === IMM_S
  val isSB = sel === IMM_SB
  val isU  = sel === IMM_U
  val isUJ = sel === IMM_UJ
  val isZ  = sel === IMM_Z

  // Top field: zero for IMM_Z, otherwise instruction bit 31.
  val signFill = Mux(isZ, SInt(0), inst(31).toSInt)
  val field30_20 = Mux(isU, inst(30,20).toSInt, signFill)
  val field19_12 = Mux(!isU && !isUJ, signFill, inst(19,12).toSInt)
  val field11 = Mux(isU || isZ, SInt(0),
    Mux(isUJ, inst(20).toSInt,
      Mux(isSB, inst(7).toSInt, signFill)))
  val field10_5 = Mux(isU || isZ, Bits(0), inst(30,25))
  val field4_1 = Mux(isU, Bits(0),
    Mux(isS || isSB, inst(11,8),
      Mux(isZ, inst(19,16), inst(24,21))))
  val field0 = Mux(isS, inst(7),
    Mux(isI, inst(20),
      Mux(isZ, inst(15), Bits(0))))

  Cat(signFill, field30_20, field19_12, field11, field10_5, field4_1, field0).toSInt
}
/** Computes the extra bit that callers place above bits (vaddrBits-1, 0) of an
  * effective address.
  *
  * efficient means to compress 64-bit VA into vaddrBits+1 bits
  * (VA is bad if VA(vaddrBits) != VA(vaddrBits-1))
  *
  * @param a0 base value whose bits from vaddrBits-1 upward are inspected
  * @param ea effective address; only bits vaddrBits and vaddrBits-1 are used
  */
def vaSign(a0: UInt, ea: UInt) = {
  val baseUpper = a0 >> (vaddrBits-1)
  val eaUpper = ea(vaddrBits,vaddrBits-1)

  val baseIsZeroOrOne = baseUpper === UInt(0) || baseUpper === UInt(1)
  val baseIsMinusOneOrTwo = baseUpper.toSInt === SInt(-1) || baseUpper.toSInt === SInt(-2)

  Mux(baseIsZeroOrOne, eaUpper != UInt(0),
    Mux(baseIsMinusOneOrTwo, eaUpper.toSInt === SInt(-1),
      eaUpper(0)))
}
/** Integer register file with write-to-read forwarding.
  *
  * Storage is a 31-entry memory of 64-bit words, indexed by the bitwise
  * complement of the register address. All read ports must be created before
  * the first call to write, because write patches every read port recorded so far.
  */
class RegFile {
  private val rf = Mem(UInt(width = 64), 31)
  // (address, data wire) of every read port created so far.
  private val reads = collection.mutable.ArrayBuffer[(UInt,UInt)]()
  private var canRead = true

  /** Creates a read port for `addr` and returns its data wire. */
  def read(addr: UInt) = {
    require(canRead)
    val port = (addr, Wire(UInt()))
    reads += port
    port._2 := rf(~addr)
    port._2
  }

  /** Writes `data` to `addr` unless `addr` is zero, forwarding the value to
    * every read port whose address matches. */
  def write(addr: UInt, data: UInt) = {
    canRead = false
    when (addr != UInt(0)) {
      rf(~addr) := data
      reads.foreach { case (raddr, rdata) =>
        when (addr === raddr) { rdata := data }
      }
    }
  }
}
/** An n-bit busy vector held in a register, with chained set/clear updates.
  *
  * Each call to set or clear is layered on top of the value produced by the
  * previous calls, so the order in which they are called is significant.
  */
class Scoreboard(n: Int) {
  private val r = Reg(init=Bits(0, n))
  // Value after all updates applied so far; starts as the register itself.
  private var pending = r
  // OR of the enables of all updates applied so far.
  private var anyEnable = Bool(false)

  /** Sets bit `addr` when `en` is asserted. */
  def set(en: Bool, addr: UInt): Unit = commit(en, pending | oneHot(en, addr))

  /** Clears bit `addr` when `en` is asserted. */
  def clear(en: Bool, addr: UInt): Unit = commit(en, pending & ~oneHot(en, addr))

  /** Reads bit `addr` of the registered value. */
  def read(addr: UInt): Bool = r(addr)

  /** Reads bit `addr` of the value including the updates applied so far. */
  def readBypassed(addr: UInt): Bool = pending(addr)

  // One-hot mask for addr, or zero when the update is not enabled.
  private def oneHot(en: Bool, addr: UInt) = Mux(en, UInt(1) << addr, UInt(0))

  // Records the new pending value and writes it to the register when any update is enabled.
  private def commit(en: Bool, nextValue: UInt) = {
    pending = nextValue
    anyEnable = anyEnable || en
    when (anyEnable) { r := pending }
  }
}
}

View File

@ -23,21 +23,28 @@ class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) {
val icache = Module(new Frontend, { case CacheName => "L1I"; case CoreName => "Rocket" })
val dcache = Module(new HellaCache, { case CacheName => "L1D" })
val ptw = Module(new PTW(params(NPTWPorts)))
val core = Module(new Core, { case CoreName => "Rocket" })
val core = Module(new Rocket, { case CoreName => "Rocket" })
dcache.io.cpu.invalidate_lr := core.io.dmem.invalidate_lr // Bypass signal to dcache
val dcArb = Module(new HellaCacheArbiter(params(NDCachePorts)))
dcArb.io.requestor(0) <> ptw.io.mem
dcArb.io.requestor(1) <> core.io.dmem
dcArb.io.mem <> dcache.io.cpu
dcache.io.cpu <> dcArb.io.mem
ptw.io.requestor(0) <> icache.io.ptw
ptw.io.requestor(1) <> dcache.io.ptw
core.io.host <> io.host
core.io.imem <> icache.io.cpu
io.host <> core.io.host
icache.io.cpu <> core.io.imem
core.io.ptw <> ptw.io.dpath
//If so specified, build an FPU module and wire it in
params(BuildFPU)
.map { bf => bf() }
.foreach { fpu =>
core.io.fpu <> fpu.io
}
// Connect the caches and ROCC to the outer memory system
io.cached <> dcache.io.mem
// If so specified, build an RoCC module and wire it in

View File

@ -4,7 +4,7 @@ package rocket
import Chisel._
import Util._
import uncore._
import junctions._
import scala.math._
case object NTLBEntries extends Field[Int]
@ -109,7 +109,7 @@ class TLB extends TLBModule {
val r_req = Reg(new TLBReq)
val tag_cam = Module(new RocketCAM)
val tag_ram = Mem(io.ptw.resp.bits.pte.ppn.clone, entries)
val tag_ram = Mem(io.ptw.resp.bits.pte.ppn, entries)
val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUInt
tag_cam.io.tag := lookup_tag
@ -119,18 +119,18 @@ class TLB extends TLBModule {
val tag_hit_addr = OHToUInt(tag_cam.io.hits)
// permission bit arrays
val valid_array = Reg(Bits()) // PTE is valid (not equivalent to CAM tag valid bit!)
val ur_array = Reg(Bits()) // user read permission
val uw_array = Reg(Bits()) // user write permission
val ux_array = Reg(Bits()) // user execute permission
val sr_array = Reg(Bits()) // supervisor read permission
val sw_array = Reg(Bits()) // supervisor write permission
val sx_array = Reg(Bits()) // supervisor execute permission
val dirty_array = Reg(Bits()) // PTE dirty bit
val valid_array = Reg(Vec(Bool(), entries)) // PTE is valid (not equivalent to CAM tag valid bit!)
val ur_array = Reg(Vec(Bool(), entries)) // user read permission
val uw_array = Reg(Vec(Bool(), entries)) // user write permission
val ux_array = Reg(Vec(Bool(), entries)) // user execute permission
val sr_array = Reg(Vec(Bool(), entries)) // supervisor read permission
val sw_array = Reg(Vec(Bool(), entries)) // supervisor write permission
val sx_array = Reg(Vec(Bool(), entries)) // supervisor execute permission
val dirty_array = Reg(Vec(Bool(), entries)) // PTE dirty bit
when (io.ptw.resp.valid) {
val pte = io.ptw.resp.bits.pte
tag_ram(r_refill_waddr) := pte.ppn
valid_array := valid_array.bitSet(r_refill_waddr, !io.ptw.resp.bits.error)
valid_array(r_refill_waddr) := !io.ptw.resp.bits.error
ur_array(r_refill_waddr) := pte.ur() && !io.ptw.resp.bits.error
uw_array(r_refill_waddr) := pte.uw() && !io.ptw.resp.bits.error
ux_array(r_refill_waddr) := pte.ux() && !io.ptw.resp.bits.error
@ -151,14 +151,14 @@ class TLB extends TLBModule {
val priv_uses_vm = priv <= PRV_S
val req_xwr = Cat(!r_req.store, r_req.store, !(r_req.instruction || r_req.store))
val r_array = Mux(priv_s, sr_array, ur_array)
val w_array = Mux(priv_s, sw_array, uw_array)
val x_array = Mux(priv_s, sx_array, ux_array)
val r_array = Mux(priv_s, sr_array.toBits, ur_array.toBits)
val w_array = Mux(priv_s, sw_array.toBits, uw_array.toBits)
val x_array = Mux(priv_s, sx_array.toBits, ux_array.toBits)
val vm_enabled = io.ptw.status.vm(3) && priv_uses_vm
val bad_va = io.req.bits.vpn(vpnBits) != io.req.bits.vpn(vpnBits-1)
// it's only a store hit if the dirty bit is set
val tag_hits = tag_cam.io.hits & (dirty_array | ~(io.req.bits.store.toSInt & w_array))
val tag_hits = tag_cam.io.hits & (dirty_array.toBits | ~Mux(io.req.bits.store, w_array, UInt(0)))
val tag_hit = tag_hits.orR
val tlb_hit = vm_enabled && tag_hit
val tlb_miss = vm_enabled && !tag_hit && !bad_va
@ -177,8 +177,8 @@ class TLB extends TLBModule {
// clear invalid entries on access, or all entries on a TLB flush
tag_cam.io.clear := io.ptw.invalidate || io.req.fire()
tag_cam.io.clear_mask := ~valid_array | (tag_cam.io.hits & ~tag_hits)
when (io.ptw.invalidate) { tag_cam.io.clear_mask := SInt(-1) }
tag_cam.io.clear_mask := ~valid_array.toBits | (tag_cam.io.hits & ~tag_hits)
when (io.ptw.invalidate) { tag_cam.io.clear_mask := ~UInt(0, entries) }
io.ptw.req.valid := state === s_request
io.ptw.req.bits.addr := r_refill_tag

View File

@ -9,8 +9,8 @@ import scala.math._
object Util {
implicit def intToUInt(x: Int): UInt = UInt(x)
implicit def booleanToBool(x: Boolean): Bits = Bool(x)
implicit def intSeqToUIntSeq(x: Iterable[Int]): Iterable[UInt] = x.map(UInt(_))
implicit def seqToVec[T <: Data](x: Iterable[T]): Vec[T] = Vec(x)
implicit def intSeqToUIntSeq(x: Seq[Int]): Seq[UInt] = x.map(UInt(_))
implicit def seqToVec[T <: Data](x: Seq[T]): Vec[T] = Vec(x)
implicit def wcToUInt(c: WideCounter): UInt = c.value
implicit def sextToConv(x: UInt) = new AnyRef {
def sextTo(n: Int): UInt = Cat(Fill(n - x.getWidth, x(x.getWidth-1)), x)

View File

@ -1,8 +0,0 @@
// Provide a managed dependency on uncore if -DuncoreVersion="" is
// supplied on the command line.
val uncoreVersion = System.getProperty("uncoreVersion", "None")
libraryDependencies ++= ( if (uncoreVersion != "None" ) (
"edu.berkeley.cs" %% "uncore" % uncoreVersion
) :: Nil; else Nil)