diff --git a/rocket/.gitignore b/rocket/.gitignore
new file mode 100644
index 00000000..eb5a316c
--- /dev/null
+++ b/rocket/.gitignore
@@ -0,0 +1 @@
+target
diff --git a/rocket/LICENSE b/rocket/LICENSE
new file mode 100644
index 00000000..60e19fad
--- /dev/null
+++ b/rocket/LICENSE
@@ -0,0 +1,24 @@
+Copyright (c) 2011-2014, The Regents of the University of California
+(Regents). All Rights Reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+3. Neither the name of the Regents nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
+SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING
+OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS
+BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
+HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
+MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
diff --git a/rocket/README.md b/rocket/README.md
new file mode 100644
index 00000000..72707989
--- /dev/null
+++ b/rocket/README.md
@@ -0,0 +1,29 @@
+Rocket Core
+===========
+
+Rocket is a 6-stage single-issue in-order pipeline that executes the 64-bit
+scalar RISC-V ISA. Rocket implements an MMU that supports page-based virtual
+memory and is able to boot modern operating systems such as Linux. Rocket
+also has an optional IEEE 754-2008-compliant FPU, which implements both
+single- and double-precision floating-point operations, including fused
+multiply-add.
+
+This repository is not intended to be run standalone. To
+instantiate a Rocket core, please use the Rocket chip generator found in the
+rocket-chip git repository.
+
+The following table compares a 32-bit ARM Cortex-A5 core to a 64-bit RISC-V
+Rocket core built in the same TSMC process (40GPLUS). The fourth column is the
+ratio of RISC-V Rocket to ARM Cortex-A5. Both use single-instruction-issue,
+in-order pipelines, yet the RISC-V core is faster, smaller, and uses less
+power.
+
+ISA/Implementation | ARM Cortex-A5 | RISC-V Rocket | R/A
+--- | --- | --- | ---
+ISA Register Width | 32 bits | 64 bits | 2
+Frequency | >1 GHz | >1 GHz | 1
+Dhrystone Performance | 1.57 DMIPS/MHz | 1.72 DMIPS/MHz | 1.1
+Area excluding caches | 0.27 mm2 | 0.14 mm2 | 0.5
+Area with 16KB caches | 0.53 mm2 | 0.39 mm2 | 0.7
+Area Efficiency | 2.96 DMIPS/MHz/mm2 | 4.41 DMIPS/MHz/mm2 | 1.5
+Dynamic Power | <0.08 mW/MHz | 0.034 mW/MHz | >= 0.4
diff --git a/rocket/build.sbt b/rocket/build.sbt
new file mode 100644
index 00000000..97c51700
--- /dev/null
+++ b/rocket/build.sbt
@@ -0,0 +1,10 @@
+organization := "edu.berkeley.cs"
+
+version := "1.2"
+
+name := "rocket"
+
+scalaVersion := "2.11.6"
+
+libraryDependencies ++= (Seq("chisel", "hardfloat", "uncore", "junctions", "cde").map {
+ dep: String => sys.props.get(dep + "Version") map { "edu.berkeley.cs" %% dep % _ }}).flatten
diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala
new file mode 100644
index 00000000..1a686d5b
--- /dev/null
+++ b/rocket/src/main/scala/arbiter.scala
@@ -0,0 +1,113 @@
+// See LICENSE for license details.
+
+package rocket
+
+import Chisel._
+import cde.{Parameters, Field}
+import junctions.{ParameterizedBundle, DecoupledHelper}
+
+class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module
+{
+ val io = new Bundle {
+ val requestor = Vec(n, new HellaCacheIO).flip
+ val mem = new HellaCacheIO
+ }
+
+ if (n == 1) {
+ io.mem <> io.requestor.head
+ } else {
+ val s1_id = Reg(UInt())
+ val s2_id = Reg(next=s1_id)
+
+ io.mem.invalidate_lr := io.requestor.map(_.invalidate_lr).reduce(_||_)
+ io.mem.req.valid := io.requestor.map(_.req.valid).reduce(_||_)
+ io.requestor(0).req.ready := io.mem.req.ready
+ for (i <- 1 until n)
+ io.requestor(i).req.ready := io.requestor(i-1).req.ready && !io.requestor(i-1).req.valid
+
+ for (i <- n-1 to 0 by -1) {
+ val req = io.requestor(i).req
+ def connect_s0() = {
+ io.mem.req.bits.cmd := req.bits.cmd
+ io.mem.req.bits.typ := req.bits.typ
+ io.mem.req.bits.addr := req.bits.addr
+ io.mem.req.bits.phys := req.bits.phys
+ io.mem.req.bits.tag := Cat(req.bits.tag, UInt(i, log2Up(n)))
+ s1_id := UInt(i)
+ }
+ def connect_s1() = {
+ io.mem.s1_kill := io.requestor(i).s1_kill
+ io.mem.s1_data := io.requestor(i).s1_data
+ }
+
+ if (i == n-1) {
+ connect_s0()
+ connect_s1()
+ } else {
+ when (req.valid) { connect_s0() }
+ when (s1_id === UInt(i)) { connect_s1() }
+ }
+ }
+
+ for (i <- 0 until n) {
+ val resp = io.requestor(i).resp
+ val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UInt(i)
+ resp.valid := io.mem.resp.valid && tag_hit
+ io.requestor(i).xcpt := io.mem.xcpt
+ io.requestor(i).ordered := io.mem.ordered
+ io.requestor(i).s2_nack := io.mem.s2_nack && s2_id === UInt(i)
+ resp.bits := io.mem.resp.bits
+ resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n)
+
+ io.requestor(i).replay_next := io.mem.replay_next
+ }
+ }
+}
+
+class InOrderArbiter[T <: Data, U <: Data](reqTyp: T, respTyp: U, n: Int)
+ (implicit p: Parameters) extends Module {
+ val io = new Bundle {
+ val in_req = Vec(n, Decoupled(reqTyp)).flip
+ val in_resp = Vec(n, Decoupled(respTyp))
+ val out_req = Decoupled(reqTyp)
+ val out_resp = Decoupled(respTyp).flip
+ }
+
+ if (n > 1) {
+ val route_q = Module(new Queue(UInt(width = log2Up(n)), 2))
+ val req_arb = Module(new RRArbiter(reqTyp, n))
+ req_arb.io.in <> io.in_req
+
+ val req_helper = DecoupledHelper(
+ req_arb.io.out.valid,
+ route_q.io.enq.ready,
+ io.out_req.ready)
+
+ io.out_req.bits := req_arb.io.out.bits
+ io.out_req.valid := req_helper.fire(io.out_req.ready)
+
+ route_q.io.enq.bits := req_arb.io.chosen
+ route_q.io.enq.valid := req_helper.fire(route_q.io.enq.ready)
+
+ req_arb.io.out.ready := req_helper.fire(req_arb.io.out.valid)
+
+ val resp_sel = route_q.io.deq.bits
+ val resp_ready = io.in_resp(resp_sel).ready
+ val resp_helper = DecoupledHelper(
+ resp_ready,
+ route_q.io.deq.valid,
+ io.out_resp.valid)
+
+ val resp_valid = resp_helper.fire(resp_ready)
+ for (i <- 0 until n) {
+ io.in_resp(i).bits := io.out_resp.bits
+ io.in_resp(i).valid := resp_valid && resp_sel === UInt(i)
+ }
+
+ route_q.io.deq.ready := resp_helper.fire(route_q.io.deq.valid)
+ io.out_resp.ready := resp_helper.fire(io.out_resp.valid)
+ } else {
+ io.out_req <> io.in_req.head
+ io.in_resp.head <> io.out_resp
+ }
+}
diff --git a/rocket/src/main/scala/breakpoint.scala b/rocket/src/main/scala/breakpoint.scala
new file mode 100644
index 00000000..ee484c28
--- /dev/null
+++ b/rocket/src/main/scala/breakpoint.scala
@@ -0,0 +1,82 @@
+// See LICENSE for license details.
+
+package rocket
+
+import Chisel._
+import Util._
+import cde.Parameters
+
+class TDRSelect(implicit p: Parameters) extends CoreBundle()(p) {
+ val tdrmode = Bool()
+ val reserved = UInt(width = xLen - 1 - log2Up(nTDR))
+ val tdrindex = UInt(width = log2Up(nTDR))
+
+ def nTDR = p(NBreakpoints)
+}
+
+class BPControl(implicit p: Parameters) extends CoreBundle()(p) {
+ val tdrtype = UInt(width = 4)
+ val bpamaskmax = UInt(width = 5)
+ val reserved = UInt(width = xLen-28)
+ val bpaction = UInt(width = 8)
+ val bpmatch = UInt(width = 4)
+ val m = Bool()
+ val h = Bool()
+ val s = Bool()
+ val u = Bool()
+ val r = Bool()
+ val w = Bool()
+ val x = Bool()
+
+ def tdrType = 1
+ def bpaMaskMax = 4
+ def enabled(mstatus: MStatus) = Cat(m, h, s, u)(mstatus.prv)
+}
+
+class BP(implicit p: Parameters) extends CoreBundle()(p) {
+ val control = new BPControl
+ val address = UInt(width = vaddrBits)
+
+ def mask(dummy: Int = 0) = {
+ var mask: UInt = control.bpmatch(1)
+ for (i <- 1 until control.bpaMaskMax)
+ mask = Cat(mask(i-1) && address(i-1), mask)
+ mask
+ }
+
+ def pow2AddressMatch(x: UInt) =
+ (~x | mask()) === (~address | mask())
+}
+
+class BreakpointUnit(implicit p: Parameters) extends CoreModule()(p) {
+ val io = new Bundle {
+ val status = new MStatus().asInput
+ val bp = Vec(p(NBreakpoints), new BP).asInput
+ val pc = UInt(INPUT, vaddrBits)
+ val ea = UInt(INPUT, vaddrBits)
+ val xcpt_if = Bool(OUTPUT)
+ val xcpt_ld = Bool(OUTPUT)
+ val xcpt_st = Bool(OUTPUT)
+ }
+
+ io.xcpt_if := false
+ io.xcpt_ld := false
+ io.xcpt_st := false
+
+ for (bp <- io.bp) {
+ when (bp.control.enabled(io.status)) {
+ when (bp.pow2AddressMatch(io.pc) && bp.control.x) { io.xcpt_if := true }
+ when (bp.pow2AddressMatch(io.ea) && bp.control.r) { io.xcpt_ld := true }
+ when (bp.pow2AddressMatch(io.ea) && bp.control.w) { io.xcpt_st := true }
+ }
+ }
+
+ if (!io.bp.isEmpty) for ((bpl, bph) <- io.bp zip io.bp.tail) {
+ def matches(x: UInt) = !(x < bpl.address) && x < bph.address
+ when (bph.control.enabled(io.status) && bph.control.bpmatch === 1) {
+ when (matches(io.pc) && bph.control.x) { io.xcpt_if := true }
+ when (matches(io.ea) && bph.control.r) { io.xcpt_ld := true }
+ when (matches(io.ea) && bph.control.w) { io.xcpt_st := true }
+ }
+ }
+}
diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala
new file mode 100644
index 00000000..d16c4725
--- /dev/null
+++ b/rocket/src/main/scala/btb.scala
@@ -0,0 +1,272 @@
+// See LICENSE for license details.
+
+package rocket
+
+import Chisel._
+import junctions._
+import cde.{Parameters, Field}
+import Util._
+
+case object BtbKey extends Field[BtbParameters]
+
+case class BtbParameters(
+ nEntries: Int = 62,
+ nRAS: Int = 2,
+ updatesOutOfOrder: Boolean = false)
+
+abstract trait HasBtbParameters extends HasCoreParameters {
+ val matchBits = pgIdxBits
+ val entries = p(BtbKey).nEntries
+ val nRAS = p(BtbKey).nRAS
+ val updatesOutOfOrder = p(BtbKey).updatesOutOfOrder
+ val nPages = ((1 max(log2Up(entries)))+1)/2*2 // control logic assumes 2 divides pages
+ val opaqueBits = log2Up(entries)
+ val nBHT = 1 << log2Up(entries*2)
+}
+
+abstract class BtbModule(implicit val p: Parameters) extends Module with HasBtbParameters
+abstract class BtbBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
+ with HasBtbParameters
+
+class RAS(nras: Int) {
+ def push(addr: UInt): Unit = {
+ when (count < nras) { count := count + 1 }
+ val nextPos = Mux(Bool(isPow2(nras)) || pos < nras-1, pos+1, UInt(0))
+ stack(nextPos) := addr
+ pos := nextPos
+ }
+ def peek: UInt = stack(pos)
+ def pop(): Unit = when (!isEmpty) {
+ count := count - 1
+ pos := Mux(Bool(isPow2(nras)) || pos > 0, pos-1, UInt(nras-1))
+ }
+ def clear(): Unit = count := UInt(0)
+ def isEmpty: Bool = count === UInt(0)
+
+ private val count = Reg(UInt(width = log2Up(nras+1)))
+ private val pos = Reg(UInt(width = log2Up(nras)))
+ private val stack = Reg(Vec(nras, UInt()))
+}
+
+class BHTResp(implicit p: Parameters) extends BtbBundle()(p) {
+ val history = UInt(width = log2Up(nBHT).max(1))
+ val value = UInt(width = 2)
+}
+
+// BHT contains table of 2-bit counters and a global history register.
+// The BHT only predicts and updates when there is a BTB hit.
+// The global history:
+// - updated speculatively in fetch (if there's a BTB hit).
+// - on a mispredict, the history register is reset (again, only if BTB hit).
+// The counter table:
+// - each counter corresponds with the address of the fetch packet ("fetch pc").
+// - updated when a branch resolves (and BTB was a hit for that branch).
+// The updating branch must provide its "fetch pc".
+class BHT(nbht: Int)(implicit p: Parameters) {
+ val nbhtbits = log2Up(nbht)
+ def get(addr: UInt, update: Bool): BHTResp = {
+ val res = Wire(new BHTResp)
+ val index = addr(nbhtbits+1,2) ^ history
+ res.value := table(index)
+ res.history := history
+ val taken = res.value(0)
+ when (update) { history := Cat(taken, history(nbhtbits-1,1)) }
+ res
+ }
+ def update(addr: UInt, d: BHTResp, taken: Bool, mispredict: Bool): Unit = {
+ val index = addr(nbhtbits+1,2) ^ d.history
+ table(index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken))
+ when (mispredict) { history := Cat(taken, d.history(nbhtbits-1,1)) }
+ }
+
+ private val table = Mem(nbht, UInt(width = 2))
+ val history = Reg(UInt(width = nbhtbits))
+}
+
+// BTB update occurs during branch resolution (and only on a mispredict).
+// - "pc" is what future fetch PCs will tag match against.
+// - "br_pc" is the PC of the branch instruction.
+class BTBUpdate(implicit p: Parameters) extends BtbBundle()(p) {
+ val prediction = Valid(new BTBResp)
+ val pc = UInt(width = vaddrBits)
+ val target = UInt(width = vaddrBits)
+ val taken = Bool()
+ val isJump = Bool()
+ val isReturn = Bool()
+ val br_pc = UInt(width = vaddrBits)
+}
+
+// BHT update occurs during branch resolution on all conditional branches.
+// - "pc" is what future fetch PCs will tag match against.
+class BHTUpdate(implicit p: Parameters) extends BtbBundle()(p) {
+ val prediction = Valid(new BTBResp)
+ val pc = UInt(width = vaddrBits)
+ val taken = Bool()
+ val mispredict = Bool()
+}
+
+class RASUpdate(implicit p: Parameters) extends BtbBundle()(p) {
+ val isCall = Bool()
+ val isReturn = Bool()
+ val returnAddr = UInt(width = vaddrBits)
+ val prediction = Valid(new BTBResp)
+}
+
+// - "bridx" is the low-order PC bits of the predicted branch (after
+// shifting off the lowest log(inst_bytes) bits off).
+// - "mask" provides a mask of valid instructions (instructions are
+// masked off by the predicted taken branch from the BTB).
+class BTBResp(implicit p: Parameters) extends BtbBundle()(p) {
+ val taken = Bool()
+ val mask = Bits(width = fetchWidth)
+ val bridx = Bits(width = log2Up(fetchWidth))
+ val target = UInt(width = vaddrBits)
+ val entry = UInt(width = opaqueBits)
+ val bht = new BHTResp
+}
+
+class BTBReq(implicit p: Parameters) extends BtbBundle()(p) {
+ val addr = UInt(width = vaddrBits)
+}
+
+// fully-associative branch target buffer
+// Higher-performance processors may cause BTB updates to occur out-of-order,
+// which requires an extra CAM port for updates (to ensure no duplicates get
+// placed in BTB).
+class BTB(implicit p: Parameters) extends BtbModule {
+ val io = new Bundle {
+ val req = Valid(new BTBReq).flip
+ val resp = Valid(new BTBResp)
+ val btb_update = Valid(new BTBUpdate).flip
+ val bht_update = Valid(new BHTUpdate).flip
+ val ras_update = Valid(new RASUpdate).flip
+ }
+
+ val idxs = Reg(Vec(entries, UInt(width=matchBits - log2Up(coreInstBytes))))
+ val idxPages = Reg(Vec(entries, UInt(width=log2Up(nPages))))
+ val tgts = Reg(Vec(entries, UInt(width=matchBits - log2Up(coreInstBytes))))
+ val tgtPages = Reg(Vec(entries, UInt(width=log2Up(nPages))))
+ val pages = Reg(Vec(nPages, UInt(width=vaddrBits - matchBits)))
+ val pageValid = Reg(init = UInt(0, nPages))
+ val idxPagesOH = idxPages.map(UIntToOH(_)(nPages-1,0))
+ val tgtPagesOH = tgtPages.map(UIntToOH(_)(nPages-1,0))
+
+ val useRAS = Reg(UInt(width = entries))
+ val isJump = Reg(UInt(width = entries))
+ val brIdx = Reg(Vec(entries, UInt(width=log2Up(fetchWidth))))
+
+ private def page(addr: UInt) = addr >> matchBits
+ private def pageMatch(addr: UInt) = {
+ val p = page(addr)
+ pageValid & pages.map(_ === p).toBits
+ }
+ private def tagMatch(addr: UInt, pgMatch: UInt) = {
+ val idxMatch = idxs.map(_ === addr(matchBits-1, log2Up(coreInstBytes))).toBits
+ val idxPageMatch = idxPagesOH.map(_ & pgMatch).map(_.orR).toBits
+ idxMatch & idxPageMatch
+ }
+
+ val r_btb_update = Pipe(io.btb_update)
+ val update_target = io.req.bits.addr
+
+ val pageHit = pageMatch(io.req.bits.addr)
+ val hitsVec = tagMatch(io.req.bits.addr, pageHit)
+ val hits = hitsVec.toBits
+ val updatePageHit = pageMatch(r_btb_update.bits.pc)
+
+ val updateHits = tagMatch(r_btb_update.bits.pc, updatePageHit)
+ val updateHit = if (updatesOutOfOrder) updateHits.orR else r_btb_update.bits.prediction.valid
+ val updateHitAddr = if (updatesOutOfOrder) OHToUInt(updateHits) else r_btb_update.bits.prediction.bits.entry
+
+ // guarantee one-hotness of idx after reset
+ val resetting = Reg(init = Bool(true))
+ val (nextRepl, wrap) = Counter(resetting || (r_btb_update.valid && !updateHit), entries)
+ when (wrap) { resetting := false }
+
+ val useUpdatePageHit = updatePageHit.orR
+ val usePageHit = pageHit.orR
+ val doIdxPageRepl = !useUpdatePageHit
+ val nextPageRepl = Reg(UInt(width = log2Ceil(nPages)))
+ val idxPageRepl = Mux(usePageHit, Cat(pageHit(nPages-2,0), pageHit(nPages-1)), UIntToOH(nextPageRepl))
+ val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl)
+ val idxPageUpdate = OHToUInt(idxPageUpdateOH)
+ val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0))
+
+ val samePage = page(r_btb_update.bits.pc) === page(update_target)
+ val doTgtPageRepl = !samePage && !usePageHit
+ val tgtPageRepl = Mux(samePage, idxPageUpdateOH, Cat(idxPageUpdateOH(nPages-2,0), idxPageUpdateOH(nPages-1)))
+ val tgtPageUpdate = OHToUInt(Mux(usePageHit, pageHit, tgtPageRepl))
+ val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0))
+
+ when (r_btb_update.valid && (doIdxPageRepl || doTgtPageRepl)) {
+ val both = doIdxPageRepl && doTgtPageRepl
+ val next = nextPageRepl + Mux[UInt](both, 2, 1)
+ nextPageRepl := Mux(next >= nPages, next(0), next)
+ }
+
+ when (r_btb_update.valid || resetting) {
+ assert(resetting || io.req.bits.addr === r_btb_update.bits.target, "BTB request != I$ target")
+
+ val waddr = Mux(updateHit && !resetting, updateHitAddr, nextRepl)
+ val mask = UIntToOH(waddr)
+ val newIdx = r_btb_update.bits.pc(matchBits-1, log2Up(coreInstBytes))
+ idxs(waddr) := Mux(resetting, Cat(newIdx >> log2Ceil(entries), nextRepl), newIdx)
+ tgts(waddr) := update_target(matchBits-1, log2Up(coreInstBytes))
+ idxPages(waddr) := idxPageUpdate
+ tgtPages(waddr) := tgtPageUpdate
+ useRAS := Mux(r_btb_update.bits.isReturn, useRAS | mask, useRAS & ~mask)
+ isJump := Mux(r_btb_update.bits.isJump, isJump | mask, isJump & ~mask)
+ if (fetchWidth > 1)
+ brIdx(waddr) := r_btb_update.bits.br_pc >> log2Up(coreInstBytes)
+
+ require(nPages % 2 == 0)
+ val idxWritesEven = !idxPageUpdate(0)
+
+ def writeBank(i: Int, mod: Int, en: UInt, data: UInt) =
+ for (i <- i until nPages by mod)
+ when (en(i)) { pages(i) := data }
+
+ writeBank(0, 2, Mux(idxWritesEven, idxPageReplEn, tgtPageReplEn),
+ Mux(idxWritesEven, page(r_btb_update.bits.pc), page(update_target)))
+ writeBank(1, 2, Mux(idxWritesEven, tgtPageReplEn, idxPageReplEn),
+ Mux(idxWritesEven, page(update_target), page(r_btb_update.bits.pc)))
+ pageValid := pageValid | tgtPageReplEn | idxPageReplEn
+ }
+
+ io.resp.valid := hits.orR
+ io.resp.bits.taken := io.resp.valid
+ io.resp.bits.target := Cat(Mux1H(Mux1H(hitsVec, tgtPagesOH), pages), Mux1H(hitsVec, tgts) << log2Up(coreInstBytes))
+ io.resp.bits.entry := OHToUInt(hits)
+ io.resp.bits.bridx := Mux1H(hitsVec, brIdx)
+ io.resp.bits.mask := Cat((UInt(1) << ~Mux(io.resp.bits.taken, ~io.resp.bits.bridx, UInt(0)))-1, UInt(1))
+
+ if (nBHT > 0) {
+ val bht = new BHT(nBHT)
+ val isBranch = !(hits & isJump).orR
+ val res = bht.get(io.req.bits.addr, io.req.valid && io.resp.valid && isBranch)
+ val update_btb_hit = io.bht_update.bits.prediction.valid
+ when (io.bht_update.valid && update_btb_hit) {
+ bht.update(io.bht_update.bits.pc, io.bht_update.bits.prediction.bits.bht, io.bht_update.bits.taken, io.bht_update.bits.mispredict)
+ }
+ when (!res.value(0) && isBranch) { io.resp.bits.taken := false }
+ io.resp.bits.bht := res
+ }
+
+ if (nRAS > 0) {
+ val ras = new RAS(nRAS)
+ val doPeek = (hits & useRAS).orR
+ when (!ras.isEmpty && doPeek) {
+ io.resp.bits.target := ras.peek
+ }
+ when (io.ras_update.valid) {
+ when (io.ras_update.bits.isCall) {
+ ras.push(io.ras_update.bits.returnAddr)
+ when (doPeek) {
+ io.resp.bits.target := io.ras_update.bits.returnAddr
+ }
+ }.elsewhen (io.ras_update.bits.isReturn && io.ras_update.bits.prediction.valid) {
+ ras.pop()
+ }
+ }
+ }
+}
diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala
new file mode 100644
index 00000000..74386c12
--- /dev/null
+++ b/rocket/src/main/scala/consts.scala
@@ -0,0 +1,49 @@
+// See LICENSE for license details.
+
+package rocket
+package constants
+
+import Chisel._
+import scala.math._
+
+trait ScalarOpConstants {
+ val SZ_BR = 3
+ val BR_X = BitPat("b???")
+ val BR_EQ = UInt(0, 3)
+ val BR_NE = UInt(1, 3)
+ val BR_J = UInt(2, 3)
+ val BR_N = UInt(3, 3)
+ val BR_LT = UInt(4, 3)
+ val BR_GE = UInt(5, 3)
+ val BR_LTU = UInt(6, 3)
+ val BR_GEU = UInt(7, 3)
+
+ val A1_X = BitPat("b??")
+ val A1_ZERO = UInt(0, 2)
+ val A1_RS1 = UInt(1, 2)
+ val A1_PC = UInt(2, 2)
+
+ val IMM_X = BitPat("b???")
+ val IMM_S = UInt(0, 3)
+ val IMM_SB = UInt(1, 3)
+ val IMM_U = UInt(2, 3)
+ val IMM_UJ = UInt(3, 3)
+ val IMM_I = UInt(4, 3)
+ val IMM_Z = UInt(5, 3)
+
+ val A2_X = BitPat("b??")
+ val A2_ZERO = UInt(0, 2)
+ val A2_FOUR = UInt(1, 2)
+ val A2_RS2 = UInt(2, 2)
+ val A2_IMM = UInt(3, 2)
+
+ val X = BitPat("b?")
+ val N = BitPat("b0")
+ val Y = BitPat("b1")
+
+ val SZ_DW = 1
+ val DW_X = X
+ val DW_32 = N
+ val DW_64 = Y
+ val DW_XPR = Y
+}
diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala
new file mode 100644
index 00000000..86090c2f
--- /dev/null
+++ b/rocket/src/main/scala/csr.scala
@@ -0,0 +1,589 @@
+// See LICENSE for license details.
+
+package rocket
+
+import Chisel._
+import Util._
+import Instructions._
+import cde.{Parameters, Field}
+import uncore.devices._
+import junctions.AddrMap
+
+class MStatus extends Bundle {
+ val debug = Bool() // not truly part of mstatus, but convenient
+ val prv = UInt(width = PRV.SZ) // not truly part of mstatus, but convenient
+ val sd = Bool()
+ val zero3 = UInt(width = 31)
+ val sd_rv32 = Bool()
+ val zero2 = UInt(width = 2)
+ val vm = UInt(width = 5)
+ val zero1 = UInt(width = 4)
+ val mxr = Bool()
+ val pum = Bool()
+ val mprv = Bool()
+ val xs = UInt(width = 2)
+ val fs = UInt(width = 2)
+ val mpp = UInt(width = 2)
+ val hpp = UInt(width = 2)
+ val spp = UInt(width = 1)
+ val mpie = Bool()
+ val hpie = Bool()
+ val spie = Bool()
+ val upie = Bool()
+ val mie = Bool()
+ val hie = Bool()
+ val sie = Bool()
+ val uie = Bool()
+}
+
+class DCSR extends Bundle {
+ val xdebugver = UInt(width = 2)
+ val ndreset = Bool()
+ val fullreset = Bool()
+ val hwbpcount = UInt(width = 12)
+ val ebreakm = Bool()
+ val ebreakh = Bool()
+ val ebreaks = Bool()
+ val ebreaku = Bool()
+ val zero2 = Bool()
+ val stopcycle = Bool()
+ val stoptime = Bool()
+ val cause = UInt(width = 3)
+ val debugint = Bool()
+ val zero1 = Bool()
+ val halt = Bool()
+ val step = Bool()
+ val prv = UInt(width = PRV.SZ)
+}
+
+class MIP extends Bundle {
+ val rocc = Bool()
+ val meip = Bool()
+ val heip = Bool()
+ val seip = Bool()
+ val ueip = Bool()
+ val mtip = Bool()
+ val htip = Bool()
+ val stip = Bool()
+ val utip = Bool()
+ val msip = Bool()
+ val hsip = Bool()
+ val ssip = Bool()
+ val usip = Bool()
+}
+
+class PTBR(implicit p: Parameters) extends CoreBundle()(p) {
+ require(maxPAddrBits - pgIdxBits + asIdBits <= xLen)
+ val asid = UInt(width = asIdBits)
+ val ppn = UInt(width = maxPAddrBits - pgIdxBits)
+}
+
+object PRV
+{
+ val SZ = 2
+ val U = 0
+ val S = 1
+ val H = 2
+ val M = 3
+}
+
+object CSR
+{
+ // commands
+ val SZ = 3
+ val X = BitPat.DC(SZ)
+ val N = UInt(0,SZ)
+ val W = UInt(1,SZ)
+ val S = UInt(2,SZ)
+ val C = UInt(3,SZ)
+ val I = UInt(4,SZ)
+ val R = UInt(5,SZ)
+
+ val ADDRSZ = 12
+}
+
+class CSRFileIO(implicit p: Parameters) extends CoreBundle {
+ val prci = new PRCITileIO().flip
+ val rw = new Bundle {
+ val addr = UInt(INPUT, CSR.ADDRSZ)
+ val cmd = Bits(INPUT, CSR.SZ)
+ val rdata = Bits(OUTPUT, xLen)
+ val wdata = Bits(INPUT, xLen)
+ }
+
+ val csr_stall = Bool(OUTPUT)
+ val csr_xcpt = Bool(OUTPUT)
+ val eret = Bool(OUTPUT)
+ val singleStep = Bool(OUTPUT)
+
+ val status = new MStatus().asOutput
+ val ptbr = new PTBR().asOutput
+ val evec = UInt(OUTPUT, vaddrBitsExtended)
+ val exception = Bool(INPUT)
+ val retire = UInt(INPUT, log2Up(1+retireWidth))
+ val custom_mrw_csrs = Vec(nCustomMrwCsrs, UInt(INPUT, xLen))
+ val cause = UInt(INPUT, xLen)
+ val pc = UInt(INPUT, vaddrBitsExtended)
+ val badaddr = UInt(INPUT, vaddrBitsExtended)
+ val fatc = Bool(OUTPUT)
+ val time = UInt(OUTPUT, xLen)
+ val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ)
+ val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ)).flip
+ val rocc = new RoCCInterface().flip
+ val interrupt = Bool(OUTPUT)
+ val interrupt_cause = UInt(OUTPUT, xLen)
+ val bp = Vec(p(NBreakpoints), new BP).asOutput
+}
+
+class CSRFile(implicit p: Parameters) extends CoreModule()(p)
+{
+ val io = new CSRFileIO
+
+ val reset_mstatus = Wire(init=new MStatus().fromBits(0))
+ reset_mstatus.mpp := PRV.M
+ reset_mstatus.prv := PRV.M
+ val reg_mstatus = Reg(init=reset_mstatus)
+
+ val reset_dcsr = Wire(init=new DCSR().fromBits(0))
+ reset_dcsr.xdebugver := 1
+ reset_dcsr.prv := PRV.M
+ val reg_dcsr = Reg(init=reset_dcsr)
+
+ val (supported_interrupts, delegable_interrupts) = {
+ val sup = Wire(init=new MIP().fromBits(0))
+ sup.ssip := Bool(p(UseVM))
+ sup.msip := true
+ sup.stip := Bool(p(UseVM))
+ sup.mtip := true
+ sup.meip := true
+ sup.seip := Bool(p(UseVM))
+ sup.rocc := usingRoCC
+
+ val del = Wire(init=sup)
+ del.msip := false
+ del.mtip := false
+ del.meip := false
+
+ (sup.toBits, del.toBits)
+ }
+ val delegable_exceptions = UInt(Seq(
+ Causes.misaligned_fetch,
+ Causes.fault_fetch,
+ Causes.breakpoint,
+ Causes.fault_load,
+ Causes.fault_store,
+ Causes.user_ecall).map(1 << _).sum)
+
+ val exception = io.exception || io.csr_xcpt
+ val reg_debug = Reg(init=Bool(false))
+ val reg_dpc = Reg(UInt(width = vaddrBitsExtended))
+ val reg_dscratch = Reg(UInt(width = xLen))
+
+ val reg_singleStepped = Reg(Bool())
+ when (io.retire(0) || exception) { reg_singleStepped := true }
+ when (!io.singleStep) { reg_singleStepped := false }
+ assert(!io.singleStep || io.retire <= UInt(1))
+ assert(!reg_singleStepped || io.retire === UInt(0))
+
+ val reg_tdrselect = Reg(new TDRSelect)
+ val reg_bp = Reg(Vec(1 << log2Up(p(NBreakpoints)), new BP))
+
+ val reg_mie = Reg(init=UInt(0, xLen))
+ val reg_mideleg = Reg(init=UInt(0, xLen))
+ val reg_medeleg = Reg(init=UInt(0, xLen))
+ val reg_mip = Reg(new MIP)
+ val reg_mepc = Reg(UInt(width = vaddrBitsExtended))
+ val reg_mcause = Reg(Bits(width = xLen))
+ val reg_mbadaddr = Reg(UInt(width = vaddrBitsExtended))
+ val reg_mscratch = Reg(Bits(width = xLen))
+ val reg_mtvec = Reg(init=UInt(p(MtvecInit), paddrBits min xLen))
+
+ val reg_sepc = Reg(UInt(width = vaddrBitsExtended))
+ val reg_scause = Reg(Bits(width = xLen))
+ val reg_sbadaddr = Reg(UInt(width = vaddrBitsExtended))
+ val reg_sscratch = Reg(Bits(width = xLen))
+ val reg_stvec = Reg(UInt(width = vaddrBits))
+ val reg_sptbr = Reg(new PTBR)
+ val reg_wfi = Reg(init=Bool(false))
+
+ val reg_fflags = Reg(UInt(width = 5))
+ val reg_frm = Reg(UInt(width = 3))
+
+ val reg_instret = WideCounter(64, io.retire)
+ val reg_cycle: UInt = if (enableCommitLog) { reg_instret } else { WideCounter(64) }
+
+ val mip = Wire(init=reg_mip)
+ mip.rocc := io.rocc.interrupt
+ val read_mip = mip.toBits & supported_interrupts
+
+ val pending_interrupts = read_mip & reg_mie
+ val m_interrupts = Mux(!reg_debug && (reg_mstatus.prv < PRV.M || (reg_mstatus.prv === PRV.M && reg_mstatus.mie)), pending_interrupts & ~reg_mideleg, UInt(0))
+ val s_interrupts = Mux(!reg_debug && (reg_mstatus.prv < PRV.S || (reg_mstatus.prv === PRV.S && reg_mstatus.sie)), pending_interrupts & reg_mideleg, UInt(0))
+ val all_interrupts = m_interrupts | s_interrupts
+ val interruptMSB = BigInt(1) << (xLen-1)
+ val interruptCause = interruptMSB + PriorityEncoder(all_interrupts)
+ io.interrupt := all_interrupts.orR && !io.singleStep || reg_singleStepped
+ io.interrupt_cause := interruptCause
+ io.bp := reg_bp take p(NBreakpoints)
+
+ val debugIntCause = reg_mip.getWidth
+ // debug interrupts are only masked by being in debug mode
+ when (Bool(usingDebug) && reg_dcsr.debugint && !reg_debug) {
+ io.interrupt := true
+ io.interrupt_cause := interruptMSB + debugIntCause
+ }
+
+ val system_insn = io.rw.cmd === CSR.I
+ val cpu_ren = io.rw.cmd =/= CSR.N && !system_insn
+
+ val isa_string = "IM" +
+ (if (usingVM) "S" else "") +
+ (if (usingUser) "U" else "") +
+ (if (usingAtomics) "A" else "") +
+ (if (usingFPU) "FD" else "") +
+ (if (usingRoCC) "X" else "")
+ val isa = (BigInt(log2Ceil(xLen) - 4) << (xLen-2)) |
+ isa_string.map(x => 1 << (x - 'A')).reduce(_|_)
+ val read_mstatus = io.status.toBits()(xLen-1,0)
+
+ val read_mapping = collection.mutable.LinkedHashMap[Int,Bits](
+ CSRs.tdrselect -> reg_tdrselect.toBits,
+ CSRs.tdrdata1 -> reg_bp(reg_tdrselect.tdrindex).control.toBits,
+ CSRs.tdrdata2 -> reg_bp(reg_tdrselect.tdrindex).address,
+ CSRs.mimpid -> UInt(0),
+ CSRs.marchid -> UInt(0),
+ CSRs.mvendorid -> UInt(0),
+ CSRs.mcycle -> reg_cycle,
+ CSRs.minstret -> reg_instret,
+ CSRs.mucounteren -> UInt(0),
+ CSRs.mutime_delta -> UInt(0),
+ CSRs.mucycle_delta -> UInt(0),
+ CSRs.muinstret_delta -> UInt(0),
+ CSRs.misa -> UInt(isa),
+ CSRs.mstatus -> read_mstatus,
+ CSRs.mtvec -> reg_mtvec,
+ CSRs.mip -> read_mip,
+ CSRs.mie -> reg_mie,
+ CSRs.mideleg -> reg_mideleg,
+ CSRs.medeleg -> reg_medeleg,
+ CSRs.mscratch -> reg_mscratch,
+ CSRs.mepc -> reg_mepc.sextTo(xLen),
+ CSRs.mbadaddr -> reg_mbadaddr.sextTo(xLen),
+ CSRs.mcause -> reg_mcause,
+ CSRs.mhartid -> io.prci.id)
+
+ if (usingDebug) {
+ read_mapping += CSRs.dcsr -> reg_dcsr.toBits
+ read_mapping += CSRs.dpc -> reg_dpc.toBits
+ read_mapping += CSRs.dscratch -> reg_dscratch.toBits
+ }
+
+ // Floating-point CSRs: fflags and frm are held in separate registers;
+ // fcsr is presented as their concatenation {frm, fflags}.
+ if (usingFPU) {
+ read_mapping += CSRs.fflags -> reg_fflags
+ read_mapping += CSRs.frm -> reg_frm
+ read_mapping += CSRs.fcsr -> Cat(reg_frm, reg_fflags)
+ }
+
+ // Supervisor-mode CSRs, present only when virtual memory is supported.
+ if (usingVM) {
+ // sie/sip are read as views of mie/mip restricted to the interrupts
+ // delegated to S-mode via mideleg.
+ val read_sie = reg_mie & reg_mideleg
+ val read_sip = read_mip & reg_mideleg
+ // sstatus is mstatus with the machine/hypervisor-only fields read as zero.
+ val read_sstatus = Wire(init=io.status)
+ read_sstatus.vm := 0
+ read_sstatus.mprv := 0
+ read_sstatus.mpp := 0
+ read_sstatus.hpp := 0
+ read_sstatus.mpie := 0
+ read_sstatus.hpie := 0
+ read_sstatus.mie := 0
+ read_sstatus.hie := 0
+
+ read_mapping += CSRs.sstatus -> (read_sstatus.toBits())(xLen-1,0)
+ read_mapping += CSRs.sip -> read_sip.toBits
+ read_mapping += CSRs.sie -> read_sie.toBits
+ read_mapping += CSRs.sscratch -> reg_sscratch
+ read_mapping += CSRs.scause -> reg_scause
+ read_mapping += CSRs.sbadaddr -> reg_sbadaddr.sextTo(xLen)
+ read_mapping += CSRs.sptbr -> reg_sptbr.toBits
+ read_mapping += CSRs.sepc -> reg_sepc.sextTo(xLen)
+ read_mapping += CSRs.stvec -> reg_stvec.sextTo(xLen)
+ // Counter-delta CSRs are not implemented; they read as hardwired zero.
+ read_mapping += CSRs.mscounteren -> UInt(0)
+ read_mapping += CSRs.mstime_delta -> UInt(0)
+ read_mapping += CSRs.mscycle_delta -> UInt(0)
+ read_mapping += CSRs.msinstret_delta -> UInt(0)
+ }
+
+ // RV32 only: expose the upper halves of the 64-bit counters via the *h CSRs.
+ if (xLen == 32) {
+ read_mapping += CSRs.mcycleh -> (reg_cycle >> 32)
+ read_mapping += CSRs.minstreth -> (reg_instret >> 32)
+ read_mapping += CSRs.mutime_deltah -> UInt(0)
+ read_mapping += CSRs.mucycle_deltah -> UInt(0)
+ read_mapping += CSRs.muinstret_deltah -> UInt(0)
+ if (usingVM) {
+ read_mapping += CSRs.mstime_deltah -> UInt(0)
+ read_mapping += CSRs.mscycle_deltah -> UInt(0)
+ read_mapping += CSRs.msinstret_deltah -> UInt(0)
+ }
+ }
+
+ // Custom machine-mode read/write CSRs occupy addresses 0xff0 upward;
+ // elaboration-time requires guard against overflowing the CSR address
+ // space or colliding with an architectural CSR.
+ for (i <- 0 until nCustomMrwCsrs) {
+ val addr = 0xff0 + i
+ require(addr < (1 << CSR.ADDRSZ))
+ require(!read_mapping.contains(addr), "custom MRW CSR address " + i + " is already in use")
+ read_mapping += addr -> io.custom_mrw_csrs(i)
+ }
+
+ // RoCC accelerator CSRs; addresses come from the RoCC configuration and
+ // must likewise not collide with anything already mapped.
+ for ((addr, i) <- roccCsrs.zipWithIndex) {
+ require(!read_mapping.contains(addr), "RoCC: CSR address " + addr + " is already in use")
+ read_mapping += addr -> io.rocc.csr.rdata(i)
+ }
+
+ // One-hot decode of the CSR address against every mapped CSR.
+ val decoded_addr = read_mapping map { case (k, v) => k -> (io.rw.addr === k) }
+
+ val addr_valid = decoded_addr.values.reduce(_||_)
+ // FP CSR accesses additionally require mstatus.FS to be non-zero (checked below).
+ val fp_csr =
+ if (usingFPU) decoded_addr(CSRs.fflags) || decoded_addr(CSRs.frm) || decoded_addr(CSRs.fcsr)
+ else Bool(false)
+ val csr_debug = Bool(usingDebug) && io.rw.addr(5)
+ // Per the RISC-V CSR convention, addr[9:8] encodes the minimum privilege;
+ // the debug bit is prepended so debug-mode CSRs outrank all others.
+ val csr_addr_priv = Cat(io.rw.addr(6,5).andR, io.rw.addr(9,8))
+ val priv_sufficient = Cat(reg_debug, reg_mstatus.prv) >= csr_addr_priv
+ // addr[11:10] == 0b11 marks a read-only CSR.
+ val read_only = io.rw.addr(11,10).andR
+ val cpu_wen = cpu_ren && io.rw.cmd =/= CSR.R && priv_sufficient
+ val wen = cpu_wen && !read_only
+
+ // Merge the write command into a single write value:
+ // S: rdata | wdata, C: rdata & ~wdata, W: wdata.
+ val wdata = (Mux((io.rw.cmd === CSR.S || io.rw.cmd === CSR.C), io.rw.rdata, UInt(0)) |
+ Mux(io.rw.cmd =/= CSR.C, io.rw.wdata, UInt(0))) &
+ ~Mux(io.rw.cmd === CSR.C, io.rw.wdata, UInt(0))
+
+ // System instructions (ECALL/EBREAK/xRET/SFENCE.VM/WFI) are distinguished
+ // by a one-hot decode of addr[2:0].
+ val do_system_insn = priv_sufficient && system_insn
+ val opcode = UInt(1) << io.rw.addr(2,0)
+ val insn_call = do_system_insn && opcode(0)
+ val insn_break = do_system_insn && opcode(1)
+ val insn_ret = do_system_insn && opcode(2)
+ val insn_sfence_vm = do_system_insn && opcode(4)
+ val insn_wfi = do_system_insn && opcode(5)
+
+ // Raise a CSR exception for: writes to read-only CSRs, insufficient
+ // privilege, unmapped addresses, FP CSR access with FP disabled,
+ // and (by design) ECALL/EBREAK themselves.
+ io.csr_xcpt := (cpu_wen && read_only) ||
+ (cpu_ren && (!priv_sufficient || !addr_valid || fp_csr && !io.status.fs.orR)) ||
+ (system_insn && !priv_sufficient) ||
+ insn_call || insn_break
+
+ // WFI stalls the core until any interrupt becomes pending; the second
+ // when() wins on conflict (Chisel last-connect), so a pending interrupt
+ // always clears the stall.
+ when (insn_wfi) { reg_wfi := true }
+ when (pending_interrupts.orR) { reg_wfi := false }
+
+ // Trap cause: externally supplied unless this CSR unit itself raised the
+ // exception (ECALL cause depends on the current privilege level).
+ val cause =
+ Mux(!io.csr_xcpt, io.cause,
+ Mux(insn_call, reg_mstatus.prv + Causes.user_ecall,
+ Mux[UInt](insn_break, Causes.breakpoint, Causes.illegal_instruction)))
+ val cause_lsbs = cause(log2Up(xLen)-1,0)
+ // cause MSB set marks an interrupt.
+ val causeIsDebugInt = cause(xLen-1) && cause_lsbs === debugIntCause
+ val causeIsDebugBreak = cause === Causes.breakpoint && Cat(reg_dcsr.ebreakm, reg_dcsr.ebreakh, reg_dcsr.ebreaks, reg_dcsr.ebreaku)(reg_mstatus.prv)
+ val trapToDebug = Bool(usingDebug) && (reg_singleStepped || causeIsDebugInt || causeIsDebugBreak || reg_debug)
+ // Delegate to S-mode when below M-mode and the cause is delegated via
+ // mideleg (interrupts) or medeleg (exceptions).
+ val delegate = Bool(p(UseVM)) && reg_mstatus.prv < PRV.M && Mux(cause(xLen-1), reg_mideleg(cause_lsbs), reg_medeleg(cause_lsbs))
+ // 0x800/0x808 are the debug trap entry points; presumably fixed Debug ROM
+ // addresses -- confirm against the debug module's memory map.
+ val debugTVec = Mux(reg_debug, UInt(0x808), UInt(0x800))
+ val tvec = Mux(trapToDebug, debugTVec, Mux(delegate, reg_stvec.sextTo(vaddrBitsExtended), reg_mtvec))
+ // EPC for xRET, selected by which return instruction is executing.
+ val epc = Mux(csr_debug, reg_dpc, Mux(Bool(p(UseVM)) && !csr_addr_priv(1), reg_sepc, reg_mepc))
+ io.fatc := insn_sfence_vm
+ io.evec := Mux(exception, tvec, epc)
+ io.ptbr := reg_sptbr
+ io.eret := insn_ret
+ io.singleStep := reg_dcsr.step && !reg_debug
+ io.status := reg_mstatus
+ // SD summarizes dirty FP/extension state so context-switch code can skip saves.
+ io.status.sd := io.status.fs.andR || io.status.xs.andR
+ io.status.debug := reg_debug
+ if (xLen == 32)
+ io.status.sd_rv32 := io.status.sd
+
+ // Trap entry: record the faulting PC and cause, stash the interrupt-enable
+ // bit into xPIE, and escalate the privilege level. The three arms are
+ // mutually exclusive and ordered debug > delegated (S) > machine.
+ when (exception) {
+ // Align the PC down to an instruction boundary.
+ val epc = ~(~io.pc | (coreInstBytes-1))
+ // Current mode's IE bit, bit-indexed out of mstatus by the privilege
+ // level (uie/sie/hie/mie occupy mstatus[3:0] in priv-1.9).
+ val pie = read_mstatus(reg_mstatus.prv)
+
+ when (trapToDebug) {
+ reg_debug := true
+ reg_dpc := epc
+ // dcsr.cause: 4 = single step, 3 = debug interrupt, 1 = ebreak.
+ reg_dcsr.cause := Mux(reg_singleStepped, UInt(4), Mux(causeIsDebugInt, UInt(3), UInt(1)))
+ reg_dcsr.prv := reg_mstatus.prv
+ }.elsewhen (delegate) {
+ reg_sepc := epc
+ reg_scause := cause
+ reg_sbadaddr := io.badaddr
+ reg_mstatus.spie := pie
+ reg_mstatus.spp := reg_mstatus.prv
+ reg_mstatus.sie := false
+ reg_mstatus.prv := PRV.S
+ }.otherwise {
+ reg_mepc := epc
+ reg_mcause := cause
+ reg_mbadaddr := io.badaddr
+ reg_mstatus.mpie := pie
+ reg_mstatus.mpp := reg_mstatus.prv
+ reg_mstatus.mie := false
+ reg_mstatus.prv := PRV.M
+ }
+ }
+
+ // Trap return: restore the stacked IE bit and privilege level. Note the
+ // reads on the right-hand side see the pre-update register values, so
+ // e.g. prv := spp uses the old spp even though spp is also written here.
+ when (insn_ret) {
+ when (Bool(p(UseVM)) && !csr_addr_priv(1)) {
+ // SRET
+ when (reg_mstatus.spp.toBool) { reg_mstatus.sie := reg_mstatus.spie }
+ reg_mstatus.spie := false
+ reg_mstatus.spp := PRV.U
+ reg_mstatus.prv := reg_mstatus.spp
+ }.elsewhen (csr_debug) {
+ // DRET: leave debug mode, back to the mode recorded in dcsr.
+ reg_mstatus.prv := reg_dcsr.prv
+ reg_debug := false
+ }.otherwise {
+ // MRET
+ when (reg_mstatus.mpp(1)) { reg_mstatus.mie := reg_mstatus.mpie }
+ .elsewhen (Bool(usingVM) && reg_mstatus.mpp(0)) { reg_mstatus.sie := reg_mstatus.mpie }
+ reg_mstatus.mpie := false
+ reg_mstatus.mpp := PRV.U
+ reg_mstatus.prv := reg_mstatus.mpp
+ }
+ }
+
+ assert(PopCount(insn_ret :: io.exception :: io.csr_xcpt :: Nil) <= 1, "these conditions must be mutually exclusive")
+
+ io.time := reg_cycle
+ io.csr_stall := reg_wfi
+
+ // CSR read data: one-hot mux across all mapped CSRs.
+ io.rw.rdata := Mux1H(for ((k, v) <- read_mapping) yield decoded_addr(k) -> v)
+
+ // FP rounding mode to the FPU; accumulate FP exception flags as they arrive.
+ io.fcsr_rm := reg_frm
+ when (io.fcsr_flags.valid) {
+ reg_fflags := reg_fflags | io.fcsr_flags.bits
+ }
+
+ // Privilege modes this configuration implements: always M, plus U and/or S.
+ val supportedModes = Vec((PRV.M +: (if (usingUser) Some(PRV.U) else None) ++: (if (usingVM) Seq(PRV.S) else Nil)).map(UInt(_)))
+
+ // CSR write side-effects. Each CSR masks the incoming wdata down to the
+ // fields this implementation actually supports (WARL behavior).
+ when (wen) {
+ when (decoded_addr(CSRs.mstatus)) {
+ val new_mstatus = new MStatus().fromBits(wdata)
+ reg_mstatus.mie := new_mstatus.mie
+ reg_mstatus.mpie := new_mstatus.mpie
+
+ // Fields below only exist when more than one privilege mode is present.
+ if (supportedModes.size > 1) {
+ reg_mstatus.mprv := new_mstatus.mprv
+ // mpp is WARL: only accept privilege levels that exist.
+ when (supportedModes contains new_mstatus.mpp) { reg_mstatus.mpp := new_mstatus.mpp }
+ if (supportedModes.size > 2) {
+ reg_mstatus.mxr := new_mstatus.mxr
+ reg_mstatus.pum := new_mstatus.pum
+ reg_mstatus.spp := new_mstatus.spp
+ reg_mstatus.spie := new_mstatus.spie
+ reg_mstatus.sie := new_mstatus.sie
+ }
+ }
+
+ if (usingVM) {
+ require(if (xLen == 32) pgLevels == 2 else pgLevels > 2 && pgLevels < 6)
+ // Only Mbare (0) and the one configured Sv mode are accepted.
+ val vm_on = 6 + pgLevels // TODO Sv48 support should imply Sv39 support
+ when (new_mstatus.vm === 0) { reg_mstatus.vm := 0 }
+ when (new_mstatus.vm === vm_on) { reg_mstatus.vm := vm_on }
+ }
+ // FS/XS are collapsed to off (00) or dirty (11).
+ if (usingVM || usingFPU) reg_mstatus.fs := Fill(2, new_mstatus.fs.orR)
+ if (usingRoCC) reg_mstatus.xs := Fill(2, new_mstatus.xs.orR)
+ }
+ when (decoded_addr(CSRs.mip)) {
+ // Only the software/timer S-mode pending bits are writable via mip.
+ val new_mip = new MIP().fromBits(wdata)
+ if (usingVM) {
+ reg_mip.ssip := new_mip.ssip
+ reg_mip.stip := new_mip.stip
+ }
+ }
+ when (decoded_addr(CSRs.mie)) { reg_mie := wdata & supported_interrupts }
+ // xEPC/dpc writes are aligned down to an instruction boundary.
+ when (decoded_addr(CSRs.mepc)) { reg_mepc := ~(~wdata | (coreInstBytes-1)) }
+ when (decoded_addr(CSRs.mscratch)) { reg_mscratch := wdata }
+ if (p(MtvecWritable))
+ when (decoded_addr(CSRs.mtvec)) { reg_mtvec := wdata >> 2 << 2 }
+ when (decoded_addr(CSRs.mcause)) { reg_mcause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ }
+ when (decoded_addr(CSRs.mbadaddr)) { reg_mbadaddr := wdata(vaddrBitsExtended-1,0) }
+ if (usingFPU) {
+ when (decoded_addr(CSRs.fflags)) { reg_fflags := wdata }
+ when (decoded_addr(CSRs.frm)) { reg_frm := wdata }
+ // fcsr packs {frm, fflags}; split the write accordingly.
+ when (decoded_addr(CSRs.fcsr)) { reg_fflags := wdata; reg_frm := wdata >> reg_fflags.getWidth }
+ }
+ if (usingDebug) {
+ when (decoded_addr(CSRs.dcsr)) {
+ val new_dcsr = new DCSR().fromBits(wdata)
+ reg_dcsr.halt := new_dcsr.halt
+ reg_dcsr.step := new_dcsr.step
+ reg_dcsr.ebreakm := new_dcsr.ebreakm
+ if (usingVM) reg_dcsr.ebreaks := new_dcsr.ebreaks
+ if (usingUser) reg_dcsr.ebreaku := new_dcsr.ebreaku
+ if (supportedModes.size > 1) reg_dcsr.prv := new_dcsr.prv
+ }
+ when (decoded_addr(CSRs.dpc)) { reg_dpc := ~(~wdata | (coreInstBytes-1)) }
+ when (decoded_addr(CSRs.dscratch)) { reg_dscratch := wdata }
+ }
+ if (usingVM) {
+ when (decoded_addr(CSRs.sstatus)) {
+ // sstatus writes update only the S-visible slice of mstatus.
+ val new_sstatus = new MStatus().fromBits(wdata)
+ reg_mstatus.sie := new_sstatus.sie
+ reg_mstatus.spie := new_sstatus.spie
+ reg_mstatus.spp := new_sstatus.spp
+ reg_mstatus.pum := new_sstatus.pum
+ reg_mstatus.fs := Fill(2, new_sstatus.fs.orR) // even without an FPU
+ if (usingRoCC) reg_mstatus.xs := Fill(2, new_sstatus.xs.orR)
+ }
+ when (decoded_addr(CSRs.sip)) {
+ // S-mode may only set/clear its own software interrupt.
+ val new_sip = new MIP().fromBits(wdata)
+ reg_mip.ssip := new_sip.ssip
+ }
+ // sie writes affect only the delegated interrupt-enable bits of mie.
+ when (decoded_addr(CSRs.sie)) { reg_mie := (reg_mie & ~reg_mideleg) | (wdata & reg_mideleg) }
+ when (decoded_addr(CSRs.sscratch)) { reg_sscratch := wdata }
+ when (decoded_addr(CSRs.sptbr)) { reg_sptbr.ppn := wdata(ppnBits-1,0) }
+ when (decoded_addr(CSRs.sepc)) { reg_sepc := ~(~wdata | (coreInstBytes-1)) }
+ when (decoded_addr(CSRs.stvec)) { reg_stvec := wdata >> 2 << 2 }
+ when (decoded_addr(CSRs.scause)) { reg_scause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ }
+ when (decoded_addr(CSRs.sbadaddr)) { reg_sbadaddr := wdata(vaddrBitsExtended-1,0) }
+ when (decoded_addr(CSRs.mideleg)) { reg_mideleg := wdata & delegable_interrupts }
+ when (decoded_addr(CSRs.medeleg)) { reg_medeleg := wdata & delegable_exceptions }
+ }
+ if (p(NBreakpoints) > 0) {
+ val newTDR = new TDRSelect().fromBits(wdata)
+ when (decoded_addr(CSRs.tdrselect)) { reg_tdrselect.tdrindex := newTDR.tdrindex }
+
+ // Breakpoint registers are writable only by debug mode or when not
+ // stolen by debug (tdrmode).
+ when (reg_tdrselect.tdrmode || reg_debug) {
+ when (decoded_addr(CSRs.tdrdata1)) {
+ val newBPC = new BPControl().fromBits(wdata)
+ reg_bp(reg_tdrselect.tdrindex).control := newBPC
+ reg_bp(reg_tdrselect.tdrindex).control.bpmatch := newBPC.bpmatch & 2 /* exact/NAPOT only */
+ }
+ when (decoded_addr(CSRs.tdrdata2)) { reg_bp(reg_tdrselect.tdrindex).address := wdata }
+ }
+ }
+ }
+
+ // Sample external interrupt lines each cycle; because these assignments
+ // come last, they override any same-cycle software writes to the
+ // corresponding bits (Chisel last-connect semantics).
+ reg_mip := io.prci.interrupts
+ reg_dcsr.debugint := io.prci.interrupts.debug
+ reg_dcsr.hwbpcount := UInt(p(NBreakpoints))
+
+ // Forward every CSR write to the RoCC accelerator interface.
+ io.rocc.csr.waddr := io.rw.addr
+ io.rocc.csr.wdata := wdata
+ io.rocc.csr.wen := wen
+
+ // With only M-mode implemented, pin the privilege state to M.
+ if (!usingUser) {
+ reg_mstatus.mpp := PRV.M
+ reg_mstatus.prv := PRV.M
+ reg_mstatus.mprv := false
+ }
+
+ // Hardwire unimplemented/reserved fields.
+ reg_sptbr.asid := 0
+ reg_tdrselect.reserved := 0
+ reg_tdrselect.tdrmode := true // TODO support D-mode breakpoint theft
+ if (reg_bp.isEmpty) reg_tdrselect.tdrindex := 0
+ // Tie off unsupported breakpoint-control fields per configuration, and
+ // disable all breakpoints out of reset.
+ for (bpc <- reg_bp map {_.control}) {
+ bpc.tdrtype := bpc.tdrType
+ bpc.bpamaskmax := bpc.bpaMaskMax
+ bpc.reserved := 0
+ bpc.bpaction := 0
+ bpc.h := false
+ if (!usingVM) bpc.s := false
+ if (!usingUser) bpc.u := false
+ if (!usingVM && !usingUser) bpc.m := true
+ when (reset) {
+ bpc.r := false
+ bpc.w := false
+ bpc.x := false
+ }
+ }
+ // Breakpoints beyond the configured count are forced to zero.
+ for (bp <- reg_bp drop p(NBreakpoints))
+ bp := new BP().fromBits(0)
+}
diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala
new file mode 100644
index 00000000..82de400e
--- /dev/null
+++ b/rocket/src/main/scala/dcache.scala
@@ -0,0 +1,447 @@
+// See LICENSE for license details.
+
+package rocket
+
+import Chisel._
+import junctions._
+import uncore.tilelink._
+import uncore.agents._
+import uncore.coherence._
+import uncore.util._
+import uncore.constants._
+import cde.{Parameters, Field}
+import Util._
+
+// Request into the data array: an untagged address, a write flag, one row
+// of write data with a per-byte mask, and a one-hot way-enable vector.
+class DCacheDataReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) {
+ val addr = Bits(width = untagBits)
+ val write = Bool()
+ val wdata = Bits(width = rowBits)
+ val wmask = Bits(width = rowBytes) // per-byte write enables
+ val way_en = Bits(width = nWays) // one-hot way select
+}
+
+// Data storage: one synchronous-read memory (SeqMem) per way, organized as
+// byte lanes so writes can be masked per byte. Reads and writes share one
+// port; a read's data appears on io.resp the cycle after the request.
+class DCacheDataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) {
+ val io = new Bundle {
+ val req = Valid(new DCacheDataReq).flip
+ val resp = Vec(nWays, Bits(OUTPUT, rowBits)) // per-way read data
+ }
+
+ val addr = io.req.bits.addr >> rowOffBits
+ for (w <- 0 until nWays) {
+ val array = SeqMem(nSets*refillCycles, Vec(rowBytes, Bits(width=8)))
+ // With a single way there is no way_en bit to check.
+ val valid = io.req.valid && (Bool(nWays == 1) || io.req.bits.way_en(w))
+ when (valid && io.req.bits.write) {
+ // Split the row into bytes so wmask can gate each lane.
+ val data = Vec.tabulate(rowBytes)(i => io.req.bits.wdata(8*(i+1)-1, 8*i))
+ array.write(addr, data, io.req.bits.wmask.toBools)
+ }
+ io.resp(w) := array.read(addr, valid && !io.req.bits.write).toBits
+ }
+}
+
+// A simple L1 data cache. Requests flow through a short pipeline (s0 =
+// arbitration/array access, s1 = TLB + tag compare, s2 = hit/miss
+// resolution). Misses, probes, and writebacks are handled by a small state
+// machine over the TileLink client interface; the cache blocks on a miss
+// (single outstanding acquire). Note: many control signals rely on Chisel
+// last-connect semantics -- a later := overrides an earlier default.
+class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
+ val io = new Bundle {
+ val cpu = (new HellaCacheIO).flip
+ val ptw = new TLBPTWIO()
+ val mem = new ClientTileLinkIO
+ }
+
+ val fq = Module(new FinishQueue(1))
+
+ require(rowBits == encRowBits) // no ECC
+ require(refillCyclesPerBeat == 1)
+ require(rowBits >= coreDataBits)
+
+ // tags
+ val replacer = p(Replacer)()
+ def onReset = L1Metadata(UInt(0), ClientMetadata.onReset)
+ val meta = Module(new MetadataArray(onReset _))
+ // Arbiter priorities (index 0 highest): flush / probe / CPU request.
+ val metaReadArb = Module(new Arbiter(new MetaReadReq, 3))
+ val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, 3))
+ meta.io.read <> metaReadArb.io.out
+ meta.io.write <> metaWriteArb.io.out
+
+ // data
+ val data = Module(new DCacheDataArray)
+ // Arbiter priorities: store drain / refill / writeback read / CPU load.
+ val dataArb = Module(new Arbiter(new DCacheDataReq, 4))
+ data.io.req <> dataArb.io.out
+ dataArb.io.out.ready := true
+
+ val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false))
+ val s1_probe = Reg(next=io.mem.probe.fire(), init=Bool(false))
+ val probe_bits = RegEnable(io.mem.probe.bits, io.mem.probe.fire())
+ val s1_nack = Wire(init=Bool(false))
+ val s1_valid_masked = s1_valid && !io.cpu.s1_kill
+ val s1_valid_not_nacked = s1_valid_masked && !s1_nack
+ val s1_req = Reg(io.cpu.req.bits)
+ when (metaReadArb.io.out.valid) {
+ s1_req := io.cpu.req.bits
+ // The index may come from a non-CPU requester (flush/probe), so splice
+ // the arbitrated index into the request address.
+ s1_req.addr := Cat(io.cpu.req.bits.addr >> untagBits, metaReadArb.io.out.bits.idx, io.cpu.req.bits.addr(blockOffBits-1,0))
+ }
+ val s1_read = isRead(s1_req.cmd)
+ val s1_write = isWrite(s1_req.cmd)
+ val s1_readwrite = s1_read || s1_write
+ val s1_flush_valid = Reg(Bool())
+
+ // Release/writeback state machine states.
+ val s_ready :: s_grant_wait :: s_voluntary_writeback :: s_probe_rep_dirty :: s_probe_rep_clean :: s_probe_rep_miss :: s_voluntary_write_meta :: s_probe_write_meta :: Nil = Enum(UInt(), 8)
+ val grant_wait = Reg(init=Bool(false))
+ val release_ack_wait = Reg(init=Bool(false))
+ val release_state = Reg(init=s_ready)
+ val pstore1_valid = Wire(Bool())
+ val pstore2_valid = Reg(Bool())
+ val inWriteback = release_state === s_voluntary_writeback || release_state === s_probe_rep_dirty
+ val releaseWay = Wire(UInt())
+ // Default ready; refined (overridden) below by structural-hazard checks.
+ io.cpu.req.ready := (release_state === s_ready) && !grant_wait && !s1_nack
+
+ // hit initiation path
+ dataArb.io.in(3).valid := io.cpu.req.valid && isRead(io.cpu.req.bits.cmd)
+ dataArb.io.in(3).bits.write := false
+ dataArb.io.in(3).bits.addr := io.cpu.req.bits.addr
+ dataArb.io.in(3).bits.way_en := ~UInt(0, nWays)
+ when (!dataArb.io.in(3).ready && isRead(io.cpu.req.bits.cmd)) { io.cpu.req.ready := false }
+ metaReadArb.io.in(2).valid := io.cpu.req.valid
+ metaReadArb.io.in(2).bits.idx := io.cpu.req.bits.addr(idxMSB, idxLSB)
+ metaReadArb.io.in(2).bits.way_en := ~UInt(0, nWays)
+ when (!metaReadArb.io.in(2).ready) { io.cpu.req.ready := false }
+
+ // address translation
+ val tlb = Module(new TLB)
+ io.ptw <> tlb.io.ptw
+ tlb.io.req.valid := s1_valid_masked && s1_readwrite
+ tlb.io.req.bits.passthrough := s1_req.phys
+ tlb.io.req.bits.vpn := s1_req.addr >> pgIdxBits
+ tlb.io.req.bits.instruction := false
+ tlb.io.req.bits.store := s1_write
+ when (!tlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := false }
+ // A TLB miss nacks the request; the PTW refill proceeds in the background.
+ when (s1_valid && s1_readwrite && tlb.io.resp.miss) { s1_nack := true }
+
+ val s1_paddr = Cat(tlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0))
+ // During a probe, compare against the probed block's tag instead.
+ val s1_tag = Mux(s1_probe, probe_bits.addr_block >> idxBits, s1_paddr(paddrBits-1, untagBits))
+ val s1_hit_way = meta.io.resp.map(r => r.coh.isValid() && r.tag === s1_tag).toBits
+ val s1_hit_state = ClientMetadata.onReset.fromBits(
+ meta.io.resp.map(r => Mux(r.tag === s1_tag, r.coh.toBits, UInt(0)))
+ .reduce (_|_))
+ val s1_data_way = Mux(inWriteback, releaseWay, s1_hit_way)
+ val s1_data = Mux1H(s1_data_way, data.io.resp) // retime into s2 if critical
+ val s1_victim_way = Wire(init = replacer.way)
+
+ val s2_valid = Reg(next=s1_valid_masked, init=Bool(false))
+ val s2_probe = Reg(next=s1_probe, init=Bool(false))
+ val releaseInFlight = s1_probe || s2_probe || release_state =/= s_ready
+ val s2_valid_masked = s2_valid && Reg(next = !s1_nack)
+ val s2_req = Reg(io.cpu.req.bits)
+ val s2_uncached = Reg(Bool())
+ when (s1_valid_not_nacked || s1_flush_valid) {
+ s2_req := s1_req
+ s2_req.addr := s1_paddr
+ s2_uncached := !tlb.io.resp.cacheable
+ }
+ val s2_read = isRead(s2_req.cmd)
+ val s2_write = isWrite(s2_req.cmd)
+ val s2_readwrite = s2_read || s2_write
+ val s2_flush_valid = RegNext(s1_flush_valid)
+ val s2_data = RegEnable(s1_data, s1_valid || inWriteback)
+ val s2_probe_way = RegEnable(s1_hit_way, s1_probe)
+ val s2_probe_state = RegEnable(s1_hit_state, s1_probe)
+ val s2_hit_way = RegEnable(s1_hit_way, s1_valid_not_nacked)
+ val s2_hit_state = RegEnable(s1_hit_state, s1_valid_not_nacked)
+ val s2_hit = s2_hit_state.isHit(s2_req.cmd)
+ val s2_valid_hit = s2_valid_masked && s2_readwrite && s2_hit
+ // A miss can only start once pending stores drain and no release-ack is outstanding.
+ val s2_valid_miss = s2_valid_masked && s2_readwrite && !s2_hit && !(pstore1_valid || pstore2_valid) && !release_ack_wait
+ val s2_valid_cached_miss = s2_valid_miss && !s2_uncached
+ val s2_victimize = s2_valid_cached_miss || s2_flush_valid
+ val s2_valid_uncached = s2_valid_miss && s2_uncached
+ // On an upgrade miss the "victim" is the hit way itself.
+ val s2_victim_way = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_way, UIntToOH(RegEnable(s1_victim_way, s1_valid_not_nacked || s1_flush_valid)))
+ val s2_victim_tag = RegEnable(meta.io.resp(s1_victim_way).tag, s1_valid_not_nacked || s1_flush_valid)
+ val s2_victim_state = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_state, RegEnable(meta.io.resp(s1_victim_way).coh, s1_valid_not_nacked || s1_flush_valid))
+ val s2_victim_valid = s2_victim_state.isValid()
+ val s2_victim_dirty = s2_victim_state.requiresVoluntaryWriteback()
+ io.cpu.s2_nack := s2_valid && !s2_valid_hit && !(s2_valid_uncached && io.mem.acquire.ready)
+ // An s2 miss also nacks whatever is in s1 behind it.
+ when (s2_valid && !s2_valid_hit) { s1_nack := true }
+
+ // exceptions
+ val misaligned = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).misaligned
+ io.cpu.xcpt.ma.ld := s1_read && misaligned
+ io.cpu.xcpt.ma.st := s1_write && misaligned
+ io.cpu.xcpt.pf.ld := s1_read && tlb.io.resp.xcpt_ld
+ io.cpu.xcpt.pf.st := s1_write && tlb.io.resp.xcpt_st
+ assert(!(Reg(next=
+ (io.cpu.xcpt.ma.ld || io.cpu.xcpt.ma.st || io.cpu.xcpt.pf.ld || io.cpu.xcpt.pf.st)) &&
+ s2_valid_masked),
+ "DCache exception occurred - cache response not killed.")
+
+ // load reservations (LR/SC)
+ val s2_lr = Bool(usingAtomics) && s2_req.cmd === M_XLR
+ val s2_sc = Bool(usingAtomics) && s2_req.cmd === M_XSC
+ // The reservation times out after lrscCycles to guarantee forward progress.
+ val lrscCount = Reg(init=UInt(0))
+ val lrscValid = lrscCount > 0
+ val lrscAddr = Reg(UInt())
+ val s2_sc_fail = s2_sc && !(lrscValid && lrscAddr === (s2_req.addr >> blockOffBits))
+ when (s2_valid_hit && s2_lr) {
+ lrscCount := lrscCycles - 1
+ lrscAddr := s2_req.addr >> blockOffBits
+ }
+ when (lrscValid) { lrscCount := lrscCount - 1 }
+ when ((s2_valid_hit && s2_sc) || io.cpu.invalidate_lr) { lrscCount := 0 }
+
+ // pending store buffer: stores retire from s2 into pstore1, then pstore2,
+ // and drain into the data array when the read port is free (or must drain).
+ val pstore1_cmd = RegEnable(s1_req.cmd, s1_valid_not_nacked && s1_write)
+ val pstore1_typ = RegEnable(s1_req.typ, s1_valid_not_nacked && s1_write)
+ val pstore1_addr = RegEnable(s1_paddr, s1_valid_not_nacked && s1_write)
+ val pstore1_data = RegEnable(io.cpu.s1_data, s1_valid_not_nacked && s1_write)
+ val pstore1_way = RegEnable(s1_hit_way, s1_valid_not_nacked && s1_write)
+ val pstore1_storegen = new StoreGen(pstore1_typ, pstore1_addr, pstore1_data, wordBytes)
+ val pstore1_storegen_data = Wire(init = pstore1_storegen.data)
+ // AMOs read-modify-write, so they look like reads with a write command.
+ val pstore1_amo = Bool(usingAtomics) && isRead(pstore1_cmd)
+ val pstore_drain_structural = pstore1_valid && pstore2_valid && ((s1_valid && s1_write) || pstore1_amo)
+ val pstore_drain_opportunistic = !(io.cpu.req.valid && isRead(io.cpu.req.bits.cmd))
+ val pstore_drain_on_miss = releaseInFlight || io.cpu.s2_nack
+ val pstore_drain =
+ Bool(usingAtomics) && pstore_drain_structural ||
+ (((pstore1_valid && !pstore1_amo) || pstore2_valid) && (pstore_drain_opportunistic || pstore_drain_on_miss))
+ pstore1_valid := {
+ val s2_store_valid = s2_valid_hit && s2_write && !s2_sc_fail
+ val pstore1_held = Reg(Bool())
+ assert(!s2_store_valid || !pstore1_held)
+ pstore1_held := (s2_store_valid || pstore1_held) && pstore2_valid && !pstore_drain
+ s2_store_valid || pstore1_held
+ }
+ val advance_pstore1 = pstore1_valid && (pstore2_valid === pstore_drain)
+ pstore2_valid := pstore2_valid && !pstore_drain || advance_pstore1
+ val pstore2_addr = RegEnable(pstore1_addr, advance_pstore1)
+ val pstore2_way = RegEnable(pstore1_way, advance_pstore1)
+ val pstore2_storegen_data = RegEnable(pstore1_storegen_data, advance_pstore1)
+ val pstore2_storegen_mask = RegEnable(pstore1_storegen.mask, advance_pstore1)
+ dataArb.io.in(0).valid := pstore_drain
+ dataArb.io.in(0).bits.write := true
+ dataArb.io.in(0).bits.addr := Mux(pstore2_valid, pstore2_addr, pstore1_addr)
+ dataArb.io.in(0).bits.way_en := Mux(pstore2_valid, pstore2_way, pstore1_way)
+ dataArb.io.in(0).bits.wdata := Fill(rowWords, Mux(pstore2_valid, pstore2_storegen_data, pstore1_storegen_data))
+ val pstore_mask_shift = Mux(pstore2_valid, pstore2_addr, pstore1_addr).extract(rowOffBits-1,offsetlsb) << wordOffBits
+ dataArb.io.in(0).bits.wmask := Mux(pstore2_valid, pstore2_storegen_mask, pstore1_storegen.mask) << pstore_mask_shift
+
+ // store->load RAW hazard detection: a load that could alias a buffered
+ // store is nacked rather than bypassed.
+ val s1_idx = s1_req.addr(idxMSB, wordOffBits)
+ val s1_raw_hazard = s1_read &&
+ ((pstore1_valid && pstore1_addr(idxMSB, wordOffBits) === s1_idx) ||
+ (pstore2_valid && pstore2_addr(idxMSB, wordOffBits) === s1_idx))
+ when (s1_valid && s1_raw_hazard) { s1_nack := true }
+
+ // Metadata update on a hit that changes coherence state, or invalidation
+ // of a clean victim (a dirty victim is handled by the release machine).
+ val s2_new_hit_state = s2_hit_state.onHit(s2_req.cmd)
+ metaWriteArb.io.in(0).valid := (s2_valid_hit && s2_hit_state =/= s2_new_hit_state) || (s2_victimize && !s2_victim_dirty)
+ metaWriteArb.io.in(0).bits.way_en := s2_victim_way
+ metaWriteArb.io.in(0).bits.idx := s2_req.addr(idxMSB, idxLSB)
+ metaWriteArb.io.in(0).bits.data.coh := Mux(s2_hit, s2_new_hit_state, ClientMetadata.onReset)
+ metaWriteArb.io.in(0).bits.data.tag := s2_req.addr(paddrBits-1, untagBits)
+
+ // acquire: one of four message types; the nested whens below pick the
+ // most specific one (last connection wins).
+ val cachedGetMessage = s2_hit_state.makeAcquire(
+ client_xact_id = UInt(0),
+ addr_block = s2_req.addr(paddrBits-1, blockOffBits),
+ op_code = s2_req.cmd)
+ val uncachedGetMessage = Get(
+ client_xact_id = UInt(0),
+ addr_block = s2_req.addr(paddrBits-1, blockOffBits),
+ addr_beat = s2_req.addr(blockOffBits-1, beatOffBits),
+ addr_byte = s2_req.addr(beatOffBits-1, 0),
+ operand_size = s2_req.typ,
+ alloc = Bool(false))
+ val uncachedPutOffset = s2_req.addr.extract(beatOffBits-1, wordOffBits)
+ val uncachedPutMessage = Put(
+ client_xact_id = UInt(0),
+ addr_block = s2_req.addr(paddrBits-1, blockOffBits),
+ addr_beat = s2_req.addr(blockOffBits-1, beatOffBits),
+ data = Fill(beatWords, pstore1_storegen.data),
+ wmask = Some(pstore1_storegen.mask << (uncachedPutOffset << wordOffBits)),
+ alloc = Bool(false))
+ val uncachedPutAtomicMessage = PutAtomic(
+ client_xact_id = UInt(0),
+ addr_block = s2_req.addr(paddrBits-1, blockOffBits),
+ addr_beat = s2_req.addr(blockOffBits-1, beatOffBits),
+ addr_byte = s2_req.addr(beatOffBits-1, 0),
+ atomic_opcode = s2_req.cmd,
+ operand_size = s2_req.typ,
+ data = Fill(beatWords, pstore1_storegen.data))
+ // Dirty victims must be written back before acquiring; gate on the
+ // finish queue so the eventual Finish can always be sent.
+ io.mem.acquire.valid := ((s2_valid_cached_miss && !s2_victim_dirty) || s2_valid_uncached) && fq.io.enq.ready
+ io.mem.acquire.bits := cachedGetMessage
+ when (s2_uncached) {
+ assert(!s2_valid_masked || !s2_hit_state.isValid(), "cache hit on uncached access")
+ io.mem.acquire.bits := uncachedGetMessage
+ when (s2_write) {
+ io.mem.acquire.bits := uncachedPutMessage
+ when (pstore1_amo) {
+ io.mem.acquire.bits := uncachedPutAtomicMessage
+ }
+ }
+ }
+ when (io.mem.acquire.fire()) { grant_wait := true }
+
+ // grant
+ val grantIsRefill = io.mem.grant.bits.hasMultibeatData()
+ val grantIsVoluntary = io.mem.grant.bits.isVoluntary()
+ val grantIsUncached = !grantIsRefill && !grantIsVoluntary
+ when (io.mem.grant.valid) {
+ assert(grant_wait || grantIsVoluntary && release_ack_wait, "unexpected grant")
+ // Uncached data reuses the s2_data register for the response path.
+ when (grantIsUncached) { s2_data := io.mem.grant.bits.data }
+ when (grantIsVoluntary) { release_ack_wait := false }
+ }
+ val (refillCount, refillDone) = Counter(io.mem.grant.fire() && grantIsRefill, refillCycles)
+ val grantDone = refillDone || grantIsUncached
+ when (io.mem.grant.fire() && grantDone) { grant_wait := false }
+
+ // data refill
+ dataArb.io.in(1).valid := grantIsRefill && io.mem.grant.valid
+ io.mem.grant.ready := true
+ assert(dataArb.io.in(1).ready || !dataArb.io.in(1).valid)
+ dataArb.io.in(1).bits.write := true
+ dataArb.io.in(1).bits.addr := Cat(s2_req.addr(paddrBits-1, blockOffBits), io.mem.grant.bits.addr_beat) << beatOffBits
+ dataArb.io.in(1).bits.way_en := s2_victim_way
+ dataArb.io.in(1).bits.wdata := io.mem.grant.bits.data
+ dataArb.io.in(1).bits.wmask := ~UInt(0, rowBytes)
+ // tag updates on refill
+ metaWriteArb.io.in(1).valid := refillDone
+ assert(!metaWriteArb.io.in(1).valid || metaWriteArb.io.in(1).ready)
+ metaWriteArb.io.in(1).bits.way_en := s2_victim_way
+ metaWriteArb.io.in(1).bits.idx := s2_req.addr(idxMSB, idxLSB)
+ metaWriteArb.io.in(1).bits.data.coh := s2_hit_state.onGrant(io.mem.grant.bits, s2_req.cmd)
+ metaWriteArb.io.in(1).bits.data.tag := s2_req.addr(paddrBits-1, untagBits)
+
+ // finish: acknowledge the grant once the whole transaction is complete.
+ fq.io.enq.valid := io.mem.grant.fire() && io.mem.grant.bits.requiresAck() && (!grantIsRefill || refillDone)
+ fq.io.enq.bits := io.mem.grant.bits.makeFinish()
+ io.mem.finish <> fq.io.deq
+ when (fq.io.enq.valid) { assert(fq.io.enq.ready) }
+ when (refillDone) { replacer.miss }
+
+ // probe: accept only when no release/LR is in flight and the pipeline
+ // behind it is clear, to avoid racing an in-progress access.
+ val block_probe = releaseInFlight || lrscValid || (s2_valid_hit && s2_lr)
+ metaReadArb.io.in(1).valid := io.mem.probe.valid && !block_probe
+ io.mem.probe.ready := metaReadArb.io.in(1).ready && !block_probe && !s1_valid && (!s2_valid || s2_valid_hit)
+ metaReadArb.io.in(1).bits.idx := io.mem.probe.bits.addr_block
+ metaReadArb.io.in(1).bits.way_en := ~UInt(0, nWays)
+
+ // release: covers probe responses (clean/dirty/miss) and voluntary
+ // writebacks of dirty victims, followed by a metadata update.
+ val (writebackCount, writebackDone) = Counter(io.mem.release.fire() && inWriteback, refillCycles)
+ val releaseDone = writebackDone || (io.mem.release.fire() && !inWriteback)
+ val releaseRejected = io.mem.release.valid && !io.mem.release.ready
+ val s1_release_data_valid = Reg(next = dataArb.io.in(2).fire())
+ val s2_release_data_valid = Reg(next = s1_release_data_valid && !releaseRejected)
+ // Beat pointer accounts for data already in the s1/s2 read pipeline and
+ // for beats replayed after a rejection.
+ val releaseDataBeat = Cat(UInt(0), writebackCount) + Mux(releaseRejected, UInt(0), s1_release_data_valid + Cat(UInt(0), s2_release_data_valid))
+ io.mem.release.valid := s2_release_data_valid
+ io.mem.release.bits := ClientMetadata.onReset.makeRelease(probe_bits)
+ val voluntaryReleaseMessage = s2_victim_state.makeVoluntaryWriteback(UInt(0), UInt(0))
+ val voluntaryNewCoh = s2_victim_state.onCacheControl(M_FLUSH)
+ val probeResponseMessage = s2_probe_state.makeRelease(probe_bits)
+ val probeNewCoh = s2_probe_state.onProbe(probe_bits)
+ val newCoh = Wire(init = probeNewCoh)
+ releaseWay := s2_probe_way
+ when (s2_victimize && s2_victim_dirty) {
+ assert(!s2_hit_state.isValid())
+ release_state := s_voluntary_writeback
+ // Reuse probe_bits to carry the victim's block address.
+ probe_bits.addr_block := Cat(s2_victim_tag, s2_req.addr(idxMSB, idxLSB))
+ }
+ when (s2_probe) {
+ when (s2_probe_state.requiresVoluntaryWriteback()) { release_state := s_probe_rep_dirty }
+ .elsewhen (s2_probe_state.isValid()) { release_state := s_probe_rep_clean }
+ .otherwise {
+ // Miss: respond immediately with a data-less release.
+ io.mem.release.valid := true
+ release_state := s_probe_rep_miss
+ }
+ }
+ when (releaseDone) { release_state := s_ready }
+ when (release_state === s_probe_rep_miss || release_state === s_probe_rep_clean) {
+ io.mem.release.valid := true
+ }
+ when (release_state === s_probe_rep_clean || release_state === s_probe_rep_dirty) {
+ io.mem.release.bits := probeResponseMessage
+ when (releaseDone) { release_state := s_probe_write_meta }
+ }
+ when (release_state === s_voluntary_writeback || release_state === s_voluntary_write_meta) {
+ io.mem.release.bits := voluntaryReleaseMessage
+ newCoh := voluntaryNewCoh
+ releaseWay := s2_victim_way
+ when (releaseDone) {
+ release_state := s_voluntary_write_meta
+ release_ack_wait := true
+ }
+ }
+ when (s2_probe && !io.mem.release.fire()) { s1_nack := true }
+ io.mem.release.bits.addr_block := probe_bits.addr_block
+ io.mem.release.bits.addr_beat := writebackCount
+ io.mem.release.bits.data := s2_data
+
+ // Read out the block being written back, one row per beat.
+ dataArb.io.in(2).valid := inWriteback && releaseDataBeat < refillCycles
+ dataArb.io.in(2).bits.write := false
+ dataArb.io.in(2).bits.addr := Cat(io.mem.release.bits.addr_block, releaseDataBeat(log2Up(refillCycles)-1,0)) << rowOffBits
+ dataArb.io.in(2).bits.way_en := ~UInt(0, nWays)
+
+ // Metadata update after a completed release.
+ metaWriteArb.io.in(2).valid := (release_state === s_voluntary_write_meta || release_state === s_probe_write_meta)
+ metaWriteArb.io.in(2).bits.way_en := releaseWay
+ metaWriteArb.io.in(2).bits.idx := io.mem.release.bits.full_addr()(idxMSB, idxLSB)
+ metaWriteArb.io.in(2).bits.data.coh := newCoh
+ metaWriteArb.io.in(2).bits.data.tag := io.mem.release.bits.full_addr()(paddrBits-1, untagBits)
+ when (metaWriteArb.io.in(2).fire()) { release_state := s_ready }
+
+ // cached response
+ io.cpu.resp.valid := s2_valid_hit
+ io.cpu.resp.bits := s2_req
+ io.cpu.resp.bits.has_data := s2_read
+ io.cpu.resp.bits.replay := false
+ io.cpu.ordered := !(s1_valid || s2_valid || grant_wait)
+
+ // uncached response: replayed one cycle after the grant arrives.
+ io.cpu.replay_next := io.mem.grant.valid && grantIsUncached
+ val doUncachedResp = Reg(next = io.cpu.replay_next)
+ when (doUncachedResp) {
+ assert(!s2_valid_hit)
+ io.cpu.resp.valid := true
+ io.cpu.resp.bits.replay := true
+ }
+
+ // load data subword mux/sign extension
+ val s2_word_idx = s2_req.addr.extract(log2Up(rowBits/8)-1, log2Up(wordBytes))
+ val s2_data_word = s2_data >> Cat(s2_word_idx, UInt(0, log2Up(coreDataBits)))
+ val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word, s2_sc, wordBytes)
+ // A failed SC returns 1 (or-ed into the low bit of the zeroed load data).
+ io.cpu.resp.bits.data := loadgen.data | s2_sc_fail
+ io.cpu.resp.bits.data_word_bypass := loadgen.wordData
+ io.cpu.resp.bits.store_data := pstore1_data
+
+ // AMOs: combine the old word with the store operand before draining.
+ if (usingAtomics) {
+ val amoalu = Module(new AMOALU)
+ amoalu.io.addr := pstore1_addr
+ amoalu.io.cmd := pstore1_cmd
+ amoalu.io.typ := pstore1_typ
+ amoalu.io.lhs := s2_data_word
+ amoalu.io.rhs := pstore1_data
+ pstore1_storegen_data := amoalu.io.out
+ } else {
+ assert(!(s1_valid_masked && s1_read && s1_write), "unsupported D$ operation")
+ }
+
+ // flushes: walk every set/way via the flush counter, writing back dirty
+ // lines through the release machine, until the whole cache is clean.
+ val flushed = Reg(init=Bool(true))
+ val flushing = Reg(init=Bool(false))
+ val flushCounter = Counter(nSets * nWays)
+ when (io.mem.acquire.fire()) { flushed := false }
+ when (s2_valid_masked && s2_req.cmd === M_FLUSH_ALL) {
+ io.cpu.s2_nack := !flushed
+ when (!flushed) {
+ flushing := !release_ack_wait
+ }
+ }
+ s1_flush_valid := metaReadArb.io.in(0).fire() && !s1_flush_valid && !s2_flush_valid && release_state === s_ready && !release_ack_wait
+ metaReadArb.io.in(0).valid := flushing
+ metaReadArb.io.in(0).bits.idx := flushCounter.value
+ metaReadArb.io.in(0).bits.way_en := ~UInt(0, nWays)
+ when (flushing) {
+ s1_victim_way := flushCounter.value >> log2Up(nSets)
+ when (s2_flush_valid) {
+ when (flushCounter.inc()) {
+ flushed := true
+ }
+ }
+ when (flushed && release_state === s_ready && !release_ack_wait) {
+ flushing := false
+ }
+ }
+}
diff --git a/rocket/src/main/scala/decode.scala b/rocket/src/main/scala/decode.scala
new file mode 100644
index 00000000..07cdc1d6
--- /dev/null
+++ b/rocket/src/main/scala/decode.scala
@@ -0,0 +1,203 @@
+// See LICENSE for license details.
+
+package rocket
+
+import Chisel._
+
+// Table-driven decoder generator. Given an address signal, a default output
+// pattern, and a (key -> value) mapping of BitPats, it synthesizes minimized
+// two-level logic per output bit using Simplify / SimplifyDC below. Generated
+// product terms are cached per address signal so identical terms are shared
+// across multiple decoded outputs.
+object DecodeLogic
+{
+ // Convert a BitPat into a Term: `value` holds the fixed bits, `mask` holds
+ // the don't-care bit positions.
+ def term(lit: BitPat) =
+ new Term(lit.value, BigInt(2).pow(lit.getWidth)-(lit.mask+1))
+ // OR together the comparators for a cover of product terms, reusing any
+ // comparator already built for the same term on this address.
+ def logic(addr: UInt, addrWidth: Int, cache: scala.collection.mutable.Map[Term,Bool], terms: Seq[Term]) = {
+ terms.map { t =>
+ cache.getOrElseUpdate(t, (if (t.mask == 0) addr else addr & Bits(BigInt(2).pow(addrWidth)-(t.mask+1), addrWidth)) === Bits(t.value, addrWidth))
+ }.foldLeft(Bool(false))(_||_)
+ }
+ // Decode a single multi-bit output. Each output bit is minimized
+ // independently; bits are generated LSB-first, then reversed into a Cat.
+ def apply(addr: UInt, default: BitPat, mapping: Iterable[(BitPat, BitPat)]): UInt = {
+ val cache = caches.getOrElseUpdate(addr, collection.mutable.Map[Term,Bool]())
+ val dterm = term(default)
+ val (keys, values) = mapping.unzip
+ val addrWidth = keys.map(_.getWidth).max
+ val terms = keys.toList.map(k => term(k))
+ val termvalues = terms zip values.toList.map(term(_))
+
+ // Sanity check: no two keys may match the same input.
+ for (t <- keys.zip(terms).tails; if !t.isEmpty)
+ for (u <- t.tail)
+ assert(!t.head._2.intersects(u._2), "DecodeLogic: keys " + t.head + " and " + u + " overlap")
+
+ (0 until default.getWidth.max(values.map(_.getWidth).max)).map({ case (i: Int) =>
+ // Partition keys by what this output bit must be: 1 (minterm),
+ // 0 (maxterm), or don't-care.
+ val mint = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 1 }.map(_._1)
+ val maxt = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 0 }.map(_._1)
+ val dc = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 1 }.map(_._1)
+
+ if (((dterm.mask >> i) & 1) != 0) {
+ // Default is don't-care: unmapped inputs become implicit don't-cares.
+ logic(addr, addrWidth, cache, SimplifyDC(mint, maxt, addrWidth)).toBits
+ } else {
+ // Default is fixed: minimize the polarity matching the default,
+ // then invert if the default bit is 1.
+ val defbit = (dterm.value.toInt >> i) & 1
+ val t = if (defbit == 0) mint else maxt
+ val bit = logic(addr, addrWidth, cache, Simplify(t, dc, addrWidth)).toBits
+ if (defbit == 0) bit else ~bit
+ }
+ }).reverse.reduceRight(Cat(_,_))
+ }
+ // Decode several outputs at once: transpose the mapping so each output
+ // column is minimized independently with its own default.
+ def apply(addr: UInt, default: Seq[BitPat], mappingIn: Iterable[(BitPat, Seq[BitPat])]): Seq[UInt] = {
+ val mapping = collection.mutable.ArrayBuffer.fill(default.size)(collection.mutable.ArrayBuffer[(BitPat, BitPat)]())
+ for ((key, values) <- mappingIn)
+ for ((value, i) <- values zipWithIndex)
+ mapping(i) += key -> value
+ for ((thisDefault, thisMapping) <- default zip mapping)
+ yield apply(addr, thisDefault, thisMapping)
+ }
+ // Convenience overload accepting UInt keys.
+ def apply(addr: UInt, default: Seq[BitPat], mappingIn: List[(UInt, Seq[BitPat])]): Seq[UInt] =
+ apply(addr, default, mappingIn.map(m => (BitPat(m._1), m._2)).asInstanceOf[Iterable[(BitPat, Seq[BitPat])]])
+ // One-bit decode: true for `trues`, false for `falses`, don't-care elsewhere.
+ def apply(addr: UInt, trues: Iterable[UInt], falses: Iterable[UInt]): Bool =
+ apply(addr, BitPat.DC(1), trues.map(BitPat(_) -> BitPat("b1")) ++ falses.map(BitPat(_) -> BitPat("b0"))).toBool
+ // Per-address-signal cache of already-synthesized product-term comparators.
+ private val caches = collection.mutable.Map[UInt,collection.mutable.Map[Term,Bool]]()
+}
+
+// A product term (cube) for two-level logic minimization: `value` gives the
+// fixed bit values, `mask` has a 1 in each don't-care position. Bits set in
+// `mask` must be 0 in `value` (see merge, which clears the merged bit).
+class Term(val value: BigInt, val mask: BigInt = 0)
+{
+ // Mutable flag used by the prime-implicant tables: cleared when this term
+ // is merged into a larger implicant.
+ var prime = true
+
+ // True if every minterm of x is also a minterm of this term: fixed bits
+ // agree outside our don't-cares, and x's don't-cares are a subset of ours.
+ def covers(x: Term) = ((value ^ x.value) &~ mask | x.mask &~ mask) == 0
+ // True if the two terms share at least one minterm (no fixed bit differs
+ // outside the union of the don't-care masks).
+ def intersects(x: Term) = ((value ^ x.value) &~ mask &~ x.mask) == 0
+ override def equals(that: Any) = that match {
+ case x: Term => x.value == value && x.mask == mask
+ case _ => false
+ }
+ // NOTE(review): hashCode truncates value to Int; consistent with equals
+ // (equal terms hash equally) though collision-prone for wide values.
+ override def hashCode = value.toInt
+ // Total order on (value, mask), used for deterministic sorting.
+ def < (that: Term) = value < that.value || value == that.value && mask < that.mask
+ // True if x differs from this term in exactly one fixed bit (and this
+ // term holds the larger value), so the pair can be merged.
+ def similar(x: Term) = {
+ val diff = value - x.value
+ mask == x.mask && value > x.value && (diff & diff-1) == 0
+ }
+ // Merge with a `similar` term: the differing bit becomes a don't-care.
+ // Side effect: marks both inputs non-prime.
+ def merge(x: Term) = {
+ prime = false
+ x.prime = false
+ val bit = value - x.value
+ new Term(value &~ bit, mask | bit)
+ }
+
+ override def toString = value.toString(16) + "-" + mask.toString(16) + (if (prime) "p" else "")
+}
+
+// Quine-McCluskey two-level logic minimization with an explicit don't-care
+// set: merges minterms into prime implicants, extracts essential primes, and
+// covers the remainder by exhaustive cheapest-cover search.
+object Simplify
+{
+ // Tabulate implicants by (#don't-care bits, #one bits) and repeatedly merge
+ // `similar` pairs; anything never merged remains prime.
+ def getPrimeImplicants(implicants: Seq[Term], bits: Int) = {
+ var prime = List[Term]()
+ implicants.foreach(_.prime = true)
+ val cols = (0 to bits).map(b => implicants.filter(b == _.mask.bitCount))
+ val table = cols.map(c => (0 to bits).map(b => collection.mutable.Set(c.filter(b == _.value.bitCount):_*)))
+ for (i <- 0 to bits) {
+ for (j <- 0 until bits-i)
+ table(i)(j).foreach(a => table(i+1)(j) ++= table(i)(j+1).filter(_.similar(a)).map(_.merge(a)))
+ for (r <- table(i))
+ for (p <- r; if p.prime)
+ prime = p :: prime
+ }
+ prime.sortWith(_<_)
+ }
+ // Split primes into (essential, non-essential) and report still-uncovered
+ // minterms. First drops any prime strictly dominated by another (row
+ // dominance), restarting via recursion; then peels off essential primes
+ // (sole cover of some minterm) and recurses on what remains.
+ def getEssentialPrimeImplicants(prime: Seq[Term], minterms: Seq[Term]): (Seq[Term],Seq[Term],Seq[Term]) = {
+ for (i <- 0 until prime.size) {
+ val icover = minterms.filter(prime(i) covers _)
+ for (j <- 0 until prime.size) {
+ val jcover = minterms.filter(prime(j) covers _)
+ if (icover.size > jcover.size && jcover.forall(prime(i) covers _))
+ return getEssentialPrimeImplicants(prime.filter(_ != prime(j)), minterms)
+ }
+ }
+
+ val essentiallyCovered = minterms.filter(t => prime.count(_ covers t) == 1)
+ val essential = prime.filter(p => essentiallyCovered.exists(p covers _))
+ val nonessential = prime.filterNot(essential contains _)
+ val uncovered = minterms.filterNot(t => essential.exists(_ covers t))
+ if (essential.isEmpty || uncovered.isEmpty)
+ (essential, nonessential, uncovered)
+ else {
+ val (a, b, c) = getEssentialPrimeImplicants(nonessential, uncovered)
+ (essential ++ a, b, c)
+ }
+ }
+ // Cost of a cover = total number of literals (fixed bits) across its terms.
+ def getCost(cover: Seq[Term], bits: Int) = cover.map(bits - _.mask.bitCount).sum
+ // Compare covers by cost, tie-broken lexicographically for determinism.
+ def cheaper(a: List[Term], b: List[Term], bits: Int) = {
+ val ca = getCost(a, bits)
+ val cb = getCost(b, bits)
+ def listLess(a: List[Term], b: List[Term]): Boolean = !b.isEmpty && (a.isEmpty || a.head < b.head || a.head == b.head && listLess(a.tail, b.tail))
+ ca < cb || ca == cb && listLess(a.sortWith(_<_), b.sortWith(_<_))
+ }
+ // Exhaustively enumerate covers of the remaining minterms (Petrick-style
+ // cross product of per-minterm choices) and keep the cheapest.
+ // NOTE(review): exponential in the worst case; acceptable for the small
+ // residual covers left after essential-prime extraction.
+ def getCover(implicants: Seq[Term], minterms: Seq[Term], bits: Int) = {
+ if (minterms.nonEmpty) {
+ val cover = minterms.map(m => implicants.filter(_.covers(m)).map(i => collection.mutable.Set(i)))
+ val all = cover.reduceLeft((c0, c1) => c0.map(a => c1.map(_ ++ a)).reduceLeft(_++_))
+ all.map(_.toList).reduceLeft((a, b) => if (cheaper(a, b, bits)) a else b)
+ } else
+ Seq[Term]()
+ }
+ // Debug helper: render a cover as a sum of products, e.g. "1x0 + x11".
+ def stringify(s: Seq[Term], bits: Int) = s.map(t => (0 until bits).map(i => if ((t.mask & (1 << i)) != 0) "x" else ((t.value >> i) & 1).toString).reduceLeft(_+_).reverse).reduceLeft(_+" + "+_)
+
+ // Minimize: primes from minterms + don't-cares, essential extraction,
+ // then cheapest cover of the leftovers. Asserts full coverage.
+ def apply(minterms: Seq[Term], dontcares: Seq[Term], bits: Int) = {
+ val prime = getPrimeImplicants(minterms ++ dontcares, bits)
+ minterms.foreach(t => assert(prime.exists(_.covers(t))))
+ val (eprime, prime2, uncovered) = getEssentialPrimeImplicants(prime, minterms)
+ val cover = eprime ++ getCover(prime2, uncovered, bits)
+ minterms.foreach(t => assert(cover.exists(_.covers(t)))) // sanity check
+ cover
+ }
+}
+
+// Logic minimization where the don't-care set is implicit: anything that is
+// neither a minterm nor a maxterm may be treated as a don't-care. Don't-cares
+// are discovered on demand by probing single-bit neighbors against maxterms.
+object SimplifyDC
+{
+ // Find a single-bit neighbor of `term` (flipping a 0->1 if `above`, else a
+ // 1->0) that intersects no maxterm, i.e. an implicit don't-care usable for
+ // merging. Returns null if none exists.
+ def getImplicitDC(maxterms: Seq[Term], term: Term, bits: Int, above: Boolean): Term = {
+ for (i <- 0 until bits) {
+ var t: Term = null
+ if (above && ((term.value | term.mask) & (BigInt(1) << i)) == 0)
+ t = new Term(term.value | (BigInt(1) << i), term.mask)
+ else if (!above && (term.value & (BigInt(1) << i)) != 0)
+ t = new Term(term.value & ~(BigInt(1) << i), term.mask)
+ if (t != null && !maxterms.exists(_.intersects(t)))
+ return t
+ }
+ null
+ }
+ // Same tabulation as Simplify.getPrimeImplicants, but additionally merges
+ // each still-prime term with any implicit don't-care neighbor found above
+ // or below it in the one-count table.
+ def getPrimeImplicants(minterms: Seq[Term], maxterms: Seq[Term], bits: Int) = {
+ var prime = List[Term]()
+ minterms.foreach(_.prime = true)
+ // Work on copies so the caller's minterm objects keep their flags.
+ var mint = minterms.map(t => new Term(t.value, t.mask))
+ val cols = (0 to bits).map(b => mint.filter(b == _.mask.bitCount))
+ val table = cols.map(c => (0 to bits).map(b => collection.mutable.Set(c.filter(b == _.value.bitCount):_*)))
+
+ for (i <- 0 to bits) {
+ for (j <- 0 until bits-i) {
+ table(i)(j).foreach(a => table(i+1)(j) ++= table(i)(j+1).filter(_ similar a).map(_ merge a))
+ }
+ for (j <- 0 until bits-i) {
+ for (a <- table(i)(j).filter(_.prime)) {
+ val dc = getImplicitDC(maxterms, a, bits, true)
+ if (dc != null)
+ table(i+1)(j) += dc merge a
+ }
+ for (a <- table(i)(j+1).filter(_.prime)) {
+ val dc = getImplicitDC(maxterms, a, bits, false)
+ if (dc != null)
+ table(i+1)(j) += a merge dc
+ }
+ }
+ for (r <- table(i))
+ for (p <- r; if p.prime)
+ prime = p :: prime
+ }
+ prime.sortWith(_<_)
+ }
+
+ // Check the cover hits every minterm and touches no maxterm.
+ def verify(cover: Seq[Term], minterms: Seq[Term], maxterms: Seq[Term]) = {
+ assert(minterms.forall(t => cover.exists(_ covers t)))
+ assert(maxterms.forall(t => !cover.exists(_ intersects t)))
+ }
+ // Minimize with implicit don't-cares, reusing Simplify's essential-prime
+ // extraction and cover search, then verify the result.
+ def apply(minterms: Seq[Term], maxterms: Seq[Term], bits: Int) = {
+ val prime = getPrimeImplicants(minterms, maxterms, bits)
+ val (eprime, prime2, uncovered) = Simplify.getEssentialPrimeImplicants(prime, minterms)
+ val cover = eprime ++ Simplify.getCover(prime2, uncovered, bits)
+ verify(cover, minterms, maxterms)
+ cover
+ }
+}
diff --git a/rocket/src/main/scala/dma.scala b/rocket/src/main/scala/dma.scala
new file mode 100644
index 00000000..ce1af0ed
--- /dev/null
+++ b/rocket/src/main/scala/dma.scala
@@ -0,0 +1,400 @@
+package rocket
+
+import Chisel._
+import uncore.tilelink._
+import uncore.devices._
+import uncore.devices.DmaRequest._
+import uncore.agents._
+import uncore.util._
+import junctions.{ParameterizedBundle, AddrMap}
+import cde.Parameters
+
+// Width parameters shared by the client-side DMA interface: addresses and
+// segment sizes are core-address-width wide; up to 2^24 segments per request.
+trait HasClientDmaParameters extends HasCoreParameters with HasDmaParameters {
+ val dmaAddrBits = coreMaxAddrBits
+ val dmaSegmentSizeBits = coreMaxAddrBits
+ val dmaSegmentBits = 24
+}
+
+// Base classes giving DMA bundles/modules access to the client DMA parameters.
+abstract class ClientDmaBundle(implicit val p: Parameters)
+ extends ParameterizedBundle()(p) with HasClientDmaParameters
+abstract class ClientDmaModule(implicit val p: Parameters)
+ extends Module with HasClientDmaParameters
+
+// A strided DMA request: `nsegments` segments of `segment_size` bytes each,
+// with source/destination advancing by the respective strides per segment.
+class ClientDmaRequest(implicit p: Parameters) extends ClientDmaBundle()(p) {
+ val cmd = UInt(width = DMA_CMD_SZ)
+ val src_start = UInt(width = dmaAddrBits)
+ val dst_start = UInt(width = dmaAddrBits)
+ val src_stride = UInt(width = dmaSegmentSizeBits)
+ val dst_stride = UInt(width = dmaSegmentSizeBits)
+ val segment_size = UInt(width = dmaSegmentSizeBits)
+ val nsegments = UInt(width = dmaSegmentBits)
+ val word_size = UInt(width = dmaWordSizeBits)
+}
+
+object ClientDmaRequest {
+ // Convenience constructor: builds a ClientDmaRequest wire and populates
+ // every field. Strides and word size default to 0 and nsegments to 1,
+ // matching a simple single-segment copy.
+ def apply(cmd: UInt,
+ src_start: UInt,
+ dst_start: UInt,
+ segment_size: UInt,
+ nsegments: UInt = UInt(1),
+ src_stride: UInt = UInt(0),
+ dst_stride: UInt = UInt(0),
+ word_size: UInt = UInt(0))
+ (implicit p: Parameters) = {
+ val r = Wire(new ClientDmaRequest)
+ // Command and addressing.
+ r.cmd := cmd
+ r.src_start := src_start
+ r.dst_start := dst_start
+ // Segment geometry.
+ r.segment_size := segment_size
+ r.nsegments := nsegments
+ r.src_stride := src_stride
+ r.dst_stride := dst_stride
+ r.word_size := word_size
+ r
+ }
+}
+
+object ClientDmaResponse {
+ // Status codes reported back to the client.
+ val pagefault = UInt("b01")
+ val invalid_region = UInt("b10")
+
+ // Build a response wire carrying `status` (0 = success by default).
+ def apply(status: UInt = UInt(0))(implicit p: Parameters) = {
+ val r = Wire(new ClientDmaResponse)
+ r.status := status
+ r
+ }
+}
+
+// DMA completion response: only a status code (see ClientDmaResponse object).
+class ClientDmaResponse(implicit p: Parameters) extends ClientDmaBundle {
+ val status = UInt(width = dmaStatusBits)
+}
+
+// Client-facing DMA port: decoupled requests out, valid-only responses in.
+class ClientDmaIO(implicit p: Parameters) extends ParameterizedBundle()(p) {
+ val req = Decoupled(new ClientDmaRequest)
+ val resp = Valid(new ClientDmaResponse).flip
+}
+
+// Front end of the DMA engine: translates virtual source/destination
+// addresses via a private TLB, splits each segment at page boundaries, and
+// issues one PutBlock per transfer chunk to the backend DMA device over
+// TileLink. A small FSM (idle -> translate -> dma_req -> dma_update ->
+// prepare/finish) sequences the work.
+class DmaFrontend(implicit p: Parameters) extends CoreModule()(p)
+ with HasClientDmaParameters with HasTileLinkParameters {
+ val io = new Bundle {
+ val cpu = (new ClientDmaIO).flip
+ val mem = new ClientUncachedTileLinkIO
+ val ptw = new TLBPTWIO
+ val busy = Bool(OUTPUT)
+ val incr_outstanding = Bool(OUTPUT)
+ val host_id = UInt(INPUT, log2Up(nCores))
+ }
+
+ // Private TLB configured like the L1 D-cache's.
+ val tlb = Module(new DecoupledTLB()(p.alterPartial({
+ case CacheName => "L1D"
+ })))
+ io.ptw <> tlb.io.ptw
+
+ private val pgSize = 1 << pgIdxBits
+
+ val cmd = Reg(UInt(width = DMA_CMD_SZ))
+ // Which pointers advance per chunk: bit 0 = src, bit 1 = dst.
+ // Prefetches and scatter-in advance only one side.
+ val adv_ptr = MuxLookup(cmd, UInt("b11"), Seq(
+ DMA_CMD_PFR -> UInt("b10"),
+ DMA_CMD_PFW -> UInt("b10"),
+ DMA_CMD_SIN -> UInt("b10"),
+ DMA_CMD_SOUT -> UInt("b01")))
+
+ val segment_size = Reg(UInt(width = dmaSegmentSizeBits))
+ val bytes_left = Reg(UInt(width = dmaSegmentSizeBits))
+ val segments_left = Reg(UInt(width = dmaSegmentBits))
+ val word_size = Reg(UInt(width = dmaWordSizeBits))
+
+ // Current virtual pointers and their page decomposition.
+ val src_vaddr = Reg(UInt(width = dmaAddrBits))
+ val dst_vaddr = Reg(UInt(width = dmaAddrBits))
+ val src_vpn = src_vaddr(dmaAddrBits - 1, pgIdxBits)
+ val dst_vpn = dst_vaddr(dmaAddrBits - 1, pgIdxBits)
+ val src_idx = src_vaddr(pgIdxBits - 1, 0)
+ val dst_idx = dst_vaddr(pgIdxBits - 1, 0)
+ // Bytes remaining to each page boundary.
+ val src_pglen = UInt(pgSize) - src_idx
+ val dst_pglen = UInt(pgSize) - dst_idx
+
+ val src_stride = Reg(UInt(width = dmaSegmentSizeBits))
+ val dst_stride = Reg(UInt(width = dmaSegmentSizeBits))
+
+ // Translated physical page numbers and resulting physical addresses.
+ val src_ppn = Reg(UInt(width = ppnBits))
+ val dst_ppn = Reg(UInt(width = ppnBits))
+
+ val src_paddr = Cat(src_ppn, src_idx)
+ val dst_paddr = Cat(dst_ppn, dst_idx)
+
+ // VPNs of the previous segment, to skip re-translation when unchanged.
+ val last_src_vpn = Reg(UInt(width = vpnBits))
+ val last_dst_vpn = Reg(UInt(width = vpnBits))
+
+ // Chunk length: bounded by both page boundaries and bytes remaining.
+ val tx_len = Util.minUInt(src_pglen, dst_pglen, bytes_left)
+
+ // One busy bit per outstanding backend transaction.
+ val dma_busy = Reg(init = UInt(0, tlMaxClientXacts))
+ val dma_xact_id = PriorityEncoder(~dma_busy)
+ val (dma_req_beat, dma_req_done) = Counter(io.mem.acquire.fire(), tlDataBeats)
+
+ val (s_idle :: s_translate :: s_dma_req :: s_dma_update ::
+ s_prepare :: s_finish :: Nil) = Enum(Bits(), 6)
+ val state = Reg(init = s_idle)
+
+ // lower bit is for src, higher bit is for dst
+ val to_translate = Reg(init = UInt(0, 2))
+ val tlb_sent = Reg(init = UInt(0, 2))
+ val tlb_to_send = to_translate & ~tlb_sent
+ val resp_status = Reg(UInt(width = dmaStatusBits))
+
+ // Serialize a DmaRequest descriptor into a PutBlock aimed at the DMA
+ // device's memory-mapped request region; one beat of the descriptor is
+ // sent per acquire beat.
+ def make_acquire(
+ addr_beat: UInt, client_xact_id: UInt, client_id: UInt,
+ cmd: UInt, source: UInt, dest: UInt,
+ length: UInt, size: UInt): Acquire = {
+
+ val data_blob = Wire(UInt(width = tlDataBeats * tlDataBits))
+ data_blob := DmaRequest(
+ xact_id = UInt(0),
+ client_id = client_id,
+ cmd = cmd,
+ source = source,
+ dest = dest,
+ length = length,
+ size = size).toBits
+ val data_beats = Vec(tlDataBeats, UInt(width = tlDataBits)).fromBits(data_blob)
+ val base_addr = addrMap("devices:dma").start
+ val addr_block = UInt(base_addr >> (tlBeatAddrBits + tlByteAddrBits))
+
+ PutBlock(
+ client_xact_id = client_xact_id,
+ addr_block = addr_block,
+ addr_beat = addr_beat,
+ data = data_beats(addr_beat),
+ alloc = Bool(false))
+ }
+
+ // Address-map legality: stream-out must target uncacheable memory,
+ // stream-in must source from uncacheable memory; otherwise both ends
+ // must be cacheable.
+ def check_region(cmd: UInt, src: UInt, dst: UInt): Bool = {
+ val src_cacheable = addrMap.isCacheable(src)
+ val dst_cacheable = addrMap.isCacheable(dst)
+ val dst_ok = Mux(cmd === DMA_CMD_SOUT, !dst_cacheable, dst_cacheable)
+ val src_ok = Mux(cmd === DMA_CMD_SIN, !src_cacheable, Bool(true))
+ dst_ok && src_ok
+ }
+
+ // Issue TLB lookups for whichever of {src, dst} still needs translation;
+ // src (bit 0) is served first by the priority encoding.
+ tlb.io.req.valid := tlb_to_send.orR
+ tlb.io.req.bits.vpn := Mux(tlb_to_send(0), src_vpn, dst_vpn)
+ tlb.io.req.bits.passthrough := Bool(false)
+ tlb.io.req.bits.instruction := Bool(false)
+ tlb.io.req.bits.store := !tlb_to_send(0)
+ tlb.io.resp.ready := tlb_sent.orR
+
+ when (tlb.io.req.fire()) {
+ tlb_sent := tlb_sent | PriorityEncoderOH(tlb_to_send)
+ }
+
+ when (tlb.io.resp.fire()) {
+ // Responses come back in the same src-before-dst priority order.
+ val recv_choice = PriorityEncoderOH(to_translate)
+ val error = Mux(recv_choice(0),
+ tlb.io.resp.bits.xcpt_ld, tlb.io.resp.bits.xcpt_st)
+
+ when (error) {
+ resp_status := ClientDmaResponse.pagefault
+ state := s_finish
+ }
+
+ // getting the src translation
+ when (recv_choice(0)) {
+ src_ppn := tlb.io.resp.bits.ppn
+ } .otherwise {
+ dst_ppn := tlb.io.resp.bits.ppn
+ }
+
+ to_translate := to_translate & ~recv_choice
+ }
+
+ io.cpu.req.ready := state === s_idle
+ io.cpu.resp.valid := state === s_finish
+ io.cpu.resp.bits := ClientDmaResponse(resp_status)
+
+ // Send the descriptor only while a free transaction slot exists.
+ io.mem.acquire.valid := (state === s_dma_req) && !dma_busy.andR
+ io.mem.acquire.bits := make_acquire(
+ addr_beat = dma_req_beat,
+ client_id = io.host_id,
+ client_xact_id = dma_xact_id,
+ cmd = cmd, source = src_paddr, dest = dst_paddr,
+ length = tx_len, size = word_size)
+
+ io.mem.grant.ready := (state =/= s_dma_req)
+
+ // Accept a new request: latch all parameters and start translation.
+ // Prefetches only need the destination translated.
+ when (io.cpu.req.fire()) {
+ val req = io.cpu.req.bits
+ val is_prefetch = req.cmd(2, 1) === UInt("b01")
+ cmd := req.cmd
+ src_vaddr := req.src_start
+ dst_vaddr := req.dst_start
+ src_stride := req.src_stride
+ dst_stride := req.dst_stride
+ segment_size := req.segment_size
+ segments_left := req.nsegments - UInt(1)
+ bytes_left := req.segment_size
+ word_size := req.word_size
+ to_translate := Mux(is_prefetch, UInt("b10"), UInt("b11"))
+ tlb_sent := UInt(0)
+ state := s_translate
+ }
+
+ // Translation complete: validate the physical region before issuing.
+ when (state === s_translate && !to_translate.orR) {
+ when (check_region(cmd, src_paddr, dst_paddr)) {
+ state := s_dma_req
+ } .otherwise {
+ resp_status := ClientDmaResponse.invalid_region
+ state := s_finish
+ }
+ }
+
+ def setBusy(set: Bool, xact_id: UInt): UInt =
+ Mux(set, UIntToOH(xact_id), UInt(0))
+
+ // Set the busy bit when a descriptor finishes sending; clear it when the
+ // matching grant returns.
+ dma_busy := (dma_busy |
+ setBusy(dma_req_done, dma_xact_id)) &
+ ~setBusy(io.mem.grant.fire(), io.mem.grant.bits.client_xact_id)
+
+
+ // One chunk issued: advance whichever pointers this command moves.
+ when (dma_req_done) {
+ src_vaddr := src_vaddr + Mux(adv_ptr(0), tx_len, UInt(0))
+ dst_vaddr := dst_vaddr + Mux(adv_ptr(1), tx_len, UInt(0))
+ bytes_left := bytes_left - tx_len
+ state := s_dma_update
+ }
+
+ when (state === s_dma_update) {
+ when (bytes_left === UInt(0)) {
+ when (segments_left === UInt(0)) {
+ // All segments done.
+ resp_status := UInt(0)
+ state := s_finish
+ } .otherwise {
+ // Next segment: apply strides; remember old VPNs so s_prepare can
+ // skip re-translation when the page didn't change.
+ last_src_vpn := src_vpn
+ last_dst_vpn := dst_vpn
+ src_vaddr := src_vaddr + src_stride
+ dst_vaddr := dst_vaddr + dst_stride
+ bytes_left := segment_size
+ segments_left := segments_left - UInt(1)
+ state := s_prepare
+ }
+ } .otherwise {
+ // Mid-segment: re-translate only pointers that crossed a page
+ // boundary (index wrapped to 0).
+ to_translate := adv_ptr & Cat(dst_idx === UInt(0), src_idx === UInt(0))
+ tlb_sent := UInt(0)
+ state := s_translate
+ }
+ }
+
+ // New segment: re-translate only pointers whose VPN actually changed.
+ when (state === s_prepare) {
+ to_translate := adv_ptr & Cat(
+ dst_vpn =/= last_dst_vpn,
+ src_vpn =/= last_src_vpn)
+ tlb_sent := UInt(0)
+ state := s_translate
+ }
+
+ when (state === s_finish) { state := s_idle }
+
+ io.busy := (state =/= s_idle) || dma_busy.orR
+ io.incr_outstanding := dma_req_done
+}
+
+// CSR-space register numbering for the DMA controller. The first five are
+// writable configuration registers; RESP_STATUS and OUTSTANDING are
+// status/counter registers. CSR_BASE/CSR_END delimit the mapped range.
+object DmaCtrlRegNumbers {
+ val SRC_STRIDE = 0
+ val DST_STRIDE = 1
+ val SEGMENT_SIZE = 2
+ val NSEGMENTS = 3
+ val WORD_SIZE = 4
+ val RESP_STATUS = 5
+ val OUTSTANDING = 6
+ val NCSRS = 7
+ val CSR_BASE = 0x800
+ val CSR_END = CSR_BASE + NCSRS
+}
+import DmaCtrlRegNumbers._
+
+// Register file backing the DMA control CSRs: five writable configuration
+// registers (strides, segment geometry, word size) plus an up/down counter
+// of outstanding backend transactions.
+class DmaCtrlRegFile(implicit val p: Parameters) extends Module
+ with HasClientDmaParameters with HasTileLinkParameters {
+
+ private val nWriteRegs = 5
+ private val nRegs = nWriteRegs + 2
+
+ val io = new Bundle {
+ val wen = Bool(INPUT)
+ val waddr = UInt(INPUT, log2Up(nRegs))
+ val wdata = UInt(INPUT, dmaSegmentSizeBits)
+
+ val src_stride = UInt(OUTPUT, dmaSegmentSizeBits)
+ val dst_stride = UInt(OUTPUT, dmaSegmentSizeBits)
+ val segment_size = UInt(OUTPUT, dmaSegmentSizeBits)
+ val nsegments = UInt(OUTPUT, dmaSegmentBits)
+ val word_size = UInt(OUTPUT, dmaWordSizeBits)
+
+ // incr on each issued request; decr via a write to OUTSTANDING.
+ val incr_outstanding = Bool(INPUT)
+ val xact_outstanding = Bool(OUTPUT)
+ }
+
+ val regs = Reg(Vec(nWriteRegs, UInt(width = dmaSegmentSizeBits)))
+ val waddr = io.waddr(log2Up(NCSRS) - 1, 0)
+
+ io.src_stride := regs(SRC_STRIDE)
+ io.dst_stride := regs(DST_STRIDE)
+ io.segment_size := regs(SEGMENT_SIZE)
+ io.nsegments := regs(NSEGMENTS)
+ io.word_size := regs(WORD_SIZE)
+
+ // Only the configuration registers are writable; status registers ignore
+ // writes to their data.
+ when (io.wen && waddr < UInt(nWriteRegs)) {
+ regs.write(waddr, io.wdata)
+ }
+
+ // Outstanding-transaction counter: incremented by the frontend, decremented
+ // when software writes the OUTSTANDING CSR.
+ val outstanding_cnt = TwoWayCounter(
+ io.incr_outstanding,
+ io.wen && io.waddr === UInt(OUTSTANDING),
+ tlMaxClientXacts)
+
+ io.xact_outstanding := outstanding_cnt > UInt(0)
+}
+
+// RoCC accelerator wrapping the DMA frontend: transfer commands arrive over
+// the RoCC command queue (rs1 = destination, rs2 = source), configuration and
+// status are exposed through the CSR file above.
+class DmaController(implicit p: Parameters) extends RoCC()(p)
+ with HasClientDmaParameters {
+ // Unused RoCC interfaces are tied off.
+ io.mem.req.valid := Bool(false)
+ io.resp.valid := Bool(false)
+ io.interrupt := Bool(false)
+
+ val cmd = Queue(io.cmd)
+ val inst = cmd.bits.inst
+ // funct values below 8 are transfer commands handled by the frontend.
+ val is_transfer = inst.funct < UInt(8)
+
+ val reg_status = Reg(UInt(width = dmaStatusBits))
+ val crfile = Module(new DmaCtrlRegFile)
+ crfile.io.waddr := io.csr.waddr
+ crfile.io.wdata := io.csr.wdata
+ crfile.io.wen := io.csr.wen
+
+ // CSR reads come straight from the register file / latched status.
+ io.csr.rdata(SRC_STRIDE) := crfile.io.src_stride
+ io.csr.rdata(DST_STRIDE) := crfile.io.dst_stride
+ io.csr.rdata(SEGMENT_SIZE) := crfile.io.segment_size
+ io.csr.rdata(NSEGMENTS) := crfile.io.nsegments
+ io.csr.rdata(WORD_SIZE) := crfile.io.word_size
+ io.csr.rdata(RESP_STATUS) := reg_status
+
+ val frontend = Module(new DmaFrontend)
+ io.ptw(0) <> frontend.io.ptw
+ io.autl <> frontend.io.mem
+ crfile.io.incr_outstanding := frontend.io.incr_outstanding
+ frontend.io.host_id := io.host_id
+ frontend.io.cpu.req.valid := cmd.valid && is_transfer
+ frontend.io.cpu.req.bits := ClientDmaRequest(
+ cmd = cmd.bits.inst.funct,
+ src_start = cmd.bits.rs2,
+ dst_start = cmd.bits.rs1,
+ src_stride = crfile.io.src_stride,
+ dst_stride = crfile.io.dst_stride,
+ segment_size = crfile.io.segment_size,
+ nsegments = crfile.io.nsegments,
+ word_size = crfile.io.word_size)
+ cmd.ready := is_transfer && frontend.io.cpu.req.ready
+
+ // Latch the completion status for later CSR reads.
+ when (frontend.io.cpu.resp.valid) {
+ reg_status := frontend.io.cpu.resp.bits.status
+ }
+
+ io.busy := cmd.valid || frontend.io.busy || crfile.io.xact_outstanding
+}
diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala
new file mode 100644
index 00000000..841f0ec0
--- /dev/null
+++ b/rocket/src/main/scala/dpath_alu.scala
@@ -0,0 +1,96 @@
+// See LICENSE for license details.
+
+package rocket
+
+import Chisel._
+import cde.{Parameters, Field}
+import Instructions._
+
+// ALU function encodings. The 4-bit code is chosen so single-bit tests
+// recover properties: bit 3 = subtract/compare, bit 1 = unsigned compare,
+// bit 0 = inverted compare sense. Multiply/divide units reuse codes that the
+// ALU itself doesn't need for those ops.
+object ALU
+{
+ val SZ_ALU_FN = 4
+ val FN_X = BitPat("b????")
+ val FN_ADD = UInt(0)
+ val FN_SL = UInt(1)
+ val FN_SEQ = UInt(2)
+ val FN_SNE = UInt(3)
+ val FN_XOR = UInt(4)
+ val FN_SR = UInt(5)
+ val FN_OR = UInt(6)
+ val FN_AND = UInt(7)
+ val FN_SUB = UInt(10)
+ val FN_SRA = UInt(11)
+ val FN_SLT = UInt(12)
+ val FN_SGE = UInt(13)
+ val FN_SLTU = UInt(14)
+ val FN_SGEU = UInt(15)
+
+ // Divider reuses these ALU codes.
+ val FN_DIV = FN_XOR
+ val FN_DIVU = FN_SR
+ val FN_REM = FN_OR
+ val FN_REMU = FN_AND
+
+ // Multiplier reuses these ALU codes.
+ val FN_MUL = FN_ADD
+ val FN_MULH = FN_SL
+ val FN_MULHSU = FN_SLT
+ val FN_MULHU = FN_SLTU
+
+ // Multiplier op match only needs the low two bits.
+ def isMulFN(fn: UInt, cmp: UInt) = fn(1,0) === cmp(1,0)
+ def isSub(cmd: UInt) = cmd(3)
+ def isCmp(cmd: UInt) = cmd === FN_SEQ || cmd === FN_SNE || cmd >= FN_SLT
+ def cmpUnsigned(cmd: UInt) = cmd(1)
+ def cmpInverted(cmd: UInt) = cmd(0)
+ def cmpEq(cmd: UInt) = !cmd(3)
+}
+import ALU._
+
+// Integer ALU. A single adder serves ADD/SUB and all comparisons; a single
+// right shifter serves SLL/SRL/SRA by bit-reversing the operand for left
+// shifts. `dw` selects 32- vs 64-bit operation on RV64.
+class ALU(implicit p: Parameters) extends CoreModule()(p) {
+ val io = new Bundle {
+ val dw = Bits(INPUT, SZ_DW)
+ val fn = Bits(INPUT, SZ_ALU_FN)
+ val in2 = UInt(INPUT, xLen)
+ val in1 = UInt(INPUT, xLen)
+ val out = UInt(OUTPUT, xLen)
+ val adder_out = UInt(OUTPUT, xLen)
+ val cmp_out = Bool(OUTPUT)
+ }
+
+ // ADD, SUB
+ // Subtraction = add the complement plus one (carry-in from isSub).
+ val in2_inv = Mux(isSub(io.fn), ~io.in2, io.in2)
+ val in1_xor_in2 = io.in1 ^ in2_inv
+ io.adder_out := io.in1 + in2_inv + isSub(io.fn)
+
+ // SLT, SLTU
+ // Equality from the XOR; ordering from the subtractor's sign bit, with
+ // the sign-differs case resolved by signedness of the compare.
+ io.cmp_out := cmpInverted(io.fn) ^
+ Mux(cmpEq(io.fn), in1_xor_in2 === UInt(0),
+ Mux(io.in1(xLen-1) === io.in2(xLen-1), io.adder_out(xLen-1),
+ Mux(cmpUnsigned(io.fn), io.in2(xLen-1), io.in1(xLen-1))))
+
+ // SLL, SRL, SRA
+ // On RV64, 32-bit shifts replicate/ignore the upper half as needed and
+ // clamp the shift amount to 5 bits.
+ val (shamt, shin_r) =
+ if (xLen == 32) (io.in2(4,0), io.in1)
+ else {
+ require(xLen == 64)
+ val shin_hi_32 = Fill(32, isSub(io.fn) && io.in1(31))
+ val shin_hi = Mux(io.dw === DW_64, io.in1(63,32), shin_hi_32)
+ val shamt = Cat(io.in2(5) & (io.dw === DW_64), io.in2(4,0))
+ (shamt, Cat(shin_hi, io.in1(31,0)))
+ }
+ // Left shifts reuse the right shifter by reversing input and output.
+ val shin = Mux(io.fn === FN_SR || io.fn === FN_SRA, shin_r, Reverse(shin_r))
+ val shout_r = (Cat(isSub(io.fn) & shin(xLen-1), shin).toSInt >> shamt)(xLen-1,0)
+ val shout_l = Reverse(shout_r)
+ val shout = Mux(io.fn === FN_SR || io.fn === FN_SRA, shout_r, UInt(0)) |
+ Mux(io.fn === FN_SL, shout_l, UInt(0))
+
+ // AND, OR, XOR
+ // XOR comes free from the adder's operand XOR; OR = XOR | AND.
+ val logic = Mux(io.fn === FN_XOR || io.fn === FN_OR, in1_xor_in2, UInt(0)) |
+ Mux(io.fn === FN_OR || io.fn === FN_AND, io.in1 & io.in2, UInt(0))
+ val shift_logic = (isCmp(io.fn) && io.cmp_out) | logic | shout
+ val out = Mux(io.fn === FN_ADD || io.fn === FN_SUB, io.adder_out, shift_logic)
+
+ io.out := out
+ if (xLen > 32) {
+ require(xLen == 64)
+ // 32-bit ops sign-extend their result to 64 bits.
+ when (io.dw === DW_32) { io.out := Cat(Fill(32, out(31)), out(31,0)) }
+ }
+}
diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala
new file mode 100644
index 00000000..75153249
--- /dev/null
+++ b/rocket/src/main/scala/fpu.scala
@@ -0,0 +1,641 @@
+// See LICENSE for license details.
+
+package rocket
+
+import Chisel._
+import Instructions._
+import Util._
+import FPConstants._
+import uncore.constants.MemoryOpConstants._
+import cde.{Parameters, Field}
+
+// Configuration keys: pipeline latency of the single- and double-precision
+// fused multiply-add units.
+case object SFMALatency extends Field[Int]
+case object DFMALatency extends Field[Int]
+
+// Internal 5-bit FP command encodings (BitPats with don't-care bits) plus
+// rounding-mode and exception-flag field widths. Note some patterns overlap
+// (e.g. FCMD_CVT_FF / FCMD_CVT_FI); they are disambiguated by which
+// functional-unit enable signals the decoder asserts alongside the command.
+object FPConstants
+{
+ val FCMD_ADD = BitPat("b0??00")
+ val FCMD_SUB = BitPat("b0??01")
+ val FCMD_MUL = BitPat("b0??10")
+ val FCMD_MADD = BitPat("b1??00")
+ val FCMD_MSUB = BitPat("b1??01")
+ val FCMD_NMSUB = BitPat("b1??10")
+ val FCMD_NMADD = BitPat("b1??11")
+ val FCMD_DIV = BitPat("b?0011")
+ val FCMD_SQRT = BitPat("b?1011")
+ val FCMD_SGNJ = BitPat("b??1?0")
+ val FCMD_MINMAX = BitPat("b?01?1")
+ val FCMD_CVT_FF = BitPat("b??0??")
+ val FCMD_CVT_IF = BitPat("b?10??")
+ val FCMD_CMP = BitPat("b?01??")
+ val FCMD_MV_XF = BitPat("b?11??")
+ val FCMD_CVT_FI = BitPat("b??0??")
+ val FCMD_MV_FX = BitPat("b??1??")
+ val FCMD_X = BitPat("b?????")
+ val FCMD_WIDTH = 5
+
+ val RM_SZ = 3
+ val FLAGS_SZ = 5
+}
+
+// Decoded FPU control signals: which register ports are read (ren1-3),
+// operand swaps, precision (single), data-path selects (fromint/toint/
+// fastpipe/fma/div/sqrt), and whether the op rounds / writes flags.
+class FPUCtrlSigs extends Bundle
+{
+ val cmd = Bits(width = FCMD_WIDTH)
+ val ldst = Bool()
+ val wen = Bool()
+ val ren1 = Bool()
+ val ren2 = Bool()
+ val ren3 = Bool()
+ val swap12 = Bool()
+ val swap23 = Bool()
+ val single = Bool()
+ val fromint = Bool()
+ val toint = Bool()
+ val fastpipe = Bool()
+ val fma = Bool()
+ val div = Bool()
+ val sqrt = Bool()
+ val round = Bool()
+ val wflags = Bool()
+}
+
+// Instruction decoder for the FPU: maps each FP opcode to its FPUCtrlSigs
+// via a DecodeLogic truth table.
+class FPUDecoder extends Module
+{
+ val io = new Bundle {
+ val inst = Bits(INPUT, 32)
+ val sigs = new FPUCtrlSigs().asOutput
+ }
+
+ // Table columns, in order (matching `sigs` below):
+ // cmd, ldst, wen, ren1, ren2, ren3, swap12, swap23, single,
+ // fromint, toint, fastpipe, fma, div, sqrt, round, wflags
+ val decoder = DecodeLogic(io.inst,
+ List (FCMD_X, X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X),
+ Array(FLW -> List(FCMD_X, Y,Y,N,N,N,X,X,Y,N,N,N,N,N,N,N,N),
+ FLD -> List(FCMD_X, Y,Y,N,N,N,X,X,N,N,N,N,N,N,N,N,N),
+ FSW -> List(FCMD_MV_XF, Y,N,N,Y,N,Y,X,Y,N,Y,N,N,N,N,N,N),
+ FSD -> List(FCMD_MV_XF, Y,N,N,Y,N,Y,X,N,N,Y,N,N,N,N,N,N),
+ FMV_S_X -> List(FCMD_MV_FX, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,N),
+ FMV_D_X -> List(FCMD_MV_FX, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,N),
+ FCVT_S_W -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y),
+ FCVT_S_WU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y),
+ FCVT_S_L -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y),
+ FCVT_S_LU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y),
+ FCVT_D_W -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y),
+ FCVT_D_WU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y),
+ FCVT_D_L -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y),
+ FCVT_D_LU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y),
+ FMV_X_S -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,N),
+ FMV_X_D -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,N),
+ FCLASS_S -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,N),
+ FCLASS_D -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,N),
+ FCVT_W_S -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y),
+ FCVT_WU_S-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y),
+ FCVT_L_S -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y),
+ FCVT_LU_S-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y),
+ FCVT_W_D -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y),
+ FCVT_WU_D-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y),
+ FCVT_L_D -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y),
+ FCVT_LU_D-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y),
+ FCVT_S_D -> List(FCMD_CVT_FF, N,Y,Y,N,N,N,X,Y,N,N,Y,N,N,N,Y,Y),
+ FCVT_D_S -> List(FCMD_CVT_FF, N,Y,Y,N,N,N,X,N,N,N,Y,N,N,N,Y,Y),
+ FEQ_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,N,Y),
+ FLT_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,N,Y),
+ FLE_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,N,Y),
+ FEQ_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,N,Y),
+ FLT_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,N,Y),
+ FLE_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,N,Y),
+ FSGNJ_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,N),
+ FSGNJN_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,N),
+ FSGNJX_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,N),
+ FSGNJ_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,N),
+ FSGNJN_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,N),
+ FSGNJX_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,N),
+ FMIN_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,Y),
+ FMAX_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,Y),
+ FMIN_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,Y),
+ FMAX_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,Y),
+ FADD_S -> List(FCMD_ADD, N,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y,Y),
+ FSUB_S -> List(FCMD_SUB, N,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y,Y),
+ FMUL_S -> List(FCMD_MUL, N,Y,Y,Y,N,N,N,Y,N,N,N,Y,N,N,Y,Y),
+ FADD_D -> List(FCMD_ADD, N,Y,Y,Y,N,N,Y,N,N,N,N,Y,N,N,Y,Y),
+ FSUB_D -> List(FCMD_SUB, N,Y,Y,Y,N,N,Y,N,N,N,N,Y,N,N,Y,Y),
+ FMUL_D -> List(FCMD_MUL, N,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,Y,Y),
+ FMADD_S -> List(FCMD_MADD, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y),
+ FMSUB_S -> List(FCMD_MSUB, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y),
+ FNMADD_S -> List(FCMD_NMADD, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y),
+ FNMSUB_S -> List(FCMD_NMSUB, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y),
+ FMADD_D -> List(FCMD_MADD, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y),
+ FMSUB_D -> List(FCMD_MSUB, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y),
+ FNMADD_D -> List(FCMD_NMADD, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y),
+ FNMSUB_D -> List(FCMD_NMSUB, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y),
+ FDIV_S -> List(FCMD_DIV, N,Y,Y,Y,N,N,N,Y,N,N,N,N,Y,N,Y,Y),
+ FSQRT_S -> List(FCMD_SQRT, N,Y,Y,N,N,Y,X,Y,N,N,N,N,N,Y,Y,Y),
+ FDIV_D -> List(FCMD_DIV, N,Y,Y,Y,N,N,N,N,N,N,N,N,Y,N,Y,Y),
+ FSQRT_D -> List(FCMD_SQRT, N,Y,Y,N,N,Y,X,N,N,N,N,N,N,Y,Y,Y)
+ ))
+ // Wire decoder outputs to the control-signal bundle, in table order.
+ val s = io.sigs
+ val sigs = Seq(s.cmd, s.ldst, s.wen, s.ren1, s.ren2, s.ren3, s.swap12,
+ s.swap23, s.single, s.fromint, s.toint, s.fastpipe, s.fma,
+ s.div, s.sqrt, s.round, s.wflags)
+ sigs zip decoder map {case(s,d) => s := d}
+}
+
+// FPU <-> core interface: instruction and integer operand in; store data,
+// integer results, and fcsr flags out; D$ response port for FP loads;
+// scoreboard and kill signals for pipeline hazard tracking; and a coprocessor
+// request/response port that bypasses the kill signals.
+class FPUIO(implicit p: Parameters) extends CoreBundle {
+ val inst = Bits(INPUT, 32)
+ val fromint_data = Bits(INPUT, xLen)
+
+ val fcsr_rm = Bits(INPUT, FPConstants.RM_SZ)
+ val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ))
+
+ val store_data = Bits(OUTPUT, 64)
+ val toint_data = Bits(OUTPUT, xLen)
+
+ // FP load writeback from the data cache.
+ val dmem_resp_val = Bool(INPUT)
+ val dmem_resp_type = Bits(INPUT, 3)
+ val dmem_resp_tag = UInt(INPUT, 5)
+ val dmem_resp_data = Bits(INPUT, 64)
+
+ val valid = Bool(INPUT)
+ val fcsr_rdy = Bool(OUTPUT)
+ val nack_mem = Bool(OUTPUT)
+ val illegal_rm = Bool(OUTPUT)
+ val killx = Bool(INPUT)
+ val killm = Bool(INPUT)
+ val dec = new FPUCtrlSigs().asOutput
+ val sboard_set = Bool(OUTPUT)
+ val sboard_clr = Bool(OUTPUT)
+ val sboard_clra = UInt(OUTPUT, 5)
+
+ val cp_req = Decoupled(new FPInput()).flip //cp doesn't pay attn to kill sigs
+ val cp_resp = Decoupled(new FPResult())
+}
+
+// FP unit result: 65-bit recoded-format data plus the 5 exception flags.
+class FPResult extends Bundle
+{
+ val data = Bits(width = 65)
+ val exc = Bits(width = 5)
+}
+
+// FP unit operand bundle: control signals plus rounding mode, integer type
+// selector, and up to three 65-bit recoded-format operands.
+class FPInput extends FPUCtrlSigs {
+ val rm = Bits(width = 3)
+ val typ = Bits(width = 2)
+ val in1 = Bits(width = 65)
+ val in2 = Bits(width = 65)
+ val in3 = Bits(width = 65)
+}
+
+// FCLASS implementation for hardfloat's recoded float format: decodes the
+// recoded exponent's top bits to classify the value, returning the 10-bit
+// RISC-V class mask (-inf, -normal, -subnormal, -0, +0, +subnormal,
+// +normal, +inf, sNaN, qNaN), LSB = -inf.
+object ClassifyRecFN {
+ def apply(expWidth: Int, sigWidth: Int, in: UInt) = {
+ val sign = in(sigWidth + expWidth)
+ val exp = in(sigWidth + expWidth - 1, sigWidth - 1)
+ val sig = in(sigWidth - 2, 0)
+
+ // In the recoded format the top exponent bits encode the value class.
+ val code = exp(expWidth,expWidth-2)
+ val codeHi = code(2, 1)
+ val isSpecial = codeHi === UInt(3)
+
+ val isHighSubnormalIn = exp(expWidth-2, 0) < UInt(2)
+ val isSubnormal = code === UInt(1) || codeHi === UInt(1) && isHighSubnormalIn
+ val isNormal = codeHi === UInt(1) && !isHighSubnormalIn || codeHi === UInt(2)
+ val isZero = code === UInt(0)
+ val isInf = isSpecial && !exp(expWidth-2)
+ val isNaN = code.andR
+ // NaN signaling-ness is the top fraction bit (0 = signaling).
+ val isSNaN = isNaN && !sig(sigWidth-2)
+ val isQNaN = isNaN && sig(sigWidth-2)
+
+ Cat(isQNaN, isSNaN, isInf && !sign, isNormal && !sign,
+ isSubnormal && !sign, isZero && !sign, isZero && sign,
+ isSubnormal && sign, isNormal && sign, isInf && sign)
+ }
+}
+
+// Handles FP operations whose result leaves the FP register file:
+// FP->int conversions, comparisons, classify, stores, and moves to the
+// integer side.  Inputs are registered for one cycle; single-precision
+// operands are widened to double first so one set of double-width units
+// serves both formats.
+class FPToInt extends Module
+{
+ val io = new Bundle {
+ val in = Valid(new FPInput).flip
+ val as_double = new FPInput().asOutput // registered (post-upconvert) input, re-exported for div/sqrt
+ val out = Valid(new Bundle {
+ val lt = Bool() // raw a<b compare result, consumed by FPToFP min/max
+ val store = Bits(width = 64)
+ val toint = Bits(width = 64)
+ val exc = Bits(width = 5)
+ })
+ }
+
+ val in = Reg(new FPInput)
+ val valid = Reg(next=io.in.valid)
+
+ // Widen a recoded single to a recoded double; the widening is exact, so
+ // the hard-wired rounding mode is irrelevant.
+ def upconvert(x: UInt) = {
+ val s2d = Module(new hardfloat.RecFNToRecFN(8, 24, 11, 53))
+ s2d.io.in := x
+ s2d.io.roundingMode := UInt(0)
+ s2d.io.out
+ }
+
+ val in1_upconvert = upconvert(io.in.bits.in1)
+ val in2_upconvert = upconvert(io.in.bits.in2)
+
+ when (io.in.valid) {
+ in := io.in.bits
+ // Loads/stores and bit-level moves keep raw bits; everything else
+ // operates at double precision.
+ when (io.in.bits.single && !io.in.bits.ldst && io.in.bits.cmd =/= FCMD_MV_XF) {
+ in.in1 := in1_upconvert
+ in.in2 := in2_upconvert
+ }
+ }
+
+ // Un-recode in1 back to IEEE bits; single results are sign-extended to 64.
+ val unrec_s = hardfloat.fNFromRecFN(8, 24, in.in1)
+ val unrec_d = hardfloat.fNFromRecFN(11, 53, in.in1)
+ val unrec_out = Mux(in.single, Cat(Fill(32, unrec_s(31)), unrec_s), unrec_d)
+
+ val classify_s = ClassifyRecFN(8, 24, in.in1)
+ val classify_d = ClassifyRecFN(11, 53, in.in1)
+ val classify_out = Mux(in.single, classify_s, classify_d)
+
+ val dcmp = Module(new hardfloat.CompareRecFN(11, 53))
+ dcmp.io.a := in.in1
+ dcmp.io.b := in.in2
+ dcmp.io.signaling := Bool(true)
+ // For compares, rm doubles as the sub-op selector: ~rm masks {lt, eq}.
+ val dcmp_out = (~in.rm & Cat(dcmp.io.lt, dcmp.io.eq)).orR
+ val dcmp_exc = dcmp.io.exceptionFlags
+
+ val d2l = Module(new hardfloat.RecFNToIN(11, 53, 64))
+ val d2w = Module(new hardfloat.RecFNToIN(11, 53, 32))
+ d2l.io.in := in.in1
+ d2l.io.roundingMode := in.rm
+ d2l.io.signedOut := ~in.typ(0)
+ d2w.io.in := in.in1
+ d2w.io.roundingMode := in.rm
+ d2w.io.signedOut := ~in.typ(0)
+
+ // Defaults (move/classify); the when blocks below take precedence via
+ // Chisel last-connect semantics.
+ io.out.bits.toint := Mux(in.rm(0), classify_out, unrec_out)
+ io.out.bits.store := unrec_out
+ io.out.bits.exc := Bits(0)
+
+ when (in.cmd === FCMD_CMP) {
+ io.out.bits.toint := dcmp_out
+ io.out.bits.exc := dcmp_exc
+ }
+ when (in.cmd === FCMD_CVT_IF) {
+ // typ(1) selects 64-bit over 32-bit; word results are sign-extended.
+ io.out.bits.toint := Mux(in.typ(1), d2l.io.out.toSInt, d2w.io.out.toSInt).toUInt
+ val dflags = Mux(in.typ(1), d2l.io.intExceptionFlags, d2w.io.intExceptionFlags)
+ // Fold the converter's integer flags into FP-flag format: the two high
+ // bits map onto NV, the low bit onto NX.
+ io.out.bits.exc := Cat(dflags(2, 1).orR, UInt(0, 3), dflags(0))
+ }
+
+ io.out.valid := valid
+ io.out.bits.lt := dcmp.io.lt
+ io.as_double := in
+}
+
+// Integer-to-FP unit: moves from the integer register file (recode raw IEEE
+// bits) and int->float conversions, with a configurable pipeline latency.
+class IntToFP(val latency: Int) extends Module
+{
+ val io = new Bundle {
+ val in = Valid(new FPInput).flip
+ val out = Valid(new FPResult)
+ }
+
+ val in = Pipe(io.in)
+
+ // Default result: recode the raw IEEE bits of in1 (the move path).
+ // Last-connect: the when blocks below override for conversions.
+ val mux = Wire(new FPResult)
+ mux.exc := Bits(0)
+ mux.data := hardfloat.recFNFromFN(11, 53, in.bits.in1)
+ when (in.bits.single) {
+ mux.data := Cat(SInt(-1, 32), hardfloat.recFNFromFN(8, 24, in.bits.in1))
+ }
+
+ // Extend the integer source per typ: full 64-bit, zero-extended word, or
+ // sign-extended word.
+ val longValue =
+ Mux(in.bits.typ(1), in.bits.in1.toSInt,
+ Mux(in.bits.typ(0), in.bits.in1(31,0).zext, in.bits.in1(31,0).toSInt))
+ val l2s = Module(new hardfloat.INToRecFN(64, 8, 24))
+ l2s.io.signedIn := ~in.bits.typ(0)
+ l2s.io.in := longValue.toUInt
+ l2s.io.roundingMode := in.bits.rm
+
+ val l2d = Module(new hardfloat.INToRecFN(64, 11, 53))
+ l2d.io.signedIn := ~in.bits.typ(0)
+ l2d.io.in := longValue.toUInt
+ l2d.io.roundingMode := in.bits.rm
+
+ when (in.bits.cmd === FCMD_CVT_FI) {
+ when (in.bits.single) {
+ // Single results carry all-ones in the upper 32 bits.
+ mux.data := Cat(SInt(-1, 32), l2s.io.out)
+ mux.exc := l2s.io.exceptionFlags
+ }.otherwise {
+ mux.data := l2d.io.out
+ mux.exc := l2d.io.exceptionFlags
+ }
+ }
+
+ // One cycle was consumed by the input Pipe; absorb the remaining latency-1.
+ io.out <> Pipe(in.valid, mux, latency-1)
+}
+
+// FP-to-FP "fast pipe": sign injection (FSGNJ family), min/max (using the
+// comparison result computed by FPToInt), and single<->double conversion.
+class FPToFP(val latency: Int) extends Module
+{
+ val io = new Bundle {
+ val in = Valid(new FPInput).flip
+ val out = Valid(new FPResult)
+ val lt = Bool(INPUT) // from FPToInt
+ }
+
+ val in = Pipe(io.in)
+
+ // fp->fp units
+ val isSgnj = in.bits.cmd === FCMD_SGNJ
+ // rm selects the sign-injection variant: copy in2's sign, negate it
+ // (rm(0)), or xor the two signs (rm(1)).  When en is false this reduces
+ // to passing in1's own sign bit through unchanged.
+ def fsgnjSign(in1: Bits, in2: Bits, pos: Int, en: Bool, rm: Bits) =
+ Mux(rm(1) || !en, in1(pos), rm(0)) ^ (en && in2(pos))
+ val sign_s = fsgnjSign(in.bits.in1, in.bits.in2, 32, in.bits.single && isSgnj, in.bits.rm)
+ val sign_d = fsgnjSign(in.bits.in1, in.bits.in2, 64, !in.bits.single && isSgnj, in.bits.rm)
+ val fsgnj = Cat(sign_d, in.bits.in1(63,33), sign_s, in.bits.in1(31,0))
+
+ val s2d = Module(new hardfloat.RecFNToRecFN(8, 24, 11, 53))
+ val d2s = Module(new hardfloat.RecFNToRecFN(11, 53, 8, 24))
+ s2d.io.in := in.bits.in1
+ s2d.io.roundingMode := in.bits.rm
+ d2s.io.in := in.bits.in1
+ d2s.io.roundingMode := in.bits.rm
+
+ // NaN detection on recoded operands: top three exponent bits all set.
+ val isnan1 = Mux(in.bits.single, in.bits.in1(31,29).andR, in.bits.in1(63,61).andR)
+ val isnan2 = Mux(in.bits.single, in.bits.in2(31,29).andR, in.bits.in2(63,61).andR)
+ val issnan1 = isnan1 && ~Mux(in.bits.single, in.bits.in1(22), in.bits.in1(51))
+ val issnan2 = isnan2 && ~Mux(in.bits.single, in.bits.in2(22), in.bits.in2(51))
+ val minmax_exc = Cat(issnan1 || issnan2, Bits(0,4)) // signaling NaN raises NV
+ val isMax = in.bits.rm(0) // rm also selects min vs. max here
+ // Pick in1 when it wins the min/max comparison, or when in2 is NaN.
+ val isLHS = isnan2 || isMax =/= io.lt && !isnan1
+
+ // Defaults implement min/max; later when blocks override (last-connect).
+ val mux = Wire(new FPResult)
+ mux.exc := minmax_exc
+ mux.data := in.bits.in2
+
+ // For min/max's in1 case, fsgnj degenerates to in1 (en is false above).
+ when (isSgnj) { mux.exc := UInt(0) }
+ when (isSgnj || isLHS) { mux.data := fsgnj }
+ when (in.bits.cmd === FCMD_CVT_FF) {
+ when (in.bits.single) {
+ mux.data := Cat(SInt(-1, 32), d2s.io.out)
+ mux.exc := d2s.io.exceptionFlags
+ }.otherwise {
+ mux.data := s2d.io.out
+ mux.exc := s2d.io.exceptionFlags
+ }
+ }
+
+ io.out <> Pipe(in.valid, mux, latency-1)
+}
+
+// Fused multiply-add pipeline of configurable latency for one precision.
+// Plain add/sub and mul are funneled through the FMA by substituting
+// identity operands.
+class FPUFMAPipe(val latency: Int, expWidth: Int, sigWidth: Int) extends Module
+{
+ val io = new Bundle {
+ val in = Valid(new FPInput).flip
+ val out = Valid(new FPResult)
+ }
+
+ val width = sigWidth + expWidth
+ // Recoded constant 1.0 (NOTE(review): relies on the hardfloat recoded
+ // exponent encoding; confirm against the recFNFromFN definition).
+ val one = UInt(1) << (width-1)
+ // A zero carrying the sign of in1*in2, preserving signed-zero results.
+ val zero = (io.in.bits.in1(width) ^ io.in.bits.in2(width)) << width
+
+ val valid = Reg(next=io.in.valid)
+ val in = Reg(new FPInput)
+ when (io.in.valid) {
+ in := io.in.bits
+ val cmd_fma = io.in.bits.ren3
+ val cmd_addsub = io.in.bits.swap23
+ in.cmd := Cat(io.in.bits.cmd(1) & (cmd_fma || cmd_addsub), io.in.bits.cmd(0))
+ when (cmd_addsub) { in.in2 := one } // a +/- c computed as a*1 +/- c
+ unless (cmd_fma || cmd_addsub) { in.in3 := zero } // a*b computed as a*b + (+/-0)
+ }
+
+ val fma = Module(new hardfloat.MulAddRecFN(expWidth, sigWidth))
+ fma.io.op := in.cmd
+ fma.io.roundingMode := in.rm
+ fma.io.a := in.in1
+ fma.io.b := in.in2
+ fma.io.c := in.in3
+
+ val res = Wire(new FPResult)
+ res.data := Cat(SInt(-1, 32), fma.io.out)
+ res.exc := fma.io.exceptionFlags
+ // One cycle already spent registering the input; pipe the remaining
+ // latency-1 cycles.
+ io.out := Pipe(valid, res, latency-1)
+}
+
+// Top-level FPU: decodes FP instructions, owns the 32-entry 65-bit (recoded)
+// FP register file, and steers requests to the FMA pipes, converters, fast
+// pipe, and optional div/sqrt.  Mirrors the core's ex/mem/wb stages and also
+// services coprocessor (cp_req/cp_resp) requests when the main pipe is idle.
+class FPU(implicit p: Parameters) extends CoreModule()(p) {
+ require(xLen == 64, "RV32 Rocket FP support missing")
+ val io = new FPUIO
+
+ // Stage valid bits; a coprocessor request slips into ex when the core has
+ // no FP instruction there, and is immune to the core's kill signals.
+ val ex_reg_valid = Reg(next=io.valid, init=Bool(false))
+ val req_valid = ex_reg_valid || io.cp_req.valid
+ val ex_reg_inst = RegEnable(io.inst, io.valid)
+ val ex_cp_valid = io.cp_req.valid && !ex_reg_valid
+ val mem_reg_valid = Reg(next=ex_reg_valid && !io.killx || ex_cp_valid, init=Bool(false))
+ val mem_reg_inst = RegEnable(ex_reg_inst, ex_reg_valid)
+ val mem_cp_valid = Reg(next=ex_cp_valid, init=Bool(false))
+ val killm = (io.killm || io.nack_mem) && !mem_cp_valid
+ val wb_reg_valid = Reg(next=mem_reg_valid && (!killm || mem_cp_valid), init=Bool(false))
+ val wb_cp_valid = Reg(next=mem_cp_valid, init=Bool(false))
+
+ val fp_decoder = Module(new FPUDecoder)
+ fp_decoder.io.inst := io.inst
+
+ val cp_ctrl = Wire(new FPUCtrlSigs)
+ cp_ctrl <> io.cp_req.bits
+ io.cp_resp.valid := Bool(false)
+ io.cp_resp.bits.data := UInt(0)
+
+ // Control signals follow the request down the pipe stages.
+ val id_ctrl = fp_decoder.io.sigs
+ val ex_ctrl = Mux(ex_reg_valid, RegEnable(id_ctrl, io.valid), cp_ctrl)
+ val mem_ctrl = RegEnable(ex_ctrl, req_valid)
+ val wb_ctrl = RegEnable(mem_ctrl, mem_reg_valid)
+
+ // load response: recode incoming load data before it enters the regfile.
+ val load_wb = Reg(next=io.dmem_resp_val)
+ val load_wb_single = RegEnable(io.dmem_resp_type === MT_W || io.dmem_resp_type === MT_WU, io.dmem_resp_val)
+ val load_wb_data = RegEnable(io.dmem_resp_data, io.dmem_resp_val)
+ val load_wb_tag = RegEnable(io.dmem_resp_tag, io.dmem_resp_val)
+ val rec_s = hardfloat.recFNFromFN(8, 24, load_wb_data)
+ val rec_d = hardfloat.recFNFromFN(11, 53, load_wb_data)
+ val load_wb_data_recoded = Mux(load_wb_single, Cat(SInt(-1, 32), rec_s), rec_d)
+
+ // regfile
+ val regfile = Mem(32, Bits(width = 65))
+ when (load_wb) {
+ regfile(load_wb_tag) := load_wb_data_recoded
+ if (enableCommitLog) {
+ printf ("f%d p%d 0x%x\n", load_wb_tag, load_wb_tag + UInt(32),
+ Mux(load_wb_single, load_wb_data(31,0), load_wb_data))
+ }
+ }
+
+ // Capture source register addresses in decode, honoring operand swaps so
+ // each functional unit sees operands in canonical positions.
+ val ex_ra1::ex_ra2::ex_ra3::Nil = List.fill(3)(Reg(UInt()))
+ when (io.valid) {
+ when (id_ctrl.ren1) {
+ when (!id_ctrl.swap12) { ex_ra1 := io.inst(19,15) }
+ when (id_ctrl.swap12) { ex_ra2 := io.inst(19,15) }
+ }
+ when (id_ctrl.ren2) {
+ when (id_ctrl.swap12) { ex_ra1 := io.inst(24,20) }
+ when (id_ctrl.swap23) { ex_ra3 := io.inst(24,20) }
+ when (!id_ctrl.swap12 && !id_ctrl.swap23) { ex_ra2 := io.inst(24,20) }
+ }
+ when (id_ctrl.ren3) { ex_ra3 := io.inst(31,27) }
+ }
+ val ex_rs1::ex_rs2::ex_rs3::Nil = Seq(ex_ra1, ex_ra2, ex_ra3).map(regfile(_))
+ // Dynamic rounding mode: instruction rm field of 7 selects fcsr.rm.
+ val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.fcsr_rm, ex_reg_inst(14,12))
+
+ val cp_rs1 = io.cp_req.bits.in1
+ val cp_rs2 = Mux(io.cp_req.bits.swap23, io.cp_req.bits.in3, io.cp_req.bits.in2)
+ val cp_rs3 = Mux(io.cp_req.bits.swap23, io.cp_req.bits.in2, io.cp_req.bits.in3)
+
+ // Mux the operand/control source between the core's ex stage and the
+ // coprocessor request; this one bundle feeds every functional unit.
+ val req = Wire(new FPInput)
+ req := ex_ctrl
+ req.rm := Mux(ex_reg_valid, ex_rm, io.cp_req.bits.rm)
+ req.in1 := Mux(ex_reg_valid, ex_rs1, cp_rs1)
+ req.in2 := Mux(ex_reg_valid, ex_rs2, cp_rs2)
+ req.in3 := Mux(ex_reg_valid, ex_rs3, cp_rs3)
+ req.typ := Mux(ex_reg_valid, ex_reg_inst(21,20), io.cp_req.bits.typ)
+
+ val sfma = Module(new FPUFMAPipe(p(SFMALatency), 8, 24))
+ sfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.single
+ sfma.io.in.bits := req
+
+ val dfma = Module(new FPUFMAPipe(p(DFMALatency), 11, 53))
+ dfma.io.in.valid := req_valid && ex_ctrl.fma && !ex_ctrl.single
+ dfma.io.in.bits := req
+
+ // FPToInt also serves div/sqrt (operand upconversion) and min/max (lt).
+ val fpiu = Module(new FPToInt)
+ fpiu.io.in.valid := req_valid && (ex_ctrl.toint || ex_ctrl.div || ex_ctrl.sqrt || ex_ctrl.cmd === FCMD_MINMAX)
+ fpiu.io.in.bits := req
+ io.store_data := fpiu.io.out.bits.store
+ io.toint_data := fpiu.io.out.bits.toint
+ when(fpiu.io.out.valid && mem_cp_valid && mem_ctrl.toint){
+ io.cp_resp.bits.data := fpiu.io.out.bits.toint
+ io.cp_resp.valid := Bool(true)
+ }
+
+ val ifpu = Module(new IntToFP(3))
+ ifpu.io.in.valid := req_valid && ex_ctrl.fromint
+ ifpu.io.in.bits := req
+ ifpu.io.in.bits.in1 := Mux(ex_reg_valid, io.fromint_data, cp_rs1)
+
+ val fpmu = Module(new FPToFP(2))
+ fpmu.io.in.valid := req_valid && ex_ctrl.fastpipe
+ fpmu.io.in.bits := req
+ fpmu.io.lt := fpiu.io.out.bits.lt
+
+ // Div/sqrt bookkeeping; the wires are driven in the FDivSqrt block below.
+ val divSqrt_wen = Reg(next=Bool(false))
+ val divSqrt_inReady = Wire(init=Bool(false))
+ val divSqrt_waddr = Reg(Bits())
+ val divSqrt_wdata = Wire(Bits())
+ val divSqrt_flags = Wire(Bits())
+ val divSqrt_in_flight = Reg(init=Bool(false))
+ val divSqrt_killed = Reg(Bool())
+
+ // writeback arbitration
+ case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, res: FPResult)
+ val pipes = List(
+ Pipe(fpmu, fpmu.latency, (c: FPUCtrlSigs) => c.fastpipe, fpmu.io.out.bits),
+ Pipe(ifpu, ifpu.latency, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits),
+ Pipe(sfma, sfma.latency, (c: FPUCtrlSigs) => c.fma && c.single, sfma.io.out.bits),
+ Pipe(dfma, dfma.latency, (c: FPUCtrlSigs) => c.fma && !c.single, dfma.io.out.bits))
+ // One-hot mask of the cycle (relative to now) the selected unit writes back.
+ def latencyMask(c: FPUCtrlSigs, offset: Int) = {
+ require(pipes.forall(_.lat >= offset))
+ pipes.map(p => Mux(p.cond(c), UInt(1 << p.lat-offset), UInt(0))).reduce(_|_)
+ }
+ def pipeid(c: FPUCtrlSigs) = pipes.zipWithIndex.map(p => Mux(p._1.cond(c), UInt(p._2), UInt(0))).reduce(_|_)
+ val maxLatency = pipes.map(_.lat).max
+ val memLatencyMask = latencyMask(mem_ctrl, 2)
+
+ // wen is a shift register of pending writebacks; winfo carries, per slot,
+ // the writeback metadata: [4:0] waddr, [5] single, then pipe id, MSB = cp.
+ val wen = Reg(init=Bits(0, maxLatency-1))
+ val winfo = Reg(Vec(maxLatency-1, Bits()))
+ val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint)
+ // Structural hazard: a new op would write back the same cycle as one in flight.
+ val write_port_busy = RegEnable(mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, req_valid)
+ val mem_winfo = Cat(mem_cp_valid, pipeid(mem_ctrl), mem_ctrl.single, mem_reg_inst(11,7)) //single only used for debugging
+
+ for (i <- 0 until maxLatency-2) {
+ when (wen(i+1)) { winfo(i) := winfo(i+1) }
+ }
+ wen := wen >> 1
+ when (mem_wen) {
+ when (!killm) {
+ wen := wen >> 1 | memLatencyMask
+ }
+ for (i <- 0 until maxLatency-1) {
+ when (!write_port_busy && memLatencyMask(i)) {
+ winfo(i) := mem_winfo
+ }
+ }
+ }
+
+ // Slot 0 of wen/winfo is the writeback happening this cycle.
+ val waddr = Mux(divSqrt_wen, divSqrt_waddr, winfo(0)(4,0).toUInt)
+ val wsrc = (winfo(0) >> 6)(log2Up(pipes.size) - 1,0)
+ val wcp = winfo(0)(6+log2Up(pipes.size))
+ val wdata = Mux(divSqrt_wen, divSqrt_wdata, (pipes.map(_.res.data): Seq[UInt])(wsrc))
+ val wexc = (pipes.map(_.res.exc): Seq[UInt])(wsrc)
+ when ((!wcp && wen(0)) || divSqrt_wen) {
+ regfile(waddr) := wdata
+ if (enableCommitLog) {
+ val wdata_unrec_s = hardfloat.fNFromRecFN(8, 24, wdata(64,0))
+ val wdata_unrec_d = hardfloat.fNFromRecFN(11, 53, wdata(64,0))
+ val wb_single = (winfo(0) >> 5)(0)
+ printf ("f%d p%d 0x%x\n", waddr, waddr+ UInt(32),
+ Mux(wb_single, Cat(UInt(0,32), wdata_unrec_s), wdata_unrec_d))
+ }
+ }
+ when (wcp && wen(0)) {
+ io.cp_resp.bits.data := wdata
+ io.cp_resp.valid := Bool(true)
+ }
+ io.cp_req.ready := !ex_reg_valid
+
+ // Accumulate exception flags into fcsr from toint, div/sqrt, and pipes.
+ val wb_toint_valid = wb_reg_valid && wb_ctrl.toint
+ val wb_toint_exc = RegEnable(fpiu.io.out.bits.exc, mem_ctrl.toint)
+ io.fcsr_flags.valid := wb_toint_valid || divSqrt_wen || wen(0)
+ io.fcsr_flags.bits :=
+ Mux(wb_toint_valid, wb_toint_exc, UInt(0)) |
+ Mux(divSqrt_wen, divSqrt_flags, UInt(0)) |
+ Mux(wen(0), wexc, UInt(0))
+
+ val units_busy = mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && (!divSqrt_inReady || wen.orR) // || mem_reg_valid && mem_ctrl.fma && Reg(next=Mux(ex_ctrl.single, io.sfma.valid, io.dfma.valid))
+ io.fcsr_rdy := !(ex_reg_valid && ex_ctrl.wflags || mem_reg_valid && mem_ctrl.wflags || wb_reg_valid && wb_ctrl.toint || wen.orR || divSqrt_in_flight)
+ io.nack_mem := units_busy || write_port_busy || divSqrt_in_flight
+ io.dec <> fp_decoder.io.sigs
+ // The core scoreboards destinations of ops with latency > 3 (and div/sqrt).
+ def useScoreboard(f: ((Pipe, Int)) => Bool) = pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(Bool(false))(_||_)
+ io.sboard_set := wb_reg_valid && !wb_cp_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl)) || mem_ctrl.div || mem_ctrl.sqrt)
+ io.sboard_clr := !wb_cp_valid && (divSqrt_wen || (wen(0) && useScoreboard(x => wsrc === UInt(x._2))))
+ io.sboard_clra := waddr
+ // we don't currently support round-max-magnitude (rm=4)
+ io.illegal_rm := ex_rm(2) && ex_ctrl.round
+
+ divSqrt_wdata := 0
+ divSqrt_flags := 0
+ if (p(FDivSqrt)) {
+ val divSqrt_single = Reg(Bool())
+ val divSqrt_rm = Reg(Bits())
+ val divSqrt_flags_double = Reg(Bits())
+ val divSqrt_wdata_double = Reg(Bits())
+
+ // Div/sqrt always computes in double; operands come pre-upconverted
+ // from FPToInt's registered as_double output.
+ val divSqrt = Module(new hardfloat.DivSqrtRecF64)
+ divSqrt_inReady := Mux(divSqrt.io.sqrtOp, divSqrt.io.inReady_sqrt, divSqrt.io.inReady_div)
+ val divSqrt_outValid = divSqrt.io.outValid_div || divSqrt.io.outValid_sqrt
+ divSqrt.io.inValid := mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && !divSqrt_in_flight
+ divSqrt.io.sqrtOp := mem_ctrl.sqrt
+ divSqrt.io.a := fpiu.io.as_double.in1
+ divSqrt.io.b := fpiu.io.as_double.in2
+ divSqrt.io.roundingMode := fpiu.io.as_double.rm
+
+ when (divSqrt.io.inValid && divSqrt_inReady) {
+ divSqrt_in_flight := true
+ divSqrt_killed := killm
+ divSqrt_single := mem_ctrl.single
+ divSqrt_waddr := mem_reg_inst(11,7)
+ divSqrt_rm := divSqrt.io.roundingMode
+ }
+
+ when (divSqrt_outValid) {
+ // A killed op still completes but its writeback is suppressed.
+ divSqrt_wen := !divSqrt_killed
+ divSqrt_wdata_double := divSqrt.io.out
+ divSqrt_in_flight := false
+ divSqrt_flags_double := divSqrt.io.exceptionFlags
+ }
+
+ // Round the double result back down for single-precision ops.
+ val divSqrt_toSingle = Module(new hardfloat.RecFNToRecFN(11, 53, 8, 24))
+ divSqrt_toSingle.io.in := divSqrt_wdata_double
+ divSqrt_toSingle.io.roundingMode := divSqrt_rm
+ divSqrt_wdata := Mux(divSqrt_single, divSqrt_toSingle.io.out, divSqrt_wdata_double)
+ divSqrt_flags := divSqrt_flags_double | Mux(divSqrt_single, divSqrt_toSingle.io.exceptionFlags, Bits(0))
+ }
+}
diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala
new file mode 100644
index 00000000..77e8f6e6
--- /dev/null
+++ b/rocket/src/main/scala/frontend.scala
@@ -0,0 +1,130 @@
+package rocket
+
+import Chisel._
+import uncore.tilelink._
+import Util._
+import cde.{Parameters, Field}
+
+// Redirect request from the core: the new fetch PC and whether the redirect
+// target is speculative.
+class FrontendReq(implicit p: Parameters) extends CoreBundle()(p) {
+ val pc = UInt(width = vaddrBitsExtended)
+ val speculative = Bool()
+}
+
+// Fetch packet delivered to the core's decode stage.
+class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) {
+ val pc = UInt(width = vaddrBitsExtended) // ID stage PC
+ val data = Vec(fetchWidth, Bits(width = coreInstBits))
+ val mask = Bits(width = fetchWidth) // which instruction slots are valid
+ val xcpt_if = Bool() // instruction access fault on this fetch
+ val replay = Bool() // data invalid; core must re-request this PC
+}
+
+// Core-facing interface of the frontend: redirect requests in, fetch packets
+// and branch-predictor responses out, plus predictor update channels.
+class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) {
+ val req = Valid(new FrontendReq)
+ val resp = Decoupled(new FrontendResp).flip
+ val btb_resp = Valid(new BTBResp).flip
+ val btb_update = Valid(new BTBUpdate)
+ val bht_update = Valid(new BHTUpdate)
+ val ras_update = Valid(new RASUpdate)
+ val flush_icache = Bool(OUTPUT)
+ val flush_tlb = Bool(OUTPUT)
+ val npc = UInt(INPUT, width = vaddrBitsExtended)
+}
+
+// Two-stage fetch frontend: s1 translates the PC through the ITLB and
+// accesses the ICache; s2 holds the fetch packet presented to the core.
+// On an ICache miss the s2 PC is refetched; a BTB (if configured) can
+// redirect the next PC.
+class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CacheParameters {
+ val io = new Bundle {
+ val cpu = new FrontendIO().flip
+ val ptw = new TLBPTWIO()
+ val mem = new ClientUncachedTileLinkIO
+ }
+
+ val icache = Module(new ICache(latency = 2))
+ val tlb = Module(new TLB)
+
+ val s1_pc_ = Reg(UInt(width=vaddrBitsExtended))
+ val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBS (this propagates down the pipeline)
+ val s1_speculative = Reg(Bool())
+ val s1_same_block = Reg(Bool())
+ val s2_valid = Reg(init=Bool(true))
+ val s2_pc = Reg(init=UInt(p(ResetVector)))
+ val s2_btb_resp_valid = Reg(init=Bool(false))
+ val s2_btb_resp_bits = Reg(new BTBResp)
+ val s2_xcpt_if = Reg(init=Bool(false))
+ val s2_speculative = Reg(init=Bool(false))
+
+ // Next sequential fetch-group PC (s1_pc rounded down, plus one group).
+ val ntpc = ~(~s1_pc | (coreInstBytes*fetchWidth-1)) + UInt(coreInstBytes*fetchWidth)
+ val predicted_npc = Wire(init = ntpc)
+ val icmiss = s2_valid && !icache.io.resp.valid
+ val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt
+ // Sequential fetch stays in the same cache row if the rowBytes bit of the
+ // PC doesn't flip; then the ICache data read can be skipped.
+ val s0_same_block = Wire(init = !icmiss && !io.cpu.req.valid && ((ntpc & rowBytes) === (s1_pc & rowBytes)))
+
+ val stall = io.cpu.resp.valid && !io.cpu.resp.ready
+ when (!stall) {
+ s1_same_block := s0_same_block && !tlb.io.resp.miss
+ s1_pc_ := npc
+ // Refetches of a known-good s2 PC inherit its speculative bit; fresh
+ // sequential/predicted fetches start out speculative.
+ s1_speculative := Mux(icmiss, s2_speculative, true)
+ s2_valid := !icmiss
+ when (!icmiss) {
+ s2_pc := s1_pc
+ s2_speculative := s1_speculative && !tlb.io.resp.cacheable
+ s2_xcpt_if := tlb.io.resp.xcpt_if
+ }
+ }
+ // Core redirects override the sequential path (last-connect).
+ when (io.cpu.req.valid) {
+ s1_same_block := Bool(false)
+ s1_pc_ := io.cpu.req.bits.pc
+ s1_speculative := io.cpu.req.bits.speculative
+ s2_valid := Bool(false)
+ }
+
+ if (p(BtbKey).nEntries > 0) {
+ val btb = Module(new BTB)
+ btb.io.req.valid := false
+ btb.io.req.bits.addr := s1_pc
+ btb.io.btb_update := io.cpu.btb_update
+ btb.io.bht_update := io.cpu.bht_update
+ btb.io.ras_update := io.cpu.ras_update
+ when (!stall && !icmiss) {
+ btb.io.req.valid := true
+ s2_btb_resp_valid := btb.io.resp.valid
+ s2_btb_resp_bits := btb.io.resp.bits
+ }
+ when (btb.io.resp.bits.taken) {
+ predicted_npc := btb.io.resp.bits.target.sextTo(vaddrBitsExtended)
+ s0_same_block := Bool(false)
+ }
+ }
+
+ io.ptw <> tlb.io.ptw
+ tlb.io.req.valid := !stall && !icmiss
+ tlb.io.req.bits.vpn := s1_pc >> pgIdxBits
+ tlb.io.req.bits.passthrough := Bool(false)
+ tlb.io.req.bits.instruction := Bool(true)
+ tlb.io.req.bits.store := Bool(false)
+
+ io.mem <> icache.io.mem
+ icache.io.req.valid := !stall && !s0_same_block
+ icache.io.req.bits.addr := io.cpu.npc
+ icache.io.invalidate := io.cpu.flush_icache
+ icache.io.s1_ppn := tlb.io.resp.ppn
+ icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.cpu.flush_tlb
+ // Speculative fetch of a non-cacheable region must not trigger a refill.
+ icache.io.s2_kill := s2_speculative
+ icache.io.resp.ready := !stall && !s1_same_block
+
+ io.cpu.resp.valid := s2_valid && (icache.io.resp.valid || s2_speculative || s2_xcpt_if)
+ io.cpu.resp.bits.pc := s2_pc
+ io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc)
+
+ require(fetchWidth * coreInstBytes <= rowBytes)
+ // Slice the fetch group's instructions out of the cache row.
+ val fetch_data = icache.io.resp.bits.datablock >> (s2_pc.extract(log2Up(rowBytes)-1,log2Up(fetchWidth*coreInstBytes)) << log2Up(fetchWidth*coreInstBits))
+
+ for (i <- 0 until fetchWidth) {
+ io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits)
+ }
+
+ io.cpu.resp.bits.mask := UInt((1 << fetchWidth)-1) << s2_pc.extract(log2Up(fetchWidth)+log2Up(coreInstBytes)-1, log2Up(coreInstBytes))
+ io.cpu.resp.bits.xcpt_if := s2_xcpt_if
+ // Suppressed speculative fetches respond with replay instead of data.
+ io.cpu.resp.bits.replay := s2_speculative && !icache.io.resp.valid && !s2_xcpt_if
+
+ io.cpu.btb_resp.valid := s2_btb_resp_valid
+ io.cpu.btb_resp.bits := s2_btb_resp_bits
+}
diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala
new file mode 100644
index 00000000..107b332c
--- /dev/null
+++ b/rocket/src/main/scala/icache.scala
@@ -0,0 +1,157 @@
+package rocket
+
+import Chisel._
+import uncore.agents._
+import uncore.tilelink._
+import uncore.util._
+import Util._
+import cde.{Parameters, Field}
+
+// Derives L1 refill geometry from the outer TileLink data width: how many
+// row-sized writes it takes to consume one block of grant beats.
+trait HasL1CacheParameters extends HasCacheParameters with HasCoreParameters {
+ val outerDataBeats = p(TLKey(p(TLId))).dataBeats
+ val outerDataBits = p(TLKey(p(TLId))).dataBitsPerBeat
+ val refillCyclesPerBeat = outerDataBits/rowBits
+ val refillCycles = refillCyclesPerBeat*outerDataBeats
+}
+
+// ICache lookup request: virtual address only (the PPN arrives a cycle
+// later on io.s1_ppn).
+class ICacheReq(implicit p: Parameters) extends CoreBundle()(p) with HasL1CacheParameters {
+ val addr = UInt(width = vaddrBits)
+}
+
+// ICache response.  NOTE(review): only datablock is driven by the visible
+// ICache implementation; data appears unused here — confirm against callers.
+class ICacheResp(implicit p: Parameters) extends CoreBundle()(p) with HasL1CacheParameters {
+ val data = Bits(width = coreInstBits)
+ val datablock = Bits(width = rowBits) // full cache row containing the fetch group
+}
+
+// Blocking instruction cache: indexed by virtual address, tagged with the
+// physical address (PPN supplied one cycle after the request).  Tags and
+// data are ECC-encoded via `code`; a detected error clears the valid bit
+// and forces a miss.  Refills go through a 4-state FSM over TileLink.
+class ICache(latency: Int)(implicit p: Parameters) extends CoreModule()(p) with HasL1CacheParameters {
+ val io = new Bundle {
+ val req = Valid(new ICacheReq).flip
+ val s1_ppn = UInt(INPUT, ppnBits) // delayed one cycle w.r.t. req
+ val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req
+ val s2_kill = Bool(INPUT) // delayed two cycles; prevents I$ miss emission
+
+ val resp = Decoupled(new ICacheResp)
+ val invalidate = Bool(INPUT)
+ val mem = new ClientUncachedTileLinkIO
+ }
+ require(isPow2(nSets) && isPow2(nWays))
+ require(isPow2(coreInstBytes))
+ require(!usingVM || pgIdxBits >= untagBits)
+
+ val s_ready :: s_request :: s_refill_wait :: s_refill :: Nil = Enum(UInt(), 4)
+ val state = Reg(init=s_ready)
+ val invalidated = Reg(Bool())
+ val stall = !io.resp.ready
+ val rdy = Wire(Bool())
+
+ val refill_addr = Reg(UInt(width = paddrBits))
+ val s1_any_tag_hit = Wire(Bool())
+
+ val s1_valid = Reg(init=Bool(false))
+ val s1_vaddr = Reg(UInt())
+ val s1_paddr = Cat(io.s1_ppn, s1_vaddr(pgIdxBits-1,0)).toUInt
+ val s1_tag = s1_paddr(tagBits+untagBits-1,untagBits)
+
+ // On a stall, s0 replays the s1 access so the SeqMem outputs stay fresh.
+ val s0_valid = io.req.valid || s1_valid && stall
+ val s0_vaddr = Mux(s1_valid && stall, s1_vaddr, io.req.bits.addr)
+
+ s1_valid := io.req.valid && rdy || s1_valid && stall && !io.s1_kill
+ when (io.req.valid && rdy) {
+ s1_vaddr := io.req.bits.addr
+ }
+
+ val out_valid = s1_valid && !io.s1_kill && state === s_ready
+ val s1_idx = s1_vaddr(untagBits-1,blockOffBits)
+ val s1_hit = out_valid && s1_any_tag_hit
+ val s1_miss = out_valid && !s1_any_tag_hit
+ rdy := state === s_ready && !s1_miss
+
+ when (s1_miss && state === s_ready) {
+ refill_addr := s1_paddr
+ }
+ val refill_tag = refill_addr(tagBits+untagBits-1,untagBits)
+
+ // Split each wide TileLink grant beat into row-sized refill writes.
+ val narrow_grant = FlowThroughSerializer(io.mem.grant, refillCyclesPerBeat)
+ val (refill_cnt, refill_wrap) = Counter(narrow_grant.fire(), refillCycles)
+ val refill_done = state === s_refill && refill_wrap
+ narrow_grant.ready := Bool(true)
+
+ // Pseudo-random replacement (LFSR advanced on each miss).
+ val repl_way = if (isDM) UInt(0) else LFSR16(s1_miss)(log2Up(nWays)-1,0)
+ val entagbits = code.width(tagBits)
+ val tag_array = SeqMem(nSets, Vec(nWays, Bits(width = entagbits)))
+ val tag_rdata = tag_array.read(s0_vaddr(untagBits-1,blockOffBits), !refill_done && s0_valid)
+ when (refill_done) {
+ val tag = code.encode(refill_tag).toUInt
+ tag_array.write(s1_idx, Vec.fill(nWays)(tag), Vec.tabulate(nWays)(repl_way === _))
+ }
+
+ // Valid bits, one per (way, set); a flash invalidate clears them all.
+ val vb_array = Reg(init=Bits(0, nSets*nWays))
+ when (refill_done && !invalidated) {
+ vb_array := vb_array.bitSet(Cat(repl_way, s1_idx), Bool(true))
+ }
+ when (io.invalidate) {
+ vb_array := Bits(0)
+ invalidated := Bool(true)
+ }
+ // An ECC disparity on tag or data invalidates the line (it will miss and
+ // be refetched).
+ val s1_disparity = Wire(Vec(nWays, Bool()))
+ for (i <- 0 until nWays)
+ when (s1_valid && s1_disparity(i)) { vb_array := vb_array.bitSet(Cat(UInt(i), s1_idx), Bool(false)) }
+
+ val s1_tag_match = Wire(Vec(nWays, Bool()))
+ val s1_tag_hit = Wire(Vec(nWays, Bool()))
+ val s1_dout = Wire(Vec(nWays, Bits(width = rowBits)))
+
+ for (i <- 0 until nWays) {
+ val s1_vb = !io.invalidate && vb_array(Cat(UInt(i), s1_vaddr(untagBits-1,blockOffBits))).toBool
+ val tag_out = tag_rdata(i)
+ val s1_tag_disparity = code.decode(tag_out).error
+ s1_tag_match(i) := tag_out(tagBits-1,0) === s1_tag
+ s1_tag_hit(i) := s1_vb && s1_tag_match(i)
+ s1_disparity(i) := s1_vb && (s1_tag_disparity || code.decode(s1_dout(i)).error)
+ }
+ s1_any_tag_hit := s1_tag_hit.reduceLeft(_||_) && !s1_disparity.reduceLeft(_||_)
+
+ // One data SeqMem per way, addressed by (set, row-within-block).
+ for (i <- 0 until nWays) {
+ val data_array = SeqMem(nSets * refillCycles, Bits(width = code.width(rowBits)))
+ val wen = narrow_grant.valid && repl_way === UInt(i)
+ when (wen) {
+ val e_d = code.encode(narrow_grant.bits.data).toUInt
+ data_array.write((s1_idx << log2Ceil(refillCycles)) | refill_cnt, e_d)
+ }
+ val s0_raddr = s0_vaddr(untagBits-1,blockOffBits-log2Ceil(refillCycles))
+ s1_dout(i) := data_array.read(s0_raddr, !wen && s0_valid)
+ }
+
+ // output signals
+ latency match {
+ case 1 =>
+ io.resp.bits.datablock := Mux1H(s1_tag_hit, s1_dout)
+ io.resp.valid := s1_hit
+ case 2 =>
+ // Extra register stage for timing when configured with latency 2.
+ val s2_hit = RegEnable(s1_hit, !stall)
+ val s2_tag_hit = RegEnable(s1_tag_hit, !stall)
+ val s2_dout = RegEnable(s1_dout, !stall)
+ io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout)
+ io.resp.valid := s2_hit
+ }
+ io.mem.acquire.valid := state === s_request && !io.s2_kill
+ io.mem.acquire.bits := GetBlock(addr_block = refill_addr >> blockOffBits)
+
+ // control state machine
+ switch (state) {
+ is (s_ready) {
+ when (s1_miss) { state := s_request }
+ invalidated := Bool(false)
+ }
+ is (s_request) {
+ when (io.mem.acquire.ready) { state := s_refill_wait }
+ // A late kill (e.g. speculative fetch) abandons the miss.
+ when (io.s2_kill) { state := s_ready }
+ }
+ is (s_refill_wait) {
+ when (io.mem.grant.valid) { state := s_refill }
+ }
+ is (s_refill) {
+ when (refill_done) { state := s_ready }
+ }
+ }
+}
diff --git a/rocket/src/main/scala/idecode.scala b/rocket/src/main/scala/idecode.scala
new file mode 100644
index 00000000..2168922c
--- /dev/null
+++ b/rocket/src/main/scala/idecode.scala
@@ -0,0 +1,319 @@
+// See LICENSE for license details
+
+package rocket
+
+import Chisel._
+import Instructions._
+import uncore.constants.MemoryOpConstants._
+import ALU._
+import cde.Parameters
+import Util._
+
+abstract trait DecodeConstants extends HasCoreParameters
+{
+ val table: Array[(BitPat, List[BitPat])]
+}
+
+class IntCtrlSigs extends Bundle {
+ val legal = Bool()
+ val fp = Bool()
+ val rocc = Bool()
+ val branch = Bool()
+ val jal = Bool()
+ val jalr = Bool()
+ val rxs2 = Bool()
+ val rxs1 = Bool()
+ val sel_alu2 = Bits(width = A2_X.getWidth)
+ val sel_alu1 = Bits(width = A1_X.getWidth)
+ val sel_imm = Bits(width = IMM_X.getWidth)
+ val alu_dw = Bool()
+ val alu_fn = Bits(width = FN_X.getWidth)
+ val mem = Bool()
+ val mem_cmd = Bits(width = M_SZ)
+ val mem_type = Bits(width = MT_SZ)
+ val rfs1 = Bool()
+ val rfs2 = Bool()
+ val rfs3 = Bool()
+ val wfd = Bool()
+ val div = Bool()
+ val wxd = Bool()
+ val csr = Bits(width = CSR.SZ)
+ val fence_i = Bool()
+ val fence = Bool()
+ val amo = Bool()
+
+ def default: List[BitPat] =
+ // jal renf1 fence.i
+ // val | jalr | renf2 |
+ // | fp_val| | renx2 | | renf3 |
+ // | | rocc| | | renx1 s_alu1 mem_val | | | wfd |
+ // | | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div |
+ // | | | | | | | | | | | | | | | | | | | | | wxd | fence
+ // | | | | | | | | | | | | | | | | | | | | | | csr | | amo
+ // | | | | | | | | | | | | | | | | | | | | | | | | | |
+ List(N,X,X,X,X,X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,X,X,X,CSR.X,X,X,X)
+
+ def decode(inst: UInt, table: Iterable[(BitPat, List[BitPat])]) = {
+ val decoder = DecodeLogic(inst, default, table)
+ val sigs = Seq(legal, fp, rocc, branch, jal, jalr, rxs2, rxs1, sel_alu2,
+ sel_alu1, sel_imm, alu_dw, alu_fn, mem, mem_cmd, mem_type,
+ rfs1, rfs2, rfs3, wfd, div, wxd, csr, fence_i, fence, amo)
+ sigs zip decoder map {case(s,d) => s := d}
+ this
+ }
+}
+
+class IDecode(implicit val p: Parameters) extends DecodeConstants
+{
+ val table: Array[(BitPat, List[BitPat])] = Array(
+ BNE-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SNE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
+ BEQ-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SEQ, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
+ BLT-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLT, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
+ BLTU-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLTU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
+ BGE-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
+ BGEU-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGEU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
+
+ JAL-> List(Y,N,N,N,Y,N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ JALR-> List(Y,N,N,N,N,Y,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ AUIPC-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+
+ LB-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,N,N,N,Y,CSR.N,N,N,N),
+ LH-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,N,N,N,Y,CSR.N,N,N,N),
+ LW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N),
+ LBU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,N,N,N,Y,CSR.N,N,N,N),
+ LHU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,N,N,N,Y,CSR.N,N,N,N),
+ SB-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,N,N,N,CSR.N,N,N,N),
+ SH-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,N,N,N,CSR.N,N,N,N),
+ SW-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,N,N,N,CSR.N,N,N,N),
+
+ LUI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ ADDI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ SLTI -> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ SLTIU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ ANDI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ ORI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ XORI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ SLLI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ SRLI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ SRAI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ ADD-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ SUB-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ SLT-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ SLTU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ AND-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ OR-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ XOR-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ SLL-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ SRL-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ SRA-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+
+ FENCE-> List(Y,N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,Y,N),
+ FENCE_I-> List(Y,N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, Y,M_FLUSH_ALL,MT_X, N,N,N,N,N,N,CSR.N,Y,N,N),
+
+ SCALL-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N),
+ SBREAK-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N),
+ MRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N),
+ WFI-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N),
+ CSRRW-> List(Y,N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N),
+ CSRRS-> List(Y,N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N),
+ CSRRC-> List(Y,N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N),
+ CSRRWI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N),
+ CSRRSI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N),
+ CSRRCI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N))
+}
+
+class SDecode(implicit val p: Parameters) extends DecodeConstants
+{
+ val table: Array[(BitPat, List[BitPat])] = Array(
+ SFENCE_VM-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N),
+ SRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N))
+}
+
+class DebugDecode(implicit val p: Parameters) extends DecodeConstants
+{
+ val table: Array[(BitPat, List[BitPat])] = Array(
+ DRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N))
+}
+
+class I64Decode(implicit val p: Parameters) extends DecodeConstants
+{
+ val table: Array[(BitPat, List[BitPat])] = Array(
+ LD-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N),
+ LWU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,N,N,N,Y,CSR.N,N,N,N),
+ SD-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,N,N,N,CSR.N,N,N,N),
+
+ ADDIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ SLLIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ SRLIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ SRAIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ ADDW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ SUBW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ SLLW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ SRLW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ SRAW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N))
+}
+
+class MDecode(implicit val p: Parameters) extends DecodeConstants
+{
+ val table: Array[(BitPat, List[BitPat])] = Array(
+ MUL-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
+ MULH-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULH, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
+ MULHU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
+ MULHSU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHSU,N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
+
+ DIV-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
+ DIVU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
+ REM-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
+ REMU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N))
+}
+
+class M64Decode(implicit val p: Parameters) extends DecodeConstants
+{
+ val table: Array[(BitPat, List[BitPat])] = Array(
+ MULW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
+
+ DIVW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
+ DIVUW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
+ REMW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
+ REMUW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N))
+}
+
+class ADecode(implicit val p: Parameters) extends DecodeConstants
+{
+ val table: Array[(BitPat, List[BitPat])] = Array(
+ AMOADD_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
+ AMOXOR_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
+ AMOSWAP_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
+ AMOAND_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
+ AMOOR_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
+ AMOMIN_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
+ AMOMINU_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
+ AMOMAX_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
+ AMOMAXU_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
+
+ LR_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
+ SC_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y))
+}
+
+class A64Decode(implicit val p: Parameters) extends DecodeConstants
+{
+ val table: Array[(BitPat, List[BitPat])] = Array(
+ AMOADD_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
+ AMOSWAP_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
+ AMOXOR_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
+ AMOAND_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
+ AMOOR_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
+ AMOMIN_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
+ AMOMINU_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
+ AMOMAX_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
+ AMOMAXU_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
+
+ LR_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
+ SC_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y))
+}
+
+class FDecode(implicit val p: Parameters) extends DecodeConstants
+{
+ val table: Array[(BitPat, List[BitPat])] = Array(
+ FCVT_S_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N),
+ FCVT_D_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N),
+ FSGNJ_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
+ FSGNJ_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
+ FSGNJX_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
+ FSGNJX_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
+ FSGNJN_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
+ FSGNJN_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
+ FMIN_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
+ FMIN_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
+ FMAX_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
+ FMAX_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
+ FADD_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
+ FADD_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
+ FSUB_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
+ FSUB_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
+ FMUL_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
+ FMUL_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
+ FMADD_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
+ FMADD_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
+ FMSUB_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
+ FMSUB_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
+ FNMADD_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
+ FNMADD_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
+ FNMSUB_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
+ FNMSUB_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
+ FCLASS_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
+ FCLASS_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
+ FMV_X_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
+ FCVT_W_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
+ FCVT_W_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
+ FCVT_WU_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
+ FCVT_WU_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
+ FEQ_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N),
+ FEQ_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N),
+ FLT_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N),
+ FLT_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N),
+ FLE_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N),
+ FLE_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N),
+ FMV_S_X-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
+ FCVT_S_W-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
+ FCVT_D_W-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
+ FCVT_S_WU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
+ FCVT_D_WU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
+ FLW-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,Y,N,N,CSR.N,N,N,N),
+ FLD-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,Y,N,N,CSR.N,N,N,N),
+ FSW-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,Y,N,N,N,N,CSR.N,N,N,N),
+ FSD-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,Y,N,N,N,N,CSR.N,N,N,N))
+}
+
+class F64Decode(implicit val p: Parameters) extends DecodeConstants
+{
+ val table: Array[(BitPat, List[BitPat])] = Array(
+ FMV_X_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
+ FCVT_L_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
+ FCVT_L_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
+ FCVT_LU_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
+ FCVT_LU_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
+ FMV_D_X-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
+ FCVT_S_L-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
+ FCVT_D_L-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
+ FCVT_S_LU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
+ FCVT_D_LU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N))
+}
+
+class FDivSqrtDecode(implicit val p: Parameters) extends DecodeConstants
+{
+ val table: Array[(BitPat, List[BitPat])] = Array(
+ FDIV_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
+ FDIV_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
+ FSQRT_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
+ FSQRT_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N))
+}
+
+class RoCCDecode(implicit val p: Parameters) extends DecodeConstants
+{
+ val table: Array[(BitPat, List[BitPat])] = Array(
+ CUSTOM0-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
+ CUSTOM0_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
+ CUSTOM0_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
+ CUSTOM0_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ CUSTOM0_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ CUSTOM0_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ CUSTOM1-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
+ CUSTOM1_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
+ CUSTOM1_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
+ CUSTOM1_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ CUSTOM1_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ CUSTOM1_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ CUSTOM2-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
+ CUSTOM2_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
+ CUSTOM2_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
+ CUSTOM2_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ CUSTOM2_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ CUSTOM2_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ CUSTOM3-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
+ CUSTOM3_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
+ CUSTOM3_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
+ CUSTOM3_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ CUSTOM3_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+ CUSTOM3_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N))
+}
diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala
new file mode 100644
index 00000000..339ac42f
--- /dev/null
+++ b/rocket/src/main/scala/instructions.scala
@@ -0,0 +1,383 @@
+// See LICENSE for license details.
+
+package rocket
+
+import Chisel._
+
+/* Automatically generated by parse-opcodes */
+object Instructions {
+ def BEQ = BitPat("b?????????????????000?????1100011")
+ def BNE = BitPat("b?????????????????001?????1100011")
+ def BLT = BitPat("b?????????????????100?????1100011")
+ def BGE = BitPat("b?????????????????101?????1100011")
+ def BLTU = BitPat("b?????????????????110?????1100011")
+ def BGEU = BitPat("b?????????????????111?????1100011")
+ def JALR = BitPat("b?????????????????000?????1100111")
+ def JAL = BitPat("b?????????????????????????1101111")
+ def LUI = BitPat("b?????????????????????????0110111")
+ def AUIPC = BitPat("b?????????????????????????0010111")
+ def ADDI = BitPat("b?????????????????000?????0010011")
+ def SLLI = BitPat("b000000???????????001?????0010011")
+ def SLTI = BitPat("b?????????????????010?????0010011")
+ def SLTIU = BitPat("b?????????????????011?????0010011")
+ def XORI = BitPat("b?????????????????100?????0010011")
+ def SRLI = BitPat("b000000???????????101?????0010011")
+ def SRAI = BitPat("b010000???????????101?????0010011")
+ def ORI = BitPat("b?????????????????110?????0010011")
+ def ANDI = BitPat("b?????????????????111?????0010011")
+ def ADD = BitPat("b0000000??????????000?????0110011")
+ def SUB = BitPat("b0100000??????????000?????0110011")
+ def SLL = BitPat("b0000000??????????001?????0110011")
+ def SLT = BitPat("b0000000??????????010?????0110011")
+ def SLTU = BitPat("b0000000??????????011?????0110011")
+ def XOR = BitPat("b0000000??????????100?????0110011")
+ def SRL = BitPat("b0000000??????????101?????0110011")
+ def SRA = BitPat("b0100000??????????101?????0110011")
+ def OR = BitPat("b0000000??????????110?????0110011")
+ def AND = BitPat("b0000000??????????111?????0110011")
+ def ADDIW = BitPat("b?????????????????000?????0011011")
+ def SLLIW = BitPat("b0000000??????????001?????0011011")
+ def SRLIW = BitPat("b0000000??????????101?????0011011")
+ def SRAIW = BitPat("b0100000??????????101?????0011011")
+ def ADDW = BitPat("b0000000??????????000?????0111011")
+ def SUBW = BitPat("b0100000??????????000?????0111011")
+ def SLLW = BitPat("b0000000??????????001?????0111011")
+ def SRLW = BitPat("b0000000??????????101?????0111011")
+ def SRAW = BitPat("b0100000??????????101?????0111011")
+ def LB = BitPat("b?????????????????000?????0000011")
+ def LH = BitPat("b?????????????????001?????0000011")
+ def LW = BitPat("b?????????????????010?????0000011")
+ def LD = BitPat("b?????????????????011?????0000011")
+ def LBU = BitPat("b?????????????????100?????0000011")
+ def LHU = BitPat("b?????????????????101?????0000011")
+ def LWU = BitPat("b?????????????????110?????0000011")
+ def SB = BitPat("b?????????????????000?????0100011")
+ def SH = BitPat("b?????????????????001?????0100011")
+ def SW = BitPat("b?????????????????010?????0100011")
+ def SD = BitPat("b?????????????????011?????0100011")
+ def FENCE = BitPat("b?????????????????000?????0001111")
+ def FENCE_I = BitPat("b?????????????????001?????0001111")
+ def MUL = BitPat("b0000001??????????000?????0110011")
+ def MULH = BitPat("b0000001??????????001?????0110011")
+ def MULHSU = BitPat("b0000001??????????010?????0110011")
+ def MULHU = BitPat("b0000001??????????011?????0110011")
+ def DIV = BitPat("b0000001??????????100?????0110011")
+ def DIVU = BitPat("b0000001??????????101?????0110011")
+ def REM = BitPat("b0000001??????????110?????0110011")
+ def REMU = BitPat("b0000001??????????111?????0110011")
+ def MULW = BitPat("b0000001??????????000?????0111011")
+ def DIVW = BitPat("b0000001??????????100?????0111011")
+ def DIVUW = BitPat("b0000001??????????101?????0111011")
+ def REMW = BitPat("b0000001??????????110?????0111011")
+ def REMUW = BitPat("b0000001??????????111?????0111011")
+ def AMOADD_W = BitPat("b00000????????????010?????0101111")
+ def AMOXOR_W = BitPat("b00100????????????010?????0101111")
+ def AMOOR_W = BitPat("b01000????????????010?????0101111")
+ def AMOAND_W = BitPat("b01100????????????010?????0101111")
+ def AMOMIN_W = BitPat("b10000????????????010?????0101111")
+ def AMOMAX_W = BitPat("b10100????????????010?????0101111")
+ def AMOMINU_W = BitPat("b11000????????????010?????0101111")
+ def AMOMAXU_W = BitPat("b11100????????????010?????0101111")
+ def AMOSWAP_W = BitPat("b00001????????????010?????0101111")
+ def LR_W = BitPat("b00010??00000?????010?????0101111")
+ def SC_W = BitPat("b00011????????????010?????0101111")
+ def AMOADD_D = BitPat("b00000????????????011?????0101111")
+ def AMOXOR_D = BitPat("b00100????????????011?????0101111")
+ def AMOOR_D = BitPat("b01000????????????011?????0101111")
+ def AMOAND_D = BitPat("b01100????????????011?????0101111")
+ def AMOMIN_D = BitPat("b10000????????????011?????0101111")
+ def AMOMAX_D = BitPat("b10100????????????011?????0101111")
+ def AMOMINU_D = BitPat("b11000????????????011?????0101111")
+ def AMOMAXU_D = BitPat("b11100????????????011?????0101111")
+ def AMOSWAP_D = BitPat("b00001????????????011?????0101111")
+ def LR_D = BitPat("b00010??00000?????011?????0101111")
+ def SC_D = BitPat("b00011????????????011?????0101111")
+ def ECALL = BitPat("b00000000000000000000000001110011")
+ def EBREAK = BitPat("b00000000000100000000000001110011")
+ def URET = BitPat("b00000000001000000000000001110011")
+ def SRET = BitPat("b00010000001000000000000001110011")
+ def HRET = BitPat("b00100000001000000000000001110011")
+ def MRET = BitPat("b00110000001000000000000001110011")
+ def DRET = BitPat("b01111011001000000000000001110011")
+ def SFENCE_VM = BitPat("b000100000100?????000000001110011")
+ def WFI = BitPat("b00010000010100000000000001110011")
+ def CSRRW = BitPat("b?????????????????001?????1110011")
+ def CSRRS = BitPat("b?????????????????010?????1110011")
+ def CSRRC = BitPat("b?????????????????011?????1110011")
+ def CSRRWI = BitPat("b?????????????????101?????1110011")
+ def CSRRSI = BitPat("b?????????????????110?????1110011")
+ def CSRRCI = BitPat("b?????????????????111?????1110011")
+ def FADD_S = BitPat("b0000000??????????????????1010011")
+ def FSUB_S = BitPat("b0000100??????????????????1010011")
+ def FMUL_S = BitPat("b0001000??????????????????1010011")
+ def FDIV_S = BitPat("b0001100??????????????????1010011")
+ def FSGNJ_S = BitPat("b0010000??????????000?????1010011")
+ def FSGNJN_S = BitPat("b0010000??????????001?????1010011")
+ def FSGNJX_S = BitPat("b0010000??????????010?????1010011")
+ def FMIN_S = BitPat("b0010100??????????000?????1010011")
+ def FMAX_S = BitPat("b0010100??????????001?????1010011")
+ def FSQRT_S = BitPat("b010110000000?????????????1010011")
+ def FADD_D = BitPat("b0000001??????????????????1010011")
+ def FSUB_D = BitPat("b0000101??????????????????1010011")
+ def FMUL_D = BitPat("b0001001??????????????????1010011")
+ def FDIV_D = BitPat("b0001101??????????????????1010011")
+ def FSGNJ_D = BitPat("b0010001??????????000?????1010011")
+ def FSGNJN_D = BitPat("b0010001??????????001?????1010011")
+ def FSGNJX_D = BitPat("b0010001??????????010?????1010011")
+ def FMIN_D = BitPat("b0010101??????????000?????1010011")
+ def FMAX_D = BitPat("b0010101??????????001?????1010011")
+ def FCVT_S_D = BitPat("b010000000001?????????????1010011")
+ def FCVT_D_S = BitPat("b010000100000?????????????1010011")
+ def FSQRT_D = BitPat("b010110100000?????????????1010011")
+ def FLE_S = BitPat("b1010000??????????000?????1010011")
+ def FLT_S = BitPat("b1010000??????????001?????1010011")
+ def FEQ_S = BitPat("b1010000??????????010?????1010011")
+ def FLE_D = BitPat("b1010001??????????000?????1010011")
+ def FLT_D = BitPat("b1010001??????????001?????1010011")
+ def FEQ_D = BitPat("b1010001??????????010?????1010011")
+ def FCVT_W_S = BitPat("b110000000000?????????????1010011")
+ def FCVT_WU_S = BitPat("b110000000001?????????????1010011")
+ def FCVT_L_S = BitPat("b110000000010?????????????1010011")
+ def FCVT_LU_S = BitPat("b110000000011?????????????1010011")
+ def FMV_X_S = BitPat("b111000000000?????000?????1010011")
+ def FCLASS_S = BitPat("b111000000000?????001?????1010011")
+ def FCVT_W_D = BitPat("b110000100000?????????????1010011")
+ def FCVT_WU_D = BitPat("b110000100001?????????????1010011")
+ def FCVT_L_D = BitPat("b110000100010?????????????1010011")
+ def FCVT_LU_D = BitPat("b110000100011?????????????1010011")
+ def FMV_X_D = BitPat("b111000100000?????000?????1010011")
+ def FCLASS_D = BitPat("b111000100000?????001?????1010011")
+ def FCVT_S_W = BitPat("b110100000000?????????????1010011")
+ def FCVT_S_WU = BitPat("b110100000001?????????????1010011")
+ def FCVT_S_L = BitPat("b110100000010?????????????1010011")
+ def FCVT_S_LU = BitPat("b110100000011?????????????1010011")
+ def FMV_S_X = BitPat("b111100000000?????000?????1010011")
+ def FCVT_D_W = BitPat("b110100100000?????????????1010011")
+ def FCVT_D_WU = BitPat("b110100100001?????????????1010011")
+ def FCVT_D_L = BitPat("b110100100010?????????????1010011")
+ def FCVT_D_LU = BitPat("b110100100011?????????????1010011")
+ def FMV_D_X = BitPat("b111100100000?????000?????1010011")
+ def FLW = BitPat("b?????????????????010?????0000111")
+ def FLD = BitPat("b?????????????????011?????0000111")
+ def FSW = BitPat("b?????????????????010?????0100111")
+ def FSD = BitPat("b?????????????????011?????0100111")
+ def FMADD_S = BitPat("b?????00??????????????????1000011")
+ def FMSUB_S = BitPat("b?????00??????????????????1000111")
+ def FNMSUB_S = BitPat("b?????00??????????????????1001011")
+ def FNMADD_S = BitPat("b?????00??????????????????1001111")
+ def FMADD_D = BitPat("b?????01??????????????????1000011")
+ def FMSUB_D = BitPat("b?????01??????????????????1000111")
+ def FNMSUB_D = BitPat("b?????01??????????????????1001011")
+ def FNMADD_D = BitPat("b?????01??????????????????1001111")
+ def CUSTOM0 = BitPat("b?????????????????000?????0001011")
+ def CUSTOM0_RS1 = BitPat("b?????????????????010?????0001011")
+ def CUSTOM0_RS1_RS2 = BitPat("b?????????????????011?????0001011")
+ def CUSTOM0_RD = BitPat("b?????????????????100?????0001011")
+ def CUSTOM0_RD_RS1 = BitPat("b?????????????????110?????0001011")
+ def CUSTOM0_RD_RS1_RS2 = BitPat("b?????????????????111?????0001011")
+ def CUSTOM1 = BitPat("b?????????????????000?????0101011")
+ def CUSTOM1_RS1 = BitPat("b?????????????????010?????0101011")
+ def CUSTOM1_RS1_RS2 = BitPat("b?????????????????011?????0101011")
+ def CUSTOM1_RD = BitPat("b?????????????????100?????0101011")
+ def CUSTOM1_RD_RS1 = BitPat("b?????????????????110?????0101011")
+ def CUSTOM1_RD_RS1_RS2 = BitPat("b?????????????????111?????0101011")
+ def CUSTOM2 = BitPat("b?????????????????000?????1011011")
+ def CUSTOM2_RS1 = BitPat("b?????????????????010?????1011011")
+ def CUSTOM2_RS1_RS2 = BitPat("b?????????????????011?????1011011")
+ def CUSTOM2_RD = BitPat("b?????????????????100?????1011011")
+ def CUSTOM2_RD_RS1 = BitPat("b?????????????????110?????1011011")
+ def CUSTOM2_RD_RS1_RS2 = BitPat("b?????????????????111?????1011011")
+ def CUSTOM3 = BitPat("b?????????????????000?????1111011")
+ def CUSTOM3_RS1 = BitPat("b?????????????????010?????1111011")
+ def CUSTOM3_RS1_RS2 = BitPat("b?????????????????011?????1111011")
+ def CUSTOM3_RD = BitPat("b?????????????????100?????1111011")
+ def CUSTOM3_RD_RS1 = BitPat("b?????????????????110?????1111011")
+ def CUSTOM3_RD_RS1_RS2 = BitPat("b?????????????????111?????1111011")
+ def SLLI_RV32 = BitPat("b0000000??????????001?????0010011")
+ def SRLI_RV32 = BitPat("b0000000??????????101?????0010011")
+ def SRAI_RV32 = BitPat("b0100000??????????101?????0010011")
+ def FRFLAGS = BitPat("b00000000000100000010?????1110011")
+ def FSFLAGS = BitPat("b000000000001?????001?????1110011")
+ def FSFLAGSI = BitPat("b000000000001?????101?????1110011")
+ def FRRM = BitPat("b00000000001000000010?????1110011")
+ def FSRM = BitPat("b000000000010?????001?????1110011")
+ def FSRMI = BitPat("b000000000010?????101?????1110011")
+ def FSCSR = BitPat("b000000000011?????001?????1110011")
+ def FRCSR = BitPat("b00000000001100000010?????1110011")
+ def RDCYCLE = BitPat("b11000000000000000010?????1110011")
+ def RDTIME = BitPat("b11000000000100000010?????1110011")
+ def RDINSTRET = BitPat("b11000000001000000010?????1110011")
+ def RDCYCLEH = BitPat("b11001000000000000010?????1110011")
+ def RDTIMEH = BitPat("b11001000000100000010?????1110011")
+ def RDINSTRETH = BitPat("b11001000001000000010?????1110011")
+ def SCALL = BitPat("b00000000000000000000000001110011")
+ def SBREAK = BitPat("b00000000000100000000000001110011")
+}
+object Causes {
+ val misaligned_fetch = 0x0
+ val fault_fetch = 0x1
+ val illegal_instruction = 0x2
+ val breakpoint = 0x3
+ val misaligned_load = 0x4
+ val fault_load = 0x5
+ val misaligned_store = 0x6
+ val fault_store = 0x7
+ val user_ecall = 0x8
+ val supervisor_ecall = 0x9
+ val hypervisor_ecall = 0xa
+ val machine_ecall = 0xb
+ val all = {
+ val res = collection.mutable.ArrayBuffer[Int]()
+ res += misaligned_fetch
+ res += fault_fetch
+ res += illegal_instruction
+ res += breakpoint
+ res += misaligned_load
+ res += fault_load
+ res += misaligned_store
+ res += fault_store
+ res += user_ecall
+ res += supervisor_ecall
+ res += hypervisor_ecall
+ res += machine_ecall
+ res.toArray
+ }
+}
+object CSRs {
+ val fflags = 0x1
+ val frm = 0x2
+ val fcsr = 0x3
+ val cycle = 0xc00
+ val time = 0xc01
+ val instret = 0xc02
+ val sstatus = 0x100
+ val sie = 0x104
+ val stvec = 0x105
+ val sscratch = 0x140
+ val sepc = 0x141
+ val scause = 0x142
+ val sbadaddr = 0x143
+ val sip = 0x144
+ val sptbr = 0x180
+ val scycle = 0xd00
+ val stime = 0xd01
+ val sinstret = 0xd02
+ val mstatus = 0x300
+ val medeleg = 0x302
+ val mideleg = 0x303
+ val mie = 0x304
+ val mtvec = 0x305
+ val mscratch = 0x340
+ val mepc = 0x341
+ val mcause = 0x342
+ val mbadaddr = 0x343
+ val mip = 0x344
+ val mucounteren = 0x310
+ val mscounteren = 0x311
+ val mucycle_delta = 0x700
+ val mutime_delta = 0x701
+ val muinstret_delta = 0x702
+ val mscycle_delta = 0x704
+ val mstime_delta = 0x705
+ val msinstret_delta = 0x706
+ val tdrselect = 0x7a0
+ val tdrdata1 = 0x7a1
+ val tdrdata2 = 0x7a2
+ val tdrdata3 = 0x7a3
+ val dcsr = 0x7b0
+ val dpc = 0x7b1
+ val dscratch = 0x7b2
+ val mcycle = 0xf00
+ val mtime = 0xf01
+ val minstret = 0xf02
+ val misa = 0xf10
+ val mvendorid = 0xf11
+ val marchid = 0xf12
+ val mimpid = 0xf13
+ val mhartid = 0xf14
+ val mreset = 0x7c2
+ val cycleh = 0xc80
+ val timeh = 0xc81
+ val instreth = 0xc82
+ val mucycle_deltah = 0x780
+ val mutime_deltah = 0x781
+ val muinstret_deltah = 0x782
+ val mscycle_deltah = 0x784
+ val mstime_deltah = 0x785
+ val msinstret_deltah = 0x786
+ val mcycleh = 0xf80
+ val mtimeh = 0xf81
+ val minstreth = 0xf82
+ val all = {
+ val res = collection.mutable.ArrayBuffer[Int]()
+ res += fflags
+ res += frm
+ res += fcsr
+ res += cycle
+ res += time
+ res += instret
+ res += sstatus
+ res += sie
+ res += stvec
+ res += sscratch
+ res += sepc
+ res += scause
+ res += sbadaddr
+ res += sip
+ res += sptbr
+ res += scycle
+ res += stime
+ res += sinstret
+ res += mstatus
+ res += medeleg
+ res += mideleg
+ res += mie
+ res += mtvec
+ res += mscratch
+ res += mepc
+ res += mcause
+ res += mbadaddr
+ res += mip
+ res += mucounteren
+ res += mscounteren
+ res += mucycle_delta
+ res += mutime_delta
+ res += muinstret_delta
+ res += mscycle_delta
+ res += mstime_delta
+ res += msinstret_delta
+ res += tdrselect
+ res += tdrdata1
+ res += tdrdata2
+ res += tdrdata3
+ res += dcsr
+ res += dpc
+ res += dscratch
+ res += mcycle
+ res += mtime
+ res += minstret
+ res += misa
+ res += mvendorid
+ res += marchid
+ res += mimpid
+ res += mhartid
+ res += mreset
+ res.toArray
+ }
+ val all32 = {
+ val res = collection.mutable.ArrayBuffer(all:_*)
+ res += cycleh
+ res += timeh
+ res += instreth
+ res += mucycle_deltah
+ res += mutime_deltah
+ res += muinstret_deltah
+ res += mscycle_deltah
+ res += mstime_deltah
+ res += msinstret_deltah
+ res += mcycleh
+ res += mtimeh
+ res += minstreth
+ res.toArray
+ }
+}
diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala
new file mode 100644
index 00000000..9770d632
--- /dev/null
+++ b/rocket/src/main/scala/multiplier.scala
@@ -0,0 +1,152 @@
+// See LICENSE for license details.
+
+package rocket
+
+import Chisel._
+import ALU._
+import Util._
+
+class MultiplierReq(dataBits: Int, tagBits: Int) extends Bundle {
+ val fn = Bits(width = SZ_ALU_FN)
+ val dw = Bits(width = SZ_DW)
+ val in1 = Bits(width = dataBits)
+ val in2 = Bits(width = dataBits)
+ val tag = UInt(width = tagBits)
+ override def cloneType = new MultiplierReq(dataBits, tagBits).asInstanceOf[this.type]
+}
+
+class MultiplierResp(dataBits: Int, tagBits: Int) extends Bundle {
+ val data = Bits(width = dataBits)
+ val tag = UInt(width = tagBits)
+ override def cloneType = new MultiplierResp(dataBits, tagBits).asInstanceOf[this.type]
+}
+
+class MultiplierIO(dataBits: Int, tagBits: Int) extends Bundle {
+ val req = Decoupled(new MultiplierReq(dataBits, tagBits)).flip
+ val kill = Bool(INPUT)
+ val resp = Decoupled(new MultiplierResp(dataBits, tagBits))
+}
+
+class MulDiv(
+ width: Int,
+ nXpr: Int = 32,
+ unroll: Int = 1,
+ earlyOut: Boolean = false) extends Module {
+ val io = new MultiplierIO(width, log2Up(nXpr))
+ val w = io.req.bits.in1.getWidth
+ val mulw = (w+unroll-1)/unroll*unroll
+
+ val s_ready :: s_neg_inputs :: s_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(UInt(), 6)
+ val state = Reg(init=s_ready)
+
+ val req = Reg(io.req.bits)
+ val count = Reg(UInt(width = log2Up(w+1)))
+ val neg_out = Reg(Bool())
+ val isMul = Reg(Bool())
+ val isHi = Reg(Bool())
+ val divisor = Reg(Bits(width = w+1)) // div only needs w bits
+ val remainder = Reg(Bits(width = 2*mulw+2)) // div only needs 2*w+1 bits
+
+ val cmdMul :: cmdHi :: lhsSigned :: rhsSigned :: Nil =
+ DecodeLogic(io.req.bits.fn, List(X, X, X, X), List(
+ FN_DIV -> List(N, N, Y, Y),
+ FN_REM -> List(N, Y, Y, Y),
+ FN_DIVU -> List(N, N, N, N),
+ FN_REMU -> List(N, Y, N, N),
+ FN_MUL -> List(Y, N, X, X),
+ FN_MULH -> List(Y, Y, Y, Y),
+ FN_MULHU -> List(Y, Y, N, N),
+ FN_MULHSU -> List(Y, Y, Y, N))).map(_ toBool)
+
+ require(w == 32 || w == 64)
+ def halfWidth(req: MultiplierReq) = Bool(w > 32) && req.dw === DW_32
+
+ def sext(x: Bits, halfW: Bool, signed: Bool) = {
+ val sign = signed && Mux(halfW, x(w/2-1), x(w-1))
+ val hi = Mux(halfW, Fill(w/2, sign), x(w-1,w/2))
+ (Cat(hi, x(w/2-1,0)), sign)
+ }
+ val (lhs_in, lhs_sign) = sext(io.req.bits.in1, halfWidth(io.req.bits), lhsSigned)
+ val (rhs_in, rhs_sign) = sext(io.req.bits.in2, halfWidth(io.req.bits), rhsSigned)
+
+ val subtractor = remainder(2*w,w) - divisor(w,0)
+ val less = subtractor(w)
+ val negated_remainder = -remainder(w-1,0)
+
+ when (state === s_neg_inputs) {
+ when (remainder(w-1) || isMul) {
+ remainder := negated_remainder
+ }
+ when (divisor(w-1) || isMul) {
+ divisor := subtractor
+ }
+ state := s_busy
+ }
+
+ when (state === s_neg_output) {
+ remainder := negated_remainder
+ state := s_done
+ }
+ when (state === s_move_rem) {
+ remainder := remainder(2*w, w+1)
+ state := Mux(neg_out, s_neg_output, s_done)
+ }
+ when (state === s_busy && isMul) {
+ val mulReg = Cat(remainder(2*mulw+1,w+1),remainder(w-1,0))
+ val mplier = mulReg(mulw-1,0)
+ val accum = mulReg(2*mulw,mulw).toSInt
+ val mpcand = divisor.toSInt
+ val prod = mplier(unroll-1,0) * mpcand + accum
+ val nextMulReg = Cat(prod, mplier(mulw-1,unroll)).toUInt
+
+ val eOutMask = (SInt(BigInt(-1) << mulw) >> (count * unroll)(log2Up(mulw)-1,0))(mulw-1,0)
+ val eOut = Bool(earlyOut) && count =/= mulw/unroll-1 && count =/= 0 &&
+ !isHi && (mplier & ~eOutMask) === UInt(0)
+ val eOutRes = (mulReg >> (mulw - count * unroll)(log2Up(mulw)-1,0))
+ val nextMulReg1 = Cat(nextMulReg(2*mulw,mulw), Mux(eOut, eOutRes, nextMulReg)(mulw-1,0))
+ remainder := Cat(nextMulReg1 >> w, Bool(false), nextMulReg1(w-1,0))
+
+ count := count + 1
+ when (eOut || count === mulw/unroll-1) {
+ state := Mux(isHi, s_move_rem, s_done)
+ }
+ }
+ when (state === s_busy && !isMul) {
+ when (count === w) {
+ state := Mux(isHi, s_move_rem, Mux(neg_out, s_neg_output, s_done))
+ }
+ count := count + 1
+
+ remainder := Cat(Mux(less, remainder(2*w-1,w), subtractor(w-1,0)), remainder(w-1,0), !less)
+
+ val divisorMSB = Log2(divisor(w-1,0), w)
+ val dividendMSB = Log2(remainder(w-1,0), w)
+ val eOutPos = UInt(w-1) + divisorMSB - dividendMSB
+ val eOutZero = divisorMSB > dividendMSB
+ val eOut = count === 0 && less /* not divby0 */ && (eOutPos > 0 || eOutZero)
+ when (Bool(earlyOut) && eOut) {
+ val shift = Mux(eOutZero, UInt(w-1), eOutPos(log2Up(w)-1,0))
+ remainder := remainder(w-1,0) << shift
+ count := shift
+ }
+ when (count === 0 && !less /* divby0 */ && !isHi) { neg_out := false }
+ }
+ when (io.resp.fire() || io.kill) {
+ state := s_ready
+ }
+ when (io.req.fire()) {
+ state := Mux(lhs_sign || rhs_sign && !cmdMul, s_neg_inputs, s_busy)
+ isMul := cmdMul
+ isHi := cmdHi
+ count := 0
+ neg_out := !cmdMul && Mux(cmdHi, lhs_sign, lhs_sign =/= rhs_sign)
+ divisor := Cat(rhs_sign, rhs_in)
+ remainder := lhs_in
+ req := io.req.bits
+ }
+
+ io.resp.bits := req
+ io.resp.bits.data := Mux(halfWidth(req), Cat(Fill(w/2, remainder(w/2-1)), remainder(w/2-1,0)), remainder(w-1,0))
+ io.resp.valid := state === s_done
+ io.req.ready := state === s_ready
+}
diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala
new file mode 100644
index 00000000..c3b783c7
--- /dev/null
+++ b/rocket/src/main/scala/nbdcache.scala
@@ -0,0 +1,1247 @@
+// See LICENSE for license details.
+
+package rocket
+
+import Chisel._
+import junctions._
+import uncore.tilelink._
+import uncore.coherence._
+import uncore.agents._
+import uncore.util._
+import uncore.constants._
+import cde.{Parameters, Field}
+import Util._
+
+case object WordBits extends Field[Int]
+case object StoreDataQueueDepth extends Field[Int]
+case object ReplayQueueDepth extends Field[Int]
+case object NMSHRs extends Field[Int]
+case object LRSCCycles extends Field[Int]
+
+trait HasL1HellaCacheParameters extends HasL1CacheParameters {
+ val wordBits = p(WordBits)
+ val wordBytes = wordBits/8
+ val wordOffBits = log2Up(wordBytes)
+ val beatBytes = p(CacheBlockBytes) / outerDataBeats
+ val beatWords = beatBytes / wordBytes
+ val beatOffBits = log2Up(beatBytes)
+ val idxMSB = untagBits-1
+ val idxLSB = blockOffBits
+ val offsetmsb = idxLSB-1
+ val offsetlsb = wordOffBits
+ val rowWords = rowBits/wordBits
+ val doNarrowRead = coreDataBits * nWays % rowBits == 0
+ val encDataBits = code.width(coreDataBits)
+ val encRowBits = encDataBits*rowWords
+ val sdqDepth = p(StoreDataQueueDepth)
+ val nMSHRs = p(NMSHRs)
+ val nIOMSHRs = 1
+ val lrscCycles = p(LRSCCycles)
+
+ require(lrscCycles >= 32) // ISA requires 16-insn LRSC sequences to succeed
+ require(isPow2(nSets))
+ require(rowBits <= outerDataBits)
+ require(!usingVM || untagBits <= pgIdxBits)
+}
+
+abstract class L1HellaCacheModule(implicit val p: Parameters) extends Module
+ with HasL1HellaCacheParameters
+abstract class L1HellaCacheBundle(implicit val p: Parameters) extends junctions.ParameterizedBundle()(p)
+ with HasL1HellaCacheParameters
+
+trait HasCoreMemOp extends HasCoreParameters {
+ val addr = UInt(width = coreMaxAddrBits)
+ val tag = Bits(width = coreDCacheReqTagBits)
+ val cmd = Bits(width = M_SZ)
+ val typ = Bits(width = MT_SZ)
+}
+
+trait HasCoreData extends HasCoreParameters {
+ val data = Bits(width = coreDataBits)
+}
+
+trait HasSDQId extends HasL1HellaCacheParameters {
+ val sdq_id = UInt(width = log2Up(sdqDepth))
+}
+
+trait HasMissInfo extends HasL1HellaCacheParameters {
+ val tag_match = Bool()
+ val old_meta = new L1Metadata
+ val way_en = Bits(width = nWays)
+}
+
+class HellaCacheReqInternal(implicit p: Parameters) extends L1HellaCacheBundle()(p)
+ with HasCoreMemOp {
+ val phys = Bool()
+}
+
+class HellaCacheReq(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasCoreData
+
+class HellaCacheResp(implicit p: Parameters) extends L1HellaCacheBundle()(p)
+ with HasCoreMemOp
+ with HasCoreData {
+ val replay = Bool()
+ val has_data = Bool()
+ val data_word_bypass = Bits(width = coreDataBits)
+ val store_data = Bits(width = coreDataBits)
+}
+
+class AlignmentExceptions extends Bundle {
+ val ld = Bool()
+ val st = Bool()
+}
+
+class HellaCacheExceptions extends Bundle {
+ val ma = new AlignmentExceptions
+ val pf = new AlignmentExceptions
+}
+
+// interface between D$ and processor/DTLB
+class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) {
+ val req = Decoupled(new HellaCacheReq)
+ val s1_kill = Bool(OUTPUT) // kill previous cycle's req
+ val s1_data = Bits(OUTPUT, coreDataBits) // data for previous cycle's req
+ val s2_nack = Bool(INPUT) // req from two cycles ago is rejected
+
+ val resp = Valid(new HellaCacheResp).flip
+ val replay_next = Bool(INPUT)
+ val xcpt = (new HellaCacheExceptions).asInput
+ val invalidate_lr = Bool(OUTPUT)
+ val ordered = Bool(INPUT)
+}
+
+class L1DataReadReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) {
+ val way_en = Bits(width = nWays)
+ val addr = Bits(width = untagBits)
+}
+
+class L1DataWriteReq(implicit p: Parameters) extends L1DataReadReq()(p) {
+ val wmask = Bits(width = rowWords)
+ val data = Bits(width = encRowBits)
+}
+
+class L1RefillReq(implicit p: Parameters) extends L1DataReadReq()(p)
+
+class L1MetaReadReq(implicit p: Parameters) extends MetaReadReq {
+ val tag = Bits(width = tagBits)
+ override def cloneType = new L1MetaReadReq()(p).asInstanceOf[this.type] //TODO remove
+}
+
+class L1MetaWriteReq(implicit p: Parameters) extends
+ MetaWriteReq[L1Metadata](new L1Metadata)
+
+object L1Metadata {
+ def apply(tag: Bits, coh: ClientMetadata)(implicit p: Parameters) = {
+ val meta = Wire(new L1Metadata)
+ meta.tag := tag
+ meta.coh := coh
+ meta
+ }
+}
+class L1Metadata(implicit p: Parameters) extends Metadata()(p) with HasL1HellaCacheParameters {
+ val coh = new ClientMetadata
+}
+
+class Replay(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasCoreData
+class ReplayInternal(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasSDQId
+
+class MSHRReq(implicit p: Parameters) extends Replay()(p) with HasMissInfo
+class MSHRReqInternal(implicit p: Parameters) extends ReplayInternal()(p) with HasMissInfo
+
+class ProbeInternal(implicit p: Parameters) extends Probe()(p) with HasClientTransactionId
+
+class WritebackReq(implicit p: Parameters) extends Release()(p) with HasCacheParameters {
+ val way_en = Bits(width = nWays)
+}
+
+class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) {
+ val io = new Bundle {
+ val req = Decoupled(new HellaCacheReq).flip
+ val acquire = Decoupled(new Acquire)
+ val grant = Valid(new GrantFromSrc).flip
+ val finish = Decoupled(new FinishToDst)
+ val resp = Decoupled(new HellaCacheResp)
+ val replay_next = Bool(OUTPUT)
+ }
+
+ def beatOffset(addr: UInt) = addr.extract(beatOffBits - 1, wordOffBits)
+
+ def wordFromBeat(addr: UInt, dat: UInt) = {
+ val shift = Cat(beatOffset(addr), UInt(0, wordOffBits + log2Up(wordBytes)))
+ (dat >> shift)(wordBits - 1, 0)
+ }
+
+ val req = Reg(new HellaCacheReq)
+ val req_cmd_sc = req.cmd === M_XSC
+ val grant_word = Reg(UInt(width = wordBits))
+ val fq = Module(new FinishQueue(1))
+
+ val s_idle :: s_acquire :: s_grant :: s_resp :: s_finish :: Nil = Enum(Bits(), 5)
+ val state = Reg(init = s_idle)
+ io.req.ready := (state === s_idle)
+
+ fq.io.enq.valid := io.grant.valid && io.grant.bits.requiresAck()
+ fq.io.enq.bits := io.grant.bits.makeFinish()
+ io.finish.valid := fq.io.deq.valid && (state === s_finish)
+ io.finish.bits := fq.io.deq.bits
+ fq.io.deq.ready := io.finish.ready && (state === s_finish)
+
+ val storegen = new StoreGen(req.typ, req.addr, req.data, wordBytes)
+ val loadgen = new LoadGen(req.typ, req.addr, grant_word, req_cmd_sc, wordBytes)
+
+ val beat_mask = (storegen.mask << Cat(beatOffset(req.addr), UInt(0, wordOffBits)))
+ val beat_data = Fill(beatWords, storegen.data)
+
+ val addr_block = req.addr(paddrBits - 1, blockOffBits)
+ val addr_beat = req.addr(blockOffBits - 1, beatOffBits)
+ val addr_byte = req.addr(beatOffBits - 1, 0)
+
+ val get_acquire = Get(
+ client_xact_id = UInt(id),
+ addr_block = addr_block,
+ addr_beat = addr_beat,
+ addr_byte = addr_byte,
+ operand_size = req.typ,
+ alloc = Bool(false))
+
+ val put_acquire = Put(
+ client_xact_id = UInt(id),
+ addr_block = addr_block,
+ addr_beat = addr_beat,
+ data = beat_data,
+ wmask = Some(beat_mask),
+ alloc = Bool(false))
+
+ val putAtomic_acquire = PutAtomic(
+ client_xact_id = UInt(id),
+ addr_block = addr_block,
+ addr_beat = addr_beat,
+ addr_byte = addr_byte,
+ atomic_opcode = req.cmd,
+ operand_size = req.typ,
+ data = beat_data)
+
+ io.acquire.valid := (state === s_acquire)
+ io.acquire.bits := Mux(isAMO(req.cmd), putAtomic_acquire, Mux(isRead(req.cmd), get_acquire, put_acquire))
+
+ io.replay_next := (state === s_grant) || io.resp.valid && !io.resp.ready
+ io.resp.valid := (state === s_resp)
+ io.resp.bits := req
+ io.resp.bits.has_data := isRead(req.cmd)
+ io.resp.bits.data := loadgen.data | req_cmd_sc
+ io.resp.bits.store_data := req.data
+ io.resp.bits.replay := Bool(true)
+
+ when (io.req.fire()) {
+ req := io.req.bits
+ state := s_acquire
+ }
+
+ when (io.acquire.fire()) {
+ state := s_grant
+ }
+
+ when (state === s_grant && io.grant.valid) {
+ state := s_resp
+ when (isRead(req.cmd)) {
+ grant_word := wordFromBeat(req.addr, io.grant.bits.data)
+ }
+ }
+
+ when (io.resp.fire()) {
+ state := s_finish
+ }
+
+ when (io.finish.fire()) {
+ state := s_idle
+ }
+}
+
+class MSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) {
+ val io = new Bundle {
+ val req_pri_val = Bool(INPUT)
+ val req_pri_rdy = Bool(OUTPUT)
+ val req_sec_val = Bool(INPUT)
+ val req_sec_rdy = Bool(OUTPUT)
+ val req_bits = new MSHRReqInternal().asInput
+
+ val idx_match = Bool(OUTPUT)
+ val tag = Bits(OUTPUT, tagBits)
+
+ val mem_req = Decoupled(new Acquire)
+ val refill = new L1RefillReq().asOutput // Data is bypassed
+ val meta_read = Decoupled(new L1MetaReadReq)
+ val meta_write = Decoupled(new L1MetaWriteReq)
+ val replay = Decoupled(new ReplayInternal)
+ val mem_grant = Valid(new GrantFromSrc).flip
+ val mem_finish = Decoupled(new FinishToDst)
+ val wb_req = Decoupled(new WritebackReq)
+ val probe_rdy = Bool(OUTPUT)
+ }
+
+ // MSHR state machine: idle -> (optional victim writeback) -> refill from outer
+ // memory -> metadata update -> drain the replay queue back into the pipeline.
+ val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_meta_write_req :: s_meta_write_resp :: s_drain_rpq :: Nil = Enum(UInt(), 9)
+ val state = Reg(init=s_invalid)
+
+ // True when the current state equals any of the given states.
+ def stateIsOneOf(check_states: Seq[UInt]): Bool =
+ check_states.map(state === _).reduce(_ || _)
+
+ // Varargs convenience overload of stateIsOneOf.
+ def stateIsOneOf(st1: UInt, st2: UInt*): Bool =
+ stateIsOneOf(st1 +: st2)
+
+ // Coherence state to be written back to the metadata array after the miss resolves.
+ val new_coh_state = Reg(init=ClientMetadata.onReset)
+ // The primary (first) request that allocated this MSHR.
+ val req = Reg(new MSHRReqInternal())
+ val req_idx = req.addr(untagBits-1,blockOffBits)
+ val idx_match = req_idx === io.req_bits.addr(untagBits-1,blockOffBits)
+ // We only accept secondary misses if we haven't yet sent an Acquire to outer memory
+ // or if the Acquire that was sent will obtain a Grant with sufficient permissions
+ // to let us replay this new request. I.e. we don't handle multiple outstanding
+ // Acquires on the same block for now.
+ val cmd_requires_second_acquire =
+ req.old_meta.coh.requiresAcquireOnSecondaryMiss(req.cmd, io.req_bits.cmd)
+ // Track whether or not a secondary acquire will cause the coherence state
+ // to go from clean to dirty.
+ val dirties_coh = Reg(Bool())
+ val states_before_refill = Seq(s_wb_req, s_wb_resp, s_meta_clear)
+ // Refill completion: count beats for multibeat Grants; single-beat Grants finish immediately.
+ val gnt_multi_data = io.mem_grant.bits.hasMultibeatData()
+ val (refill_cnt, refill_count_done) = Counter(io.mem_grant.valid && gnt_multi_data, refillCycles)
+ val refill_done = io.mem_grant.valid && (!gnt_multi_data || refill_count_done)
+ // Secondary-miss ready: same index, and either still before the refill request
+ // or mid-refill with sufficient permissions already being acquired.
+ val sec_rdy = idx_match &&
+ (stateIsOneOf(states_before_refill) ||
+ (stateIsOneOf(s_refill_req, s_refill_resp) &&
+ !cmd_requires_second_acquire && !refill_done))
+
+ // Replay queue: buffers (non-prefetch) requests to this block until the refill completes.
+ val rpq = Module(new Queue(new ReplayInternal, p(ReplayQueueDepth)))
+ rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && !isPrefetch(io.req_bits.cmd)
+ rpq.io.enq.bits := io.req_bits
+ rpq.io.deq.ready := io.replay.ready && state === s_drain_rpq || state === s_invalid
+
+ // Coherence metadata after a Grant (pending cmd upgraded to a write if any
+ // queued request dirties the line) and after a permissions-only hit.
+ val coh_on_grant = req.old_meta.coh.onGrant(
+ incoming = io.mem_grant.bits,
+ pending = Mux(dirties_coh, M_XWR, req.cmd))
+ val coh_on_hit = io.req_bits.old_meta.coh.onHit(io.req_bits.cmd)
+
+ // State transitions. NOTE: Chisel last-connect semantics — later when-blocks
+ // take priority, so primary allocation (at the bottom) overrides everything above.
+ when (state === s_drain_rpq && !rpq.io.deq.valid) {
+ state := s_invalid
+ }
+ when (state === s_meta_write_resp) {
+ // this wait state allows us to catch RAW hazards on the tags via nack_victim
+ state := s_drain_rpq
+ }
+ when (state === s_meta_write_req && io.meta_write.ready) {
+ state := s_meta_write_resp
+ }
+ when (state === s_refill_resp && refill_done) {
+ state := s_meta_write_req
+ new_coh_state := coh_on_grant
+ }
+ when (io.mem_req.fire()) { // s_refill_req
+ state := s_refill_resp
+ }
+ when (state === s_meta_clear && io.meta_write.ready) {
+ state := s_refill_req
+ }
+ when (state === s_wb_resp && io.mem_grant.valid) {
+ state := s_meta_clear
+ }
+ when (io.wb_req.fire()) { // s_wb_req
+ state := Mux(io.wb_req.bits.requiresAck(), s_wb_resp, s_meta_clear)
+ }
+ when (io.req_sec_val && io.req_sec_rdy) { // s_wb_req, s_wb_resp, s_refill_req
+ //If we get a secondary miss that needs more permissions before we've sent
+ // out the primary miss's Acquire, we can upgrade the permissions we're
+ // going to ask for in s_refill_req
+ when(cmd_requires_second_acquire) {
+ req.cmd := io.req_bits.cmd
+ }
+ dirties_coh := dirties_coh || isWrite(io.req_bits.cmd)
+ }
+ when (io.req_pri_val && io.req_pri_rdy) {
+ val coh = io.req_bits.old_meta.coh
+ req := io.req_bits
+ dirties_coh := isWrite(io.req_bits.cmd)
+ when (io.req_bits.tag_match) {
+ when(coh.isHit(io.req_bits.cmd)) { // set dirty bit
+ state := s_meta_write_req
+ new_coh_state := coh_on_hit
+ }.otherwise { // upgrade permissions
+ state := s_refill_req
+ }
+ }.otherwise { // writeback if necessary and refill
+ state := Mux(coh.requiresVoluntaryWriteback(), s_wb_req, s_meta_clear)
+ }
+ }
+
+ // Finish queue: holds the Finish message acknowledging the Grant. A new
+ // Acquire is only issued once the queue has room (see io.mem_req.valid below).
+ val fq = Module(new FinishQueue(1))
+ val g = io.mem_grant.bits
+ val can_finish = state === s_invalid || state === s_refill_req
+ fq.io.enq.valid := io.mem_grant.valid && g.requiresAck() && refill_done
+ fq.io.enq.bits := g.makeFinish()
+ io.mem_finish.valid := fq.io.deq.valid && can_finish
+ fq.io.deq.ready := io.mem_finish.ready && can_finish
+ io.mem_finish.bits := fq.io.deq.bits
+
+ io.idx_match := (state =/= s_invalid) && idx_match
+ io.refill.way_en := req.way_en
+ io.refill.addr := ((req_idx << log2Ceil(refillCycles)) | refill_cnt) << rowOffBits
+ io.tag := req.addr >> untagBits
+ io.req_pri_rdy := state === s_invalid
+ io.req_sec_rdy := sec_rdy && rpq.io.enq.ready
+
+ // Block probes to this index for a few cycles after a metadata write: the
+ // counter is set to 1 on meta_write.fire() and increments until it wraps to 0.
+ val meta_hazard = Reg(init=UInt(0,2))
+ when (meta_hazard =/= UInt(0)) { meta_hazard := meta_hazard + 1 }
+ when (io.meta_write.fire()) { meta_hazard := 1 }
+ io.probe_rdy := !idx_match || (!stateIsOneOf(states_before_refill) && meta_hazard === 0)
+
+ // Metadata write: invalidate the victim in s_meta_clear, install the new
+ // coherence state in s_meta_write_req.
+ io.meta_write.valid := state === s_meta_write_req || state === s_meta_clear
+ io.meta_write.bits.idx := req_idx
+ io.meta_write.bits.data.coh := Mux(state === s_meta_clear,
+ req.old_meta.coh.onCacheControl(M_FLUSH),
+ new_coh_state)
+ io.meta_write.bits.data.tag := io.tag
+ io.meta_write.bits.way_en := req.way_en
+
+ // Voluntary writeback of the dirty victim line.
+ io.wb_req.valid := state === s_wb_req
+ io.wb_req.bits := req.old_meta.coh.makeVoluntaryWriteback(
+ client_xact_id = UInt(id),
+ addr_block = Cat(req.old_meta.tag, req_idx))
+ io.wb_req.bits.way_en := req.way_en
+
+ // Outer-memory Acquire; gated on finish-queue space so the eventual Finish
+ // can always be enqueued.
+ io.mem_req.valid := state === s_refill_req && fq.io.enq.ready
+ io.mem_req.bits := req.old_meta.coh.makeAcquire(
+ addr_block = Cat(io.tag, req_idx).toUInt,
+ client_xact_id = Bits(id),
+ op_code = req.cmd)
+
+ // Re-read the tags while draining so replays see up-to-date metadata.
+ io.meta_read.valid := state === s_drain_rpq
+ io.meta_read.bits.idx := req_idx
+ io.meta_read.bits.tag := io.tag
+
+ // Replay queued requests into the pipeline with the (physical) refilled address.
+ io.replay.valid := state === s_drain_rpq && rpq.io.deq.valid
+ io.replay.bits := rpq.io.deq.bits
+ io.replay.bits.phys := Bool(true)
+ io.replay.bits.addr := Cat(io.tag, req_idx, rpq.io.deq.bits.addr(blockOffBits-1,0)).toUInt
+
+ // If the meta read can't go through this cycle, stall the replay and turn the
+ // in-flight command into a nop (last-connect overrides the assignments above).
+ when (!io.meta_read.ready) {
+ rpq.io.deq.ready := Bool(false)
+ io.replay.bits.cmd := M_FLUSH_ALL /* nop */
+ }
+}
+
+// Bank of MSHRs (cacheable misses) plus IOMSHRs (uncacheable accesses), with
+// the arbitration that shares the metadata, data, memory, and replay ports
+// among them.
+class MSHRFile(implicit p: Parameters) extends L1HellaCacheModule()(p) {
+ val io = new Bundle {
+ val req = Decoupled(new MSHRReq).flip
+ val resp = Decoupled(new HellaCacheResp)
+ val secondary_miss = Bool(OUTPUT)
+
+ val mem_req = Decoupled(new Acquire)
+ val refill = new L1RefillReq().asOutput
+ val meta_read = Decoupled(new L1MetaReadReq)
+ val meta_write = Decoupled(new L1MetaWriteReq)
+ val replay = Decoupled(new Replay)
+ val mem_grant = Valid(new GrantFromSrc).flip
+ val mem_finish = Decoupled(new FinishToDst)
+ val wb_req = Decoupled(new WritebackReq)
+
+ val probe_rdy = Bool(OUTPUT)
+ val fence_rdy = Bool(OUTPUT)
+ val replay_next = Bool(OUTPUT)
+ }
+
+ // determine if the request is cacheable or not
+ val cacheable = addrMap.isCacheable(io.req.bits.addr)
+
+ // Store data queue (sdq): one entry per outstanding write; sdq_val is the
+ // valid bitmask, sdq_alloc_id the lowest free slot.
+ val sdq_val = Reg(init=Bits(0, sdqDepth))
+ val sdq_alloc_id = PriorityEncoder(~sdq_val(sdqDepth-1,0))
+ val sdq_rdy = !sdq_val.andR
+ val sdq_enq = io.req.valid && io.req.ready && cacheable && isWrite(io.req.bits.cmd)
+ val sdq = Mem(sdqDepth, io.req.bits.data)
+ when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data }
+
+ // Per-MSHR index/tag match signals, used to detect secondary misses.
+ val idxMatch = Wire(Vec(nMSHRs, Bool()))
+ val tagList = Wire(Vec(nMSHRs, Bits(width = tagBits)))
+ val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.addr >> untagBits
+
+ val wbTagList = Wire(Vec(nMSHRs, Bits()))
+ val refillMux = Wire(Vec(nMSHRs, new L1RefillReq))
+ val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, nMSHRs))
+ val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, nMSHRs))
+ // Locking arbiter keeps a multibeat Acquire's beats together on the memory port.
+ val mem_req_arb = Module(new LockingArbiter(
+ new Acquire,
+ nMSHRs + nIOMSHRs,
+ outerDataBeats,
+ Some((a: Acquire) => a.hasMultibeatData())))
+ val mem_finish_arb = Module(new Arbiter(new FinishToDst, nMSHRs + nIOMSHRs))
+ val wb_req_arb = Module(new Arbiter(new WritebackReq, nMSHRs))
+ val replay_arb = Module(new Arbiter(new ReplayInternal, nMSHRs))
+ val alloc_arb = Module(new Arbiter(Bool(), nMSHRs))
+
+ // Scala-level vars folded into OR-reductions across the MSHR loop below.
+ var idx_match = Bool(false)
+ var pri_rdy = Bool(false)
+ var sec_rdy = Bool(false)
+
+ io.fence_rdy := true
+ io.probe_rdy := true
+
+ for (i <- 0 until nMSHRs) {
+ val mshr = Module(new MSHR(i))
+
+ idxMatch(i) := mshr.io.idx_match
+ tagList(i) := mshr.io.tag
+ wbTagList(i) := mshr.io.wb_req.bits.addr_block >> idxBits
+
+ // Primary allocation goes through alloc_arb so exactly one free MSHR wins.
+ alloc_arb.io.in(i).valid := mshr.io.req_pri_rdy
+ mshr.io.req_pri_val := alloc_arb.io.in(i).ready
+
+ mshr.io.req_sec_val := io.req.valid && sdq_rdy && tag_match
+ mshr.io.req_bits := io.req.bits
+ mshr.io.req_bits.sdq_id := sdq_alloc_id
+
+ meta_read_arb.io.in(i) <> mshr.io.meta_read
+ meta_write_arb.io.in(i) <> mshr.io.meta_write
+ mem_req_arb.io.in(i) <> mshr.io.mem_req
+ mem_finish_arb.io.in(i) <> mshr.io.mem_finish
+ wb_req_arb.io.in(i) <> mshr.io.wb_req
+ replay_arb.io.in(i) <> mshr.io.replay
+
+ // Route Grants to the MSHR whose id matches the transaction id.
+ mshr.io.mem_grant.valid := io.mem_grant.valid &&
+ io.mem_grant.bits.client_xact_id === UInt(i)
+ mshr.io.mem_grant.bits := io.mem_grant.bits
+ refillMux(i) := mshr.io.refill
+
+ pri_rdy = pri_rdy || mshr.io.req_pri_rdy
+ sec_rdy = sec_rdy || mshr.io.req_sec_rdy
+ idx_match = idx_match || mshr.io.idx_match
+
+ when (!mshr.io.req_pri_rdy) { io.fence_rdy := false }
+ when (!mshr.io.probe_rdy) { io.probe_rdy := false }
+ }
+
+ alloc_arb.io.out.ready := io.req.valid && sdq_rdy && cacheable && !idx_match
+
+ io.meta_read <> meta_read_arb.io.out
+ io.meta_write <> meta_write_arb.io.out
+ io.mem_req <> mem_req_arb.io.out
+ io.mem_finish <> mem_finish_arb.io.out
+ io.wb_req <> wb_req_arb.io.out
+
+ // Uncacheable (MMIO) requests are handled by the IOMSHRs; their transaction
+ // ids start after the regular MSHR ids.
+ val mmio_alloc_arb = Module(new Arbiter(Bool(), nIOMSHRs))
+ val resp_arb = Module(new Arbiter(new HellaCacheResp, nIOMSHRs))
+
+ var mmio_rdy = Bool(false)
+ io.replay_next := Bool(false)
+
+ for (i <- 0 until nIOMSHRs) {
+ val id = nMSHRs + i
+ val mshr = Module(new IOMSHR(id))
+
+ mmio_alloc_arb.io.in(i).valid := mshr.io.req.ready
+ mshr.io.req.valid := mmio_alloc_arb.io.in(i).ready
+ mshr.io.req.bits := io.req.bits
+
+ mmio_rdy = mmio_rdy || mshr.io.req.ready
+
+ mem_req_arb.io.in(id) <> mshr.io.acquire
+ mem_finish_arb.io.in(id) <> mshr.io.finish
+
+ mshr.io.grant.bits := io.mem_grant.bits
+ mshr.io.grant.valid := io.mem_grant.valid &&
+ io.mem_grant.bits.client_xact_id === UInt(id)
+
+ resp_arb.io.in(i) <> mshr.io.resp
+
+ when (!mshr.io.req.ready) { io.fence_rdy := Bool(false) }
+ when (mshr.io.replay_next) { io.replay_next := Bool(true) }
+ }
+
+ mmio_alloc_arb.io.out.ready := io.req.valid && !cacheable
+
+ io.resp <> resp_arb.io.out
+ // Accept: MMIO needs a free IOMSHR; a secondary miss needs a tag match and a
+ // willing MSHR; a primary miss needs a free MSHR; writes also need sdq space.
+ io.req.ready := Mux(!cacheable, mmio_rdy,
+ Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy)
+ io.secondary_miss := idx_match
+ io.refill := refillMux(io.mem_grant.bits.client_xact_id)
+
+ // Free the sdq entry when its write is replayed; the data is read out one
+ // cycle behind the replay_arb grant (RegEnable), matching the replay timing.
+ val free_sdq = io.replay.fire() && isWrite(io.replay.bits.cmd)
+ io.replay.bits.data := sdq(RegEnable(replay_arb.io.out.bits.sdq_id, free_sdq))
+ io.replay <> replay_arb.io.out
+
+ // Update the sdq valid mask: clear the replayed entry, set the newly allocated one.
+ when (io.replay.valid || sdq_enq) {
+ sdq_val := sdq_val & ~(UIntToOH(replay_arb.io.out.bits.sdq_id) & Fill(sdqDepth, free_sdq)) |
+ PriorityEncoderOH(~sdq_val(sdqDepth-1,0)) & Fill(sdqDepth, sdq_enq)
+ }
+}
+
+// Streams a dirty cache line out as a Release: reads the data array row by row
+// (data responses arrive two cycles after the request fires) and, when rows are
+// narrower than a TileLink beat, buffers rows until a full beat is assembled.
+class WritebackUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) {
+ val io = new Bundle {
+ val req = Decoupled(new WritebackReq).flip
+ val meta_read = Decoupled(new L1MetaReadReq)
+ val data_req = Decoupled(new L1DataReadReq)
+ val data_resp = Bits(INPUT, encRowBits)
+ val release = Decoupled(new Release)
+ }
+
+ val active = Reg(init=Bool(false))
+ // Pipeline flags tracking data requests in flight: r1 = fired last cycle,
+ // r2 = fired two cycles ago (response available now).
+ val r1_data_req_fired = Reg(init=Bool(false))
+ val r2_data_req_fired = Reg(init=Bool(false))
+ val data_req_cnt = Reg(init = UInt(0, width = log2Up(refillCycles+1))) //TODO Zero width
+ // buf_v marks which sub-beat row buffers hold valid data; a full mask means
+ // a complete beat is ready to release.
+ val buf_v = (if(refillCyclesPerBeat > 1) Reg(init=Bits(0, width = refillCyclesPerBeat-1)) else Bits(1))
+ val beat_done = buf_v.andR
+ val (beat_cnt, all_beats_done) = Counter(io.release.fire(), outerDataBeats)
+ val req = Reg(new WritebackReq)
+
+ io.release.valid := false
+ when (active) {
+ r1_data_req_fired := false
+ r2_data_req_fired := r1_data_req_fired
+ when (io.data_req.fire() && io.meta_read.fire()) {
+ r1_data_req_fired := true
+ data_req_cnt := data_req_cnt + 1
+ }
+ when (r2_data_req_fired) {
+ io.release.valid := beat_done
+ when(beat_done) {
+ when(!io.release.ready) {
+ // Release stalled: rewind the counter by however many requests are in
+ // flight (1, or 2 if another fired last cycle) so the rows are re-read.
+ r1_data_req_fired := false
+ r2_data_req_fired := false
+ data_req_cnt := data_req_cnt - Mux[UInt](Bool(refillCycles > 1) && r1_data_req_fired, 2, 1)
+ } .otherwise { if(refillCyclesPerBeat > 1) buf_v := 0 }
+ }
+ when(!r1_data_req_fired) {
+ // We're done if this is the final data request and the Release can be sent
+ active := data_req_cnt < UInt(refillCycles) || !io.release.ready
+ }
+ }
+ }
+ when (io.req.fire()) {
+ active := true
+ data_req_cnt := 0
+ if(refillCyclesPerBeat > 1) buf_v := 0
+ req := io.req.bits
+ }
+
+ io.req.ready := !active
+
+ val req_idx = req.addr_block(idxBits-1, 0)
+ val fire = active && data_req_cnt < UInt(refillCycles)
+
+ // We reissue the meta read as it sets up the mux ctrl for s2_data_muxed
+ io.meta_read.valid := fire
+ io.meta_read.bits.idx := req_idx
+ io.meta_read.bits.tag := req.addr_block >> idxBits
+
+ io.data_req.valid := fire
+ io.data_req.bits.way_en := req.way_en
+ io.data_req.bits.addr := (if(refillCycles > 1)
+ Cat(req_idx, data_req_cnt(log2Up(refillCycles)-1,0))
+ else req_idx) << rowOffBits
+
+ io.release.bits := req
+ io.release.bits.addr_beat := beat_cnt
+ io.release.bits.data := (if(refillCyclesPerBeat > 1) {
+ // If the cache rows are narrower than a TLDataBeat,
+ // then buffer enough data_resps to make a whole beat
+ val data_buf = Reg(Bits())
+ when(active && r2_data_req_fired && !beat_done) {
+ data_buf := Cat(io.data_resp, data_buf((refillCyclesPerBeat)*encRowBits-1, encRowBits))
+ buf_v := (if(refillCyclesPerBeat > 2)
+ Cat(UInt(1), buf_v(refillCyclesPerBeat-2,1))
+ else UInt(1))
+ }
+ Cat(io.data_resp, data_buf)
+ } else { io.data_resp })
+}
+
+// Services incoming coherence Probes: reads the tags, checks for a conflicting
+// MSHR, writes back the line if it is dirty, sends the Release reply, and
+// downgrades the metadata.
+class ProbeUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) {
+ val io = new Bundle {
+ val req = Decoupled(new ProbeInternal).flip
+ val rep = Decoupled(new Release)
+ val meta_read = Decoupled(new L1MetaReadReq)
+ val meta_write = Decoupled(new L1MetaWriteReq)
+ val wb_req = Decoupled(new WritebackReq)
+ val way_en = Bits(INPUT, nWays)
+ val mshr_rdy = Bool(INPUT)
+ val block_state = new ClientMetadata().asInput
+ }
+
+ val (s_invalid :: s_meta_read :: s_meta_resp :: s_mshr_req ::
+ s_mshr_resp :: s_release :: s_writeback_req :: s_writeback_resp ::
+ s_meta_write :: Nil) = Enum(UInt(), 9)
+ val state = Reg(init=s_invalid)
+ // Metadata and hit way captured from the tag-read results in s_mshr_req.
+ val old_coh = Reg(new ClientMetadata)
+ val way_en = Reg(Bits())
+ val req = Reg(new ProbeInternal)
+ // A nonzero way mask means the probed block is present in the cache.
+ val tag_matches = way_en.orR
+
+ // Reply with the actual coherence state on a hit, the reset (miss) state otherwise.
+ val miss_coh = ClientMetadata.onReset
+ val reply_coh = Mux(tag_matches, old_coh, miss_coh)
+ val reply = reply_coh.makeRelease(req)
+ io.req.ready := state === s_invalid
+ io.rep.valid := state === s_release
+ io.rep.bits := reply
+
+ assert(!io.rep.valid || !io.rep.bits.hasData(),
+ "ProbeUnit should not send releases with data")
+
+ io.meta_read.valid := state === s_meta_read
+ io.meta_read.bits.idx := req.addr_block
+ io.meta_read.bits.tag := req.addr_block >> idxBits
+
+ io.meta_write.valid := state === s_meta_write
+ io.meta_write.bits.way_en := way_en
+ io.meta_write.bits.idx := req.addr_block
+ io.meta_write.bits.data.tag := req.addr_block >> idxBits
+ io.meta_write.bits.data.coh := old_coh.onProbe(req)
+
+ // Dirty data goes out through the writeback unit before the metadata update.
+ io.wb_req.valid := state === s_writeback_req
+ io.wb_req.bits := reply
+ io.wb_req.bits.way_en := way_en
+
+ // state === s_invalid
+ when (io.req.fire()) {
+ state := s_meta_read
+ req := io.req.bits
+ }
+
+ // state === s_meta_read
+ when (io.meta_read.fire()) {
+ state := s_meta_resp
+ }
+
+ // we need to wait one cycle for the metadata to be read from the array
+ when (state === s_meta_resp) {
+ state := s_mshr_req
+ }
+
+ when (state === s_mshr_req) {
+ state := s_mshr_resp
+ old_coh := io.block_state
+ way_en := io.way_en
+ // if the read didn't go through, we need to retry
+ when (!io.mshr_rdy) { state := s_meta_read }
+ }
+
+ when (state === s_mshr_resp) {
+ val needs_writeback = tag_matches && old_coh.requiresVoluntaryWriteback()
+ state := Mux(needs_writeback, s_writeback_req, s_release)
+ }
+
+ when (state === s_release && io.rep.ready) {
+ // Only a hit needs its metadata downgraded; a miss is done.
+ state := Mux(tag_matches, s_meta_write, s_invalid)
+ }
+
+ // state === s_writeback_req
+ when (io.wb_req.fire()) {
+ state := s_writeback_resp
+ }
+
+ // wait for the writeback request to finish before updating the metadata
+ when (state === s_writeback_resp && io.wb_req.ready) {
+ state := s_meta_write
+ }
+
+ when (io.meta_write.fire()) {
+ state := s_invalid
+ }
+}
+
+// L1 data array: per-way SRAMs of encoded rows. With doNarrowRead, ways are
+// grouped in banks of rowWords so a read only activates the word it needs,
+// with a registered-address mux selecting the word one cycle later.
+class DataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) {
+ val io = new Bundle {
+ val read = Decoupled(new L1DataReadReq).flip
+ val write = Decoupled(new L1DataWriteReq).flip
+ val resp = Vec(nWays, Bits(OUTPUT, encRowBits))
+ }
+
+ val waddr = io.write.bits.addr >> rowOffBits
+ val raddr = io.read.bits.addr >> rowOffBits
+
+ if (doNarrowRead) {
+ for (w <- 0 until nWays by rowWords) {
+ val wway_en = io.write.bits.way_en(w+rowWords-1,w)
+ val rway_en = io.read.bits.way_en(w+rowWords-1,w)
+ val resp = Wire(Vec(rowWords, Bits(width = encRowBits)))
+ // Address must be held a cycle to mux the SeqMem's registered read data.
+ val r_raddr = RegEnable(io.read.bits.addr, io.read.valid)
+ for (p <- 0 until resp.size) {
+ val array = SeqMem(nSets*refillCycles, Vec(rowWords, Bits(width=encDataBits)))
+ when (wway_en.orR && io.write.valid && io.write.bits.wmask(p)) {
+ // Replicate the selected word across the row; wway_en masks the lanes.
+ val data = Vec.fill(rowWords)(io.write.bits.data(encDataBits*(p+1)-1,encDataBits*p))
+ array.write(waddr, data, wway_en.toBools)
+ }
+ resp(p) := array.read(raddr, rway_en.orR && io.read.valid).toBits
+ }
+ for (dw <- 0 until rowWords) {
+ val r = Vec(resp.map(_(encDataBits*(dw+1)-1,encDataBits*dw)))
+ val resp_mux =
+ if (r.size == 1) r
+ else Vec(r(r_raddr(rowOffBits-1,wordOffBits)), r.tail:_*)
+ io.resp(w+dw) := resp_mux.toBits
+ }
+ }
+ } else {
+ // Wide-read organization: one full-row SRAM per way.
+ for (w <- 0 until nWays) {
+ val array = SeqMem(nSets*refillCycles, Vec(rowWords, Bits(width=encDataBits)))
+ when (io.write.bits.way_en(w) && io.write.valid) {
+ val data = Vec.tabulate(rowWords)(i => io.write.bits.data(encDataBits*(i+1)-1,encDataBits*i))
+ array.write(waddr, data, io.write.bits.wmask.toBools)
+ }
+ io.resp(w) := array.read(raddr, io.read.bits.way_en(w) && io.read.valid).toBits
+ }
+ }
+
+ // The arrays accept a request every cycle; arbitration happens upstream.
+ io.read.ready := Bool(true)
+ io.write.ready := Bool(true)
+}
+
+// Top-level nonblocking L1 data cache: a 3-stage access pipeline (s1 = TLB +
+// tag/data read, s2 = tag check + response, s3 = store writeback) wrapped
+// around the MSHR file, probe unit, and writeback unit.
+class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
+ val io = new Bundle {
+ val cpu = (new HellaCacheIO).flip
+ val ptw = new TLBPTWIO()
+ val mem = new ClientTileLinkIO
+ }
+
+ require(isPow2(nWays)) // TODO: relax this
+
+ val wb = Module(new WritebackUnit)
+ val prober = Module(new ProbeUnit)
+ val mshrs = Module(new MSHRFile)
+
+ // Default ready; pulled low below by TLB misses, arbiter conflicts, and block_miss
+ // (Chisel last-connect: later conditional assignments override this one).
+ io.cpu.req.ready := Bool(true)
+ val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false))
+ val s1_req = Reg(io.cpu.req.bits)
+ val s1_valid_masked = s1_valid && !io.cpu.s1_kill
+ val s1_replay = Reg(init=Bool(false))
+ val s1_clk_en = Reg(Bool())
+
+ val s2_valid = Reg(next=s1_valid_masked, init=Bool(false))
+ val s2_req = Reg(io.cpu.req.bits)
+ val s2_replay = Reg(next=s1_replay, init=Bool(false)) && s2_req.cmd =/= M_FLUSH_ALL
+ val s2_recycle = Wire(Bool())
+ val s2_valid_masked = Wire(Bool())
+
+ val s3_valid = Reg(init=Bool(false))
+ val s3_req = Reg(io.cpu.req.bits)
+ val s3_way = Reg(Bits())
+
+ val s1_recycled = RegEnable(s2_recycle, Bool(false), s1_clk_en)
+ val s1_read = isRead(s1_req.cmd)
+ val s1_write = isWrite(s1_req.cmd)
+ val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd)
+
+ val dtlb = Module(new TLB)
+ io.ptw <> dtlb.io.ptw
+ dtlb.io.req.valid := s1_valid_masked && s1_readwrite
+ dtlb.io.req.bits.passthrough := s1_req.phys
+ dtlb.io.req.bits.vpn := s1_req.addr >> pgIdxBits
+ dtlb.io.req.bits.instruction := Bool(false)
+ dtlb.io.req.bits.store := s1_write
+ when (!dtlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := Bool(false) }
+
+ // s1_req source select; later when-blocks win, so recycled requests have
+ // highest priority, then replays, then probe/writeback meta reads, then CPU.
+ when (io.cpu.req.valid) {
+ s1_req := io.cpu.req.bits
+ }
+ when (wb.io.meta_read.valid) {
+ s1_req.addr := Cat(wb.io.meta_read.bits.tag, wb.io.meta_read.bits.idx) << blockOffBits
+ s1_req.phys := Bool(true)
+ }
+ when (prober.io.meta_read.valid) {
+ s1_req.addr := Cat(prober.io.meta_read.bits.tag, prober.io.meta_read.bits.idx) << blockOffBits
+ s1_req.phys := Bool(true)
+ }
+ when (mshrs.io.replay.valid) {
+ s1_req := mshrs.io.replay.bits
+ }
+ when (s2_recycle) {
+ s1_req := s2_req
+ }
+ // Physical address after translation.
+ val s1_addr = Cat(dtlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0))
+
+ when (s1_clk_en) {
+ s2_req.typ := s1_req.typ
+ s2_req.phys := s1_req.phys
+ s2_req.addr := s1_addr
+ when (s1_write) {
+ s2_req.data := Mux(s1_replay, mshrs.io.replay.bits.data, io.cpu.s1_data)
+ }
+ when (s1_recycled) { s2_req.data := s1_req.data }
+ s2_req.tag := s1_req.tag
+ s2_req.cmd := s1_req.cmd
+ }
+
+ // Misaligned-access and page-fault exceptions, raised in s1.
+ val misaligned = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).misaligned
+ io.cpu.xcpt.ma.ld := s1_read && misaligned
+ io.cpu.xcpt.ma.st := s1_write && misaligned
+ io.cpu.xcpt.pf.ld := s1_read && dtlb.io.resp.xcpt_ld
+ io.cpu.xcpt.pf.st := s1_write && dtlb.io.resp.xcpt_st
+
+ assert (!(Reg(next=
+ (io.cpu.xcpt.ma.ld || io.cpu.xcpt.ma.st || io.cpu.xcpt.pf.ld || io.cpu.xcpt.pf.st)) &&
+ s2_valid_masked),
+ "DCache exception occurred - cache response not killed.")
+
+ // tags
+ def onReset = L1Metadata(UInt(0), ClientMetadata.onReset)
+ val meta = Module(new MetadataArray(onReset _))
+ val metaReadArb = Module(new Arbiter(new MetaReadReq, 5))
+ val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, 2))
+ meta.io.read <> metaReadArb.io.out
+ meta.io.write <> metaWriteArb.io.out
+
+ // data
+ val data = Module(new DataArray)
+ val readArb = Module(new Arbiter(new L1DataReadReq, 4))
+ val writeArb = Module(new Arbiter(new L1DataWriteReq, 2))
+ data.io.write.valid := writeArb.io.out.valid
+ writeArb.io.out.ready := data.io.write.ready
+ data.io.write.bits := writeArb.io.out.bits
+ // ECC-encode each word before it enters the data array.
+ val wdata_encoded = (0 until rowWords).map(i => code.encode(writeArb.io.out.bits.data(coreDataBits*(i+1)-1,coreDataBits*i)))
+ data.io.write.bits.data := wdata_encoded.toBits
+
+ // tag read for new requests
+ metaReadArb.io.in(4).valid := io.cpu.req.valid
+ metaReadArb.io.in(4).bits.idx := io.cpu.req.bits.addr >> blockOffBits
+ when (!metaReadArb.io.in(4).ready) { io.cpu.req.ready := Bool(false) }
+
+ // data read for new requests
+ readArb.io.in(3).valid := io.cpu.req.valid
+ readArb.io.in(3).bits.addr := io.cpu.req.bits.addr
+ readArb.io.in(3).bits.way_en := ~UInt(0, nWays)
+ when (!readArb.io.in(3).ready) { io.cpu.req.ready := Bool(false) }
+
+ // recycled requests
+ metaReadArb.io.in(0).valid := s2_recycle
+ metaReadArb.io.in(0).bits.idx := s2_req.addr >> blockOffBits
+ readArb.io.in(0).valid := s2_recycle
+ readArb.io.in(0).bits.addr := s2_req.addr
+ readArb.io.in(0).bits.way_en := ~UInt(0, nWays)
+
+ // tag check and way muxing
+ def wayMap[T <: Data](f: Int => T) = Vec((0 until nWays).map(f))
+ val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === (s1_addr >> untagBits)).toBits
+ val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && meta.io.resp(w).coh.isValid()).toBits
+ s1_clk_en := metaReadArb.io.out.valid //TODO: should be metaReadArb.io.out.fire(), but triggers Verilog backend bug
+ val s1_writeback = s1_clk_en && !s1_valid && !s1_replay
+ val s2_tag_match_way = RegEnable(s1_tag_match_way, s1_clk_en)
+ val s2_tag_match = s2_tag_match_way.orR
+ val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEnable(meta.io.resp(w).coh, s1_clk_en)))
+ // A hit requires both permission and that no coherence-state change is needed.
+ val s2_hit = s2_tag_match &&
+ s2_hit_state.isHit(s2_req.cmd) &&
+ s2_hit_state === s2_hit_state.onHit(s2_req.cmd)
+
+ // load-reserved/store-conditional
+ val lrsc_count = Reg(init=UInt(0))
+ val lrsc_valid = lrsc_count.orR
+ val lrsc_addr = Reg(UInt())
+ val (s2_lr, s2_sc) = (s2_req.cmd === M_XLR, s2_req.cmd === M_XSC)
+ val s2_lrsc_addr_match = lrsc_valid && lrsc_addr === (s2_req.addr >> blockOffBits)
+ val s2_sc_fail = s2_sc && !s2_lrsc_addr_match
+ // The reservation counts down each cycle and is killed by any SC or external invalidate.
+ when (lrsc_valid) { lrsc_count := lrsc_count - 1 }
+ when (s2_valid_masked && s2_hit || s2_replay) {
+ when (s2_lr) {
+ when (!lrsc_valid) { lrsc_count := lrscCycles-1 }
+ lrsc_addr := s2_req.addr >> blockOffBits
+ }
+ when (s2_sc) {
+ lrsc_count := 0
+ }
+ }
+ when (io.cpu.invalidate_lr) { lrsc_count := 0 }
+
+ // Capture the data array read per way into s2 registers; with narrow reads
+ // only word 0 is captured for CPU accesses (full rows only for writebacks).
+ val s2_data = Wire(Vec(nWays, Bits(width=encRowBits)))
+ for (w <- 0 until nWays) {
+ val regs = Reg(Vec(rowWords, Bits(width = encDataBits)))
+ val en1 = s1_clk_en && s1_tag_eq_way(w)
+ for (i <- 0 until regs.size) {
+ val en = en1 && ((Bool(i == 0) || !Bool(doNarrowRead)) || s1_writeback)
+ when (en) { regs(i) := data.io.resp(w) >> encDataBits*i }
+ }
+ s2_data(w) := regs.toBits
+ }
+ val s2_data_muxed = Mux1H(s2_tag_match_way, s2_data)
+ // ECC decode of the hit way's row.
+ val s2_data_decoded = (0 until rowWords).map(i => code.decode(s2_data_muxed(encDataBits*(i+1)-1,encDataBits*i)))
+ val s2_data_corrected = s2_data_decoded.map(_.corrected).toBits
+ val s2_data_uncorrected = s2_data_decoded.map(_.uncorrected).toBits
+ val s2_word_idx = if(doNarrowRead) UInt(0) else s2_req.addr(log2Up(rowWords*coreDataBytes)-1,log2Up(wordBytes))
+ val s2_data_correctable = s2_data_decoded.map(_.correctable).toBits()(s2_word_idx)
+
+ // store/amo hits
+ s3_valid := (s2_valid_masked && s2_hit || s2_replay) && !s2_sc_fail && isWrite(s2_req.cmd)
+ val amoalu = Module(new AMOALU)
+ when ((s2_valid || s2_replay) && (isWrite(s2_req.cmd) || s2_data_correctable)) {
+ s3_req := s2_req
+ // Correctable ECC errors are written back corrected instead of the AMO result.
+ s3_req.data := Mux(s2_data_correctable, s2_data_corrected, amoalu.io.out)
+ s3_way := s2_tag_match_way
+ }
+
+ // s3: write the store/AMO result into the data array.
+ writeArb.io.in(0).bits.addr := s3_req.addr
+ writeArb.io.in(0).bits.wmask := UIntToOH(s3_req.addr.extract(rowOffBits-1,offsetlsb))
+ writeArb.io.in(0).bits.data := Fill(rowWords, s3_req.data)
+ writeArb.io.in(0).valid := s3_valid
+ writeArb.io.in(0).bits.way_en := s3_way
+
+ // replacement policy
+ val replacer = p(Replacer)()
+ val s1_replaced_way_en = UIntToOH(replacer.way)
+ val s2_replaced_way_en = UIntToOH(RegEnable(replacer.way, s1_clk_en))
+ val s2_repl_meta = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEnable(meta.io.resp(w), s1_clk_en && s1_replaced_way_en(w))).toSeq)
+
+ // miss handling
+ mshrs.io.req.valid := s2_valid_masked && !s2_hit && (isPrefetch(s2_req.cmd) || isRead(s2_req.cmd) || isWrite(s2_req.cmd))
+ mshrs.io.req.bits := s2_req
+ mshrs.io.req.bits.tag_match := s2_tag_match
+ mshrs.io.req.bits.old_meta := Mux(s2_tag_match, L1Metadata(s2_repl_meta.tag, s2_hit_state), s2_repl_meta)
+ mshrs.io.req.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en)
+ mshrs.io.req.bits.data := s2_req.data
+ when (mshrs.io.req.fire()) { replacer.miss }
+ io.mem.acquire <> mshrs.io.mem_req
+
+ // replays
+ readArb.io.in(1).valid := mshrs.io.replay.valid
+ readArb.io.in(1).bits := mshrs.io.replay.bits
+ readArb.io.in(1).bits.way_en := ~UInt(0, nWays)
+ mshrs.io.replay.ready := readArb.io.in(1).ready
+ s1_replay := mshrs.io.replay.valid && readArb.io.in(1).ready
+ metaReadArb.io.in(1) <> mshrs.io.meta_read
+ metaWriteArb.io.in(0) <> mshrs.io.meta_write
+
+ // probes and releases
+ val releaseArb = Module(new LockingArbiter(
+ new Release, 2, outerDataBeats,
+ Some((r: Release) => r.hasMultibeatData())))
+ io.mem.release <> releaseArb.io.out
+
+ // Stall incoming probes while an LR reservation is live.
+ prober.io.req.valid := io.mem.probe.valid && !lrsc_valid
+ io.mem.probe.ready := prober.io.req.ready && !lrsc_valid
+ prober.io.req.bits := io.mem.probe.bits
+ releaseArb.io.in(1) <> prober.io.rep
+ prober.io.way_en := s2_tag_match_way
+ prober.io.block_state := s2_hit_state
+ metaReadArb.io.in(2) <> prober.io.meta_read
+ metaWriteArb.io.in(1) <> prober.io.meta_write
+ prober.io.mshr_rdy := mshrs.io.probe_rdy
+
+ // refills
+ val narrow_grant = FlowThroughSerializer(io.mem.grant, refillCyclesPerBeat)
+ mshrs.io.mem_grant.valid := narrow_grant.fire()
+ mshrs.io.mem_grant.bits := narrow_grant.bits
+ narrow_grant.ready := writeArb.io.in(1).ready || !narrow_grant.bits.hasData()
+ /* The last clause here is necessary in order to prevent the responses for
+ * the IOMSHRs from being written into the data array. It works because the
+ * IOMSHR ids start right after the ones for the regular MSHRs. */
+ writeArb.io.in(1).valid := narrow_grant.valid && narrow_grant.bits.hasData() &&
+ narrow_grant.bits.client_xact_id < UInt(nMSHRs)
+ writeArb.io.in(1).bits.addr := mshrs.io.refill.addr
+ writeArb.io.in(1).bits.way_en := mshrs.io.refill.way_en
+ writeArb.io.in(1).bits.wmask := ~UInt(0, rowWords)
+ writeArb.io.in(1).bits.data := narrow_grant.bits.data(encRowBits-1,0)
+ data.io.read <> readArb.io.out
+ readArb.io.out.ready := !narrow_grant.valid || narrow_grant.ready // insert bubble if refill gets blocked
+ io.mem.finish <> mshrs.io.mem_finish
+
+ // writebacks
+ val wbArb = Module(new Arbiter(new WritebackReq, 2))
+ wbArb.io.in(0) <> prober.io.wb_req
+ wbArb.io.in(1) <> mshrs.io.wb_req
+ wb.io.req <> wbArb.io.out
+ metaReadArb.io.in(3) <> wb.io.meta_read
+ readArb.io.in(2) <> wb.io.data_req
+ wb.io.data_resp := s2_data_corrected
+ releaseArb.io.in(0) <> wb.io.release
+
+ // store->load bypassing
+ val s4_valid = Reg(next=s3_valid, init=Bool(false))
+ val s4_req = RegEnable(s3_req, s3_valid && metaReadArb.io.out.valid)
+ // Candidate bypass sources, youngest first, matched on word address + write cmd.
+ val bypasses = List(
+ ((s2_valid_masked || s2_replay) && !s2_sc_fail, s2_req, amoalu.io.out),
+ (s3_valid, s3_req, s3_req.data),
+ (s4_valid, s4_req, s4_req.data)
+ ).map(r => (r._1 && (s1_addr >> wordOffBits === r._2.addr >> wordOffBits) && isWrite(r._2.cmd), r._3))
+ val s2_store_bypass_data = Reg(Bits(width = coreDataBits))
+ val s2_store_bypass = Reg(Bool())
+ when (s1_clk_en) {
+ s2_store_bypass := false
+ when (bypasses.map(_._1).reduce(_||_)) {
+ s2_store_bypass_data := PriorityMux(bypasses)
+ s2_store_bypass := true
+ }
+ }
+
+ // load data subword mux/sign extension
+ val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(coreDataBits)))
+ val s2_data_word = Mux(s2_store_bypass, s2_store_bypass_data, s2_data_word_prebypass)
+ val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word, s2_sc, wordBytes)
+
+ amoalu.io.addr := s2_req.addr
+ amoalu.io.cmd := s2_req.cmd
+ amoalu.io.typ := s2_req.typ
+ amoalu.io.lhs := s2_data_word
+ amoalu.io.rhs := s2_req.data
+
+ // nack it like it's hot
+ val s1_nack = dtlb.io.req.valid && dtlb.io.resp.miss ||
+ s1_req.addr(idxMSB,idxLSB) === prober.io.meta_write.bits.idx && !prober.io.req.ready
+ val s2_nack_hit = RegEnable(s1_nack, s1_valid || s1_replay)
+ when (s2_nack_hit) { mshrs.io.req.valid := Bool(false) }
+ // nack_victim: a hit on a line an MSHR is about to replace; nack_miss: no MSHR available.
+ val s2_nack_victim = s2_hit && mshrs.io.secondary_miss
+ val s2_nack_miss = !s2_hit && !mshrs.io.req.ready
+ val s2_nack = s2_nack_hit || s2_nack_victim || s2_nack_miss
+ s2_valid_masked := s2_valid && !s2_nack
+
+ // A correctable ECC error recycles the request so it sees the corrected data.
+ val s2_recycle_ecc = (s2_valid || s2_replay) && s2_hit && s2_data_correctable
+ val s2_recycle_next = Reg(init=Bool(false))
+ when (s1_valid || s1_replay) { s2_recycle_next := s2_recycle_ecc }
+ s2_recycle := s2_recycle_ecc || s2_recycle_next
+
+ // after a nack, block until nack condition resolves to save energy
+ val block_miss = Reg(init=Bool(false))
+ block_miss := (s2_valid || block_miss) && s2_nack_miss
+ when (block_miss) {
+ io.cpu.req.ready := Bool(false)
+ }
+
+ // Hit/replay response path.
+ val cache_resp = Wire(Valid(new HellaCacheResp))
+ cache_resp.valid := (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable
+ cache_resp.bits := s2_req
+ cache_resp.bits.has_data := isRead(s2_req.cmd)
+ cache_resp.bits.data := loadgen.data | s2_sc_fail
+ cache_resp.bits.store_data := s2_req.data
+ cache_resp.bits.replay := s2_replay
+
+ // Uncached (IOMSHR) response path; only accepted when the pipeline is idle,
+ // since the cache response would otherwise win the shared response port.
+ val uncache_resp = Wire(Valid(new HellaCacheResp))
+ uncache_resp.bits := mshrs.io.resp.bits
+ uncache_resp.valid := mshrs.io.resp.valid
+ mshrs.io.resp.ready := Reg(next= !(s1_valid || s1_replay))
+
+ io.cpu.s2_nack := s2_valid && s2_nack
+ io.cpu.resp := Mux(mshrs.io.resp.ready, uncache_resp, cache_resp)
+ io.cpu.resp.bits.data_word_bypass := loadgen.wordData
+ io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid
+ io.cpu.replay_next := (s1_replay && s1_read) || mshrs.io.replay_next
+}
+
+/**
+ * This module buffers requests made by the SimpleHellaCacheIF in case they
+ * are nacked. Nacked requests must be replayed in order, and no other requests
+ * must be allowed to go through until the replayed requests are successfully
+ * completed.
+ */
+class SimpleHellaCacheIFReplayQueue(depth: Int)
+ (implicit val p: Parameters) extends Module
+ with HasL1HellaCacheParameters {
+ val io = new Bundle {
+ val req = Decoupled(new HellaCacheReq).flip
+ val nack = Valid(Bits(width = coreDCacheReqTagBits)).flip
+ val resp = Valid(new HellaCacheResp).flip
+ val replay = Decoupled(new HellaCacheReq)
+ }
+
+ // Registers to store the sent request
+ // When a request is sent the first time,
+ // it is stored in one of the reqs registers
+ // and the corresponding inflight bit is set.
+ // The reqs register will be deallocated once the request is
+ // successfully completed.
+ val inflight = Reg(init = UInt(0, depth))
+ val reqs = Reg(Vec(depth, new HellaCacheReq))
+
+ // The nack queue stores the index of nacked requests (in the reqs vector)
+ // in the order that they were nacked. A request is enqueued onto nackq
+ // when it is newly nacked (i.e. not a nack for a previous replay).
+ // The head of the nack queue will be replayed until it is
+ // successfully completed, at which time the request is dequeued.
+ // No new requests will be made or other replays attempted until the head
+ // of the nackq is successfully completed.
+ val nackq = Module(new Queue(UInt(width = log2Up(depth)), depth))
+ val replaying = Reg(init = Bool(false))
+
+ // Lowest free slot in reqs, used for the next new request.
+ val next_inflight_onehot = PriorityEncoderOH(~inflight)
+ val next_inflight = OHToUInt(next_inflight_onehot)
+
+ val next_replay = nackq.io.deq.bits
+ val next_replay_onehot = UIntToOH(next_replay)
+ val next_replay_req = reqs(next_replay)
+
+ // Keep sending the head of the nack queue until it succeeds
+ io.replay.valid := nackq.io.deq.valid && !replaying
+ io.replay.bits := next_replay_req
+ // Don't allow new requests if there are replays waiting
+ // or something being nacked.
+ io.req.ready := !inflight.andR && !nackq.io.deq.valid && !io.nack.valid
+
+ // Match on the tags to determine the index of nacks or responses
+ val nack_onehot = Cat(reqs.map(_.tag === io.nack.bits).reverse) & inflight
+ val resp_onehot = Cat(reqs.map(_.tag === io.resp.bits.tag).reverse) & inflight
+
+ val replay_complete = io.resp.valid && replaying && io.resp.bits.tag === next_replay_req.tag
+ val nack_head = io.nack.valid && nackq.io.deq.valid && io.nack.bits === next_replay_req.tag
+
+ // Enqueue to the nack queue if there is a nack that is not in response to
+ // the previous replay
+ nackq.io.enq.valid := io.nack.valid && !nack_head
+ nackq.io.enq.bits := OHToUInt(nack_onehot)
+ assert(!nackq.io.enq.valid || nackq.io.enq.ready,
+ "SimpleHellaCacheIF: ReplayQueue nack queue overflow")
+
+ // Dequeue from the nack queue if the last replay was successfully completed
+ nackq.io.deq.ready := replay_complete
+ assert(!nackq.io.deq.ready || nackq.io.deq.valid,
+ "SimpleHellaCacheIF: ReplayQueue nack queue underflow")
+
+ // Set inflight bit when a request is made
+ // Clear it when it is successfully completed
+ inflight := (inflight | Mux(io.req.fire(), next_inflight_onehot, UInt(0))) &
+ ~Mux(io.resp.valid, resp_onehot, UInt(0))
+
+ when (io.req.fire()) {
+ reqs(next_inflight) := io.req.bits
+ }
+
+ // Only one replay outstanding at a time
+ when (io.replay.fire()) { replaying := Bool(true) }
+ when (nack_head || replay_complete) { replaying := Bool(false) }
+}
+
+// exposes a sane decoupled request interface
+// Adapter between a simple decoupled requestor and the HellaCache's nack-based
+// interface: new requests and replays share the cache port via an arbiter, and
+// nacked requests are retried through the replay queue until they complete.
+class SimpleHellaCacheIF(implicit p: Parameters) extends Module
+{
+ val io = new Bundle {
+ val requestor = new HellaCacheIO().flip
+ val cache = new HellaCacheIO
+ }
+
+ val replayq = Module(new SimpleHellaCacheIFReplayQueue(2))
+ val req_arb = Module(new Arbiter(new HellaCacheReq, 2))
+
+ // A new request fires only when the arbiter slot, the replay queue, and the
+ // requestor are all ready/valid simultaneously.
+ val req_helper = DecoupledHelper(
+ req_arb.io.in(1).ready,
+ replayq.io.req.ready,
+ io.requestor.req.valid)
+
+ // Replays get the higher-priority arbiter port.
+ req_arb.io.in(0) <> replayq.io.replay
+ req_arb.io.in(1).valid := req_helper.fire(req_arb.io.in(1).ready)
+ req_arb.io.in(1).bits := io.requestor.req.bits
+ io.requestor.req.ready := req_helper.fire(io.requestor.req.valid)
+ replayq.io.req.valid := req_helper.fire(replayq.io.req.ready)
+ replayq.io.req.bits := io.requestor.req.bits
+
+ // Shadow the request through the cache's s1/s2 stages so nacks can be
+ // matched back to the request tag.
+ val s0_req_fire = io.cache.req.fire()
+ val s1_req_fire = Reg(next = s0_req_fire)
+ val s2_req_fire = Reg(next = s1_req_fire)
+ val s1_req_tag = Reg(next = io.cache.req.bits.tag)
+ val s2_req_tag = Reg(next = s1_req_tag)
+ val s2_kill = Reg(next = io.cache.s1_kill)
+
+ io.cache.invalidate_lr := io.requestor.invalidate_lr
+ io.cache.req <> req_arb.io.out
+ io.cache.req.bits.phys := Bool(true)
+ io.cache.s1_kill := io.cache.s2_nack
+ io.cache.s1_data := RegEnable(req_arb.io.out.bits.data, s0_req_fire)
+
+ // Treat a nack or an s1 kill as a failed request that must be replayed.
+ replayq.io.nack.valid := (io.cache.s2_nack || s2_kill) && s2_req_fire
+ replayq.io.nack.bits := s2_req_tag
+ replayq.io.resp := io.cache.resp
+ io.requestor.resp := io.cache.resp
+
+ assert(!Reg(next = io.cache.req.fire()) ||
+ !(io.cache.xcpt.ma.ld || io.cache.xcpt.ma.st ||
+ io.cache.xcpt.pf.ld || io.cache.xcpt.pf.st),
+ "SimpleHellaCacheIF exception")
+}
diff --git a/rocket/src/main/scala/package.scala b/rocket/src/main/scala/package.scala
new file mode 100644
index 00000000..30368040
--- /dev/null
+++ b/rocket/src/main/scala/package.scala
@@ -0,0 +1,4 @@
+// See LICENSE for license details.
+
+package object rocket extends
+ rocket.constants.ScalarOpConstants // makes the scalar-op constants visible throughout the rocket package
diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala
new file mode 100644
index 00000000..c5a64764
--- /dev/null
+++ b/rocket/src/main/scala/ptw.scala
@@ -0,0 +1,203 @@
+// See LICENSE for license details.
+
+package rocket
+
+import Chisel._
+import uncore.agents._
+import uncore.constants._
+import Util._
+import cde.{Parameters, Field}
+
+class PTWReq(implicit p: Parameters) extends CoreBundle()(p) { // page-table-walk request sent from a TLB to the PTW
+ val prv = Bits(width = 2) // privilege level of the access (bit 0 checked for supervisor pages)
+ val pum = Bool() // when set, supervisor accesses to user pages are denied (see PTE.access_ok)
+ val mxr = Bool() // make-executable-readable: loads may also use execute permission
+ val addr = UInt(width = vpnBits) // virtual page number to translate
+ val store = Bool() // access is a store (needs write permission, sets dirty bit)
+ val fetch = Bool() // access is an instruction fetch (needs execute permission)
+}
+
+class PTWResp(implicit p: Parameters) extends CoreBundle()(p) { // page-table-walk response returned to the requesting TLB
+ val pte = new PTE // final PTE of the walk (ppn patched by the PTW for superpages)
+}
+
+class TLBPTWIO(implicit p: Parameters) extends CoreBundle()(p) { // interface between a TLB (client) and the shared PTW
+ val req = Decoupled(new PTWReq) // translation request
+ val resp = Valid(new PTWResp).flip // completed walk result
+ val ptbr = new PTBR().asInput // page-table base register, fanned out from the datapath
+ val invalidate = Bool(INPUT) // flush cached translations
+ val status = new MStatus().asInput
+}
+
+class DatapathPTWIO(implicit p: Parameters) extends CoreBundle()(p) { // CSR-file state supplied by the datapath to the PTW
+ val ptbr = new PTBR().asInput // page-table base register
+ val invalidate = Bool(INPUT) // invalidate the PTW's cached PTEs
+ val status = new MStatus().asInput
+}
+
+class PTE(implicit p: Parameters) extends CoreBundle()(p) { // one page-table entry; field order defines the in-memory bit layout
+ val reserved_for_hardware = Bits(width = 16)
+ val ppn = UInt(width = 38) // physical page number
+ val reserved_for_software = Bits(width = 2)
+ val d = Bool() // dirty
+ val a = Bool() // accessed
+ val g = Bool() // global
+ val u = Bool() // user-accessible
+ val x = Bool() // executable
+ val w = Bool() // writable
+ val r = Bool() // readable
+ val v = Bool() // valid
+
+ def table(dummy: Int = 0) = v && !r && !w && !x // valid pointer to the next page-table level (no permissions set)
+ def leaf(dummy: Int = 0) = v && (r || (x && !w)) // valid leaf mapping: readable, or execute-only
+ def ur(dummy: Int = 0) = sr() && u // user-readable
+ def uw(dummy: Int = 0) = sw() && u // user-writable
+ def ux(dummy: Int = 0) = sx() && u // user-executable
+ def sr(dummy: Int = 0) = leaf() && r // supervisor-readable
+ def sw(dummy: Int = 0) = leaf() && w // supervisor-writable
+ def sx(dummy: Int = 0) = leaf() && x // supervisor-executable
+
+ def access_ok(req: PTWReq) = { // does this PTE permit the given request?
+ val perm_ok = Mux(req.fetch, x, Mux(req.store, w, r || (x && req.mxr))) // fetch needs x, store needs w, load needs r (or x under mxr)
+ val priv_ok = Mux(u, !req.pum, req.prv(0)) // user pages: denied when pum is set; non-user pages: need supervisor privilege
+ leaf() && priv_ok && perm_ok
+ }
+}
+
+class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { // hardware page-table walker shared by n TLB requestors
+ val io = new Bundle {
+ val requestor = Vec(n, new TLBPTWIO).flip // TLB clients
+ val mem = new HellaCacheIO // data-cache port used to read PTEs and update a/d bits
+ val dpath = new DatapathPTWIO // ptbr/invalidate/status from the CSR file
+ }
+
+ require(usingAtomics, "PTW requires atomic memory operations") // a/d bits are set with an atomic OR (M_XA_OR)
+
+ val s_ready :: s_req :: s_wait :: s_set_dirty :: s_wait_dirty :: s_done :: Nil = Enum(UInt(), 6)
+ val state = Reg(init=s_ready)
+ val count = Reg(UInt(width = log2Up(pgLevels))) // current page-table level (0 = root)
+
+ val r_req = Reg(new PTWReq) // latched request being serviced
+ val r_req_dest = Reg(Bits()) // which requestor port receives the response
+ val r_pte = Reg(new PTE)
+
+ val vpn_idxs = (0 until pgLevels).map(i => (r_req.addr >> (pgLevels-i-1)*pgLevelBits)(pgLevelBits-1,0)) // VPN slice indexing each level's table
+ val vpn_idx = vpn_idxs(count)
+
+ val arb = Module(new RRArbiter(new PTWReq, n)) // round-robin among the TLB ports
+ arb.io.in <> io.requestor.map(_.req)
+ arb.io.out.ready := state === s_ready // accept a new walk only when idle
+
+ val pte = new PTE().fromBits(io.mem.resp.bits.data) // PTE view of the returned memory word
+ val pte_addr = Cat(r_pte.ppn, vpn_idx).toUInt << log2Up(xLen/8) // physical address of the PTE to fetch at this level
+
+ when (arb.io.out.fire()) {
+ r_req := arb.io.out.bits
+ r_req_dest := arb.io.chosen
+ r_pte.ppn := io.dpath.ptbr.ppn // walk starts at the root table named by ptbr
+ }
+
+ val (pte_cache_hit, pte_cache_data) = { // small fully-associative cache of intermediate (table-pointer) PTEs
+ val size = 1 << log2Up(pgLevels * 2)
+ val plru = new PseudoLRU(size)
+ val valid = Reg(init = UInt(0, size))
+ val tags = Reg(Vec(size, UInt(width = paddrBits))) // tagged by PTE physical address
+ val data = Reg(Vec(size, UInt(width = ppnBits)))
+
+ val hits = tags.map(_ === pte_addr).toBits & valid
+ val hit = hits.orR
+ when (io.mem.resp.valid && pte.table() && !hit) { // allocate when a fetched PTE is a table pointer not yet cached
+ val r = Mux(valid.andR, plru.replace, PriorityEncoder(~valid)) // prefer an invalid way, else the PLRU victim
+ valid := valid | UIntToOH(r)
+ tags(r) := pte_addr
+ data(r) := pte.ppn
+ }
+ when (hit && state === s_req) { plru.access(OHToUInt(hits)) } // touch the hit way to update replacement state
+ when (io.dpath.invalidate) { valid := 0 } // flush on TLB-invalidate
+
+ (hit, Mux1H(hits, data))
+ }
+
+ val set_dirty_bit = pte.access_ok(r_req) && (!pte.a || (r_req.store && !pte.d)) // usable leaf whose accessed/dirty bits need updating
+ when (io.mem.resp.valid && state === s_wait && !set_dirty_bit) {
+ r_pte := pte
+ }
+
+ val pte_wdata = Wire(init=new PTE().fromBits(0)) // OR-mask written atomically: sets a, and d for stores
+ pte_wdata.a := true
+ pte_wdata.d := r_req.store
+
+ io.mem.req.valid := state === s_req || state === s_set_dirty
+ io.mem.req.bits.phys := Bool(true) // PTEs are fetched by physical address
+ io.mem.req.bits.cmd := Mux(state === s_set_dirty, M_XA_OR, M_XRD) // atomic OR for a/d update, plain load otherwise
+ io.mem.req.bits.typ := MT_D
+ io.mem.req.bits.addr := pte_addr
+ io.mem.s1_data := pte_wdata.toBits
+ io.mem.s1_kill := Bool(false)
+ io.mem.invalidate_lr := Bool(false)
+
+ val r_resp_ppn = io.mem.req.bits.addr >> pgIdxBits
+ val resp_ppns = (0 until pgLevels-1).map(i => Cat(r_resp_ppn >> pgLevelBits*(pgLevels-i-1), r_req.addr(pgLevelBits*(pgLevels-i-1)-1,0))) :+ r_resp_ppn // superpage leaves splice low VPN bits into the PPN
+ val resp_ppn = resp_ppns(count)
+ val resp_val = state === s_done
+
+ for (i <- 0 until io.requestor.size) { // fan the response and CSR state out to every TLB port
+ io.requestor(i).resp.valid := resp_val && (r_req_dest === i) // only the originating port sees the response
+ io.requestor(i).resp.bits.pte := r_pte
+ io.requestor(i).resp.bits.pte.ppn := resp_ppn
+ io.requestor(i).ptbr := io.dpath.ptbr
+ io.requestor(i).invalidate := io.dpath.invalidate
+ io.requestor(i).status := io.dpath.status
+ }
+
+ // control state machine
+ switch (state) {
+ is (s_ready) {
+ when (arb.io.out.valid) {
+ state := s_req
+ }
+ count := UInt(0)
+ }
+ is (s_req) {
+ when (pte_cache_hit && count < pgLevels-1) { // PTE-cache hit: descend a level without a memory access
+ io.mem.req.valid := false
+ state := s_req
+ count := count + 1
+ r_pte.ppn := pte_cache_data
+ }.elsewhen (io.mem.req.ready) {
+ state := s_wait
+ }
+ }
+ is (s_wait) {
+ when (io.mem.s2_nack) {
+ state := s_req // retry the PTE load on a cache nack
+ }
+ when (io.mem.resp.valid) {
+ state := s_done
+ when (set_dirty_bit) {
+ state := s_set_dirty // leaf usable but a/d must be set before completing
+ }
+ when (pte.table() && count < pgLevels-1) {
+ state := s_req // table pointer: descend to the next level
+ count := count + 1
+ }
+ }
+ }
+ is (s_set_dirty) {
+ when (io.mem.req.ready) {
+ state := s_wait_dirty
+ }
+ }
+ is (s_wait_dirty) {
+ when (io.mem.s2_nack) {
+ state := s_set_dirty // retry the atomic a/d update on a nack
+ }
+ when (io.mem.resp.valid) {
+ state := s_req // re-fetch the PTE after updating a/d
+ }
+ }
+ is (s_done) {
+ state := s_ready
+ }
+ }
+}
diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala
new file mode 100644
index 00000000..ce0fcfbe
--- /dev/null
+++ b/rocket/src/main/scala/rocc.scala
@@ -0,0 +1,303 @@
+// See LICENSE for license details.
+
+package rocket
+
+import Chisel._
+import uncore.tilelink._
+import uncore.constants._
+import uncore.agents.CacheName
+import Util._
+import cde.{Parameters, Field}
+
+case object RoccMaxTaggedMemXacts extends Field[Int] // config key: max outstanding tagged memory transactions for a RoCC accelerator
+case object RoccNMemChannels extends Field[Int] // config key: number of uncached TileLink channels (sizes RoCCInterface.utl)
+case object RoccNPTWPorts extends Field[Int] // config key: number of PTW ports (sizes RoCCInterface.ptw)
+case object RoccNCSRs extends Field[Int] // config key: number of RoCC CSRs (sizes RoCCCSRs.rdata)
+
+class RoCCCSRs(implicit p: Parameters) extends CoreBundle()(p) { // CSR read/write channel between the core and an accelerator
+ val rdata = Vec(nRoccCsrs, UInt(INPUT, xLen)) // read data, one value per RoCC CSR
+ val waddr = UInt(OUTPUT, CSR.ADDRSZ) // CSR address being written
+ val wdata = UInt(OUTPUT, xLen)
+ val wen = Bool(OUTPUT) // write strobe qualifying waddr/wdata
+}
+
+class RoCCInstruction extends Bundle // decoded fields of a RoCC custom instruction
+{
+ val funct = Bits(width = 7) // accelerator-defined function code
+ val rs2 = Bits(width = 5) // source register 2 specifier
+ val rs1 = Bits(width = 5) // source register 1 specifier
+ val xd = Bool() // instruction expects a response written back to rd
+ val xs1 = Bool() // rs1 carries a register operand (per the RoCC encoding)
+ val xs2 = Bool() // rs2 carries a register operand (per the RoCC encoding)
+ val rd = Bits(width = 5) // destination register specifier
+ val opcode = Bits(width = 7) // one of the custom opcode spaces (see OpcodeSet)
+}
+
+class RoCCCommand(implicit p: Parameters) extends CoreBundle()(p) { // one command issued by the core to an accelerator
+ val inst = new RoCCInstruction // the custom instruction itself
+ val rs1 = Bits(width = xLen) // value read from rs1
+ val rs2 = Bits(width = xLen) // value read from rs2
+ val status = new MStatus // core status at issue time
+}
+
+class RoCCResponse(implicit p: Parameters) extends CoreBundle()(p) { // accelerator writeback into the core's register file
+ val rd = Bits(width = 5) // destination register
+ val data = Bits(width = xLen) // value to write
+}
+
+class RoCCInterface(implicit p: Parameters) extends CoreBundle()(p) { // complete core <-> accelerator interface
+ val cmd = Decoupled(new RoCCCommand).flip // commands from the core
+ val resp = Decoupled(new RoCCResponse) // register writeback to the core
+ val mem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" })) // port into the L1 data cache
+ val busy = Bool(OUTPUT) // accelerator has outstanding work (blocks fences)
+ val interrupt = Bool(OUTPUT) // raise an interrupt on the core
+
+ // These should be handled differently, eventually
+ val autl = new ClientUncachedTileLinkIO // uncached TileLink port
+ val utl = Vec(p(RoccNMemChannels), new ClientUncachedTileLinkIO) // additional uncached TileLink channels
+ val ptw = Vec(p(RoccNPTWPorts), new TLBPTWIO) // page-table-walker ports
+ val fpu_req = Decoupled(new FPInput) // access to the core's FPU
+ val fpu_resp = Decoupled(new FPResult).flip
+ val exception = Bool(INPUT)
+ val csr = (new RoCCCSRs).flip // accelerator-side CSR channel
+ val host_id = UInt(INPUT, log2Up(nCores))
+
+ override def cloneType = new RoCCInterface().asInstanceOf[this.type]
+}
+
+abstract class RoCC(implicit p: Parameters) extends CoreModule()(p) { // base class for all RoCC accelerators
+ val io = new RoCCInterface
+ io.mem.req.bits.phys := Bool(true) // don't perform address translation
+}
+
+class AccumulatorExample(n: Int = 4)(implicit p: Parameters) extends RoCC()(p) { // example accelerator: n accumulator registers with write/read/load/accumulate ops
+ val regfile = Mem(n, UInt(width = xLen)) // the accumulator registers
+ val busy = Reg(init = Vec.fill(n){Bool(false)}) // per-register flag: a load into that register is outstanding
+
+ val cmd = Queue(io.cmd)
+ val funct = cmd.bits.inst.funct
+ val addr = cmd.bits.rs2(log2Up(n)-1,0) // accumulator index, taken from rs2
+ val doWrite = funct === UInt(0) // funct 0: regfile(addr) := rs1
+ val doRead = funct === UInt(1) // funct 1: read regfile(addr)
+ val doLoad = funct === UInt(2) // funct 2: load mem[rs1] into regfile(addr)
+ val doAccum = funct === UInt(3) // funct 3: regfile(addr) += rs1
+ val memRespTag = io.mem.resp.bits.tag(log2Up(n)-1,0) // tag carries the destination register index
+
+ // datapath
+ val addend = cmd.bits.rs1
+ val accum = regfile(addr)
+ val wdata = Mux(doWrite, addend, accum + addend)
+
+ when (cmd.fire() && (doWrite || doAccum)) {
+ regfile(addr) := wdata
+ }
+
+ when (io.mem.resp.valid) {
+ regfile(memRespTag) := io.mem.resp.bits.data // load data lands in the register named by the tag
+ }
+
+ // control
+ when (io.mem.req.fire()) {
+ busy(addr) := Bool(true)
+ }
+
+ when (io.mem.resp.valid) {
+ busy(memRespTag) := Bool(false)
+ }
+
+ val doResp = cmd.bits.inst.xd
+ val stallReg = busy(addr) // target register still has a load in flight
+ val stallLoad = doLoad && !io.mem.req.ready
+ val stallResp = doResp && !io.resp.ready
+
+ cmd.ready := !stallReg && !stallLoad && !stallResp
+ // command resolved if no stalls AND not issuing a load that will need a request
+
+ // PROC RESPONSE INTERFACE
+ io.resp.valid := cmd.valid && doResp && !stallReg && !stallLoad
+ // valid response if valid command, need a response, and no stalls
+ io.resp.bits.rd := cmd.bits.inst.rd
+ // Must respond with the appropriate tag or undefined behavior
+ io.resp.bits.data := accum
+ // Semantics is to always send out prior accumulator register value
+
+ io.busy := cmd.valid || busy.reduce(_||_)
+ // Be busy when have pending memory requests or committed possibility of pending requests
+ io.interrupt := Bool(false)
+ // Set this true to trigger an interrupt on the processor (please refer to supervisor documentation)
+
+ // MEMORY REQUEST INTERFACE
+ io.mem.req.valid := cmd.valid && doLoad && !stallReg && !stallResp
+ io.mem.req.bits.addr := addend
+ io.mem.req.bits.tag := addr
+ io.mem.req.bits.cmd := M_XRD // perform a load (M_XWR for stores)
+ io.mem.req.bits.typ := MT_D // D = 8 bytes, W = 4, H = 2, B = 1
+ io.mem.req.bits.data := Bits(0) // we're not performing any stores...
+ io.mem.invalidate_lr := false
+
+ io.autl.acquire.valid := false // unused TileLink port tied off
+ io.autl.grant.ready := false
+}
+
+class TranslatorExample(implicit p: Parameters) extends RoCC()(p) { // example accelerator: translate the virtual address in rs1 via the PTW
+ val req_addr = Reg(UInt(width = coreMaxAddrBits)) // latched virtual address from rs1
+ val req_rd = Reg(io.resp.bits.rd)
+ val req_offset = req_addr(pgIdxBits - 1, 0) // page offset passes through untranslated
+ val req_vpn = req_addr(coreMaxAddrBits - 1, pgIdxBits) // virtual page number sent to the PTW
+ val pte = Reg(new PTE)
+
+ val s_idle :: s_ptw_req :: s_ptw_resp :: s_resp :: Nil = Enum(Bits(), 4)
+ val state = Reg(init = s_idle)
+
+ io.cmd.ready := (state === s_idle) // one translation in flight at a time
+
+ when (io.cmd.fire()) {
+ req_rd := io.cmd.bits.inst.rd
+ req_addr := io.cmd.bits.rs1
+ state := s_ptw_req
+ }
+
+ private val ptw = io.ptw(0) // uses only the first PTW port
+
+ when (ptw.req.fire()) { state := s_ptw_resp }
+
+ when (state === s_ptw_resp && ptw.resp.valid) {
+ pte := ptw.resp.bits.pte
+ state := s_resp
+ }
+
+ when (io.resp.fire()) { state := s_idle }
+
+ ptw.req.valid := (state === s_ptw_req)
+ ptw.req.bits.addr := req_vpn
+ ptw.req.bits.store := Bool(false) // treated as a non-store, non-fetch access
+ ptw.req.bits.fetch := Bool(false)
+
+ io.resp.valid := (state === s_resp)
+ io.resp.bits.rd := req_rd
+ io.resp.bits.data := Mux(pte.leaf(), Cat(pte.ppn, req_offset), ~UInt(0, xLen)) // all-ones result signals a failed translation
+
+ io.busy := (state =/= s_idle)
+ io.interrupt := Bool(false)
+ io.mem.req.valid := Bool(false) // unused interfaces tied off
+ io.mem.invalidate_lr := Bool(false)
+ io.autl.acquire.valid := Bool(false)
+ io.autl.grant.ready := Bool(false)
+}
+
+class CharacterCountExample(implicit p: Parameters) extends RoCC()(p)
+ with HasTileLinkParameters { // example accelerator: count occurrences of a byte in a NUL-terminated string
+
+ private val blockOffset = tlBeatAddrBits + tlByteAddrBits // address bits below a cache-block boundary
+
+ val needle = Reg(UInt(width = 8)) // byte value to count (from rs2)
+ val addr = Reg(UInt(width = coreMaxAddrBits)) // current string address (starts at rs1)
+ val count = Reg(UInt(width = xLen)) // running occurrence count
+ val resp_rd = Reg(io.resp.bits.rd)
+
+ val addr_block = addr(coreMaxAddrBits - 1, blockOffset)
+ val offset = addr(blockOffset - 1, 0) // starting byte offset within the first block
+ val next_addr = (addr_block + UInt(1)) << UInt(blockOffset) // start of the following cache block
+
+ val s_idle :: s_acq :: s_gnt :: s_check :: s_resp :: Nil = Enum(Bits(), 5)
+ val state = Reg(init = s_idle)
+
+ val gnt = io.autl.grant.bits
+ val recv_data = Reg(UInt(width = tlDataBits)) // latched data beat being scanned
+ val recv_beat = Reg(UInt(width = tlBeatAddrBits))
+
+ val data_bytes = Vec.tabulate(tlDataBytes) { i => recv_data(8 * (i + 1) - 1, 8 * i) } // beat split into bytes
+ val zero_match = data_bytes.map(_ === UInt(0))
+ val needle_match = data_bytes.map(_ === needle)
+ val first_zero = PriorityEncoder(zero_match) // position of the terminating NUL in this beat, if any
+
+ val chars_found = PopCount(needle_match.zipWithIndex.map {
+ case (matches, i) =>
+ val idx = Cat(recv_beat, UInt(i, tlByteAddrBits)) // byte's offset within the block
+ matches && idx >= offset && UInt(i) <= first_zero // count only between the start offset and the NUL
+ })
+ val zero_found = zero_match.reduce(_ || _)
+ val finished = Reg(Bool()) // NUL already seen; later beats no longer add to count
+
+ io.cmd.ready := (state === s_idle)
+ io.resp.valid := (state === s_resp)
+ io.resp.bits.rd := resp_rd
+ io.resp.bits.data := count
+ io.autl.acquire.valid := (state === s_acq)
+ io.autl.acquire.bits := GetBlock(addr_block = addr_block) // fetch one whole cache block uncached
+ io.autl.grant.ready := (state === s_gnt)
+
+ when (io.cmd.fire()) {
+ addr := io.cmd.bits.rs1
+ needle := io.cmd.bits.rs2
+ resp_rd := io.cmd.bits.inst.rd
+ count := UInt(0)
+ finished := Bool(false)
+ state := s_acq
+ }
+
+ when (io.autl.acquire.fire()) { state := s_gnt }
+
+ when (io.autl.grant.fire()) { // latch each returned beat, then scan it
+ recv_beat := gnt.addr_beat
+ recv_data := gnt.data
+ state := s_check
+ }
+
+ when (state === s_check) {
+ when (!finished) {
+ count := count + chars_found
+ }
+ when (zero_found) { finished := Bool(true) }
+ when (recv_beat === UInt(tlDataBeats - 1)) { // last beat of the block
+ addr := next_addr
+ state := Mux(zero_found || finished, s_resp, s_acq) // done if NUL seen, else fetch next block
+ } .otherwise {
+ state := s_gnt // wait for the next beat of this block
+ }
+ }
+
+ when (io.resp.fire()) { state := s_idle }
+
+ io.busy := (state =/= s_idle)
+ io.interrupt := Bool(false)
+ io.mem.req.valid := Bool(false) // cache port unused
+ io.mem.invalidate_lr := Bool(false)
+}
+
+class OpcodeSet(val opcodes: Seq[UInt]) { // a set of custom-opcode encodings claimed by one accelerator
+ def |(set: OpcodeSet) =
+ new OpcodeSet(this.opcodes ++ set.opcodes) // union of two sets
+
+ def matches(oc: UInt) = opcodes.map(_ === oc).reduce(_ || _) // hardware test: does oc equal any member?
+}
+
+object OpcodeSet { // the four custom opcode spaces reserved for accelerators
+ val custom0 = new OpcodeSet(Seq(Bits("b0001011")))
+ val custom1 = new OpcodeSet(Seq(Bits("b0101011")))
+ val custom2 = new OpcodeSet(Seq(Bits("b1011011")))
+ val custom3 = new OpcodeSet(Seq(Bits("b1111011")))
+ val all = custom0 | custom1 | custom2 | custom3 // union of every custom space
+}
+
+class RoccCommandRouter(opcodes: Seq[OpcodeSet])(implicit p: Parameters)
+ extends CoreModule()(p) { // routes each RoCC command to the accelerator whose opcode set matches
+ val io = new Bundle {
+ val in = Decoupled(new RoCCCommand).flip // command stream from the core
+ val out = Vec(opcodes.size, Decoupled(new RoCCCommand)) // one output per accelerator
+ val busy = Bool(OUTPUT)
+ }
+
+ val cmd = Queue(io.in)
+ val cmdReadys = io.out.zip(opcodes).map { case (out, opcode) =>
+ val me = opcode.matches(cmd.bits.inst.opcode)
+ out.valid := cmd.valid && me // present the command only on the matching output
+ out.bits := cmd.bits
+ out.ready && me
+ }
+ cmd.ready := cmdReadys.reduce(_ || _) // dequeue once the matching accelerator accepts
+ io.busy := cmd.valid
+
+ assert(PopCount(cmdReadys) <= UInt(1),
+ "Custom opcode matched for more than one accelerator") // configured opcode sets must be disjoint
+}
diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala
new file mode 100644
index 00000000..7756ab18
--- /dev/null
+++ b/rocket/src/main/scala/rocket.scala
@@ -0,0 +1,679 @@
+// See LICENSE for license details.
+
+package rocket
+
+import Chisel._
+import junctions._
+import uncore.devices._
+import uncore.agents.CacheName
+import uncore.constants._
+import Util._
+import cde.{Parameters, Field}
+
+case object UseFPU extends Field[Boolean] // config key: enable the floating-point unit
+case object FDivSqrt extends Field[Boolean] // config key: enable hardware FP divide/sqrt
+case object XLen extends Field[Int] // config key: integer register width in bits
+case object FetchWidth extends Field[Int] // config key: instructions fetched per cycle
+case object RetireWidth extends Field[Int] // config key: instructions retired per cycle
+case object UseVM extends Field[Boolean] // config key: enable virtual memory
+case object UseUser extends Field[Boolean] // config key: enable user privilege mode
+case object UseDebug extends Field[Boolean] // config key: enable debug mode
+case object UseAtomics extends Field[Boolean] // config key: enable atomic memory operations (A extension)
+case object UsePerfCounters extends Field[Boolean] // config key: enable performance counters
+case object FastLoadWord extends Field[Boolean] // config key: bypass full-word load data straight to writeback
+case object FastLoadByte extends Field[Boolean] // config key: bypass sub-word load data too (requires FastLoadWord)
+case object MulUnroll extends Field[Int] // config key: multiplier unroll factor (passed to MulDiv)
+case object DivEarlyOut extends Field[Boolean] // config key: allow the divider to finish early
+case object CoreInstBits extends Field[Int] // config key: instruction width in bits
+case object CoreDataBits extends Field[Int] // config key: core data-path width
+case object CoreDCacheReqTagBits extends Field[Int] // config key: D$ request tag width
+case object NCustomMRWCSRs extends Field[Int] // config key: number of custom machine-mode R/W CSRs
+case object MtvecWritable extends Field[Boolean] // config key: whether mtvec is writable at run time
+case object MtvecInit extends Field[BigInt] // config key: reset value of mtvec
+case object ResetVector extends Field[BigInt] // config key: PC value at reset
+case object NBreakpoints extends Field[Int] // config key: number of hardware breakpoints
+
+trait HasCoreParameters extends HasAddrMapParameters { // derives core-wide constants from the cde Parameters instance
+ implicit val p: Parameters
+ val xLen = p(XLen) // integer register width
+
+ val usingVM = p(UseVM)
+ val usingUser = p(UseUser)
+ val usingDebug = p(UseDebug)
+ val usingFPU = p(UseFPU)
+ val usingAtomics = p(UseAtomics)
+ val usingFDivSqrt = p(FDivSqrt)
+ val usingRoCC = !p(BuildRoCC).isEmpty // RoCC support enabled iff at least one accelerator is configured
+ val mulUnroll = p(MulUnroll)
+ val divEarlyOut = p(DivEarlyOut)
+ val fastLoadWord = p(FastLoadWord)
+ val fastLoadByte = p(FastLoadByte)
+
+ val retireWidth = p(RetireWidth)
+ val fetchWidth = p(FetchWidth)
+ val coreInstBits = p(CoreInstBits)
+ val coreInstBytes = coreInstBits/8
+ val coreDataBits = xLen
+ val coreDataBytes = coreDataBits/8
+ val coreDCacheReqTagBits = 7 + (2 + (if(!usingRoCC) 0 else 1)) // one extra tag bit when RoCC is present
+ val vpnBitsExtended = vpnBits + (vaddrBits < xLen).toInt // one extra VPN bit when VAs are narrower than xLen
+ val vaddrBitsExtended = vpnBitsExtended + pgIdxBits
+ val coreMaxAddrBits = paddrBits max vaddrBitsExtended // widest address the core must carry
+ val nCustomMrwCsrs = p(NCustomMRWCSRs)
+ val roccCsrs = if (p(BuildRoCC).isEmpty) Nil
+ else p(BuildRoCC).flatMap(_.csrs) // CSRs contributed by the configured accelerators
+ val nRoccCsrs = p(RoccNCSRs)
+ val nCores = p(NTiles)
+
+ // Print out log of committed instructions and their writeback values.
+ // Requires post-processing due to out-of-order writebacks.
+ val enableCommitLog = false
+ val usingPerfCounters = p(UsePerfCounters)
+
+ val maxPAddrBits = xLen match { // cap on physical address width for each xLen
+ case 32 => 34
+ case 64 => 50
+ }
+
+ require(paddrBits < maxPAddrBits)
+ require(!fastLoadByte || fastLoadWord) // the byte bypass builds on the word bypass
+}
+
+abstract class CoreModule(implicit val p: Parameters) extends Module
+ with HasCoreParameters // base class for modules that need the core's configuration
+abstract class CoreBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
+ with HasCoreParameters // base class for bundles that need the core's configuration
+
+class RegFile(n: Int, w: Int, zero: Boolean = false) { // n x w-bit register file with same-cycle write-to-read bypass
+ private val rf = Mem(n, UInt(width = w))
+ private def access(addr: UInt) = rf(~addr(log2Up(n)-1,0)) // addresses are bit-inverted before indexing the Mem
+ private val reads = collection.mutable.ArrayBuffer[(UInt,UInt)]() // (read address, read data wire) pairs recorded at elaboration
+ private var canRead = true
+ def read(addr: UInt) = {
+ require(canRead) // all reads must be declared before the first write (elaboration-time check)
+ reads += addr -> Wire(UInt())
+ reads.last._2 := Mux(Bool(zero) && addr === UInt(0), UInt(0), access(addr)) // register 0 reads as zero when zero=true
+ reads.last._2
+ }
+ def write(addr: UInt, data: UInt) = {
+ canRead = false
+ when (addr =/= UInt(0)) { // writes to register 0 are dropped
+ access(addr) := data
+ for ((raddr, rdata) <- reads)
+ when (addr === raddr) { rdata := data } // bypass the written value into matching reads this cycle
+ }
+ }
+}
+
+object ImmGen { // builds the sign-extended immediate selected by an IMM_* format code
+ def apply(sel: UInt, inst: UInt) = {
+ val sign = Mux(sel === IMM_Z, SInt(0), inst(31).toSInt) // Z-format (CSR zimm) is zero-extended; others sign-extend inst[31]
+ val b30_20 = Mux(sel === IMM_U, inst(30,20).toSInt, sign) // only U-format keeps inst[30:20] here
+ val b19_12 = Mux(sel =/= IMM_U && sel =/= IMM_UJ, sign, inst(19,12).toSInt)
+ val b11 = Mux(sel === IMM_U || sel === IMM_Z, SInt(0),
+ Mux(sel === IMM_UJ, inst(20).toSInt,
+ Mux(sel === IMM_SB, inst(7).toSInt, sign))) // bit 11 comes from a different instruction bit per format
+ val b10_5 = Mux(sel === IMM_U || sel === IMM_Z, Bits(0), inst(30,25))
+ val b4_1 = Mux(sel === IMM_U, Bits(0),
+ Mux(sel === IMM_S || sel === IMM_SB, inst(11,8),
+ Mux(sel === IMM_Z, inst(19,16), inst(24,21))))
+ val b0 = Mux(sel === IMM_S, inst(7),
+ Mux(sel === IMM_I, inst(20),
+ Mux(sel === IMM_Z, inst(15), Bits(0)))) // branch/jump formats have no bit 0
+
+ Cat(sign, b30_20, b19_12, b11, b10_5, b4_1, b0).toSInt // assemble the immediate field by field
+ }
+}
+
+class Rocket(implicit p: Parameters) extends CoreModule()(p) {
+ val io = new Bundle {
+ val prci = new PRCITileIO().flip
+ val imem = new FrontendIO()(p.alterPartial({case CacheName => "L1I" }))
+ val dmem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" }))
+ val ptw = new DatapathPTWIO().flip
+ val fpu = new FPUIO().flip
+ val rocc = new RoCCInterface().flip
+ }
+
+ val decode_table = {
+ (if (true) new MDecode +: (if (xLen > 32) Seq(new M64Decode) else Nil) else Nil) ++:
+ (if (usingAtomics) new ADecode +: (if (xLen > 32) Seq(new A64Decode) else Nil) else Nil) ++:
+ (if (usingFPU) new FDecode +: (if (xLen > 32) Seq(new F64Decode) else Nil) else Nil) ++:
+ (if (usingFPU && usingFDivSqrt) Some(new FDivSqrtDecode) else None) ++:
+ (if (usingRoCC) Some(new RoCCDecode) else None) ++:
+ (if (xLen > 32) Some(new I64Decode) else None) ++:
+ (if (usingVM) Some(new SDecode) else None) ++:
+ (if (usingDebug) Some(new DebugDecode) else None) ++:
+ Seq(new IDecode)
+ } flatMap(_.table)
+
+ val ex_ctrl = Reg(new IntCtrlSigs)
+ val mem_ctrl = Reg(new IntCtrlSigs)
+ val wb_ctrl = Reg(new IntCtrlSigs)
+
+ val ex_reg_xcpt_interrupt = Reg(Bool())
+ val ex_reg_valid = Reg(Bool())
+ val ex_reg_btb_hit = Reg(Bool())
+ val ex_reg_btb_resp = Reg(io.imem.btb_resp.bits)
+ val ex_reg_xcpt = Reg(Bool())
+ val ex_reg_flush_pipe = Reg(Bool())
+ val ex_reg_load_use = Reg(Bool())
+ val ex_reg_cause = Reg(UInt())
+ val ex_reg_replay = Reg(Bool())
+ val ex_reg_pc = Reg(UInt())
+ val ex_reg_inst = Reg(Bits())
+
+ val mem_reg_xcpt_interrupt = Reg(Bool())
+ val mem_reg_valid = Reg(Bool())
+ val mem_reg_btb_hit = Reg(Bool())
+ val mem_reg_btb_resp = Reg(io.imem.btb_resp.bits)
+ val mem_reg_xcpt = Reg(Bool())
+ val mem_reg_replay = Reg(Bool())
+ val mem_reg_flush_pipe = Reg(Bool())
+ val mem_reg_cause = Reg(UInt())
+ val mem_reg_slow_bypass = Reg(Bool())
+ val mem_reg_load = Reg(Bool())
+ val mem_reg_store = Reg(Bool())
+ val mem_reg_pc = Reg(UInt())
+ val mem_reg_inst = Reg(Bits())
+ val mem_reg_wdata = Reg(Bits())
+ val mem_reg_rs2 = Reg(Bits())
+ val take_pc_mem = Wire(Bool())
+
+ val wb_reg_valid = Reg(Bool())
+ val wb_reg_xcpt = Reg(Bool())
+ val wb_reg_mem_xcpt = Reg(Bool())
+ val wb_reg_replay = Reg(Bool())
+ val wb_reg_cause = Reg(UInt())
+ val wb_reg_pc = Reg(UInt())
+ val wb_reg_inst = Reg(Bits())
+ val wb_reg_wdata = Reg(Bits())
+ val wb_reg_rs2 = Reg(Bits())
+ val take_pc_wb = Wire(Bool())
+
+ val take_pc_mem_wb = take_pc_wb || take_pc_mem
+ val take_pc = take_pc_mem_wb
+
+ // decode stage
+ val id_pc = io.imem.resp.bits.pc
+ val id_inst = io.imem.resp.bits.data(0).toBits; require(fetchWidth == 1)
+ val id_ctrl = Wire(new IntCtrlSigs()).decode(id_inst, decode_table)
+ val id_raddr3 = id_inst(31,27)
+ val id_raddr2 = id_inst(24,20)
+ val id_raddr1 = id_inst(19,15)
+ val id_waddr = id_inst(11,7)
+ val id_load_use = Wire(Bool())
+ val id_reg_fence = Reg(init=Bool(false))
+ val id_ren = IndexedSeq(id_ctrl.rxs1, id_ctrl.rxs2)
+ val id_raddr = IndexedSeq(id_raddr1, id_raddr2)
+ val rf = new RegFile(31, xLen)
+ val id_rs = id_raddr.map(rf.read _)
+ val ctrl_killd = Wire(Bool())
+
+ val csr = Module(new CSRFile)
+ val id_csr_en = id_ctrl.csr =/= CSR.N
+ val id_system_insn = id_ctrl.csr === CSR.I
+ val id_csr_ren = (id_ctrl.csr === CSR.S || id_ctrl.csr === CSR.C) && id_raddr1 === UInt(0)
+ val id_csr = Mux(id_csr_ren, CSR.R, id_ctrl.csr)
+ val id_csr_addr = id_inst(31,20)
+ // this is overly conservative
+ val safe_csrs = CSRs.sscratch :: CSRs.sepc :: CSRs.mscratch :: CSRs.mepc :: CSRs.mcause :: CSRs.mbadaddr :: Nil
+ val legal_csrs = collection.mutable.LinkedHashSet(CSRs.all:_*)
+ val id_csr_flush = id_system_insn || (id_csr_en && !id_csr_ren && !DecodeLogic(id_csr_addr, safe_csrs.map(UInt(_)), (legal_csrs -- safe_csrs).toList.map(UInt(_))))
+
+ val id_illegal_insn = !id_ctrl.legal ||
+ id_ctrl.fp && !csr.io.status.fs.orR ||
+ id_ctrl.rocc && !csr.io.status.xs.orR
+ // stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE)
+ val id_amo_aq = id_inst(26)
+ val id_amo_rl = id_inst(25)
+ val id_fence_next = id_ctrl.fence || id_ctrl.amo && id_amo_rl
+ val id_mem_busy = !io.dmem.ordered || io.dmem.req.valid
+ val id_rocc_busy = Bool(usingRoCC) &&
+ (io.rocc.busy || ex_reg_valid && ex_ctrl.rocc ||
+ mem_reg_valid && mem_ctrl.rocc || wb_reg_valid && wb_ctrl.rocc)
+ id_reg_fence := id_fence_next || id_reg_fence && id_mem_busy
+ val id_do_fence = id_rocc_busy && id_ctrl.fence ||
+ id_mem_busy && (id_ctrl.amo && id_amo_aq || id_ctrl.fence_i || id_reg_fence && (id_ctrl.mem || id_ctrl.rocc) || id_csr_en)
+
+ val bpu = Module(new BreakpointUnit)
+ bpu.io.status := csr.io.status
+ bpu.io.bp := csr.io.bp
+ bpu.io.pc := id_pc
+ bpu.io.ea := mem_reg_wdata
+
+ val (id_xcpt, id_cause) = checkExceptions(List(
+ (csr.io.interrupt, csr.io.interrupt_cause),
+ (bpu.io.xcpt_if, UInt(Causes.breakpoint)),
+ (io.imem.resp.bits.xcpt_if, UInt(Causes.fault_fetch)),
+ (id_illegal_insn, UInt(Causes.illegal_instruction))))
+
+ val dcache_bypass_data =
+ if (fastLoadByte) io.dmem.resp.bits.data
+ else if (fastLoadWord) io.dmem.resp.bits.data_word_bypass
+ else wb_reg_wdata
+
+ // detect bypass opportunities
+ val ex_waddr = ex_reg_inst(11,7)
+ val mem_waddr = mem_reg_inst(11,7)
+ val wb_waddr = wb_reg_inst(11,7)
+ val bypass_sources = IndexedSeq(
+ (Bool(true), UInt(0), UInt(0)), // treat reading x0 as a bypass
+ (ex_reg_valid && ex_ctrl.wxd, ex_waddr, mem_reg_wdata),
+ (mem_reg_valid && mem_ctrl.wxd && !mem_ctrl.mem, mem_waddr, wb_reg_wdata),
+ (mem_reg_valid && mem_ctrl.wxd, mem_waddr, dcache_bypass_data))
+ val id_bypass_src = id_raddr.map(raddr => bypass_sources.map(s => s._1 && s._2 === raddr))
+
+ // execute stage
+ val bypass_mux = Vec(bypass_sources.map(_._3))
+ val ex_reg_rs_bypass = Reg(Vec(id_raddr.size, Bool()))
+ val ex_reg_rs_lsb = Reg(Vec(id_raddr.size, UInt()))
+ val ex_reg_rs_msb = Reg(Vec(id_raddr.size, UInt()))
+ val ex_rs = for (i <- 0 until id_raddr.size)
+ yield Mux(ex_reg_rs_bypass(i), bypass_mux(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i)))
+ val ex_imm = ImmGen(ex_ctrl.sel_imm, ex_reg_inst)
+ val ex_op1 = MuxLookup(ex_ctrl.sel_alu1, SInt(0), Seq(
+ A1_RS1 -> ex_rs(0).toSInt,
+ A1_PC -> ex_reg_pc.toSInt))
+ val ex_op2 = MuxLookup(ex_ctrl.sel_alu2, SInt(0), Seq(
+ A2_RS2 -> ex_rs(1).toSInt,
+ A2_IMM -> ex_imm,
+ A2_FOUR -> SInt(4)))
+
+ val alu = Module(new ALU)
+ alu.io.dw := ex_ctrl.alu_dw
+ alu.io.fn := ex_ctrl.alu_fn
+ alu.io.in2 := ex_op2.toUInt
+ alu.io.in1 := ex_op1.toUInt
+
+ // multiplier and divider
+ val div = Module(new MulDiv(width = xLen,
+ unroll = mulUnroll,
+ earlyOut = divEarlyOut))
+
+ div.io.req.valid := ex_reg_valid && ex_ctrl.div
+ div.io.req.bits.dw := ex_ctrl.alu_dw
+ div.io.req.bits.fn := ex_ctrl.alu_fn
+ div.io.req.bits.in1 := ex_rs(0)
+ div.io.req.bits.in2 := ex_rs(1)
+ div.io.req.bits.tag := ex_waddr
+
+ ex_reg_valid := !ctrl_killd
+ ex_reg_replay := !take_pc && io.imem.resp.valid && io.imem.resp.bits.replay
+ ex_reg_xcpt := !ctrl_killd && id_xcpt
+ ex_reg_xcpt_interrupt := !take_pc && io.imem.resp.valid && csr.io.interrupt
+ when (id_xcpt) { ex_reg_cause := id_cause }
+
+ when (!ctrl_killd) {
+ ex_ctrl := id_ctrl
+ ex_ctrl.csr := id_csr
+ ex_reg_btb_hit := io.imem.btb_resp.valid
+ when (io.imem.btb_resp.valid) { ex_reg_btb_resp := io.imem.btb_resp.bits }
+ ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush || csr.io.singleStep
+ ex_reg_load_use := id_load_use
+
+ when (id_ctrl.jalr && csr.io.status.debug) {
+ ex_reg_flush_pipe := true
+ ex_ctrl.fence_i := true
+ }
+
+ for (i <- 0 until id_raddr.size) {
+ val do_bypass = id_bypass_src(i).reduce(_||_)
+ val bypass_src = PriorityEncoder(id_bypass_src(i))
+ ex_reg_rs_bypass(i) := do_bypass
+ ex_reg_rs_lsb(i) := bypass_src
+ when (id_ren(i) && !do_bypass) {
+ ex_reg_rs_lsb(i) := id_rs(i)(bypass_src.getWidth-1,0)
+ ex_reg_rs_msb(i) := id_rs(i) >> bypass_src.getWidth
+ }
+ }
+ }
+ when (!ctrl_killd || csr.io.interrupt || io.imem.resp.bits.replay) {
+ ex_reg_inst := id_inst
+ ex_reg_pc := id_pc
+ }
+
+ // replay inst in ex stage?
+ val ex_pc_valid = ex_reg_valid || ex_reg_replay || ex_reg_xcpt_interrupt
+ val wb_dcache_miss = wb_ctrl.mem && !io.dmem.resp.valid
+ val replay_ex_structural = ex_ctrl.mem && !io.dmem.req.ready ||
+ ex_ctrl.div && !div.io.req.ready
+ val replay_ex_load_use = wb_dcache_miss && ex_reg_load_use
+ val replay_ex = ex_reg_replay || (ex_reg_valid && (replay_ex_structural || replay_ex_load_use))
+ val ctrl_killx = take_pc_mem_wb || replay_ex || !ex_reg_valid
+ // detect 2-cycle load-use delay for LB/LH/SC
+ val ex_slow_bypass = ex_ctrl.mem_cmd === M_XSC || Vec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_ctrl.mem_type)
+
+ val (ex_xcpt, ex_cause) = checkExceptions(List(
+ (ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause),
+ (ex_ctrl.fp && io.fpu.illegal_rm, UInt(Causes.illegal_instruction))))
+
+ // memory stage
+ val mem_br_taken = mem_reg_wdata(0)
+ val mem_br_target = mem_reg_pc.toSInt +
+ Mux(mem_ctrl.branch && mem_br_taken, ImmGen(IMM_SB, mem_reg_inst),
+ Mux(mem_ctrl.jal, ImmGen(IMM_UJ, mem_reg_inst), SInt(4)))
+ val mem_int_wdata = Mux(mem_ctrl.jalr, mem_br_target, mem_reg_wdata.toSInt).toUInt
+ val mem_npc = (Mux(mem_ctrl.jalr, encodeVirtualAddress(mem_reg_wdata, mem_reg_wdata).toSInt, mem_br_target) & SInt(-2)).toUInt
+ val mem_wrong_npc = Mux(ex_pc_valid, mem_npc =/= ex_reg_pc, Mux(io.imem.resp.valid, mem_npc =/= id_pc, Bool(true)))
+ val mem_npc_misaligned = mem_npc(1)
+ val mem_cfi = mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal
+ val mem_cfi_taken = (mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal
+ val mem_misprediction =
+ if (p(BtbKey).nEntries == 0) mem_cfi_taken
+ else mem_wrong_npc
+ val want_take_pc_mem = mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe)
+ take_pc_mem := want_take_pc_mem && !mem_npc_misaligned
+
+ mem_reg_valid := !ctrl_killx
+ mem_reg_replay := !take_pc_mem_wb && replay_ex
+ mem_reg_xcpt := !ctrl_killx && ex_xcpt
+ mem_reg_xcpt_interrupt := !take_pc_mem_wb && ex_reg_xcpt_interrupt
+ when (ex_xcpt) { mem_reg_cause := ex_cause }
+
+ when (ex_pc_valid) {
+ mem_ctrl := ex_ctrl
+ mem_reg_load := ex_ctrl.mem && isRead(ex_ctrl.mem_cmd)
+ mem_reg_store := ex_ctrl.mem && isWrite(ex_ctrl.mem_cmd)
+ mem_reg_btb_hit := ex_reg_btb_hit
+ when (ex_reg_btb_hit) { mem_reg_btb_resp := ex_reg_btb_resp }
+ mem_reg_flush_pipe := ex_reg_flush_pipe
+ mem_reg_slow_bypass := ex_slow_bypass
+
+ mem_reg_inst := ex_reg_inst
+ mem_reg_pc := ex_reg_pc
+ mem_reg_wdata := alu.io.out
+ when (ex_ctrl.rxs2 && (ex_ctrl.mem || ex_ctrl.rocc)) {
+ mem_reg_rs2 := ex_rs(1)
+ }
+ }
+
+ val (mem_new_xcpt, mem_new_cause) = checkExceptions(List(
+ (mem_reg_load && bpu.io.xcpt_ld, UInt(Causes.breakpoint)),
+ (mem_reg_store && bpu.io.xcpt_st, UInt(Causes.breakpoint)),
+ (want_take_pc_mem && mem_npc_misaligned, UInt(Causes.misaligned_fetch)),
+ (mem_ctrl.mem && io.dmem.xcpt.ma.st, UInt(Causes.misaligned_store)),
+ (mem_ctrl.mem && io.dmem.xcpt.ma.ld, UInt(Causes.misaligned_load)),
+ (mem_ctrl.mem && io.dmem.xcpt.pf.st, UInt(Causes.fault_store)),
+ (mem_ctrl.mem && io.dmem.xcpt.pf.ld, UInt(Causes.fault_load))))
+
+ val (mem_xcpt, mem_cause) = checkExceptions(List(
+ (mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause),
+ (mem_reg_valid && mem_new_xcpt, mem_new_cause)))
+
+ val dcache_kill_mem = mem_reg_valid && mem_ctrl.wxd && io.dmem.replay_next // structural hazard on writeback port
+ val fpu_kill_mem = mem_reg_valid && mem_ctrl.fp && io.fpu.nack_mem
+ val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem
+ val killm_common = dcache_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid
+ div.io.kill := killm_common && Reg(next = div.io.req.fire())
+ val ctrl_killm = killm_common || mem_xcpt || fpu_kill_mem
+
+ // writeback stage
+ wb_reg_valid := !ctrl_killm
+ wb_reg_replay := replay_mem && !take_pc_wb
+ wb_reg_xcpt := mem_xcpt && !take_pc_wb
+ // Record whether the exception came from a memory access (used below to
+ // select the badaddr source for the CSR file).
+ wb_reg_mem_xcpt := mem_reg_valid && mem_new_xcpt && !(mem_reg_xcpt_interrupt || mem_reg_xcpt)
+ when (mem_xcpt) { wb_reg_cause := mem_cause }
+ when (mem_reg_valid || mem_reg_replay || mem_reg_xcpt_interrupt) {
+ wb_ctrl := mem_ctrl
+ // FP-to-int moves take their writeback data from the FPU.
+ wb_reg_wdata := Mux(mem_ctrl.fp && mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata)
+ when (mem_ctrl.rocc) {
+ wb_reg_rs2 := mem_reg_rs2
+ }
+ wb_reg_inst := mem_reg_inst
+ wb_reg_pc := mem_reg_pc
+ }
+
+ // Long-latency ops (divide, D$ miss, RoCC) reserve their destination in the
+ // scoreboard instead of writing back in-order.
+ val wb_set_sboard = wb_ctrl.div || wb_dcache_miss || wb_ctrl.rocc
+ val replay_wb_common = io.dmem.s2_nack || wb_reg_replay
+ val replay_wb_rocc = wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready
+ val replay_wb = replay_wb_common || replay_wb_rocc
+ val wb_xcpt = wb_reg_xcpt || csr.io.csr_xcpt
+ take_pc_wb := replay_wb || wb_xcpt || csr.io.eret
+
+ // writeback arbitration
+ // D$ response tag: bit 0 distinguishes integer vs FP destination, the rest
+ // is the destination register address.
+ val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool
+ val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool
+ val dmem_resp_waddr = io.dmem.resp.bits.tag >> 1
+ val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data
+ val dmem_resp_replay = dmem_resp_valid && io.dmem.resp.bits.replay
+
+ // Long-latency (ll) writeback port: divider by default; RoCC responses and
+ // replayed D$ fills override it below (later connects win).
+ div.io.resp.ready := !(wb_reg_valid && wb_ctrl.wxd)
+ val ll_wdata = Wire(init = div.io.resp.bits.data)
+ val ll_waddr = Wire(init = div.io.resp.bits.tag)
+ val ll_wen = Wire(init = div.io.resp.fire())
+ if (usingRoCC) {
+ io.rocc.resp.ready := !(wb_reg_valid && wb_ctrl.wxd)
+ when (io.rocc.resp.fire()) {
+ div.io.resp.ready := Bool(false)
+ ll_wdata := io.rocc.resp.bits.data
+ ll_waddr := io.rocc.resp.bits.rd
+ ll_wen := Bool(true)
+ }
+ }
+ // A replayed D$ fill has highest priority on the ll port.
+ when (dmem_resp_replay && dmem_resp_xpu) {
+ div.io.resp.ready := Bool(false)
+ if (usingRoCC)
+ io.rocc.resp.ready := Bool(false)
+ ll_waddr := dmem_resp_waddr
+ ll_wen := Bool(true)
+ }
+
+ val wb_valid = wb_reg_valid && !replay_wb && !wb_xcpt
+ val wb_wen = wb_valid && wb_ctrl.wxd
+ val rf_wen = wb_wen || ll_wen
+ val rf_waddr = Mux(ll_wen, ll_waddr, wb_waddr)
+ // Write-data priority: D$ load data, then ll port, then CSR read, then ALU.
+ val rf_wdata = Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data,
+ Mux(ll_wen, ll_wdata,
+ Mux(wb_ctrl.csr =/= CSR.N, csr.io.rw.rdata,
+ wb_reg_wdata)))
+ when (rf_wen) { rf.write(rf_waddr, rf_wdata) }
+
+ // hook up control/status regfile
+ csr.io.exception := wb_reg_xcpt
+ csr.io.cause := wb_reg_cause
+ csr.io.retire := wb_valid
+ csr.io.prci <> io.prci
+ io.fpu.fcsr_rm := csr.io.fcsr_rm
+ csr.io.fcsr_flags := io.fpu.fcsr_flags
+ io.rocc.csr <> csr.io.rocc.csr
+ csr.io.rocc.interrupt <> io.rocc.interrupt
+ csr.io.pc := wb_reg_pc
+ // For memory exceptions, badaddr is the faulting effective address (held in
+ // wb_reg_wdata); otherwise report the PC.
+ csr.io.badaddr := Mux(wb_reg_mem_xcpt, encodeVirtualAddress(wb_reg_wdata, wb_reg_wdata), wb_reg_pc)
+ io.ptw.ptbr := csr.io.ptbr
+ io.ptw.invalidate := csr.io.fatc
+ io.ptw.status := csr.io.status
+ // CSR address comes from the instruction's immediate field.
+ csr.io.rw.addr := wb_reg_inst(31,20)
+ csr.io.rw.cmd := Mux(wb_reg_valid, wb_ctrl.csr, CSR.N)
+ csr.io.rw.wdata := wb_reg_wdata
+
+ // Decode-stage registers whose readiness must be checked against younger
+ // pipeline stages (x0 never hazards).
+ val hazard_targets = Seq((id_ctrl.rxs1 && id_raddr1 =/= UInt(0), id_raddr1),
+ (id_ctrl.rxs2 && id_raddr2 =/= UInt(0), id_raddr2),
+ (id_ctrl.wxd && id_waddr =/= UInt(0), id_waddr))
+ val fp_hazard_targets = Seq((io.fpu.dec.ren1, id_raddr1),
+ (io.fpu.dec.ren2, id_raddr2),
+ (io.fpu.dec.ren3, id_raddr3),
+ (io.fpu.dec.wen, id_waddr))
+
+ // Integer scoreboard tracks outstanding long-latency destinations.
+ val sboard = new Scoreboard(32)
+ sboard.clear(ll_wen, ll_waddr)
+ val id_sboard_hazard = checkHazards(hazard_targets, sboard.read _)
+ sboard.set(wb_set_sboard && wb_wen, wb_waddr)
+
+ // stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage.
+ val ex_cannot_bypass = ex_ctrl.csr =/= CSR.N || ex_ctrl.jalr || ex_ctrl.mem || ex_ctrl.div || ex_ctrl.fp || ex_ctrl.rocc
+ val data_hazard_ex = ex_ctrl.wxd && checkHazards(hazard_targets, _ === ex_waddr)
+ val fp_data_hazard_ex = ex_ctrl.wfd && checkHazards(fp_hazard_targets, _ === ex_waddr)
+ val id_ex_hazard = ex_reg_valid && (data_hazard_ex && ex_cannot_bypass || fp_data_hazard_ex)
+
+ // stall for RAW/WAW hazards on CSRs, LB/LH, and mul/div in memory stage.
+ // Sub-word loads can only bypass when the fast-load-byte path exists.
+ val mem_mem_cmd_bh =
+ if (fastLoadWord) Bool(!fastLoadByte) && mem_reg_slow_bypass
+ else Bool(true)
+ val mem_cannot_bypass = mem_ctrl.csr =/= CSR.N || mem_ctrl.mem && mem_mem_cmd_bh || mem_ctrl.div || mem_ctrl.fp || mem_ctrl.rocc
+ val data_hazard_mem = mem_ctrl.wxd && checkHazards(hazard_targets, _ === mem_waddr)
+ val fp_data_hazard_mem = mem_ctrl.wfd && checkHazards(fp_hazard_targets, _ === mem_waddr)
+ val id_mem_hazard = mem_reg_valid && (data_hazard_mem && mem_cannot_bypass || fp_data_hazard_mem)
+ id_load_use := mem_reg_valid && data_hazard_mem && mem_ctrl.mem
+
+ // stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback.
+ val data_hazard_wb = wb_ctrl.wxd && checkHazards(hazard_targets, _ === wb_waddr)
+ val fp_data_hazard_wb = wb_ctrl.wfd && checkHazards(fp_hazard_targets, _ === wb_waddr)
+ val id_wb_hazard = wb_reg_valid && (data_hazard_wb && wb_set_sboard || fp_data_hazard_wb)
+
+ // FP scoreboard: stall decode on outstanding FP loads / FPU writebacks.
+ val id_stall_fpu = if (usingFPU) {
+ val fp_sboard = new Scoreboard(32)
+ fp_sboard.set((wb_dcache_miss && wb_ctrl.wfd || io.fpu.sboard_set) && wb_valid, wb_waddr)
+ fp_sboard.clear(dmem_resp_replay && dmem_resp_fpu, dmem_resp_waddr)
+ fp_sboard.clear(io.fpu.sboard_clr, io.fpu.sboard_clra)
+
+ id_csr_en && !io.fpu.fcsr_rdy || checkHazards(fp_hazard_targets, fp_sboard.read _)
+ } else Bool(false)
+
+ // Sticky "blocked" bits: set while a request is being refused, cleared once
+ // the unit becomes ready again.
+ val dcache_blocked = Reg(Bool())
+ dcache_blocked := !io.dmem.req.ready && (io.dmem.req.valid || dcache_blocked)
+ val rocc_blocked = Reg(Bool())
+ rocc_blocked := !wb_reg_xcpt && !io.rocc.cmd.ready && (io.rocc.cmd.valid || rocc_blocked)
+
+ val ctrl_stalld =
+ id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard ||
+ id_ctrl.fp && id_stall_fpu ||
+ id_ctrl.mem && dcache_blocked || // reduce activity during D$ misses
+ id_ctrl.rocc && rocc_blocked || // reduce activity while RoCC is busy
+ id_do_fence ||
+ csr.io.csr_stall
+ ctrl_killd := !io.imem.resp.valid || io.imem.resp.bits.replay || take_pc || ctrl_stalld || csr.io.interrupt
+
+ // Frontend redirect: exception/eret target, replay PC, or branch target.
+ io.imem.req.valid := take_pc
+ io.imem.req.bits.speculative := !take_pc_wb
+ io.imem.req.bits.pc :=
+ Mux(wb_xcpt || csr.io.eret, csr.io.evec, // exception or [m|s]ret
+ Mux(replay_wb, wb_reg_pc, // replay
+ mem_npc)).toUInt // mispredicted branch
+ io.imem.flush_icache := wb_reg_valid && wb_ctrl.fence_i && !io.dmem.s2_nack
+ io.imem.flush_tlb := csr.io.fatc
+ io.imem.resp.ready := !ctrl_stalld || csr.io.interrupt || take_pc_mem
+
+ // Branch predictor updates from the memory stage.
+ io.imem.btb_update.valid := mem_reg_valid && !mem_npc_misaligned && mem_wrong_npc && mem_cfi_taken && !take_pc_wb
+ io.imem.btb_update.bits.isJump := mem_ctrl.jal || mem_ctrl.jalr
+ // JALR with rs1 = x1/x5 is treated as a return (link-register pattern).
+ io.imem.btb_update.bits.isReturn := mem_ctrl.jalr && mem_reg_inst(19,15) === BitPat("b00??1")
+ io.imem.btb_update.bits.pc := mem_reg_pc
+ io.imem.btb_update.bits.target := io.imem.req.bits.pc
+ io.imem.btb_update.bits.br_pc := mem_reg_pc
+ io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit
+ io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp
+
+ io.imem.bht_update.valid := mem_reg_valid && mem_ctrl.branch && !take_pc_wb
+ io.imem.bht_update.bits.pc := mem_reg_pc
+ io.imem.bht_update.bits.taken := mem_br_taken
+ io.imem.bht_update.bits.mispredict := mem_wrong_npc
+ io.imem.bht_update.bits.prediction := io.imem.btb_update.bits.prediction
+
+ io.imem.ras_update.valid := mem_reg_valid && io.imem.btb_update.bits.isJump && !mem_npc_misaligned && !take_pc_wb
+ io.imem.ras_update.bits.returnAddr := mem_int_wdata
+ // A jump that writes an odd-numbered link register counts as a call.
+ io.imem.ras_update.bits.isCall := mem_ctrl.wxd && mem_waddr(0)
+ io.imem.ras_update.bits.isReturn := io.imem.btb_update.bits.isReturn
+ io.imem.ras_update.bits.prediction := io.imem.btb_update.bits.prediction
+
+ io.fpu.valid := !ctrl_killd && id_ctrl.fp
+ io.fpu.killx := ctrl_killx
+ io.fpu.killm := killm_common
+ io.fpu.inst := id_inst
+ io.fpu.fromint_data := ex_rs(0)
+ io.fpu.dmem_resp_val := dmem_resp_valid && dmem_resp_fpu
+ io.fpu.dmem_resp_data := io.dmem.resp.bits.data_word_bypass
+ io.fpu.dmem_resp_type := io.dmem.resp.bits.typ
+ io.fpu.dmem_resp_tag := dmem_resp_waddr
+
+ // D$ request is issued from the execute stage.
+ io.dmem.req.valid := ex_reg_valid && ex_ctrl.mem
+ val ex_dcache_tag = Cat(ex_waddr, ex_ctrl.fp)
+ require(coreDCacheReqTagBits >= ex_dcache_tag.getWidth)
+ io.dmem.req.bits.tag := ex_dcache_tag
+ io.dmem.req.bits.cmd := ex_ctrl.mem_cmd
+ io.dmem.req.bits.typ := ex_ctrl.mem_type
+ io.dmem.req.bits.phys := Bool(false)
+ io.dmem.req.bits.addr := encodeVirtualAddress(ex_rs(0), alu.io.adder_out)
+ io.dmem.s1_kill := killm_common || mem_xcpt
+ io.dmem.s1_data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2)
+ io.dmem.invalidate_lr := wb_xcpt
+
+ // RoCC commands are issued from writeback once the instruction is committed.
+ io.rocc.cmd.valid := wb_reg_valid && wb_ctrl.rocc && !replay_wb_common
+ io.rocc.exception := wb_xcpt && csr.io.status.xs.orR
+ io.rocc.cmd.bits.status := csr.io.status
+ io.rocc.cmd.bits.inst := new RoCCInstruction().fromBits(wb_reg_inst)
+ io.rocc.cmd.bits.rs1 := wb_reg_wdata
+ io.rocc.cmd.bits.rs2 := wb_reg_rs2
+
+ // Simulation-only commit trace: either the spike-style commit log or a
+ // one-line-per-cycle pipeline trace (consumed by DASM postprocessing).
+ if (enableCommitLog) {
+ val pc = Wire(SInt(width=xLen))
+ pc := wb_reg_pc
+ val inst = wb_reg_inst
+ // Destination register, delayed from decode to writeback (3 cycles).
+ val rd = RegNext(RegNext(RegNext(id_waddr)))
+ val wfd = wb_ctrl.wfd
+ val wxd = wb_ctrl.wxd
+ // Data is only known now if this isn't a scoreboarded (long-latency) op.
+ val has_data = wb_wen && !wb_set_sboard
+ val priv = csr.io.status.prv
+
+ when (wb_valid) {
+ when (wfd) {
+ printf ("%d 0x%x (0x%x) f%d p%d 0xXXXXXXXXXXXXXXXX\n", priv, pc, inst, rd, rd+UInt(32))
+ }
+ .elsewhen (wxd && rd =/= UInt(0) && has_data) {
+ printf ("%d 0x%x (0x%x) x%d 0x%x\n", priv, pc, inst, rd, rf_wdata)
+ }
+ .elsewhen (wxd && rd =/= UInt(0) && !has_data) {
+ printf ("%d 0x%x (0x%x) x%d p%d 0xXXXXXXXXXXXXXXXX\n", priv, pc, inst, rd, rd)
+ }
+ .otherwise {
+ printf ("%d 0x%x (0x%x)\n", priv, pc, inst)
+ }
+ }
+
+ // Late writebacks from the long-latency port are logged separately.
+ when (ll_wen && rf_waddr =/= UInt(0)) {
+ printf ("x%d p%d 0x%x\n", rf_waddr, rf_waddr, rf_wdata)
+ }
+ }
+ else {
+ printf("C%d: %d [%d] pc=[%x] W[r%d=%x][%d] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n",
+ io.prci.id, csr.io.time(31,0), wb_valid, wb_reg_pc,
+ Mux(rf_wen, rf_waddr, UInt(0)), rf_wdata, rf_wen,
+ wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs(0))),
+ wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs(1))),
+ wb_reg_inst, wb_reg_inst)
+ }
+
+ /** Collapse a priority-ordered list of (trigger, cause) pairs into a single
+  * (any-exception-fired, highest-priority cause) result. */
+ def checkExceptions(x: Seq[(Bool, UInt)]) = {
+ val anyXcpt = x.map { case (cond, _) => cond }.reduce(_ || _)
+ (anyXcpt, PriorityMux(x))
+ }
+
+ /** True if any enabled (valid, addr) target satisfies the hazard predicate. */
+ def checkHazards(targets: Seq[(Bool, UInt)], cond: UInt => Bool) =
+ targets.map { case (en, addr) => en && cond(addr) }.reduce(_ || _)
+
+ /** Compress a full-width VA into vaddrBits+1 bits.
+  * (VA is bad if VA(vaddrBits) != VA(vaddrBits-1)) */
+ def encodeVirtualAddress(a0: UInt, ea: UInt) =
+ if (vaddrBitsExtended == vaddrBits) ea
+ else {
+ val baseHi = a0 >> vaddrBits-1
+ val eaTop = ea(vaddrBits,vaddrBits-1).toSInt
+ // Base address near the bottom / top of the canonical range.
+ val nearZero = baseHi === UInt(0) || baseHi === UInt(1)
+ val nearMax = baseHi.toSInt === SInt(-1) || baseHi.toSInt === SInt(-2)
+ val msb = Mux(nearZero, eaTop =/= SInt(0),
+ Mux(nearMax, eaTop === SInt(-1), eaTop(0)))
+ Cat(msb, ea(vaddrBits-1,0))
+ }
+
+ // Elaboration-time scoreboard builder for n register slots. Each set/clear
+ // call chains a new combinational update onto `_next`; the register is only
+ // written when at least one enable is asserted. Call order therefore gives
+ // later updates priority over earlier ones within a cycle.
+ class Scoreboard(n: Int)
+ {
+ def set(en: Bool, addr: UInt): Unit = update(en, _next | mask(en, addr))
+ def clear(en: Bool, addr: UInt): Unit = update(en, _next & ~mask(en, addr))
+ // Registered (previous-cycle) view of a slot.
+ def read(addr: UInt): Bool = r(addr)
+ // Same-cycle view including updates chained so far.
+ def readBypassed(addr: UInt): Bool = _next(addr)
+
+ private val r = Reg(init=Bits(0, n))
+ private var _next = r
+ private var ens = Bool(false)
+ private def mask(en: Bool, addr: UInt) = Mux(en, UInt(1) << addr, UInt(0))
+ private def update(en: Bool, update: UInt) = {
+ _next = update
+ ens = ens || en
+ when (ens) { r := _next }
+ }
+ }
+}
diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala
new file mode 100644
index 00000000..66b16553
--- /dev/null
+++ b/rocket/src/main/scala/tile.scala
@@ -0,0 +1,151 @@
+// See LICENSE for license details.
+
+package rocket
+
+import Chisel._
+import uncore.tilelink._
+import uncore.agents._
+import uncore.devices._
+import Util._
+import cde.{Parameters, Field}
+
+// Configuration keys (cde Fields) consumed by Tile/RocketTile below.
+case object CoreName extends Field[String]
+case object BuildRoCC extends Field[Seq[RoccParameters]]
+case object NCachedTileLinkPorts extends Field[Int]
+case object NUncachedTileLinkPorts extends Field[Int]
+
+// Per-accelerator RoCC configuration: opcode space, module generator, and
+// the memory/PTW/CSR/FPU resources the accelerator requires.
+case class RoccParameters(
+ opcodes: OpcodeSet,
+ generator: Parameters => RoCC,
+ nMemChannels: Int = 0,
+ nPTWPorts : Int = 0,
+ csrs: Seq[Int] = Nil,
+ useFPU: Boolean = false)
+
+// Base class for a tile: bundles the cached/uncached TileLink ports and the
+// PRCI (clock/reset/interrupt) interface. Port counts come from the config.
+abstract class Tile(clockSignal: Clock = null, resetSignal: Bool = null)
+ (implicit p: Parameters) extends Module(Option(clockSignal), Option(resetSignal)) {
+ val nCachedTileLinkPorts = p(NCachedTileLinkPorts)
+ val nUncachedTileLinkPorts = p(NUncachedTileLinkPorts)
+ // Parameters specialized for the L1 data cache.
+ val dcacheParams = p.alterPartial({ case CacheName => "L1D" })
+
+ val io = new Bundle {
+ val cached = Vec(nCachedTileLinkPorts, new ClientTileLinkIO)
+ val uncached = Vec(nUncachedTileLinkPorts, new ClientUncachedTileLinkIO)
+ val prci = new PRCITileIO().flip
+ }
+}
+
+// A complete Rocket tile: core + L1 I$/D$ + optional FPU, PTW, and RoCC
+// accelerators, with arbiters gluing them onto the tile's TileLink ports.
+class RocketTile(clockSignal: Clock = null, resetSignal: Bool = null)
+ (implicit p: Parameters) extends Tile(clockSignal, resetSignal)(p) {
+ val buildRocc = p(BuildRoCC)
+ val usingRocc = !buildRocc.isEmpty
+ val nRocc = buildRocc.size
+ val nFPUPorts = buildRocc.filter(_.useFPU).size
+
+ val core = Module(new Rocket()(p.alterPartial({ case CoreName => "Rocket" })))
+ val icache = Module(new Frontend()(p.alterPartial({
+ case CacheName => "L1I"
+ case CoreName => "Rocket" })))
+ // Non-blocking HellaCache only when MSHRs are configured; otherwise the
+ // simpler blocking DCache.
+ val dcache =
+ if (p(NMSHRs) == 0) Module(new DCache()(dcacheParams)).io
+ else Module(new HellaCache()(dcacheParams)).io
+
+ // Port lists that RoCC accelerators (and the PTW) append to below.
+ val ptwPorts = collection.mutable.ArrayBuffer(icache.io.ptw, dcache.ptw)
+ val dcPorts = collection.mutable.ArrayBuffer(core.io.dmem)
+ val uncachedArbPorts = collection.mutable.ArrayBuffer(icache.io.mem)
+ val uncachedPorts = collection.mutable.ArrayBuffer[ClientUncachedTileLinkIO]()
+ val cachedPorts = collection.mutable.ArrayBuffer(dcache.mem)
+ core.io.prci <> io.prci
+ icache.io.cpu <> core.io.imem
+
+ val fpuOpt = if (p(UseFPU)) Some(Module(new FPU)) else None
+ fpuOpt.foreach(fpu => core.io.fpu <> fpu.io)
+
+ if (usingRocc) {
+ // Round-robin arbitration of accelerator responses back to the core.
+ val respArb = Module(new RRArbiter(new RoCCResponse, nRocc))
+ core.io.rocc.resp <> respArb.io.out
+
+ // Route each custom opcode to the accelerator that claims it.
+ val roccOpcodes = buildRocc.map(_.opcodes)
+ val cmdRouter = Module(new RoccCommandRouter(roccOpcodes))
+ cmdRouter.io.in <> core.io.rocc.cmd
+
+ val roccs = buildRocc.zipWithIndex.map { case (accelParams, i) =>
+ val rocc = accelParams.generator(p.alterPartial({
+ case RoccNMemChannels => accelParams.nMemChannels
+ case RoccNPTWPorts => accelParams.nPTWPorts
+ case RoccNCSRs => accelParams.csrs.size
+ }))
+ // Adapter that gives the accelerator a simple D$ request interface.
+ val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams))
+ rocc.io.cmd <> cmdRouter.io.out(i)
+ rocc.io.exception := core.io.rocc.exception
+ rocc.io.host_id := io.prci.id
+ dcIF.io.requestor <> rocc.io.mem
+ dcPorts += dcIF.io.cache
+ uncachedArbPorts += rocc.io.autl
+ rocc
+ }
+
+ // Share the core's FPU among accelerators that request FP support.
+ if (nFPUPorts > 0) {
+ fpuOpt.foreach { fpu =>
+ val fpArb = Module(new InOrderArbiter(new FPInput, new FPResult, nFPUPorts))
+ val fp_roccs = roccs.zip(buildRocc)
+ .filter { case (_, params) => params.useFPU }
+ .map { case (rocc, _) => rocc.io }
+ fpArb.io.in_req <> fp_roccs.map(_.fpu_req)
+ fp_roccs.zip(fpArb.io.in_resp).foreach {
+ case (rocc, fpu_resp) => rocc.fpu_resp <> fpu_resp
+ }
+ fpu.io.cp_req <> fpArb.io.out_req
+ fpArb.io.out_resp <> fpu.io.cp_resp
+ }
+ }
+
+ core.io.rocc.busy := cmdRouter.io.busy || roccs.map(_.io.busy).reduce(_ || _)
+ core.io.rocc.interrupt := roccs.map(_.io.interrupt).reduce(_ || _)
+ respArb.io.in <> roccs.map(rocc => Queue(rocc.io.resp))
+
+ // Broadcast RoCC CSR writes, enabling each accelerator only for the CSR
+ // addresses it declared.
+ if (p(RoccNCSRs) > 0) {
+ core.io.rocc.csr.rdata <> roccs.flatMap(_.io.csr.rdata)
+ for ((rocc, accelParams) <- roccs.zip(buildRocc)) {
+ rocc.io.csr.waddr := core.io.rocc.csr.waddr
+ rocc.io.csr.wdata := core.io.rocc.csr.wdata
+ rocc.io.csr.wen := core.io.rocc.csr.wen &&
+ accelParams.csrs
+ .map(core.io.rocc.csr.waddr === UInt(_))
+ .reduce((a, b) => a || b)
+ }
+ }
+
+ ptwPorts ++= roccs.flatMap(_.io.ptw)
+ uncachedPorts ++= roccs.flatMap(_.io.utl)
+ }
+
+ val uncachedArb = Module(new ClientUncachedTileLinkIOArbiter(uncachedArbPorts.size))
+ uncachedArb.io.in <> uncachedArbPorts
+ uncachedArb.io.out +=: uncachedPorts
+
+ // Connect the caches and RoCC to the outer memory system
+ io.uncached <> uncachedPorts
+ io.cached <> cachedPorts
+ // TODO remove nCached/nUncachedTileLinkPorts parameters and these assertions
+ require(uncachedPorts.size == nUncachedTileLinkPorts)
+ require(cachedPorts.size == nCachedTileLinkPorts)
+
+ if (p(UseVM)) {
+ val ptw = Module(new PTW(ptwPorts.size)(dcacheParams))
+ ptw.io.requestor <> ptwPorts
+ // The PTW gets the highest-priority D$ port (prepended to the list).
+ ptw.io.mem +=: dcPorts
+ core.io.ptw <> ptw.io.dpath
+ }
+
+ val dcArb = Module(new HellaCacheArbiter(dcPorts.size)(dcacheParams))
+ dcArb.io.requestor <> dcPorts
+ dcache.cpu <> dcArb.io.mem
+
+ // Tie off the FPU coprocessor port when no accelerator uses it.
+ if (!usingRocc || nFPUPorts == 0) {
+ fpuOpt.foreach { fpu =>
+ fpu.io.cp_req.valid := Bool(false)
+ fpu.io.cp_resp.ready := Bool(false)
+ }
+ }
+}
diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala
new file mode 100644
index 00000000..aca9aed2
--- /dev/null
+++ b/rocket/src/main/scala/tlb.scala
@@ -0,0 +1,176 @@
+// See LICENSE for license details.
+
+package rocket
+
+import Chisel._
+import Util._
+import junctions._
+import scala.math._
+import cde.{Parameters, Field}
+import uncore.agents.PseudoLRU
+import uncore.coherence._
+
+// Configuration key: number of TLB entries.
+case object NTLBEntries extends Field[Int]
+
+// Derived TLB sizing: entry count from the config, CAM tag covers ASID+VPN.
+trait HasTLBParameters extends HasCoreParameters {
+ val entries = p(NTLBEntries)
+ val camAddrBits = log2Ceil(entries)
+ val camTagBits = asIdBits + vpnBits
+}
+
+// TLB lookup request: the VPN plus flags describing the access
+// (passthrough = skip translation; instruction fetch vs. data; store).
+class TLBReq(implicit p: Parameters) extends CoreBundle()(p) {
+ val vpn = UInt(width = vpnBitsExtended)
+ val passthrough = Bool()
+ val instruction = Bool()
+ val store = Bool()
+}
+
+// TLB lookup response: translated PPN, miss indication, per-access-type
+// exception flags, and whether the target address is cacheable.
+class TLBResp(implicit p: Parameters) extends CoreBundle()(p) {
+ // lookup responses
+ val miss = Bool(OUTPUT)
+ val ppn = UInt(OUTPUT, ppnBits)
+ val xcpt_ld = Bool(OUTPUT)
+ val xcpt_st = Bool(OUTPUT)
+ val xcpt_if = Bool(OUTPUT)
+ val cacheable = Bool(OUTPUT)
+}
+
+// Fully-associative TLB with a PLRU replacement policy and a small FSM that
+// refills missing entries via the page-table walker (PTW).
+class TLB(implicit val p: Parameters) extends Module with HasTLBParameters {
+ val io = new Bundle {
+ val req = Decoupled(new TLBReq).flip
+ val resp = new TLBResp
+ val ptw = new TLBPTWIO
+ }
+
+ // CAM storage: valid bits, PPNs, and ASID+VPN tags.
+ val valid = Reg(init = UInt(0, entries))
+ val ppns = Reg(Vec(entries, io.ptw.resp.bits.pte.ppn))
+ val tags = Reg(Vec(entries, UInt(width = asIdBits + vpnBits)))
+
+ // Refill FSM: idle -> request PTW -> wait for response; the *_invalidate
+ // states track an invalidate arriving while a walk is in flight.
+ val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(UInt(), 4)
+ val state = Reg(init=s_ready)
+ val r_refill_tag = Reg(UInt(width = asIdBits + vpnBits))
+ val r_refill_waddr = Reg(UInt(width = log2Ceil(entries)))
+ val r_req = Reg(new TLBReq)
+
+ val lookup_tag = Cat(io.ptw.ptbr.asid, io.req.bits.vpn(vpnBits-1,0)).toUInt
+ val hitsVec = (0 until entries).map(i => valid(i) && tags(i) === lookup_tag)
+ val hits = hitsVec.toBits
+
+ // permission bit arrays
+ val pte_array = Reg(new PTE)
+ val u_array = Reg(UInt(width = entries)) // user permission
+ val sw_array = Reg(UInt(width = entries)) // write permission
+ val sx_array = Reg(UInt(width = entries)) // execute permission
+ val sr_array = Reg(UInt(width = entries)) // read permission
+ val dirty_array = Reg(UInt(width = entries)) // PTE dirty bit
+ // On a PTW response, install the new PTE and its permission bits at the
+ // victim slot chosen when the walk was launched.
+ when (io.ptw.resp.valid) {
+ val pte = io.ptw.resp.bits.pte
+ ppns(r_refill_waddr) := pte.ppn
+ tags(r_refill_waddr) := r_refill_tag
+
+ val mask = UIntToOH(r_refill_waddr)
+ valid := valid | mask
+ u_array := Mux(pte.u, u_array | mask, u_array & ~mask)
+ sr_array := Mux(pte.sr(), sr_array | mask, sr_array & ~mask)
+ sw_array := Mux(pte.sw(), sw_array | mask, sw_array & ~mask)
+ sx_array := Mux(pte.sx(), sx_array | mask, sx_array & ~mask)
+ dirty_array := Mux(pte.d, dirty_array | mask, dirty_array & ~mask)
+ }
+
+ // high if there are any unused (invalid) entries in the TLB
+ // Victim selection: prefer an invalid slot, otherwise PLRU.
+ val plru = new PseudoLRU(entries)
+ val repl_waddr = Mux(!valid.andR, PriorityEncoder(~valid), plru.replace)
+
+ // Effective privilege: mstatus.MPRV substitutes MPP for data accesses only.
+ val do_mprv = io.ptw.status.mprv && !io.req.bits.instruction
+ val priv = Mux(do_mprv, io.ptw.status.mpp, io.ptw.status.prv)
+ val priv_s = priv === PRV.S
+ val priv_uses_vm = priv <= PRV.S && !io.ptw.status.debug
+
+ // Per-entry effective permissions: S-mode is blocked from user pages when
+ // PUM is set; MXR lets loads use execute permission.
+ val priv_ok = Mux(priv_s, ~Mux(io.ptw.status.pum, u_array, UInt(0)), u_array)
+ val w_array = priv_ok & sw_array
+ val x_array = priv_ok & sx_array
+ val r_array = priv_ok & (sr_array | Mux(io.ptw.status.mxr, x_array, UInt(0)))
+
+ val vm_enabled = Bool(usingVM) && io.ptw.status.vm(3) && priv_uses_vm && !io.req.bits.passthrough
+ val bad_va =
+ if (vpnBits == vpnBitsExtended) Bool(false)
+ else io.req.bits.vpn(vpnBits) =/= io.req.bits.vpn(vpnBits-1)
+ // it's only a store hit if the dirty bit is set
+ val tag_hits = hits & (dirty_array | ~Mux(io.req.bits.store, w_array, UInt(0)))
+ val tag_hit = tag_hits.orR
+ val tlb_hit = vm_enabled && tag_hit
+ val tlb_miss = vm_enabled && !tag_hit && !bad_va
+
+ when (io.req.valid && tlb_hit) {
+ plru.access(OHToUInt(hits))
+ }
+
+ val paddr = Cat(io.resp.ppn, UInt(0, pgIdxBits))
+ val addr_prot = addrMap.getProt(paddr)
+
+ io.req.ready := state === s_ready
+ // Exceptions: bad VA, physical-protection violation, or a hit lacking the
+ // required permission for this access type.
+ io.resp.xcpt_ld := bad_va || (!tlb_miss && !addr_prot.r) || (tlb_hit && !(r_array & hits).orR)
+ io.resp.xcpt_st := bad_va || (!tlb_miss && !addr_prot.w) || (tlb_hit && !(w_array & hits).orR)
+ io.resp.xcpt_if := bad_va || (!tlb_miss && !addr_prot.x) || (tlb_hit && !(x_array & hits).orR)
+ io.resp.cacheable := addrMap.isCacheable(paddr)
+ io.resp.miss := tlb_miss
+ // Passthrough/disabled VM: the VPN itself is the physical page number.
+ io.resp.ppn := Mux(vm_enabled, Mux1H(hitsVec, ppns), io.req.bits.vpn(ppnBits-1,0))
+
+ io.ptw.req.valid := state === s_request
+ io.ptw.req.bits := io.ptw.status
+ io.ptw.req.bits.addr := r_refill_tag
+ io.ptw.req.bits.store := r_req.store
+ io.ptw.req.bits.fetch := r_req.instruction
+
+ if (usingVM) {
+ // Launch a walk on a miss, capturing the tag, victim slot, and request.
+ when (io.req.fire() && tlb_miss) {
+ state := s_request
+ r_refill_tag := lookup_tag
+ r_refill_waddr := repl_waddr
+ r_req := io.req.bits
+ }
+ when (state === s_request) {
+ when (io.ptw.invalidate) {
+ state := s_ready
+ }
+ when (io.ptw.req.ready) {
+ state := s_wait
+ when (io.ptw.invalidate) { state := s_wait_invalidate }
+ }
+ }
+ when (state === s_wait && io.ptw.invalidate) {
+ state := s_wait_invalidate
+ }
+ when (io.ptw.resp.valid) {
+ state := s_ready
+ }
+
+ // Flush the whole TLB on an sfence/invalidate.
+ when (io.ptw.invalidate) {
+ valid := 0
+ }
+ }
+}
+
+// Wraps the TLB with a decoupled (ready/valid) response interface: requests
+// are queued, and a request is only consumed when it hits and the consumer
+// can accept the response; on a miss the request is held and retried.
+class DecoupledTLB(implicit p: Parameters) extends Module {
+ val io = new Bundle {
+ val req = Decoupled(new TLBReq).flip
+ val resp = Decoupled(new TLBResp)
+ val ptw = new TLBPTWIO
+ }
+
+ val reqq = Queue(io.req)
+ val tlb = Module(new TLB)
+
+ // Fire only when a queued request, a ready TLB, and a ready consumer align.
+ val resp_helper = DecoupledHelper(
+ reqq.valid, tlb.io.req.ready, io.resp.ready)
+ val tlb_miss = tlb.io.resp.miss
+
+ tlb.io.req.valid := resp_helper.fire(tlb.io.req.ready)
+ tlb.io.req.bits := reqq.bits
+ // On a miss, keep the request queued so it replays after the refill.
+ reqq.ready := resp_helper.fire(reqq.valid, !tlb_miss)
+
+ io.resp.valid := resp_helper.fire(io.resp.ready, !tlb_miss)
+ io.resp.bits := tlb.io.resp
+
+ io.ptw <> tlb.io.ptw
+}
diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala
new file mode 100644
index 00000000..40a3c4a9
--- /dev/null
+++ b/rocket/src/main/scala/util.scala
@@ -0,0 +1,179 @@
+// See LICENSE for license details.
+
+package rocket
+
+import Chisel._
+import uncore._
+import scala.math._
+import cde.{Parameters, Field}
+
+// Grab-bag of implicit conversions and small helpers used throughout the
+// Rocket sources. The implicit conversions let Scala literals be used where
+// Chisel hardware types are expected.
+object Util {
+ implicit def uintToBitPat(x: UInt): BitPat = BitPat(x)
+ implicit def intToUInt(x: Int): UInt = UInt(x)
+ implicit def bigIntToUInt(x: BigInt): UInt = UInt(x)
+ implicit def booleanToBool(x: Boolean): Bits = Bool(x)
+ implicit def intSeqToUIntSeq(x: Seq[Int]): Seq[UInt] = x.map(UInt(_))
+ implicit def wcToUInt(c: WideCounter): UInt = c.value
+
+ implicit class UIntToAugmentedUInt(val x: UInt) extends AnyVal {
+ // Sign-extend x to n bits (no-op when already n bits wide).
+ def sextTo(n: Int): UInt =
+ if (x.getWidth == n) x
+ else Cat(Fill(n - x.getWidth, x(x.getWidth-1)), x)
+
+ // Bit-slice that tolerates an empty range (hi == lo-1 yields zero).
+ def extract(hi: Int, lo: Int): UInt = {
+ if (hi == lo-1) UInt(0)
+ else x(hi, lo)
+ }
+ }
+
+ implicit def booleanToIntConv(x: Boolean) = new AnyRef {
+ def toInt: Int = if (x) 1 else 0
+ }
+
+ implicit class SeqToAugmentedSeq[T <: Data](val x: Seq[T]) extends AnyVal {
+ // Hardware-indexed lookup into a Scala Seq, built as a binary Mux tree.
+ def apply(idx: UInt): T = {
+ if (x.size == 1) {
+ x.head
+ } else {
+ val half = 1 << (log2Ceil(x.size) - 1)
+ val newIdx = idx & (half - 1)
+ Mux(idx >= UInt(half), x.drop(half)(newIdx), x.take(half)(newIdx))
+ }
+ }
+
+ // Concatenate elements with element 0 in the least-significant position.
+ def toBits(): UInt = Cat(x.map(_.toBits).reverse)
+ }
+
+ // Hardware minimum over a sequence of UInts.
+ def minUInt(values: Seq[UInt]): UInt =
+ values.reduce((a, b) => Mux(a < b, a, b))
+
+ def minUInt(first: UInt, rest: UInt*): UInt =
+ minUInt(first +: rest.toSeq)
+}
+
+import Util._
+
+// Builds hardware constants / formatting circuits for printable strings and
+// decimal/radix conversion of UInt/SInt values (used by simulation printfs).
+object Str
+{
+ // Pack an ASCII string into a UInt, first character most significant.
+ def apply(s: String): UInt = {
+ var i = BigInt(0)
+ require(s.forall(validChar _))
+ for (c <- s)
+ i = (i << 8) | c
+ UInt(i, s.length*8)
+ }
+ def apply(x: Char): UInt = {
+ require(validChar(x))
+ UInt(x.toInt, 8)
+ }
+ def apply(x: UInt): UInt = apply(x, 10)
+ // Unsigned radix conversion: one 8-bit ASCII digit per radix position,
+ // leading zeros rendered as spaces in base 10.
+ def apply(x: UInt, radix: Int): UInt = {
+ val rad = UInt(radix)
+ val w = x.getWidth
+ require(w > 0)
+
+ var q = x
+ var s = digit(q % rad)
+ for (i <- 1 until ceil(log(2)/log(radix)*w).toInt) {
+ q = q / rad
+ s = Cat(Mux(Bool(radix == 10) && q === UInt(0), Str(' '), digit(q % rad)), s)
+ }
+ s
+ }
+ def apply(x: SInt): UInt = apply(x, 10)
+ // Signed conversion: magnitude as above, with a '-' floated in just before
+ // the most significant digit in base 10.
+ def apply(x: SInt, radix: Int): UInt = {
+ val neg = x < SInt(0)
+ val abs = x.abs
+ if (radix != 10) {
+ Cat(Mux(neg, Str('-'), Str(' ')), Str(abs, radix))
+ } else {
+ val rad = UInt(radix)
+ val w = abs.getWidth
+ require(w > 0)
+
+ var q = abs
+ var s = digit(q % rad)
+ var needSign = neg
+ for (i <- 1 until ceil(log(2)/log(radix)*w).toInt) {
+ q = q / rad
+ val placeSpace = q === UInt(0)
+ val space = Mux(needSign, Str('-'), Str(' '))
+ needSign = needSign && !placeSpace
+ s = Cat(Mux(placeSpace, space, digit(q % rad)), s)
+ }
+ Cat(Mux(needSign, Str('-'), Str(' ')), s)
+ }
+ }
+
+ // ASCII digit for 0-9 / a-z.
+ private def digit(d: UInt): UInt = Mux(d < UInt(10), Str('0')+d, Str(('a'-10).toChar)+d)(7,0)
+ private def validChar(x: Char) = x == (x & 0xFF)
+}
+
+/** Split a Bits value into 2, 3, or 4 contiguous fields at the given bit
+ * positions. Split points must be non-increasing and within the width. */
+object Split
+{
+ def apply(x: Bits, n0: Int) = {
+ val w = checkWidth(x, n0)
+ (x(w-1, n0), x(n0-1, 0))
+ }
+ def apply(x: Bits, n1: Int, n0: Int) = {
+ val w = checkWidth(x, n1, n0)
+ (x(w-1, n1), x(n1-1, n0), x(n0-1, 0))
+ }
+ def apply(x: Bits, n2: Int, n1: Int, n0: Int) = {
+ val w = checkWidth(x, n2, n1, n0)
+ (x(w-1, n2), x(n2-1, n1), x(n1-1, n0), x(n0-1, 0))
+ }
+
+ // Require the split points, together with the total width, to form a
+ // non-increasing sequence; return the width.
+ private def checkWidth(x: Bits, n: Int*) = {
+ val w = x.getWidth
+ val bounds = w +: n
+ require(bounds.zip(bounds.tail).forall { case (hi, lo) => hi >= lo })
+ w
+ }
+}
+
+// a counter that clock gates most of its MSBs using the LSB carry-out
+// The counter is split into a small always-incrementing low part and a large
+// high part that only updates when the low part carries out.
+case class WideCounter(width: Int, inc: UInt = UInt(1))
+{
+ // Only split when the width comfortably exceeds the increment width.
+ private val isWide = width > 2*inc.getWidth
+ private val smallWidth = if (isWide) inc.getWidth max log2Up(width) else width
+ private val small = Reg(init=UInt(0, smallWidth))
+ // +& keeps the carry-out bit, which gates the high part below.
+ private val nextSmall = small +& inc
+ small := nextSmall
+
+ private val large = if (isWide) {
+ val r = Reg(init=UInt(0, width - smallWidth))
+ when (nextSmall(smallWidth)) { r := r + UInt(1) }
+ r
+ } else null
+
+ val value = if (isWide) Cat(large, small) else small
+
+ // Direct assignment (e.g. CSR write) loads both halves at once.
+ def := (x: UInt) = {
+ small := x
+ if (isWide) large := x >> smallWidth
+ }
+}
+
+// Pseudo-random selection of one of `mod` choices, as an index or one-hot.
+// Power-of-two moduli just slice the random bits; other moduli partition an
+// oversampled random value into `mod` approximately equal ranges.
+object Random
+{
+ def apply(mod: Int, random: UInt): UInt = {
+ if (isPow2(mod)) random(log2Up(mod)-1,0)
+ else PriorityEncoder(partition(apply(1 << log2Up(mod*8), random), mod))
+ }
+ def apply(mod: Int): UInt = apply(mod, randomizer)
+ def oneHot(mod: Int, random: UInt): UInt = {
+ if (isPow2(mod)) UIntToOH(random(log2Up(mod)-1,0))
+ else PriorityEncoderOH(partition(apply(1 << log2Up(mod*8), random), mod)).toBits
+ }
+ def oneHot(mod: Int): UInt = oneHot(mod, randomizer)
+
+ // Default entropy source: a 16-bit LFSR.
+ private def randomizer = LFSR16()
+ // NOTE(review): rounds up to the next even value for non-integral inputs
+ // — presumably intentional for partition balance; confirm before changing.
+ private def round(x: Double): Int =
+ if (x.toInt.toDouble == x) x.toInt else (x.toInt + 1) & -2
+ // One comparator per slice; slice i covers values below its upper bound.
+ private def partition(value: UInt, slices: Int) =
+ Seq.tabulate(slices)(i => value < round((i << value.getWidth).toDouble / slices))
+}