
Merge remote-tracking branch 'rocket/master' into mono-repo

commit 373fd427dc
Author: Howard Mao
Date:   2016-07-28 11:27:29 -07:00
27 changed files with 7039 additions and 0 deletions

rocket/.gitignore (new file)

@@ -0,0 +1 @@
target

rocket/LICENSE (new file)

@@ -0,0 +1,24 @@
Copyright (c) 2011-2014, The Regents of the University of California
(Regents). All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the Regents nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING
OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS
BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.

rocket/README.md (new file)

@@ -0,0 +1,29 @@
Rocket Core
===========
Rocket is a 6-stage single-issue in-order pipeline that executes the 64-bit
scalar RISC-V ISA. Rocket implements an MMU that supports page-based virtual
memory and is able to boot modern operating systems such as Linux. Rocket
also has an optional IEEE 754-2008-compliant FPU, which implements both
single- and double-precision floating-point operations, including fused
multiply-add.

This repository is not intended to be run on its own. To instantiate a
Rocket core, please use the Rocket chip generator found in the rocket-chip
git repository; a rough sketch of what that instantiation looks like is shown
after the table below.

The following table compares a 32-bit ARM Cortex-A5 core to a 64-bit RISC-V
Rocket core built in the same TSMC process (40GPLUS). The fourth column is the
ratio of RISC-V Rocket to ARM Cortex-A5. Both use single-instruction-issue,
in-order pipelines, yet the RISC-V core is faster, smaller, and uses less
power.

ISA/Implementation | ARM Cortex-A5 | RISC-V Rocket | R/A
--- | --- | --- | ---
ISA Register Width | 32 bits | 64 bits | 2
Frequency | >1 GHz | >1 GHz | 1
Dhrystone Performance | 1.57 DMIPS/MHz | 1.72 DMIPS/MHz | 1.1
Area excluding caches | 0.27 mm<sup>2</sup> | 0.14 mm<sup>2</sup> | 0.5
Area with 16KB caches | 0.53 mm<sup>2</sup> | 0.39 mm<sup>2</sup> | 0.7
Area Efficiency | 2.96 DMIPS/MHz/mm<sup>2</sup> | 4.41 DMIPS/MHz/mm<sup>2</sup> | 1.5
Dynamic Power | <0.08 mW/MHz | 0.034 mW/MHz | >= 0.4
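
For orientation, instantiating the core inside a generator looks roughly like
the sketch below. This is a hedged illustration only: the `ExampleTile` wrapper
and the exact `Rocket` constructor shape are assumptions, and the real wiring
is performed by the rocket-chip generator.

```scala
import Chisel._
import cde.Parameters

// Illustrative sketch only; ExampleTile and the Rocket constructor shape are
// assumed names, not part of this repository's documented API.
class ExampleTile(implicit p: Parameters) extends Module {
  val io = new Bundle { /* tile-level ports elided */ }
  // A Rocket core is an ordinary Chisel Module configured entirely through
  // the implicit cde Parameters object.
  val core = Module(new Rocket()(p))
  // core.io would then be wired to the I$/D$, FPU, and optional RoCC
  // accelerators by the surrounding generator code.
}
```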

rocket/build.sbt (new file)

@@ -0,0 +1,10 @@
organization := "edu.berkeley.cs"
version := "1.2"
name := "rocket"
scalaVersion := "2.11.6"
libraryDependencies ++= (Seq("chisel", "hardfloat", "uncore", "junctions", "cde").map {
dep: String => sys.props.get(dep + "Version") map { "edu.berkeley.cs" %% dep % _ }}).flatten
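// Each dependency above is only added when a matching JVM system property is
// set: for example, launching sbt with -DchiselVersion=<version> pulls in
// "edu.berkeley.cs" %% "chisel" % <version>, while a dependency whose property
// is undefined is simply omitted from the build.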

@@ -0,0 +1,113 @@
// See LICENSE for license details.
package rocket
import Chisel._
import cde.{Parameters, Field}
import junctions.{ParameterizedBundle, DecoupledHelper}
class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module
{
val io = new Bundle {
val requestor = Vec(n, new HellaCacheIO).flip
val mem = new HellaCacheIO
}
if (n == 1) {
io.mem <> io.requestor.head
} else {
val s1_id = Reg(UInt())
val s2_id = Reg(next=s1_id)
io.mem.invalidate_lr := io.requestor.map(_.invalidate_lr).reduce(_||_)
io.mem.req.valid := io.requestor.map(_.req.valid).reduce(_||_)
io.requestor(0).req.ready := io.mem.req.ready
for (i <- 1 until n)
io.requestor(i).req.ready := io.requestor(i-1).req.ready && !io.requestor(i-1).req.valid
for (i <- n-1 to 0 by -1) {
val req = io.requestor(i).req
def connect_s0() = {
io.mem.req.bits.cmd := req.bits.cmd
io.mem.req.bits.typ := req.bits.typ
io.mem.req.bits.addr := req.bits.addr
io.mem.req.bits.phys := req.bits.phys
io.mem.req.bits.tag := Cat(req.bits.tag, UInt(i, log2Up(n)))
s1_id := UInt(i)
}
def connect_s1() = {
io.mem.s1_kill := io.requestor(i).s1_kill
io.mem.s1_data := io.requestor(i).s1_data
}
if (i == n-1) {
connect_s0()
connect_s1()
} else {
when (req.valid) { connect_s0() }
when (s1_id === UInt(i)) { connect_s1() }
}
}
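// Responses are routed back to the requestor identified by the low log2Up(n)
// bits appended to each request tag above; the original tag is recovered by
// shifting those bits back off in the loop below.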
for (i <- 0 until n) {
val resp = io.requestor(i).resp
val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UInt(i)
resp.valid := io.mem.resp.valid && tag_hit
io.requestor(i).xcpt := io.mem.xcpt
io.requestor(i).ordered := io.mem.ordered
io.requestor(i).s2_nack := io.mem.s2_nack && s2_id === UInt(i)
resp.bits := io.mem.resp.bits
resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n)
io.requestor(i).replay_next := io.mem.replay_next
}
}
}
class InOrderArbiter[T <: Data, U <: Data](reqTyp: T, respTyp: U, n: Int)
(implicit p: Parameters) extends Module {
val io = new Bundle {
val in_req = Vec(n, Decoupled(reqTyp)).flip
val in_resp = Vec(n, Decoupled(respTyp))
val out_req = Decoupled(reqTyp)
val out_resp = Decoupled(respTyp).flip
}
if (n > 1) {
val route_q = Module(new Queue(UInt(width = log2Up(n)), 2))
val req_arb = Module(new RRArbiter(reqTyp, n))
req_arb.io.in <> io.in_req
val req_helper = DecoupledHelper(
req_arb.io.out.valid,
route_q.io.enq.ready,
io.out_req.ready)
io.out_req.bits := req_arb.io.out.bits
io.out_req.valid := req_helper.fire(io.out_req.ready)
route_q.io.enq.bits := req_arb.io.chosen
route_q.io.enq.valid := req_helper.fire(route_q.io.enq.ready)
req_arb.io.out.ready := req_helper.fire(req_arb.io.out.valid)
val resp_sel = route_q.io.deq.bits
val resp_ready = io.in_resp(resp_sel).ready
val resp_helper = DecoupledHelper(
resp_ready,
route_q.io.deq.valid,
io.out_resp.valid)
val resp_valid = resp_helper.fire(resp_ready)
for (i <- 0 until n) {
io.in_resp(i).bits := io.out_resp.bits
io.in_resp(i).valid := resp_valid && resp_sel === UInt(i)
}
route_q.io.deq.ready := resp_helper.fire(route_q.io.deq.valid)
io.out_resp.ready := resp_helper.fire(io.out_resp.valid)
} else {
io.out_req <> io.in_req.head
io.in_resp.head <> io.out_resp
}
}

@@ -0,0 +1,82 @@
// See LICENSE for license details.
package rocket
import Chisel._
import Util._
import cde.Parameters
class TDRSelect(implicit p: Parameters) extends CoreBundle()(p) {
val tdrmode = Bool()
val reserved = UInt(width = xLen - 1 - log2Up(nTDR))
val tdrindex = UInt(width = log2Up(nTDR))
def nTDR = p(NBreakpoints)
}
class BPControl(implicit p: Parameters) extends CoreBundle()(p) {
val tdrtype = UInt(width = 4)
val bpamaskmax = UInt(width = 5)
val reserved = UInt(width = xLen-28)
val bpaction = UInt(width = 8)
val bpmatch = UInt(width = 4)
val m = Bool()
val h = Bool()
val s = Bool()
val u = Bool()
val r = Bool()
val w = Bool()
val x = Bool()
def tdrType = 1
def bpaMaskMax = 4
def enabled(mstatus: MStatus) = Cat(m, h, s, u)(mstatus.prv)
}
class BP(implicit p: Parameters) extends CoreBundle()(p) {
val control = new BPControl
val address = UInt(width = vaddrBits)
def mask(dummy: Int = 0) = {
var mask: UInt = control.bpmatch(1)
for (i <- 1 until control.bpaMaskMax)
mask = Cat(mask(i-1) && address(i-1), mask)
mask
}
def pow2AddressMatch(x: UInt) =
(~x | mask()) === (~address | mask())
}
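// The following plain-Scala model (illustration only, not part of the RTL)
// mirrors how BP.mask above grows a NAPOT-style mask: the mask extends upward
// one bit at a time for as long as the address keeps supplying trailing ones,
// so with matching enabled an address ending in binary 0111 covers a naturally
// aligned 16-byte region.
object BPMaskModel {
  def napotMask(address: BigInt, matchEnable: Boolean, maskMax: Int): BigInt = {
    var mask = if (matchEnable) BigInt(1) else BigInt(0)
    for (i <- 1 until maskMax)
      if (mask.testBit(i - 1) && address.testBit(i - 1)) mask = mask.setBit(i)
    mask
  }
}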
class BreakpointUnit(implicit p: Parameters) extends CoreModule()(p) {
val io = new Bundle {
val status = new MStatus().asInput
val bp = Vec(p(NBreakpoints), new BP).asInput
val pc = UInt(INPUT, vaddrBits)
val ea = UInt(INPUT, vaddrBits)
val xcpt_if = Bool(OUTPUT)
val xcpt_ld = Bool(OUTPUT)
val xcpt_st = Bool(OUTPUT)
}
io.xcpt_if := false
io.xcpt_ld := false
io.xcpt_st := false
for (bp <- io.bp) {
when (bp.control.enabled(io.status)) {
when (bp.pow2AddressMatch(io.pc) && bp.control.x) { io.xcpt_if := true }
when (bp.pow2AddressMatch(io.ea) && bp.control.r) { io.xcpt_ld := true }
when (bp.pow2AddressMatch(io.ea) && bp.control.w) { io.xcpt_st := true }
}
}
if (!io.bp.isEmpty) for ((bpl, bph) <- io.bp zip io.bp.tail) {
def matches(x: UInt) = !(x < bpl.address) && x < bph.address
when (bph.control.enabled(io.status) && bph.control.bpmatch === 1) {
when (matches(io.pc) && bph.control.x) { io.xcpt_if := true }
when (matches(io.ea) && bph.control.r) { io.xcpt_ld := true }
when (matches(io.ea) && bph.control.w) { io.xcpt_st := true }
}
}
}

@@ -0,0 +1,272 @@
// See LICENSE for license details.
package rocket
import Chisel._
import junctions._
import cde.{Parameters, Field}
import Util._
case object BtbKey extends Field[BtbParameters]
case class BtbParameters(
nEntries: Int = 62,
nRAS: Int = 2,
updatesOutOfOrder: Boolean = false)
abstract trait HasBtbParameters extends HasCoreParameters {
val matchBits = pgIdxBits
val entries = p(BtbKey).nEntries
val nRAS = p(BtbKey).nRAS
val updatesOutOfOrder = p(BtbKey).updatesOutOfOrder
val nPages = ((1 max(log2Up(entries)))+1)/2*2 // control logic assumes 2 divides pages
val opaqueBits = log2Up(entries)
val nBHT = 1 << log2Up(entries*2)
}
abstract class BtbModule(implicit val p: Parameters) extends Module with HasBtbParameters
abstract class BtbBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
with HasBtbParameters
class RAS(nras: Int) {
def push(addr: UInt): Unit = {
when (count < nras) { count := count + 1 }
val nextPos = Mux(Bool(isPow2(nras)) || pos < nras-1, pos+1, UInt(0))
stack(nextPos) := addr
pos := nextPos
}
def peek: UInt = stack(pos)
def pop(): Unit = when (!isEmpty) {
count := count - 1
pos := Mux(Bool(isPow2(nras)) || pos > 0, pos-1, UInt(nras-1))
}
def clear(): Unit = count := UInt(0)
def isEmpty: Bool = count === UInt(0)
private val count = Reg(UInt(width = log2Up(nras+1)))
private val pos = Reg(UInt(width = log2Up(nras)))
private val stack = Reg(Vec(nras, UInt()))
}
class BHTResp(implicit p: Parameters) extends BtbBundle()(p) {
val history = UInt(width = log2Up(nBHT).max(1))
val value = UInt(width = 2)
}
// BHT contains table of 2-bit counters and a global history register.
// The BHT only predicts and updates when there is a BTB hit.
// The global history:
// - updated speculatively in fetch (if there's a BTB hit).
// - on a mispredict, the history register is reset (again, only if BTB hit).
// The counter table:
// - each counter corresponds with the address of the fetch packet ("fetch pc").
// - updated when a branch resolves (and BTB was a hit for that branch).
// The updating branch must provide its "fetch pc".
class BHT(nbht: Int)(implicit p: Parameters) {
val nbhtbits = log2Up(nbht)
def get(addr: UInt, update: Bool): BHTResp = {
val res = Wire(new BHTResp)
val index = addr(nbhtbits+1,2) ^ history
res.value := table(index)
res.history := history
val taken = res.value(0)
when (update) { history := Cat(taken, history(nbhtbits-1,1)) }
res
}
def update(addr: UInt, d: BHTResp, taken: Bool, mispredict: Bool): Unit = {
val index = addr(nbhtbits+1,2) ^ d.history
table(index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken))
when (mispredict) { history := Cat(taken, d.history(nbhtbits-1,1)) }
}
private val table = Mem(nbht, UInt(width = 2))
val history = Reg(UInt(width = nbhtbits))
}
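// Plain-Scala model (illustration only) of the two-bit entry written back by
// BHT.update above: the MSB records the latest outcome, and the LSB (the
// predict-taken bit read by get) becomes the majority vote of the previous
// outcome, the previous prediction, and the latest outcome.
object BHTCounterModel {
  def update(value: Int, taken: Boolean): Int = {
    val prevOutcome    = (value >> 1) & 1
    val prevPrediction = value & 1
    val t = if (taken) 1 else 0
    val newPrediction = (prevOutcome & prevPrediction) | ((prevOutcome | prevPrediction) & t)
    (t << 1) | newPrediction
  }
}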
// BTB update occurs during branch resolution (and only on a mispredict).
// - "pc" is what future fetch PCs will tag match against.
// - "br_pc" is the PC of the branch instruction.
class BTBUpdate(implicit p: Parameters) extends BtbBundle()(p) {
val prediction = Valid(new BTBResp)
val pc = UInt(width = vaddrBits)
val target = UInt(width = vaddrBits)
val taken = Bool()
val isJump = Bool()
val isReturn = Bool()
val br_pc = UInt(width = vaddrBits)
}
// BHT update occurs during branch resolution on all conditional branches.
// - "pc" is what future fetch PCs will tag match against.
class BHTUpdate(implicit p: Parameters) extends BtbBundle()(p) {
val prediction = Valid(new BTBResp)
val pc = UInt(width = vaddrBits)
val taken = Bool()
val mispredict = Bool()
}
class RASUpdate(implicit p: Parameters) extends BtbBundle()(p) {
val isCall = Bool()
val isReturn = Bool()
val returnAddr = UInt(width = vaddrBits)
val prediction = Valid(new BTBResp)
}
// - "bridx" is the low-order PC bits of the predicted branch (after
// shifting off the lowest log(inst_bytes) bits off).
// - "mask" provides a mask of valid instructions (instructions are
// masked off by the predicted taken branch from the BTB).
class BTBResp(implicit p: Parameters) extends BtbBundle()(p) {
val taken = Bool()
val mask = Bits(width = fetchWidth)
val bridx = Bits(width = log2Up(fetchWidth))
val target = UInt(width = vaddrBits)
val entry = UInt(width = opaqueBits)
val bht = new BHTResp
}
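// Illustration only: a software restatement of the mask expression assigned to
// io.resp.bits.mask in the BTB class below. With fetchWidth = 4, a
// predicted-taken branch at bridx = 1 gives mask = b0011 (the branch and the
// instruction before it stay valid); when no taken branch is predicted the
// whole fetch packet stays valid.
object FetchMaskModel {
  def fetchMask(taken: Boolean, bridx: Int, fetchWidth: Int): Int =
    if (taken) (1 << (bridx + 1)) - 1 else (1 << fetchWidth) - 1
}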
class BTBReq(implicit p: Parameters) extends BtbBundle()(p) {
val addr = UInt(width = vaddrBits)
}
// fully-associative branch target buffer
// Higher-performance processors may cause BTB updates to occur out-of-order,
// which requires an extra CAM port for updates (to ensure no duplicates get
// placed in BTB).
class BTB(implicit p: Parameters) extends BtbModule {
val io = new Bundle {
val req = Valid(new BTBReq).flip
val resp = Valid(new BTBResp)
val btb_update = Valid(new BTBUpdate).flip
val bht_update = Valid(new BHTUpdate).flip
val ras_update = Valid(new RASUpdate).flip
}
val idxs = Reg(Vec(entries, UInt(width=matchBits - log2Up(coreInstBytes))))
val idxPages = Reg(Vec(entries, UInt(width=log2Up(nPages))))
val tgts = Reg(Vec(entries, UInt(width=matchBits - log2Up(coreInstBytes))))
val tgtPages = Reg(Vec(entries, UInt(width=log2Up(nPages))))
val pages = Reg(Vec(nPages, UInt(width=vaddrBits - matchBits)))
val pageValid = Reg(init = UInt(0, nPages))
val idxPagesOH = idxPages.map(UIntToOH(_)(nPages-1,0))
val tgtPagesOH = tgtPages.map(UIntToOH(_)(nPages-1,0))
val useRAS = Reg(UInt(width = entries))
val isJump = Reg(UInt(width = entries))
val brIdx = Reg(Vec(entries, UInt(width=log2Up(fetchWidth))))
private def page(addr: UInt) = addr >> matchBits
private def pageMatch(addr: UInt) = {
val p = page(addr)
pageValid & pages.map(_ === p).toBits
}
private def tagMatch(addr: UInt, pgMatch: UInt) = {
val idxMatch = idxs.map(_ === addr(matchBits-1, log2Up(coreInstBytes))).toBits
val idxPageMatch = idxPagesOH.map(_ & pgMatch).map(_.orR).toBits
idxMatch & idxPageMatch
}
val r_btb_update = Pipe(io.btb_update)
val update_target = io.req.bits.addr
val pageHit = pageMatch(io.req.bits.addr)
val hitsVec = tagMatch(io.req.bits.addr, pageHit)
val hits = hitsVec.toBits
val updatePageHit = pageMatch(r_btb_update.bits.pc)
val updateHits = tagMatch(r_btb_update.bits.pc, updatePageHit)
val updateHit = if (updatesOutOfOrder) updateHits.orR else r_btb_update.bits.prediction.valid
val updateHitAddr = if (updatesOutOfOrder) OHToUInt(updateHits) else r_btb_update.bits.prediction.bits.entry
// guarantee one-hotness of idx after reset
val resetting = Reg(init = Bool(true))
val (nextRepl, wrap) = Counter(resetting || (r_btb_update.valid && !updateHit), entries)
when (wrap) { resetting := false }
val useUpdatePageHit = updatePageHit.orR
val usePageHit = pageHit.orR
val doIdxPageRepl = !useUpdatePageHit
val nextPageRepl = Reg(UInt(width = log2Ceil(nPages)))
val idxPageRepl = Mux(usePageHit, Cat(pageHit(nPages-2,0), pageHit(nPages-1)), UIntToOH(nextPageRepl))
val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl)
val idxPageUpdate = OHToUInt(idxPageUpdateOH)
val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0))
val samePage = page(r_btb_update.bits.pc) === page(update_target)
val doTgtPageRepl = !samePage && !usePageHit
val tgtPageRepl = Mux(samePage, idxPageUpdateOH, Cat(idxPageUpdateOH(nPages-2,0), idxPageUpdateOH(nPages-1)))
val tgtPageUpdate = OHToUInt(Mux(usePageHit, pageHit, tgtPageRepl))
val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0))
when (r_btb_update.valid && (doIdxPageRepl || doTgtPageRepl)) {
val both = doIdxPageRepl && doTgtPageRepl
val next = nextPageRepl + Mux[UInt](both, 2, 1)
nextPageRepl := Mux(next >= nPages, next(0), next)
}
when (r_btb_update.valid || resetting) {
assert(resetting || io.req.bits.addr === r_btb_update.bits.target, "BTB request != I$ target")
val waddr = Mux(updateHit && !resetting, updateHitAddr, nextRepl)
val mask = UIntToOH(waddr)
val newIdx = r_btb_update.bits.pc(matchBits-1, log2Up(coreInstBytes))
idxs(waddr) := Mux(resetting, Cat(newIdx >> log2Ceil(entries), nextRepl), newIdx)
tgts(waddr) := update_target(matchBits-1, log2Up(coreInstBytes))
idxPages(waddr) := idxPageUpdate
tgtPages(waddr) := tgtPageUpdate
useRAS := Mux(r_btb_update.bits.isReturn, useRAS | mask, useRAS & ~mask)
isJump := Mux(r_btb_update.bits.isJump, isJump | mask, isJump & ~mask)
if (fetchWidth > 1)
brIdx(waddr) := r_btb_update.bits.br_pc >> log2Up(coreInstBytes)
require(nPages % 2 == 0)
val idxWritesEven = !idxPageUpdate(0)
def writeBank(i: Int, mod: Int, en: UInt, data: UInt) =
for (i <- i until nPages by mod)
when (en(i)) { pages(i) := data }
writeBank(0, 2, Mux(idxWritesEven, idxPageReplEn, tgtPageReplEn),
Mux(idxWritesEven, page(r_btb_update.bits.pc), page(update_target)))
writeBank(1, 2, Mux(idxWritesEven, tgtPageReplEn, idxPageReplEn),
Mux(idxWritesEven, page(update_target), page(r_btb_update.bits.pc)))
pageValid := pageValid | tgtPageReplEn | idxPageReplEn
}
io.resp.valid := hits.orR
io.resp.bits.taken := io.resp.valid
io.resp.bits.target := Cat(Mux1H(Mux1H(hitsVec, tgtPagesOH), pages), Mux1H(hitsVec, tgts) << log2Up(coreInstBytes))
io.resp.bits.entry := OHToUInt(hits)
io.resp.bits.bridx := Mux1H(hitsVec, brIdx)
io.resp.bits.mask := Cat((UInt(1) << ~Mux(io.resp.bits.taken, ~io.resp.bits.bridx, UInt(0)))-1, UInt(1))
if (nBHT > 0) {
val bht = new BHT(nBHT)
val isBranch = !(hits & isJump).orR
val res = bht.get(io.req.bits.addr, io.req.valid && io.resp.valid && isBranch)
val update_btb_hit = io.bht_update.bits.prediction.valid
when (io.bht_update.valid && update_btb_hit) {
bht.update(io.bht_update.bits.pc, io.bht_update.bits.prediction.bits.bht, io.bht_update.bits.taken, io.bht_update.bits.mispredict)
}
when (!res.value(0) && isBranch) { io.resp.bits.taken := false }
io.resp.bits.bht := res
}
if (nRAS > 0) {
val ras = new RAS(nRAS)
val doPeek = (hits & useRAS).orR
when (!ras.isEmpty && doPeek) {
io.resp.bits.target := ras.peek
}
when (io.ras_update.valid) {
when (io.ras_update.bits.isCall) {
ras.push(io.ras_update.bits.returnAddr)
when (doPeek) {
io.resp.bits.target := io.ras_update.bits.returnAddr
}
}.elsewhen (io.ras_update.bits.isReturn && io.ras_update.bits.prediction.valid) {
ras.pop()
}
}
}
}

@@ -0,0 +1,49 @@
// See LICENSE for license details.
package rocket
package constants
import Chisel._
import scala.math._
trait ScalarOpConstants {
val SZ_BR = 3
val BR_X = BitPat("b???")
val BR_EQ = UInt(0, 3)
val BR_NE = UInt(1, 3)
val BR_J = UInt(2, 3)
val BR_N = UInt(3, 3)
val BR_LT = UInt(4, 3)
val BR_GE = UInt(5, 3)
val BR_LTU = UInt(6, 3)
val BR_GEU = UInt(7, 3)
val A1_X = BitPat("b??")
val A1_ZERO = UInt(0, 2)
val A1_RS1 = UInt(1, 2)
val A1_PC = UInt(2, 2)
val IMM_X = BitPat("b???")
val IMM_S = UInt(0, 3)
val IMM_SB = UInt(1, 3)
val IMM_U = UInt(2, 3)
val IMM_UJ = UInt(3, 3)
val IMM_I = UInt(4, 3)
val IMM_Z = UInt(5, 3)
val A2_X = BitPat("b??")
val A2_ZERO = UInt(0, 2)
val A2_FOUR = UInt(1, 2)
val A2_RS2 = UInt(2, 2)
val A2_IMM = UInt(3, 2)
val X = BitPat("b?")
val N = BitPat("b0")
val Y = BitPat("b1")
val SZ_DW = 1
val DW_X = X
val DW_32 = N
val DW_64 = Y
val DW_XPR = Y
}

@@ -0,0 +1,589 @@
// See LICENSE for license details.
package rocket
import Chisel._
import Util._
import Instructions._
import cde.{Parameters, Field}
import uncore.devices._
import junctions.AddrMap
class MStatus extends Bundle {
val debug = Bool() // not truly part of mstatus, but convenient
val prv = UInt(width = PRV.SZ) // not truly part of mstatus, but convenient
val sd = Bool()
val zero3 = UInt(width = 31)
val sd_rv32 = Bool()
val zero2 = UInt(width = 2)
val vm = UInt(width = 5)
val zero1 = UInt(width = 4)
val mxr = Bool()
val pum = Bool()
val mprv = Bool()
val xs = UInt(width = 2)
val fs = UInt(width = 2)
val mpp = UInt(width = 2)
val hpp = UInt(width = 2)
val spp = UInt(width = 1)
val mpie = Bool()
val hpie = Bool()
val spie = Bool()
val upie = Bool()
val mie = Bool()
val hie = Bool()
val sie = Bool()
val uie = Bool()
}
class DCSR extends Bundle {
val xdebugver = UInt(width = 2)
val ndreset = Bool()
val fullreset = Bool()
val hwbpcount = UInt(width = 12)
val ebreakm = Bool()
val ebreakh = Bool()
val ebreaks = Bool()
val ebreaku = Bool()
val zero2 = Bool()
val stopcycle = Bool()
val stoptime = Bool()
val cause = UInt(width = 3)
val debugint = Bool()
val zero1 = Bool()
val halt = Bool()
val step = Bool()
val prv = UInt(width = PRV.SZ)
}
class MIP extends Bundle {
val rocc = Bool()
val meip = Bool()
val heip = Bool()
val seip = Bool()
val ueip = Bool()
val mtip = Bool()
val htip = Bool()
val stip = Bool()
val utip = Bool()
val msip = Bool()
val hsip = Bool()
val ssip = Bool()
val usip = Bool()
}
class PTBR(implicit p: Parameters) extends CoreBundle()(p) {
require(maxPAddrBits - pgIdxBits + asIdBits <= xLen)
val asid = UInt(width = asIdBits)
val ppn = UInt(width = maxPAddrBits - pgIdxBits)
}
object PRV
{
val SZ = 2
val U = 0
val S = 1
val H = 2
val M = 3
}
object CSR
{
// commands
val SZ = 3
val X = BitPat.DC(SZ)
val N = UInt(0,SZ)
val W = UInt(1,SZ)
val S = UInt(2,SZ)
val C = UInt(3,SZ)
val I = UInt(4,SZ)
val R = UInt(5,SZ)
val ADDRSZ = 12
}
class CSRFileIO(implicit p: Parameters) extends CoreBundle {
val prci = new PRCITileIO().flip
val rw = new Bundle {
val addr = UInt(INPUT, CSR.ADDRSZ)
val cmd = Bits(INPUT, CSR.SZ)
val rdata = Bits(OUTPUT, xLen)
val wdata = Bits(INPUT, xLen)
}
val csr_stall = Bool(OUTPUT)
val csr_xcpt = Bool(OUTPUT)
val eret = Bool(OUTPUT)
val singleStep = Bool(OUTPUT)
val status = new MStatus().asOutput
val ptbr = new PTBR().asOutput
val evec = UInt(OUTPUT, vaddrBitsExtended)
val exception = Bool(INPUT)
val retire = UInt(INPUT, log2Up(1+retireWidth))
val custom_mrw_csrs = Vec(nCustomMrwCsrs, UInt(INPUT, xLen))
val cause = UInt(INPUT, xLen)
val pc = UInt(INPUT, vaddrBitsExtended)
val badaddr = UInt(INPUT, vaddrBitsExtended)
val fatc = Bool(OUTPUT)
val time = UInt(OUTPUT, xLen)
val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ)
val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ)).flip
val rocc = new RoCCInterface().flip
val interrupt = Bool(OUTPUT)
val interrupt_cause = UInt(OUTPUT, xLen)
val bp = Vec(p(NBreakpoints), new BP).asOutput
}
class CSRFile(implicit p: Parameters) extends CoreModule()(p)
{
val io = new CSRFileIO
val reset_mstatus = Wire(init=new MStatus().fromBits(0))
reset_mstatus.mpp := PRV.M
reset_mstatus.prv := PRV.M
val reg_mstatus = Reg(init=reset_mstatus)
val reset_dcsr = Wire(init=new DCSR().fromBits(0))
reset_dcsr.xdebugver := 1
reset_dcsr.prv := PRV.M
val reg_dcsr = Reg(init=reset_dcsr)
val (supported_interrupts, delegable_interrupts) = {
val sup = Wire(init=new MIP().fromBits(0))
sup.ssip := Bool(p(UseVM))
sup.msip := true
sup.stip := Bool(p(UseVM))
sup.mtip := true
sup.meip := true
sup.seip := Bool(p(UseVM))
sup.rocc := usingRoCC
val del = Wire(init=sup)
del.msip := false
del.mtip := false
del.meip := false
(sup.toBits, del.toBits)
}
val delegable_exceptions = UInt(Seq(
Causes.misaligned_fetch,
Causes.fault_fetch,
Causes.breakpoint,
Causes.fault_load,
Causes.fault_store,
Causes.user_ecall).map(1 << _).sum)
val exception = io.exception || io.csr_xcpt
val reg_debug = Reg(init=Bool(false))
val reg_dpc = Reg(UInt(width = vaddrBitsExtended))
val reg_dscratch = Reg(UInt(width = xLen))
val reg_singleStepped = Reg(Bool())
when (io.retire(0) || exception) { reg_singleStepped := true }
when (!io.singleStep) { reg_singleStepped := false }
assert(!io.singleStep || io.retire <= UInt(1))
assert(!reg_singleStepped || io.retire === UInt(0))
val reg_tdrselect = Reg(new TDRSelect)
val reg_bp = Reg(Vec(1 << log2Up(p(NBreakpoints)), new BP))
val reg_mie = Reg(init=UInt(0, xLen))
val reg_mideleg = Reg(init=UInt(0, xLen))
val reg_medeleg = Reg(init=UInt(0, xLen))
val reg_mip = Reg(new MIP)
val reg_mepc = Reg(UInt(width = vaddrBitsExtended))
val reg_mcause = Reg(Bits(width = xLen))
val reg_mbadaddr = Reg(UInt(width = vaddrBitsExtended))
val reg_mscratch = Reg(Bits(width = xLen))
val reg_mtvec = Reg(init=UInt(p(MtvecInit), paddrBits min xLen))
val reg_sepc = Reg(UInt(width = vaddrBitsExtended))
val reg_scause = Reg(Bits(width = xLen))
val reg_sbadaddr = Reg(UInt(width = vaddrBitsExtended))
val reg_sscratch = Reg(Bits(width = xLen))
val reg_stvec = Reg(UInt(width = vaddrBits))
val reg_sptbr = Reg(new PTBR)
val reg_wfi = Reg(init=Bool(false))
val reg_fflags = Reg(UInt(width = 5))
val reg_frm = Reg(UInt(width = 3))
val reg_instret = WideCounter(64, io.retire)
val reg_cycle: UInt = if (enableCommitLog) { reg_instret } else { WideCounter(64) }
val mip = Wire(init=reg_mip)
mip.rocc := io.rocc.interrupt
val read_mip = mip.toBits & supported_interrupts
val pending_interrupts = read_mip & reg_mie
val m_interrupts = Mux(!reg_debug && (reg_mstatus.prv < PRV.M || (reg_mstatus.prv === PRV.M && reg_mstatus.mie)), pending_interrupts & ~reg_mideleg, UInt(0))
val s_interrupts = Mux(!reg_debug && (reg_mstatus.prv < PRV.S || (reg_mstatus.prv === PRV.S && reg_mstatus.sie)), pending_interrupts & reg_mideleg, UInt(0))
val all_interrupts = m_interrupts | s_interrupts
val interruptMSB = BigInt(1) << (xLen-1)
val interruptCause = interruptMSB + PriorityEncoder(all_interrupts)
io.interrupt := all_interrupts.orR && !io.singleStep || reg_singleStepped
io.interrupt_cause := interruptCause
io.bp := reg_bp take p(NBreakpoints)
val debugIntCause = reg_mip.getWidth
// debug interrupts are only masked by being in debug mode
when (Bool(usingDebug) && reg_dcsr.debugint && !reg_debug) {
io.interrupt := true
io.interrupt_cause := interruptMSB + debugIntCause
}
val system_insn = io.rw.cmd === CSR.I
val cpu_ren = io.rw.cmd =/= CSR.N && !system_insn
val isa_string = "IM" +
(if (usingVM) "S" else "") +
(if (usingUser) "U" else "") +
(if (usingAtomics) "A" else "") +
(if (usingFPU) "FD" else "") +
(if (usingRoCC) "X" else "")
val isa = (BigInt(log2Ceil(xLen) - 4) << (xLen-2)) |
isa_string.map(x => 1 << (x - 'A')).reduce(_|_)
val read_mstatus = io.status.toBits()(xLen-1,0)
val read_mapping = collection.mutable.LinkedHashMap[Int,Bits](
CSRs.tdrselect -> reg_tdrselect.toBits,
CSRs.tdrdata1 -> reg_bp(reg_tdrselect.tdrindex).control.toBits,
CSRs.tdrdata2 -> reg_bp(reg_tdrselect.tdrindex).address,
CSRs.mimpid -> UInt(0),
CSRs.marchid -> UInt(0),
CSRs.mvendorid -> UInt(0),
CSRs.mcycle -> reg_cycle,
CSRs.minstret -> reg_instret,
CSRs.mucounteren -> UInt(0),
CSRs.mutime_delta -> UInt(0),
CSRs.mucycle_delta -> UInt(0),
CSRs.muinstret_delta -> UInt(0),
CSRs.misa -> UInt(isa),
CSRs.mstatus -> read_mstatus,
CSRs.mtvec -> reg_mtvec,
CSRs.mip -> read_mip,
CSRs.mie -> reg_mie,
CSRs.mideleg -> reg_mideleg,
CSRs.medeleg -> reg_medeleg,
CSRs.mscratch -> reg_mscratch,
CSRs.mepc -> reg_mepc.sextTo(xLen),
CSRs.mbadaddr -> reg_mbadaddr.sextTo(xLen),
CSRs.mcause -> reg_mcause,
CSRs.mhartid -> io.prci.id)
if (usingDebug) {
read_mapping += CSRs.dcsr -> reg_dcsr.toBits
read_mapping += CSRs.dpc -> reg_dpc.toBits
read_mapping += CSRs.dscratch -> reg_dscratch.toBits
}
if (usingFPU) {
read_mapping += CSRs.fflags -> reg_fflags
read_mapping += CSRs.frm -> reg_frm
read_mapping += CSRs.fcsr -> Cat(reg_frm, reg_fflags)
}
if (usingVM) {
val read_sie = reg_mie & reg_mideleg
val read_sip = read_mip & reg_mideleg
val read_sstatus = Wire(init=io.status)
read_sstatus.vm := 0
read_sstatus.mprv := 0
read_sstatus.mpp := 0
read_sstatus.hpp := 0
read_sstatus.mpie := 0
read_sstatus.hpie := 0
read_sstatus.mie := 0
read_sstatus.hie := 0
read_mapping += CSRs.sstatus -> (read_sstatus.toBits())(xLen-1,0)
read_mapping += CSRs.sip -> read_sip.toBits
read_mapping += CSRs.sie -> read_sie.toBits
read_mapping += CSRs.sscratch -> reg_sscratch
read_mapping += CSRs.scause -> reg_scause
read_mapping += CSRs.sbadaddr -> reg_sbadaddr.sextTo(xLen)
read_mapping += CSRs.sptbr -> reg_sptbr.toBits
read_mapping += CSRs.sepc -> reg_sepc.sextTo(xLen)
read_mapping += CSRs.stvec -> reg_stvec.sextTo(xLen)
read_mapping += CSRs.mscounteren -> UInt(0)
read_mapping += CSRs.mstime_delta -> UInt(0)
read_mapping += CSRs.mscycle_delta -> UInt(0)
read_mapping += CSRs.msinstret_delta -> UInt(0)
}
if (xLen == 32) {
read_mapping += CSRs.mcycleh -> (reg_cycle >> 32)
read_mapping += CSRs.minstreth -> (reg_instret >> 32)
read_mapping += CSRs.mutime_deltah -> UInt(0)
read_mapping += CSRs.mucycle_deltah -> UInt(0)
read_mapping += CSRs.muinstret_deltah -> UInt(0)
if (usingVM) {
read_mapping += CSRs.mstime_deltah -> UInt(0)
read_mapping += CSRs.mscycle_deltah -> UInt(0)
read_mapping += CSRs.msinstret_deltah -> UInt(0)
}
}
for (i <- 0 until nCustomMrwCsrs) {
val addr = 0xff0 + i
require(addr < (1 << CSR.ADDRSZ))
require(!read_mapping.contains(addr), "custom MRW CSR address " + i + " is already in use")
read_mapping += addr -> io.custom_mrw_csrs(i)
}
for ((addr, i) <- roccCsrs.zipWithIndex) {
require(!read_mapping.contains(addr), "RoCC: CSR address " + addr + " is already in use")
read_mapping += addr -> io.rocc.csr.rdata(i)
}
val decoded_addr = read_mapping map { case (k, v) => k -> (io.rw.addr === k) }
val addr_valid = decoded_addr.values.reduce(_||_)
val fp_csr =
if (usingFPU) decoded_addr(CSRs.fflags) || decoded_addr(CSRs.frm) || decoded_addr(CSRs.fcsr)
else Bool(false)
val csr_debug = Bool(usingDebug) && io.rw.addr(5)
val csr_addr_priv = Cat(io.rw.addr(6,5).andR, io.rw.addr(9,8))
val priv_sufficient = Cat(reg_debug, reg_mstatus.prv) >= csr_addr_priv
val read_only = io.rw.addr(11,10).andR
val cpu_wen = cpu_ren && io.rw.cmd =/= CSR.R && priv_sufficient
val wen = cpu_wen && !read_only
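// The wdata computed below implements the three CSR access flavors: a plain
// write (CSR.W) passes io.rw.wdata through, a set (CSR.S) ORs it into the old
// value, and a clear (CSR.C) ANDs the old value with its complement.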
val wdata = (Mux((io.rw.cmd === CSR.S || io.rw.cmd === CSR.C), io.rw.rdata, UInt(0)) |
Mux(io.rw.cmd =/= CSR.C, io.rw.wdata, UInt(0))) &
~Mux(io.rw.cmd === CSR.C, io.rw.wdata, UInt(0))
val do_system_insn = priv_sufficient && system_insn
val opcode = UInt(1) << io.rw.addr(2,0)
val insn_call = do_system_insn && opcode(0)
val insn_break = do_system_insn && opcode(1)
val insn_ret = do_system_insn && opcode(2)
val insn_sfence_vm = do_system_insn && opcode(4)
val insn_wfi = do_system_insn && opcode(5)
io.csr_xcpt := (cpu_wen && read_only) ||
(cpu_ren && (!priv_sufficient || !addr_valid || fp_csr && !io.status.fs.orR)) ||
(system_insn && !priv_sufficient) ||
insn_call || insn_break
when (insn_wfi) { reg_wfi := true }
when (pending_interrupts.orR) { reg_wfi := false }
val cause =
Mux(!io.csr_xcpt, io.cause,
Mux(insn_call, reg_mstatus.prv + Causes.user_ecall,
Mux[UInt](insn_break, Causes.breakpoint, Causes.illegal_instruction)))
val cause_lsbs = cause(log2Up(xLen)-1,0)
val causeIsDebugInt = cause(xLen-1) && cause_lsbs === debugIntCause
val causeIsDebugBreak = cause === Causes.breakpoint && Cat(reg_dcsr.ebreakm, reg_dcsr.ebreakh, reg_dcsr.ebreaks, reg_dcsr.ebreaku)(reg_mstatus.prv)
val trapToDebug = Bool(usingDebug) && (reg_singleStepped || causeIsDebugInt || causeIsDebugBreak || reg_debug)
val delegate = Bool(p(UseVM)) && reg_mstatus.prv < PRV.M && Mux(cause(xLen-1), reg_mideleg(cause_lsbs), reg_medeleg(cause_lsbs))
val debugTVec = Mux(reg_debug, UInt(0x808), UInt(0x800))
val tvec = Mux(trapToDebug, debugTVec, Mux(delegate, reg_stvec.sextTo(vaddrBitsExtended), reg_mtvec))
val epc = Mux(csr_debug, reg_dpc, Mux(Bool(p(UseVM)) && !csr_addr_priv(1), reg_sepc, reg_mepc))
io.fatc := insn_sfence_vm
io.evec := Mux(exception, tvec, epc)
io.ptbr := reg_sptbr
io.eret := insn_ret
io.singleStep := reg_dcsr.step && !reg_debug
io.status := reg_mstatus
io.status.sd := io.status.fs.andR || io.status.xs.andR
io.status.debug := reg_debug
if (xLen == 32)
io.status.sd_rv32 := io.status.sd
when (exception) {
val epc = ~(~io.pc | (coreInstBytes-1))
val pie = read_mstatus(reg_mstatus.prv)
when (trapToDebug) {
reg_debug := true
reg_dpc := epc
reg_dcsr.cause := Mux(reg_singleStepped, UInt(4), Mux(causeIsDebugInt, UInt(3), UInt(1)))
reg_dcsr.prv := reg_mstatus.prv
}.elsewhen (delegate) {
reg_sepc := epc
reg_scause := cause
reg_sbadaddr := io.badaddr
reg_mstatus.spie := pie
reg_mstatus.spp := reg_mstatus.prv
reg_mstatus.sie := false
reg_mstatus.prv := PRV.S
}.otherwise {
reg_mepc := epc
reg_mcause := cause
reg_mbadaddr := io.badaddr
reg_mstatus.mpie := pie
reg_mstatus.mpp := reg_mstatus.prv
reg_mstatus.mie := false
reg_mstatus.prv := PRV.M
}
}
when (insn_ret) {
when (Bool(p(UseVM)) && !csr_addr_priv(1)) {
when (reg_mstatus.spp.toBool) { reg_mstatus.sie := reg_mstatus.spie }
reg_mstatus.spie := false
reg_mstatus.spp := PRV.U
reg_mstatus.prv := reg_mstatus.spp
}.elsewhen (csr_debug) {
reg_mstatus.prv := reg_dcsr.prv
reg_debug := false
}.otherwise {
when (reg_mstatus.mpp(1)) { reg_mstatus.mie := reg_mstatus.mpie }
.elsewhen (Bool(usingVM) && reg_mstatus.mpp(0)) { reg_mstatus.sie := reg_mstatus.mpie }
reg_mstatus.mpie := false
reg_mstatus.mpp := PRV.U
reg_mstatus.prv := reg_mstatus.mpp
}
}
assert(PopCount(insn_ret :: io.exception :: io.csr_xcpt :: Nil) <= 1, "these conditions must be mutually exclusive")
io.time := reg_cycle
io.csr_stall := reg_wfi
io.rw.rdata := Mux1H(for ((k, v) <- read_mapping) yield decoded_addr(k) -> v)
io.fcsr_rm := reg_frm
when (io.fcsr_flags.valid) {
reg_fflags := reg_fflags | io.fcsr_flags.bits
}
val supportedModes = Vec((PRV.M +: (if (usingUser) Some(PRV.U) else None) ++: (if (usingVM) Seq(PRV.S) else Nil)).map(UInt(_)))
when (wen) {
when (decoded_addr(CSRs.mstatus)) {
val new_mstatus = new MStatus().fromBits(wdata)
reg_mstatus.mie := new_mstatus.mie
reg_mstatus.mpie := new_mstatus.mpie
if (supportedModes.size > 1) {
reg_mstatus.mprv := new_mstatus.mprv
when (supportedModes contains new_mstatus.mpp) { reg_mstatus.mpp := new_mstatus.mpp }
if (supportedModes.size > 2) {
reg_mstatus.mxr := new_mstatus.mxr
reg_mstatus.pum := new_mstatus.pum
reg_mstatus.spp := new_mstatus.spp
reg_mstatus.spie := new_mstatus.spie
reg_mstatus.sie := new_mstatus.sie
}
}
if (usingVM) {
require(if (xLen == 32) pgLevels == 2 else pgLevels > 2 && pgLevels < 6)
val vm_on = 6 + pgLevels // TODO Sv48 support should imply Sv39 support
when (new_mstatus.vm === 0) { reg_mstatus.vm := 0 }
when (new_mstatus.vm === vm_on) { reg_mstatus.vm := vm_on }
}
if (usingVM || usingFPU) reg_mstatus.fs := Fill(2, new_mstatus.fs.orR)
if (usingRoCC) reg_mstatus.xs := Fill(2, new_mstatus.xs.orR)
}
when (decoded_addr(CSRs.mip)) {
val new_mip = new MIP().fromBits(wdata)
if (usingVM) {
reg_mip.ssip := new_mip.ssip
reg_mip.stip := new_mip.stip
}
}
when (decoded_addr(CSRs.mie)) { reg_mie := wdata & supported_interrupts }
when (decoded_addr(CSRs.mepc)) { reg_mepc := ~(~wdata | (coreInstBytes-1)) }
when (decoded_addr(CSRs.mscratch)) { reg_mscratch := wdata }
if (p(MtvecWritable))
when (decoded_addr(CSRs.mtvec)) { reg_mtvec := wdata >> 2 << 2 }
when (decoded_addr(CSRs.mcause)) { reg_mcause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ }
when (decoded_addr(CSRs.mbadaddr)) { reg_mbadaddr := wdata(vaddrBitsExtended-1,0) }
if (usingFPU) {
when (decoded_addr(CSRs.fflags)) { reg_fflags := wdata }
when (decoded_addr(CSRs.frm)) { reg_frm := wdata }
when (decoded_addr(CSRs.fcsr)) { reg_fflags := wdata; reg_frm := wdata >> reg_fflags.getWidth }
}
if (usingDebug) {
when (decoded_addr(CSRs.dcsr)) {
val new_dcsr = new DCSR().fromBits(wdata)
reg_dcsr.halt := new_dcsr.halt
reg_dcsr.step := new_dcsr.step
reg_dcsr.ebreakm := new_dcsr.ebreakm
if (usingVM) reg_dcsr.ebreaks := new_dcsr.ebreaks
if (usingUser) reg_dcsr.ebreaku := new_dcsr.ebreaku
if (supportedModes.size > 1) reg_dcsr.prv := new_dcsr.prv
}
when (decoded_addr(CSRs.dpc)) { reg_dpc := ~(~wdata | (coreInstBytes-1)) }
when (decoded_addr(CSRs.dscratch)) { reg_dscratch := wdata }
}
if (usingVM) {
when (decoded_addr(CSRs.sstatus)) {
val new_sstatus = new MStatus().fromBits(wdata)
reg_mstatus.sie := new_sstatus.sie
reg_mstatus.spie := new_sstatus.spie
reg_mstatus.spp := new_sstatus.spp
reg_mstatus.pum := new_sstatus.pum
reg_mstatus.fs := Fill(2, new_sstatus.fs.orR) // even without an FPU
if (usingRoCC) reg_mstatus.xs := Fill(2, new_sstatus.xs.orR)
}
when (decoded_addr(CSRs.sip)) {
val new_sip = new MIP().fromBits(wdata)
reg_mip.ssip := new_sip.ssip
}
when (decoded_addr(CSRs.sie)) { reg_mie := (reg_mie & ~reg_mideleg) | (wdata & reg_mideleg) }
when (decoded_addr(CSRs.sscratch)) { reg_sscratch := wdata }
when (decoded_addr(CSRs.sptbr)) { reg_sptbr.ppn := wdata(ppnBits-1,0) }
when (decoded_addr(CSRs.sepc)) { reg_sepc := ~(~wdata | (coreInstBytes-1)) }
when (decoded_addr(CSRs.stvec)) { reg_stvec := wdata >> 2 << 2 }
when (decoded_addr(CSRs.scause)) { reg_scause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ }
when (decoded_addr(CSRs.sbadaddr)) { reg_sbadaddr := wdata(vaddrBitsExtended-1,0) }
when (decoded_addr(CSRs.mideleg)) { reg_mideleg := wdata & delegable_interrupts }
when (decoded_addr(CSRs.medeleg)) { reg_medeleg := wdata & delegable_exceptions }
}
if (p(NBreakpoints) > 0) {
val newTDR = new TDRSelect().fromBits(wdata)
when (decoded_addr(CSRs.tdrselect)) { reg_tdrselect.tdrindex := newTDR.tdrindex }
when (reg_tdrselect.tdrmode || reg_debug) {
when (decoded_addr(CSRs.tdrdata1)) {
val newBPC = new BPControl().fromBits(wdata)
reg_bp(reg_tdrselect.tdrindex).control := newBPC
reg_bp(reg_tdrselect.tdrindex).control.bpmatch := newBPC.bpmatch & 2 /* exact/NAPOT only */
}
when (decoded_addr(CSRs.tdrdata2)) { reg_bp(reg_tdrselect.tdrindex).address := wdata }
}
}
}
reg_mip := io.prci.interrupts
reg_dcsr.debugint := io.prci.interrupts.debug
reg_dcsr.hwbpcount := UInt(p(NBreakpoints))
io.rocc.csr.waddr := io.rw.addr
io.rocc.csr.wdata := wdata
io.rocc.csr.wen := wen
if (!usingUser) {
reg_mstatus.mpp := PRV.M
reg_mstatus.prv := PRV.M
reg_mstatus.mprv := false
}
reg_sptbr.asid := 0
reg_tdrselect.reserved := 0
reg_tdrselect.tdrmode := true // TODO support D-mode breakpoint theft
if (reg_bp.isEmpty) reg_tdrselect.tdrindex := 0
for (bpc <- reg_bp map {_.control}) {
bpc.tdrtype := bpc.tdrType
bpc.bpamaskmax := bpc.bpaMaskMax
bpc.reserved := 0
bpc.bpaction := 0
bpc.h := false
if (!usingVM) bpc.s := false
if (!usingUser) bpc.u := false
if (!usingVM && !usingUser) bpc.m := true
when (reset) {
bpc.r := false
bpc.w := false
bpc.x := false
}
}
for (bp <- reg_bp drop p(NBreakpoints))
bp := new BP().fromBits(0)
}

@@ -0,0 +1,447 @@
// See LICENSE for license details.
package rocket
import Chisel._
import junctions._
import uncore.tilelink._
import uncore.agents._
import uncore.coherence._
import uncore.util._
import uncore.constants._
import cde.{Parameters, Field}
import Util._
class DCacheDataReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) {
val addr = Bits(width = untagBits)
val write = Bool()
val wdata = Bits(width = rowBits)
val wmask = Bits(width = rowBytes)
val way_en = Bits(width = nWays)
}
class DCacheDataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) {
val io = new Bundle {
val req = Valid(new DCacheDataReq).flip
val resp = Vec(nWays, Bits(OUTPUT, rowBits))
}
val addr = io.req.bits.addr >> rowOffBits
for (w <- 0 until nWays) {
val array = SeqMem(nSets*refillCycles, Vec(rowBytes, Bits(width=8)))
val valid = io.req.valid && (Bool(nWays == 1) || io.req.bits.way_en(w))
when (valid && io.req.bits.write) {
val data = Vec.tabulate(rowBytes)(i => io.req.bits.wdata(8*(i+1)-1, 8*i))
array.write(addr, data, io.req.bits.wmask.toBools)
}
io.resp(w) := array.read(addr, valid && !io.req.bits.write).toBits
}
}
class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
val io = new Bundle {
val cpu = (new HellaCacheIO).flip
val ptw = new TLBPTWIO()
val mem = new ClientTileLinkIO
}
val fq = Module(new FinishQueue(1))
require(rowBits == encRowBits) // no ECC
require(refillCyclesPerBeat == 1)
require(rowBits >= coreDataBits)
// tags
val replacer = p(Replacer)()
def onReset = L1Metadata(UInt(0), ClientMetadata.onReset)
val meta = Module(new MetadataArray(onReset _))
val metaReadArb = Module(new Arbiter(new MetaReadReq, 3))
val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, 3))
meta.io.read <> metaReadArb.io.out
meta.io.write <> metaWriteArb.io.out
// data
val data = Module(new DCacheDataArray)
val dataArb = Module(new Arbiter(new DCacheDataReq, 4))
data.io.req <> dataArb.io.out
dataArb.io.out.ready := true
val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false))
val s1_probe = Reg(next=io.mem.probe.fire(), init=Bool(false))
val probe_bits = RegEnable(io.mem.probe.bits, io.mem.probe.fire())
val s1_nack = Wire(init=Bool(false))
val s1_valid_masked = s1_valid && !io.cpu.s1_kill
val s1_valid_not_nacked = s1_valid_masked && !s1_nack
val s1_req = Reg(io.cpu.req.bits)
when (metaReadArb.io.out.valid) {
s1_req := io.cpu.req.bits
s1_req.addr := Cat(io.cpu.req.bits.addr >> untagBits, metaReadArb.io.out.bits.idx, io.cpu.req.bits.addr(blockOffBits-1,0))
}
val s1_read = isRead(s1_req.cmd)
val s1_write = isWrite(s1_req.cmd)
val s1_readwrite = s1_read || s1_write
val s1_flush_valid = Reg(Bool())
val s_ready :: s_grant_wait :: s_voluntary_writeback :: s_probe_rep_dirty :: s_probe_rep_clean :: s_probe_rep_miss :: s_voluntary_write_meta :: s_probe_write_meta :: Nil = Enum(UInt(), 8)
val grant_wait = Reg(init=Bool(false))
val release_ack_wait = Reg(init=Bool(false))
val release_state = Reg(init=s_ready)
val pstore1_valid = Wire(Bool())
val pstore2_valid = Reg(Bool())
val inWriteback = release_state === s_voluntary_writeback || release_state === s_probe_rep_dirty
val releaseWay = Wire(UInt())
io.cpu.req.ready := (release_state === s_ready) && !grant_wait && !s1_nack
// hit initiation path
dataArb.io.in(3).valid := io.cpu.req.valid && isRead(io.cpu.req.bits.cmd)
dataArb.io.in(3).bits.write := false
dataArb.io.in(3).bits.addr := io.cpu.req.bits.addr
dataArb.io.in(3).bits.way_en := ~UInt(0, nWays)
when (!dataArb.io.in(3).ready && isRead(io.cpu.req.bits.cmd)) { io.cpu.req.ready := false }
metaReadArb.io.in(2).valid := io.cpu.req.valid
metaReadArb.io.in(2).bits.idx := io.cpu.req.bits.addr(idxMSB, idxLSB)
metaReadArb.io.in(2).bits.way_en := ~UInt(0, nWays)
when (!metaReadArb.io.in(2).ready) { io.cpu.req.ready := false }
// address translation
val tlb = Module(new TLB)
io.ptw <> tlb.io.ptw
tlb.io.req.valid := s1_valid_masked && s1_readwrite
tlb.io.req.bits.passthrough := s1_req.phys
tlb.io.req.bits.vpn := s1_req.addr >> pgIdxBits
tlb.io.req.bits.instruction := false
tlb.io.req.bits.store := s1_write
when (!tlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := false }
when (s1_valid && s1_readwrite && tlb.io.resp.miss) { s1_nack := true }
val s1_paddr = Cat(tlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0))
val s1_tag = Mux(s1_probe, probe_bits.addr_block >> idxBits, s1_paddr(paddrBits-1, untagBits))
val s1_hit_way = meta.io.resp.map(r => r.coh.isValid() && r.tag === s1_tag).toBits
val s1_hit_state = ClientMetadata.onReset.fromBits(
meta.io.resp.map(r => Mux(r.tag === s1_tag, r.coh.toBits, UInt(0)))
.reduce (_|_))
val s1_data_way = Mux(inWriteback, releaseWay, s1_hit_way)
val s1_data = Mux1H(s1_data_way, data.io.resp) // retime into s2 if critical
val s1_victim_way = Wire(init = replacer.way)
val s2_valid = Reg(next=s1_valid_masked, init=Bool(false))
val s2_probe = Reg(next=s1_probe, init=Bool(false))
val releaseInFlight = s1_probe || s2_probe || release_state =/= s_ready
val s2_valid_masked = s2_valid && Reg(next = !s1_nack)
val s2_req = Reg(io.cpu.req.bits)
val s2_uncached = Reg(Bool())
when (s1_valid_not_nacked || s1_flush_valid) {
s2_req := s1_req
s2_req.addr := s1_paddr
s2_uncached := !tlb.io.resp.cacheable
}
val s2_read = isRead(s2_req.cmd)
val s2_write = isWrite(s2_req.cmd)
val s2_readwrite = s2_read || s2_write
val s2_flush_valid = RegNext(s1_flush_valid)
val s2_data = RegEnable(s1_data, s1_valid || inWriteback)
val s2_probe_way = RegEnable(s1_hit_way, s1_probe)
val s2_probe_state = RegEnable(s1_hit_state, s1_probe)
val s2_hit_way = RegEnable(s1_hit_way, s1_valid_not_nacked)
val s2_hit_state = RegEnable(s1_hit_state, s1_valid_not_nacked)
val s2_hit = s2_hit_state.isHit(s2_req.cmd)
val s2_valid_hit = s2_valid_masked && s2_readwrite && s2_hit
val s2_valid_miss = s2_valid_masked && s2_readwrite && !s2_hit && !(pstore1_valid || pstore2_valid) && !release_ack_wait
val s2_valid_cached_miss = s2_valid_miss && !s2_uncached
val s2_victimize = s2_valid_cached_miss || s2_flush_valid
val s2_valid_uncached = s2_valid_miss && s2_uncached
val s2_victim_way = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_way, UIntToOH(RegEnable(s1_victim_way, s1_valid_not_nacked || s1_flush_valid)))
val s2_victim_tag = RegEnable(meta.io.resp(s1_victim_way).tag, s1_valid_not_nacked || s1_flush_valid)
val s2_victim_state = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_state, RegEnable(meta.io.resp(s1_victim_way).coh, s1_valid_not_nacked || s1_flush_valid))
val s2_victim_valid = s2_victim_state.isValid()
val s2_victim_dirty = s2_victim_state.requiresVoluntaryWriteback()
io.cpu.s2_nack := s2_valid && !s2_valid_hit && !(s2_valid_uncached && io.mem.acquire.ready)
when (s2_valid && !s2_valid_hit) { s1_nack := true }
// exceptions
val misaligned = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).misaligned
io.cpu.xcpt.ma.ld := s1_read && misaligned
io.cpu.xcpt.ma.st := s1_write && misaligned
io.cpu.xcpt.pf.ld := s1_read && tlb.io.resp.xcpt_ld
io.cpu.xcpt.pf.st := s1_write && tlb.io.resp.xcpt_st
assert(!(Reg(next=
(io.cpu.xcpt.ma.ld || io.cpu.xcpt.ma.st || io.cpu.xcpt.pf.ld || io.cpu.xcpt.pf.st)) &&
s2_valid_masked),
"DCache exception occurred - cache response not killed.")
// load reservations
val s2_lr = Bool(usingAtomics) && s2_req.cmd === M_XLR
val s2_sc = Bool(usingAtomics) && s2_req.cmd === M_XSC
val lrscCount = Reg(init=UInt(0))
val lrscValid = lrscCount > 0
val lrscAddr = Reg(UInt())
val s2_sc_fail = s2_sc && !(lrscValid && lrscAddr === (s2_req.addr >> blockOffBits))
when (s2_valid_hit && s2_lr) {
lrscCount := lrscCycles - 1
lrscAddr := s2_req.addr >> blockOffBits
}
when (lrscValid) { lrscCount := lrscCount - 1 }
when ((s2_valid_hit && s2_sc) || io.cpu.invalidate_lr) { lrscCount := 0 }
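// A hitting LR above opens a reservation window of lrscCycles cycles; the
// counter counts down every cycle, and any SC that reaches stage 2 as a hit
// (or an external invalidate_lr) closes the window, so a stale reservation
// cannot let a later SC succeed.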
// pending store buffer
val pstore1_cmd = RegEnable(s1_req.cmd, s1_valid_not_nacked && s1_write)
val pstore1_typ = RegEnable(s1_req.typ, s1_valid_not_nacked && s1_write)
val pstore1_addr = RegEnable(s1_paddr, s1_valid_not_nacked && s1_write)
val pstore1_data = RegEnable(io.cpu.s1_data, s1_valid_not_nacked && s1_write)
val pstore1_way = RegEnable(s1_hit_way, s1_valid_not_nacked && s1_write)
val pstore1_storegen = new StoreGen(pstore1_typ, pstore1_addr, pstore1_data, wordBytes)
val pstore1_storegen_data = Wire(init = pstore1_storegen.data)
val pstore1_amo = Bool(usingAtomics) && isRead(pstore1_cmd)
val pstore_drain_structural = pstore1_valid && pstore2_valid && ((s1_valid && s1_write) || pstore1_amo)
val pstore_drain_opportunistic = !(io.cpu.req.valid && isRead(io.cpu.req.bits.cmd))
val pstore_drain_on_miss = releaseInFlight || io.cpu.s2_nack
val pstore_drain =
Bool(usingAtomics) && pstore_drain_structural ||
(((pstore1_valid && !pstore1_amo) || pstore2_valid) && (pstore_drain_opportunistic || pstore_drain_on_miss))
pstore1_valid := {
val s2_store_valid = s2_valid_hit && s2_write && !s2_sc_fail
val pstore1_held = Reg(Bool())
assert(!s2_store_valid || !pstore1_held)
pstore1_held := (s2_store_valid || pstore1_held) && pstore2_valid && !pstore_drain
s2_store_valid || pstore1_held
}
val advance_pstore1 = pstore1_valid && (pstore2_valid === pstore_drain)
pstore2_valid := pstore2_valid && !pstore_drain || advance_pstore1
val pstore2_addr = RegEnable(pstore1_addr, advance_pstore1)
val pstore2_way = RegEnable(pstore1_way, advance_pstore1)
val pstore2_storegen_data = RegEnable(pstore1_storegen_data, advance_pstore1)
val pstore2_storegen_mask = RegEnable(pstore1_storegen.mask, advance_pstore1)
dataArb.io.in(0).valid := pstore_drain
dataArb.io.in(0).bits.write := true
dataArb.io.in(0).bits.addr := Mux(pstore2_valid, pstore2_addr, pstore1_addr)
dataArb.io.in(0).bits.way_en := Mux(pstore2_valid, pstore2_way, pstore1_way)
dataArb.io.in(0).bits.wdata := Fill(rowWords, Mux(pstore2_valid, pstore2_storegen_data, pstore1_storegen_data))
val pstore_mask_shift = Mux(pstore2_valid, pstore2_addr, pstore1_addr).extract(rowOffBits-1,offsetlsb) << wordOffBits
dataArb.io.in(0).bits.wmask := Mux(pstore2_valid, pstore2_storegen_mask, pstore1_storegen.mask) << pstore_mask_shift
// store->load RAW hazard detection
val s1_idx = s1_req.addr(idxMSB, wordOffBits)
val s1_raw_hazard = s1_read &&
((pstore1_valid && pstore1_addr(idxMSB, wordOffBits) === s1_idx) ||
(pstore2_valid && pstore2_addr(idxMSB, wordOffBits) === s1_idx))
when (s1_valid && s1_raw_hazard) { s1_nack := true }
val s2_new_hit_state = s2_hit_state.onHit(s2_req.cmd)
metaWriteArb.io.in(0).valid := (s2_valid_hit && s2_hit_state =/= s2_new_hit_state) || (s2_victimize && !s2_victim_dirty)
metaWriteArb.io.in(0).bits.way_en := s2_victim_way
metaWriteArb.io.in(0).bits.idx := s2_req.addr(idxMSB, idxLSB)
metaWriteArb.io.in(0).bits.data.coh := Mux(s2_hit, s2_new_hit_state, ClientMetadata.onReset)
metaWriteArb.io.in(0).bits.data.tag := s2_req.addr(paddrBits-1, untagBits)
// acquire
val cachedGetMessage = s2_hit_state.makeAcquire(
client_xact_id = UInt(0),
addr_block = s2_req.addr(paddrBits-1, blockOffBits),
op_code = s2_req.cmd)
val uncachedGetMessage = Get(
client_xact_id = UInt(0),
addr_block = s2_req.addr(paddrBits-1, blockOffBits),
addr_beat = s2_req.addr(blockOffBits-1, beatOffBits),
addr_byte = s2_req.addr(beatOffBits-1, 0),
operand_size = s2_req.typ,
alloc = Bool(false))
val uncachedPutOffset = s2_req.addr.extract(beatOffBits-1, wordOffBits)
val uncachedPutMessage = Put(
client_xact_id = UInt(0),
addr_block = s2_req.addr(paddrBits-1, blockOffBits),
addr_beat = s2_req.addr(blockOffBits-1, beatOffBits),
data = Fill(beatWords, pstore1_storegen.data),
wmask = Some(pstore1_storegen.mask << (uncachedPutOffset << wordOffBits)),
alloc = Bool(false))
val uncachedPutAtomicMessage = PutAtomic(
client_xact_id = UInt(0),
addr_block = s2_req.addr(paddrBits-1, blockOffBits),
addr_beat = s2_req.addr(blockOffBits-1, beatOffBits),
addr_byte = s2_req.addr(beatOffBits-1, 0),
atomic_opcode = s2_req.cmd,
operand_size = s2_req.typ,
data = Fill(beatWords, pstore1_storegen.data))
io.mem.acquire.valid := ((s2_valid_cached_miss && !s2_victim_dirty) || s2_valid_uncached) && fq.io.enq.ready
io.mem.acquire.bits := cachedGetMessage
when (s2_uncached) {
assert(!s2_valid_masked || !s2_hit_state.isValid(), "cache hit on uncached access")
io.mem.acquire.bits := uncachedGetMessage
when (s2_write) {
io.mem.acquire.bits := uncachedPutMessage
when (pstore1_amo) {
io.mem.acquire.bits := uncachedPutAtomicMessage
}
}
}
when (io.mem.acquire.fire()) { grant_wait := true }
// grant
val grantIsRefill = io.mem.grant.bits.hasMultibeatData()
val grantIsVoluntary = io.mem.grant.bits.isVoluntary()
val grantIsUncached = !grantIsRefill && !grantIsVoluntary
when (io.mem.grant.valid) {
assert(grant_wait || grantIsVoluntary && release_ack_wait, "unexpected grant")
when (grantIsUncached) { s2_data := io.mem.grant.bits.data }
when (grantIsVoluntary) { release_ack_wait := false }
}
val (refillCount, refillDone) = Counter(io.mem.grant.fire() && grantIsRefill, refillCycles)
val grantDone = refillDone || grantIsUncached
when (io.mem.grant.fire() && grantDone) { grant_wait := false }
// data refill
dataArb.io.in(1).valid := grantIsRefill && io.mem.grant.valid
io.mem.grant.ready := true
assert(dataArb.io.in(1).ready || !dataArb.io.in(1).valid)
dataArb.io.in(1).bits.write := true
dataArb.io.in(1).bits.addr := Cat(s2_req.addr(paddrBits-1, blockOffBits), io.mem.grant.bits.addr_beat) << beatOffBits
dataArb.io.in(1).bits.way_en := s2_victim_way
dataArb.io.in(1).bits.wdata := io.mem.grant.bits.data
dataArb.io.in(1).bits.wmask := ~UInt(0, rowBytes)
// tag updates on refill
metaWriteArb.io.in(1).valid := refillDone
assert(!metaWriteArb.io.in(1).valid || metaWriteArb.io.in(1).ready)
metaWriteArb.io.in(1).bits.way_en := s2_victim_way
metaWriteArb.io.in(1).bits.idx := s2_req.addr(idxMSB, idxLSB)
metaWriteArb.io.in(1).bits.data.coh := s2_hit_state.onGrant(io.mem.grant.bits, s2_req.cmd)
metaWriteArb.io.in(1).bits.data.tag := s2_req.addr(paddrBits-1, untagBits)
// finish
fq.io.enq.valid := io.mem.grant.fire() && io.mem.grant.bits.requiresAck() && (!grantIsRefill || refillDone)
fq.io.enq.bits := io.mem.grant.bits.makeFinish()
io.mem.finish <> fq.io.deq
when (fq.io.enq.valid) { assert(fq.io.enq.ready) }
when (refillDone) { replacer.miss }
// probe
val block_probe = releaseInFlight || lrscValid || (s2_valid_hit && s2_lr)
metaReadArb.io.in(1).valid := io.mem.probe.valid && !block_probe
io.mem.probe.ready := metaReadArb.io.in(1).ready && !block_probe && !s1_valid && (!s2_valid || s2_valid_hit)
metaReadArb.io.in(1).bits.idx := io.mem.probe.bits.addr_block
metaReadArb.io.in(1).bits.way_en := ~UInt(0, nWays)
// release
val (writebackCount, writebackDone) = Counter(io.mem.release.fire() && inWriteback, refillCycles)
val releaseDone = writebackDone || (io.mem.release.fire() && !inWriteback)
val releaseRejected = io.mem.release.valid && !io.mem.release.ready
val s1_release_data_valid = Reg(next = dataArb.io.in(2).fire())
val s2_release_data_valid = Reg(next = s1_release_data_valid && !releaseRejected)
val releaseDataBeat = Cat(UInt(0), writebackCount) + Mux(releaseRejected, UInt(0), s1_release_data_valid + Cat(UInt(0), s2_release_data_valid))
io.mem.release.valid := s2_release_data_valid
io.mem.release.bits := ClientMetadata.onReset.makeRelease(probe_bits)
val voluntaryReleaseMessage = s2_victim_state.makeVoluntaryWriteback(UInt(0), UInt(0))
val voluntaryNewCoh = s2_victim_state.onCacheControl(M_FLUSH)
val probeResponseMessage = s2_probe_state.makeRelease(probe_bits)
val probeNewCoh = s2_probe_state.onProbe(probe_bits)
val newCoh = Wire(init = probeNewCoh)
releaseWay := s2_probe_way
when (s2_victimize && s2_victim_dirty) {
assert(!s2_hit_state.isValid())
release_state := s_voluntary_writeback
probe_bits.addr_block := Cat(s2_victim_tag, s2_req.addr(idxMSB, idxLSB))
}
when (s2_probe) {
when (s2_probe_state.requiresVoluntaryWriteback()) { release_state := s_probe_rep_dirty }
.elsewhen (s2_probe_state.isValid()) { release_state := s_probe_rep_clean }
.otherwise {
io.mem.release.valid := true
release_state := s_probe_rep_miss
}
}
when (releaseDone) { release_state := s_ready }
when (release_state === s_probe_rep_miss || release_state === s_probe_rep_clean) {
io.mem.release.valid := true
}
when (release_state === s_probe_rep_clean || release_state === s_probe_rep_dirty) {
io.mem.release.bits := probeResponseMessage
when (releaseDone) { release_state := s_probe_write_meta }
}
when (release_state === s_voluntary_writeback || release_state === s_voluntary_write_meta) {
io.mem.release.bits := voluntaryReleaseMessage
newCoh := voluntaryNewCoh
releaseWay := s2_victim_way
when (releaseDone) {
release_state := s_voluntary_write_meta
release_ack_wait := true
}
}
when (s2_probe && !io.mem.release.fire()) { s1_nack := true }
io.mem.release.bits.addr_block := probe_bits.addr_block
io.mem.release.bits.addr_beat := writebackCount
io.mem.release.bits.data := s2_data
dataArb.io.in(2).valid := inWriteback && releaseDataBeat < refillCycles
dataArb.io.in(2).bits.write := false
dataArb.io.in(2).bits.addr := Cat(io.mem.release.bits.addr_block, releaseDataBeat(log2Up(refillCycles)-1,0)) << rowOffBits
dataArb.io.in(2).bits.way_en := ~UInt(0, nWays)
metaWriteArb.io.in(2).valid := (release_state === s_voluntary_write_meta || release_state === s_probe_write_meta)
metaWriteArb.io.in(2).bits.way_en := releaseWay
metaWriteArb.io.in(2).bits.idx := io.mem.release.bits.full_addr()(idxMSB, idxLSB)
metaWriteArb.io.in(2).bits.data.coh := newCoh
metaWriteArb.io.in(2).bits.data.tag := io.mem.release.bits.full_addr()(paddrBits-1, untagBits)
when (metaWriteArb.io.in(2).fire()) { release_state := s_ready }
// cached response
io.cpu.resp.valid := s2_valid_hit
io.cpu.resp.bits := s2_req
io.cpu.resp.bits.has_data := s2_read
io.cpu.resp.bits.replay := false
io.cpu.ordered := !(s1_valid || s2_valid || grant_wait)
// uncached response
io.cpu.replay_next := io.mem.grant.valid && grantIsUncached
val doUncachedResp = Reg(next = io.cpu.replay_next)
when (doUncachedResp) {
assert(!s2_valid_hit)
io.cpu.resp.valid := true
io.cpu.resp.bits.replay := true
}
// load data subword mux/sign extension
val s2_word_idx = s2_req.addr.extract(log2Up(rowBits/8)-1, log2Up(wordBytes))
val s2_data_word = s2_data >> Cat(s2_word_idx, UInt(0, log2Up(coreDataBits)))
val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word, s2_sc, wordBytes)
io.cpu.resp.bits.data := loadgen.data | s2_sc_fail
io.cpu.resp.bits.data_word_bypass := loadgen.wordData
io.cpu.resp.bits.store_data := pstore1_data
// AMOs
if (usingAtomics) {
val amoalu = Module(new AMOALU)
amoalu.io.addr := pstore1_addr
amoalu.io.cmd := pstore1_cmd
amoalu.io.typ := pstore1_typ
amoalu.io.lhs := s2_data_word
amoalu.io.rhs := pstore1_data
pstore1_storegen_data := amoalu.io.out
} else {
assert(!(s1_valid_masked && s1_read && s1_write), "unsupported D$ operation")
}
// flushes
val flushed = Reg(init=Bool(true))
val flushing = Reg(init=Bool(false))
val flushCounter = Counter(nSets * nWays)
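// The flush sweep visits every (set, way) pair: the counter's low log2(nSets)
// bits provide the metadata index and the remaining high bits select the way
// to victimize (see s1_victim_way below), so an M_FLUSH_ALL walks the whole
// cache and writes back each dirty line through the release state machine.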
when (io.mem.acquire.fire()) { flushed := false }
when (s2_valid_masked && s2_req.cmd === M_FLUSH_ALL) {
io.cpu.s2_nack := !flushed
when (!flushed) {
flushing := !release_ack_wait
}
}
s1_flush_valid := metaReadArb.io.in(0).fire() && !s1_flush_valid && !s2_flush_valid && release_state === s_ready && !release_ack_wait
metaReadArb.io.in(0).valid := flushing
metaReadArb.io.in(0).bits.idx := flushCounter.value
metaReadArb.io.in(0).bits.way_en := ~UInt(0, nWays)
when (flushing) {
s1_victim_way := flushCounter.value >> log2Up(nSets)
when (s2_flush_valid) {
when (flushCounter.inc()) {
flushed := true
}
}
when (flushed && release_state === s_ready && !release_ack_wait) {
flushing := false
}
}
}


@@ -0,0 +1,203 @@
// See LICENSE for license details.
package rocket
import Chisel._
object DecodeLogic
{
def term(lit: BitPat) =
new Term(lit.value, BigInt(2).pow(lit.getWidth)-(lit.mask+1))
def logic(addr: UInt, addrWidth: Int, cache: scala.collection.mutable.Map[Term,Bool], terms: Seq[Term]) = {
terms.map { t =>
cache.getOrElseUpdate(t, (if (t.mask == 0) addr else addr & Bits(BigInt(2).pow(addrWidth)-(t.mask+1), addrWidth)) === Bits(t.value, addrWidth))
}.foldLeft(Bool(false))(_||_)
}
def apply(addr: UInt, default: BitPat, mapping: Iterable[(BitPat, BitPat)]): UInt = {
val cache = caches.getOrElseUpdate(addr, collection.mutable.Map[Term,Bool]())
val dterm = term(default)
val (keys, values) = mapping.unzip
val addrWidth = keys.map(_.getWidth).max
val terms = keys.toList.map(k => term(k))
val termvalues = terms zip values.toList.map(term(_))
for (t <- keys.zip(terms).tails; if !t.isEmpty)
for (u <- t.tail)
assert(!t.head._2.intersects(u._2), "DecodeLogic: keys " + t.head + " and " + u + " overlap")
(0 until default.getWidth.max(values.map(_.getWidth).max)).map({ case (i: Int) =>
val mint = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 1 }.map(_._1)
val maxt = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 0 }.map(_._1)
val dc = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 1 }.map(_._1)
if (((dterm.mask >> i) & 1) != 0) {
logic(addr, addrWidth, cache, SimplifyDC(mint, maxt, addrWidth)).toBits
} else {
val defbit = (dterm.value.toInt >> i) & 1
val t = if (defbit == 0) mint else maxt
val bit = logic(addr, addrWidth, cache, Simplify(t, dc, addrWidth)).toBits
if (defbit == 0) bit else ~bit
}
}).reverse.reduceRight(Cat(_,_))
}
def apply(addr: UInt, default: Seq[BitPat], mappingIn: Iterable[(BitPat, Seq[BitPat])]): Seq[UInt] = {
val mapping = collection.mutable.ArrayBuffer.fill(default.size)(collection.mutable.ArrayBuffer[(BitPat, BitPat)]())
for ((key, values) <- mappingIn)
for ((value, i) <- values zipWithIndex)
mapping(i) += key -> value
for ((thisDefault, thisMapping) <- default zip mapping)
yield apply(addr, thisDefault, thisMapping)
}
def apply(addr: UInt, default: Seq[BitPat], mappingIn: List[(UInt, Seq[BitPat])]): Seq[UInt] =
apply(addr, default, mappingIn.map(m => (BitPat(m._1), m._2)).asInstanceOf[Iterable[(BitPat, Seq[BitPat])]])
def apply(addr: UInt, trues: Iterable[UInt], falses: Iterable[UInt]): Bool =
apply(addr, BitPat.DC(1), trues.map(BitPat(_) -> BitPat("b1")) ++ falses.map(BitPat(_) -> BitPat("b0"))).toBool
private val caches = collection.mutable.Map[UInt,collection.mutable.Map[Term,Bool]]()
}
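// Illustrative sketch of the Seq[BitPat] overload above: a tiny two-output
// decoder. The module, signal names and patterns here are hypothetical; the
// real users of this API are the FPU and integer decoders elsewhere in this
// commit, which follow the same shape.
class DecodeLogicExample extends Module {
  val io = new Bundle {
    val opcode = UInt(INPUT, 2)
    val en = Bool(OUTPUT)
    val sel = Bool(OUTPUT)
  }
  // default outputs, then a key-pattern -> output-patterns table;
  // key patterns must not overlap, which apply() asserts at elaboration time
  val default = List(BitPat("b0"), BitPat("b0"))
  val table = Array(
    BitPat("b00") -> List(BitPat("b1"), BitPat("b0")),
    BitPat("b1?") -> List(BitPat("b0"), BitPat("b1")))
  val Seq(en, sel) = DecodeLogic(io.opcode, default, table)
  io.en := en.toBool
  io.sel := sel.toBool
}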
class Term(val value: BigInt, val mask: BigInt = 0)
{
var prime = true
def covers(x: Term) = ((value ^ x.value) &~ mask | x.mask &~ mask) == 0
def intersects(x: Term) = ((value ^ x.value) &~ mask &~ x.mask) == 0
override def equals(that: Any) = that match {
case x: Term => x.value == value && x.mask == mask
case _ => false
}
override def hashCode = value.toInt
def < (that: Term) = value < that.value || value == that.value && mask < that.mask
def similar(x: Term) = {
val diff = value - x.value
mask == x.mask && value > x.value && (diff & diff-1) == 0
}
def merge(x: Term) = {
prime = false
x.prime = false
val bit = value - x.value
new Term(value &~ bit, mask | bit)
}
override def toString = value.toString(16) + "-" + mask.toString(16) + (if (prime) "p" else "")
}
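// Worked example: a Term is a cube over the input bits, with mask marking
// don't-care positions. Over four input bits, Term(value = 0x4, mask = 0x3)
// denotes 01xx; it covers Term(0x6, 0x2) (011x) because all non-masked bits
// agree. Two equal-mask terms whose values differ in exactly one bit are
// similar, and merging Term(0x5, 0x0) with Term(0x4, 0x0) yields
// Term(0x4, 0x1), i.e. 010x.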
object Simplify
{
def getPrimeImplicants(implicants: Seq[Term], bits: Int) = {
var prime = List[Term]()
implicants.foreach(_.prime = true)
val cols = (0 to bits).map(b => implicants.filter(b == _.mask.bitCount))
val table = cols.map(c => (0 to bits).map(b => collection.mutable.Set(c.filter(b == _.value.bitCount):_*)))
for (i <- 0 to bits) {
for (j <- 0 until bits-i)
table(i)(j).foreach(a => table(i+1)(j) ++= table(i)(j+1).filter(_.similar(a)).map(_.merge(a)))
for (r <- table(i))
for (p <- r; if p.prime)
prime = p :: prime
}
prime.sortWith(_<_)
}
def getEssentialPrimeImplicants(prime: Seq[Term], minterms: Seq[Term]): (Seq[Term],Seq[Term],Seq[Term]) = {
for (i <- 0 until prime.size) {
val icover = minterms.filter(prime(i) covers _)
for (j <- 0 until prime.size) {
val jcover = minterms.filter(prime(j) covers _)
if (icover.size > jcover.size && jcover.forall(prime(i) covers _))
return getEssentialPrimeImplicants(prime.filter(_ != prime(j)), minterms)
}
}
val essentiallyCovered = minterms.filter(t => prime.count(_ covers t) == 1)
val essential = prime.filter(p => essentiallyCovered.exists(p covers _))
val nonessential = prime.filterNot(essential contains _)
val uncovered = minterms.filterNot(t => essential.exists(_ covers t))
if (essential.isEmpty || uncovered.isEmpty)
(essential, nonessential, uncovered)
else {
val (a, b, c) = getEssentialPrimeImplicants(nonessential, uncovered)
(essential ++ a, b, c)
}
}
def getCost(cover: Seq[Term], bits: Int) = cover.map(bits - _.mask.bitCount).sum
def cheaper(a: List[Term], b: List[Term], bits: Int) = {
val ca = getCost(a, bits)
val cb = getCost(b, bits)
def listLess(a: List[Term], b: List[Term]): Boolean = !b.isEmpty && (a.isEmpty || a.head < b.head || a.head == b.head && listLess(a.tail, b.tail))
ca < cb || ca == cb && listLess(a.sortWith(_<_), b.sortWith(_<_))
}
def getCover(implicants: Seq[Term], minterms: Seq[Term], bits: Int) = {
if (minterms.nonEmpty) {
val cover = minterms.map(m => implicants.filter(_.covers(m)).map(i => collection.mutable.Set(i)))
val all = cover.reduceLeft((c0, c1) => c0.map(a => c1.map(_ ++ a)).reduceLeft(_++_))
all.map(_.toList).reduceLeft((a, b) => if (cheaper(a, b, bits)) a else b)
} else
Seq[Term]()
}
def stringify(s: Seq[Term], bits: Int) = s.map(t => (0 until bits).map(i => if ((t.mask & (1 << i)) != 0) "x" else ((t.value >> i) & 1).toString).reduceLeft(_+_).reverse).reduceLeft(_+" + "+_)
def apply(minterms: Seq[Term], dontcares: Seq[Term], bits: Int) = {
val prime = getPrimeImplicants(minterms ++ dontcares, bits)
minterms.foreach(t => assert(prime.exists(_.covers(t))))
val (eprime, prime2, uncovered) = getEssentialPrimeImplicants(prime, minterms)
val cover = eprime ++ getCover(prime2, uncovered, bits)
minterms.foreach(t => assert(cover.exists(_.covers(t)))) // sanity check
cover
}
}
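// Worked example: minimizing a function of two input bits with minterms
// {01, 11} and no don't-cares first merges them into the single prime
// implicant x1, i.e. Term(value = 1, mask = 2). That implicant is essential
// (it alone covers both minterms), so apply() returns Seq(Term(1, 2)) and the
// logic generated from it in DecodeLogic reduces to addr(0) === 1.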
object SimplifyDC
{
def getImplicitDC(maxterms: Seq[Term], term: Term, bits: Int, above: Boolean): Term = {
for (i <- 0 until bits) {
var t: Term = null
if (above && ((term.value | term.mask) & (BigInt(1) << i)) == 0)
t = new Term(term.value | (BigInt(1) << i), term.mask)
else if (!above && (term.value & (BigInt(1) << i)) != 0)
t = new Term(term.value & ~(BigInt(1) << i), term.mask)
if (t != null && !maxterms.exists(_.intersects(t)))
return t
}
null
}
def getPrimeImplicants(minterms: Seq[Term], maxterms: Seq[Term], bits: Int) = {
var prime = List[Term]()
minterms.foreach(_.prime = true)
var mint = minterms.map(t => new Term(t.value, t.mask))
val cols = (0 to bits).map(b => mint.filter(b == _.mask.bitCount))
val table = cols.map(c => (0 to bits).map(b => collection.mutable.Set(c.filter(b == _.value.bitCount):_*)))
for (i <- 0 to bits) {
for (j <- 0 until bits-i) {
table(i)(j).foreach(a => table(i+1)(j) ++= table(i)(j+1).filter(_ similar a).map(_ merge a))
}
for (j <- 0 until bits-i) {
for (a <- table(i)(j).filter(_.prime)) {
val dc = getImplicitDC(maxterms, a, bits, true)
if (dc != null)
table(i+1)(j) += dc merge a
}
for (a <- table(i)(j+1).filter(_.prime)) {
val dc = getImplicitDC(maxterms, a, bits, false)
if (dc != null)
table(i+1)(j) += a merge dc
}
}
for (r <- table(i))
for (p <- r; if p.prime)
prime = p :: prime
}
prime.sortWith(_<_)
}
def verify(cover: Seq[Term], minterms: Seq[Term], maxterms: Seq[Term]) = {
assert(minterms.forall(t => cover.exists(_ covers t)))
assert(maxterms.forall(t => !cover.exists(_ intersects t)))
}
def apply(minterms: Seq[Term], maxterms: Seq[Term], bits: Int) = {
val prime = getPrimeImplicants(minterms, maxterms, bits)
val (eprime, prime2, uncovered) = Simplify.getEssentialPrimeImplicants(prime, minterms)
val cover = eprime ++ Simplify.getCover(prime2, uncovered, bits)
verify(cover, minterms, maxterms)
cover
}
}


@@ -0,0 +1,400 @@
package rocket
import Chisel._
import uncore.tilelink._
import uncore.devices._
import uncore.devices.DmaRequest._
import uncore.agents._
import uncore.util._
import junctions.{ParameterizedBundle, AddrMap}
import cde.Parameters
trait HasClientDmaParameters extends HasCoreParameters with HasDmaParameters {
val dmaAddrBits = coreMaxAddrBits
val dmaSegmentSizeBits = coreMaxAddrBits
val dmaSegmentBits = 24
}
abstract class ClientDmaBundle(implicit val p: Parameters)
extends ParameterizedBundle()(p) with HasClientDmaParameters
abstract class ClientDmaModule(implicit val p: Parameters)
extends Module with HasClientDmaParameters
class ClientDmaRequest(implicit p: Parameters) extends ClientDmaBundle()(p) {
val cmd = UInt(width = DMA_CMD_SZ)
val src_start = UInt(width = dmaAddrBits)
val dst_start = UInt(width = dmaAddrBits)
val src_stride = UInt(width = dmaSegmentSizeBits)
val dst_stride = UInt(width = dmaSegmentSizeBits)
val segment_size = UInt(width = dmaSegmentSizeBits)
val nsegments = UInt(width = dmaSegmentBits)
val word_size = UInt(width = dmaWordSizeBits)
}
object ClientDmaRequest {
def apply(cmd: UInt,
src_start: UInt,
dst_start: UInt,
segment_size: UInt,
nsegments: UInt = UInt(1),
src_stride: UInt = UInt(0),
dst_stride: UInt = UInt(0),
word_size: UInt = UInt(0))
(implicit p: Parameters) = {
val req = Wire(new ClientDmaRequest)
req.cmd := cmd
req.src_start := src_start
req.dst_start := dst_start
req.src_stride := src_stride
req.dst_stride := dst_stride
req.segment_size := segment_size
req.nsegments := nsegments
req.word_size := word_size
req
}
}
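// Illustrative sketch: a plain contiguous copy maps onto a single-segment
// request, so only cmd, the two start addresses and segment_size need to be
// supplied; strides and nsegments only matter for strided (2-D) transfers.
// The command constant named below is a hypothetical stand-in for the copy
// opcode defined alongside the other DMA_CMD_* constants.
//
//   val req = ClientDmaRequest(
//     cmd = DMA_CMD_COPY,      // hypothetical name
//     src_start = src_vaddr,
//     dst_start = dst_vaddr,
//     segment_size = UInt(4096))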
object ClientDmaResponse {
val pagefault = UInt("b01")
val invalid_region = UInt("b10")
def apply(status: UInt = UInt(0))(implicit p: Parameters) = {
val resp = Wire(new ClientDmaResponse)
resp.status := status
resp
}
}
class ClientDmaResponse(implicit p: Parameters) extends ClientDmaBundle {
val status = UInt(width = dmaStatusBits)
}
class ClientDmaIO(implicit p: Parameters) extends ParameterizedBundle()(p) {
val req = Decoupled(new ClientDmaRequest)
val resp = Valid(new ClientDmaResponse).flip
}
class DmaFrontend(implicit p: Parameters) extends CoreModule()(p)
with HasClientDmaParameters with HasTileLinkParameters {
val io = new Bundle {
val cpu = (new ClientDmaIO).flip
val mem = new ClientUncachedTileLinkIO
val ptw = new TLBPTWIO
val busy = Bool(OUTPUT)
val incr_outstanding = Bool(OUTPUT)
val host_id = UInt(INPUT, log2Up(nCores))
}
val tlb = Module(new DecoupledTLB()(p.alterPartial({
case CacheName => "L1D"
})))
io.ptw <> tlb.io.ptw
private val pgSize = 1 << pgIdxBits
val cmd = Reg(UInt(width = DMA_CMD_SZ))
val adv_ptr = MuxLookup(cmd, UInt("b11"), Seq(
DMA_CMD_PFR -> UInt("b10"),
DMA_CMD_PFW -> UInt("b10"),
DMA_CMD_SIN -> UInt("b10"),
DMA_CMD_SOUT -> UInt("b01")))
val segment_size = Reg(UInt(width = dmaSegmentSizeBits))
val bytes_left = Reg(UInt(width = dmaSegmentSizeBits))
val segments_left = Reg(UInt(width = dmaSegmentBits))
val word_size = Reg(UInt(width = dmaWordSizeBits))
val src_vaddr = Reg(UInt(width = dmaAddrBits))
val dst_vaddr = Reg(UInt(width = dmaAddrBits))
val src_vpn = src_vaddr(dmaAddrBits - 1, pgIdxBits)
val dst_vpn = dst_vaddr(dmaAddrBits - 1, pgIdxBits)
val src_idx = src_vaddr(pgIdxBits - 1, 0)
val dst_idx = dst_vaddr(pgIdxBits - 1, 0)
val src_pglen = UInt(pgSize) - src_idx
val dst_pglen = UInt(pgSize) - dst_idx
val src_stride = Reg(UInt(width = dmaSegmentSizeBits))
val dst_stride = Reg(UInt(width = dmaSegmentSizeBits))
val src_ppn = Reg(UInt(width = ppnBits))
val dst_ppn = Reg(UInt(width = ppnBits))
val src_paddr = Cat(src_ppn, src_idx)
val dst_paddr = Cat(dst_ppn, dst_idx)
val last_src_vpn = Reg(UInt(width = vpnBits))
val last_dst_vpn = Reg(UInt(width = vpnBits))
val tx_len = Util.minUInt(src_pglen, dst_pglen, bytes_left)
val dma_busy = Reg(init = UInt(0, tlMaxClientXacts))
val dma_xact_id = PriorityEncoder(~dma_busy)
val (dma_req_beat, dma_req_done) = Counter(io.mem.acquire.fire(), tlDataBeats)
val (s_idle :: s_translate :: s_dma_req :: s_dma_update ::
s_prepare :: s_finish :: Nil) = Enum(Bits(), 6)
val state = Reg(init = s_idle)
// lower bit is for src, higher bit is for dst
val to_translate = Reg(init = UInt(0, 2))
val tlb_sent = Reg(init = UInt(0, 2))
val tlb_to_send = to_translate & ~tlb_sent
val resp_status = Reg(UInt(width = dmaStatusBits))
def make_acquire(
addr_beat: UInt, client_xact_id: UInt, client_id: UInt,
cmd: UInt, source: UInt, dest: UInt,
length: UInt, size: UInt): Acquire = {
val data_blob = Wire(UInt(width = tlDataBeats * tlDataBits))
data_blob := DmaRequest(
xact_id = UInt(0),
client_id = client_id,
cmd = cmd,
source = source,
dest = dest,
length = length,
size = size).toBits
val data_beats = Vec(tlDataBeats, UInt(width = tlDataBits)).fromBits(data_blob)
val base_addr = addrMap("devices:dma").start
val addr_block = UInt(base_addr >> (tlBeatAddrBits + tlByteAddrBits))
PutBlock(
client_xact_id = client_xact_id,
addr_block = addr_block,
addr_beat = addr_beat,
data = data_beats(addr_beat),
alloc = Bool(false))
}
def check_region(cmd: UInt, src: UInt, dst: UInt): Bool = {
val src_cacheable = addrMap.isCacheable(src)
val dst_cacheable = addrMap.isCacheable(dst)
val dst_ok = Mux(cmd === DMA_CMD_SOUT, !dst_cacheable, dst_cacheable)
val src_ok = Mux(cmd === DMA_CMD_SIN, !src_cacheable, Bool(true))
dst_ok && src_ok
}
tlb.io.req.valid := tlb_to_send.orR
tlb.io.req.bits.vpn := Mux(tlb_to_send(0), src_vpn, dst_vpn)
tlb.io.req.bits.passthrough := Bool(false)
tlb.io.req.bits.instruction := Bool(false)
tlb.io.req.bits.store := !tlb_to_send(0)
tlb.io.resp.ready := tlb_sent.orR
when (tlb.io.req.fire()) {
tlb_sent := tlb_sent | PriorityEncoderOH(tlb_to_send)
}
when (tlb.io.resp.fire()) {
val recv_choice = PriorityEncoderOH(to_translate)
val error = Mux(recv_choice(0),
tlb.io.resp.bits.xcpt_ld, tlb.io.resp.bits.xcpt_st)
when (error) {
resp_status := ClientDmaResponse.pagefault
state := s_finish
}
// getting the src translation
when (recv_choice(0)) {
src_ppn := tlb.io.resp.bits.ppn
} .otherwise {
dst_ppn := tlb.io.resp.bits.ppn
}
to_translate := to_translate & ~recv_choice
}
io.cpu.req.ready := state === s_idle
io.cpu.resp.valid := state === s_finish
io.cpu.resp.bits := ClientDmaResponse(resp_status)
io.mem.acquire.valid := (state === s_dma_req) && !dma_busy.andR
io.mem.acquire.bits := make_acquire(
addr_beat = dma_req_beat,
client_id = io.host_id,
client_xact_id = dma_xact_id,
cmd = cmd, source = src_paddr, dest = dst_paddr,
length = tx_len, size = word_size)
io.mem.grant.ready := (state =/= s_dma_req)
when (io.cpu.req.fire()) {
val req = io.cpu.req.bits
val is_prefetch = req.cmd(2, 1) === UInt("b01")
cmd := req.cmd
src_vaddr := req.src_start
dst_vaddr := req.dst_start
src_stride := req.src_stride
dst_stride := req.dst_stride
segment_size := req.segment_size
segments_left := req.nsegments - UInt(1)
bytes_left := req.segment_size
word_size := req.word_size
to_translate := Mux(is_prefetch, UInt("b10"), UInt("b11"))
tlb_sent := UInt(0)
state := s_translate
}
when (state === s_translate && !to_translate.orR) {
when (check_region(cmd, src_paddr, dst_paddr)) {
state := s_dma_req
} .otherwise {
resp_status := ClientDmaResponse.invalid_region
state := s_finish
}
}
def setBusy(set: Bool, xact_id: UInt): UInt =
Mux(set, UIntToOH(xact_id), UInt(0))
dma_busy := (dma_busy |
setBusy(dma_req_done, dma_xact_id)) &
~setBusy(io.mem.grant.fire(), io.mem.grant.bits.client_xact_id)
when (dma_req_done) {
src_vaddr := src_vaddr + Mux(adv_ptr(0), tx_len, UInt(0))
dst_vaddr := dst_vaddr + Mux(adv_ptr(1), tx_len, UInt(0))
bytes_left := bytes_left - tx_len
state := s_dma_update
}
when (state === s_dma_update) {
when (bytes_left === UInt(0)) {
when (segments_left === UInt(0)) {
resp_status := UInt(0)
state := s_finish
} .otherwise {
last_src_vpn := src_vpn
last_dst_vpn := dst_vpn
src_vaddr := src_vaddr + src_stride
dst_vaddr := dst_vaddr + dst_stride
bytes_left := segment_size
segments_left := segments_left - UInt(1)
state := s_prepare
}
} .otherwise {
to_translate := adv_ptr & Cat(dst_idx === UInt(0), src_idx === UInt(0))
tlb_sent := UInt(0)
state := s_translate
}
}
when (state === s_prepare) {
to_translate := adv_ptr & Cat(
dst_vpn =/= last_dst_vpn,
src_vpn =/= last_src_vpn)
tlb_sent := UInt(0)
state := s_translate
}
when (state === s_finish) { state := s_idle }
io.busy := (state =/= s_idle) || dma_busy.orR
io.incr_outstanding := dma_req_done
}
object DmaCtrlRegNumbers {
val SRC_STRIDE = 0
val DST_STRIDE = 1
val SEGMENT_SIZE = 2
val NSEGMENTS = 3
val WORD_SIZE = 4
val RESP_STATUS = 5
val OUTSTANDING = 6
val NCSRS = 7
val CSR_BASE = 0x800
val CSR_END = CSR_BASE + NCSRS
}
import DmaCtrlRegNumbers._
class DmaCtrlRegFile(implicit val p: Parameters) extends Module
with HasClientDmaParameters with HasTileLinkParameters {
private val nWriteRegs = 5
private val nRegs = nWriteRegs + 2
val io = new Bundle {
val wen = Bool(INPUT)
val waddr = UInt(INPUT, log2Up(nRegs))
val wdata = UInt(INPUT, dmaSegmentSizeBits)
val src_stride = UInt(OUTPUT, dmaSegmentSizeBits)
val dst_stride = UInt(OUTPUT, dmaSegmentSizeBits)
val segment_size = UInt(OUTPUT, dmaSegmentSizeBits)
val nsegments = UInt(OUTPUT, dmaSegmentBits)
val word_size = UInt(OUTPUT, dmaWordSizeBits)
val incr_outstanding = Bool(INPUT)
val xact_outstanding = Bool(OUTPUT)
}
val regs = Reg(Vec(nWriteRegs, UInt(width = dmaSegmentSizeBits)))
val waddr = io.waddr(log2Up(NCSRS) - 1, 0)
io.src_stride := regs(SRC_STRIDE)
io.dst_stride := regs(DST_STRIDE)
io.segment_size := regs(SEGMENT_SIZE)
io.nsegments := regs(NSEGMENTS)
io.word_size := regs(WORD_SIZE)
when (io.wen && waddr < UInt(nWriteRegs)) {
regs.write(waddr, io.wdata)
}
val outstanding_cnt = TwoWayCounter(
io.incr_outstanding,
io.wen && io.waddr === UInt(OUTSTANDING),
tlMaxClientXacts)
io.xact_outstanding := outstanding_cnt > UInt(0)
}
class DmaController(implicit p: Parameters) extends RoCC()(p)
with HasClientDmaParameters {
io.mem.req.valid := Bool(false)
io.resp.valid := Bool(false)
io.interrupt := Bool(false)
val cmd = Queue(io.cmd)
val inst = cmd.bits.inst
val is_transfer = inst.funct < UInt(8)
val reg_status = Reg(UInt(width = dmaStatusBits))
val crfile = Module(new DmaCtrlRegFile)
crfile.io.waddr := io.csr.waddr
crfile.io.wdata := io.csr.wdata
crfile.io.wen := io.csr.wen
io.csr.rdata(SRC_STRIDE) := crfile.io.src_stride
io.csr.rdata(DST_STRIDE) := crfile.io.dst_stride
io.csr.rdata(SEGMENT_SIZE) := crfile.io.segment_size
io.csr.rdata(NSEGMENTS) := crfile.io.nsegments
io.csr.rdata(WORD_SIZE) := crfile.io.word_size
io.csr.rdata(RESP_STATUS) := reg_status
val frontend = Module(new DmaFrontend)
io.ptw(0) <> frontend.io.ptw
io.autl <> frontend.io.mem
crfile.io.incr_outstanding := frontend.io.incr_outstanding
frontend.io.host_id := io.host_id
frontend.io.cpu.req.valid := cmd.valid && is_transfer
frontend.io.cpu.req.bits := ClientDmaRequest(
cmd = cmd.bits.inst.funct,
src_start = cmd.bits.rs2,
dst_start = cmd.bits.rs1,
src_stride = crfile.io.src_stride,
dst_stride = crfile.io.dst_stride,
segment_size = crfile.io.segment_size,
nsegments = crfile.io.nsegments,
word_size = crfile.io.word_size)
cmd.ready := is_transfer && frontend.io.cpu.req.ready
when (frontend.io.cpu.resp.valid) {
reg_status := frontend.io.cpu.resp.bits.status
}
io.busy := cmd.valid || frontend.io.busy || crfile.io.xact_outstanding
}


@@ -0,0 +1,96 @@
// See LICENSE for license details.
package rocket
import Chisel._
import cde.{Parameters, Field}
import Instructions._
object ALU
{
val SZ_ALU_FN = 4
val FN_X = BitPat("b????")
val FN_ADD = UInt(0)
val FN_SL = UInt(1)
val FN_SEQ = UInt(2)
val FN_SNE = UInt(3)
val FN_XOR = UInt(4)
val FN_SR = UInt(5)
val FN_OR = UInt(6)
val FN_AND = UInt(7)
val FN_SUB = UInt(10)
val FN_SRA = UInt(11)
val FN_SLT = UInt(12)
val FN_SGE = UInt(13)
val FN_SLTU = UInt(14)
val FN_SGEU = UInt(15)
val FN_DIV = FN_XOR
val FN_DIVU = FN_SR
val FN_REM = FN_OR
val FN_REMU = FN_AND
val FN_MUL = FN_ADD
val FN_MULH = FN_SL
val FN_MULHSU = FN_SLT
val FN_MULHU = FN_SLTU
def isMulFN(fn: UInt, cmp: UInt) = fn(1,0) === cmp(1,0)
def isSub(cmd: UInt) = cmd(3)
def isCmp(cmd: UInt) = cmd === FN_SEQ || cmd === FN_SNE || cmd >= FN_SLT
def cmpUnsigned(cmd: UInt) = cmd(1)
def cmpInverted(cmd: UInt) = cmd(0)
def cmpEq(cmd: UInt) = !cmd(3)
}
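// Encoding notes: the ordered compares (FN_SLT=12 through FN_SGEU=15) all have
// bit 3 set, so isSub() steers the shared adder into subtract mode and the
// comparison result is derived from the subtraction; FN_SEQ/FN_SNE instead
// satisfy cmpEq() and test in1 ^ in2 against zero. Bit 1 (cmpUnsigned) selects
// signed vs. unsigned, and bit 0 (cmpInverted) flips SLT into SGE and SEQ into
// SNE. The multiply/divide functions intentionally alias arithmetic/logical
// codes (FN_MUL = FN_ADD, FN_DIV = FN_XOR, ...) because those operations are
// handled by a separate multiply/divide unit rather than this ALU.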
import ALU._
class ALU(implicit p: Parameters) extends CoreModule()(p) {
val io = new Bundle {
val dw = Bits(INPUT, SZ_DW)
val fn = Bits(INPUT, SZ_ALU_FN)
val in2 = UInt(INPUT, xLen)
val in1 = UInt(INPUT, xLen)
val out = UInt(OUTPUT, xLen)
val adder_out = UInt(OUTPUT, xLen)
val cmp_out = Bool(OUTPUT)
}
// ADD, SUB
val in2_inv = Mux(isSub(io.fn), ~io.in2, io.in2)
val in1_xor_in2 = io.in1 ^ in2_inv
io.adder_out := io.in1 + in2_inv + isSub(io.fn)
// SLT, SLTU
io.cmp_out := cmpInverted(io.fn) ^
Mux(cmpEq(io.fn), in1_xor_in2 === UInt(0),
Mux(io.in1(xLen-1) === io.in2(xLen-1), io.adder_out(xLen-1),
Mux(cmpUnsigned(io.fn), io.in2(xLen-1), io.in1(xLen-1))))
// SLL, SRL, SRA
val (shamt, shin_r) =
if (xLen == 32) (io.in2(4,0), io.in1)
else {
require(xLen == 64)
val shin_hi_32 = Fill(32, isSub(io.fn) && io.in1(31))
val shin_hi = Mux(io.dw === DW_64, io.in1(63,32), shin_hi_32)
val shamt = Cat(io.in2(5) & (io.dw === DW_64), io.in2(4,0))
(shamt, Cat(shin_hi, io.in1(31,0)))
}
val shin = Mux(io.fn === FN_SR || io.fn === FN_SRA, shin_r, Reverse(shin_r))
val shout_r = (Cat(isSub(io.fn) & shin(xLen-1), shin).toSInt >> shamt)(xLen-1,0)
val shout_l = Reverse(shout_r)
val shout = Mux(io.fn === FN_SR || io.fn === FN_SRA, shout_r, UInt(0)) |
Mux(io.fn === FN_SL, shout_l, UInt(0))
// AND, OR, XOR
val logic = Mux(io.fn === FN_XOR || io.fn === FN_OR, in1_xor_in2, UInt(0)) |
Mux(io.fn === FN_OR || io.fn === FN_AND, io.in1 & io.in2, UInt(0))
val shift_logic = (isCmp(io.fn) && io.cmp_out) | logic | shout
val out = Mux(io.fn === FN_ADD || io.fn === FN_SUB, io.adder_out, shift_logic)
io.out := out
if (xLen > 32) {
require(xLen == 64)
when (io.dw === DW_32) { io.out := Cat(Fill(32, out(31)), out(31,0)) }
}
}


@@ -0,0 +1,641 @@
// See LICENSE for license details.
package rocket
import Chisel._
import Instructions._
import Util._
import FPConstants._
import uncore.constants.MemoryOpConstants._
import cde.{Parameters, Field}
case object SFMALatency extends Field[Int]
case object DFMALatency extends Field[Int]
object FPConstants
{
val FCMD_ADD = BitPat("b0??00")
val FCMD_SUB = BitPat("b0??01")
val FCMD_MUL = BitPat("b0??10")
val FCMD_MADD = BitPat("b1??00")
val FCMD_MSUB = BitPat("b1??01")
val FCMD_NMSUB = BitPat("b1??10")
val FCMD_NMADD = BitPat("b1??11")
val FCMD_DIV = BitPat("b?0011")
val FCMD_SQRT = BitPat("b?1011")
val FCMD_SGNJ = BitPat("b??1?0")
val FCMD_MINMAX = BitPat("b?01?1")
val FCMD_CVT_FF = BitPat("b??0??")
val FCMD_CVT_IF = BitPat("b?10??")
val FCMD_CMP = BitPat("b?01??")
val FCMD_MV_XF = BitPat("b?11??")
val FCMD_CVT_FI = BitPat("b??0??")
val FCMD_MV_FX = BitPat("b??1??")
val FCMD_X = BitPat("b?????")
val FCMD_WIDTH = 5
val RM_SZ = 3
val FLAGS_SZ = 5
}
class FPUCtrlSigs extends Bundle
{
val cmd = Bits(width = FCMD_WIDTH)
val ldst = Bool()
val wen = Bool()
val ren1 = Bool()
val ren2 = Bool()
val ren3 = Bool()
val swap12 = Bool()
val swap23 = Bool()
val single = Bool()
val fromint = Bool()
val toint = Bool()
val fastpipe = Bool()
val fma = Bool()
val div = Bool()
val sqrt = Bool()
val round = Bool()
val wflags = Bool()
}
class FPUDecoder extends Module
{
val io = new Bundle {
val inst = Bits(INPUT, 32)
val sigs = new FPUCtrlSigs().asOutput
}
val decoder = DecodeLogic(io.inst,
List (FCMD_X, X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X),
Array(FLW -> List(FCMD_X, Y,Y,N,N,N,X,X,Y,N,N,N,N,N,N,N,N),
FLD -> List(FCMD_X, Y,Y,N,N,N,X,X,N,N,N,N,N,N,N,N,N),
FSW -> List(FCMD_MV_XF, Y,N,N,Y,N,Y,X,Y,N,Y,N,N,N,N,N,N),
FSD -> List(FCMD_MV_XF, Y,N,N,Y,N,Y,X,N,N,Y,N,N,N,N,N,N),
FMV_S_X -> List(FCMD_MV_FX, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,N),
FMV_D_X -> List(FCMD_MV_FX, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,N),
FCVT_S_W -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y),
FCVT_S_WU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y),
FCVT_S_L -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y),
FCVT_S_LU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y),
FCVT_D_W -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y),
FCVT_D_WU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y),
FCVT_D_L -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y),
FCVT_D_LU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y),
FMV_X_S -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,N),
FMV_X_D -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,N),
FCLASS_S -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,N),
FCLASS_D -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,N),
FCVT_W_S -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y),
FCVT_WU_S-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y),
FCVT_L_S -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y),
FCVT_LU_S-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y),
FCVT_W_D -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y),
FCVT_WU_D-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y),
FCVT_L_D -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y),
FCVT_LU_D-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y),
FCVT_S_D -> List(FCMD_CVT_FF, N,Y,Y,N,N,N,X,Y,N,N,Y,N,N,N,Y,Y),
FCVT_D_S -> List(FCMD_CVT_FF, N,Y,Y,N,N,N,X,N,N,N,Y,N,N,N,Y,Y),
FEQ_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,N,Y),
FLT_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,N,Y),
FLE_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,N,Y),
FEQ_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,N,Y),
FLT_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,N,Y),
FLE_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,N,Y),
FSGNJ_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,N),
FSGNJN_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,N),
FSGNJX_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,N),
FSGNJ_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,N),
FSGNJN_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,N),
FSGNJX_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,N),
FMIN_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,Y),
FMAX_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,Y),
FMIN_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,Y),
FMAX_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,Y),
FADD_S -> List(FCMD_ADD, N,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y,Y),
FSUB_S -> List(FCMD_SUB, N,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y,Y),
FMUL_S -> List(FCMD_MUL, N,Y,Y,Y,N,N,N,Y,N,N,N,Y,N,N,Y,Y),
FADD_D -> List(FCMD_ADD, N,Y,Y,Y,N,N,Y,N,N,N,N,Y,N,N,Y,Y),
FSUB_D -> List(FCMD_SUB, N,Y,Y,Y,N,N,Y,N,N,N,N,Y,N,N,Y,Y),
FMUL_D -> List(FCMD_MUL, N,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,Y,Y),
FMADD_S -> List(FCMD_MADD, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y),
FMSUB_S -> List(FCMD_MSUB, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y),
FNMADD_S -> List(FCMD_NMADD, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y),
FNMSUB_S -> List(FCMD_NMSUB, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y),
FMADD_D -> List(FCMD_MADD, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y),
FMSUB_D -> List(FCMD_MSUB, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y),
FNMADD_D -> List(FCMD_NMADD, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y),
FNMSUB_D -> List(FCMD_NMSUB, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y),
FDIV_S -> List(FCMD_DIV, N,Y,Y,Y,N,N,N,Y,N,N,N,N,Y,N,Y,Y),
FSQRT_S -> List(FCMD_SQRT, N,Y,Y,N,N,Y,X,Y,N,N,N,N,N,Y,Y,Y),
FDIV_D -> List(FCMD_DIV, N,Y,Y,Y,N,N,N,N,N,N,N,N,Y,N,Y,Y),
FSQRT_D -> List(FCMD_SQRT, N,Y,Y,N,N,Y,X,N,N,N,N,N,N,Y,Y,Y)
))
val s = io.sigs
val sigs = Seq(s.cmd, s.ldst, s.wen, s.ren1, s.ren2, s.ren3, s.swap12,
s.swap23, s.single, s.fromint, s.toint, s.fastpipe, s.fma,
s.div, s.sqrt, s.round, s.wflags)
sigs zip decoder map {case(s,d) => s := d}
}
class FPUIO(implicit p: Parameters) extends CoreBundle {
val inst = Bits(INPUT, 32)
val fromint_data = Bits(INPUT, xLen)
val fcsr_rm = Bits(INPUT, FPConstants.RM_SZ)
val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ))
val store_data = Bits(OUTPUT, 64)
val toint_data = Bits(OUTPUT, xLen)
val dmem_resp_val = Bool(INPUT)
val dmem_resp_type = Bits(INPUT, 3)
val dmem_resp_tag = UInt(INPUT, 5)
val dmem_resp_data = Bits(INPUT, 64)
val valid = Bool(INPUT)
val fcsr_rdy = Bool(OUTPUT)
val nack_mem = Bool(OUTPUT)
val illegal_rm = Bool(OUTPUT)
val killx = Bool(INPUT)
val killm = Bool(INPUT)
val dec = new FPUCtrlSigs().asOutput
val sboard_set = Bool(OUTPUT)
val sboard_clr = Bool(OUTPUT)
val sboard_clra = UInt(OUTPUT, 5)
val cp_req = Decoupled(new FPInput()).flip // coprocessor requests do not observe kill signals
val cp_resp = Decoupled(new FPResult())
}
class FPResult extends Bundle
{
val data = Bits(width = 65)
val exc = Bits(width = 5)
}
class FPInput extends FPUCtrlSigs {
val rm = Bits(width = 3)
val typ = Bits(width = 2)
val in1 = Bits(width = 65)
val in2 = Bits(width = 65)
val in3 = Bits(width = 65)
}
object ClassifyRecFN {
def apply(expWidth: Int, sigWidth: Int, in: UInt) = {
val sign = in(sigWidth + expWidth)
val exp = in(sigWidth + expWidth - 1, sigWidth - 1)
val sig = in(sigWidth - 2, 0)
val code = exp(expWidth,expWidth-2)
val codeHi = code(2, 1)
val isSpecial = codeHi === UInt(3)
val isHighSubnormalIn = exp(expWidth-2, 0) < UInt(2)
val isSubnormal = code === UInt(1) || codeHi === UInt(1) && isHighSubnormalIn
val isNormal = codeHi === UInt(1) && !isHighSubnormalIn || codeHi === UInt(2)
val isZero = code === UInt(0)
val isInf = isSpecial && !exp(expWidth-2)
val isNaN = code.andR
val isSNaN = isNaN && !sig(sigWidth-2)
val isQNaN = isNaN && sig(sigWidth-2)
Cat(isQNaN, isSNaN, isInf && !sign, isNormal && !sign,
isSubnormal && !sign, isZero && !sign, isZero && sign,
isSubnormal && sign, isNormal && sign, isInf && sign)
}
}
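// The concatenation in ClassifyRecFN follows the RISC-V FCLASS result
// encoding: bit 0 is negative infinity, bits 1-3 are negative
// normal/subnormal/zero, bits 4-6 are positive zero/subnormal/normal, bit 7 is
// positive infinity, and bits 8 and 9 are signaling and quiet NaN.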
class FPToInt extends Module
{
val io = new Bundle {
val in = Valid(new FPInput).flip
val as_double = new FPInput().asOutput
val out = Valid(new Bundle {
val lt = Bool()
val store = Bits(width = 64)
val toint = Bits(width = 64)
val exc = Bits(width = 5)
})
}
val in = Reg(new FPInput)
val valid = Reg(next=io.in.valid)
def upconvert(x: UInt) = {
val s2d = Module(new hardfloat.RecFNToRecFN(8, 24, 11, 53))
s2d.io.in := x
s2d.io.roundingMode := UInt(0)
s2d.io.out
}
val in1_upconvert = upconvert(io.in.bits.in1)
val in2_upconvert = upconvert(io.in.bits.in2)
when (io.in.valid) {
in := io.in.bits
when (io.in.bits.single && !io.in.bits.ldst && io.in.bits.cmd =/= FCMD_MV_XF) {
in.in1 := in1_upconvert
in.in2 := in2_upconvert
}
}
val unrec_s = hardfloat.fNFromRecFN(8, 24, in.in1)
val unrec_d = hardfloat.fNFromRecFN(11, 53, in.in1)
val unrec_out = Mux(in.single, Cat(Fill(32, unrec_s(31)), unrec_s), unrec_d)
val classify_s = ClassifyRecFN(8, 24, in.in1)
val classify_d = ClassifyRecFN(11, 53, in.in1)
val classify_out = Mux(in.single, classify_s, classify_d)
val dcmp = Module(new hardfloat.CompareRecFN(11, 53))
dcmp.io.a := in.in1
dcmp.io.b := in.in2
dcmp.io.signaling := Bool(true)
val dcmp_out = (~in.rm & Cat(dcmp.io.lt, dcmp.io.eq)).orR
val dcmp_exc = dcmp.io.exceptionFlags
val d2l = Module(new hardfloat.RecFNToIN(11, 53, 64))
val d2w = Module(new hardfloat.RecFNToIN(11, 53, 32))
d2l.io.in := in.in1
d2l.io.roundingMode := in.rm
d2l.io.signedOut := ~in.typ(0)
d2w.io.in := in.in1
d2w.io.roundingMode := in.rm
d2w.io.signedOut := ~in.typ(0)
io.out.bits.toint := Mux(in.rm(0), classify_out, unrec_out)
io.out.bits.store := unrec_out
io.out.bits.exc := Bits(0)
when (in.cmd === FCMD_CMP) {
io.out.bits.toint := dcmp_out
io.out.bits.exc := dcmp_exc
}
when (in.cmd === FCMD_CVT_IF) {
io.out.bits.toint := Mux(in.typ(1), d2l.io.out.toSInt, d2w.io.out.toSInt).toUInt
val dflags = Mux(in.typ(1), d2l.io.intExceptionFlags, d2w.io.intExceptionFlags)
io.out.bits.exc := Cat(dflags(2, 1).orR, UInt(0, 3), dflags(0))
}
io.out.valid := valid
io.out.bits.lt := dcmp.io.lt
io.as_double := in
}
class IntToFP(val latency: Int) extends Module
{
val io = new Bundle {
val in = Valid(new FPInput).flip
val out = Valid(new FPResult)
}
val in = Pipe(io.in)
val mux = Wire(new FPResult)
mux.exc := Bits(0)
mux.data := hardfloat.recFNFromFN(11, 53, in.bits.in1)
when (in.bits.single) {
mux.data := Cat(SInt(-1, 32), hardfloat.recFNFromFN(8, 24, in.bits.in1))
}
val longValue =
Mux(in.bits.typ(1), in.bits.in1.toSInt,
Mux(in.bits.typ(0), in.bits.in1(31,0).zext, in.bits.in1(31,0).toSInt))
val l2s = Module(new hardfloat.INToRecFN(64, 8, 24))
l2s.io.signedIn := ~in.bits.typ(0)
l2s.io.in := longValue.toUInt
l2s.io.roundingMode := in.bits.rm
val l2d = Module(new hardfloat.INToRecFN(64, 11, 53))
l2d.io.signedIn := ~in.bits.typ(0)
l2d.io.in := longValue.toUInt
l2d.io.roundingMode := in.bits.rm
when (in.bits.cmd === FCMD_CVT_FI) {
when (in.bits.single) {
mux.data := Cat(SInt(-1, 32), l2s.io.out)
mux.exc := l2s.io.exceptionFlags
}.otherwise {
mux.data := l2d.io.out
mux.exc := l2d.io.exceptionFlags
}
}
io.out <> Pipe(in.valid, mux, latency-1)
}
class FPToFP(val latency: Int) extends Module
{
val io = new Bundle {
val in = Valid(new FPInput).flip
val out = Valid(new FPResult)
val lt = Bool(INPUT) // from FPToInt
}
val in = Pipe(io.in)
// fp->fp units
val isSgnj = in.bits.cmd === FCMD_SGNJ
def fsgnjSign(in1: Bits, in2: Bits, pos: Int, en: Bool, rm: Bits) =
Mux(rm(1) || !en, in1(pos), rm(0)) ^ (en && in2(pos))
val sign_s = fsgnjSign(in.bits.in1, in.bits.in2, 32, in.bits.single && isSgnj, in.bits.rm)
val sign_d = fsgnjSign(in.bits.in1, in.bits.in2, 64, !in.bits.single && isSgnj, in.bits.rm)
val fsgnj = Cat(sign_d, in.bits.in1(63,33), sign_s, in.bits.in1(31,0))
val s2d = Module(new hardfloat.RecFNToRecFN(8, 24, 11, 53))
val d2s = Module(new hardfloat.RecFNToRecFN(11, 53, 8, 24))
s2d.io.in := in.bits.in1
s2d.io.roundingMode := in.bits.rm
d2s.io.in := in.bits.in1
d2s.io.roundingMode := in.bits.rm
val isnan1 = Mux(in.bits.single, in.bits.in1(31,29).andR, in.bits.in1(63,61).andR)
val isnan2 = Mux(in.bits.single, in.bits.in2(31,29).andR, in.bits.in2(63,61).andR)
val issnan1 = isnan1 && ~Mux(in.bits.single, in.bits.in1(22), in.bits.in1(51))
val issnan2 = isnan2 && ~Mux(in.bits.single, in.bits.in2(22), in.bits.in2(51))
val minmax_exc = Cat(issnan1 || issnan2, Bits(0,4))
val isMax = in.bits.rm(0)
val isLHS = isnan2 || isMax =/= io.lt && !isnan1
val mux = Wire(new FPResult)
mux.exc := minmax_exc
mux.data := in.bits.in2
when (isSgnj) { mux.exc := UInt(0) }
when (isSgnj || isLHS) { mux.data := fsgnj }
when (in.bits.cmd === FCMD_CVT_FF) {
when (in.bits.single) {
mux.data := Cat(SInt(-1, 32), d2s.io.out)
mux.exc := d2s.io.exceptionFlags
}.otherwise {
mux.data := s2d.io.out
mux.exc := s2d.io.exceptionFlags
}
}
io.out <> Pipe(in.valid, mux, latency-1)
}
class FPUFMAPipe(val latency: Int, expWidth: Int, sigWidth: Int) extends Module
{
val io = new Bundle {
val in = Valid(new FPInput).flip
val out = Valid(new FPResult)
}
val width = sigWidth + expWidth
val one = UInt(1) << (width-1)
val zero = (io.in.bits.in1(width) ^ io.in.bits.in2(width)) << width
val valid = Reg(next=io.in.valid)
val in = Reg(new FPInput)
when (io.in.valid) {
in := io.in.bits
val cmd_fma = io.in.bits.ren3
val cmd_addsub = io.in.bits.swap23
in.cmd := Cat(io.in.bits.cmd(1) & (cmd_fma || cmd_addsub), io.in.bits.cmd(0))
when (cmd_addsub) { in.in2 := one }
unless (cmd_fma || cmd_addsub) { in.in3 := zero }
}
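// The fused unit handles plain adds and multiplies by operand substitution:
// for FADD/FSUB (swap23 set by the decoder) in2 is forced to the recoded
// constant 1.0 so the datapath computes in1 * 1 +/- in3, and for a bare FMUL
// in3 is forced to a zero carrying the sign of in1 * in2 so that a zero-valued
// product keeps its correct sign after rounding. Real FMA ops pass through
// unchanged.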
val fma = Module(new hardfloat.MulAddRecFN(expWidth, sigWidth))
fma.io.op := in.cmd
fma.io.roundingMode := in.rm
fma.io.a := in.in1
fma.io.b := in.in2
fma.io.c := in.in3
val res = Wire(new FPResult)
res.data := Cat(SInt(-1, 32), fma.io.out)
res.exc := fma.io.exceptionFlags
io.out := Pipe(valid, res, latency-1)
}
class FPU(implicit p: Parameters) extends CoreModule()(p) {
require(xLen == 64, "RV32 Rocket FP support missing")
val io = new FPUIO
val ex_reg_valid = Reg(next=io.valid, init=Bool(false))
val req_valid = ex_reg_valid || io.cp_req.valid
val ex_reg_inst = RegEnable(io.inst, io.valid)
val ex_cp_valid = io.cp_req.valid && !ex_reg_valid
val mem_reg_valid = Reg(next=ex_reg_valid && !io.killx || ex_cp_valid, init=Bool(false))
val mem_reg_inst = RegEnable(ex_reg_inst, ex_reg_valid)
val mem_cp_valid = Reg(next=ex_cp_valid, init=Bool(false))
val killm = (io.killm || io.nack_mem) && !mem_cp_valid
val wb_reg_valid = Reg(next=mem_reg_valid && (!killm || mem_cp_valid), init=Bool(false))
val wb_cp_valid = Reg(next=mem_cp_valid, init=Bool(false))
val fp_decoder = Module(new FPUDecoder)
fp_decoder.io.inst := io.inst
val cp_ctrl = Wire(new FPUCtrlSigs)
cp_ctrl <> io.cp_req.bits
io.cp_resp.valid := Bool(false)
io.cp_resp.bits.data := UInt(0)
val id_ctrl = fp_decoder.io.sigs
val ex_ctrl = Mux(ex_reg_valid, RegEnable(id_ctrl, io.valid), cp_ctrl)
val mem_ctrl = RegEnable(ex_ctrl, req_valid)
val wb_ctrl = RegEnable(mem_ctrl, mem_reg_valid)
// load response
val load_wb = Reg(next=io.dmem_resp_val)
val load_wb_single = RegEnable(io.dmem_resp_type === MT_W || io.dmem_resp_type === MT_WU, io.dmem_resp_val)
val load_wb_data = RegEnable(io.dmem_resp_data, io.dmem_resp_val)
val load_wb_tag = RegEnable(io.dmem_resp_tag, io.dmem_resp_val)
val rec_s = hardfloat.recFNFromFN(8, 24, load_wb_data)
val rec_d = hardfloat.recFNFromFN(11, 53, load_wb_data)
val load_wb_data_recoded = Mux(load_wb_single, Cat(SInt(-1, 32), rec_s), rec_d)
// regfile
val regfile = Mem(32, Bits(width = 65))
when (load_wb) {
regfile(load_wb_tag) := load_wb_data_recoded
if (enableCommitLog) {
printf ("f%d p%d 0x%x\n", load_wb_tag, load_wb_tag + UInt(32),
Mux(load_wb_single, load_wb_data(31,0), load_wb_data))
}
}
val ex_ra1::ex_ra2::ex_ra3::Nil = List.fill(3)(Reg(UInt()))
when (io.valid) {
when (id_ctrl.ren1) {
when (!id_ctrl.swap12) { ex_ra1 := io.inst(19,15) }
when (id_ctrl.swap12) { ex_ra2 := io.inst(19,15) }
}
when (id_ctrl.ren2) {
when (id_ctrl.swap12) { ex_ra1 := io.inst(24,20) }
when (id_ctrl.swap23) { ex_ra3 := io.inst(24,20) }
when (!id_ctrl.swap12 && !id_ctrl.swap23) { ex_ra2 := io.inst(24,20) }
}
when (id_ctrl.ren3) { ex_ra3 := io.inst(31,27) }
}
val ex_rs1::ex_rs2::ex_rs3::Nil = Seq(ex_ra1, ex_ra2, ex_ra3).map(regfile(_))
val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.fcsr_rm, ex_reg_inst(14,12))
val cp_rs1 = io.cp_req.bits.in1
val cp_rs2 = Mux(io.cp_req.bits.swap23, io.cp_req.bits.in3, io.cp_req.bits.in2)
val cp_rs3 = Mux(io.cp_req.bits.swap23, io.cp_req.bits.in2, io.cp_req.bits.in3)
val req = Wire(new FPInput)
req := ex_ctrl
req.rm := Mux(ex_reg_valid, ex_rm, io.cp_req.bits.rm)
req.in1 := Mux(ex_reg_valid, ex_rs1, cp_rs1)
req.in2 := Mux(ex_reg_valid, ex_rs2, cp_rs2)
req.in3 := Mux(ex_reg_valid, ex_rs3, cp_rs3)
req.typ := Mux(ex_reg_valid, ex_reg_inst(21,20), io.cp_req.bits.typ)
val sfma = Module(new FPUFMAPipe(p(SFMALatency), 8, 24))
sfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.single
sfma.io.in.bits := req
val dfma = Module(new FPUFMAPipe(p(DFMALatency), 11, 53))
dfma.io.in.valid := req_valid && ex_ctrl.fma && !ex_ctrl.single
dfma.io.in.bits := req
val fpiu = Module(new FPToInt)
fpiu.io.in.valid := req_valid && (ex_ctrl.toint || ex_ctrl.div || ex_ctrl.sqrt || ex_ctrl.cmd === FCMD_MINMAX)
fpiu.io.in.bits := req
io.store_data := fpiu.io.out.bits.store
io.toint_data := fpiu.io.out.bits.toint
when(fpiu.io.out.valid && mem_cp_valid && mem_ctrl.toint){
io.cp_resp.bits.data := fpiu.io.out.bits.toint
io.cp_resp.valid := Bool(true)
}
val ifpu = Module(new IntToFP(3))
ifpu.io.in.valid := req_valid && ex_ctrl.fromint
ifpu.io.in.bits := req
ifpu.io.in.bits.in1 := Mux(ex_reg_valid, io.fromint_data, cp_rs1)
val fpmu = Module(new FPToFP(2))
fpmu.io.in.valid := req_valid && ex_ctrl.fastpipe
fpmu.io.in.bits := req
fpmu.io.lt := fpiu.io.out.bits.lt
val divSqrt_wen = Reg(next=Bool(false))
val divSqrt_inReady = Wire(init=Bool(false))
val divSqrt_waddr = Reg(Bits())
val divSqrt_wdata = Wire(Bits())
val divSqrt_flags = Wire(Bits())
val divSqrt_in_flight = Reg(init=Bool(false))
val divSqrt_killed = Reg(Bool())
// writeback arbitration
case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, res: FPResult)
val pipes = List(
Pipe(fpmu, fpmu.latency, (c: FPUCtrlSigs) => c.fastpipe, fpmu.io.out.bits),
Pipe(ifpu, ifpu.latency, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits),
Pipe(sfma, sfma.latency, (c: FPUCtrlSigs) => c.fma && c.single, sfma.io.out.bits),
Pipe(dfma, dfma.latency, (c: FPUCtrlSigs) => c.fma && !c.single, dfma.io.out.bits))
def latencyMask(c: FPUCtrlSigs, offset: Int) = {
require(pipes.forall(_.lat >= offset))
pipes.map(p => Mux(p.cond(c), UInt(1 << p.lat-offset), UInt(0))).reduce(_|_)
}
def pipeid(c: FPUCtrlSigs) = pipes.zipWithIndex.map(p => Mux(p._1.cond(c), UInt(p._2), UInt(0))).reduce(_|_)
val maxLatency = pipes.map(_.lat).max
val memLatencyMask = latencyMask(mem_ctrl, 2)
val wen = Reg(init=Bits(0, maxLatency-1))
val winfo = Reg(Vec(maxLatency-1, Bits()))
val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint)
val write_port_busy = RegEnable(mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, req_valid)
val mem_winfo = Cat(mem_cp_valid, pipeid(mem_ctrl), mem_ctrl.single, mem_reg_inst(11,7)) // single is only carried for debugging (the commit-log printf)
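// wen acts as a shift register of pending regfile writes: an instruction in
// the mem stage whose pipe has latency L sets bit L-2, and that bit marches
// toward wen(0), the cycle its result is actually written back. winfo shifts
// in lockstep and records {from-coprocessor, pipe id, single, waddr} for the
// corresponding write.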
for (i <- 0 until maxLatency-2) {
when (wen(i+1)) { winfo(i) := winfo(i+1) }
}
wen := wen >> 1
when (mem_wen) {
when (!killm) {
wen := wen >> 1 | memLatencyMask
}
for (i <- 0 until maxLatency-1) {
when (!write_port_busy && memLatencyMask(i)) {
winfo(i) := mem_winfo
}
}
}
val waddr = Mux(divSqrt_wen, divSqrt_waddr, winfo(0)(4,0).toUInt)
val wsrc = (winfo(0) >> 6)(log2Up(pipes.size) - 1,0)
val wcp = winfo(0)(6+log2Up(pipes.size))
val wdata = Mux(divSqrt_wen, divSqrt_wdata, (pipes.map(_.res.data): Seq[UInt])(wsrc))
val wexc = (pipes.map(_.res.exc): Seq[UInt])(wsrc)
when ((!wcp && wen(0)) || divSqrt_wen) {
regfile(waddr) := wdata
if (enableCommitLog) {
val wdata_unrec_s = hardfloat.fNFromRecFN(8, 24, wdata(64,0))
val wdata_unrec_d = hardfloat.fNFromRecFN(11, 53, wdata(64,0))
val wb_single = (winfo(0) >> 5)(0)
printf ("f%d p%d 0x%x\n", waddr, waddr+ UInt(32),
Mux(wb_single, Cat(UInt(0,32), wdata_unrec_s), wdata_unrec_d))
}
}
when (wcp && wen(0)) {
io.cp_resp.bits.data := wdata
io.cp_resp.valid := Bool(true)
}
io.cp_req.ready := !ex_reg_valid
val wb_toint_valid = wb_reg_valid && wb_ctrl.toint
val wb_toint_exc = RegEnable(fpiu.io.out.bits.exc, mem_ctrl.toint)
io.fcsr_flags.valid := wb_toint_valid || divSqrt_wen || wen(0)
io.fcsr_flags.bits :=
Mux(wb_toint_valid, wb_toint_exc, UInt(0)) |
Mux(divSqrt_wen, divSqrt_flags, UInt(0)) |
Mux(wen(0), wexc, UInt(0))
val units_busy = mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && (!divSqrt_inReady || wen.orR) // || mem_reg_valid && mem_ctrl.fma && Reg(next=Mux(ex_ctrl.single, io.sfma.valid, io.dfma.valid))
io.fcsr_rdy := !(ex_reg_valid && ex_ctrl.wflags || mem_reg_valid && mem_ctrl.wflags || wb_reg_valid && wb_ctrl.toint || wen.orR || divSqrt_in_flight)
io.nack_mem := units_busy || write_port_busy || divSqrt_in_flight
io.dec <> fp_decoder.io.sigs
def useScoreboard(f: ((Pipe, Int)) => Bool) = pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(Bool(false))(_||_)
io.sboard_set := wb_reg_valid && !wb_cp_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl)) || mem_ctrl.div || mem_ctrl.sqrt)
io.sboard_clr := !wb_cp_valid && (divSqrt_wen || (wen(0) && useScoreboard(x => wsrc === UInt(x._2))))
io.sboard_clra := waddr
// we don't currently support round-max-magnitude (rm=4)
io.illegal_rm := ex_rm(2) && ex_ctrl.round
divSqrt_wdata := 0
divSqrt_flags := 0
if (p(FDivSqrt)) {
val divSqrt_single = Reg(Bool())
val divSqrt_rm = Reg(Bits())
val divSqrt_flags_double = Reg(Bits())
val divSqrt_wdata_double = Reg(Bits())
val divSqrt = Module(new hardfloat.DivSqrtRecF64)
divSqrt_inReady := Mux(divSqrt.io.sqrtOp, divSqrt.io.inReady_sqrt, divSqrt.io.inReady_div)
val divSqrt_outValid = divSqrt.io.outValid_div || divSqrt.io.outValid_sqrt
divSqrt.io.inValid := mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && !divSqrt_in_flight
divSqrt.io.sqrtOp := mem_ctrl.sqrt
divSqrt.io.a := fpiu.io.as_double.in1
divSqrt.io.b := fpiu.io.as_double.in2
divSqrt.io.roundingMode := fpiu.io.as_double.rm
when (divSqrt.io.inValid && divSqrt_inReady) {
divSqrt_in_flight := true
divSqrt_killed := killm
divSqrt_single := mem_ctrl.single
divSqrt_waddr := mem_reg_inst(11,7)
divSqrt_rm := divSqrt.io.roundingMode
}
when (divSqrt_outValid) {
divSqrt_wen := !divSqrt_killed
divSqrt_wdata_double := divSqrt.io.out
divSqrt_in_flight := false
divSqrt_flags_double := divSqrt.io.exceptionFlags
}
val divSqrt_toSingle = Module(new hardfloat.RecFNToRecFN(11, 53, 8, 24))
divSqrt_toSingle.io.in := divSqrt_wdata_double
divSqrt_toSingle.io.roundingMode := divSqrt_rm
divSqrt_wdata := Mux(divSqrt_single, divSqrt_toSingle.io.out, divSqrt_wdata_double)
divSqrt_flags := divSqrt_flags_double | Mux(divSqrt_single, divSqrt_toSingle.io.exceptionFlags, Bits(0))
}
}


@@ -0,0 +1,130 @@
package rocket
import Chisel._
import uncore.tilelink._
import Util._
import cde.{Parameters, Field}
class FrontendReq(implicit p: Parameters) extends CoreBundle()(p) {
val pc = UInt(width = vaddrBitsExtended)
val speculative = Bool()
}
class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) {
val pc = UInt(width = vaddrBitsExtended) // ID stage PC
val data = Vec(fetchWidth, Bits(width = coreInstBits))
val mask = Bits(width = fetchWidth)
val xcpt_if = Bool()
val replay = Bool()
}
class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) {
val req = Valid(new FrontendReq)
val resp = Decoupled(new FrontendResp).flip
val btb_resp = Valid(new BTBResp).flip
val btb_update = Valid(new BTBUpdate)
val bht_update = Valid(new BHTUpdate)
val ras_update = Valid(new RASUpdate)
val flush_icache = Bool(OUTPUT)
val flush_tlb = Bool(OUTPUT)
val npc = UInt(INPUT, width = vaddrBitsExtended)
}
class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CacheParameters {
val io = new Bundle {
val cpu = new FrontendIO().flip
val ptw = new TLBPTWIO()
val mem = new ClientUncachedTileLinkIO
}
val icache = Module(new ICache(latency = 2))
val tlb = Module(new TLB)
val s1_pc_ = Reg(UInt(width=vaddrBitsExtended))
val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBs (this propagates down the pipeline)
val s1_speculative = Reg(Bool())
val s1_same_block = Reg(Bool())
val s2_valid = Reg(init=Bool(true))
val s2_pc = Reg(init=UInt(p(ResetVector)))
val s2_btb_resp_valid = Reg(init=Bool(false))
val s2_btb_resp_bits = Reg(new BTBResp)
val s2_xcpt_if = Reg(init=Bool(false))
val s2_speculative = Reg(init=Bool(false))
val ntpc = ~(~s1_pc | (coreInstBytes*fetchWidth-1)) + UInt(coreInstBytes*fetchWidth)
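// For power-of-two n, ~(~x | (n-1)) equals x & ~(n-1), so ntpc is s1_pc
// rounded down to a fetch-group boundary plus one fetch group: the sequential
// next-fetch address.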
val predicted_npc = Wire(init = ntpc)
val icmiss = s2_valid && !icache.io.resp.valid
val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt
val s0_same_block = Wire(init = !icmiss && !io.cpu.req.valid && ((ntpc & rowBytes) === (s1_pc & rowBytes)))
val stall = io.cpu.resp.valid && !io.cpu.resp.ready
when (!stall) {
s1_same_block := s0_same_block && !tlb.io.resp.miss
s1_pc_ := npc
s1_speculative := Mux(icmiss, s2_speculative, true)
s2_valid := !icmiss
when (!icmiss) {
s2_pc := s1_pc
s2_speculative := s1_speculative && !tlb.io.resp.cacheable
s2_xcpt_if := tlb.io.resp.xcpt_if
}
}
when (io.cpu.req.valid) {
s1_same_block := Bool(false)
s1_pc_ := io.cpu.req.bits.pc
s1_speculative := io.cpu.req.bits.speculative
s2_valid := Bool(false)
}
if (p(BtbKey).nEntries > 0) {
val btb = Module(new BTB)
btb.io.req.valid := false
btb.io.req.bits.addr := s1_pc
btb.io.btb_update := io.cpu.btb_update
btb.io.bht_update := io.cpu.bht_update
btb.io.ras_update := io.cpu.ras_update
when (!stall && !icmiss) {
btb.io.req.valid := true
s2_btb_resp_valid := btb.io.resp.valid
s2_btb_resp_bits := btb.io.resp.bits
}
when (btb.io.resp.bits.taken) {
predicted_npc := btb.io.resp.bits.target.sextTo(vaddrBitsExtended)
s0_same_block := Bool(false)
}
}
io.ptw <> tlb.io.ptw
tlb.io.req.valid := !stall && !icmiss
tlb.io.req.bits.vpn := s1_pc >> pgIdxBits
tlb.io.req.bits.passthrough := Bool(false)
tlb.io.req.bits.instruction := Bool(true)
tlb.io.req.bits.store := Bool(false)
io.mem <> icache.io.mem
icache.io.req.valid := !stall && !s0_same_block
icache.io.req.bits.addr := io.cpu.npc
icache.io.invalidate := io.cpu.flush_icache
icache.io.s1_ppn := tlb.io.resp.ppn
icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.cpu.flush_tlb
icache.io.s2_kill := s2_speculative
icache.io.resp.ready := !stall && !s1_same_block
io.cpu.resp.valid := s2_valid && (icache.io.resp.valid || s2_speculative || s2_xcpt_if)
io.cpu.resp.bits.pc := s2_pc
io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc)
require(fetchWidth * coreInstBytes <= rowBytes)
val fetch_data = icache.io.resp.bits.datablock >> (s2_pc.extract(log2Up(rowBytes)-1,log2Up(fetchWidth*coreInstBytes)) << log2Up(fetchWidth*coreInstBits))
for (i <- 0 until fetchWidth) {
io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits)
}
io.cpu.resp.bits.mask := UInt((1 << fetchWidth)-1) << s2_pc.extract(log2Up(fetchWidth)+log2Up(coreInstBytes)-1, log2Up(coreInstBytes))
io.cpu.resp.bits.xcpt_if := s2_xcpt_if
io.cpu.resp.bits.replay := s2_speculative && !icache.io.resp.valid && !s2_xcpt_if
io.cpu.btb_resp.valid := s2_btb_resp_valid
io.cpu.btb_resp.bits := s2_btb_resp_bits
}


@@ -0,0 +1,157 @@
package rocket
import Chisel._
import uncore.agents._
import uncore.tilelink._
import uncore.util._
import Util._
import cde.{Parameters, Field}
trait HasL1CacheParameters extends HasCacheParameters with HasCoreParameters {
val outerDataBeats = p(TLKey(p(TLId))).dataBeats
val outerDataBits = p(TLKey(p(TLId))).dataBitsPerBeat
val refillCyclesPerBeat = outerDataBits/rowBits
val refillCycles = refillCyclesPerBeat*outerDataBeats
}
class ICacheReq(implicit p: Parameters) extends CoreBundle()(p) with HasL1CacheParameters {
val addr = UInt(width = vaddrBits)
}
class ICacheResp(implicit p: Parameters) extends CoreBundle()(p) with HasL1CacheParameters {
val data = Bits(width = coreInstBits)
val datablock = Bits(width = rowBits)
}
class ICache(latency: Int)(implicit p: Parameters) extends CoreModule()(p) with HasL1CacheParameters {
val io = new Bundle {
val req = Valid(new ICacheReq).flip
val s1_ppn = UInt(INPUT, ppnBits) // delayed one cycle w.r.t. req
val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req
val s2_kill = Bool(INPUT) // delayed two cycles; prevents I$ miss emission
val resp = Decoupled(new ICacheResp)
val invalidate = Bool(INPUT)
val mem = new ClientUncachedTileLinkIO
}
require(isPow2(nSets) && isPow2(nWays))
require(isPow2(coreInstBytes))
require(!usingVM || pgIdxBits >= untagBits)
val s_ready :: s_request :: s_refill_wait :: s_refill :: Nil = Enum(UInt(), 4)
val state = Reg(init=s_ready)
val invalidated = Reg(Bool())
val stall = !io.resp.ready
val rdy = Wire(Bool())
val refill_addr = Reg(UInt(width = paddrBits))
val s1_any_tag_hit = Wire(Bool())
val s1_valid = Reg(init=Bool(false))
val s1_vaddr = Reg(UInt())
val s1_paddr = Cat(io.s1_ppn, s1_vaddr(pgIdxBits-1,0)).toUInt
val s1_tag = s1_paddr(tagBits+untagBits-1,untagBits)
val s0_valid = io.req.valid || s1_valid && stall
val s0_vaddr = Mux(s1_valid && stall, s1_vaddr, io.req.bits.addr)
s1_valid := io.req.valid && rdy || s1_valid && stall && !io.s1_kill
when (io.req.valid && rdy) {
s1_vaddr := io.req.bits.addr
}
val out_valid = s1_valid && !io.s1_kill && state === s_ready
val s1_idx = s1_vaddr(untagBits-1,blockOffBits)
val s1_hit = out_valid && s1_any_tag_hit
val s1_miss = out_valid && !s1_any_tag_hit
rdy := state === s_ready && !s1_miss
when (s1_miss && state === s_ready) {
refill_addr := s1_paddr
}
val refill_tag = refill_addr(tagBits+untagBits-1,untagBits)
val narrow_grant = FlowThroughSerializer(io.mem.grant, refillCyclesPerBeat)
val (refill_cnt, refill_wrap) = Counter(narrow_grant.fire(), refillCycles)
val refill_done = state === s_refill && refill_wrap
narrow_grant.ready := Bool(true)
val repl_way = if (isDM) UInt(0) else LFSR16(s1_miss)(log2Up(nWays)-1,0)
val entagbits = code.width(tagBits)
val tag_array = SeqMem(nSets, Vec(nWays, Bits(width = entagbits)))
val tag_rdata = tag_array.read(s0_vaddr(untagBits-1,blockOffBits), !refill_done && s0_valid)
when (refill_done) {
val tag = code.encode(refill_tag).toUInt
tag_array.write(s1_idx, Vec.fill(nWays)(tag), Vec.tabulate(nWays)(repl_way === _))
}
val vb_array = Reg(init=Bits(0, nSets*nWays))
when (refill_done && !invalidated) {
vb_array := vb_array.bitSet(Cat(repl_way, s1_idx), Bool(true))
}
when (io.invalidate) {
vb_array := Bits(0)
invalidated := Bool(true)
}
val s1_disparity = Wire(Vec(nWays, Bool()))
for (i <- 0 until nWays)
when (s1_valid && s1_disparity(i)) { vb_array := vb_array.bitSet(Cat(UInt(i), s1_idx), Bool(false)) }
val s1_tag_match = Wire(Vec(nWays, Bool()))
val s1_tag_hit = Wire(Vec(nWays, Bool()))
val s1_dout = Wire(Vec(nWays, Bits(width = rowBits)))
for (i <- 0 until nWays) {
val s1_vb = !io.invalidate && vb_array(Cat(UInt(i), s1_vaddr(untagBits-1,blockOffBits))).toBool
val tag_out = tag_rdata(i)
val s1_tag_disparity = code.decode(tag_out).error
s1_tag_match(i) := tag_out(tagBits-1,0) === s1_tag
s1_tag_hit(i) := s1_vb && s1_tag_match(i)
s1_disparity(i) := s1_vb && (s1_tag_disparity || code.decode(s1_dout(i)).error)
}
s1_any_tag_hit := s1_tag_hit.reduceLeft(_||_) && !s1_disparity.reduceLeft(_||_)
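// A tag match only counts as a hit when no way reports a code (parity/ECC)
// error in its tag or data; an erroring way also has its valid bit cleared
// above, so the access is treated as a miss and the line is refetched cleanly.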
for (i <- 0 until nWays) {
val data_array = SeqMem(nSets * refillCycles, Bits(width = code.width(rowBits)))
val wen = narrow_grant.valid && repl_way === UInt(i)
when (wen) {
val e_d = code.encode(narrow_grant.bits.data).toUInt
data_array.write((s1_idx << log2Ceil(refillCycles)) | refill_cnt, e_d)
}
val s0_raddr = s0_vaddr(untagBits-1,blockOffBits-log2Ceil(refillCycles))
s1_dout(i) := data_array.read(s0_raddr, !wen && s0_valid)
}
// output signals
latency match {
case 1 =>
io.resp.bits.datablock := Mux1H(s1_tag_hit, s1_dout)
io.resp.valid := s1_hit
case 2 =>
val s2_hit = RegEnable(s1_hit, !stall)
val s2_tag_hit = RegEnable(s1_tag_hit, !stall)
val s2_dout = RegEnable(s1_dout, !stall)
io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout)
io.resp.valid := s2_hit
}
io.mem.acquire.valid := state === s_request && !io.s2_kill
io.mem.acquire.bits := GetBlock(addr_block = refill_addr >> blockOffBits)
// control state machine
switch (state) {
is (s_ready) {
when (s1_miss) { state := s_request }
invalidated := Bool(false)
}
is (s_request) {
when (io.mem.acquire.ready) { state := s_refill_wait }
when (io.s2_kill) { state := s_ready }
}
is (s_refill_wait) {
when (io.mem.grant.valid) { state := s_refill }
}
is (s_refill) {
when (refill_done) { state := s_ready }
}
}
}


@@ -0,0 +1,319 @@
// See LICENSE for license details
package rocket
import Chisel._
import Instructions._
import uncore.constants.MemoryOpConstants._
import ALU._
import cde.Parameters
import Util._
abstract trait DecodeConstants extends HasCoreParameters
{
val table: Array[(BitPat, List[BitPat])]
}
class IntCtrlSigs extends Bundle {
val legal = Bool()
val fp = Bool()
val rocc = Bool()
val branch = Bool()
val jal = Bool()
val jalr = Bool()
val rxs2 = Bool()
val rxs1 = Bool()
val sel_alu2 = Bits(width = A2_X.getWidth)
val sel_alu1 = Bits(width = A1_X.getWidth)
val sel_imm = Bits(width = IMM_X.getWidth)
val alu_dw = Bool()
val alu_fn = Bits(width = FN_X.getWidth)
val mem = Bool()
val mem_cmd = Bits(width = M_SZ)
val mem_type = Bits(width = MT_SZ)
val rfs1 = Bool()
val rfs2 = Bool()
val rfs3 = Bool()
val wfd = Bool()
val div = Bool()
val wxd = Bool()
val csr = Bits(width = CSR.SZ)
val fence_i = Bool()
val fence = Bool()
val amo = Bool()
def default: List[BitPat] =
// jal renf1 fence.i
// val | jalr | renf2 |
// | fp_val| | renx2 | | renf3 |
// | | rocc| | | renx1 s_alu1 mem_val | | | wfd |
// | | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div |
// | | | | | | | | | | | | | | | | | | | | | wxd | fence
// | | | | | | | | | | | | | | | | | | | | | | csr | | amo
// | | | | | | | | | | | | | | | | | | | | | | | | | |
List(N,X,X,X,X,X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,X,X,X,CSR.X,X,X,X)
def decode(inst: UInt, table: Iterable[(BitPat, List[BitPat])]) = {
val decoder = DecodeLogic(inst, default, table)
val sigs = Seq(legal, fp, rocc, branch, jal, jalr, rxs2, rxs1, sel_alu2,
sel_alu1, sel_imm, alu_dw, alu_fn, mem, mem_cmd, mem_type,
rfs1, rfs2, rfs3, wfd, div, wxd, csr, fence_i, fence, amo)
sigs zip decoder map {case(s,d) => s := d}
this
}
}
class IDecode(implicit val p: Parameters) extends DecodeConstants
{
val table: Array[(BitPat, List[BitPat])] = Array(
BNE-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SNE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
BEQ-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SEQ, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
BLT-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLT, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
BLTU-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLTU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
BGE-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
BGEU-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGEU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
JAL-> List(Y,N,N,N,Y,N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
JALR-> List(Y,N,N,N,N,Y,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
AUIPC-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
LB-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,N,N,N,Y,CSR.N,N,N,N),
LH-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,N,N,N,Y,CSR.N,N,N,N),
LW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N),
LBU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,N,N,N,Y,CSR.N,N,N,N),
LHU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,N,N,N,Y,CSR.N,N,N,N),
SB-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,N,N,N,CSR.N,N,N,N),
SH-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,N,N,N,CSR.N,N,N,N),
SW-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,N,N,N,CSR.N,N,N,N),
LUI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
ADDI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SLTI -> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SLTIU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
ANDI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
ORI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
XORI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SLLI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SRLI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SRAI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
ADD-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SUB-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SLT-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SLTU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
AND-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
OR-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
XOR-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SLL-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SRL-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SRA-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
FENCE-> List(Y,N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,Y,N),
FENCE_I-> List(Y,N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, Y,M_FLUSH_ALL,MT_X, N,N,N,N,N,N,CSR.N,Y,N,N),
SCALL-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N),
SBREAK-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N),
MRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N),
WFI-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N),
CSRRW-> List(Y,N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N),
CSRRS-> List(Y,N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N),
CSRRC-> List(Y,N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N),
CSRRWI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N),
CSRRSI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N),
CSRRCI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N))
}
class SDecode(implicit val p: Parameters) extends DecodeConstants
{
val table: Array[(BitPat, List[BitPat])] = Array(
SFENCE_VM-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N),
SRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N))
}
class DebugDecode(implicit val p: Parameters) extends DecodeConstants
{
val table: Array[(BitPat, List[BitPat])] = Array(
DRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N))
}
class I64Decode(implicit val p: Parameters) extends DecodeConstants
{
val table: Array[(BitPat, List[BitPat])] = Array(
LD-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N),
LWU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,N,N,N,Y,CSR.N,N,N,N),
SD-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,N,N,N,CSR.N,N,N,N),
ADDIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SLLIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SRLIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SRAIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
ADDW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SUBW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SLLW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SRLW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SRAW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N))
}
class MDecode(implicit val p: Parameters) extends DecodeConstants
{
val table: Array[(BitPat, List[BitPat])] = Array(
MUL-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
MULH-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULH, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
MULHU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
MULHSU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHSU,N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
DIV-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
DIVU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
REM-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
REMU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N))
}
class M64Decode(implicit val p: Parameters) extends DecodeConstants
{
val table: Array[(BitPat, List[BitPat])] = Array(
MULW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
DIVW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
DIVUW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
REMW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
REMUW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N))
}
class ADecode(implicit val p: Parameters) extends DecodeConstants
{
val table: Array[(BitPat, List[BitPat])] = Array(
AMOADD_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOXOR_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOSWAP_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOAND_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOOR_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOMIN_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOMINU_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOMAX_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOMAXU_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
LR_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
SC_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y))
}
class A64Decode(implicit val p: Parameters) extends DecodeConstants
{
val table: Array[(BitPat, List[BitPat])] = Array(
AMOADD_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOSWAP_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOXOR_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOAND_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOOR_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOMIN_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOMINU_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOMAX_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOMAXU_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
LR_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
SC_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y))
}
class FDecode(implicit val p: Parameters) extends DecodeConstants
{
val table: Array[(BitPat, List[BitPat])] = Array(
FCVT_S_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N),
FCVT_D_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N),
FSGNJ_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FSGNJ_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FSGNJX_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FSGNJX_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FSGNJN_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FSGNJN_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FMIN_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FMIN_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FMAX_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FMAX_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FADD_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FADD_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FSUB_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FSUB_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FMUL_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FMUL_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FMADD_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
FMADD_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
FMSUB_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
FMSUB_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
FNMADD_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
FNMADD_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
FNMSUB_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
FNMSUB_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
FCLASS_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
FCLASS_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
FMV_X_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
FCVT_W_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
FCVT_W_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
FCVT_WU_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
FCVT_WU_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
FEQ_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N),
FEQ_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N),
FLT_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N),
FLT_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N),
FLE_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N),
FLE_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N),
FMV_S_X-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
FCVT_S_W-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
FCVT_D_W-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
FCVT_S_WU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
FCVT_D_WU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
FLW-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,Y,N,N,CSR.N,N,N,N),
FLD-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,Y,N,N,CSR.N,N,N,N),
FSW-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,Y,N,N,N,N,CSR.N,N,N,N),
FSD-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,Y,N,N,N,N,CSR.N,N,N,N))
}
class F64Decode(implicit val p: Parameters) extends DecodeConstants
{
val table: Array[(BitPat, List[BitPat])] = Array(
FMV_X_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
FCVT_L_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
FCVT_L_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
FCVT_LU_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
FCVT_LU_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
FMV_D_X-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
FCVT_S_L-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
FCVT_D_L-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
FCVT_S_LU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
FCVT_D_LU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N))
}
class FDivSqrtDecode(implicit val p: Parameters) extends DecodeConstants
{
val table: Array[(BitPat, List[BitPat])] = Array(
FDIV_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FDIV_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FSQRT_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FSQRT_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N))
}
class RoCCDecode(implicit val p: Parameters) extends DecodeConstants
{
val table: Array[(BitPat, List[BitPat])] = Array(
CUSTOM0-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
CUSTOM0_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
CUSTOM0_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
CUSTOM0_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
CUSTOM0_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
CUSTOM0_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
CUSTOM1-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
CUSTOM1_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
CUSTOM1_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
CUSTOM1_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
CUSTOM1_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
CUSTOM1_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
CUSTOM2-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
CUSTOM2_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
CUSTOM2_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
CUSTOM2_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
CUSTOM2_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
CUSTOM2_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
CUSTOM3-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
CUSTOM3_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
CUSTOM3_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
CUSTOM3_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
CUSTOM3_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
CUSTOM3_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N))
}
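// Illustrative sketch only, not part of the original file: composing decode tables the way the
// core does in rocket.scala (further down in this diff). The particular table list here is an
// assumption for a 64-bit core with the M extension; the real selection is parameter-driven.
//
//   val decode_table = Seq(new IDecode, new I64Decode, new MDecode, new M64Decode).flatMap(_.table)
//   val id_ctrl = Wire(new IntCtrlSigs()).decode(id_inst, decode_table)
//   // id_ctrl.legal flags unrecognized encodings, and fields such as alu_fn, sel_alu1/sel_alu2,
//   // mem/mem_cmd, and wxd steer the execute, memory, and writeback stages.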

View File

@@ -0,0 +1,383 @@
// See LICENSE for license details.
package rocket
import Chisel._
/* Automatically generated by parse-opcodes */
object Instructions {
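// Each BitPat is the full 32-bit encoding with '?' marking don't-care bits (register specifiers,
// immediates, and rounding-mode fields).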
def BEQ = BitPat("b?????????????????000?????1100011")
def BNE = BitPat("b?????????????????001?????1100011")
def BLT = BitPat("b?????????????????100?????1100011")
def BGE = BitPat("b?????????????????101?????1100011")
def BLTU = BitPat("b?????????????????110?????1100011")
def BGEU = BitPat("b?????????????????111?????1100011")
def JALR = BitPat("b?????????????????000?????1100111")
def JAL = BitPat("b?????????????????????????1101111")
def LUI = BitPat("b?????????????????????????0110111")
def AUIPC = BitPat("b?????????????????????????0010111")
def ADDI = BitPat("b?????????????????000?????0010011")
def SLLI = BitPat("b000000???????????001?????0010011")
def SLTI = BitPat("b?????????????????010?????0010011")
def SLTIU = BitPat("b?????????????????011?????0010011")
def XORI = BitPat("b?????????????????100?????0010011")
def SRLI = BitPat("b000000???????????101?????0010011")
def SRAI = BitPat("b010000???????????101?????0010011")
def ORI = BitPat("b?????????????????110?????0010011")
def ANDI = BitPat("b?????????????????111?????0010011")
def ADD = BitPat("b0000000??????????000?????0110011")
def SUB = BitPat("b0100000??????????000?????0110011")
def SLL = BitPat("b0000000??????????001?????0110011")
def SLT = BitPat("b0000000??????????010?????0110011")
def SLTU = BitPat("b0000000??????????011?????0110011")
def XOR = BitPat("b0000000??????????100?????0110011")
def SRL = BitPat("b0000000??????????101?????0110011")
def SRA = BitPat("b0100000??????????101?????0110011")
def OR = BitPat("b0000000??????????110?????0110011")
def AND = BitPat("b0000000??????????111?????0110011")
def ADDIW = BitPat("b?????????????????000?????0011011")
def SLLIW = BitPat("b0000000??????????001?????0011011")
def SRLIW = BitPat("b0000000??????????101?????0011011")
def SRAIW = BitPat("b0100000??????????101?????0011011")
def ADDW = BitPat("b0000000??????????000?????0111011")
def SUBW = BitPat("b0100000??????????000?????0111011")
def SLLW = BitPat("b0000000??????????001?????0111011")
def SRLW = BitPat("b0000000??????????101?????0111011")
def SRAW = BitPat("b0100000??????????101?????0111011")
def LB = BitPat("b?????????????????000?????0000011")
def LH = BitPat("b?????????????????001?????0000011")
def LW = BitPat("b?????????????????010?????0000011")
def LD = BitPat("b?????????????????011?????0000011")
def LBU = BitPat("b?????????????????100?????0000011")
def LHU = BitPat("b?????????????????101?????0000011")
def LWU = BitPat("b?????????????????110?????0000011")
def SB = BitPat("b?????????????????000?????0100011")
def SH = BitPat("b?????????????????001?????0100011")
def SW = BitPat("b?????????????????010?????0100011")
def SD = BitPat("b?????????????????011?????0100011")
def FENCE = BitPat("b?????????????????000?????0001111")
def FENCE_I = BitPat("b?????????????????001?????0001111")
def MUL = BitPat("b0000001??????????000?????0110011")
def MULH = BitPat("b0000001??????????001?????0110011")
def MULHSU = BitPat("b0000001??????????010?????0110011")
def MULHU = BitPat("b0000001??????????011?????0110011")
def DIV = BitPat("b0000001??????????100?????0110011")
def DIVU = BitPat("b0000001??????????101?????0110011")
def REM = BitPat("b0000001??????????110?????0110011")
def REMU = BitPat("b0000001??????????111?????0110011")
def MULW = BitPat("b0000001??????????000?????0111011")
def DIVW = BitPat("b0000001??????????100?????0111011")
def DIVUW = BitPat("b0000001??????????101?????0111011")
def REMW = BitPat("b0000001??????????110?????0111011")
def REMUW = BitPat("b0000001??????????111?????0111011")
def AMOADD_W = BitPat("b00000????????????010?????0101111")
def AMOXOR_W = BitPat("b00100????????????010?????0101111")
def AMOOR_W = BitPat("b01000????????????010?????0101111")
def AMOAND_W = BitPat("b01100????????????010?????0101111")
def AMOMIN_W = BitPat("b10000????????????010?????0101111")
def AMOMAX_W = BitPat("b10100????????????010?????0101111")
def AMOMINU_W = BitPat("b11000????????????010?????0101111")
def AMOMAXU_W = BitPat("b11100????????????010?????0101111")
def AMOSWAP_W = BitPat("b00001????????????010?????0101111")
def LR_W = BitPat("b00010??00000?????010?????0101111")
def SC_W = BitPat("b00011????????????010?????0101111")
def AMOADD_D = BitPat("b00000????????????011?????0101111")
def AMOXOR_D = BitPat("b00100????????????011?????0101111")
def AMOOR_D = BitPat("b01000????????????011?????0101111")
def AMOAND_D = BitPat("b01100????????????011?????0101111")
def AMOMIN_D = BitPat("b10000????????????011?????0101111")
def AMOMAX_D = BitPat("b10100????????????011?????0101111")
def AMOMINU_D = BitPat("b11000????????????011?????0101111")
def AMOMAXU_D = BitPat("b11100????????????011?????0101111")
def AMOSWAP_D = BitPat("b00001????????????011?????0101111")
def LR_D = BitPat("b00010??00000?????011?????0101111")
def SC_D = BitPat("b00011????????????011?????0101111")
def ECALL = BitPat("b00000000000000000000000001110011")
def EBREAK = BitPat("b00000000000100000000000001110011")
def URET = BitPat("b00000000001000000000000001110011")
def SRET = BitPat("b00010000001000000000000001110011")
def HRET = BitPat("b00100000001000000000000001110011")
def MRET = BitPat("b00110000001000000000000001110011")
def DRET = BitPat("b01111011001000000000000001110011")
def SFENCE_VM = BitPat("b000100000100?????000000001110011")
def WFI = BitPat("b00010000010100000000000001110011")
def CSRRW = BitPat("b?????????????????001?????1110011")
def CSRRS = BitPat("b?????????????????010?????1110011")
def CSRRC = BitPat("b?????????????????011?????1110011")
def CSRRWI = BitPat("b?????????????????101?????1110011")
def CSRRSI = BitPat("b?????????????????110?????1110011")
def CSRRCI = BitPat("b?????????????????111?????1110011")
def FADD_S = BitPat("b0000000??????????????????1010011")
def FSUB_S = BitPat("b0000100??????????????????1010011")
def FMUL_S = BitPat("b0001000??????????????????1010011")
def FDIV_S = BitPat("b0001100??????????????????1010011")
def FSGNJ_S = BitPat("b0010000??????????000?????1010011")
def FSGNJN_S = BitPat("b0010000??????????001?????1010011")
def FSGNJX_S = BitPat("b0010000??????????010?????1010011")
def FMIN_S = BitPat("b0010100??????????000?????1010011")
def FMAX_S = BitPat("b0010100??????????001?????1010011")
def FSQRT_S = BitPat("b010110000000?????????????1010011")
def FADD_D = BitPat("b0000001??????????????????1010011")
def FSUB_D = BitPat("b0000101??????????????????1010011")
def FMUL_D = BitPat("b0001001??????????????????1010011")
def FDIV_D = BitPat("b0001101??????????????????1010011")
def FSGNJ_D = BitPat("b0010001??????????000?????1010011")
def FSGNJN_D = BitPat("b0010001??????????001?????1010011")
def FSGNJX_D = BitPat("b0010001??????????010?????1010011")
def FMIN_D = BitPat("b0010101??????????000?????1010011")
def FMAX_D = BitPat("b0010101??????????001?????1010011")
def FCVT_S_D = BitPat("b010000000001?????????????1010011")
def FCVT_D_S = BitPat("b010000100000?????????????1010011")
def FSQRT_D = BitPat("b010110100000?????????????1010011")
def FLE_S = BitPat("b1010000??????????000?????1010011")
def FLT_S = BitPat("b1010000??????????001?????1010011")
def FEQ_S = BitPat("b1010000??????????010?????1010011")
def FLE_D = BitPat("b1010001??????????000?????1010011")
def FLT_D = BitPat("b1010001??????????001?????1010011")
def FEQ_D = BitPat("b1010001??????????010?????1010011")
def FCVT_W_S = BitPat("b110000000000?????????????1010011")
def FCVT_WU_S = BitPat("b110000000001?????????????1010011")
def FCVT_L_S = BitPat("b110000000010?????????????1010011")
def FCVT_LU_S = BitPat("b110000000011?????????????1010011")
def FMV_X_S = BitPat("b111000000000?????000?????1010011")
def FCLASS_S = BitPat("b111000000000?????001?????1010011")
def FCVT_W_D = BitPat("b110000100000?????????????1010011")
def FCVT_WU_D = BitPat("b110000100001?????????????1010011")
def FCVT_L_D = BitPat("b110000100010?????????????1010011")
def FCVT_LU_D = BitPat("b110000100011?????????????1010011")
def FMV_X_D = BitPat("b111000100000?????000?????1010011")
def FCLASS_D = BitPat("b111000100000?????001?????1010011")
def FCVT_S_W = BitPat("b110100000000?????????????1010011")
def FCVT_S_WU = BitPat("b110100000001?????????????1010011")
def FCVT_S_L = BitPat("b110100000010?????????????1010011")
def FCVT_S_LU = BitPat("b110100000011?????????????1010011")
def FMV_S_X = BitPat("b111100000000?????000?????1010011")
def FCVT_D_W = BitPat("b110100100000?????????????1010011")
def FCVT_D_WU = BitPat("b110100100001?????????????1010011")
def FCVT_D_L = BitPat("b110100100010?????????????1010011")
def FCVT_D_LU = BitPat("b110100100011?????????????1010011")
def FMV_D_X = BitPat("b111100100000?????000?????1010011")
def FLW = BitPat("b?????????????????010?????0000111")
def FLD = BitPat("b?????????????????011?????0000111")
def FSW = BitPat("b?????????????????010?????0100111")
def FSD = BitPat("b?????????????????011?????0100111")
def FMADD_S = BitPat("b?????00??????????????????1000011")
def FMSUB_S = BitPat("b?????00??????????????????1000111")
def FNMSUB_S = BitPat("b?????00??????????????????1001011")
def FNMADD_S = BitPat("b?????00??????????????????1001111")
def FMADD_D = BitPat("b?????01??????????????????1000011")
def FMSUB_D = BitPat("b?????01??????????????????1000111")
def FNMSUB_D = BitPat("b?????01??????????????????1001011")
def FNMADD_D = BitPat("b?????01??????????????????1001111")
def CUSTOM0 = BitPat("b?????????????????000?????0001011")
def CUSTOM0_RS1 = BitPat("b?????????????????010?????0001011")
def CUSTOM0_RS1_RS2 = BitPat("b?????????????????011?????0001011")
def CUSTOM0_RD = BitPat("b?????????????????100?????0001011")
def CUSTOM0_RD_RS1 = BitPat("b?????????????????110?????0001011")
def CUSTOM0_RD_RS1_RS2 = BitPat("b?????????????????111?????0001011")
def CUSTOM1 = BitPat("b?????????????????000?????0101011")
def CUSTOM1_RS1 = BitPat("b?????????????????010?????0101011")
def CUSTOM1_RS1_RS2 = BitPat("b?????????????????011?????0101011")
def CUSTOM1_RD = BitPat("b?????????????????100?????0101011")
def CUSTOM1_RD_RS1 = BitPat("b?????????????????110?????0101011")
def CUSTOM1_RD_RS1_RS2 = BitPat("b?????????????????111?????0101011")
def CUSTOM2 = BitPat("b?????????????????000?????1011011")
def CUSTOM2_RS1 = BitPat("b?????????????????010?????1011011")
def CUSTOM2_RS1_RS2 = BitPat("b?????????????????011?????1011011")
def CUSTOM2_RD = BitPat("b?????????????????100?????1011011")
def CUSTOM2_RD_RS1 = BitPat("b?????????????????110?????1011011")
def CUSTOM2_RD_RS1_RS2 = BitPat("b?????????????????111?????1011011")
def CUSTOM3 = BitPat("b?????????????????000?????1111011")
def CUSTOM3_RS1 = BitPat("b?????????????????010?????1111011")
def CUSTOM3_RS1_RS2 = BitPat("b?????????????????011?????1111011")
def CUSTOM3_RD = BitPat("b?????????????????100?????1111011")
def CUSTOM3_RD_RS1 = BitPat("b?????????????????110?????1111011")
def CUSTOM3_RD_RS1_RS2 = BitPat("b?????????????????111?????1111011")
def SLLI_RV32 = BitPat("b0000000??????????001?????0010011")
def SRLI_RV32 = BitPat("b0000000??????????101?????0010011")
def SRAI_RV32 = BitPat("b0100000??????????101?????0010011")
def FRFLAGS = BitPat("b00000000000100000010?????1110011")
def FSFLAGS = BitPat("b000000000001?????001?????1110011")
def FSFLAGSI = BitPat("b000000000001?????101?????1110011")
def FRRM = BitPat("b00000000001000000010?????1110011")
def FSRM = BitPat("b000000000010?????001?????1110011")
def FSRMI = BitPat("b000000000010?????101?????1110011")
def FSCSR = BitPat("b000000000011?????001?????1110011")
def FRCSR = BitPat("b00000000001100000010?????1110011")
def RDCYCLE = BitPat("b11000000000000000010?????1110011")
def RDTIME = BitPat("b11000000000100000010?????1110011")
def RDINSTRET = BitPat("b11000000001000000010?????1110011")
def RDCYCLEH = BitPat("b11001000000000000010?????1110011")
def RDTIMEH = BitPat("b11001000000100000010?????1110011")
def RDINSTRETH = BitPat("b11001000001000000010?????1110011")
def SCALL = BitPat("b00000000000000000000000001110011")
def SBREAK = BitPat("b00000000000100000000000001110011")
}
object Causes {
val misaligned_fetch = 0x0
val fault_fetch = 0x1
val illegal_instruction = 0x2
val breakpoint = 0x3
val misaligned_load = 0x4
val fault_load = 0x5
val misaligned_store = 0x6
val fault_store = 0x7
val user_ecall = 0x8
val supervisor_ecall = 0x9
val hypervisor_ecall = 0xa
val machine_ecall = 0xb
val all = {
val res = collection.mutable.ArrayBuffer[Int]()
res += misaligned_fetch
res += fault_fetch
res += illegal_instruction
res += breakpoint
res += misaligned_load
res += fault_load
res += misaligned_store
res += fault_store
res += user_ecall
res += supervisor_ecall
res += hypervisor_ecall
res += machine_ecall
res.toArray
}
}
object CSRs {
val fflags = 0x1
val frm = 0x2
val fcsr = 0x3
val cycle = 0xc00
val time = 0xc01
val instret = 0xc02
val sstatus = 0x100
val sie = 0x104
val stvec = 0x105
val sscratch = 0x140
val sepc = 0x141
val scause = 0x142
val sbadaddr = 0x143
val sip = 0x144
val sptbr = 0x180
val scycle = 0xd00
val stime = 0xd01
val sinstret = 0xd02
val mstatus = 0x300
val medeleg = 0x302
val mideleg = 0x303
val mie = 0x304
val mtvec = 0x305
val mscratch = 0x340
val mepc = 0x341
val mcause = 0x342
val mbadaddr = 0x343
val mip = 0x344
val mucounteren = 0x310
val mscounteren = 0x311
val mucycle_delta = 0x700
val mutime_delta = 0x701
val muinstret_delta = 0x702
val mscycle_delta = 0x704
val mstime_delta = 0x705
val msinstret_delta = 0x706
val tdrselect = 0x7a0
val tdrdata1 = 0x7a1
val tdrdata2 = 0x7a2
val tdrdata3 = 0x7a3
val dcsr = 0x7b0
val dpc = 0x7b1
val dscratch = 0x7b2
val mcycle = 0xf00
val mtime = 0xf01
val minstret = 0xf02
val misa = 0xf10
val mvendorid = 0xf11
val marchid = 0xf12
val mimpid = 0xf13
val mhartid = 0xf14
val mreset = 0x7c2
val cycleh = 0xc80
val timeh = 0xc81
val instreth = 0xc82
val mucycle_deltah = 0x780
val mutime_deltah = 0x781
val muinstret_deltah = 0x782
val mscycle_deltah = 0x784
val mstime_deltah = 0x785
val msinstret_deltah = 0x786
val mcycleh = 0xf80
val mtimeh = 0xf81
val minstreth = 0xf82
val all = {
val res = collection.mutable.ArrayBuffer[Int]()
res += fflags
res += frm
res += fcsr
res += cycle
res += time
res += instret
res += sstatus
res += sie
res += stvec
res += sscratch
res += sepc
res += scause
res += sbadaddr
res += sip
res += sptbr
res += scycle
res += stime
res += sinstret
res += mstatus
res += medeleg
res += mideleg
res += mie
res += mtvec
res += mscratch
res += mepc
res += mcause
res += mbadaddr
res += mip
res += mucounteren
res += mscounteren
res += mucycle_delta
res += mutime_delta
res += muinstret_delta
res += mscycle_delta
res += mstime_delta
res += msinstret_delta
res += tdrselect
res += tdrdata1
res += tdrdata2
res += tdrdata3
res += dcsr
res += dpc
res += dscratch
res += mcycle
res += mtime
res += minstret
res += misa
res += mvendorid
res += marchid
res += mimpid
res += mhartid
res += mreset
res.toArray
}
val all32 = {
val res = collection.mutable.ArrayBuffer(all:_*)
res += cycleh
res += timeh
res += instreth
res += mucycle_deltah
res += mutime_deltah
res += muinstret_deltah
res += mscycle_deltah
res += mstime_deltah
res += msinstret_deltah
res += mcycleh
res += mtimeh
res += minstreth
res.toArray
}
}

View File

@@ -0,0 +1,152 @@
// See LICENSE for license details.
package rocket
import Chisel._
import ALU._
import Util._
class MultiplierReq(dataBits: Int, tagBits: Int) extends Bundle {
val fn = Bits(width = SZ_ALU_FN)
val dw = Bits(width = SZ_DW)
val in1 = Bits(width = dataBits)
val in2 = Bits(width = dataBits)
val tag = UInt(width = tagBits)
override def cloneType = new MultiplierReq(dataBits, tagBits).asInstanceOf[this.type]
}
class MultiplierResp(dataBits: Int, tagBits: Int) extends Bundle {
val data = Bits(width = dataBits)
val tag = UInt(width = tagBits)
override def cloneType = new MultiplierResp(dataBits, tagBits).asInstanceOf[this.type]
}
class MultiplierIO(dataBits: Int, tagBits: Int) extends Bundle {
val req = Decoupled(new MultiplierReq(dataBits, tagBits)).flip
val kill = Bool(INPUT)
val resp = Decoupled(new MultiplierResp(dataBits, tagBits))
}
class MulDiv(
width: Int,
nXpr: Int = 32,
unroll: Int = 1,
earlyOut: Boolean = false) extends Module {
val io = new MultiplierIO(width, log2Up(nXpr))
val w = io.req.bits.in1.getWidth
val mulw = (w+unroll-1)/unroll*unroll
val s_ready :: s_neg_inputs :: s_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(UInt(), 6)
val state = Reg(init=s_ready)
val req = Reg(io.req.bits)
val count = Reg(UInt(width = log2Up(w+1)))
val neg_out = Reg(Bool())
val isMul = Reg(Bool())
val isHi = Reg(Bool())
val divisor = Reg(Bits(width = w+1)) // div only needs w bits
val remainder = Reg(Bits(width = 2*mulw+2)) // div only needs 2*w+1 bits
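// remainder is shared between the two operations: for divides it holds the partial remainder in
// its upper half with quotient bits shifting into the lower half, and for multiplies it holds the
// accumulated product alongside the not-yet-consumed multiplier bits.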
val cmdMul :: cmdHi :: lhsSigned :: rhsSigned :: Nil =
DecodeLogic(io.req.bits.fn, List(X, X, X, X), List(
FN_DIV -> List(N, N, Y, Y),
FN_REM -> List(N, Y, Y, Y),
FN_DIVU -> List(N, N, N, N),
FN_REMU -> List(N, Y, N, N),
FN_MUL -> List(Y, N, X, X),
FN_MULH -> List(Y, Y, Y, Y),
FN_MULHU -> List(Y, Y, N, N),
FN_MULHSU -> List(Y, Y, Y, N))).map(_ toBool)
require(w == 32 || w == 64)
def halfWidth(req: MultiplierReq) = Bool(w > 32) && req.dw === DW_32
def sext(x: Bits, halfW: Bool, signed: Bool) = {
val sign = signed && Mux(halfW, x(w/2-1), x(w-1))
val hi = Mux(halfW, Fill(w/2, sign), x(w-1,w/2))
(Cat(hi, x(w/2-1,0)), sign)
}
val (lhs_in, lhs_sign) = sext(io.req.bits.in1, halfWidth(io.req.bits), lhsSigned)
val (rhs_in, rhs_sign) = sext(io.req.bits.in2, halfWidth(io.req.bits), rhsSigned)
val subtractor = remainder(2*w,w) - divisor(w,0)
val less = subtractor(w)
val negated_remainder = -remainder(w-1,0)
when (state === s_neg_inputs) {
when (remainder(w-1) || isMul) {
remainder := negated_remainder
}
when (divisor(w-1) || isMul) {
divisor := subtractor
}
state := s_busy
}
when (state === s_neg_output) {
remainder := negated_remainder
state := s_done
}
when (state === s_move_rem) {
remainder := remainder(2*w, w+1)
state := Mux(neg_out, s_neg_output, s_done)
}
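// Multiply: consume unroll multiplier bits per cycle, adding the partial product into the upper
// half of mulReg; with earlyOut, iteration can finish early once the remaining multiplier bits
// are all zero (only when the low half of the result is needed).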
when (state === s_busy && isMul) {
val mulReg = Cat(remainder(2*mulw+1,w+1),remainder(w-1,0))
val mplier = mulReg(mulw-1,0)
val accum = mulReg(2*mulw,mulw).toSInt
val mpcand = divisor.toSInt
val prod = mplier(unroll-1,0) * mpcand + accum
val nextMulReg = Cat(prod, mplier(mulw-1,unroll)).toUInt
val eOutMask = (SInt(BigInt(-1) << mulw) >> (count * unroll)(log2Up(mulw)-1,0))(mulw-1,0)
val eOut = Bool(earlyOut) && count =/= mulw/unroll-1 && count =/= 0 &&
!isHi && (mplier & ~eOutMask) === UInt(0)
val eOutRes = (mulReg >> (mulw - count * unroll)(log2Up(mulw)-1,0))
val nextMulReg1 = Cat(nextMulReg(2*mulw,mulw), Mux(eOut, eOutRes, nextMulReg)(mulw-1,0))
remainder := Cat(nextMulReg1 >> w, Bool(false), nextMulReg1(w-1,0))
count := count + 1
when (eOut || count === mulw/unroll-1) {
state := Mux(isHi, s_move_rem, s_done)
}
}
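// Divide: shift-and-subtract, producing one quotient bit per cycle; with earlyOut, the dividend
// is left-shifted on the first cycle to skip over leading zero quotient bits.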
when (state === s_busy && !isMul) {
when (count === w) {
state := Mux(isHi, s_move_rem, Mux(neg_out, s_neg_output, s_done))
}
count := count + 1
remainder := Cat(Mux(less, remainder(2*w-1,w), subtractor(w-1,0)), remainder(w-1,0), !less)
val divisorMSB = Log2(divisor(w-1,0), w)
val dividendMSB = Log2(remainder(w-1,0), w)
val eOutPos = UInt(w-1) + divisorMSB - dividendMSB
val eOutZero = divisorMSB > dividendMSB
val eOut = count === 0 && less /* not divby0 */ && (eOutPos > 0 || eOutZero)
when (Bool(earlyOut) && eOut) {
val shift = Mux(eOutZero, UInt(w-1), eOutPos(log2Up(w)-1,0))
remainder := remainder(w-1,0) << shift
count := shift
}
when (count === 0 && !less /* divby0 */ && !isHi) { neg_out := false }
}
when (io.resp.fire() || io.kill) {
state := s_ready
}
when (io.req.fire()) {
state := Mux(lhs_sign || rhs_sign && !cmdMul, s_neg_inputs, s_busy)
isMul := cmdMul
isHi := cmdHi
count := 0
neg_out := !cmdMul && Mux(cmdHi, lhs_sign, lhs_sign =/= rhs_sign)
divisor := Cat(rhs_sign, rhs_in)
remainder := lhs_in
req := io.req.bits
}
io.resp.bits := req
io.resp.bits.data := Mux(halfWidth(req), Cat(Fill(w/2, remainder(w/2-1)), remainder(w/2-1,0)), remainder(w-1,0))
io.resp.valid := state === s_done
io.req.ready := state === s_ready
}
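// Illustrative sketch only, not part of the original file: driving a MulDiv instance from an
// execute stage. The surrounding names (ex_reg_valid, ex_ctrl, ex_rs, ex_waddr, xLen, mulUnroll,
// divEarlyOut) are assumptions for illustration; rocket.scala wires up the real instance.
//
//   val div = Module(new MulDiv(width = xLen, unroll = mulUnroll, earlyOut = divEarlyOut))
//   div.io.req.valid := ex_reg_valid && ex_ctrl.div
//   div.io.req.bits.fn := ex_ctrl.alu_fn
//   div.io.req.bits.dw := ex_ctrl.alu_dw
//   div.io.req.bits.in1 := ex_rs(0)
//   div.io.req.bits.in2 := ex_rs(1)
//   div.io.req.bits.tag := ex_waddr
//   // div.io.resp is later arbitrated into the register-file writeback port, using resp.bits.tag
//   // to recover the destination register.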

File diff suppressed because it is too large

View File

@@ -0,0 +1,4 @@
// See LICENSE for license details.
package object rocket extends
rocket.constants.ScalarOpConstants

View File

@@ -0,0 +1,203 @@
// See LICENSE for license details.
package rocket
import Chisel._
import uncore.agents._
import uncore.constants._
import Util._
import cde.{Parameters, Field}
class PTWReq(implicit p: Parameters) extends CoreBundle()(p) {
val prv = Bits(width = 2)
val pum = Bool()
val mxr = Bool()
val addr = UInt(width = vpnBits)
val store = Bool()
val fetch = Bool()
}
class PTWResp(implicit p: Parameters) extends CoreBundle()(p) {
val pte = new PTE
}
class TLBPTWIO(implicit p: Parameters) extends CoreBundle()(p) {
val req = Decoupled(new PTWReq)
val resp = Valid(new PTWResp).flip
val ptbr = new PTBR().asInput
val invalidate = Bool(INPUT)
val status = new MStatus().asInput
}
class DatapathPTWIO(implicit p: Parameters) extends CoreBundle()(p) {
val ptbr = new PTBR().asInput
val invalidate = Bool(INPUT)
val status = new MStatus().asInput
}
class PTE(implicit p: Parameters) extends CoreBundle()(p) {
val reserved_for_hardware = Bits(width = 16)
val ppn = UInt(width = 38)
val reserved_for_software = Bits(width = 2)
val d = Bool()
val a = Bool()
val g = Bool()
val u = Bool()
val x = Bool()
val w = Bool()
val r = Bool()
val v = Bool()
def table(dummy: Int = 0) = v && !r && !w && !x
def leaf(dummy: Int = 0) = v && (r || (x && !w))
def ur(dummy: Int = 0) = sr() && u
def uw(dummy: Int = 0) = sw() && u
def ux(dummy: Int = 0) = sx() && u
def sr(dummy: Int = 0) = leaf() && r
def sw(dummy: Int = 0) = leaf() && w
def sx(dummy: Int = 0) = leaf() && x
def access_ok(req: PTWReq) = {
val perm_ok = Mux(req.fetch, x, Mux(req.store, w, r || (x && req.mxr)))
val priv_ok = Mux(u, !req.pum, req.prv(0))
leaf() && priv_ok && perm_ok
}
}
class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) {
val io = new Bundle {
val requestor = Vec(n, new TLBPTWIO).flip
val mem = new HellaCacheIO
val dpath = new DatapathPTWIO
}
require(usingAtomics, "PTW requires atomic memory operations")
val s_ready :: s_req :: s_wait :: s_set_dirty :: s_wait_dirty :: s_done :: Nil = Enum(UInt(), 6)
val state = Reg(init=s_ready)
val count = Reg(UInt(width = log2Up(pgLevels)))
val r_req = Reg(new PTWReq)
val r_req_dest = Reg(Bits())
val r_pte = Reg(new PTE)
val vpn_idxs = (0 until pgLevels).map(i => (r_req.addr >> (pgLevels-i-1)*pgLevelBits)(pgLevelBits-1,0))
val vpn_idx = vpn_idxs(count)
val arb = Module(new RRArbiter(new PTWReq, n))
arb.io.in <> io.requestor.map(_.req)
arb.io.out.ready := state === s_ready
val pte = new PTE().fromBits(io.mem.resp.bits.data)
val pte_addr = Cat(r_pte.ppn, vpn_idx).toUInt << log2Up(xLen/8)
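// Physical address of the PTE for the current level: the base PPN concatenated with this level's
// VPN slice, scaled by the PTE size (xLen/8 bytes).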
when (arb.io.out.fire()) {
r_req := arb.io.out.bits
r_req_dest := arb.io.chosen
r_pte.ppn := io.dpath.ptbr.ppn
}
val (pte_cache_hit, pte_cache_data) = {
val size = 1 << log2Up(pgLevels * 2)
val plru = new PseudoLRU(size)
val valid = Reg(init = UInt(0, size))
val tags = Reg(Vec(size, UInt(width = paddrBits)))
val data = Reg(Vec(size, UInt(width = ppnBits)))
val hits = tags.map(_ === pte_addr).toBits & valid
val hit = hits.orR
when (io.mem.resp.valid && pte.table() && !hit) {
val r = Mux(valid.andR, plru.replace, PriorityEncoder(~valid))
valid := valid | UIntToOH(r)
tags(r) := pte_addr
data(r) := pte.ppn
}
when (hit && state === s_req) { plru.access(OHToUInt(hits)) }
when (io.dpath.invalidate) { valid := 0 }
(hit, Mux1H(hits, data))
}
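// The small fully-associative cache above holds intermediate (non-leaf) PTEs so that repeated
// walks can skip the upper levels of the page table.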
val set_dirty_bit = pte.access_ok(r_req) && (!pte.a || (r_req.store && !pte.d))
when (io.mem.resp.valid && state === s_wait && !set_dirty_bit) {
r_pte := pte
}
val pte_wdata = Wire(init=new PTE().fromBits(0))
pte_wdata.a := true
pte_wdata.d := r_req.store
io.mem.req.valid := state === s_req || state === s_set_dirty
io.mem.req.bits.phys := Bool(true)
io.mem.req.bits.cmd := Mux(state === s_set_dirty, M_XA_OR, M_XRD)
io.mem.req.bits.typ := MT_D
io.mem.req.bits.addr := pte_addr
io.mem.s1_data := pte_wdata.toBits
io.mem.s1_kill := Bool(false)
io.mem.invalidate_lr := Bool(false)
val r_resp_ppn = io.mem.req.bits.addr >> pgIdxBits
val resp_ppns = (0 until pgLevels-1).map(i => Cat(r_resp_ppn >> pgLevelBits*(pgLevels-i-1), r_req.addr(pgLevelBits*(pgLevels-i-1)-1,0))) :+ r_resp_ppn
val resp_ppn = resp_ppns(count)
val resp_val = state === s_done
for (i <- 0 until io.requestor.size) {
io.requestor(i).resp.valid := resp_val && (r_req_dest === i)
io.requestor(i).resp.bits.pte := r_pte
io.requestor(i).resp.bits.pte.ppn := resp_ppn
io.requestor(i).ptbr := io.dpath.ptbr
io.requestor(i).invalidate := io.dpath.invalidate
io.requestor(i).status := io.dpath.status
}
// control state machine
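// s_ready: arbitrate requestors; s_req: hit in the PTE cache and descend, or issue a memory read;
// s_wait: on a response, descend a level, finish, or go set the accessed/dirty bits;
// s_set_dirty/s_wait_dirty: atomically OR in A/D and re-read the PTE; s_done: return the PTE.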
switch (state) {
is (s_ready) {
when (arb.io.out.valid) {
state := s_req
}
count := UInt(0)
}
is (s_req) {
when (pte_cache_hit && count < pgLevels-1) {
io.mem.req.valid := false
state := s_req
count := count + 1
r_pte.ppn := pte_cache_data
}.elsewhen (io.mem.req.ready) {
state := s_wait
}
}
is (s_wait) {
when (io.mem.s2_nack) {
state := s_req
}
when (io.mem.resp.valid) {
state := s_done
when (set_dirty_bit) {
state := s_set_dirty
}
when (pte.table() && count < pgLevels-1) {
state := s_req
count := count + 1
}
}
}
is (s_set_dirty) {
when (io.mem.req.ready) {
state := s_wait_dirty
}
}
is (s_wait_dirty) {
when (io.mem.s2_nack) {
state := s_set_dirty
}
when (io.mem.resp.valid) {
state := s_req
}
}
is (s_done) {
state := s_ready
}
}
}

View File

@@ -0,0 +1,303 @@
// See LICENSE for license details.
package rocket
import Chisel._
import uncore.tilelink._
import uncore.constants._
import uncore.agents.CacheName
import Util._
import cde.{Parameters, Field}
case object RoccMaxTaggedMemXacts extends Field[Int]
case object RoccNMemChannels extends Field[Int]
case object RoccNPTWPorts extends Field[Int]
case object RoccNCSRs extends Field[Int]
class RoCCCSRs(implicit p: Parameters) extends CoreBundle()(p) {
val rdata = Vec(nRoccCsrs, UInt(INPUT, xLen))
val waddr = UInt(OUTPUT, CSR.ADDRSZ)
val wdata = UInt(OUTPUT, xLen)
val wen = Bool(OUTPUT)
}
class RoCCInstruction extends Bundle
{
val funct = Bits(width = 7)
val rs2 = Bits(width = 5)
val rs1 = Bits(width = 5)
val xd = Bool()
val xs1 = Bool()
val xs2 = Bool()
val rd = Bits(width = 5)
val opcode = Bits(width = 7)
}
class RoCCCommand(implicit p: Parameters) extends CoreBundle()(p) {
val inst = new RoCCInstruction
val rs1 = Bits(width = xLen)
val rs2 = Bits(width = xLen)
val status = new MStatus
}
class RoCCResponse(implicit p: Parameters) extends CoreBundle()(p) {
val rd = Bits(width = 5)
val data = Bits(width = xLen)
}
class RoCCInterface(implicit p: Parameters) extends CoreBundle()(p) {
val cmd = Decoupled(new RoCCCommand).flip
val resp = Decoupled(new RoCCResponse)
val mem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" }))
val busy = Bool(OUTPUT)
val interrupt = Bool(OUTPUT)
// These should be handled differently, eventually
val autl = new ClientUncachedTileLinkIO
val utl = Vec(p(RoccNMemChannels), new ClientUncachedTileLinkIO)
val ptw = Vec(p(RoccNPTWPorts), new TLBPTWIO)
val fpu_req = Decoupled(new FPInput)
val fpu_resp = Decoupled(new FPResult).flip
val exception = Bool(INPUT)
val csr = (new RoCCCSRs).flip
val host_id = UInt(INPUT, log2Up(nCores))
override def cloneType = new RoCCInterface().asInstanceOf[this.type]
}
abstract class RoCC(implicit p: Parameters) extends CoreModule()(p) {
val io = new RoCCInterface
io.mem.req.bits.phys := Bool(true) // don't perform address translation
}
class AccumulatorExample(n: Int = 4)(implicit p: Parameters) extends RoCC()(p) {
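// Software-visible behavior (as implemented below): funct 0 writes rs1 into regfile(rs2), funct 1
// reads the entry back, funct 2 loads the doubleword at address rs1 into regfile(rs2), and funct 3
// accumulates rs1 into regfile(rs2). It is typically reached through one of the custom opcodes
// routed by the RoccCommandRouter defined below.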
val regfile = Mem(n, UInt(width = xLen))
val busy = Reg(init = Vec.fill(n){Bool(false)})
val cmd = Queue(io.cmd)
val funct = cmd.bits.inst.funct
val addr = cmd.bits.rs2(log2Up(n)-1,0)
val doWrite = funct === UInt(0)
val doRead = funct === UInt(1)
val doLoad = funct === UInt(2)
val doAccum = funct === UInt(3)
val memRespTag = io.mem.resp.bits.tag(log2Up(n)-1,0)
// datapath
val addend = cmd.bits.rs1
val accum = regfile(addr)
val wdata = Mux(doWrite, addend, accum + addend)
when (cmd.fire() && (doWrite || doAccum)) {
regfile(addr) := wdata
}
when (io.mem.resp.valid) {
regfile(memRespTag) := io.mem.resp.bits.data
}
// control
when (io.mem.req.fire()) {
busy(addr) := Bool(true)
}
when (io.mem.resp.valid) {
busy(memRespTag) := Bool(false)
}
val doResp = cmd.bits.inst.xd
val stallReg = busy(addr)
val stallLoad = doLoad && !io.mem.req.ready
val stallResp = doResp && !io.resp.ready
cmd.ready := !stallReg && !stallLoad && !stallResp
// command resolved if no stalls AND not issuing a load that will need a request
// PROC RESPONSE INTERFACE
io.resp.valid := cmd.valid && doResp && !stallReg && !stallLoad
// valid response if valid command, need a response, and no stalls
io.resp.bits.rd := cmd.bits.inst.rd
// Must respond with the appropriate tag, or behavior is undefined
io.resp.bits.data := accum
// The semantics are to always send out the prior accumulator register value
io.busy := cmd.valid || busy.reduce(_||_)
// Be busy when we have pending memory requests or have committed to the possibility of pending requests
io.interrupt := Bool(false)
// Set this true to trigger an interrupt on the processor (please refer to supervisor documentation)
// MEMORY REQUEST INTERFACE
io.mem.req.valid := cmd.valid && doLoad && !stallReg && !stallResp
io.mem.req.bits.addr := addend
io.mem.req.bits.tag := addr
io.mem.req.bits.cmd := M_XRD // perform a load (M_XWR for stores)
io.mem.req.bits.typ := MT_D // D = 8 bytes, W = 4, H = 2, B = 1
io.mem.req.bits.data := Bits(0) // we're not performing any stores...
io.mem.invalidate_lr := false
io.autl.acquire.valid := false
io.autl.grant.ready := false
}
class TranslatorExample(implicit p: Parameters) extends RoCC()(p) {
val req_addr = Reg(UInt(width = coreMaxAddrBits))
val req_rd = Reg(io.resp.bits.rd)
val req_offset = req_addr(pgIdxBits - 1, 0)
val req_vpn = req_addr(coreMaxAddrBits - 1, pgIdxBits)
val pte = Reg(new PTE)
val s_idle :: s_ptw_req :: s_ptw_resp :: s_resp :: Nil = Enum(Bits(), 4)
val state = Reg(init = s_idle)
io.cmd.ready := (state === s_idle)
when (io.cmd.fire()) {
req_rd := io.cmd.bits.inst.rd
req_addr := io.cmd.bits.rs1
state := s_ptw_req
}
private val ptw = io.ptw(0)
when (ptw.req.fire()) { state := s_ptw_resp }
when (state === s_ptw_resp && ptw.resp.valid) {
pte := ptw.resp.bits.pte
state := s_resp
}
when (io.resp.fire()) { state := s_idle }
ptw.req.valid := (state === s_ptw_req)
ptw.req.bits.addr := req_vpn
ptw.req.bits.store := Bool(false)
ptw.req.bits.fetch := Bool(false)
io.resp.valid := (state === s_resp)
io.resp.bits.rd := req_rd
io.resp.bits.data := Mux(pte.leaf(), Cat(pte.ppn, req_offset), ~UInt(0, xLen))
io.busy := (state =/= s_idle)
io.interrupt := Bool(false)
io.mem.req.valid := Bool(false)
io.mem.invalidate_lr := Bool(false)
io.autl.acquire.valid := Bool(false)
io.autl.grant.ready := Bool(false)
}
class CharacterCountExample(implicit p: Parameters) extends RoCC()(p)
with HasTileLinkParameters {
private val blockOffset = tlBeatAddrBits + tlByteAddrBits
val needle = Reg(UInt(width = 8))
val addr = Reg(UInt(width = coreMaxAddrBits))
val count = Reg(UInt(width = xLen))
val resp_rd = Reg(io.resp.bits.rd)
val addr_block = addr(coreMaxAddrBits - 1, blockOffset)
val offset = addr(blockOffset - 1, 0)
val next_addr = (addr_block + UInt(1)) << UInt(blockOffset)
val s_idle :: s_acq :: s_gnt :: s_check :: s_resp :: Nil = Enum(Bits(), 5)
val state = Reg(init = s_idle)
val gnt = io.autl.grant.bits
val recv_data = Reg(UInt(width = tlDataBits))
val recv_beat = Reg(UInt(width = tlBeatAddrBits))
val data_bytes = Vec.tabulate(tlDataBytes) { i => recv_data(8 * (i + 1) - 1, 8 * i) }
val zero_match = data_bytes.map(_ === UInt(0))
val needle_match = data_bytes.map(_ === needle)
val first_zero = PriorityEncoder(zero_match)
val chars_found = PopCount(needle_match.zipWithIndex.map {
case (matches, i) =>
val idx = Cat(recv_beat, UInt(i, tlByteAddrBits))
matches && idx >= offset && UInt(i) <= first_zero
})
val zero_found = zero_match.reduce(_ || _)
val finished = Reg(Bool())
io.cmd.ready := (state === s_idle)
io.resp.valid := (state === s_resp)
io.resp.bits.rd := resp_rd
io.resp.bits.data := count
io.autl.acquire.valid := (state === s_acq)
io.autl.acquire.bits := GetBlock(addr_block = addr_block)
io.autl.grant.ready := (state === s_gnt)
when (io.cmd.fire()) {
addr := io.cmd.bits.rs1
needle := io.cmd.bits.rs2
resp_rd := io.cmd.bits.inst.rd
count := UInt(0)
finished := Bool(false)
state := s_acq
}
when (io.autl.acquire.fire()) { state := s_gnt }
when (io.autl.grant.fire()) {
recv_beat := gnt.addr_beat
recv_data := gnt.data
state := s_check
}
when (state === s_check) {
when (!finished) {
count := count + chars_found
}
when (zero_found) { finished := Bool(true) }
when (recv_beat === UInt(tlDataBeats - 1)) {
addr := next_addr
state := Mux(zero_found || finished, s_resp, s_acq)
} .otherwise {
state := s_gnt
}
}
when (io.resp.fire()) { state := s_idle }
io.busy := (state =/= s_idle)
io.interrupt := Bool(false)
io.mem.req.valid := Bool(false)
io.mem.invalidate_lr := Bool(false)
}
class OpcodeSet(val opcodes: Seq[UInt]) {
def |(set: OpcodeSet) =
new OpcodeSet(this.opcodes ++ set.opcodes)
def matches(oc: UInt) = opcodes.map(_ === oc).reduce(_ || _)
}
object OpcodeSet {
val custom0 = new OpcodeSet(Seq(Bits("b0001011")))
val custom1 = new OpcodeSet(Seq(Bits("b0101011")))
val custom2 = new OpcodeSet(Seq(Bits("b1011011")))
val custom3 = new OpcodeSet(Seq(Bits("b1111011")))
val all = custom0 | custom1 | custom2 | custom3
}
class RoccCommandRouter(opcodes: Seq[OpcodeSet])(implicit p: Parameters)
extends CoreModule()(p) {
val io = new Bundle {
val in = Decoupled(new RoCCCommand).flip
val out = Vec(opcodes.size, Decoupled(new RoCCCommand))
val busy = Bool(OUTPUT)
}
val cmd = Queue(io.in)
val cmdReadys = io.out.zip(opcodes).map { case (out, opcode) =>
val me = opcode.matches(cmd.bits.inst.opcode)
out.valid := cmd.valid && me
out.bits := cmd.bits
out.ready && me
}
cmd.ready := cmdReadys.reduce(_ || _)
io.busy := cmd.valid
assert(PopCount(cmdReadys) <= UInt(1),
"Custom opcode matched for more than one accelerator")
}
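// Illustrative sketch only, not part of the original file: routing commands to two accelerators.
// The accelerator instances and the core-side command source are assumptions for illustration.
//
//   val router = Module(new RoccCommandRouter(Seq(OpcodeSet.custom0, OpcodeSet.custom1)))
//   router.io.in <> cmdSource                 // Decoupled[RoCCCommand] from the core
//   accumulator.io.cmd <> router.io.out(0)    // custom0 -> AccumulatorExample
//   translator.io.cmd <> router.io.out(1)     // custom1 -> TranslatorExample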

View File

@@ -0,0 +1,679 @@
// See LICENSE for license details.
package rocket
import Chisel._
import junctions._
import uncore.devices._
import uncore.agents.CacheName
import uncore.constants._
import Util._
import cde.{Parameters, Field}
case object UseFPU extends Field[Boolean]
case object FDivSqrt extends Field[Boolean]
case object XLen extends Field[Int]
case object FetchWidth extends Field[Int]
case object RetireWidth extends Field[Int]
case object UseVM extends Field[Boolean]
case object UseUser extends Field[Boolean]
case object UseDebug extends Field[Boolean]
case object UseAtomics extends Field[Boolean]
case object UsePerfCounters extends Field[Boolean]
case object FastLoadWord extends Field[Boolean]
case object FastLoadByte extends Field[Boolean]
case object MulUnroll extends Field[Int]
case object DivEarlyOut extends Field[Boolean]
case object CoreInstBits extends Field[Int]
case object CoreDataBits extends Field[Int]
case object CoreDCacheReqTagBits extends Field[Int]
case object NCustomMRWCSRs extends Field[Int]
case object MtvecWritable extends Field[Boolean]
case object MtvecInit extends Field[BigInt]
case object ResetVector extends Field[BigInt]
case object NBreakpoints extends Field[Int]
trait HasCoreParameters extends HasAddrMapParameters {
implicit val p: Parameters
val xLen = p(XLen)
val usingVM = p(UseVM)
val usingUser = p(UseUser)
val usingDebug = p(UseDebug)
val usingFPU = p(UseFPU)
val usingAtomics = p(UseAtomics)
val usingFDivSqrt = p(FDivSqrt)
val usingRoCC = !p(BuildRoCC).isEmpty
val mulUnroll = p(MulUnroll)
val divEarlyOut = p(DivEarlyOut)
val fastLoadWord = p(FastLoadWord)
val fastLoadByte = p(FastLoadByte)
val retireWidth = p(RetireWidth)
val fetchWidth = p(FetchWidth)
val coreInstBits = p(CoreInstBits)
val coreInstBytes = coreInstBits/8
val coreDataBits = xLen
val coreDataBytes = coreDataBits/8
val coreDCacheReqTagBits = 7 + (2 + (if(!usingRoCC) 0 else 1))
val vpnBitsExtended = vpnBits + (vaddrBits < xLen).toInt
val vaddrBitsExtended = vpnBitsExtended + pgIdxBits
val coreMaxAddrBits = paddrBits max vaddrBitsExtended
val nCustomMrwCsrs = p(NCustomMRWCSRs)
val roccCsrs = if (p(BuildRoCC).isEmpty) Nil
else p(BuildRoCC).flatMap(_.csrs)
val nRoccCsrs = p(RoccNCSRs)
val nCores = p(NTiles)
// Print out log of committed instructions and their writeback values.
// Requires post-processing due to out-of-order writebacks.
val enableCommitLog = false
val usingPerfCounters = p(UsePerfCounters)
val maxPAddrBits = xLen match {
case 32 => 34
case 64 => 50
}
require(paddrBits < maxPAddrBits)
require(!fastLoadByte || fastLoadWord)
}
abstract class CoreModule(implicit val p: Parameters) extends Module
with HasCoreParameters
abstract class CoreBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
with HasCoreParameters
class RegFile(n: Int, w: Int, zero: Boolean = false) {
private val rf = Mem(n, UInt(width = w))
private def access(addr: UInt) = rf(~addr(log2Up(n)-1,0))
private val reads = collection.mutable.ArrayBuffer[(UInt,UInt)]()
private var canRead = true
def read(addr: UInt) = {
require(canRead)
reads += addr -> Wire(UInt())
reads.last._2 := Mux(Bool(zero) && addr === UInt(0), UInt(0), access(addr))
reads.last._2
}
def write(addr: UInt, data: UInt) = {
canRead = false
when (addr =/= UInt(0)) {
access(addr) := data
for ((raddr, rdata) <- reads)
when (addr === raddr) { rdata := data }
}
}
}
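// Illustrative sketch (hypothetical names, not in the original source): all reads
// must be issued before the first write, a same-cycle write is forwarded to reads
// of the same address, and writes to address 0 are dropped:
//
//   val rf  = new RegFile(31, xLen)
//   val rs1 = rf.read(raddr1)                 // combinational read port
//   when (wen) { rf.write(waddr, wdata) }     // wdata bypassed to rs1 if waddr === raddr1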
object ImmGen {
def apply(sel: UInt, inst: UInt) = {
val sign = Mux(sel === IMM_Z, SInt(0), inst(31).toSInt)
val b30_20 = Mux(sel === IMM_U, inst(30,20).toSInt, sign)
val b19_12 = Mux(sel =/= IMM_U && sel =/= IMM_UJ, sign, inst(19,12).toSInt)
val b11 = Mux(sel === IMM_U || sel === IMM_Z, SInt(0),
Mux(sel === IMM_UJ, inst(20).toSInt,
Mux(sel === IMM_SB, inst(7).toSInt, sign)))
val b10_5 = Mux(sel === IMM_U || sel === IMM_Z, Bits(0), inst(30,25))
val b4_1 = Mux(sel === IMM_U, Bits(0),
Mux(sel === IMM_S || sel === IMM_SB, inst(11,8),
Mux(sel === IMM_Z, inst(19,16), inst(24,21))))
val b0 = Mux(sel === IMM_S, inst(7),
Mux(sel === IMM_I, inst(20),
Mux(sel === IMM_Z, inst(15), Bits(0))))
Cat(sign, b30_20, b19_12, b11, b10_5, b4_1, b0).toSInt
}
}
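// Worked example (for illustration only): for an I-type instruction such as
// `addi x1, x2, -5`, sel === IMM_I selects b0 = inst(20), b4_1 = inst(24,21) and
// b10_5 = inst(30,25), while b11, b19_12, and b30_20 all collapse to the sign bit
// inst(31); the concatenation therefore sign-extends inst(31,20) = 0xFFB to -5.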
class Rocket(implicit p: Parameters) extends CoreModule()(p) {
val io = new Bundle {
val prci = new PRCITileIO().flip
val imem = new FrontendIO()(p.alterPartial({case CacheName => "L1I" }))
val dmem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" }))
val ptw = new DatapathPTWIO().flip
val fpu = new FPUIO().flip
val rocc = new RoCCInterface().flip
}
val decode_table = {
(if (true) new MDecode +: (if (xLen > 32) Seq(new M64Decode) else Nil) else Nil) ++:
(if (usingAtomics) new ADecode +: (if (xLen > 32) Seq(new A64Decode) else Nil) else Nil) ++:
(if (usingFPU) new FDecode +: (if (xLen > 32) Seq(new F64Decode) else Nil) else Nil) ++:
(if (usingFPU && usingFDivSqrt) Some(new FDivSqrtDecode) else None) ++:
(if (usingRoCC) Some(new RoCCDecode) else None) ++:
(if (xLen > 32) Some(new I64Decode) else None) ++:
(if (usingVM) Some(new SDecode) else None) ++:
(if (usingDebug) Some(new DebugDecode) else None) ++:
Seq(new IDecode)
} flatMap(_.table)
val ex_ctrl = Reg(new IntCtrlSigs)
val mem_ctrl = Reg(new IntCtrlSigs)
val wb_ctrl = Reg(new IntCtrlSigs)
val ex_reg_xcpt_interrupt = Reg(Bool())
val ex_reg_valid = Reg(Bool())
val ex_reg_btb_hit = Reg(Bool())
val ex_reg_btb_resp = Reg(io.imem.btb_resp.bits)
val ex_reg_xcpt = Reg(Bool())
val ex_reg_flush_pipe = Reg(Bool())
val ex_reg_load_use = Reg(Bool())
val ex_reg_cause = Reg(UInt())
val ex_reg_replay = Reg(Bool())
val ex_reg_pc = Reg(UInt())
val ex_reg_inst = Reg(Bits())
val mem_reg_xcpt_interrupt = Reg(Bool())
val mem_reg_valid = Reg(Bool())
val mem_reg_btb_hit = Reg(Bool())
val mem_reg_btb_resp = Reg(io.imem.btb_resp.bits)
val mem_reg_xcpt = Reg(Bool())
val mem_reg_replay = Reg(Bool())
val mem_reg_flush_pipe = Reg(Bool())
val mem_reg_cause = Reg(UInt())
val mem_reg_slow_bypass = Reg(Bool())
val mem_reg_load = Reg(Bool())
val mem_reg_store = Reg(Bool())
val mem_reg_pc = Reg(UInt())
val mem_reg_inst = Reg(Bits())
val mem_reg_wdata = Reg(Bits())
val mem_reg_rs2 = Reg(Bits())
val take_pc_mem = Wire(Bool())
val wb_reg_valid = Reg(Bool())
val wb_reg_xcpt = Reg(Bool())
val wb_reg_mem_xcpt = Reg(Bool())
val wb_reg_replay = Reg(Bool())
val wb_reg_cause = Reg(UInt())
val wb_reg_pc = Reg(UInt())
val wb_reg_inst = Reg(Bits())
val wb_reg_wdata = Reg(Bits())
val wb_reg_rs2 = Reg(Bits())
val take_pc_wb = Wire(Bool())
val take_pc_mem_wb = take_pc_wb || take_pc_mem
val take_pc = take_pc_mem_wb
// decode stage
val id_pc = io.imem.resp.bits.pc
val id_inst = io.imem.resp.bits.data(0).toBits; require(fetchWidth == 1)
val id_ctrl = Wire(new IntCtrlSigs()).decode(id_inst, decode_table)
val id_raddr3 = id_inst(31,27)
val id_raddr2 = id_inst(24,20)
val id_raddr1 = id_inst(19,15)
val id_waddr = id_inst(11,7)
val id_load_use = Wire(Bool())
val id_reg_fence = Reg(init=Bool(false))
val id_ren = IndexedSeq(id_ctrl.rxs1, id_ctrl.rxs2)
val id_raddr = IndexedSeq(id_raddr1, id_raddr2)
val rf = new RegFile(31, xLen)
val id_rs = id_raddr.map(rf.read _)
val ctrl_killd = Wire(Bool())
val csr = Module(new CSRFile)
val id_csr_en = id_ctrl.csr =/= CSR.N
val id_system_insn = id_ctrl.csr === CSR.I
val id_csr_ren = (id_ctrl.csr === CSR.S || id_ctrl.csr === CSR.C) && id_raddr1 === UInt(0)
val id_csr = Mux(id_csr_ren, CSR.R, id_ctrl.csr)
val id_csr_addr = id_inst(31,20)
// this is overly conservative
val safe_csrs = CSRs.sscratch :: CSRs.sepc :: CSRs.mscratch :: CSRs.mepc :: CSRs.mcause :: CSRs.mbadaddr :: Nil
val legal_csrs = collection.mutable.LinkedHashSet(CSRs.all:_*)
val id_csr_flush = id_system_insn || (id_csr_en && !id_csr_ren && !DecodeLogic(id_csr_addr, safe_csrs.map(UInt(_)), (legal_csrs -- safe_csrs).toList.map(UInt(_))))
val id_illegal_insn = !id_ctrl.legal ||
id_ctrl.fp && !csr.io.status.fs.orR ||
id_ctrl.rocc && !csr.io.status.xs.orR
// stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE)
val id_amo_aq = id_inst(26)
val id_amo_rl = id_inst(25)
val id_fence_next = id_ctrl.fence || id_ctrl.amo && id_amo_rl
val id_mem_busy = !io.dmem.ordered || io.dmem.req.valid
val id_rocc_busy = Bool(usingRoCC) &&
(io.rocc.busy || ex_reg_valid && ex_ctrl.rocc ||
mem_reg_valid && mem_ctrl.rocc || wb_reg_valid && wb_ctrl.rocc)
id_reg_fence := id_fence_next || id_reg_fence && id_mem_busy
val id_do_fence = id_rocc_busy && id_ctrl.fence ||
id_mem_busy && (id_ctrl.amo && id_amo_aq || id_ctrl.fence_i || id_reg_fence && (id_ctrl.mem || id_ctrl.rocc) || id_csr_en)
val bpu = Module(new BreakpointUnit)
bpu.io.status := csr.io.status
bpu.io.bp := csr.io.bp
bpu.io.pc := id_pc
bpu.io.ea := mem_reg_wdata
val (id_xcpt, id_cause) = checkExceptions(List(
(csr.io.interrupt, csr.io.interrupt_cause),
(bpu.io.xcpt_if, UInt(Causes.breakpoint)),
(io.imem.resp.bits.xcpt_if, UInt(Causes.fault_fetch)),
(id_illegal_insn, UInt(Causes.illegal_instruction))))
val dcache_bypass_data =
if (fastLoadByte) io.dmem.resp.bits.data
else if (fastLoadWord) io.dmem.resp.bits.data_word_bypass
else wb_reg_wdata
// detect bypass opportunities
val ex_waddr = ex_reg_inst(11,7)
val mem_waddr = mem_reg_inst(11,7)
val wb_waddr = wb_reg_inst(11,7)
val bypass_sources = IndexedSeq(
(Bool(true), UInt(0), UInt(0)), // treat reading x0 as a bypass
(ex_reg_valid && ex_ctrl.wxd, ex_waddr, mem_reg_wdata),
(mem_reg_valid && mem_ctrl.wxd && !mem_ctrl.mem, mem_waddr, wb_reg_wdata),
(mem_reg_valid && mem_ctrl.wxd, mem_waddr, dcache_bypass_data))
val id_bypass_src = id_raddr.map(raddr => bypass_sources.map(s => s._1 && s._2 === raddr))
// execute stage
val bypass_mux = Vec(bypass_sources.map(_._3))
val ex_reg_rs_bypass = Reg(Vec(id_raddr.size, Bool()))
val ex_reg_rs_lsb = Reg(Vec(id_raddr.size, UInt()))
val ex_reg_rs_msb = Reg(Vec(id_raddr.size, UInt()))
val ex_rs = for (i <- 0 until id_raddr.size)
yield Mux(ex_reg_rs_bypass(i), bypass_mux(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i)))
val ex_imm = ImmGen(ex_ctrl.sel_imm, ex_reg_inst)
val ex_op1 = MuxLookup(ex_ctrl.sel_alu1, SInt(0), Seq(
A1_RS1 -> ex_rs(0).toSInt,
A1_PC -> ex_reg_pc.toSInt))
val ex_op2 = MuxLookup(ex_ctrl.sel_alu2, SInt(0), Seq(
A2_RS2 -> ex_rs(1).toSInt,
A2_IMM -> ex_imm,
A2_FOUR -> SInt(4)))
val alu = Module(new ALU)
alu.io.dw := ex_ctrl.alu_dw
alu.io.fn := ex_ctrl.alu_fn
alu.io.in2 := ex_op2.toUInt
alu.io.in1 := ex_op1.toUInt
// multiplier and divider
val div = Module(new MulDiv(width = xLen,
unroll = mulUnroll,
earlyOut = divEarlyOut))
div.io.req.valid := ex_reg_valid && ex_ctrl.div
div.io.req.bits.dw := ex_ctrl.alu_dw
div.io.req.bits.fn := ex_ctrl.alu_fn
div.io.req.bits.in1 := ex_rs(0)
div.io.req.bits.in2 := ex_rs(1)
div.io.req.bits.tag := ex_waddr
ex_reg_valid := !ctrl_killd
ex_reg_replay := !take_pc && io.imem.resp.valid && io.imem.resp.bits.replay
ex_reg_xcpt := !ctrl_killd && id_xcpt
ex_reg_xcpt_interrupt := !take_pc && io.imem.resp.valid && csr.io.interrupt
when (id_xcpt) { ex_reg_cause := id_cause }
when (!ctrl_killd) {
ex_ctrl := id_ctrl
ex_ctrl.csr := id_csr
ex_reg_btb_hit := io.imem.btb_resp.valid
when (io.imem.btb_resp.valid) { ex_reg_btb_resp := io.imem.btb_resp.bits }
ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush || csr.io.singleStep
ex_reg_load_use := id_load_use
when (id_ctrl.jalr && csr.io.status.debug) {
ex_reg_flush_pipe := true
ex_ctrl.fence_i := true
}
for (i <- 0 until id_raddr.size) {
val do_bypass = id_bypass_src(i).reduce(_||_)
val bypass_src = PriorityEncoder(id_bypass_src(i))
ex_reg_rs_bypass(i) := do_bypass
ex_reg_rs_lsb(i) := bypass_src
when (id_ren(i) && !do_bypass) {
ex_reg_rs_lsb(i) := id_rs(i)(bypass_src.getWidth-1,0)
ex_reg_rs_msb(i) := id_rs(i) >> bypass_src.getWidth
}
}
}
when (!ctrl_killd || csr.io.interrupt || io.imem.resp.bits.replay) {
ex_reg_inst := id_inst
ex_reg_pc := id_pc
}
// replay inst in ex stage?
val ex_pc_valid = ex_reg_valid || ex_reg_replay || ex_reg_xcpt_interrupt
val wb_dcache_miss = wb_ctrl.mem && !io.dmem.resp.valid
val replay_ex_structural = ex_ctrl.mem && !io.dmem.req.ready ||
ex_ctrl.div && !div.io.req.ready
val replay_ex_load_use = wb_dcache_miss && ex_reg_load_use
val replay_ex = ex_reg_replay || (ex_reg_valid && (replay_ex_structural || replay_ex_load_use))
val ctrl_killx = take_pc_mem_wb || replay_ex || !ex_reg_valid
// detect 2-cycle load-use delay for LB/LH/SC
val ex_slow_bypass = ex_ctrl.mem_cmd === M_XSC || Vec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_ctrl.mem_type)
val (ex_xcpt, ex_cause) = checkExceptions(List(
(ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause),
(ex_ctrl.fp && io.fpu.illegal_rm, UInt(Causes.illegal_instruction))))
// memory stage
val mem_br_taken = mem_reg_wdata(0)
val mem_br_target = mem_reg_pc.toSInt +
Mux(mem_ctrl.branch && mem_br_taken, ImmGen(IMM_SB, mem_reg_inst),
Mux(mem_ctrl.jal, ImmGen(IMM_UJ, mem_reg_inst), SInt(4)))
val mem_int_wdata = Mux(mem_ctrl.jalr, mem_br_target, mem_reg_wdata.toSInt).toUInt
val mem_npc = (Mux(mem_ctrl.jalr, encodeVirtualAddress(mem_reg_wdata, mem_reg_wdata).toSInt, mem_br_target) & SInt(-2)).toUInt
val mem_wrong_npc = Mux(ex_pc_valid, mem_npc =/= ex_reg_pc, Mux(io.imem.resp.valid, mem_npc =/= id_pc, Bool(true)))
val mem_npc_misaligned = mem_npc(1)
val mem_cfi = mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal
val mem_cfi_taken = (mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal
val mem_misprediction =
if (p(BtbKey).nEntries == 0) mem_cfi_taken
else mem_wrong_npc
val want_take_pc_mem = mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe)
take_pc_mem := want_take_pc_mem && !mem_npc_misaligned
mem_reg_valid := !ctrl_killx
mem_reg_replay := !take_pc_mem_wb && replay_ex
mem_reg_xcpt := !ctrl_killx && ex_xcpt
mem_reg_xcpt_interrupt := !take_pc_mem_wb && ex_reg_xcpt_interrupt
when (ex_xcpt) { mem_reg_cause := ex_cause }
when (ex_pc_valid) {
mem_ctrl := ex_ctrl
mem_reg_load := ex_ctrl.mem && isRead(ex_ctrl.mem_cmd)
mem_reg_store := ex_ctrl.mem && isWrite(ex_ctrl.mem_cmd)
mem_reg_btb_hit := ex_reg_btb_hit
when (ex_reg_btb_hit) { mem_reg_btb_resp := ex_reg_btb_resp }
mem_reg_flush_pipe := ex_reg_flush_pipe
mem_reg_slow_bypass := ex_slow_bypass
mem_reg_inst := ex_reg_inst
mem_reg_pc := ex_reg_pc
mem_reg_wdata := alu.io.out
when (ex_ctrl.rxs2 && (ex_ctrl.mem || ex_ctrl.rocc)) {
mem_reg_rs2 := ex_rs(1)
}
}
val (mem_new_xcpt, mem_new_cause) = checkExceptions(List(
(mem_reg_load && bpu.io.xcpt_ld, UInt(Causes.breakpoint)),
(mem_reg_store && bpu.io.xcpt_st, UInt(Causes.breakpoint)),
(want_take_pc_mem && mem_npc_misaligned, UInt(Causes.misaligned_fetch)),
(mem_ctrl.mem && io.dmem.xcpt.ma.st, UInt(Causes.misaligned_store)),
(mem_ctrl.mem && io.dmem.xcpt.ma.ld, UInt(Causes.misaligned_load)),
(mem_ctrl.mem && io.dmem.xcpt.pf.st, UInt(Causes.fault_store)),
(mem_ctrl.mem && io.dmem.xcpt.pf.ld, UInt(Causes.fault_load))))
val (mem_xcpt, mem_cause) = checkExceptions(List(
(mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause),
(mem_reg_valid && mem_new_xcpt, mem_new_cause)))
val dcache_kill_mem = mem_reg_valid && mem_ctrl.wxd && io.dmem.replay_next // structural hazard on writeback port
val fpu_kill_mem = mem_reg_valid && mem_ctrl.fp && io.fpu.nack_mem
val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem
val killm_common = dcache_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid
div.io.kill := killm_common && Reg(next = div.io.req.fire())
val ctrl_killm = killm_common || mem_xcpt || fpu_kill_mem
// writeback stage
wb_reg_valid := !ctrl_killm
wb_reg_replay := replay_mem && !take_pc_wb
wb_reg_xcpt := mem_xcpt && !take_pc_wb
wb_reg_mem_xcpt := mem_reg_valid && mem_new_xcpt && !(mem_reg_xcpt_interrupt || mem_reg_xcpt)
when (mem_xcpt) { wb_reg_cause := mem_cause }
when (mem_reg_valid || mem_reg_replay || mem_reg_xcpt_interrupt) {
wb_ctrl := mem_ctrl
wb_reg_wdata := Mux(mem_ctrl.fp && mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata)
when (mem_ctrl.rocc) {
wb_reg_rs2 := mem_reg_rs2
}
wb_reg_inst := mem_reg_inst
wb_reg_pc := mem_reg_pc
}
val wb_set_sboard = wb_ctrl.div || wb_dcache_miss || wb_ctrl.rocc
val replay_wb_common = io.dmem.s2_nack || wb_reg_replay
val replay_wb_rocc = wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready
val replay_wb = replay_wb_common || replay_wb_rocc
val wb_xcpt = wb_reg_xcpt || csr.io.csr_xcpt
take_pc_wb := replay_wb || wb_xcpt || csr.io.eret
// writeback arbitration
val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool
val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool
val dmem_resp_waddr = io.dmem.resp.bits.tag >> 1
val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data
val dmem_resp_replay = dmem_resp_valid && io.dmem.resp.bits.replay
div.io.resp.ready := !(wb_reg_valid && wb_ctrl.wxd)
val ll_wdata = Wire(init = div.io.resp.bits.data)
val ll_waddr = Wire(init = div.io.resp.bits.tag)
val ll_wen = Wire(init = div.io.resp.fire())
if (usingRoCC) {
io.rocc.resp.ready := !(wb_reg_valid && wb_ctrl.wxd)
when (io.rocc.resp.fire()) {
div.io.resp.ready := Bool(false)
ll_wdata := io.rocc.resp.bits.data
ll_waddr := io.rocc.resp.bits.rd
ll_wen := Bool(true)
}
}
when (dmem_resp_replay && dmem_resp_xpu) {
div.io.resp.ready := Bool(false)
if (usingRoCC)
io.rocc.resp.ready := Bool(false)
ll_waddr := dmem_resp_waddr
ll_wen := Bool(true)
}
val wb_valid = wb_reg_valid && !replay_wb && !wb_xcpt
val wb_wen = wb_valid && wb_ctrl.wxd
val rf_wen = wb_wen || ll_wen
val rf_waddr = Mux(ll_wen, ll_waddr, wb_waddr)
val rf_wdata = Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data,
Mux(ll_wen, ll_wdata,
Mux(wb_ctrl.csr =/= CSR.N, csr.io.rw.rdata,
wb_reg_wdata)))
when (rf_wen) { rf.write(rf_waddr, rf_wdata) }
// hook up control/status regfile
csr.io.exception := wb_reg_xcpt
csr.io.cause := wb_reg_cause
csr.io.retire := wb_valid
csr.io.prci <> io.prci
io.fpu.fcsr_rm := csr.io.fcsr_rm
csr.io.fcsr_flags := io.fpu.fcsr_flags
io.rocc.csr <> csr.io.rocc.csr
csr.io.rocc.interrupt <> io.rocc.interrupt
csr.io.pc := wb_reg_pc
csr.io.badaddr := Mux(wb_reg_mem_xcpt, encodeVirtualAddress(wb_reg_wdata, wb_reg_wdata), wb_reg_pc)
io.ptw.ptbr := csr.io.ptbr
io.ptw.invalidate := csr.io.fatc
io.ptw.status := csr.io.status
csr.io.rw.addr := wb_reg_inst(31,20)
csr.io.rw.cmd := Mux(wb_reg_valid, wb_ctrl.csr, CSR.N)
csr.io.rw.wdata := wb_reg_wdata
val hazard_targets = Seq((id_ctrl.rxs1 && id_raddr1 =/= UInt(0), id_raddr1),
(id_ctrl.rxs2 && id_raddr2 =/= UInt(0), id_raddr2),
(id_ctrl.wxd && id_waddr =/= UInt(0), id_waddr))
val fp_hazard_targets = Seq((io.fpu.dec.ren1, id_raddr1),
(io.fpu.dec.ren2, id_raddr2),
(io.fpu.dec.ren3, id_raddr3),
(io.fpu.dec.wen, id_waddr))
val sboard = new Scoreboard(32)
sboard.clear(ll_wen, ll_waddr)
val id_sboard_hazard = checkHazards(hazard_targets, sboard.read _)
sboard.set(wb_set_sboard && wb_wen, wb_waddr)
// stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage.
val ex_cannot_bypass = ex_ctrl.csr =/= CSR.N || ex_ctrl.jalr || ex_ctrl.mem || ex_ctrl.div || ex_ctrl.fp || ex_ctrl.rocc
val data_hazard_ex = ex_ctrl.wxd && checkHazards(hazard_targets, _ === ex_waddr)
val fp_data_hazard_ex = ex_ctrl.wfd && checkHazards(fp_hazard_targets, _ === ex_waddr)
val id_ex_hazard = ex_reg_valid && (data_hazard_ex && ex_cannot_bypass || fp_data_hazard_ex)
// stall for RAW/WAW hazards on CSRs, LB/LH, and mul/div in memory stage.
val mem_mem_cmd_bh =
if (fastLoadWord) Bool(!fastLoadByte) && mem_reg_slow_bypass
else Bool(true)
val mem_cannot_bypass = mem_ctrl.csr =/= CSR.N || mem_ctrl.mem && mem_mem_cmd_bh || mem_ctrl.div || mem_ctrl.fp || mem_ctrl.rocc
val data_hazard_mem = mem_ctrl.wxd && checkHazards(hazard_targets, _ === mem_waddr)
val fp_data_hazard_mem = mem_ctrl.wfd && checkHazards(fp_hazard_targets, _ === mem_waddr)
val id_mem_hazard = mem_reg_valid && (data_hazard_mem && mem_cannot_bypass || fp_data_hazard_mem)
id_load_use := mem_reg_valid && data_hazard_mem && mem_ctrl.mem
// stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback.
val data_hazard_wb = wb_ctrl.wxd && checkHazards(hazard_targets, _ === wb_waddr)
val fp_data_hazard_wb = wb_ctrl.wfd && checkHazards(fp_hazard_targets, _ === wb_waddr)
val id_wb_hazard = wb_reg_valid && (data_hazard_wb && wb_set_sboard || fp_data_hazard_wb)
val id_stall_fpu = if (usingFPU) {
val fp_sboard = new Scoreboard(32)
fp_sboard.set((wb_dcache_miss && wb_ctrl.wfd || io.fpu.sboard_set) && wb_valid, wb_waddr)
fp_sboard.clear(dmem_resp_replay && dmem_resp_fpu, dmem_resp_waddr)
fp_sboard.clear(io.fpu.sboard_clr, io.fpu.sboard_clra)
id_csr_en && !io.fpu.fcsr_rdy || checkHazards(fp_hazard_targets, fp_sboard.read _)
} else Bool(false)
val dcache_blocked = Reg(Bool())
dcache_blocked := !io.dmem.req.ready && (io.dmem.req.valid || dcache_blocked)
val rocc_blocked = Reg(Bool())
rocc_blocked := !wb_reg_xcpt && !io.rocc.cmd.ready && (io.rocc.cmd.valid || rocc_blocked)
val ctrl_stalld =
id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard ||
id_ctrl.fp && id_stall_fpu ||
id_ctrl.mem && dcache_blocked || // reduce activity during D$ misses
id_ctrl.rocc && rocc_blocked || // reduce activity while RoCC is busy
id_do_fence ||
csr.io.csr_stall
ctrl_killd := !io.imem.resp.valid || io.imem.resp.bits.replay || take_pc || ctrl_stalld || csr.io.interrupt
io.imem.req.valid := take_pc
io.imem.req.bits.speculative := !take_pc_wb
io.imem.req.bits.pc :=
Mux(wb_xcpt || csr.io.eret, csr.io.evec, // exception or [m|s]ret
Mux(replay_wb, wb_reg_pc, // replay
mem_npc)).toUInt // mispredicted branch
io.imem.flush_icache := wb_reg_valid && wb_ctrl.fence_i && !io.dmem.s2_nack
io.imem.flush_tlb := csr.io.fatc
io.imem.resp.ready := !ctrl_stalld || csr.io.interrupt || take_pc_mem
io.imem.btb_update.valid := mem_reg_valid && !mem_npc_misaligned && mem_wrong_npc && mem_cfi_taken && !take_pc_wb
io.imem.btb_update.bits.isJump := mem_ctrl.jal || mem_ctrl.jalr
io.imem.btb_update.bits.isReturn := mem_ctrl.jalr && mem_reg_inst(19,15) === BitPat("b00??1")
io.imem.btb_update.bits.pc := mem_reg_pc
io.imem.btb_update.bits.target := io.imem.req.bits.pc
io.imem.btb_update.bits.br_pc := mem_reg_pc
io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit
io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp
io.imem.bht_update.valid := mem_reg_valid && mem_ctrl.branch && !take_pc_wb
io.imem.bht_update.bits.pc := mem_reg_pc
io.imem.bht_update.bits.taken := mem_br_taken
io.imem.bht_update.bits.mispredict := mem_wrong_npc
io.imem.bht_update.bits.prediction := io.imem.btb_update.bits.prediction
io.imem.ras_update.valid := mem_reg_valid && io.imem.btb_update.bits.isJump && !mem_npc_misaligned && !take_pc_wb
io.imem.ras_update.bits.returnAddr := mem_int_wdata
io.imem.ras_update.bits.isCall := mem_ctrl.wxd && mem_waddr(0)
io.imem.ras_update.bits.isReturn := io.imem.btb_update.bits.isReturn
io.imem.ras_update.bits.prediction := io.imem.btb_update.bits.prediction
io.fpu.valid := !ctrl_killd && id_ctrl.fp
io.fpu.killx := ctrl_killx
io.fpu.killm := killm_common
io.fpu.inst := id_inst
io.fpu.fromint_data := ex_rs(0)
io.fpu.dmem_resp_val := dmem_resp_valid && dmem_resp_fpu
io.fpu.dmem_resp_data := io.dmem.resp.bits.data_word_bypass
io.fpu.dmem_resp_type := io.dmem.resp.bits.typ
io.fpu.dmem_resp_tag := dmem_resp_waddr
io.dmem.req.valid := ex_reg_valid && ex_ctrl.mem
val ex_dcache_tag = Cat(ex_waddr, ex_ctrl.fp)
require(coreDCacheReqTagBits >= ex_dcache_tag.getWidth)
io.dmem.req.bits.tag := ex_dcache_tag
io.dmem.req.bits.cmd := ex_ctrl.mem_cmd
io.dmem.req.bits.typ := ex_ctrl.mem_type
io.dmem.req.bits.phys := Bool(false)
io.dmem.req.bits.addr := encodeVirtualAddress(ex_rs(0), alu.io.adder_out)
io.dmem.s1_kill := killm_common || mem_xcpt
io.dmem.s1_data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2)
io.dmem.invalidate_lr := wb_xcpt
io.rocc.cmd.valid := wb_reg_valid && wb_ctrl.rocc && !replay_wb_common
io.rocc.exception := wb_xcpt && csr.io.status.xs.orR
io.rocc.cmd.bits.status := csr.io.status
io.rocc.cmd.bits.inst := new RoCCInstruction().fromBits(wb_reg_inst)
io.rocc.cmd.bits.rs1 := wb_reg_wdata
io.rocc.cmd.bits.rs2 := wb_reg_rs2
if (enableCommitLog) {
val pc = Wire(SInt(width=xLen))
pc := wb_reg_pc
val inst = wb_reg_inst
val rd = RegNext(RegNext(RegNext(id_waddr)))
val wfd = wb_ctrl.wfd
val wxd = wb_ctrl.wxd
val has_data = wb_wen && !wb_set_sboard
val priv = csr.io.status.prv
when (wb_valid) {
when (wfd) {
printf ("%d 0x%x (0x%x) f%d p%d 0xXXXXXXXXXXXXXXXX\n", priv, pc, inst, rd, rd+UInt(32))
}
.elsewhen (wxd && rd =/= UInt(0) && has_data) {
printf ("%d 0x%x (0x%x) x%d 0x%x\n", priv, pc, inst, rd, rf_wdata)
}
.elsewhen (wxd && rd =/= UInt(0) && !has_data) {
printf ("%d 0x%x (0x%x) x%d p%d 0xXXXXXXXXXXXXXXXX\n", priv, pc, inst, rd, rd)
}
.otherwise {
printf ("%d 0x%x (0x%x)\n", priv, pc, inst)
}
}
when (ll_wen && rf_waddr =/= UInt(0)) {
printf ("x%d p%d 0x%x\n", rf_waddr, rf_waddr, rf_wdata)
}
}
else {
printf("C%d: %d [%d] pc=[%x] W[r%d=%x][%d] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n",
io.prci.id, csr.io.time(31,0), wb_valid, wb_reg_pc,
Mux(rf_wen, rf_waddr, UInt(0)), rf_wdata, rf_wen,
wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs(0))),
wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs(1))),
wb_reg_inst, wb_reg_inst)
}
def checkExceptions(x: Seq[(Bool, UInt)]) =
(x.map(_._1).reduce(_||_), PriorityMux(x))
def checkHazards(targets: Seq[(Bool, UInt)], cond: UInt => Bool) =
targets.map(h => h._1 && cond(h._2)).reduce(_||_)
def encodeVirtualAddress(a0: UInt, ea: UInt) = if (vaddrBitsExtended == vaddrBits) ea else {
// efficient means to compress 64-bit VA into vaddrBits+1 bits
// (VA is bad if VA(vaddrBits) != VA(vaddrBits-1))
val a = a0 >> vaddrBits-1
val e = ea(vaddrBits,vaddrBits-1).toSInt
val msb =
Mux(a === UInt(0) || a === UInt(1), e =/= SInt(0),
Mux(a.toSInt === SInt(-1) || a.toSInt === SInt(-2), e === SInt(-1), e(0)))
Cat(msb, ea(vaddrBits-1,0))
}
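// Worked example (an illustrative reading, not part of the original source):
// when vaddrBitsExtended == vaddrBits the effective address passes through
// unchanged. Otherwise, a base whose upper bits are all zero keeps msb = 0 as
// long as ea stays in the low canonical region (e === 0), and a base whose
// upper bits are all ones keeps msb = 1 while ea stays canonical (e === -1);
// the TLB's bad_va check later compares the top two bits of this compressed
// value, as noted in the comment above.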
class Scoreboard(n: Int)
{
def set(en: Bool, addr: UInt): Unit = update(en, _next | mask(en, addr))
def clear(en: Bool, addr: UInt): Unit = update(en, _next & ~mask(en, addr))
def read(addr: UInt): Bool = r(addr)
def readBypassed(addr: UInt): Bool = _next(addr)
private val r = Reg(init=Bits(0, n))
private var _next = r
private var ens = Bool(false)
private def mask(en: Bool, addr: UInt) = Mux(en, UInt(1) << addr, UInt(0))
private def update(en: Bool, update: UInt) = {
_next = update
ens = ens || en
when (ens) { r := _next }
}
}
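// Illustrative note (an assumption, not in the original source): the scoreboard
// is a bit vector of busy destination registers. In the pipeline above it is
// used roughly as
//
//   sboard.set(wb_set_sboard && wb_wen, wb_waddr)   // mark long-latency writebacks busy
//   sboard.clear(ll_wen, ll_waddr)                  // free on late writeback
//   checkHazards(hazard_targets, sboard.read _)     // stall decode on a busy source/dest
//
// set and clear calls in the same cycle compose through _next in program order.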
}


@@ -0,0 +1,151 @@
// See LICENSE for license details.
package rocket
import Chisel._
import uncore.tilelink._
import uncore.agents._
import uncore.devices._
import Util._
import cde.{Parameters, Field}
case object CoreName extends Field[String]
case object BuildRoCC extends Field[Seq[RoccParameters]]
case object NCachedTileLinkPorts extends Field[Int]
case object NUncachedTileLinkPorts extends Field[Int]
case class RoccParameters(
opcodes: OpcodeSet,
generator: Parameters => RoCC,
nMemChannels: Int = 0,
nPTWPorts : Int = 0,
csrs: Seq[Int] = Nil,
useFPU: Boolean = false)
abstract class Tile(clockSignal: Clock = null, resetSignal: Bool = null)
(implicit p: Parameters) extends Module(Option(clockSignal), Option(resetSignal)) {
val nCachedTileLinkPorts = p(NCachedTileLinkPorts)
val nUncachedTileLinkPorts = p(NUncachedTileLinkPorts)
val dcacheParams = p.alterPartial({ case CacheName => "L1D" })
val io = new Bundle {
val cached = Vec(nCachedTileLinkPorts, new ClientTileLinkIO)
val uncached = Vec(nUncachedTileLinkPorts, new ClientUncachedTileLinkIO)
val prci = new PRCITileIO().flip
}
}
class RocketTile(clockSignal: Clock = null, resetSignal: Bool = null)
(implicit p: Parameters) extends Tile(clockSignal, resetSignal)(p) {
val buildRocc = p(BuildRoCC)
val usingRocc = !buildRocc.isEmpty
val nRocc = buildRocc.size
val nFPUPorts = buildRocc.filter(_.useFPU).size
val core = Module(new Rocket()(p.alterPartial({ case CoreName => "Rocket" })))
val icache = Module(new Frontend()(p.alterPartial({
case CacheName => "L1I"
case CoreName => "Rocket" })))
val dcache =
if (p(NMSHRs) == 0) Module(new DCache()(dcacheParams)).io
else Module(new HellaCache()(dcacheParams)).io
val ptwPorts = collection.mutable.ArrayBuffer(icache.io.ptw, dcache.ptw)
val dcPorts = collection.mutable.ArrayBuffer(core.io.dmem)
val uncachedArbPorts = collection.mutable.ArrayBuffer(icache.io.mem)
val uncachedPorts = collection.mutable.ArrayBuffer[ClientUncachedTileLinkIO]()
val cachedPorts = collection.mutable.ArrayBuffer(dcache.mem)
core.io.prci <> io.prci
icache.io.cpu <> core.io.imem
val fpuOpt = if (p(UseFPU)) Some(Module(new FPU)) else None
fpuOpt.foreach(fpu => core.io.fpu <> fpu.io)
if (usingRocc) {
val respArb = Module(new RRArbiter(new RoCCResponse, nRocc))
core.io.rocc.resp <> respArb.io.out
val roccOpcodes = buildRocc.map(_.opcodes)
val cmdRouter = Module(new RoccCommandRouter(roccOpcodes))
cmdRouter.io.in <> core.io.rocc.cmd
val roccs = buildRocc.zipWithIndex.map { case (accelParams, i) =>
val rocc = accelParams.generator(p.alterPartial({
case RoccNMemChannels => accelParams.nMemChannels
case RoccNPTWPorts => accelParams.nPTWPorts
case RoccNCSRs => accelParams.csrs.size
}))
val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams))
rocc.io.cmd <> cmdRouter.io.out(i)
rocc.io.exception := core.io.rocc.exception
rocc.io.host_id := io.prci.id
dcIF.io.requestor <> rocc.io.mem
dcPorts += dcIF.io.cache
uncachedArbPorts += rocc.io.autl
rocc
}
if (nFPUPorts > 0) {
fpuOpt.foreach { fpu =>
val fpArb = Module(new InOrderArbiter(new FPInput, new FPResult, nFPUPorts))
val fp_roccs = roccs.zip(buildRocc)
.filter { case (_, params) => params.useFPU }
.map { case (rocc, _) => rocc.io }
fpArb.io.in_req <> fp_roccs.map(_.fpu_req)
fp_roccs.zip(fpArb.io.in_resp).foreach {
case (rocc, fpu_resp) => rocc.fpu_resp <> fpu_resp
}
fpu.io.cp_req <> fpArb.io.out_req
fpArb.io.out_resp <> fpu.io.cp_resp
}
}
core.io.rocc.busy := cmdRouter.io.busy || roccs.map(_.io.busy).reduce(_ || _)
core.io.rocc.interrupt := roccs.map(_.io.interrupt).reduce(_ || _)
respArb.io.in <> roccs.map(rocc => Queue(rocc.io.resp))
if (p(RoccNCSRs) > 0) {
core.io.rocc.csr.rdata <> roccs.flatMap(_.io.csr.rdata)
for ((rocc, accelParams) <- roccs.zip(buildRocc)) {
rocc.io.csr.waddr := core.io.rocc.csr.waddr
rocc.io.csr.wdata := core.io.rocc.csr.wdata
rocc.io.csr.wen := core.io.rocc.csr.wen &&
accelParams.csrs
.map(core.io.rocc.csr.waddr === UInt(_))
.reduce((a, b) => a || b)
}
}
ptwPorts ++= roccs.flatMap(_.io.ptw)
uncachedPorts ++= roccs.flatMap(_.io.utl)
}
val uncachedArb = Module(new ClientUncachedTileLinkIOArbiter(uncachedArbPorts.size))
uncachedArb.io.in <> uncachedArbPorts
uncachedArb.io.out +=: uncachedPorts
// Connect the caches and RoCC to the outer memory system
io.uncached <> uncachedPorts
io.cached <> cachedPorts
// TODO remove nCached/nUncachedTileLinkPorts parameters and these assertions
require(uncachedPorts.size == nUncachedTileLinkPorts)
require(cachedPorts.size == nCachedTileLinkPorts)
if (p(UseVM)) {
val ptw = Module(new PTW(ptwPorts.size)(dcacheParams))
ptw.io.requestor <> ptwPorts
ptw.io.mem +=: dcPorts
core.io.ptw <> ptw.io.dpath
}
val dcArb = Module(new HellaCacheArbiter(dcPorts.size)(dcacheParams))
dcArb.io.requestor <> dcPorts
dcache.cpu <> dcArb.io.mem
if (!usingRocc || nFPUPorts == 0) {
fpuOpt.foreach { fpu =>
fpu.io.cp_req.valid := Bool(false)
fpu.io.cp_resp.ready := Bool(false)
}
}
}


@@ -0,0 +1,176 @@
// See LICENSE for license details.
package rocket
import Chisel._
import Util._
import junctions._
import scala.math._
import cde.{Parameters, Field}
import uncore.agents.PseudoLRU
import uncore.coherence._
case object NTLBEntries extends Field[Int]
trait HasTLBParameters extends HasCoreParameters {
val entries = p(NTLBEntries)
val camAddrBits = log2Ceil(entries)
val camTagBits = asIdBits + vpnBits
}
class TLBReq(implicit p: Parameters) extends CoreBundle()(p) {
val vpn = UInt(width = vpnBitsExtended)
val passthrough = Bool()
val instruction = Bool()
val store = Bool()
}
class TLBResp(implicit p: Parameters) extends CoreBundle()(p) {
// lookup responses
val miss = Bool(OUTPUT)
val ppn = UInt(OUTPUT, ppnBits)
val xcpt_ld = Bool(OUTPUT)
val xcpt_st = Bool(OUTPUT)
val xcpt_if = Bool(OUTPUT)
val cacheable = Bool(OUTPUT)
}
class TLB(implicit val p: Parameters) extends Module with HasTLBParameters {
val io = new Bundle {
val req = Decoupled(new TLBReq).flip
val resp = new TLBResp
val ptw = new TLBPTWIO
}
val valid = Reg(init = UInt(0, entries))
val ppns = Reg(Vec(entries, io.ptw.resp.bits.pte.ppn))
val tags = Reg(Vec(entries, UInt(width = asIdBits + vpnBits)))
val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(UInt(), 4)
val state = Reg(init=s_ready)
val r_refill_tag = Reg(UInt(width = asIdBits + vpnBits))
val r_refill_waddr = Reg(UInt(width = log2Ceil(entries)))
val r_req = Reg(new TLBReq)
val lookup_tag = Cat(io.ptw.ptbr.asid, io.req.bits.vpn(vpnBits-1,0)).toUInt
val hitsVec = (0 until entries).map(i => valid(i) && tags(i) === lookup_tag)
val hits = hitsVec.toBits
// permission bit arrays
val pte_array = Reg(new PTE)
val u_array = Reg(UInt(width = entries)) // user permission
val sw_array = Reg(UInt(width = entries)) // write permission
val sx_array = Reg(UInt(width = entries)) // execute permission
val sr_array = Reg(UInt(width = entries)) // read permission
val dirty_array = Reg(UInt(width = entries)) // PTE dirty bit
when (io.ptw.resp.valid) {
val pte = io.ptw.resp.bits.pte
ppns(r_refill_waddr) := pte.ppn
tags(r_refill_waddr) := r_refill_tag
val mask = UIntToOH(r_refill_waddr)
valid := valid | mask
u_array := Mux(pte.u, u_array | mask, u_array & ~mask)
sr_array := Mux(pte.sr(), sr_array | mask, sr_array & ~mask)
sw_array := Mux(pte.sw(), sw_array | mask, sw_array & ~mask)
sx_array := Mux(pte.sx(), sx_array | mask, sx_array & ~mask)
dirty_array := Mux(pte.d, dirty_array | mask, dirty_array & ~mask)
}
// high if there are any unused (invalid) entries in the TLB
val plru = new PseudoLRU(entries)
val repl_waddr = Mux(!valid.andR, PriorityEncoder(~valid), plru.replace)
val do_mprv = io.ptw.status.mprv && !io.req.bits.instruction
val priv = Mux(do_mprv, io.ptw.status.mpp, io.ptw.status.prv)
val priv_s = priv === PRV.S
val priv_uses_vm = priv <= PRV.S && !io.ptw.status.debug
val priv_ok = Mux(priv_s, ~Mux(io.ptw.status.pum, u_array, UInt(0)), u_array)
val w_array = priv_ok & sw_array
val x_array = priv_ok & sx_array
val r_array = priv_ok & (sr_array | Mux(io.ptw.status.mxr, x_array, UInt(0)))
val vm_enabled = Bool(usingVM) && io.ptw.status.vm(3) && priv_uses_vm && !io.req.bits.passthrough
val bad_va =
if (vpnBits == vpnBitsExtended) Bool(false)
else io.req.bits.vpn(vpnBits) =/= io.req.bits.vpn(vpnBits-1)
// it's only a store hit if the dirty bit is set
val tag_hits = hits & (dirty_array | ~Mux(io.req.bits.store, w_array, UInt(0)))
val tag_hit = tag_hits.orR
val tlb_hit = vm_enabled && tag_hit
val tlb_miss = vm_enabled && !tag_hit && !bad_va
when (io.req.valid && tlb_hit) {
plru.access(OHToUInt(hits))
}
val paddr = Cat(io.resp.ppn, UInt(0, pgIdxBits))
val addr_prot = addrMap.getProt(paddr)
io.req.ready := state === s_ready
io.resp.xcpt_ld := bad_va || (!tlb_miss && !addr_prot.r) || (tlb_hit && !(r_array & hits).orR)
io.resp.xcpt_st := bad_va || (!tlb_miss && !addr_prot.w) || (tlb_hit && !(w_array & hits).orR)
io.resp.xcpt_if := bad_va || (!tlb_miss && !addr_prot.x) || (tlb_hit && !(x_array & hits).orR)
io.resp.cacheable := addrMap.isCacheable(paddr)
io.resp.miss := tlb_miss
io.resp.ppn := Mux(vm_enabled, Mux1H(hitsVec, ppns), io.req.bits.vpn(ppnBits-1,0))
io.ptw.req.valid := state === s_request
io.ptw.req.bits := io.ptw.status
io.ptw.req.bits.addr := r_refill_tag
io.ptw.req.bits.store := r_req.store
io.ptw.req.bits.fetch := r_req.instruction
if (usingVM) {
when (io.req.fire() && tlb_miss) {
state := s_request
r_refill_tag := lookup_tag
r_refill_waddr := repl_waddr
r_req := io.req.bits
}
when (state === s_request) {
when (io.ptw.invalidate) {
state := s_ready
}
when (io.ptw.req.ready) {
state := s_wait
when (io.ptw.invalidate) { state := s_wait_invalidate }
}
}
when (state === s_wait && io.ptw.invalidate) {
state := s_wait_invalidate
}
when (io.ptw.resp.valid) {
state := s_ready
}
when (io.ptw.invalidate) {
valid := 0
}
}
}
class DecoupledTLB(implicit p: Parameters) extends Module {
val io = new Bundle {
val req = Decoupled(new TLBReq).flip
val resp = Decoupled(new TLBResp)
val ptw = new TLBPTWIO
}
val reqq = Queue(io.req)
val tlb = Module(new TLB)
val resp_helper = DecoupledHelper(
reqq.valid, tlb.io.req.ready, io.resp.ready)
val tlb_miss = tlb.io.resp.miss
tlb.io.req.valid := resp_helper.fire(tlb.io.req.ready)
tlb.io.req.bits := reqq.bits
reqq.ready := resp_helper.fire(reqq.valid, !tlb_miss)
io.resp.valid := resp_helper.fire(io.resp.ready, !tlb_miss)
io.resp.bits := tlb.io.resp
io.ptw <> tlb.io.ptw
}
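// Descriptive note (an assumption about the uncore DecoupledHelper utility, not
// stated in this file): resp_helper.fire(x, extra...) is high when every other
// handshake signal in the helper plus the extra conditions are true, excluding x
// itself, so each valid/ready above avoids a combinational dependence on its own
// signal while the request queue, the TLB, and the response all advance together.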


@@ -0,0 +1,179 @@
// See LICENSE for license details.
package rocket
import Chisel._
import uncore._
import scala.math._
import cde.{Parameters, Field}
object Util {
implicit def uintToBitPat(x: UInt): BitPat = BitPat(x)
implicit def intToUInt(x: Int): UInt = UInt(x)
implicit def bigIntToUInt(x: BigInt): UInt = UInt(x)
implicit def booleanToBool(x: Boolean): Bits = Bool(x)
implicit def intSeqToUIntSeq(x: Seq[Int]): Seq[UInt] = x.map(UInt(_))
implicit def wcToUInt(c: WideCounter): UInt = c.value
implicit class UIntToAugmentedUInt(val x: UInt) extends AnyVal {
def sextTo(n: Int): UInt =
if (x.getWidth == n) x
else Cat(Fill(n - x.getWidth, x(x.getWidth-1)), x)
def extract(hi: Int, lo: Int): UInt = {
if (hi == lo-1) UInt(0)
else x(hi, lo)
}
}
implicit def booleanToIntConv(x: Boolean) = new AnyRef {
def toInt: Int = if (x) 1 else 0
}
implicit class SeqToAugmentedSeq[T <: Data](val x: Seq[T]) extends AnyVal {
def apply(idx: UInt): T = {
if (x.size == 1) {
x.head
} else {
val half = 1 << (log2Ceil(x.size) - 1)
val newIdx = idx & (half - 1)
Mux(idx >= UInt(half), x.drop(half)(newIdx), x.take(half)(newIdx))
}
}
def toBits(): UInt = Cat(x.map(_.toBits).reverse)
}
def minUInt(values: Seq[UInt]): UInt =
values.reduce((a, b) => Mux(a < b, a, b))
def minUInt(first: UInt, rest: UInt*): UInt =
minUInt(first +: rest.toSeq)
}
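// Illustrative examples (hypothetical values, not in the original source):
//
//   UInt("b101").sextTo(8)     // 0b11111101: the sign bit is replicated
//   x.extract(7, 4)            // x(7,4); extract(lo-1, lo) degenerates to UInt(0)
//   Seq(a, b, c, d)(idx)       // dynamic indexing of a Seq via a balanced Mux tree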
import Util._
object Str
{
def apply(s: String): UInt = {
var i = BigInt(0)
require(s.forall(validChar _))
for (c <- s)
i = (i << 8) | c
UInt(i, s.length*8)
}
def apply(x: Char): UInt = {
require(validChar(x))
UInt(x.toInt, 8)
}
def apply(x: UInt): UInt = apply(x, 10)
def apply(x: UInt, radix: Int): UInt = {
val rad = UInt(radix)
val w = x.getWidth
require(w > 0)
var q = x
var s = digit(q % rad)
for (i <- 1 until ceil(log(2)/log(radix)*w).toInt) {
q = q / rad
s = Cat(Mux(Bool(radix == 10) && q === UInt(0), Str(' '), digit(q % rad)), s)
}
s
}
def apply(x: SInt): UInt = apply(x, 10)
def apply(x: SInt, radix: Int): UInt = {
val neg = x < SInt(0)
val abs = x.abs
if (radix != 10) {
Cat(Mux(neg, Str('-'), Str(' ')), Str(abs, radix))
} else {
val rad = UInt(radix)
val w = abs.getWidth
require(w > 0)
var q = abs
var s = digit(q % rad)
var needSign = neg
for (i <- 1 until ceil(log(2)/log(radix)*w).toInt) {
q = q / rad
val placeSpace = q === UInt(0)
val space = Mux(needSign, Str('-'), Str(' '))
needSign = needSign && !placeSpace
s = Cat(Mux(placeSpace, space, digit(q % rad)), s)
}
Cat(Mux(needSign, Str('-'), Str(' ')), s)
}
}
private def digit(d: UInt): UInt = Mux(d < UInt(10), Str('0')+d, Str(('a'-10).toChar)+d)(7,0)
private def validChar(x: Char) = x == (x & 0xFF)
}
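// Illustrative examples (not in the original source):
//
//   Str("OK")        // UInt(0x4F4B, width = 16): one ASCII byte per character
//   Str(UInt(42))    // decimal digit string, leading positions space-padded
//   Str(x, 16)       // hexadecimal rendering of a UInt x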
object Split
{
// is there a better way to do this?
def apply(x: Bits, n0: Int) = {
val w = checkWidth(x, n0)
(x(w-1,n0), x(n0-1,0))
}
def apply(x: Bits, n1: Int, n0: Int) = {
val w = checkWidth(x, n1, n0)
(x(w-1,n1), x(n1-1,n0), x(n0-1,0))
}
def apply(x: Bits, n2: Int, n1: Int, n0: Int) = {
val w = checkWidth(x, n2, n1, n0)
(x(w-1,n2), x(n2-1,n1), x(n1-1,n0), x(n0-1,0))
}
private def checkWidth(x: Bits, n: Int*) = {
val w = x.getWidth
def decreasing(x: Seq[Int]): Boolean =
if (x.tail.isEmpty) true
else x.head >= x.tail.head && decreasing(x.tail)
require(decreasing(w :: n.toList))
w
}
}
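// Illustrative example (hypothetical `addr`, not in the original source): slicing
// an address into tag, index, and offset fields in one call:
//
//   val (tag, idx, off) = Split(addr, 12, 6)   // addr(w-1,12), addr(11,6), addr(5,0)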
// a counter that clock gates most of its MSBs using the LSB carry-out
case class WideCounter(width: Int, inc: UInt = UInt(1))
{
private val isWide = width > 2*inc.getWidth
private val smallWidth = if (isWide) inc.getWidth max log2Up(width) else width
private val small = Reg(init=UInt(0, smallWidth))
private val nextSmall = small +& inc
small := nextSmall
private val large = if (isWide) {
val r = Reg(init=UInt(0, width - smallWidth))
when (nextSmall(smallWidth)) { r := r + UInt(1) }
r
} else null
val value = if (isWide) Cat(large, small) else small
def := (x: UInt) = {
small := x
if (isWide) large := x >> smallWidth
}
}
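// Illustrative sketch (not in the original source): a 64-bit counter whose upper
// bits only toggle on carry-out of the small low-order register:
//
//   val cycles = WideCounter(64)           // increments by 1 every cycle
//   when (do_write) { cycles := wdata }    // := loads both halves; do_write/wdata are hypothetical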
object Random
{
def apply(mod: Int, random: UInt): UInt = {
if (isPow2(mod)) random(log2Up(mod)-1,0)
else PriorityEncoder(partition(apply(1 << log2Up(mod*8), random), mod))
}
def apply(mod: Int): UInt = apply(mod, randomizer)
def oneHot(mod: Int, random: UInt): UInt = {
if (isPow2(mod)) UIntToOH(random(log2Up(mod)-1,0))
else PriorityEncoderOH(partition(apply(1 << log2Up(mod*8), random), mod)).toBits
}
def oneHot(mod: Int): UInt = oneHot(mod, randomizer)
private def randomizer = LFSR16()
private def round(x: Double): Int =
if (x.toInt.toDouble == x) x.toInt else (x.toInt + 1) & -2
private def partition(value: UInt, slices: Int) =
Seq.tabulate(slices)(i => value < round((i << value.getWidth).toDouble / slices))
}
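// Illustrative example (hypothetical `nWays`, not in the original source): choosing
// a replacement victim in an nWays-way structure:
//
//   val victim   = Random(nWays)          // LFSR-driven index in [0, nWays)
//   val victimOH = Random.oneHot(nWays)   // an LFSR-driven one-hot victim select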