1
0

Heterogeneous Tiles (#550)

Fundamental new features:

* Added tile package: This package is intended to hold components re-usable across different types of tile. Will be the future location of TL2-RoCC accelerators and new diplomatic versions of intra-tile interfaces.
* Adopted [ModuleName]Params convention: Code base was very inconsistent about what to name case classes that provide parameters to modules. Settled on calling them [ModuleName]Params to distinguish them from config.Parameters and config.Config. So far applied mostly only to case classes defined within rocket and tile.
* Defined RocketTileParams: A nested case class containing case classes for all the components of a tile (L1 caches and core). Allows all such parameters to vary per-tile.
* Defined RocketCoreParams: All the parameters that can be varied per-core.
* Defined L1CacheParams: A trait defining the parameters common to L1 caches, made concrete in different derived case classes.
* Defined RocketTilesKey: A sequence of RocketTileParams, one for every tile to be created.
* Provided HeterogeneousDualCoreConfig: An example of making a heterogeneous chip with two cores, one big and one little.
* Changes to legacy code: ReplacementPolicy moved to package util. L1Metadata moved to package tile. Legacy L2 cache agent removed because it can no longer share the metadata array implementation with the L1. Legacy GroundTests on life support.

Additional changes that got rolled in along the way:

* rocket: 	Fix critical path through BTB for I$ index bits > pgIdxBits
* coreplex: tiles connected via :=*
* groundtest: updated to use TileParams
* tilelink: cache cork requirements are relaxed to allow more cacheless masters
This commit is contained in:
Henry Cook
2017-02-09 13:59:09 -08:00
committed by GitHub
parent f9acd4988c
commit e8c8d2af71
57 changed files with 1084 additions and 1933 deletions

View File

@ -0,0 +1,81 @@
// See LICENSE.SiFive for license details.
package tile
import Chisel._
import config._
import diplomacy._
import rocket._
import uncore.tilelink2._
import util._
case object SharedMemoryTLEdge extends Field[TLEdgeOut]
case object TileKey extends Field[TileParams]
trait TileParams {
val core: CoreParams
val icache: Option[ICacheParams]
val dcache: Option[DCacheParams]
val rocc: Seq[RoCCParams]
val btb: Option[BTBParams]
val dataScratchpadBytes: Int
}
trait HasTileParameters {
implicit val p: Parameters
val tileParams: TileParams = p(TileKey)
val usingVM = tileParams.core.useVM
val usingUser = tileParams.core.useUser || usingVM
val usingDebug = tileParams.core.useDebug
val usingRoCC = !tileParams.rocc.isEmpty
val usingBTB = tileParams.btb.isDefined && tileParams.btb.get.nEntries > 0
val usingPTW = usingVM
val usingDataScratchpad = tileParams.dcache.isDefined && tileParams.dataScratchpadBytes > 0
def dcacheArbPorts = 1 + usingVM.toInt + usingDataScratchpad.toInt + tileParams.rocc.size
}
abstract class BareTile(implicit p: Parameters) extends LazyModule
abstract class BareTileBundle[+L <: BareTile](_outer: L) extends GenericParameterizedBundle(_outer) {
val outer = _outer
implicit val p = outer.p
}
abstract class BareTileModule[+L <: BareTile, +B <: BareTileBundle[L]](_outer: L, _io: () => B) extends LazyModuleImp(_outer) {
val outer = _outer
val io = _io ()
}
// Uses TileLink master port to connect caches and accelerators to the coreplex
trait HasTileLinkMasterPort extends HasTileParameters {
implicit val p: Parameters
val module: HasTileLinkMasterPortModule
val masterNode = TLOutputNode()
}
trait HasTileLinkMasterPortBundle {
val outer: HasTileLinkMasterPort
val master = outer.masterNode.bundleOut
}
trait HasTileLinkMasterPortModule {
val outer: HasTileLinkMasterPort
val io: HasTileLinkMasterPortBundle
}
abstract class BaseTile(tileParams: TileParams)(implicit p: Parameters) extends BareTile
with HasTileLinkMasterPort {
override lazy val module = new BaseTileModule(this, () => new BaseTileBundle(this))
}
class BaseTileBundle[+L <: BaseTile](_outer: L) extends BareTileBundle(_outer)
with HasTileLinkMasterPortBundle {
val hartid = UInt(INPUT, p(XLen))
val interrupts = new TileInterrupts()(p).asInput
val resetVector = UInt(INPUT, p(XLen))
}
class BaseTileModule[+L <: BaseTile, +B <: BaseTileBundle[L]](_outer: L, _io: () => B) extends BareTileModule(_outer, _io)
with HasTileLinkMasterPortModule

View File

@ -0,0 +1,88 @@
// See LICENSE.SiFive for license details.
package tile
import Chisel._
import config._
import rocket._
import util._
case object BuildCore extends Field[Parameters => CoreModule with HasCoreIO]
case object XLen extends Field[Int]
// These parameters can be varied per-core
trait CoreParams {
val useVM: Boolean
val useUser: Boolean
val useDebug: Boolean
val useAtomics: Boolean
val useCompressed: Boolean
val mulDiv: Option[MulDivParams]
val fpu: Option[FPUParams]
val fetchWidth: Int
val decodeWidth: Int
val retireWidth: Int
val instBits: Int
}
trait HasCoreParameters extends HasTileParameters {
val coreParams: CoreParams = tileParams.core
val xLen = p(XLen)
val fLen = xLen // TODO relax this
require(xLen == 32 || xLen == 64)
val usingMulDiv = coreParams.mulDiv.nonEmpty
val usingFPU = coreParams.fpu.nonEmpty
val usingAtomics = coreParams.useAtomics
val usingCompressed = coreParams.useCompressed
val retireWidth = coreParams.retireWidth
val fetchWidth = coreParams.fetchWidth
val decodeWidth = coreParams.decodeWidth
val coreInstBits = coreParams.instBits
val coreInstBytes = coreInstBits/8
val coreDataBits = xLen
val coreDataBytes = coreDataBits/8
val coreDCacheReqTagBits = 6
val dcacheReqTagBits = coreDCacheReqTagBits + log2Ceil(dcacheArbPorts)
def pgIdxBits = 12
def pgLevelBits = 10 - log2Ceil(xLen / 32)
def vaddrBits = pgIdxBits + pgLevels * pgLevelBits
val paddrBits = 32//p(PAddrBits)
def ppnBits = paddrBits - pgIdxBits
def vpnBits = vaddrBits - pgIdxBits
val pgLevels = p(PgLevels)
val asIdBits = p(ASIdBits)
val vpnBitsExtended = vpnBits + (vaddrBits < xLen).toInt
val vaddrBitsExtended = vpnBitsExtended + pgIdxBits
val coreMaxAddrBits = paddrBits max vaddrBitsExtended
val maxPAddrBits = xLen match { case 32 => 34; case 64 => 50 }
require(paddrBits <= maxPAddrBits)
// Print out log of committed instructions and their writeback values.
// Requires post-processing due to out-of-order writebacks.
val enableCommitLog = false
}
abstract class CoreModule(implicit val p: Parameters) extends Module
with HasCoreParameters
abstract class CoreBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
with HasCoreParameters
trait HasCoreIO {
implicit val p: Parameters
val io = new Bundle {
val interrupts = new TileInterrupts().asInput
val hartid = UInt(INPUT, p(XLen))
val imem = new FrontendIO()(p)
val dmem = new HellaCacheIO()(p)
val ptw = new DatapathPTWIO().flip
val fpu = new FPUCoreIO().flip
val rocc = new RoCCCoreIO().flip
}
}

View File

@ -0,0 +1,752 @@
// See LICENSE.Berkeley for license details.
// See LICENSE.SiFive for license details.
package tile
import Chisel._
import Chisel.ImplicitConversions._
import FPConstants._
import rocket.DecodeLogic
import rocket.Instructions._
import uncore.constants.MemoryOpConstants._
import config._
import util._
case class FPUParams(
divSqrt: Boolean = true,
sfmaLatency: Int = 3,
dfmaLatency: Int = 4
)
object FPConstants
{
def FCMD_ADD = BitPat("b0??00")
def FCMD_SUB = BitPat("b0??01")
def FCMD_MUL = BitPat("b0??10")
def FCMD_MADD = BitPat("b1??00")
def FCMD_MSUB = BitPat("b1??01")
def FCMD_NMSUB = BitPat("b1??10")
def FCMD_NMADD = BitPat("b1??11")
def FCMD_DIV = BitPat("b?0011")
def FCMD_SQRT = BitPat("b?1011")
def FCMD_SGNJ = BitPat("b??1?0")
def FCMD_MINMAX = BitPat("b?01?1")
def FCMD_CVT_FF = BitPat("b??0??")
def FCMD_CVT_IF = BitPat("b?10??")
def FCMD_CMP = BitPat("b?01??")
def FCMD_MV_XF = BitPat("b?11??")
def FCMD_CVT_FI = BitPat("b??0??")
def FCMD_MV_FX = BitPat("b??1??")
def FCMD_X = BitPat("b?????")
val FCMD_WIDTH = 5
val RM_SZ = 3
val FLAGS_SZ = 5
}
trait HasFPUCtrlSigs {
val cmd = Bits(width = FCMD_WIDTH)
val ldst = Bool()
val wen = Bool()
val ren1 = Bool()
val ren2 = Bool()
val ren3 = Bool()
val swap12 = Bool()
val swap23 = Bool()
val single = Bool()
val fromint = Bool()
val toint = Bool()
val fastpipe = Bool()
val fma = Bool()
val div = Bool()
val sqrt = Bool()
val round = Bool()
val wflags = Bool()
}
class FPUCtrlSigs extends Bundle with HasFPUCtrlSigs
class FPUDecoder(implicit p: Parameters) extends FPUModule()(p) {
val io = new Bundle {
val inst = Bits(INPUT, 32)
val sigs = new FPUCtrlSigs().asOutput
}
val default = List(FCMD_X, X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X)
val f =
Array(FLW -> List(FCMD_X, Y,Y,N,N,N,X,X,Y,N,N,N,N,N,N,N,N),
FSW -> List(FCMD_MV_XF, Y,N,N,Y,N,Y,X,Y,N,Y,N,N,N,N,N,N),
FMV_S_X -> List(FCMD_MV_FX, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,N),
FCVT_S_W -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y),
FCVT_S_WU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y),
FCVT_S_L -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y),
FCVT_S_LU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y),
FMV_X_S -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,N),
FCLASS_S -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,N),
FCVT_W_S -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y),
FCVT_WU_S-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y),
FCVT_L_S -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y),
FCVT_LU_S-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y),
FEQ_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,N,Y),
FLT_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,N,Y),
FLE_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,N,Y),
FSGNJ_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,N),
FSGNJN_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,N),
FSGNJX_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,N),
FMIN_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,Y),
FMAX_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,Y),
FADD_S -> List(FCMD_ADD, N,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y,Y),
FSUB_S -> List(FCMD_SUB, N,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y,Y),
FMUL_S -> List(FCMD_MUL, N,Y,Y,Y,N,N,N,Y,N,N,N,Y,N,N,Y,Y),
FMADD_S -> List(FCMD_MADD, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y),
FMSUB_S -> List(FCMD_MSUB, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y),
FNMADD_S -> List(FCMD_NMADD, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y),
FNMSUB_S -> List(FCMD_NMSUB, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y),
FDIV_S -> List(FCMD_DIV, N,Y,Y,Y,N,N,N,Y,N,N,N,N,Y,N,Y,Y),
FSQRT_S -> List(FCMD_SQRT, N,Y,Y,N,N,Y,X,Y,N,N,N,N,N,Y,Y,Y))
val d =
Array(FLD -> List(FCMD_X, Y,Y,N,N,N,X,X,N,N,N,N,N,N,N,N,N),
FSD -> List(FCMD_MV_XF, Y,N,N,Y,N,Y,X,N,N,Y,N,N,N,N,N,N),
FMV_D_X -> List(FCMD_MV_FX, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,N),
FCVT_D_W -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y),
FCVT_D_WU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y),
FCVT_D_L -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y),
FCVT_D_LU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y),
FMV_X_D -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,N),
FCLASS_D -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,N),
FCVT_W_D -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y),
FCVT_WU_D-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y),
FCVT_L_D -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y),
FCVT_LU_D-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y),
FCVT_S_D -> List(FCMD_CVT_FF, N,Y,Y,N,N,N,X,Y,N,N,Y,N,N,N,Y,Y),
FCVT_D_S -> List(FCMD_CVT_FF, N,Y,Y,N,N,N,X,N,N,N,Y,N,N,N,Y,Y),
FEQ_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,N,Y),
FLT_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,N,Y),
FLE_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,N,Y),
FSGNJ_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,N),
FSGNJN_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,N),
FSGNJX_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,N),
FMIN_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,Y),
FMAX_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,Y),
FADD_D -> List(FCMD_ADD, N,Y,Y,Y,N,N,Y,N,N,N,N,Y,N,N,Y,Y),
FSUB_D -> List(FCMD_SUB, N,Y,Y,Y,N,N,Y,N,N,N,N,Y,N,N,Y,Y),
FMUL_D -> List(FCMD_MUL, N,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,Y,Y),
FMADD_D -> List(FCMD_MADD, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y),
FMSUB_D -> List(FCMD_MSUB, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y),
FNMADD_D -> List(FCMD_NMADD, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y),
FNMSUB_D -> List(FCMD_NMSUB, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y),
FDIV_D -> List(FCMD_DIV, N,Y,Y,Y,N,N,N,N,N,N,N,N,Y,N,Y,Y),
FSQRT_D -> List(FCMD_SQRT, N,Y,Y,N,N,Y,X,N,N,N,N,N,N,Y,Y,Y))
val insns = fLen match {
case 32 => f
case 64 => f ++ d
}
val decoder = DecodeLogic(io.inst, default, insns)
val s = io.sigs
val sigs = Seq(s.cmd, s.ldst, s.wen, s.ren1, s.ren2, s.ren3, s.swap12,
s.swap23, s.single, s.fromint, s.toint, s.fastpipe, s.fma,
s.div, s.sqrt, s.round, s.wflags)
sigs zip decoder map {case(s,d) => s := d}
}
class FPUCoreIO(implicit p: Parameters) extends CoreBundle()(p) {
val inst = Bits(INPUT, 32)
val fromint_data = Bits(INPUT, xLen)
val fcsr_rm = Bits(INPUT, FPConstants.RM_SZ)
val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ))
val store_data = Bits(OUTPUT, fLen)
val toint_data = Bits(OUTPUT, xLen)
val dmem_resp_val = Bool(INPUT)
val dmem_resp_type = Bits(INPUT, 3)
val dmem_resp_tag = UInt(INPUT, 5)
val dmem_resp_data = Bits(INPUT, fLen)
val valid = Bool(INPUT)
val fcsr_rdy = Bool(OUTPUT)
val nack_mem = Bool(OUTPUT)
val illegal_rm = Bool(OUTPUT)
val killx = Bool(INPUT)
val killm = Bool(INPUT)
val dec = new FPUCtrlSigs().asOutput
val sboard_set = Bool(OUTPUT)
val sboard_clr = Bool(OUTPUT)
val sboard_clra = UInt(OUTPUT, 5)
}
class FPUIO(implicit p: Parameters) extends FPUCoreIO ()(p) {
val cp_req = Decoupled(new FPInput()).flip //cp doesn't pay attn to kill sigs
val cp_resp = Decoupled(new FPResult())
}
class FPResult(implicit p: Parameters) extends CoreBundle()(p) {
val data = Bits(width = fLen+1)
val exc = Bits(width = 5)
}
class FPInput(implicit p: Parameters) extends CoreBundle()(p) with HasFPUCtrlSigs {
val rm = Bits(width = 3)
val typ = Bits(width = 2)
val in1 = Bits(width = fLen+1)
val in2 = Bits(width = fLen+1)
val in3 = Bits(width = fLen+1)
override def cloneType = new FPInput().asInstanceOf[this.type]
}
object ClassifyRecFN {
def apply(expWidth: Int, sigWidth: Int, in: UInt) = {
val sign = in(sigWidth + expWidth)
val exp = in(sigWidth + expWidth - 1, sigWidth - 1)
val sig = in(sigWidth - 2, 0)
val code = exp(expWidth,expWidth-2)
val codeHi = code(2, 1)
val isSpecial = codeHi === UInt(3)
val isHighSubnormalIn = exp(expWidth-2, 0) < UInt(2)
val isSubnormal = code === UInt(1) || codeHi === UInt(1) && isHighSubnormalIn
val isNormal = codeHi === UInt(1) && !isHighSubnormalIn || codeHi === UInt(2)
val isZero = code === UInt(0)
val isInf = isSpecial && !exp(expWidth-2)
val isNaN = code.andR
val isSNaN = isNaN && !sig(sigWidth-2)
val isQNaN = isNaN && sig(sigWidth-2)
Cat(isQNaN, isSNaN, isInf && !sign, isNormal && !sign,
isSubnormal && !sign, isZero && !sign, isZero && sign,
isSubnormal && sign, isNormal && sign, isInf && sign)
}
}
object IsNaNRecFN {
def apply(expWidth: Int, sigWidth: Int, in: UInt) =
in(sigWidth + expWidth - 1, sigWidth + expWidth - 3).andR
}
object IsSNaNRecFN {
def apply(expWidth: Int, sigWidth: Int, in: UInt) =
IsNaNRecFN(expWidth, sigWidth, in) && !in(sigWidth - 2)
}
/** Format conversion without rounding or NaN handling */
object RecFNToRecFN_noncompliant {
def apply(in: UInt, inExpWidth: Int, inSigWidth: Int, outExpWidth: Int, outSigWidth: Int) = {
val sign = in(inSigWidth + inExpWidth)
val fractIn = in(inSigWidth - 2, 0)
val expIn = in(inSigWidth + inExpWidth - 1, inSigWidth - 1)
val fractOut = fractIn << outSigWidth >> inSigWidth
val expOut = {
val expCode = expIn(inExpWidth, inExpWidth - 2)
val commonCase = (expIn + (1 << outExpWidth)) - (1 << inExpWidth)
Mux(expCode === 0 || expCode >= 6, Cat(expCode, commonCase(outExpWidth - 3, 0)),
commonCase(outExpWidth, 0))
}
Cat(sign, expOut, fractOut)
}
}
object CanonicalNaN {
def apply(expWidth: Int, sigWidth: Int): UInt =
UInt((BigInt(7) << (expWidth + sigWidth - 3)) + (BigInt(1) << (sigWidth - 2)), expWidth + sigWidth + 1)
}
trait HasFPUParameters {
val fLen: Int
val (sExpWidth, sSigWidth) = (8, 24)
val (dExpWidth, dSigWidth) = (11, 53)
val floatWidths = fLen match {
case 32 => List((sExpWidth, sSigWidth))
case 64 => List((sExpWidth, sSigWidth), (dExpWidth, dSigWidth))
}
val maxExpWidth = floatWidths.map(_._1).max
val maxSigWidth = floatWidths.map(_._2).max
}
abstract class FPUModule(implicit p: Parameters) extends CoreModule()(p) with HasFPUParameters
class FPToInt(implicit p: Parameters) extends FPUModule()(p) {
class Output extends Bundle {
val lt = Bool()
val store = Bits(width = fLen)
val toint = Bits(width = xLen)
val exc = Bits(width = 5)
override def cloneType = new Output().asInstanceOf[this.type]
}
val io = new Bundle {
val in = Valid(new FPInput).flip
val as_double = new FPInput().asOutput
val out = Valid(new Output)
}
val in = Reg(new FPInput)
val valid = Reg(next=io.in.valid)
def upconvert(x: UInt) = RecFNToRecFN_noncompliant(x, sExpWidth, sSigWidth, maxExpWidth, maxSigWidth)
when (io.in.valid) {
in := io.in.bits
if (fLen > 32) when (io.in.bits.single && !io.in.bits.ldst && io.in.bits.cmd =/= FCMD_MV_XF) {
in.in1 := upconvert(io.in.bits.in1)
in.in2 := upconvert(io.in.bits.in2)
}
}
val unrec_s = hardfloat.fNFromRecFN(sExpWidth, sSigWidth, in.in1).sextTo(xLen)
val unrec_mem = fLen match {
case 32 => unrec_s
case 64 =>
val unrec_d = hardfloat.fNFromRecFN(dExpWidth, dSigWidth, in.in1).sextTo(xLen)
Mux(in.single, unrec_s, unrec_d)
}
val unrec_int = xLen match {
case 32 => unrec_s
case fLen => unrec_mem
}
val classify_s = ClassifyRecFN(sExpWidth, sSigWidth, in.in1)
val classify_out = fLen match {
case 32 => classify_s
case 64 =>
val classify_d = ClassifyRecFN(dExpWidth, dSigWidth, in.in1)
Mux(in.single, classify_s, classify_d)
}
val dcmp = Module(new hardfloat.CompareRecFN(maxExpWidth, maxSigWidth))
dcmp.io.a := in.in1
dcmp.io.b := in.in2
dcmp.io.signaling := !in.rm(1)
io.out.bits.toint := Mux(in.rm(0), classify_out, unrec_int)
io.out.bits.store := unrec_mem
io.out.bits.exc := Bits(0)
when (in.cmd === FCMD_CMP) {
io.out.bits.toint := (~in.rm & Cat(dcmp.io.lt, dcmp.io.eq)).orR
io.out.bits.exc := dcmp.io.exceptionFlags
}
when (in.cmd === FCMD_CVT_IF) {
val minXLen = 32
val n = log2Ceil(xLen/minXLen) + 1
for (i <- 0 until n) {
val conv = Module(new hardfloat.RecFNToIN(maxExpWidth, maxSigWidth, minXLen << i))
conv.io.in := in.in1
conv.io.roundingMode := in.rm
conv.io.signedOut := ~in.typ(0)
when (in.typ.extract(log2Ceil(n), 1) === i) {
io.out.bits.toint := conv.io.out.sextTo(xLen)
io.out.bits.exc := Cat(conv.io.intExceptionFlags(2, 1).orR, UInt(0, 3), conv.io.intExceptionFlags(0))
}
}
}
io.out.valid := valid
io.out.bits.lt := dcmp.io.lt
io.as_double := in
}
class IntToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) {
val io = new Bundle {
val in = Valid(new FPInput).flip
val out = Valid(new FPResult)
}
val in = Pipe(io.in)
val mux = Wire(new FPResult)
mux.exc := Bits(0)
mux.data := hardfloat.recFNFromFN(sExpWidth, sSigWidth, in.bits.in1)
if (fLen > 32) when (!in.bits.single) {
mux.data := hardfloat.recFNFromFN(dExpWidth, dSigWidth, in.bits.in1)
}
val intValue = {
val minXLen = 32
val n = log2Ceil(xLen/minXLen) + 1
val res = Wire(init = in.bits.in1.asSInt)
for (i <- 0 until n-1) {
val smallInt = in.bits.in1((minXLen << i) - 1, 0)
when (in.bits.typ.extract(log2Ceil(n), 1) === i) {
res := Mux(in.bits.typ(0), smallInt.zext, smallInt.asSInt)
}
}
res.asUInt
}
when (in.bits.cmd === FCMD_CVT_FI) {
val l2s = Module(new hardfloat.INToRecFN(xLen, sExpWidth, sSigWidth))
l2s.io.signedIn := ~in.bits.typ(0)
l2s.io.in := intValue
l2s.io.roundingMode := in.bits.rm
mux.data := Cat(UInt((BigInt(1) << (fLen - 32)) - 1), l2s.io.out)
mux.exc := l2s.io.exceptionFlags
fLen match {
case 32 =>
case 64 =>
val l2d = Module(new hardfloat.INToRecFN(xLen, dExpWidth, dSigWidth))
l2d.io.signedIn := ~in.bits.typ(0)
l2d.io.in := intValue
l2d.io.roundingMode := in.bits.rm
when (!in.bits.single) {
mux.data := Cat(UInt((BigInt(1) << (fLen - 64)) - 1), l2d.io.out)
mux.exc := l2d.io.exceptionFlags
}
}
}
io.out <> Pipe(in.valid, mux, latency-1)
}
class FPToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) {
val io = new Bundle {
val in = Valid(new FPInput).flip
val out = Valid(new FPResult)
val lt = Bool(INPUT) // from FPToInt
}
val in = Pipe(io.in)
val signNum = Mux(in.bits.rm(1), in.bits.in1 ^ in.bits.in2, Mux(in.bits.rm(0), ~in.bits.in2, in.bits.in2))
val fsgnj_s = Cat(signNum(32), in.bits.in1(31, 0))
val fsgnj = fLen match {
case 32 => fsgnj_s
case 64 => Mux(in.bits.single, Cat(in.bits.in1 >> 33, fsgnj_s),
Cat(signNum(64), in.bits.in1(63, 0)))
}
val mux = Wire(new FPResult)
mux.exc := UInt(0)
mux.data := fsgnj
when (in.bits.cmd === FCMD_MINMAX) {
def doMinMax(expWidth: Int, sigWidth: Int) = {
val isnan1 = IsNaNRecFN(expWidth, sigWidth, in.bits.in1)
val isnan2 = IsNaNRecFN(expWidth, sigWidth, in.bits.in2)
val issnan1 = IsSNaNRecFN(expWidth, sigWidth, in.bits.in1)
val issnan2 = IsSNaNRecFN(expWidth, sigWidth, in.bits.in2)
val invalid = issnan1 || issnan2
val isNaNOut = invalid || (isnan1 && isnan2)
val cNaN = floatWidths.filter(_._1 >= expWidth).map(x => CanonicalNaN(x._1, x._2)).reduce(_+_)
(isnan2 || in.bits.rm(0) =/= io.lt && !isnan1, invalid, isNaNOut, cNaN)
}
val (isLHS, isInvalid, isNaNOut, cNaN) = fLen match {
case 32 => doMinMax(sExpWidth, sSigWidth)
case 64 => MuxT(in.bits.single, doMinMax(sExpWidth, sSigWidth), doMinMax(dExpWidth, dSigWidth))
}
mux.exc := isInvalid << 4
mux.data := Mux(isNaNOut, cNaN, Mux(isLHS, in.bits.in1, in.bits.in2))
}
fLen match {
case 32 =>
case 64 =>
when (in.bits.cmd === FCMD_CVT_FF) {
when (in.bits.single) {
val d2s = Module(new hardfloat.RecFNToRecFN(dExpWidth, dSigWidth, sExpWidth, sSigWidth))
d2s.io.in := in.bits.in1
d2s.io.roundingMode := in.bits.rm
mux.data := Cat(UInt((BigInt(1) << (fLen - 32)) - 1), d2s.io.out)
mux.exc := d2s.io.exceptionFlags
}.otherwise {
val s2d = Module(new hardfloat.RecFNToRecFN(sExpWidth, sSigWidth, dExpWidth, dSigWidth))
s2d.io.in := in.bits.in1
s2d.io.roundingMode := in.bits.rm
mux.data := s2d.io.out
mux.exc := s2d.io.exceptionFlags
}
}
}
io.out <> Pipe(in.valid, mux, latency-1)
}
class FPUFMAPipe(val latency: Int, expWidth: Int, sigWidth: Int)(implicit p: Parameters) extends FPUModule()(p) {
val io = new Bundle {
val in = Valid(new FPInput).flip
val out = Valid(new FPResult)
}
val width = sigWidth + expWidth
val one = UInt(1) << (width-1)
val zero = (io.in.bits.in1(width) ^ io.in.bits.in2(width)) << width
val valid = Reg(next=io.in.valid)
val in = Reg(new FPInput)
when (io.in.valid) {
in := io.in.bits
val cmd_fma = io.in.bits.ren3
val cmd_addsub = io.in.bits.swap23
in.cmd := Cat(io.in.bits.cmd(1) & (cmd_fma || cmd_addsub), io.in.bits.cmd(0))
when (cmd_addsub) { in.in2 := one }
unless (cmd_fma || cmd_addsub) { in.in3 := zero }
}
val fma = Module(new hardfloat.MulAddRecFN(expWidth, sigWidth))
fma.io.op := in.cmd
fma.io.roundingMode := in.rm
fma.io.a := in.in1
fma.io.b := in.in2
fma.io.c := in.in3
val res = Wire(new FPResult)
res.data := Cat(UInt((BigInt(1) << (fLen - (expWidth + sigWidth))) - 1), fma.io.out)
res.exc := fma.io.exceptionFlags
io.out := Pipe(valid, res, latency-1)
}
class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) {
val io = new FPUIO
val ex_reg_valid = Reg(next=io.valid, init=Bool(false))
val req_valid = ex_reg_valid || io.cp_req.valid
val ex_reg_inst = RegEnable(io.inst, io.valid)
val ex_cp_valid = io.cp_req.fire()
val mem_reg_valid = Reg(next=ex_reg_valid && !io.killx || ex_cp_valid, init=Bool(false))
val mem_reg_inst = RegEnable(ex_reg_inst, ex_reg_valid)
val mem_cp_valid = Reg(next=ex_cp_valid, init=Bool(false))
val killm = (io.killm || io.nack_mem) && !mem_cp_valid
val wb_reg_valid = Reg(next=mem_reg_valid && (!killm || mem_cp_valid), init=Bool(false))
val wb_cp_valid = Reg(next=mem_cp_valid, init=Bool(false))
val fp_decoder = Module(new FPUDecoder)
fp_decoder.io.inst := io.inst
val cp_ctrl = Wire(new FPUCtrlSigs)
cp_ctrl <> io.cp_req.bits
io.cp_resp.valid := Bool(false)
io.cp_resp.bits.data := UInt(0)
val id_ctrl = fp_decoder.io.sigs
val ex_ctrl = Mux(ex_cp_valid, cp_ctrl, RegEnable(id_ctrl, io.valid))
val mem_ctrl = RegEnable(ex_ctrl, req_valid)
val wb_ctrl = RegEnable(mem_ctrl, mem_reg_valid)
// load response
val load_wb = Reg(next=io.dmem_resp_val)
val load_wb_single = RegEnable(!io.dmem_resp_type(0), io.dmem_resp_val)
val load_wb_data = RegEnable(io.dmem_resp_data, io.dmem_resp_val)
val load_wb_tag = RegEnable(io.dmem_resp_tag, io.dmem_resp_val)
val rec_s = hardfloat.recFNFromFN(sExpWidth, sSigWidth, load_wb_data)
val load_wb_data_recoded = fLen match {
case 32 => rec_s
case 64 =>
val rec_d = hardfloat.recFNFromFN(dExpWidth, dSigWidth, load_wb_data)
Mux(load_wb_single, Cat(UInt((BigInt(1) << (fLen - 32)) - 1), rec_s), rec_d)
}
// regfile
val regfile = Mem(32, Bits(width = fLen+1))
when (load_wb) {
regfile(load_wb_tag) := load_wb_data_recoded
if (enableCommitLog)
printf("f%d p%d 0x%x\n", load_wb_tag, load_wb_tag + 32, Mux(load_wb_single, load_wb_data(31,0), load_wb_data))
}
val ex_ra1::ex_ra2::ex_ra3::Nil = List.fill(3)(Reg(UInt()))
when (io.valid) {
when (id_ctrl.ren1) {
when (!id_ctrl.swap12) { ex_ra1 := io.inst(19,15) }
when (id_ctrl.swap12) { ex_ra2 := io.inst(19,15) }
}
when (id_ctrl.ren2) {
when (id_ctrl.swap12) { ex_ra1 := io.inst(24,20) }
when (id_ctrl.swap23) { ex_ra3 := io.inst(24,20) }
when (!id_ctrl.swap12 && !id_ctrl.swap23) { ex_ra2 := io.inst(24,20) }
}
when (id_ctrl.ren3) { ex_ra3 := io.inst(31,27) }
}
val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.fcsr_rm, ex_reg_inst(14,12))
val req = Wire(new FPInput)
req := ex_ctrl
req.rm := ex_rm
req.in1 := regfile(ex_ra1)
req.in2 := regfile(ex_ra2)
req.in3 := regfile(ex_ra3)
req.typ := ex_reg_inst(21,20)
when (ex_cp_valid) {
req := io.cp_req.bits
when (io.cp_req.bits.swap23) {
req.in2 := io.cp_req.bits.in3
req.in3 := io.cp_req.bits.in2
}
}
val sfma = Module(new FPUFMAPipe(cfg.sfmaLatency, sExpWidth, sSigWidth))
sfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.single
sfma.io.in.bits := req
val fpiu = Module(new FPToInt)
fpiu.io.in.valid := req_valid && (ex_ctrl.toint || ex_ctrl.div || ex_ctrl.sqrt || ex_ctrl.cmd === FCMD_MINMAX)
fpiu.io.in.bits := req
io.store_data := fpiu.io.out.bits.store
io.toint_data := fpiu.io.out.bits.toint
when(fpiu.io.out.valid && mem_cp_valid && mem_ctrl.toint){
io.cp_resp.bits.data := fpiu.io.out.bits.toint
io.cp_resp.valid := Bool(true)
}
val ifpu = Module(new IntToFP(2))
ifpu.io.in.valid := req_valid && ex_ctrl.fromint
ifpu.io.in.bits := req
ifpu.io.in.bits.in1 := Mux(ex_cp_valid, io.cp_req.bits.in1, io.fromint_data)
val fpmu = Module(new FPToFP(2))
fpmu.io.in.valid := req_valid && ex_ctrl.fastpipe
fpmu.io.in.bits := req
fpmu.io.lt := fpiu.io.out.bits.lt
val divSqrt_wen = Reg(next=Bool(false))
val divSqrt_inReady = Wire(init=Bool(false))
val divSqrt_waddr = Reg(UInt(width = 5))
val divSqrt_wdata = Wire(UInt(width = fLen+1))
val divSqrt_flags = Wire(UInt(width = 5))
val divSqrt_in_flight = Reg(init=Bool(false))
val divSqrt_killed = Reg(Bool())
// writeback arbitration
case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, res: FPResult)
val pipes = List(
Pipe(fpmu, fpmu.latency, (c: FPUCtrlSigs) => c.fastpipe, fpmu.io.out.bits),
Pipe(ifpu, ifpu.latency, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits),
Pipe(sfma, sfma.latency, (c: FPUCtrlSigs) => c.fma && c.single, sfma.io.out.bits)) ++
(fLen > 32).option({
val dfma = Module(new FPUFMAPipe(cfg.dfmaLatency, dExpWidth, dSigWidth))
dfma.io.in.valid := req_valid && ex_ctrl.fma && !ex_ctrl.single
dfma.io.in.bits := req
Pipe(dfma, dfma.latency, (c: FPUCtrlSigs) => c.fma && !c.single, dfma.io.out.bits)
})
def latencyMask(c: FPUCtrlSigs, offset: Int) = {
require(pipes.forall(_.lat >= offset))
pipes.map(p => Mux(p.cond(c), UInt(1 << p.lat-offset), UInt(0))).reduce(_|_)
}
def pipeid(c: FPUCtrlSigs) = pipes.zipWithIndex.map(p => Mux(p._1.cond(c), UInt(p._2), UInt(0))).reduce(_|_)
val maxLatency = pipes.map(_.lat).max
val memLatencyMask = latencyMask(mem_ctrl, 2)
class WBInfo extends Bundle {
val rd = UInt(width = 5)
val single = Bool()
val cp = Bool()
val pipeid = UInt(width = log2Ceil(pipes.size))
override def cloneType: this.type = new WBInfo().asInstanceOf[this.type]
}
val wen = Reg(init=Bits(0, maxLatency-1))
val wbInfo = Reg(Vec(maxLatency-1, new WBInfo))
val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint)
val write_port_busy = RegEnable(mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, req_valid)
for (i <- 0 until maxLatency-2) {
when (wen(i+1)) { wbInfo(i) := wbInfo(i+1) }
}
wen := wen >> 1
when (mem_wen) {
when (!killm) {
wen := wen >> 1 | memLatencyMask
}
for (i <- 0 until maxLatency-1) {
when (!write_port_busy && memLatencyMask(i)) {
wbInfo(i).cp := mem_cp_valid
wbInfo(i).single := mem_ctrl.single
wbInfo(i).pipeid := pipeid(mem_ctrl)
wbInfo(i).rd := mem_reg_inst(11,7)
}
}
}
val waddr = Mux(divSqrt_wen, divSqrt_waddr, wbInfo(0).rd)
val wdata = Mux(divSqrt_wen, divSqrt_wdata, (pipes.map(_.res.data): Seq[UInt])(wbInfo(0).pipeid))
val wexc = (pipes.map(_.res.exc): Seq[UInt])(wbInfo(0).pipeid)
when ((!wbInfo(0).cp && wen(0)) || divSqrt_wen) {
regfile(waddr) := wdata
if (enableCommitLog) {
val wdata_unrec_s = hardfloat.fNFromRecFN(sExpWidth, sSigWidth, wdata)
val unrec = fLen match {
case 32 => wdata_unrec_s
case 64 =>
val wdata_unrec_d = hardfloat.fNFromRecFN(dExpWidth, dSigWidth, wdata)
Mux(wbInfo(0).single, wdata_unrec_s, wdata_unrec_d)
}
printf("f%d p%d 0x%x\n", waddr, waddr + 32, unrec)
}
}
when (wbInfo(0).cp && wen(0)) {
io.cp_resp.bits.data := wdata
io.cp_resp.valid := Bool(true)
}
io.cp_req.ready := !ex_reg_valid
val wb_toint_valid = wb_reg_valid && wb_ctrl.toint
val wb_toint_exc = RegEnable(fpiu.io.out.bits.exc, mem_ctrl.toint)
io.fcsr_flags.valid := wb_toint_valid || divSqrt_wen || wen(0)
io.fcsr_flags.bits :=
Mux(wb_toint_valid, wb_toint_exc, UInt(0)) |
Mux(divSqrt_wen, divSqrt_flags, UInt(0)) |
Mux(wen(0), wexc, UInt(0))
val units_busy = mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && (!divSqrt_inReady || wen.orR)
io.fcsr_rdy := !(ex_reg_valid && ex_ctrl.wflags || mem_reg_valid && mem_ctrl.wflags || wb_reg_valid && wb_ctrl.toint || wen.orR || divSqrt_in_flight)
io.nack_mem := units_busy || write_port_busy || divSqrt_in_flight
io.dec <> fp_decoder.io.sigs
def useScoreboard(f: ((Pipe, Int)) => Bool) = pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(Bool(false))(_||_)
io.sboard_set := wb_reg_valid && !wb_cp_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl)) || mem_ctrl.div || mem_ctrl.sqrt)
io.sboard_clr := !wb_cp_valid && (divSqrt_wen || (wen(0) && useScoreboard(x => wbInfo(0).pipeid === UInt(x._2))))
io.sboard_clra := waddr
// we don't currently support round-max-magnitude (rm=4)
io.illegal_rm := ex_rm(2) && ex_ctrl.round
divSqrt_wdata := 0
divSqrt_flags := 0
if (cfg.divSqrt) {
require(fLen == 64)
val divSqrt_single = Reg(Bool())
val divSqrt_rm = Reg(Bits())
val divSqrt_flags_double = Reg(Bits())
val divSqrt_wdata_double = Reg(Bits())
val divSqrt = Module(new hardfloat.DivSqrtRecF64)
divSqrt_inReady := Mux(divSqrt.io.sqrtOp, divSqrt.io.inReady_sqrt, divSqrt.io.inReady_div)
val divSqrt_outValid = divSqrt.io.outValid_div || divSqrt.io.outValid_sqrt
divSqrt.io.inValid := mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && !divSqrt_in_flight
divSqrt.io.sqrtOp := mem_ctrl.sqrt
divSqrt.io.a := fpiu.io.as_double.in1
divSqrt.io.b := fpiu.io.as_double.in2
divSqrt.io.roundingMode := fpiu.io.as_double.rm
when (divSqrt.io.inValid && divSqrt_inReady) {
divSqrt_in_flight := true
divSqrt_killed := killm
divSqrt_single := mem_ctrl.single
divSqrt_waddr := mem_reg_inst(11,7)
divSqrt_rm := divSqrt.io.roundingMode
}
when (divSqrt_outValid) {
divSqrt_wen := !divSqrt_killed
divSqrt_wdata_double := divSqrt.io.out
divSqrt_in_flight := false
divSqrt_flags_double := divSqrt.io.exceptionFlags
}
val divSqrt_toSingle = Module(new hardfloat.RecFNToRecFN(11, 53, 8, 24))
divSqrt_toSingle.io.in := divSqrt_wdata_double
divSqrt_toSingle.io.roundingMode := divSqrt_rm
divSqrt_wdata := Mux(divSqrt_single, divSqrt_toSingle.io.out, divSqrt_wdata_double)
divSqrt_flags := divSqrt_flags_double | Mux(divSqrt_single, divSqrt_toSingle.io.exceptionFlags, Bits(0))
} else {
when (ex_ctrl.div || ex_ctrl.sqrt) { io.illegal_rm := true }
}
}
/** Mix-ins for constructing tiles that may have an FPU external to the core pipeline */
trait CanHaveSharedFPU extends HasTileParameters
trait CanHaveSharedFPUModule {
val outer: CanHaveSharedFPU
val fpuOpt = outer.tileParams.core.fpu.map(params => Module(new FPU(params)(outer.p)))
// TODO fpArb could go here instead of inside LegacyRoccComplex
}

View File

@ -0,0 +1,15 @@
// See LICENSE.SiFive for license details.
package tile
import Chisel._
import config.Parameters
import util._
class TileInterrupts(implicit p: Parameters) extends CoreBundle()(p) {
val debug = Bool()
val mtip = Bool()
val msip = Bool()
val meip = Bool()
val seip = usingVM.option(Bool())
}

View File

@ -0,0 +1,52 @@
// See LICENSE.SiFive for license details.
package tile
import Chisel._
import config.{Parameters, Field}
import coreplex.CacheBlockBytes
import rocket.PAddrBits
import uncore.tilelink2.ClientMetadata
import uncore.util.{Code, IdentityCode}
import util.ParameterizedBundle
trait L1CacheParams {
def nSets: Int
def nWays: Int
def rowBits: Int
def nTLBEntries: Int
def splitMetadata: Boolean
def ecc: Option[Code]
}
trait HasL1CacheParameters {
implicit val p: Parameters
val cacheParams: L1CacheParams
def cacheBlockBytes = p(CacheBlockBytes)
def lgCacheBlockBytes = log2Up(cacheBlockBytes)
def nSets = cacheParams.nSets
def blockOffBits = lgCacheBlockBytes
def idxBits = log2Up(cacheParams.nSets)
def untagBits = blockOffBits + idxBits
def tagBits = p(PAddrBits) - untagBits
def nWays = cacheParams.nWays
def wayBits = log2Up(nWays)
def isDM = nWays == 1
def rowBits = cacheParams.rowBits
def rowBytes = rowBits/8
def rowOffBits = log2Up(rowBytes)
def code = cacheParams.ecc.getOrElse(new IdentityCode)
def nTLBEntries = cacheParams.nTLBEntries
def hasSplitMetadata = cacheParams.splitMetadata
def cacheDataBits = p(SharedMemoryTLEdge).bundle.dataBits
def cacheDataBeats = (cacheBlockBytes * 8) / cacheDataBits
def refillCycles = cacheDataBeats
}
abstract class L1CacheModule(implicit val p: Parameters) extends Module
with HasL1CacheParameters
abstract class L1CacheBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
with HasL1CacheParameters

View File

@ -0,0 +1,439 @@
// See LICENSE.Berkeley for license details.
// See LICENSE.SiFive for license details.
package tile
import Chisel._
import Chisel.ImplicitConversions._
import config._
import coreplex._
import diplomacy._
import rocket._
import uncore.constants._
import uncore.agents._
import uncore.coherence._
import uncore.devices._
import uncore.tilelink._
import uncore.tilelink2._
import uncore.util._
import util._
case object RoccMaxTaggedMemXacts extends Field[Int]
case object RoccNMemChannels extends Field[Int]
case object RoccNPTWPorts extends Field[Int]
case object BuildRoCC extends Field[Seq[RoCCParams]]
trait CanHaveLegacyRoccs extends CanHaveSharedFPU with CanHavePTW with HasTileLinkMasterPort {
val module: CanHaveLegacyRoccsModule
val legacyRocc = if (p(BuildRoCC).isEmpty) None
else Some(LazyModule(new LegacyRoccComplex()(p.alter { (site, here, up) => {
case CacheBlockOffsetBits => log2Up(site(CacheBlockBytes))
case AmoAluOperandBits => site(XLen)
case RoccNMemChannels => site(BuildRoCC).map(_.nMemChannels).foldLeft(0)(_ + _)
case RoccNPTWPorts => site(BuildRoCC).map(_.nPTWPorts).foldLeft(0)(_ + _)
case TLId => "L1toL2"
case TLKey("L1toL2") =>
TileLinkParameters(
coherencePolicy = new MESICoherence(new NullRepresentation(site(NTiles))),
nManagers = site(BankedL2Config).nBanks + 1 /* MMIO */,
nCachingClients = 1,
nCachelessClients = 1,
maxClientXacts = List(
tileParams.dcache.get.nMSHRs + 1 /* IOMSHR */,
if (site(BuildRoCC).isEmpty) 1 else site(RoccMaxTaggedMemXacts)).max,
maxClientsPerPort = if (site(BuildRoCC).isEmpty) 1 else 2,
maxManagerXacts = 8,
dataBeats = (8 * site(CacheBlockBytes)) / site(XLen),
dataBits = site(CacheBlockBytes)*8)
}})))
legacyRocc foreach { lr =>
masterNode := lr.masterNode
nPTWPorts += lr.nPTWPorts
nDCachePorts += lr.nRocc
}
}
trait CanHaveLegacyRoccsModule extends CanHaveSharedFPUModule
with CanHavePTWModule
with HasTileLinkMasterPortModule {
val outer: CanHaveLegacyRoccs
fpuOpt foreach { fpu =>
outer.legacyRocc.orElse {
fpu.io.cp_req.valid := Bool(false)
fpu.io.cp_resp.ready := Bool(false)
None
} foreach { lr =>
fpu.io.cp_req <> lr.module.io.fpu.cp_req
fpu.io.cp_resp <> lr.module.io.fpu.cp_resp
}
}
outer.legacyRocc foreach { lr =>
ptwPorts ++= lr.module.io.ptw
dcachePorts ++= lr.module.io.dcache
}
}
class LegacyRoccComplex(implicit p: Parameters) extends LazyModule {
val buildRocc = p(BuildRoCC)
val usingRocc = !buildRocc.isEmpty
val nRocc = buildRocc.size
val nFPUPorts = buildRocc.filter(_.useFPU).size
val nMemChannels = buildRocc.map(_.nMemChannels).sum + nRocc
val nPTWPorts = buildRocc.map(_.nPTWPorts).sum
val roccOpcodes = buildRocc.map(_.opcodes)
val masterNode = TLOutputNode()
val legacies = List.fill(nMemChannels) { LazyModule(new TLLegacy()(p.alterPartial({ case PAddrBits => 32 }))) }
legacies.foreach { leg => masterNode := TLHintHandler()(leg.node) }
lazy val module = new LazyModuleImp(this) with HasCoreParameters {
val io = new Bundle {
val tl = masterNode.bundleOut
val dcache = Vec(nRocc, new HellaCacheIO)
val fpu = new Bundle {
val cp_req = Decoupled(new FPInput())
val cp_resp = Decoupled(new FPResult()).flip
}
val ptw = Vec(nPTWPorts, new TLBPTWIO)
val core = new Bundle {
val cmd = Decoupled(new RoCCCommand).flip
val resp = Decoupled(new RoCCResponse)
val busy = Bool(OUTPUT)
val interrupt = Bool(OUTPUT)
val exception = Bool(INPUT)
}
}
val respArb = Module(new RRArbiter(new RoCCResponse, nRocc))
io.core.resp <> respArb.io.out
val cmdRouter = Module(new RoccCommandRouter(roccOpcodes))
cmdRouter.io.in <> io.core.cmd
val roccs = buildRocc.zipWithIndex.map { case (accelParams, i) =>
val rocc = accelParams.generator(p.alterPartial({
case RoccNMemChannels => accelParams.nMemChannels
case RoccNPTWPorts => accelParams.nPTWPorts
}))
val dcIF = Module(new SimpleHellaCacheIF)
rocc.io.cmd <> cmdRouter.io.out(i)
rocc.io.exception := io.core.exception
dcIF.io.requestor <> rocc.io.mem
io.dcache(i) := dcIF.io.cache
legacies(i).module.io.legacy <> rocc.io.autl
respArb.io.in(i) <> Queue(rocc.io.resp)
rocc
}
(nRocc until legacies.size) zip roccs.map(_.io.utl) foreach { case(i, utl) =>
legacies(i).module.io.legacy <> utl
}
io.core.busy := cmdRouter.io.busy || roccs.map(_.io.busy).reduce(_ || _)
io.core.interrupt := roccs.map(_.io.interrupt).reduce(_ || _)
if (usingFPU && nFPUPorts > 0) {
val fpArb = Module(new InOrderArbiter(new FPInput, new FPResult, nFPUPorts))
val fp_rocc_ios = roccs.zip(buildRocc)
.filter { case (_, params) => params.useFPU }
.map { case (rocc, _) => rocc.io }
fpArb.io.in_req <> fp_rocc_ios.map(_.fpu_req)
fp_rocc_ios.zip(fpArb.io.in_resp).foreach {
case (rocc, arb) => rocc.fpu_resp <> arb
}
io.fpu.cp_req <> fpArb.io.out_req
fpArb.io.out_resp <> io.fpu.cp_resp
} else {
io.fpu.cp_req.valid := Bool(false)
io.fpu.cp_resp.ready := Bool(false)
}
}
}
case class RoCCParams(
opcodes: OpcodeSet,
generator: Parameters => RoCC,
nMemChannels: Int = 0,
nPTWPorts : Int = 0,
useFPU: Boolean = false)
class RoCCInstruction extends Bundle
{
val funct = Bits(width = 7)
val rs2 = Bits(width = 5)
val rs1 = Bits(width = 5)
val xd = Bool()
val xs1 = Bool()
val xs2 = Bool()
val rd = Bits(width = 5)
val opcode = Bits(width = 7)
}
class RoCCCommand(implicit p: Parameters) extends CoreBundle()(p) {
val inst = new RoCCInstruction
val rs1 = Bits(width = xLen)
val rs2 = Bits(width = xLen)
val status = new MStatus
}
class RoCCResponse(implicit p: Parameters) extends CoreBundle()(p) {
val rd = Bits(width = 5)
val data = Bits(width = xLen)
}
class RoCCCoreIO(implicit p: Parameters) extends CoreBundle()(p) {
val cmd = Decoupled(new RoCCCommand).flip
val resp = Decoupled(new RoCCResponse)
val mem = new HellaCacheIO
val busy = Bool(OUTPUT)
val interrupt = Bool(OUTPUT)
val exception = Bool(INPUT)
override def cloneType = new RoCCCoreIO()(p).asInstanceOf[this.type]
}
class RoCCIO(implicit p: Parameters) extends RoCCCoreIO()(p) {
// These should be handled differently, eventually
val autl = new ClientUncachedTileLinkIO
val utl = Vec(p(RoccNMemChannels), new ClientUncachedTileLinkIO)
val ptw = Vec(p(RoccNPTWPorts), new TLBPTWIO)
val fpu_req = Decoupled(new FPInput)
val fpu_resp = Decoupled(new FPResult).flip
override def cloneType = new RoCCIO()(p).asInstanceOf[this.type]
}
abstract class RoCC(implicit p: Parameters) extends CoreModule()(p) {
val io = new RoCCIO
io.mem.req.bits.phys := Bool(true) // don't perform address translation
io.mem.invalidate_lr := Bool(false) // don't mess with LR/SC
}
class AccumulatorExample(n: Int = 4)(implicit p: Parameters) extends RoCC()(p) {
val regfile = Mem(n, UInt(width = xLen))
val busy = Reg(init = Vec.fill(n){Bool(false)})
val cmd = Queue(io.cmd)
val funct = cmd.bits.inst.funct
val addr = cmd.bits.rs2(log2Up(n)-1,0)
val doWrite = funct === UInt(0)
val doRead = funct === UInt(1)
val doLoad = funct === UInt(2)
val doAccum = funct === UInt(3)
val memRespTag = io.mem.resp.bits.tag(log2Up(n)-1,0)
// datapath
val addend = cmd.bits.rs1
val accum = regfile(addr)
val wdata = Mux(doWrite, addend, accum + addend)
when (cmd.fire() && (doWrite || doAccum)) {
regfile(addr) := wdata
}
when (io.mem.resp.valid) {
regfile(memRespTag) := io.mem.resp.bits.data
busy(memRespTag) := Bool(false)
}
// control
when (io.mem.req.fire()) {
busy(addr) := Bool(true)
}
val doResp = cmd.bits.inst.xd
val stallReg = busy(addr)
val stallLoad = doLoad && !io.mem.req.ready
val stallResp = doResp && !io.resp.ready
cmd.ready := !stallReg && !stallLoad && !stallResp
// command resolved if no stalls AND not issuing a load that will need a request
// PROC RESPONSE INTERFACE
io.resp.valid := cmd.valid && doResp && !stallReg && !stallLoad
// valid response if valid command, need a response, and no stalls
io.resp.bits.rd := cmd.bits.inst.rd
// Must respond with the appropriate tag or undefined behavior
io.resp.bits.data := accum
// Semantics is to always send out prior accumulator register value
io.busy := cmd.valid || busy.reduce(_||_)
// Be busy when have pending memory requests or committed possibility of pending requests
io.interrupt := Bool(false)
// Set this true to trigger an interrupt on the processor (please refer to supervisor documentation)
// MEMORY REQUEST INTERFACE
io.mem.req.valid := cmd.valid && doLoad && !stallReg && !stallResp
io.mem.req.bits.addr := addend
io.mem.req.bits.tag := addr
io.mem.req.bits.cmd := M_XRD // perform a load (M_XWR for stores)
io.mem.req.bits.typ := MT_D // D = 8 bytes, W = 4, H = 2, B = 1
io.mem.req.bits.data := Bits(0) // we're not performing any stores...
io.autl.acquire.valid := false
io.autl.grant.ready := false
}
class TranslatorExample(implicit p: Parameters) extends RoCC()(p) {
val req_addr = Reg(UInt(width = coreMaxAddrBits))
val req_rd = Reg(io.resp.bits.rd)
val req_offset = req_addr(pgIdxBits - 1, 0)
val req_vpn = req_addr(coreMaxAddrBits - 1, pgIdxBits)
val pte = Reg(new PTE)
val s_idle :: s_ptw_req :: s_ptw_resp :: s_resp :: Nil = Enum(Bits(), 4)
val state = Reg(init = s_idle)
io.cmd.ready := (state === s_idle)
when (io.cmd.fire()) {
req_rd := io.cmd.bits.inst.rd
req_addr := io.cmd.bits.rs1
state := s_ptw_req
}
private val ptw = io.ptw(0)
when (ptw.req.fire()) { state := s_ptw_resp }
when (state === s_ptw_resp && ptw.resp.valid) {
pte := ptw.resp.bits.pte
state := s_resp
}
when (io.resp.fire()) { state := s_idle }
ptw.req.valid := (state === s_ptw_req)
ptw.req.bits.addr := req_vpn
ptw.req.bits.store := Bool(false)
ptw.req.bits.fetch := Bool(false)
io.resp.valid := (state === s_resp)
io.resp.bits.rd := req_rd
io.resp.bits.data := Mux(pte.leaf(), Cat(pte.ppn, req_offset), SInt(-1, xLen).asUInt)
io.busy := (state =/= s_idle)
io.interrupt := Bool(false)
io.mem.req.valid := Bool(false)
io.autl.acquire.valid := Bool(false)
io.autl.grant.ready := Bool(false)
}
class CharacterCountExample(implicit p: Parameters) extends RoCC()(p)
with HasTileLinkParameters {
private val blockOffset = tlBeatAddrBits + tlByteAddrBits
val needle = Reg(UInt(width = 8))
val addr = Reg(UInt(width = coreMaxAddrBits))
val count = Reg(UInt(width = xLen))
val resp_rd = Reg(io.resp.bits.rd)
val addr_block = addr(coreMaxAddrBits - 1, blockOffset)
val offset = addr(blockOffset - 1, 0)
val next_addr = (addr_block + UInt(1)) << UInt(blockOffset)
val s_idle :: s_acq :: s_gnt :: s_check :: s_resp :: Nil = Enum(Bits(), 5)
val state = Reg(init = s_idle)
val gnt = io.autl.grant.bits
val recv_data = Reg(UInt(width = tlDataBits))
val recv_beat = Reg(UInt(width = tlBeatAddrBits))
val data_bytes = Vec.tabulate(tlDataBytes) { i => recv_data(8 * (i + 1) - 1, 8 * i) }
val zero_match = data_bytes.map(_ === UInt(0))
val needle_match = data_bytes.map(_ === needle)
val first_zero = PriorityEncoder(zero_match)
val chars_found = PopCount(needle_match.zipWithIndex.map {
case (matches, i) =>
val idx = Cat(recv_beat, UInt(i, tlByteAddrBits))
matches && idx >= offset && UInt(i) <= first_zero
})
val zero_found = zero_match.reduce(_ || _)
val finished = Reg(Bool())
io.cmd.ready := (state === s_idle)
io.resp.valid := (state === s_resp)
io.resp.bits.rd := resp_rd
io.resp.bits.data := count
io.autl.acquire.valid := (state === s_acq)
io.autl.acquire.bits := GetBlock(addr_block = addr_block)
io.autl.grant.ready := (state === s_gnt)
when (io.cmd.fire()) {
addr := io.cmd.bits.rs1
needle := io.cmd.bits.rs2
resp_rd := io.cmd.bits.inst.rd
count := UInt(0)
finished := Bool(false)
state := s_acq
}
when (io.autl.acquire.fire()) { state := s_gnt }
when (io.autl.grant.fire()) {
recv_beat := gnt.addr_beat
recv_data := gnt.data
state := s_check
}
when (state === s_check) {
when (!finished) {
count := count + chars_found
}
when (zero_found) { finished := Bool(true) }
when (recv_beat === UInt(tlDataBeats - 1)) {
addr := next_addr
state := Mux(zero_found || finished, s_resp, s_acq)
} .otherwise {
state := s_gnt
}
}
when (io.resp.fire()) { state := s_idle }
io.busy := (state =/= s_idle)
io.interrupt := Bool(false)
io.mem.req.valid := Bool(false)
}
class OpcodeSet(val opcodes: Seq[UInt]) {
def |(set: OpcodeSet) =
new OpcodeSet(this.opcodes ++ set.opcodes)
def matches(oc: UInt) = opcodes.map(_ === oc).reduce(_ || _)
}
object OpcodeSet {
def custom0 = new OpcodeSet(Seq(Bits("b0001011")))
def custom1 = new OpcodeSet(Seq(Bits("b0101011")))
def custom2 = new OpcodeSet(Seq(Bits("b1011011")))
def custom3 = new OpcodeSet(Seq(Bits("b1111011")))
def all = custom0 | custom1 | custom2 | custom3
}
class RoccCommandRouter(opcodes: Seq[OpcodeSet])(implicit p: Parameters)
extends CoreModule()(p) {
val io = new Bundle {
val in = Decoupled(new RoCCCommand).flip
val out = Vec(opcodes.size, Decoupled(new RoCCCommand))
val busy = Bool(OUTPUT)
}
val cmd = Queue(io.in)
val cmdReadys = io.out.zip(opcodes).map { case (out, opcode) =>
val me = opcode.matches(cmd.bits.inst.opcode)
out.valid := cmd.valid && me
out.bits := cmd.bits
out.ready && me
}
cmd.ready := cmdReadys.reduce(_ || _)
io.busy := cmd.valid
assert(PopCount(cmdReadys) <= UInt(1),
"Custom opcode matched for more than one accelerator")
}

View File

@ -0,0 +1,3 @@
// See LICENSE.SiFive for license details.
package object tile extends rocket.constants.ScalarOpConstants