Heterogeneous Tiles (#550)
Fundamental new features:

* Added tile package: This package is intended to hold components re-usable across different types of tile. It will be the future location of TL2-RoCC accelerators and new diplomatic versions of intra-tile interfaces.
* Adopted [ModuleName]Params convention: The code base was very inconsistent about what to name case classes that provide parameters to modules. Settled on calling them [ModuleName]Params to distinguish them from config.Parameters and config.Config. So far this has been applied mostly to case classes defined within rocket and tile.
* Defined RocketTileParams: A nested case class containing case classes for all the components of a tile (L1 caches and core). Allows all such parameters to vary per-tile.
* Defined RocketCoreParams: All the parameters that can be varied per-core.
* Defined L1CacheParams: A trait defining the parameters common to L1 caches, made concrete in different derived case classes.
* Defined RocketTilesKey: A sequence of RocketTileParams, one for every tile to be created.
* Provided HeterogeneousDualCoreConfig: An example of making a heterogeneous chip with two cores, one big and one little.
* Changes to legacy code: ReplacementPolicy moved to package util. L1Metadata moved to package tile. Legacy L2 cache agent removed because it can no longer share the metadata array implementation with the L1. Legacy GroundTests are on life support.

Additional changes that got rolled in along the way:

* rocket: Fix critical path through BTB for I$ index bits > pgIdxBits
* coreplex: tiles connected via :=*
* groundtest: updated to use TileParams
* tilelink: cache cork requirements are relaxed to allow more cacheless masters
This commit is contained in:
81
src/main/scala/tile/BaseTile.scala
Normal file
81
src/main/scala/tile/BaseTile.scala
Normal file
@ -0,0 +1,81 @@
|
||||
// See LICENSE.SiFive for license details.
|
||||
|
||||
package tile
|
||||
|
||||
import Chisel._
|
||||
import config._
|
||||
import diplomacy._
|
||||
import rocket._
|
||||
import uncore.tilelink2._
|
||||
import util._
|
||||
|
||||
/** Config key whose value is a `TLEdgeOut`. NOTE(review): presumably the edge towards shared memory, going by the name -- confirm at the use sites. */
case object SharedMemoryTLEdge extends Field[TLEdgeOut]
|
||||
/** Config key holding a `TileParams`; `HasTileParameters` reads it as `p(TileKey)`. */
case object TileKey extends Field[TileParams]
|
||||
|
||||
/** Parameter set describing a single tile.
  *
  * Optional components are modelled as `Option`s, and the accelerators as a
  * (possibly empty) sequence. Concrete tiles supply every member.
  */
trait TileParams {
  // Core and its branch target buffer
  val core: CoreParams
  val btb: Option[BTBParams]

  // L1 caches and data scratchpad size (in bytes)
  val icache: Option[ICacheParams]
  val dcache: Option[DCacheParams]
  val dataScratchpadBytes: Int

  // One entry per RoCC accelerator
  val rocc: Seq[RoCCParams]
}
|
||||
|
||||
/** Convenience view over the `TileParams` found under `TileKey`.
  *
  * Mix this into anything that has an implicit `Parameters` in scope to get
  * the derived `usingXxx` feature flags for the tile.
  */
trait HasTileParameters {
  implicit val p: Parameters

  /** Tile parameters looked up from the config via `TileKey`. */
  val tileParams: TileParams = p(TileKey)

  val usingVM = tileParams.core.useVM
  // User mode is on when requested explicitly or whenever VM is enabled.
  val usingUser = tileParams.core.useUser || usingVM
  val usingDebug = tileParams.core.useDebug
  val usingRoCC = tileParams.rocc.nonEmpty
  // A BTB counts as used only if it is present AND has at least one entry.
  val usingBTB = tileParams.btb.exists(_.nEntries > 0)
  val usingPTW = usingVM
  // A data scratchpad requires a D$ to be configured and a non-zero size.
  val usingDataScratchpad = tileParams.dcache.isDefined && tileParams.dataScratchpadBytes > 0

  /** Number of D$ arbiter ports: one base port, plus one each when VM and the
    * data scratchpad are enabled, plus one per RoCC entry.
    */
  def dcacheArbPorts = 1 + usingVM.toInt + usingDataScratchpad.toInt + tileParams.rocc.size
}
|
||||
|
||||
/** Root of the tile hierarchy: a `LazyModule` with no members of its own. */
abstract class BareTile(implicit p: Parameters) extends LazyModule
|
||||
|
||||
/** Base IO bundle for a [[BareTile]].
  *
  * Keeps a reference to the outer tile and re-exports its `Parameters`
  * implicitly so that fields declared in subclasses can use `p(...)`.
  *
  * @param _outer the tile this bundle belongs to
  */
abstract class BareTileBundle[+L <: BareTile](_outer: L)
    extends GenericParameterizedBundle(_outer) {
  val outer = _outer
  implicit val p = outer.p
}
|
||||
|
||||
/** Base module implementation for a [[BareTile]].
  *
  * @param _outer the tile whose implementation this is
  * @param _io    thunk that builds the IO bundle; it is invoked exactly once,
  *               when `io` is initialised
  */
abstract class BareTileModule[+L <: BareTile, +B <: BareTileBundle[L]](_outer: L, _io: () => B)
    extends LazyModuleImp(_outer) {
  val outer = _outer
  val io = _io()
}
|
||||
|
||||
/** Outer (diplomatic) half of a tile's TileLink master port.
  *
  * The master port is what connects caches and accelerators to the coreplex.
  * It is exposed as `masterNode`, a `TLOutputNode`.
  */
trait HasTileLinkMasterPort extends HasTileParameters {
  implicit val p: Parameters
  val module: HasTileLinkMasterPortModule
  val masterNode = TLOutputNode()
}
|
||||
|
||||
/** Bundle half of the TileLink master port: exposes the outgoing bundle of
  * the outer tile's `masterNode` as the `master` field.
  */
trait HasTileLinkMasterPortBundle {
  val outer: HasTileLinkMasterPort
  val master = outer.masterNode.bundleOut
}
|
||||
|
||||
/** Module half of the TileLink master port: only fixes the types of `outer`
  * and `io` so that they carry the master-port members.
  */
trait HasTileLinkMasterPortModule {
  val outer: HasTileLinkMasterPort
  val io: HasTileLinkMasterPortBundle
}
|
||||
|
||||
/** A [[BareTile]] with a TileLink master port.
  *
  * The default `module` is a [[BaseTileModule]] built around a fresh
  * [[BaseTileBundle]]; it is a `lazy val`, so it is constructed on first use.
  *
  * @param tileParams parameters for this tile
  */
abstract class BaseTile(tileParams: TileParams)(implicit p: Parameters)
    extends BareTile with HasTileLinkMasterPort {
  override lazy val module = new BaseTileModule(this, () => new BaseTileBundle(this))
}
|
||||
|
||||
/** IO of a [[BaseTile]]: the TileLink master port plus three inputs.
  *
  * `hartid` and `resetVector` are both `XLen` bits wide.
  */
class BaseTileBundle[+L <: BaseTile](_outer: L)
    extends BareTileBundle(_outer) with HasTileLinkMasterPortBundle {
  val hartid = UInt(INPUT, p(XLen))
  val interrupts = new TileInterrupts()(p).asInput
  val resetVector = UInt(INPUT, p(XLen))
}
|
||||
|
||||
/** Module implementation for a [[BaseTile]]; adds the master-port typing of
  * `outer` and `io` on top of [[BareTileModule]].
  */
class BaseTileModule[+L <: BaseTile, +B <: BaseTileBundle[L]](_outer: L, _io: () => B)
    extends BareTileModule(_outer, _io) with HasTileLinkMasterPortModule
|
88
src/main/scala/tile/Core.scala
Normal file
88
src/main/scala/tile/Core.scala
Normal file
@ -0,0 +1,88 @@
|
||||
// See LICENSE.SiFive for license details.
|
||||
|
||||
package tile
|
||||
|
||||
import Chisel._
|
||||
import config._
|
||||
import rocket._
|
||||
import util._
|
||||
|
||||
/** Config key holding a factory that builds a core (a `CoreModule with HasCoreIO`) from `Parameters`. */
case object BuildCore extends Field[Parameters => CoreModule with HasCoreIO]
|
||||
/** Config key for the integer register width; `HasCoreParameters` requires it to be 32 or 64. */
case object XLen extends Field[Int]
|
||||
|
||||
/** Parameters that can be varied per-core.
  *
  * `mulDiv` and `fpu` are `Option`s: `HasCoreParameters` treats a defined
  * value as "unit present".
  */
trait CoreParams {
  // Feature switches
  val useVM: Boolean
  val useUser: Boolean
  val useDebug: Boolean
  val useAtomics: Boolean
  val useCompressed: Boolean

  // Optional functional units
  val mulDiv: Option[MulDivParams]
  val fpu: Option[FPUParams]

  // Pipeline widths and instruction size (in bits)
  val fetchWidth: Int
  val decodeWidth: Int
  val retireWidth: Int
  val instBits: Int
}
|
||||
|
||||
/** Derived per-core constants, computed from `tileParams.core` and the config.
  *
  * The declaration order of the `val`s below matters: several of them are
  * computed from `def`s that read other `val`s (for example `vpnBitsExtended`
  * reaches `pgLevels` through `vaddrBits`), so a `val` must not be moved above
  * anything it depends on.
  */
trait HasCoreParameters extends HasTileParameters {
  val coreParams: CoreParams = tileParams.core

  val xLen = p(XLen)
  val fLen = xLen // TODO relax this
  require(xLen == 32 || xLen == 64)

  // Feature flags taken straight from the core parameters.
  val usingMulDiv = coreParams.mulDiv.isDefined
  val usingFPU = coreParams.fpu.isDefined
  val usingAtomics = coreParams.useAtomics
  val usingCompressed = coreParams.useCompressed

  val retireWidth = coreParams.retireWidth
  val fetchWidth = coreParams.fetchWidth
  val decodeWidth = coreParams.decodeWidth

  val coreInstBits = coreParams.instBits
  val coreInstBytes = coreInstBits / 8
  val coreDataBits = xLen
  val coreDataBytes = coreDataBits / 8

  // D$ request tags: 6 bits for the core, widened by the bits needed to
  // distinguish the arbiter ports.
  val coreDCacheReqTagBits = 6
  val dcacheReqTagBits = coreDCacheReqTagBits + log2Ceil(dcacheArbPorts)

  // Address-translation geometry.
  def pgIdxBits = 12
  // 10 bits per level when xLen == 32, 9 when xLen == 64.
  def pgLevelBits = 10 - log2Ceil(xLen / 32)
  def vaddrBits = pgIdxBits + pgLevels * pgLevelBits
  // Currently hard-coded; the config lookup is left commented out.
  val paddrBits = 32 // p(PAddrBits)
  def ppnBits = paddrBits - pgIdxBits
  def vpnBits = vaddrBits - pgIdxBits
  val pgLevels = p(PgLevels)
  val asIdBits = p(ASIdBits)
  // One extra bit whenever the virtual address is narrower than xLen.
  val vpnBitsExtended = vpnBits + (vaddrBits < xLen).toInt
  val vaddrBitsExtended = vpnBitsExtended + pgIdxBits
  val coreMaxAddrBits = paddrBits max vaddrBitsExtended
  val maxPAddrBits = xLen match {
    case 32 => 34
    case 64 => 50
  }
  require(paddrBits <= maxPAddrBits)

  // Print out log of committed instructions and their writeback values.
  // Requires post-processing due to out-of-order writebacks.
  val enableCommitLog = false
}
|
||||
|
||||
/** Base class for hardware modules that need the [[HasCoreParameters]] constants. */
abstract class CoreModule(implicit val p: Parameters) extends Module with HasCoreParameters
|
||||
|
||||
/** Base class for bundles that need the [[HasCoreParameters]] constants. */
abstract class CoreBundle(implicit val p: Parameters)
    extends ParameterizedBundle()(p) with HasCoreParameters
|
||||
|
||||
/** Standard IO of a core: interrupts and hart id as inputs, the instruction
  * and data memory ports, and the flipped PTW, FPU and RoCC interfaces.
  */
trait HasCoreIO {
  implicit val p: Parameters

  val io = new Bundle {
    val interrupts = new TileInterrupts().asInput
    val hartid = UInt(INPUT, p(XLen))
    val imem = new FrontendIO()(p)
    val dmem = new HellaCacheIO()(p)
    val ptw = new DatapathPTWIO().flip
    val fpu = new FPUCoreIO().flip
    val rocc = new RoCCCoreIO().flip
  }
}
|
752
src/main/scala/tile/FPU.scala
Normal file
752
src/main/scala/tile/FPU.scala
Normal file
@ -0,0 +1,752 @@
|
||||
// See LICENSE.Berkeley for license details.
|
||||
// See LICENSE.SiFive for license details.
|
||||
|
||||
package tile
|
||||
|
||||
import Chisel._
|
||||
import Chisel.ImplicitConversions._
|
||||
import FPConstants._
|
||||
import rocket.DecodeLogic
|
||||
import rocket.Instructions._
|
||||
import uncore.constants.MemoryOpConstants._
|
||||
import config._
|
||||
import util._
|
||||
|
||||
/** Configuration of the floating-point unit.
  *
  * @param divSqrt     defaults to `true`
  * @param sfmaLatency latency handed to the single-precision FMA pipe (default 3)
  * @param dfmaLatency latency handed to the double-precision FMA pipe (default 4)
  */
case class FPUParams(
  divSqrt: Boolean = true,
  sfmaLatency: Int = 3,
  dfmaLatency: Int = 4)
|
||||
|
||||
/** Bit patterns for the 5-bit FPU command field, plus a few field widths.
  *
  * `?` marks a don't-care bit, so several patterns overlap on purpose
  * (for instance `FCMD_CVT_FF` and `FCMD_CVT_FI` are the same pattern).
  */
object FPConstants {
  // Arithmetic
  def FCMD_ADD    = BitPat("b0??00")
  def FCMD_SUB    = BitPat("b0??01")
  def FCMD_MUL    = BitPat("b0??10")
  def FCMD_MADD   = BitPat("b1??00")
  def FCMD_MSUB   = BitPat("b1??01")
  def FCMD_NMSUB  = BitPat("b1??10")
  def FCMD_NMADD  = BitPat("b1??11")
  def FCMD_DIV    = BitPat("b?0011")
  def FCMD_SQRT   = BitPat("b?1011")

  // Sign injection, min/max, conversions, compares and moves
  def FCMD_SGNJ   = BitPat("b??1?0")
  def FCMD_MINMAX = BitPat("b?01?1")
  def FCMD_CVT_FF = BitPat("b??0??")
  def FCMD_CVT_IF = BitPat("b?10??")
  def FCMD_CMP    = BitPat("b?01??")
  def FCMD_MV_XF  = BitPat("b?11??")
  def FCMD_CVT_FI = BitPat("b??0??")
  def FCMD_MV_FX  = BitPat("b??1??")

  // Matches every command
  def FCMD_X      = BitPat("b?????")
  val FCMD_WIDTH = 5

  // Widths used for the `fcsr_rm` and `fcsr_flags` ports of FPUCoreIO
  val RM_SZ = 3
  val FLAGS_SZ = 5
}
|
||||
|
||||
/** Decoded FPU control signals.
  *
  * The declaration order is significant: it is the field order of every
  * bundle that mixes this trait in, and it matches the column order of the
  * decode table in `FPUDecoder`.
  */
trait HasFPUCtrlSigs {
  // Command and load/store marker
  val cmd = Bits(width = FCMD_WIDTH)
  val ldst = Bool()

  // Register write/read enables and operand swapping
  val wen = Bool()
  val ren1 = Bool()
  val ren2 = Bool()
  val ren3 = Bool()
  val swap12 = Bool()
  val swap23 = Bool()

  // Precision and destination-pipe selection
  val single = Bool()
  val fromint = Bool()
  val toint = Bool()
  val fastpipe = Bool()
  val fma = Bool()
  val div = Bool()
  val sqrt = Bool()

  // Rounding / flag-writing markers
  val round = Bool()
  val wflags = Bool()
}
|
||||
|
||||
/** Plain bundle carrying exactly the fields of `HasFPUCtrlSigs`. */
class FPUCtrlSigs extends Bundle with HasFPUCtrlSigs
|
||||
|
||||
/** Decodes a 32-bit instruction into [[FPUCtrlSigs]].
  *
  * Each table row lists, after the command pattern, one value per control
  * signal in this column order:
  *
  *   ldst, wen, ren1, ren2, ren3, swap12, swap23, single, fromint, toint,
  *   fastpipe, fma, div, sqrt, round, wflags
  *
  * which is the same order in which the decoder outputs are wired to
  * `io.sigs` at the bottom of the class. The double-precision table is only
  * included when `fLen` is 64.
  */
class FPUDecoder(implicit p: Parameters) extends FPUModule()(p) {
  val io = new Bundle {
    val inst = Bits(INPUT, 32)
    val sigs = new FPUCtrlSigs().asOutput
  }

  // Used for any instruction that matches no table row.
  val default = List(FCMD_X, X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X)

  // Single-precision instructions.
  val f = Array(
    FLW       -> List(FCMD_X, Y,Y,N,N,N,X,X,Y,N,N,N,N,N,N,N,N),
    FSW       -> List(FCMD_MV_XF, Y,N,N,Y,N,Y,X,Y,N,Y,N,N,N,N,N,N),
    FMV_S_X   -> List(FCMD_MV_FX, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,N),
    FCVT_S_W  -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y),
    FCVT_S_WU -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y),
    FCVT_S_L  -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y),
    FCVT_S_LU -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y),
    FMV_X_S   -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,N),
    FCLASS_S  -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,N),
    FCVT_W_S  -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y),
    FCVT_WU_S -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y),
    FCVT_L_S  -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y),
    FCVT_LU_S -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y),
    FEQ_S     -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,N,Y),
    FLT_S     -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,N,Y),
    FLE_S     -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,N,Y),
    FSGNJ_S   -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,N),
    FSGNJN_S  -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,N),
    FSGNJX_S  -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,N),
    FMIN_S    -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,Y),
    FMAX_S    -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,Y),
    FADD_S    -> List(FCMD_ADD, N,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y,Y),
    FSUB_S    -> List(FCMD_SUB, N,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y,Y),
    FMUL_S    -> List(FCMD_MUL, N,Y,Y,Y,N,N,N,Y,N,N,N,Y,N,N,Y,Y),
    FMADD_S   -> List(FCMD_MADD, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y),
    FMSUB_S   -> List(FCMD_MSUB, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y),
    FNMADD_S  -> List(FCMD_NMADD, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y),
    FNMSUB_S  -> List(FCMD_NMSUB, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y),
    FDIV_S    -> List(FCMD_DIV, N,Y,Y,Y,N,N,N,Y,N,N,N,N,Y,N,Y,Y),
    FSQRT_S   -> List(FCMD_SQRT, N,Y,Y,N,N,Y,X,Y,N,N,N,N,N,Y,Y,Y))

  // Double-precision instructions (including the S<->D conversions).
  val d = Array(
    FLD       -> List(FCMD_X, Y,Y,N,N,N,X,X,N,N,N,N,N,N,N,N,N),
    FSD       -> List(FCMD_MV_XF, Y,N,N,Y,N,Y,X,N,N,Y,N,N,N,N,N,N),
    FMV_D_X   -> List(FCMD_MV_FX, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,N),
    FCVT_D_W  -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y),
    FCVT_D_WU -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y),
    FCVT_D_L  -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y),
    FCVT_D_LU -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y),
    FMV_X_D   -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,N),
    FCLASS_D  -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,N),
    FCVT_W_D  -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y),
    FCVT_WU_D -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y),
    FCVT_L_D  -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y),
    FCVT_LU_D -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y),
    FCVT_S_D  -> List(FCMD_CVT_FF, N,Y,Y,N,N,N,X,Y,N,N,Y,N,N,N,Y,Y),
    FCVT_D_S  -> List(FCMD_CVT_FF, N,Y,Y,N,N,N,X,N,N,N,Y,N,N,N,Y,Y),
    FEQ_D     -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,N,Y),
    FLT_D     -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,N,Y),
    FLE_D     -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,N,Y),
    FSGNJ_D   -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,N),
    FSGNJN_D  -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,N),
    FSGNJX_D  -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,N),
    FMIN_D    -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,Y),
    FMAX_D    -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,Y),
    FADD_D    -> List(FCMD_ADD, N,Y,Y,Y,N,N,Y,N,N,N,N,Y,N,N,Y,Y),
    FSUB_D    -> List(FCMD_SUB, N,Y,Y,Y,N,N,Y,N,N,N,N,Y,N,N,Y,Y),
    FMUL_D    -> List(FCMD_MUL, N,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,Y,Y),
    FMADD_D   -> List(FCMD_MADD, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y),
    FMSUB_D   -> List(FCMD_MSUB, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y),
    FNMADD_D  -> List(FCMD_NMADD, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y),
    FNMSUB_D  -> List(FCMD_NMSUB, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y),
    FDIV_D    -> List(FCMD_DIV, N,Y,Y,Y,N,N,N,N,N,N,N,N,Y,N,Y,Y),
    FSQRT_D   -> List(FCMD_SQRT, N,Y,Y,N,N,Y,X,N,N,N,N,N,N,Y,Y,Y))

  val insns = fLen match {
    case 32 => f
    case 64 => f ++ d
  }
  val decoder = DecodeLogic(io.inst, default, insns)

  // Wire every decoder output to the control signal in the same position.
  val s = io.sigs
  val sigs = Seq(
    s.cmd, s.ldst, s.wen, s.ren1, s.ren2, s.ren3, s.swap12, s.swap23,
    s.single, s.fromint, s.toint, s.fastpipe, s.fma, s.div, s.sqrt,
    s.round, s.wflags)
  sigs.zip(decoder).foreach { case (signal, decoded) => signal := decoded }
}
|
||||
|
||||
/** Interface between the core pipeline and the FPU, declared from the FPU's
  * point of view (`HasCoreIO` uses it flipped).
  */
class FPUCoreIO(implicit p: Parameters) extends CoreBundle()(p) {
  // Instruction and integer operand
  val inst = Bits(INPUT, 32)
  val fromint_data = Bits(INPUT, xLen)

  // Rounding mode in, accrued flags out
  val fcsr_rm = Bits(INPUT, FPConstants.RM_SZ)
  val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ))

  // Results towards the store path and the integer register file
  val store_data = Bits(OUTPUT, fLen)
  val toint_data = Bits(OUTPUT, xLen)

  // Data-memory response
  val dmem_resp_val = Bool(INPUT)
  val dmem_resp_type = Bits(INPUT, 3)
  val dmem_resp_tag = UInt(INPUT, 5)
  val dmem_resp_data = Bits(INPUT, fLen)

  // Pipeline handshake, kills and decoded control signals
  val valid = Bool(INPUT)
  val fcsr_rdy = Bool(OUTPUT)
  val nack_mem = Bool(OUTPUT)
  val illegal_rm = Bool(OUTPUT)
  val killx = Bool(INPUT)
  val killm = Bool(INPUT)
  val dec = new FPUCtrlSigs().asOutput

  // Scoreboard updates
  val sboard_set = Bool(OUTPUT)
  val sboard_clr = Bool(OUTPUT)
  val sboard_clra = UInt(OUTPUT, 5)
}
|
||||
|
||||
/** Full FPU interface: [[FPUCoreIO]] plus a decoupled request/response pair
  * (`cp_req` / `cp_resp`).
  */
class FPUIO(implicit p: Parameters) extends FPUCoreIO()(p) {
  // cp doesn't pay attn to kill sigs
  val cp_req = Decoupled(new FPInput()).flip
  val cp_resp = Decoupled(new FPResult())
}
|
||||
|
||||
/** Result of an FPU pipe: `fLen + 1` data bits and five exception-flag bits. */
class FPResult(implicit p: Parameters) extends CoreBundle()(p) {
  val data = Bits(width = fLen + 1)
  val exc = Bits(width = 5)
}
|
||||
|
||||
/** Request handed to an FPU pipe: the decoded control signals, the rounding
  * mode, the `typ` field and up to three `fLen + 1`-bit operands.
  */
class FPInput(implicit p: Parameters) extends CoreBundle()(p) with HasFPUCtrlSigs {
  val rm = Bits(width = 3)
  val typ = Bits(width = 2)
  val in1 = Bits(width = fLen + 1)
  val in2 = Bits(width = fLen + 1)
  val in3 = Bits(width = fLen + 1)

  override def cloneType = new FPInput().asInstanceOf[this.type]
}
|
||||
|
||||
/** Builds the 10-bit one-hot style class mask for a value in recoded format. */
object ClassifyRecFN {
  /** @param expWidth exponent width of the format
    * @param sigWidth significand width of the format
    * @param in       the recoded value (`expWidth + sigWidth + 1` bits are read)
    * @return a 10-bit mask; from MSB to LSB: quiet NaN, signaling NaN, +inf,
    *         +normal, +subnormal, +0, -0, -subnormal, -normal, -inf
    */
  def apply(expWidth: Int, sigWidth: Int, in: UInt) = {
    // Field extraction: sign bit, (expWidth + 1)-bit exponent, fraction.
    val signBit = in(sigWidth + expWidth)
    val expField = in(sigWidth + expWidth - 1, sigWidth - 1)
    val fract = in(sigWidth - 2, 0)

    // The top three exponent bits act as a class code.
    val topCode = expField(expWidth, expWidth - 2)
    val topTwo = topCode(2, 1)
    val special = topTwo === UInt(3)

    val lowExpSmall = expField(expWidth - 2, 0) < UInt(2)
    val subnormal = topCode === UInt(1) || (topTwo === UInt(1) && lowExpSmall)
    val normal = (topTwo === UInt(1) && !lowExpSmall) || topTwo === UInt(2)
    val zero = topCode === UInt(0)
    val inf = special && !expField(expWidth - 2)
    val nan = topCode.andR
    // The top fraction bit separates signaling (0) from quiet (1) NaNs.
    val sNaN = nan && !fract(sigWidth - 2)
    val qNaN = nan && fract(sigWidth - 2)

    Cat(
      qNaN, sNaN,
      inf && !signBit, normal && !signBit, subnormal && !signBit, zero && !signBit,
      zero && signBit, subnormal && signBit, normal && signBit, inf && signBit)
  }
}
|
||||
|
||||
/** True when the top three exponent bits of a recoded value are all ones. */
object IsNaNRecFN {
  def apply(expWidth: Int, sigWidth: Int, in: UInt) = {
    val expMsb = sigWidth + expWidth - 1
    in(expMsb, expMsb - 2).andR
  }
}
|
||||
|
||||
/** True when a recoded value is a NaN (per `IsNaNRecFN`) whose top fraction
  * bit, bit `sigWidth - 2`, is clear.
  */
object IsSNaNRecFN {
  def apply(expWidth: Int, sigWidth: Int, in: UInt) = {
    val nan = IsNaNRecFN(expWidth, sigWidth, in)
    val topFractBit = in(sigWidth - 2)
    nan && !topFractBit
  }
}
|
||||
|
||||
/** Format conversion without rounding or NaN handling.
  *
  * The sign is copied, the fraction is shifted to the new width, and the
  * exponent is re-biased -- except that when the top three exponent bits are 0
  * or at least 6, those three bits are copied through unchanged.
  */
object RecFNToRecFN_noncompliant {
  def apply(in: UInt, inExpWidth: Int, inSigWidth: Int, outExpWidth: Int, outSigWidth: Int) = {
    val signBit = in(inSigWidth + inExpWidth)
    val srcFract = in(inSigWidth - 2, 0)
    val srcExp = in(inSigWidth + inExpWidth - 1, inSigWidth - 1)

    val dstFract = srcFract << outSigWidth >> inSigWidth

    val srcCode = srcExp(inExpWidth, inExpWidth - 2)
    val rebiased = (srcExp + (1 << outExpWidth)) - (1 << inExpWidth)
    val dstExp = Mux(
      srcCode === 0 || srcCode >= 6,
      Cat(srcCode, rebiased(outExpWidth - 3, 0)),
      rebiased(outExpWidth, 0))

    Cat(signBit, dstExp, dstFract)
  }
}
|
||||
|
||||
/** The NaN constant for a recoded format: the top three exponent bits and
  * the top fraction bit set, everything else (sign included) clear.
  */
object CanonicalNaN {
  def apply(expWidth: Int, sigWidth: Int): UInt = {
    val expPart = BigInt(7) << (expWidth + sigWidth - 3)
    val fractPart = BigInt(1) << (sigWidth - 2)
    UInt(expPart + fractPart, expWidth + sigWidth + 1)
  }
}
|
||||
|
||||
/** Exponent/significand widths of the floating-point formats supported for a
  * given `fLen`.
  *
  * `fLen` must already be initialised (to 32 or 64) when this trait's body
  * runs, because `floatWidths` is an eagerly evaluated `val`; any other value
  * fails with a `MatchError`.
  */
trait HasFPUParameters {
  val fLen: Int

  // Single precision
  val sExpWidth = 8
  val sSigWidth = 24
  // Double precision
  val dExpWidth = 11
  val dSigWidth = 53

  /** Supported (expWidth, sigWidth) pairs, narrowest first. */
  val floatWidths = {
    val single = (sExpWidth, sSigWidth)
    val double = (dExpWidth, dSigWidth)
    fLen match {
      case 32 => List(single)
      case 64 => List(single, double)
    }
  }

  val maxExpWidth = floatWidths.map { case (exp, _) => exp }.max
  val maxSigWidth = floatWidths.map { case (_, sig) => sig }.max
}
|
||||
|
||||
/** Base class for the FPU's modules: a `CoreModule` with the `HasFPUParameters` format widths mixed in. */
abstract class FPUModule(implicit p: Parameters) extends CoreModule()(p) with HasFPUParameters
|
||||
|
||||
/** FPU pipe producing integer-side results: moves, classify, compares and
  * float-to-int conversions, plus the un-recoded store data.
  *
  * The request is registered for one cycle (`in` / `valid`), so `io.out` lags
  * `io.in` by a cycle. The registered request is also exported as
  * `io.as_double`.
  */
class FPToInt(implicit p: Parameters) extends FPUModule()(p) {
  class Output extends Bundle {
    val lt = Bool()
    val store = Bits(width = fLen)
    val toint = Bits(width = xLen)
    val exc = Bits(width = 5)
    override def cloneType = new Output().asInstanceOf[this.type]
  }
  val io = new Bundle {
    val in = Valid(new FPInput).flip
    val as_double = new FPInput().asOutput
    val out = Valid(new Output)
  }

  val in = Reg(new FPInput)
  val valid = Reg(next=io.in.valid)

  def upconvert(x: UInt) = RecFNToRecFN_noncompliant(x, sExpWidth, sSigWidth, maxExpWidth, maxSigWidth)

  when (io.in.valid) {
    in := io.in.bits
    // With a wider format available, single-precision operands are widened on
    // the way in -- except for loads/stores and for the MV_XF command group.
    if (fLen > 32) when (io.in.bits.single && !io.in.bits.ldst && io.in.bits.cmd =/= FCMD_MV_XF) {
      in.in1 := upconvert(io.in.bits.in1)
      in.in2 := upconvert(io.in.bits.in2)
    }
  }

  // Un-recode in1 back to the standard encoding, sign-extended to xLen.
  val unrec_s = hardfloat.fNFromRecFN(sExpWidth, sSigWidth, in.in1).sextTo(xLen)
  val unrec_mem = fLen match {
    case 32 => unrec_s
    case 64 =>
      val unrec_d = hardfloat.fNFromRecFN(dExpWidth, dSigWidth, in.in1).sextTo(xLen)
      Mux(in.single, unrec_s, unrec_d)
  }
  val unrec_int = xLen match {
    case 32 => unrec_s
    // The original wrote `case fLen =>` here. A lower-case identifier in a
    // pattern is a fresh variable that matches anything (it does NOT compare
    // against the member `fLen`), so the explicit wildcard states what the
    // code actually does.
    case _ => unrec_mem
  }

  val classify_s = ClassifyRecFN(sExpWidth, sSigWidth, in.in1)
  val classify_out = fLen match {
    case 32 => classify_s
    case 64 =>
      val classify_d = ClassifyRecFN(dExpWidth, dSigWidth, in.in1)
      Mux(in.single, classify_s, classify_d)
  }

  val dcmp = Module(new hardfloat.CompareRecFN(maxExpWidth, maxSigWidth))
  dcmp.io.a := in.in1
  dcmp.io.b := in.in2
  dcmp.io.signaling := !in.rm(1)

  // Defaults: rm(0) picks classify over the plain move. The later `when`
  // blocks override these for compares and conversions.
  io.out.bits.toint := Mux(in.rm(0), classify_out, unrec_int)
  io.out.bits.store := unrec_mem
  io.out.bits.exc := Bits(0)

  when (in.cmd === FCMD_CMP) {
    // rm's low two bits select, active-low, which of {lt, eq} count as "true".
    io.out.bits.toint := (~in.rm & Cat(dcmp.io.lt, dcmp.io.eq)).orR
    io.out.bits.exc := dcmp.io.exceptionFlags
  }
  when (in.cmd === FCMD_CVT_IF) {
    // One converter per integer width (32, 64, ...); typ's upper bit(s) pick
    // the width and typ(0) picks unsigned.
    val minXLen = 32
    val n = log2Ceil(xLen/minXLen) + 1
    for (i <- 0 until n) {
      val conv = Module(new hardfloat.RecFNToIN(maxExpWidth, maxSigWidth, minXLen << i))
      conv.io.in := in.in1
      conv.io.roundingMode := in.rm
      conv.io.signedOut := ~in.typ(0)
      when (in.typ.extract(log2Ceil(n), 1) === i) {
        io.out.bits.toint := conv.io.out.sextTo(xLen)
        io.out.bits.exc := Cat(conv.io.intExceptionFlags(2, 1).orR, UInt(0, 3), conv.io.intExceptionFlags(0))
      }
    }
  }

  io.out.valid := valid
  io.out.bits.lt := dcmp.io.lt
  io.as_double := in
}
|
||||
|
||||
/** FPU pipe for integer-to-float moves and conversions.
  *
  * The input is registered once by `Pipe(io.in)` and the result is delayed by
  * a further `latency - 1` stages.
  *
  * @param latency total latency of the pipe
  */
class IntToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) {
  val io = new Bundle {
    val in = Valid(new FPInput).flip
    val out = Valid(new FPResult)
  }

  val in = Pipe(io.in)

  // Default result: recode in1 as-is (single, or double when !single on a
  // 64-bit FPU), with no exception flags.
  val mux = Wire(new FPResult)
  mux.exc := Bits(0)
  mux.data := hardfloat.recFNFromFN(sExpWidth, sSigWidth, in.bits.in1)
  if (fLen > 32) {
    when (!in.bits.single) {
      mux.data := hardfloat.recFNFromFN(dExpWidth, dSigWidth, in.bits.in1)
    }
  }

  // Integer operand extended to full width: typ's upper bit(s) select the
  // source width, typ(0) selects zero- versus sign-extension.
  val intValue = {
    val narrowest = 32
    val widthCount = log2Ceil(xLen / narrowest) + 1
    val extended = Wire(init = in.bits.in1.asSInt)
    (0 until widthCount - 1).foreach { i =>
      val lowPart = in.bits.in1((narrowest << i) - 1, 0)
      when (in.bits.typ.extract(log2Ceil(widthCount), 1) === i) {
        extended := Mux(in.bits.typ(0), lowPart.zext, lowPart.asSInt)
      }
    }
    extended.asUInt
  }

  when (in.bits.cmd === FCMD_CVT_FI) {
    val l2s = Module(new hardfloat.INToRecFN(xLen, sExpWidth, sSigWidth))
    l2s.io.signedIn := ~in.bits.typ(0)
    l2s.io.in := intValue
    l2s.io.roundingMode := in.bits.rm
    // The bits above the single-precision result are filled with ones.
    mux.data := Cat(UInt((BigInt(1) << (fLen - 32)) - 1), l2s.io.out)
    mux.exc := l2s.io.exceptionFlags

    fLen match {
      case 32 =>
      case 64 =>
        val l2d = Module(new hardfloat.INToRecFN(xLen, dExpWidth, dSigWidth))
        l2d.io.signedIn := ~in.bits.typ(0)
        l2d.io.in := intValue
        l2d.io.roundingMode := in.bits.rm
        when (!in.bits.single) {
          mux.data := Cat(UInt((BigInt(1) << (fLen - 64)) - 1), l2d.io.out)
          mux.exc := l2d.io.exceptionFlags
        }
    }
  }

  io.out <> Pipe(in.valid, mux, latency - 1)
}
|
||||
|
||||
/** FPU pipe for float-to-float operations: sign injection (the default),
  * min/max, and -- on a 64-bit FPU -- single/double conversion.
  *
  * The input is registered once by `Pipe(io.in)` and the result is delayed by
  * a further `latency - 1` stages.
  *
  * @param latency total latency of the pipe
  */
class FPToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) {
  val io = new Bundle {
    val in = Valid(new FPInput).flip
    val out = Valid(new FPResult)
    val lt = Bool(INPUT) // from FPToInt
  }

  val in = Pipe(io.in)

  // Sign source: rm(1) -> in1 ^ in2, else rm(0) -> ~in2, else in2.
  val signNum = Mux(in.bits.rm(1), in.bits.in1 ^ in.bits.in2, Mux(in.bits.rm(0), ~in.bits.in2, in.bits.in2))
  val fsgnj_s = Cat(signNum(32), in.bits.in1(31, 0))
  val fsgnj = fLen match {
    case 32 => fsgnj_s
    case 64 =>
      Mux(
        in.bits.single,
        Cat(in.bits.in1 >> 33, fsgnj_s),
        Cat(signNum(64), in.bits.in1(63, 0)))
  }

  val mux = Wire(new FPResult)
  mux.exc := UInt(0)
  mux.data := fsgnj

  when (in.bits.cmd === FCMD_MINMAX) {
    // For one format, returns (pick in1?, invalid flag, result is NaN?, NaN constant).
    def doMinMax(expWidth: Int, sigWidth: Int) = {
      val nan1 = IsNaNRecFN(expWidth, sigWidth, in.bits.in1)
      val nan2 = IsNaNRecFN(expWidth, sigWidth, in.bits.in2)
      val snan1 = IsSNaNRecFN(expWidth, sigWidth, in.bits.in1)
      val snan2 = IsSNaNRecFN(expWidth, sigWidth, in.bits.in2)
      val anySignaling = snan1 || snan2
      val nanResult = anySignaling || (nan1 && nan2)
      // Sum of the NaN constants of this format and every wider one.
      val nanConst = floatWidths
        .filter { case (exp, _) => exp >= expWidth }
        .map { case (exp, sig) => CanonicalNaN(exp, sig) }
        .reduce(_ + _)
      val pickLHS = nan2 || ((in.bits.rm(0) =/= io.lt) && !nan1)
      (pickLHS, anySignaling, nanResult, nanConst)
    }
    val (isLHS, isInvalid, isNaNOut, cNaN) = fLen match {
      case 32 => doMinMax(sExpWidth, sSigWidth)
      case 64 => MuxT(in.bits.single, doMinMax(sExpWidth, sSigWidth), doMinMax(dExpWidth, dSigWidth))
    }
    mux.exc := isInvalid << 4
    mux.data := Mux(isNaNOut, cNaN, Mux(isLHS, in.bits.in1, in.bits.in2))
  }

  fLen match {
    case 32 =>
    case 64 =>
      when (in.bits.cmd === FCMD_CVT_FF) {
        when (in.bits.single) {
          // Double -> single; the upper result bits are filled with ones.
          val d2s = Module(new hardfloat.RecFNToRecFN(dExpWidth, dSigWidth, sExpWidth, sSigWidth))
          d2s.io.in := in.bits.in1
          d2s.io.roundingMode := in.bits.rm
          mux.data := Cat(UInt((BigInt(1) << (fLen - 32)) - 1), d2s.io.out)
          mux.exc := d2s.io.exceptionFlags
        }.otherwise {
          // Single -> double.
          val s2d = Module(new hardfloat.RecFNToRecFN(sExpWidth, sSigWidth, dExpWidth, dSigWidth))
          s2d.io.in := in.bits.in1
          s2d.io.roundingMode := in.bits.rm
          mux.data := s2d.io.out
          mux.exc := s2d.io.exceptionFlags
        }
      }
  }

  io.out <> Pipe(in.valid, mux, latency - 1)
}
|
||||
|
||||
/** Fused multiply-add pipe for one floating-point format.
  *
  * Add/sub and plain multiply are mapped onto the FMA by substituting a
  * constant operand when the request is registered: `one` for the multiplier
  * on add/sub, `zero` (carrying the product's sign) for the addend on multiply.
  *
  * @param latency  total latency; one input register stage plus `latency - 1`
  *                 output stages
  * @param expWidth exponent width of the format
  * @param sigWidth significand width of the format
  */
class FPUFMAPipe(val latency: Int, expWidth: Int, sigWidth: Int)(implicit p: Parameters)
    extends FPUModule()(p) {
  val io = new Bundle {
    val in = Valid(new FPInput).flip
    val out = Valid(new FPResult)
  }

  val width = sigWidth + expWidth
  val one = UInt(1) << (width - 1)
  val zero = (io.in.bits.in1(width) ^ io.in.bits.in2(width)) << width

  val valid = Reg(next=io.in.valid)
  val in = Reg(new FPInput)
  when (io.in.valid) {
    in := io.in.bits
    val cmd_fma = io.in.bits.ren3
    val cmd_addsub = io.in.bits.swap23
    // Keep cmd(1) only for FMA and add/sub requests; cmd(0) always passes through.
    in.cmd := Cat(io.in.bits.cmd(1) & (cmd_fma || cmd_addsub), io.in.bits.cmd(0))
    when (cmd_addsub) {
      in.in2 := one
    }
    unless (cmd_fma || cmd_addsub) {
      in.in3 := zero
    }
  }

  val fma = Module(new hardfloat.MulAddRecFN(expWidth, sigWidth))
  fma.io.op := in.cmd
  fma.io.roundingMode := in.rm
  fma.io.a := in.in1
  fma.io.b := in.in2
  fma.io.c := in.in3

  // Result bits above this format's width are filled with ones.
  val res = Wire(new FPResult)
  res.data := Cat(UInt((BigInt(1) << (fLen - (expWidth + sigWidth))) - 1), fma.io.out)
  res.exc := fma.io.exceptionFlags
  io.out := Pipe(valid, res, latency - 1)
}
|
||||
|
||||
class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) {
|
||||
val io = new FPUIO
|
||||
|
||||
val ex_reg_valid = Reg(next=io.valid, init=Bool(false))
|
||||
val req_valid = ex_reg_valid || io.cp_req.valid
|
||||
val ex_reg_inst = RegEnable(io.inst, io.valid)
|
||||
val ex_cp_valid = io.cp_req.fire()
|
||||
val mem_reg_valid = Reg(next=ex_reg_valid && !io.killx || ex_cp_valid, init=Bool(false))
|
||||
val mem_reg_inst = RegEnable(ex_reg_inst, ex_reg_valid)
|
||||
val mem_cp_valid = Reg(next=ex_cp_valid, init=Bool(false))
|
||||
val killm = (io.killm || io.nack_mem) && !mem_cp_valid
|
||||
val wb_reg_valid = Reg(next=mem_reg_valid && (!killm || mem_cp_valid), init=Bool(false))
|
||||
val wb_cp_valid = Reg(next=mem_cp_valid, init=Bool(false))
|
||||
|
||||
val fp_decoder = Module(new FPUDecoder)
|
||||
fp_decoder.io.inst := io.inst
|
||||
|
||||
val cp_ctrl = Wire(new FPUCtrlSigs)
|
||||
cp_ctrl <> io.cp_req.bits
|
||||
io.cp_resp.valid := Bool(false)
|
||||
io.cp_resp.bits.data := UInt(0)
|
||||
|
||||
val id_ctrl = fp_decoder.io.sigs
|
||||
val ex_ctrl = Mux(ex_cp_valid, cp_ctrl, RegEnable(id_ctrl, io.valid))
|
||||
val mem_ctrl = RegEnable(ex_ctrl, req_valid)
|
||||
val wb_ctrl = RegEnable(mem_ctrl, mem_reg_valid)
|
||||
|
||||
// load response
|
||||
val load_wb = Reg(next=io.dmem_resp_val)
|
||||
val load_wb_single = RegEnable(!io.dmem_resp_type(0), io.dmem_resp_val)
|
||||
val load_wb_data = RegEnable(io.dmem_resp_data, io.dmem_resp_val)
|
||||
val load_wb_tag = RegEnable(io.dmem_resp_tag, io.dmem_resp_val)
|
||||
val rec_s = hardfloat.recFNFromFN(sExpWidth, sSigWidth, load_wb_data)
|
||||
val load_wb_data_recoded = fLen match {
|
||||
case 32 => rec_s
|
||||
case 64 =>
|
||||
val rec_d = hardfloat.recFNFromFN(dExpWidth, dSigWidth, load_wb_data)
|
||||
Mux(load_wb_single, Cat(UInt((BigInt(1) << (fLen - 32)) - 1), rec_s), rec_d)
|
||||
}
|
||||
|
||||
// regfile
|
||||
val regfile = Mem(32, Bits(width = fLen+1))
|
||||
when (load_wb) {
|
||||
regfile(load_wb_tag) := load_wb_data_recoded
|
||||
if (enableCommitLog)
|
||||
printf("f%d p%d 0x%x\n", load_wb_tag, load_wb_tag + 32, Mux(load_wb_single, load_wb_data(31,0), load_wb_data))
|
||||
}
|
||||
|
||||
val ex_ra1::ex_ra2::ex_ra3::Nil = List.fill(3)(Reg(UInt()))
|
||||
when (io.valid) {
|
||||
when (id_ctrl.ren1) {
|
||||
when (!id_ctrl.swap12) { ex_ra1 := io.inst(19,15) }
|
||||
when (id_ctrl.swap12) { ex_ra2 := io.inst(19,15) }
|
||||
}
|
||||
when (id_ctrl.ren2) {
|
||||
when (id_ctrl.swap12) { ex_ra1 := io.inst(24,20) }
|
||||
when (id_ctrl.swap23) { ex_ra3 := io.inst(24,20) }
|
||||
when (!id_ctrl.swap12 && !id_ctrl.swap23) { ex_ra2 := io.inst(24,20) }
|
||||
}
|
||||
when (id_ctrl.ren3) { ex_ra3 := io.inst(31,27) }
|
||||
}
|
||||
val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.fcsr_rm, ex_reg_inst(14,12))
|
||||
|
||||
val req = Wire(new FPInput)
|
||||
req := ex_ctrl
|
||||
req.rm := ex_rm
|
||||
req.in1 := regfile(ex_ra1)
|
||||
req.in2 := regfile(ex_ra2)
|
||||
req.in3 := regfile(ex_ra3)
|
||||
req.typ := ex_reg_inst(21,20)
|
||||
when (ex_cp_valid) {
|
||||
req := io.cp_req.bits
|
||||
when (io.cp_req.bits.swap23) {
|
||||
req.in2 := io.cp_req.bits.in3
|
||||
req.in3 := io.cp_req.bits.in2
|
||||
}
|
||||
}
|
||||
|
||||
val sfma = Module(new FPUFMAPipe(cfg.sfmaLatency, sExpWidth, sSigWidth))
|
||||
sfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.single
|
||||
sfma.io.in.bits := req
|
||||
|
||||
val fpiu = Module(new FPToInt)
|
||||
fpiu.io.in.valid := req_valid && (ex_ctrl.toint || ex_ctrl.div || ex_ctrl.sqrt || ex_ctrl.cmd === FCMD_MINMAX)
|
||||
fpiu.io.in.bits := req
|
||||
io.store_data := fpiu.io.out.bits.store
|
||||
io.toint_data := fpiu.io.out.bits.toint
|
||||
when(fpiu.io.out.valid && mem_cp_valid && mem_ctrl.toint){
|
||||
io.cp_resp.bits.data := fpiu.io.out.bits.toint
|
||||
io.cp_resp.valid := Bool(true)
|
||||
}
|
||||
|
||||
val ifpu = Module(new IntToFP(2))
|
||||
ifpu.io.in.valid := req_valid && ex_ctrl.fromint
|
||||
ifpu.io.in.bits := req
|
||||
ifpu.io.in.bits.in1 := Mux(ex_cp_valid, io.cp_req.bits.in1, io.fromint_data)
|
||||
|
||||
val fpmu = Module(new FPToFP(2))
|
||||
fpmu.io.in.valid := req_valid && ex_ctrl.fastpipe
|
||||
fpmu.io.in.bits := req
|
||||
fpmu.io.lt := fpiu.io.out.bits.lt
|
||||
|
||||
val divSqrt_wen = Reg(next=Bool(false))
|
||||
val divSqrt_inReady = Wire(init=Bool(false))
|
||||
val divSqrt_waddr = Reg(UInt(width = 5))
|
||||
val divSqrt_wdata = Wire(UInt(width = fLen+1))
|
||||
val divSqrt_flags = Wire(UInt(width = 5))
|
||||
val divSqrt_in_flight = Reg(init=Bool(false))
|
||||
val divSqrt_killed = Reg(Bool())
|
||||
|
||||
// writeback arbitration
|
||||
case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, res: FPResult)
|
||||
val pipes = List(
|
||||
Pipe(fpmu, fpmu.latency, (c: FPUCtrlSigs) => c.fastpipe, fpmu.io.out.bits),
|
||||
Pipe(ifpu, ifpu.latency, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits),
|
||||
Pipe(sfma, sfma.latency, (c: FPUCtrlSigs) => c.fma && c.single, sfma.io.out.bits)) ++
|
||||
(fLen > 32).option({
|
||||
val dfma = Module(new FPUFMAPipe(cfg.dfmaLatency, dExpWidth, dSigWidth))
|
||||
dfma.io.in.valid := req_valid && ex_ctrl.fma && !ex_ctrl.single
|
||||
dfma.io.in.bits := req
|
||||
Pipe(dfma, dfma.latency, (c: FPUCtrlSigs) => c.fma && !c.single, dfma.io.out.bits)
|
||||
})
|
||||
def latencyMask(c: FPUCtrlSigs, offset: Int) = {
|
||||
require(pipes.forall(_.lat >= offset))
|
||||
pipes.map(p => Mux(p.cond(c), UInt(1 << p.lat-offset), UInt(0))).reduce(_|_)
|
||||
}
|
||||
def pipeid(c: FPUCtrlSigs) = pipes.zipWithIndex.map(p => Mux(p._1.cond(c), UInt(p._2), UInt(0))).reduce(_|_)
|
||||
val maxLatency = pipes.map(_.lat).max
|
||||
val memLatencyMask = latencyMask(mem_ctrl, 2)
|
||||
|
||||
class WBInfo extends Bundle {
|
||||
val rd = UInt(width = 5)
|
||||
val single = Bool()
|
||||
val cp = Bool()
|
||||
val pipeid = UInt(width = log2Ceil(pipes.size))
|
||||
override def cloneType: this.type = new WBInfo().asInstanceOf[this.type]
|
||||
}
|
||||
|
||||
val wen = Reg(init=Bits(0, maxLatency-1))
|
||||
val wbInfo = Reg(Vec(maxLatency-1, new WBInfo))
|
||||
val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint)
|
||||
val write_port_busy = RegEnable(mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, req_valid)
|
||||
|
||||
for (i <- 0 until maxLatency-2) {
|
||||
when (wen(i+1)) { wbInfo(i) := wbInfo(i+1) }
|
||||
}
|
||||
wen := wen >> 1
|
||||
when (mem_wen) {
|
||||
when (!killm) {
|
||||
wen := wen >> 1 | memLatencyMask
|
||||
}
|
||||
for (i <- 0 until maxLatency-1) {
|
||||
when (!write_port_busy && memLatencyMask(i)) {
|
||||
wbInfo(i).cp := mem_cp_valid
|
||||
wbInfo(i).single := mem_ctrl.single
|
||||
wbInfo(i).pipeid := pipeid(mem_ctrl)
|
||||
wbInfo(i).rd := mem_reg_inst(11,7)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
val waddr = Mux(divSqrt_wen, divSqrt_waddr, wbInfo(0).rd)
|
||||
val wdata = Mux(divSqrt_wen, divSqrt_wdata, (pipes.map(_.res.data): Seq[UInt])(wbInfo(0).pipeid))
|
||||
val wexc = (pipes.map(_.res.exc): Seq[UInt])(wbInfo(0).pipeid)
|
||||
when ((!wbInfo(0).cp && wen(0)) || divSqrt_wen) {
|
||||
regfile(waddr) := wdata
|
||||
if (enableCommitLog) {
|
||||
val wdata_unrec_s = hardfloat.fNFromRecFN(sExpWidth, sSigWidth, wdata)
|
||||
val unrec = fLen match {
|
||||
case 32 => wdata_unrec_s
|
||||
case 64 =>
|
||||
val wdata_unrec_d = hardfloat.fNFromRecFN(dExpWidth, dSigWidth, wdata)
|
||||
Mux(wbInfo(0).single, wdata_unrec_s, wdata_unrec_d)
|
||||
}
|
||||
printf("f%d p%d 0x%x\n", waddr, waddr + 32, unrec)
|
||||
}
|
||||
}
|
||||
when (wbInfo(0).cp && wen(0)) {
|
||||
io.cp_resp.bits.data := wdata
|
||||
io.cp_resp.valid := Bool(true)
|
||||
}
|
||||
io.cp_req.ready := !ex_reg_valid
|
||||
|
||||
val wb_toint_valid = wb_reg_valid && wb_ctrl.toint
|
||||
val wb_toint_exc = RegEnable(fpiu.io.out.bits.exc, mem_ctrl.toint)
|
||||
io.fcsr_flags.valid := wb_toint_valid || divSqrt_wen || wen(0)
|
||||
io.fcsr_flags.bits :=
|
||||
Mux(wb_toint_valid, wb_toint_exc, UInt(0)) |
|
||||
Mux(divSqrt_wen, divSqrt_flags, UInt(0)) |
|
||||
Mux(wen(0), wexc, UInt(0))
|
||||
|
||||
val units_busy = mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && (!divSqrt_inReady || wen.orR)
|
||||
io.fcsr_rdy := !(ex_reg_valid && ex_ctrl.wflags || mem_reg_valid && mem_ctrl.wflags || wb_reg_valid && wb_ctrl.toint || wen.orR || divSqrt_in_flight)
|
||||
io.nack_mem := units_busy || write_port_busy || divSqrt_in_flight
|
||||
io.dec <> fp_decoder.io.sigs
|
||||
def useScoreboard(f: ((Pipe, Int)) => Bool) = pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(Bool(false))(_||_)
|
||||
io.sboard_set := wb_reg_valid && !wb_cp_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl)) || mem_ctrl.div || mem_ctrl.sqrt)
|
||||
io.sboard_clr := !wb_cp_valid && (divSqrt_wen || (wen(0) && useScoreboard(x => wbInfo(0).pipeid === UInt(x._2))))
|
||||
io.sboard_clra := waddr
|
||||
// we don't currently support round-max-magnitude (rm=4)
|
||||
io.illegal_rm := ex_rm(2) && ex_ctrl.round
|
||||
|
||||
divSqrt_wdata := 0
|
||||
divSqrt_flags := 0
|
||||
if (cfg.divSqrt) {
|
||||
require(fLen == 64)
|
||||
val divSqrt_single = Reg(Bool())
|
||||
val divSqrt_rm = Reg(Bits())
|
||||
val divSqrt_flags_double = Reg(Bits())
|
||||
val divSqrt_wdata_double = Reg(Bits())
|
||||
|
||||
val divSqrt = Module(new hardfloat.DivSqrtRecF64)
|
||||
divSqrt_inReady := Mux(divSqrt.io.sqrtOp, divSqrt.io.inReady_sqrt, divSqrt.io.inReady_div)
|
||||
val divSqrt_outValid = divSqrt.io.outValid_div || divSqrt.io.outValid_sqrt
|
||||
divSqrt.io.inValid := mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && !divSqrt_in_flight
|
||||
divSqrt.io.sqrtOp := mem_ctrl.sqrt
|
||||
divSqrt.io.a := fpiu.io.as_double.in1
|
||||
divSqrt.io.b := fpiu.io.as_double.in2
|
||||
divSqrt.io.roundingMode := fpiu.io.as_double.rm
|
||||
|
||||
when (divSqrt.io.inValid && divSqrt_inReady) {
|
||||
divSqrt_in_flight := true
|
||||
divSqrt_killed := killm
|
||||
divSqrt_single := mem_ctrl.single
|
||||
divSqrt_waddr := mem_reg_inst(11,7)
|
||||
divSqrt_rm := divSqrt.io.roundingMode
|
||||
}
|
||||
|
||||
when (divSqrt_outValid) {
|
||||
divSqrt_wen := !divSqrt_killed
|
||||
divSqrt_wdata_double := divSqrt.io.out
|
||||
divSqrt_in_flight := false
|
||||
divSqrt_flags_double := divSqrt.io.exceptionFlags
|
||||
}
|
||||
|
||||
val divSqrt_toSingle = Module(new hardfloat.RecFNToRecFN(11, 53, 8, 24))
|
||||
divSqrt_toSingle.io.in := divSqrt_wdata_double
|
||||
divSqrt_toSingle.io.roundingMode := divSqrt_rm
|
||||
divSqrt_wdata := Mux(divSqrt_single, divSqrt_toSingle.io.out, divSqrt_wdata_double)
|
||||
divSqrt_flags := divSqrt_flags_double | Mux(divSqrt_single, divSqrt_toSingle.io.exceptionFlags, Bits(0))
|
||||
} else {
|
||||
when (ex_ctrl.div || ex_ctrl.sqrt) { io.illegal_rm := true }
|
||||
}
|
||||
}
|
||||
|
||||
/** Mix-ins for constructing tiles that may have an FPU external to the core pipeline */
|
||||
// Outer-side marker trait: it declares no members of its own and only requires HasTileParameters.
trait CanHaveSharedFPU extends HasTileParameters
|
||||
|
||||
/** Module-side half of the shared-FPU mix-in.
  *
  * `fpuOpt` holds one FPU instance when the tile's core parameters carry an
  * FPU configuration, and is empty otherwise.
  */
trait CanHaveSharedFPUModule {
  val outer: CanHaveSharedFPU
  val fpuOpt =
    for (fpuParams <- outer.tileParams.core.fpu)
      yield Module(new FPU(fpuParams)(outer.p))
  // TODO fpArb could go here instead of inside LegacyRoccComplex
}
|
15
src/main/scala/tile/Interrupts.scala
Normal file
15
src/main/scala/tile/Interrupts.scala
Normal file
@ -0,0 +1,15 @@
|
||||
// See LICENSE.SiFive for license details.
|
||||
|
||||
package tile
|
||||
|
||||
import Chisel._
|
||||
import config.Parameters
|
||||
import util._
|
||||
|
||||
/** Bundle of the interrupt lines presented to a tile.
  *
  * NOTE(review): the field names suggest debug, machine timer/software/external
  * and supervisor external interrupts -- confirm against the CSR file.
  */
class TileInterrupts(implicit p: Parameters) extends CoreBundle()(p) {
  val debug = Bool()
  val mtip = Bool()
  val msip = Bool()
  val meip = Bool()
  // Only present when the core is configured with usingVM.
  val seip = if (usingVM) Some(Bool()) else None
}
|
52
src/main/scala/tile/L1Cache.scala
Normal file
52
src/main/scala/tile/L1Cache.scala
Normal file
@ -0,0 +1,52 @@
|
||||
// See LICENSE.SiFive for license details.
|
||||
|
||||
package tile
|
||||
|
||||
import Chisel._
|
||||
import config.{Parameters, Field}
|
||||
import coreplex.CacheBlockBytes
|
||||
import rocket.PAddrBits
|
||||
import uncore.tilelink2.ClientMetadata
|
||||
import uncore.util.{Code, IdentityCode}
|
||||
import util.ParameterizedBundle
|
||||
|
||||
/** Parameters common to every L1 cache; concrete caches supply the values. */
trait L1CacheParams {
  /** Number of sets. */
  def nSets: Int

  /** Number of ways (associativity). */
  def nWays: Int

  /** Width of one data-array row, in bits. */
  def rowBits: Int

  /** Number of TLB entries attached to this cache. */
  def nTLBEntries: Int

  /** Exposed to users as `hasSplitMetadata` by HasL1CacheParameters. */
  def splitMetadata: Boolean

  /** Optional protection code; HasL1CacheParameters falls back to IdentityCode when absent. */
  def ecc: Option[Code]
}
|
||||
|
||||
/** Derived L1 cache geometry, computed from `cacheParams` and the implicit
  * `Parameters`. Everything is a `def`, so implementers only need to provide
  * `p` and `cacheParams`.
  */
trait HasL1CacheParameters {
  implicit val p: Parameters
  val cacheParams: L1CacheParams

  // --- values forwarded directly from cacheParams -------------------------
  def nSets = cacheParams.nSets
  def nWays = cacheParams.nWays
  def rowBits = cacheParams.rowBits
  def nTLBEntries = cacheParams.nTLBEntries
  def hasSplitMetadata = cacheParams.splitMetadata
  // No ECC configured means data is stored unencoded.
  def code = cacheParams.ecc.getOrElse(new IdentityCode)

  // --- block / address decomposition --------------------------------------
  def cacheBlockBytes = p(CacheBlockBytes)
  def lgCacheBlockBytes = log2Up(cacheBlockBytes)
  def blockOffBits = lgCacheBlockBytes
  def idxBits = log2Up(cacheParams.nSets)
  // Address bits that are not part of the tag: set index plus block offset.
  def untagBits = idxBits + blockOffBits
  def tagBits = p(PAddrBits) - untagBits

  // --- ways and rows -------------------------------------------------------
  def wayBits = log2Up(nWays)
  def isDM = nWays == 1
  def rowBytes = rowBits / 8
  def rowOffBits = log2Up(rowBytes)

  // --- refill: one beat per TileLink data word of the shared memory edge --
  def cacheDataBits = p(SharedMemoryTLEdge).bundle.dataBits
  def cacheDataBeats = cacheBlockBytes * 8 / cacheDataBits
  def refillCycles = cacheDataBeats
}
|
||||
|
||||
/** Base class for L1 cache hardware modules; mixes in the derived cache geometry. */
abstract class L1CacheModule(implicit val p: Parameters) extends Module with HasL1CacheParameters
|
||||
|
||||
/** Base class for L1 cache bundles; mixes in the derived cache geometry. */
abstract class L1CacheBundle(implicit val p: Parameters) extends ParameterizedBundle()(p) with HasL1CacheParameters
|
439
src/main/scala/tile/LegacyRoCC.scala
Normal file
439
src/main/scala/tile/LegacyRoCC.scala
Normal file
@ -0,0 +1,439 @@
|
||||
// See LICENSE.Berkeley for license details.
|
||||
// See LICENSE.SiFive for license details.
|
||||
|
||||
package tile
|
||||
|
||||
import Chisel._
|
||||
import Chisel.ImplicitConversions._
|
||||
import config._
|
||||
import coreplex._
|
||||
import diplomacy._
|
||||
import rocket._
|
||||
import uncore.constants._
|
||||
import uncore.agents._
|
||||
import uncore.coherence._
|
||||
import uncore.devices._
|
||||
import uncore.tilelink._
|
||||
import uncore.tilelink2._
|
||||
import uncore.util._
|
||||
import util._
|
||||
|
||||
// Configuration keys used by the legacy RoCC support in this file.

/** Feeds `maxClientXacts` of the "L1toL2" TileLink parameters when accelerators are present. */
case object RoccMaxTaggedMemXacts extends Field[Int]

/** Number of `utl` channels; sizes the `utl` Vec in RoCCIO. */
case object RoccNMemChannels extends Field[Int]

/** Number of page-table-walker ports; sizes the `ptw` Vec in RoCCIO. */
case object RoccNPTWPorts extends Field[Int]

/** One RoCCParams entry per accelerator to build; an empty sequence means no accelerators. */
case object BuildRoCC extends Field[Seq[RoCCParams]]
|
||||
|
||||
/** Outer (LazyModule-side) mix-in that attaches legacy RoCC accelerators to a tile.
  *
  * When BuildRoCC is non-empty, a single LegacyRoccComplex is created under an
  * altered parameter view that supplies the legacy TileLink settings it needs.
  * Its TileLink master node is joined to the tile's `masterNode`, and the tile's
  * PTW and D-cache port counts are grown to make room for the accelerators.
  */
trait CanHaveLegacyRoccs extends CanHaveSharedFPU with CanHavePTW with HasTileLinkMasterPort {
  val module: CanHaveLegacyRoccsModule

  val legacyRocc =
    if (p(BuildRoCC).nonEmpty) {
      // Parameter overrides seen only by the accelerator complex.
      val roccParams = p.alter { (site, here, up) => {
        case CacheBlockOffsetBits => log2Up(site(CacheBlockBytes))
        case AmoAluOperandBits => site(XLen)
        case RoccNMemChannels => site(BuildRoCC).map(_.nMemChannels).sum
        case RoccNPTWPorts => site(BuildRoCC).map(_.nPTWPorts).sum
        case TLId => "L1toL2"
        case TLKey("L1toL2") =>
          TileLinkParameters(
            coherencePolicy = new MESICoherence(new NullRepresentation(site(NTiles))),
            nManagers = site(BankedL2Config).nBanks + 1 /* MMIO */,
            nCachingClients = 1,
            nCachelessClients = 1,
            maxClientXacts = math.max(
              tileParams.dcache.get.nMSHRs + 1 /* IOMSHR */,
              if (site(BuildRoCC).isEmpty) 1 else site(RoccMaxTaggedMemXacts)),
            maxClientsPerPort = if (site(BuildRoCC).isEmpty) 1 else 2,
            maxManagerXacts = 8,
            dataBeats = (8 * site(CacheBlockBytes)) / site(XLen),
            dataBits = site(CacheBlockBytes)*8)
      }}
      Some(LazyModule(new LegacyRoccComplex()(roccParams)))
    } else {
      None
    }

  // Hook the complex (if any) into the tile's diplomatic graph and port budgets.
  for (lr <- legacyRocc) {
    masterNode := lr.masterNode
    nPTWPorts += lr.nPTWPorts
    nDCachePorts += lr.nRocc
  }
}
|
||||
|
||||
/** Module-side half of the legacy RoCC mix-in.
  *
  * Wires the shared FPU's coprocessor interface to the accelerator complex
  * (or ties it off when there is none) and registers the complex's PTW and
  * D-cache ports with the tile.
  */
trait CanHaveLegacyRoccsModule extends CanHaveSharedFPUModule
    with CanHavePTWModule
    with HasTileLinkMasterPortModule {
  val outer: CanHaveLegacyRoccs

  for (fpu <- fpuOpt) {
    outer.legacyRocc match {
      case Some(lr) =>
        fpu.io.cp_req <> lr.module.io.fpu.cp_req
        fpu.io.cp_resp <> lr.module.io.fpu.cp_resp
      case None =>
        // No accelerators: the FPU never sees a coprocessor request.
        fpu.io.cp_req.valid := Bool(false)
        fpu.io.cp_resp.ready := Bool(false)
    }
  }

  for (lr <- outer.legacyRocc) {
    ptwPorts ++= lr.module.io.ptw
    dcachePorts ++= lr.module.io.dcache
  }

}
|
||||
|
||||
/** Container for every legacy RoCC accelerator listed in BuildRoCC.
  *
  * Diplomatic side: one TLLegacy adapter per legacy TileLink channel (one
  * `autl` per accelerator followed by all `utl` channels), each fed through a
  * TLHintHandler into `masterNode`.
  *
  * Module side: routes core commands to accelerators by opcode, arbitrates
  * their responses, gives each accelerator a SimpleHellaCacheIF onto a D-cache
  * port, forwards their PTW ports, and (optionally) arbitrates FPU access.
  */
class LegacyRoccComplex(implicit p: Parameters) extends LazyModule {
  val buildRocc = p(BuildRoCC)
  val usingRocc = !buildRocc.isEmpty
  val nRocc = buildRocc.size
  val nFPUPorts = buildRocc.filter(_.useFPU).size
  // Every accelerator has one `autl` channel in addition to its `utl` channels.
  val nMemChannels = buildRocc.map(_.nMemChannels).sum + nRocc
  val nPTWPorts = buildRocc.map(_.nPTWPorts).sum
  val roccOpcodes = buildRocc.map(_.opcodes)

  val masterNode = TLOutputNode()
  val legacies = List.fill(nMemChannels) { LazyModule(new TLLegacy()(p.alterPartial({ case PAddrBits => 32 }))) }
  legacies.foreach { leg => masterNode := TLHintHandler()(leg.node) }

  lazy val module = new LazyModuleImp(this) with HasCoreParameters {
    val io = new Bundle {
      val tl = masterNode.bundleOut
      val dcache = Vec(nRocc, new HellaCacheIO)
      val fpu = new Bundle {
        val cp_req = Decoupled(new FPInput())
        val cp_resp = Decoupled(new FPResult()).flip
      }
      val ptw = Vec(nPTWPorts, new TLBPTWIO)
      val core = new Bundle {
        val cmd = Decoupled(new RoCCCommand).flip
        val resp = Decoupled(new RoCCResponse)
        val busy = Bool(OUTPUT)
        val interrupt = Bool(OUTPUT)
        val exception = Bool(INPUT)
      }
    }

    val respArb = Module(new RRArbiter(new RoCCResponse, nRocc))
    io.core.resp <> respArb.io.out

    val cmdRouter = Module(new RoccCommandRouter(roccOpcodes))
    cmdRouter.io.in <> io.core.cmd

    val roccs = buildRocc.zipWithIndex.map { case (accelParams, i) =>
      // Each accelerator sees only its own channel/port counts.
      val rocc = accelParams.generator(p.alterPartial({
        case RoccNMemChannels => accelParams.nMemChannels
        case RoccNPTWPorts => accelParams.nPTWPorts
      }))
      val dcIF = Module(new SimpleHellaCacheIF)
      rocc.io.cmd <> cmdRouter.io.out(i)
      rocc.io.exception := io.core.exception
      dcIF.io.requestor <> rocc.io.mem
      io.dcache(i) := dcIF.io.cache
      // Adapters 0 until nRocc carry the per-accelerator `autl` channel.
      legacies(i).module.io.legacy <> rocc.io.autl
      respArb.io.in(i) <> Queue(rocc.io.resp)
      rocc
    }

    // Adapters nRocc onwards carry the individual `utl` channels. The Vecs are
    // flattened so each adapter is paired with exactly one channel; pairing an
    // adapter with a whole Vec (as `map` would) is only harmless when no
    // accelerator requests any `utl` channel.
    (nRocc until legacies.size) zip roccs.flatMap(_.io.utl) foreach { case(i, utl) =>
      legacies(i).module.io.legacy <> utl
    }

    // Forward every accelerator PTW port to the complex's PTW ports, in
    // accelerator order. nPTWPorts is the sum of the per-accelerator counts,
    // so both sides have the same length. Without this, io.ptw is undriven.
    io.ptw.zip(roccs.flatMap(_.io.ptw)).foreach { case (port, accelPtw) =>
      port <> accelPtw
    }

    io.core.busy := cmdRouter.io.busy || roccs.map(_.io.busy).reduce(_ || _)
    io.core.interrupt := roccs.map(_.io.interrupt).reduce(_ || _)

    if (usingFPU && nFPUPorts > 0) {
      // Only accelerators that asked for the FPU get an arbiter port.
      val fpArb = Module(new InOrderArbiter(new FPInput, new FPResult, nFPUPorts))
      val fp_rocc_ios = roccs.zip(buildRocc)
        .filter { case (_, params) => params.useFPU }
        .map { case (rocc, _) => rocc.io }
      fpArb.io.in_req <> fp_rocc_ios.map(_.fpu_req)
      fp_rocc_ios.zip(fpArb.io.in_resp).foreach {
        case (rocc, arb) => rocc.fpu_resp <> arb
      }
      io.fpu.cp_req <> fpArb.io.out_req
      fpArb.io.out_resp <> io.fpu.cp_resp
    } else {
      io.fpu.cp_req.valid := Bool(false)
      io.fpu.cp_resp.ready := Bool(false)
    }
  }
}
|
||||
|
||||
/** Description of one legacy RoCC accelerator.
  *
  * @param opcodes      opcodes routed to this accelerator by the command router
  * @param generator    builds the accelerator from the parameters it is handed
  * @param nMemChannels number of `utl` TileLink channels the accelerator gets
  * @param nPTWPorts    number of page-table-walker ports the accelerator gets
  * @param useFPU       whether the accelerator receives a port on the FPU arbiter
  */
case class RoCCParams(
    opcodes: OpcodeSet,
    generator: Parameters => RoCC,
    nMemChannels: Int = 0,
    nPTWPorts: Int = 0,
    useFPU: Boolean = false)
|
||||
|
||||
/** Fields of a RoCC instruction word; the widths below add up to 32 bits. */
class RoCCInstruction extends Bundle {
  val funct = Bits(width = 7)
  val rs2 = Bits(width = 5)
  val rs1 = Bits(width = 5)
  // NOTE(review): xd is read by the examples in this file as "a response is
  // expected"; xs1/xs2 presumably flag use of the source registers -- confirm.
  val xd = Bool()
  val xs1 = Bool()
  val xs2 = Bool()
  val rd = Bits(width = 5)
  val opcode = Bits(width = 7)
}
|
||||
|
||||
/** Command sent to an accelerator: the instruction fields, two xLen-wide
  * operand values, and an MStatus bundle.
  */
class RoCCCommand(implicit p: Parameters) extends CoreBundle()(p) {
  val inst = new RoCCInstruction
  val rs1 = Bits(width = xLen)
  val rs2 = Bits(width = xLen)
  val status = new MStatus
}
|
||||
|
||||
/** Response from an accelerator: a 5-bit destination register id and xLen bits of data. */
class RoCCResponse(implicit p: Parameters) extends CoreBundle()(p) {
  val rd = Bits(width = 5)
  val data = Bits(width = xLen)
}
|
||||
|
||||
/** Core-facing accelerator interface: command in, response out, one
  * D-cache port, and busy / interrupt / exception side-band signals.
  */
class RoCCCoreIO(implicit p: Parameters) extends CoreBundle()(p) {
  val cmd = Decoupled(new RoCCCommand).flip
  val resp = Decoupled(new RoCCResponse)
  val mem = new HellaCacheIO
  val busy = Bool(OUTPUT)
  val interrupt = Bool(OUTPUT)
  val exception = Bool(INPUT)

  override def cloneType: this.type =
    new RoCCCoreIO()(p).asInstanceOf[this.type]
}
|
||||
|
||||
/** Full accelerator interface: the core-facing signals plus legacy TileLink
  * channels, page-table-walker ports and an FPU request/response pair.
  */
class RoCCIO(implicit p: Parameters) extends RoCCCoreIO()(p) {
  // These should be handled differently, eventually
  val autl = new ClientUncachedTileLinkIO
  // Vec sizes come from the parameters the accelerator was generated with.
  val utl = Vec(p(RoccNMemChannels), new ClientUncachedTileLinkIO)
  val ptw = Vec(p(RoccNPTWPorts), new TLBPTWIO)
  val fpu_req = Decoupled(new FPInput)
  val fpu_resp = Decoupled(new FPResult).flip

  override def cloneType: this.type =
    new RoCCIO()(p).asInstanceOf[this.type]
}
|
||||
|
||||
/** Base class for legacy accelerators. Provides the RoCCIO port and two
  * default drives on the D-cache interface.
  */
abstract class RoCC(implicit p: Parameters) extends CoreModule()(p) {
  val io = new RoCCIO

  // Defaults for the D-cache port:
  io.mem.invalidate_lr := Bool(false) // don't mess with LR/SC
  io.mem.req.bits.phys := Bool(true) // don't perform address translation
}
|
||||
|
||||
/** Example accelerator holding `n` accumulator registers.
  *
  * The `funct` field selects the operation: 0 writes rs1 into the register,
  * 1 reads it, 2 loads it from memory at address rs1, 3 adds rs1 to it.
  * The register is selected by the low bits of rs2. A register with a load
  * outstanding is marked busy, and commands touching it stall until the
  * memory response arrives.
  */
class AccumulatorExample(n: Int = 4)(implicit p: Parameters) extends RoCC()(p) {
  val regfile = Mem(n, UInt(width = xLen))
  val busy = Reg(init = Vec.fill(n){Bool(false)})

  // Buffered command and its decode
  val cmd = Queue(io.cmd)
  val funct = cmd.bits.inst.funct
  val addr = cmd.bits.rs2(log2Up(n)-1,0)
  val doWrite = funct === UInt(0)
  val doRead = funct === UInt(1)
  val doLoad = funct === UInt(2)
  val doAccum = funct === UInt(3)
  val memRespTag = io.mem.resp.bits.tag(log2Up(n)-1,0)

  // Datapath
  val addend = cmd.bits.rs1
  val accum = regfile(addr)
  val wdata = Mux(doWrite, addend, accum + addend)

  // Stall conditions
  val doResp = cmd.bits.inst.xd
  val stallReg = busy(addr)
  val stallLoad = doLoad && !io.mem.req.ready
  val stallResp = doResp && !io.resp.ready

  // Register-file updates: first from commands, then from returning loads.
  when (cmd.fire() && (doWrite || doAccum)) {
    regfile(addr) := wdata
  }

  when (io.mem.resp.valid) {
    regfile(memRespTag) := io.mem.resp.bits.data
    busy(memRespTag) := Bool(false)
  }

  // A register becomes busy once its load request has been accepted.
  when (io.mem.req.fire()) {
    busy(addr) := Bool(true)
  }

  // The command is consumed only when nothing stalls it.
  cmd.ready := !(stallReg || stallLoad || stallResp)

  // Memory request interface: issue a load unless the register or the
  // response path is stalling the command.
  io.mem.req.valid := cmd.valid && doLoad && !(stallReg || stallResp)
  io.mem.req.bits.addr := addend
  io.mem.req.bits.tag := addr
  io.mem.req.bits.cmd := M_XRD // perform a load (M_XWR for stores)
  io.mem.req.bits.typ := MT_D // D = 8 bytes, W = 4, H = 2, B = 1
  io.mem.req.bits.data := Bits(0) // we're not performing any stores...

  // Processor response interface: respond when a response is wanted and
  // neither the register nor the load path is stalling.
  io.resp.valid := cmd.valid && doResp && !(stallReg || stallLoad)
  // Must respond with the appropriate tag or undefined behavior
  io.resp.bits.rd := cmd.bits.inst.rd
  // Semantics is to always send out prior accumulator register value
  io.resp.bits.data := accum

  // Be busy when have pending memory requests or committed possibility of pending requests
  io.busy := cmd.valid || busy.reduce(_||_)
  // Set this true to trigger an interrupt on the processor (please refer to supervisor documentation)
  io.interrupt := Bool(false)

  // The legacy TileLink channel is unused by this accelerator.
  io.autl.acquire.valid := Bool(false)
  io.autl.grant.ready := Bool(false)
}
|
||||
|
||||
/** Example accelerator that translates the address in rs1 through PTW port 0.
  *
  * State flow: idle -> ptw request -> ptw response -> core response -> idle.
  * The response carries the PTE's ppn concatenated with the page offset when
  * the PTE is a leaf, and all ones otherwise.
  */
class TranslatorExample(implicit p: Parameters) extends RoCC()(p) {
  val req_addr = Reg(UInt(width = coreMaxAddrBits))
  val req_rd = Reg(io.resp.bits.rd)
  val req_offset = req_addr(pgIdxBits - 1, 0)
  val req_vpn = req_addr(coreMaxAddrBits - 1, pgIdxBits)
  val pte = Reg(new PTE)

  val s_idle :: s_ptw_req :: s_ptw_resp :: s_resp :: Nil = Enum(Bits(), 4)
  val state = Reg(init = s_idle)

  private val ptw = io.ptw(0)

  // Handshake signals are pure functions of the current state.
  io.cmd.ready := (state === s_idle)
  ptw.req.valid := (state === s_ptw_req)
  io.resp.valid := (state === s_resp)

  // State transitions, in the order the states are visited.
  when (io.cmd.fire()) {
    req_rd := io.cmd.bits.inst.rd
    req_addr := io.cmd.bits.rs1
    state := s_ptw_req
  }

  when (ptw.req.fire()) { state := s_ptw_resp }

  when (state === s_ptw_resp && ptw.resp.valid) {
    pte := ptw.resp.bits.pte
    state := s_resp
  }

  when (io.resp.fire()) { state := s_idle }

  // PTW request payload
  ptw.req.bits.addr := req_vpn
  ptw.req.bits.store := Bool(false)
  ptw.req.bits.fetch := Bool(false)

  // Core response payload
  io.resp.bits.rd := req_rd
  io.resp.bits.data := Mux(pte.leaf(), Cat(pte.ppn, req_offset), SInt(-1, xLen).asUInt)

  io.busy := (state =/= s_idle)
  io.interrupt := Bool(false)

  // The D-cache port and the legacy TileLink channel are unused.
  io.mem.req.valid := Bool(false)
  io.autl.acquire.valid := Bool(false)
  io.autl.grant.ready := Bool(false)
}
|
||||
|
||||
/** Example accelerator that counts occurrences of a byte (low 8 bits of rs2)
  * in memory starting at the address in rs1, stopping at the first zero byte.
  *
  * Memory is fetched a block at a time over the `autl` channel; each received
  * beat is checked in its own cycle, and the count is returned once the block
  * containing the terminating zero has been fully received.
  */
class CharacterCountExample(implicit p: Parameters) extends RoCC()(p)
    with HasTileLinkParameters {

  private val blockOffset = tlBeatAddrBits + tlByteAddrBits

  val needle = Reg(UInt(width = 8))
  val addr = Reg(UInt(width = coreMaxAddrBits))
  val count = Reg(UInt(width = xLen))
  val resp_rd = Reg(io.resp.bits.rd)

  val addr_block = addr(coreMaxAddrBits - 1, blockOffset)
  val offset = addr(blockOffset - 1, 0)
  val next_addr = (addr_block + UInt(1)) << UInt(blockOffset)

  val s_idle :: s_acq :: s_gnt :: s_check :: s_resp :: Nil = Enum(Bits(), 5)
  val state = Reg(init = s_idle)

  // Most recently received beat
  val gnt = io.autl.grant.bits
  val recv_data = Reg(UInt(width = tlDataBits))
  val recv_beat = Reg(UInt(width = tlBeatAddrBits))

  // Per-byte comparisons on the received beat
  val data_bytes = Vec.tabulate(tlDataBytes) { i => recv_data(8 * i + 7, 8 * i) }
  val zero_match = data_bytes.map(_ === UInt(0))
  val needle_match = data_bytes.map(_ === needle)
  val first_zero = PriorityEncoder(zero_match)

  // A byte counts when it matches the needle, lies at or after the starting
  // offset within the block, and is not past the first zero byte of the beat.
  val chars_found = PopCount(needle_match.zipWithIndex.map {
    case (matches, i) =>
      val idx = Cat(recv_beat, UInt(i, tlByteAddrBits))
      matches && idx >= offset && UInt(i) <= first_zero
  })
  val zero_found = zero_match.reduce(_ || _)
  val finished = Reg(Bool())

  // Handshakes are pure functions of the current state.
  io.cmd.ready := (state === s_idle)
  io.autl.acquire.valid := (state === s_acq)
  io.autl.grant.ready := (state === s_gnt)
  io.resp.valid := (state === s_resp)

  io.autl.acquire.bits := GetBlock(addr_block = addr_block)
  io.resp.bits.rd := resp_rd
  io.resp.bits.data := count

  // Accept a command: latch its operands and start fetching.
  when (io.cmd.fire()) {
    addr := io.cmd.bits.rs1
    needle := io.cmd.bits.rs2
    resp_rd := io.cmd.bits.inst.rd
    count := UInt(0)
    finished := Bool(false)
    state := s_acq
  }

  when (io.autl.acquire.fire()) { state := s_gnt }

  // Capture one beat, then spend a cycle checking it.
  when (io.autl.grant.fire()) {
    recv_beat := gnt.addr_beat
    recv_data := gnt.data
    state := s_check
  }

  when (state === s_check) {
    // Beats after the terminating zero no longer contribute.
    when (!finished) {
      count := count + chars_found
    }
    when (zero_found) { finished := Bool(true) }
    when (recv_beat === UInt(tlDataBeats - 1)) {
      // Last beat of the block: respond, or move on to the next block.
      addr := next_addr
      state := Mux(zero_found || finished, s_resp, s_acq)
    } .otherwise {
      state := s_gnt
    }
  }

  when (io.resp.fire()) { state := s_idle }

  io.busy := (state =/= s_idle)
  io.interrupt := Bool(false)
  // The D-cache port is unused.
  io.mem.req.valid := Bool(false)
}
|
||||
|
||||
/** A set of opcodes that one accelerator responds to.
  *
  * @param opcodes the opcode encodings in this set
  */
class OpcodeSet(val opcodes: Seq[UInt]) {
  /** Union of this set and `set`; this set's opcodes come first. */
  def |(set: OpcodeSet) =
    new OpcodeSet(this.opcodes ++ set.opcodes)

  /** Hardware predicate that is true when `oc` equals any opcode in the set.
    *
    * An empty set matches nothing. A plain `reduce` would instead throw while
    * the hardware is being elaborated; for non-empty sets the generated logic
    * is unchanged.
    */
  def matches(oc: UInt) =
    opcodes.map(_ === oc).reduceOption(_ || _).getOrElse(Bool(false))
}
|
||||
|
||||
/** Ready-made opcode sets, one per custom opcode, plus their union. */
object OpcodeSet {
  // Builds a one-element set from a binary literal string.
  private def single(encoding: String) = new OpcodeSet(Seq(Bits(encoding)))

  def custom0 = single("b0001011")
  def custom1 = single("b0101011")
  def custom2 = single("b1011011")
  def custom3 = single("b1111011")

  /** All four custom opcodes, in ascending order. */
  def all = Seq(custom0, custom1, custom2, custom3).reduce(_ | _)
}
|
||||
|
||||
/** Routes queued core commands to accelerators by opcode.
  *
  * Output `i` is offered the command when the command's opcode is in
  * `opcodes(i)`; the command is dequeued once a matching output accepts it.
  *
  * @param opcodes one opcode set per output port, in port order
  */
class RoccCommandRouter(opcodes: Seq[OpcodeSet])(implicit p: Parameters)
    extends CoreModule()(p) {
  val io = new Bundle {
    val in = Decoupled(new RoCCCommand).flip
    val out = Vec(opcodes.size, Decoupled(new RoCCCommand))
    val busy = Bool(OUTPUT)
  }

  val cmd = Queue(io.in)

  // For each output: drive it, and report whether it is accepting this command.
  val cmdReadys = io.out.zip(opcodes).map { case (port, opcodeSet) =>
    val selected = opcodeSet.matches(cmd.bits.inst.opcode)
    port.bits := cmd.bits
    port.valid := cmd.valid && selected
    port.ready && selected
  }

  cmd.ready := cmdReadys.reduce(_ || _)
  // Busy for as long as a command is waiting in the queue.
  io.busy := cmd.valid

  assert(PopCount(cmdReadys) <= UInt(1),
    "Custom opcode matched for more than one accelerator")
}
|
3
src/main/scala/tile/Package.scala
Normal file
3
src/main/scala/tile/Package.scala
Normal file
@ -0,0 +1,3 @@
|
||||
// See LICENSE.SiFive for license details.
|
||||
|
||||
// Package object for `tile`: it inherits everything from rocket.constants.ScalarOpConstants and adds nothing.
package object tile extends rocket.constants.ScalarOpConstants
|
Reference in New Issue
Block a user