1
0

Heterogeneous Tiles (#550)

Fundamental new features:

* Added tile package: This package is intended to hold components re-usable across different types of tile. Will be the future location of TL2-RoCC accelerators and new diplomatic versions of intra-tile interfaces.
* Adopted [ModuleName]Params convention: Code base was very inconsistent about what to name case classes that provide parameters to modules. Settled on calling them [ModuleName]Params to distinguish them from config.Parameters and config.Config. So far applied mostly only to case classes defined within rocket and tile.
* Defined RocketTileParams: A nested case class containing case classes for all the components of a tile (L1 caches and core). Allows all such parameters to vary per-tile.
* Defined RocketCoreParams: All the parameters that can be varied per-core.
* Defined L1CacheParams: A trait defining the parameters common to L1 caches, made concrete in different derived case classes.
* Defined RocketTilesKey: A sequence of RocketTileParams, one for every tile to be created.
* Provided HeterogeneousDualCoreConfig: An example of making a heterogeneous chip with two cores, one big and one little.
* Changes to legacy code: ReplacementPolicy moved to package util. L1Metadata moved to package tile. Legacy L2 cache agent removed because it can no longer share the metadata array implementation with the L1. Legacy GroundTests on life support.

Additional changes that got rolled in along the way:

* rocket: 	Fix critical path through BTB for I$ index bits > pgIdxBits
* coreplex: tiles connected via :=*
* groundtest: updated to use TileParams
* tilelink: cache cork requirements are relaxed to allow more cacheless masters
This commit is contained in:
Henry Cook
2017-02-09 13:59:09 -08:00
committed by GitHub
parent f9acd4988c
commit e8c8d2af71
57 changed files with 1084 additions and 1933 deletions

View File

@ -7,43 +7,58 @@ import Chisel._
import config.{Parameters, Field}
import coreplex._
import diplomacy._
import tile._
import uncore.constants._
import uncore.tilelink2._
import uncore.util._
import util.ParameterizedBundle
import uncore.util.Code
import util.{ParameterizedBundle, RandomReplacement}
import scala.collection.mutable.ListBuffer
case class DCacheConfig(
nMSHRs: Int = 1,
nSDQ: Int = 17,
nRPQ: Int = 16,
nMMIOs: Int = 1)
case class DCacheParams(
nSets: Int = 64,
nWays: Int = 4,
rowBits: Int = 64,
nTLBEntries: Int = 8,
splitMetadata: Boolean = false,
ecc: Option[Code] = None,
nMSHRs: Int = 1,
nSDQ: Int = 17,
nRPQ: Int = 16,
nMMIOs: Int = 1) extends L1CacheParams {
def replacement = new RandomReplacement(nWays)
}
case object DCacheKey extends Field[DCacheConfig]
trait HasL1HellaCacheParameters extends HasL1CacheParameters
with HasCoreParameters {
val cacheParams = tileParams.dcache.get
val cfg = cacheParams
trait HasL1HellaCacheParameters extends HasL1CacheParameters {
val wordBits = xLen // really, xLen max
val wordBytes = wordBits/8
val wordOffBits = log2Up(wordBytes)
val beatBytes = cacheBlockBytes / cacheDataBeats
val beatWords = beatBytes / wordBytes
val beatOffBits = log2Up(beatBytes)
val idxMSB = untagBits-1
val idxLSB = blockOffBits
val offsetmsb = idxLSB-1
val offsetlsb = wordOffBits
val rowWords = rowBits/wordBits
val doNarrowRead = coreDataBits * nWays % rowBits == 0
val encDataBits = code.width(coreDataBits)
val encRowBits = encDataBits*rowWords
val nIOMSHRs = 1
val lrscCycles = 32 // ISA requires 16-insn LRSC sequences to succeed
def wordBits = xLen // really, xLen max
def wordBytes = wordBits/8
def wordOffBits = log2Up(wordBytes)
def beatBytes = cacheBlockBytes / cacheDataBeats
def beatWords = beatBytes / wordBytes
def beatOffBits = log2Up(beatBytes)
def idxMSB = untagBits-1
def idxLSB = blockOffBits
def offsetmsb = idxLSB-1
def offsetlsb = wordOffBits
def rowWords = rowBits/wordBits
def doNarrowRead = coreDataBits * nWays % rowBits == 0
def encDataBits = code.width(coreDataBits)
def encRowBits = encDataBits*rowWords
def lrscCycles = 32 // ISA requires 16-insn LRSC sequences to succeed
def nIOMSHRs = cacheParams.nMMIOs
def maxUncachedInFlight = cacheParams.nMMIOs
def dataScratchpadSize = tileParams.dataScratchpadBytes
require(isPow2(nSets))
require(rowBits >= coreDataBits)
require(rowBits == cacheDataBits) // TODO should rowBits even be seperably specifiable?
require(xLen <= cacheDataBits) // would need offset addr for puts if data width < xlen
require(!usingVM || untagBits <= pgIdxBits)
require(isPow2(nSets), s"nSets($nSets) must be pow2")
require(rowBits >= coreDataBits, s"rowBits($rowBits) < coreDataBits($coreDataBits)")
// TODO should rowBits even be seperably specifiable?
require(rowBits == cacheDataBits, s"rowBits($rowBits) != cacheDataBits($cacheDataBits)")
// would need offset addr for puts if data width < xlen
require(xLen <= cacheDataBits, s"xLen($xLen) > cacheDataBits($cacheDataBits)")
require(!usingVM || untagBits <= pgIdxBits, s"untagBits($untagBits) > pgIdxBits($pgIdxBits)")
}
abstract class L1HellaCacheModule(implicit val p: Parameters) extends Module
@ -52,25 +67,7 @@ abstract class L1HellaCacheModule(implicit val p: Parameters) extends Module
abstract class L1HellaCacheBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
with HasL1HellaCacheParameters
class L1Metadata(implicit p: Parameters) extends Metadata()(p) with HasL1HellaCacheParameters {
val coh = new ClientMetadata
}
object L1Metadata {
def apply(tag: Bits, coh: ClientMetadata)(implicit p: Parameters) = {
val meta = Wire(new L1Metadata)
meta.tag := tag
meta.coh := coh
meta
}
}
class L1MetaReadReq(implicit p: Parameters) extends MetaReadReq {
val tag = Bits(width = tagBits)
override def cloneType = new L1MetaReadReq()(p).asInstanceOf[this.type] //TODO remove
}
class L1MetaWriteReq(implicit p: Parameters) extends
MetaWriteReq[L1Metadata](new L1Metadata)
/** Bundle definitions for HellaCache interfaces */
trait HasCoreMemOp extends HasCoreParameters {
val addr = UInt(width = coreMaxAddrBits)
@ -83,6 +80,10 @@ trait HasCoreData extends HasCoreParameters {
val data = Bits(width = coreDataBits)
}
class HellaCacheReqInternal(implicit p: Parameters) extends CoreBundle()(p) with HasCoreMemOp {
val phys = Bool()
}
class HellaCacheReq(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasCoreData
class HellaCacheResp(implicit p: Parameters) extends CoreBundle()(p)
@ -104,7 +105,6 @@ class HellaCacheExceptions extends Bundle {
val pf = new AlignmentExceptions
}
// interface between D$ and processor/DTLB
class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) {
val req = Decoupled(new HellaCacheReq)
@ -119,10 +119,13 @@ class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) {
val ordered = Bool(INPUT)
}
abstract class HellaCache(val cfg: DCacheConfig)(implicit p: Parameters) extends LazyModule {
/** Base classes for Diplomatic TL2 HellaCaches */
abstract class HellaCache(implicit p: Parameters) extends LazyModule {
private val cfg = p(TileKey).dcache.get
val node = TLClientNode(TLClientParameters(
sourceId = IdRange(0, cfg.nMSHRs + cfg.nMMIOs),
supportsProbe = TransferSizes(p(CacheBlockBytes))))
supportsProbe = TransferSizes(1, p(CacheBlockBytes))))
val module: HellaCacheModule
}
@ -135,38 +138,101 @@ class HellaCacheBundle(outer: HellaCache) extends Bundle {
class HellaCacheModule(outer: HellaCache) extends LazyModuleImp(outer)
with HasL1HellaCacheParameters {
implicit val cfg = outer.cfg
implicit val edge = outer.node.edgesOut(0)
val io = new HellaCacheBundle(outer)
val tl_out = io.mem(0)
}
object HellaCache {
def apply(cfg: DCacheConfig, scratch: () => Option[AddressSet] = () => None)(implicit p: Parameters) = {
if (cfg.nMSHRs == 0) LazyModule(new DCache(cfg, scratch))
else LazyModule(new NonBlockingDCache(cfg))
def apply(blocking: Boolean, scratch: () => Option[AddressSet] = () => None)(implicit p: Parameters) = {
if (blocking) LazyModule(new DCache(scratch))
else LazyModule(new NonBlockingDCache)
}
}
/** Mix-ins for constructing tiles that have a HellaCache */
trait HasHellaCache extends TileNetwork {
trait HasHellaCache extends HasTileLinkMasterPort {
val module: HasHellaCacheModule
implicit val p: Parameters
def findScratchpadFromICache: Option[AddressSet]
var nDCachePorts = 0
val dcacheParams = p.alterPartial({ case CacheName => CacheName("L1D") })
val dcache = HellaCache(p(DCacheKey), findScratchpadFromICache _)(dcacheParams)
l1backend.node := dcache.node
val dcache = HellaCache(tileParams.dcache.get.nMSHRs == 0, findScratchpadFromICache _)
masterNode := dcache.node
}
trait HasHellaCacheBundle extends TileNetworkBundle {
trait HasHellaCacheBundle extends HasTileLinkMasterPortBundle {
val outer: HasHellaCache
}
trait HasHellaCacheModule extends TileNetworkModule {
trait HasHellaCacheModule extends HasTileLinkMasterPortModule {
val outer: HasHellaCache
//val io: HasHellaCacheBundle
val dcachePorts = ListBuffer[HellaCacheIO]()
val dcacheArb = Module(new HellaCacheArbiter(outer.nDCachePorts)(outer.dcacheParams))
val dcacheArb = Module(new HellaCacheArbiter(outer.nDCachePorts)(outer.p))
outer.dcache.module.io.cpu <> dcacheArb.io.mem
}
/** Metadata array used for all HellaCaches */
class L1Metadata(implicit p: Parameters) extends L1HellaCacheBundle()(p) {
val coh = new ClientMetadata
val tag = UInt(width = tagBits)
}
object L1Metadata {
def apply(tag: Bits, coh: ClientMetadata)(implicit p: Parameters) = {
val meta = Wire(new L1Metadata)
meta.tag := tag
meta.coh := coh
meta
}
}
class L1MetaReadReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) {
val idx = UInt(width = idxBits)
val way_en = UInt(width = nWays)
val tag = UInt(width = tagBits)
}
class L1MetaWriteReq(implicit p: Parameters) extends L1MetaReadReq()(p) {
val data = new L1Metadata
}
class L1MetadataArray[T <: L1Metadata](onReset: () => T)(implicit p: Parameters) extends L1HellaCacheModule()(p) {
val rstVal = onReset()
val io = new Bundle {
val read = Decoupled(new L1MetaReadReq).flip
val write = Decoupled(new L1MetaWriteReq).flip
val resp = Vec(nWays, rstVal.cloneType).asOutput
}
val rst_cnt = Reg(init=UInt(0, log2Up(nSets+1)))
val rst = rst_cnt < UInt(nSets)
val waddr = Mux(rst, rst_cnt, io.write.bits.idx)
val wdata = Mux(rst, rstVal, io.write.bits.data).asUInt
val wmask = Mux(rst || Bool(nWays == 1), SInt(-1), io.write.bits.way_en.asSInt).toBools
val rmask = Mux(rst || Bool(nWays == 1), SInt(-1), io.read.bits.way_en.asSInt).toBools
when (rst) { rst_cnt := rst_cnt+UInt(1) }
val metabits = rstVal.getWidth
if (hasSplitMetadata) {
val tag_arrs = List.fill(nWays){ SeqMem(nSets, UInt(width = metabits)) }
val tag_readout = Wire(Vec(nWays,rstVal.cloneType))
(0 until nWays).foreach { (i) =>
when (rst || (io.write.valid && wmask(i))) {
tag_arrs(i).write(waddr, wdata)
}
io.resp(i) := rstVal.fromBits(tag_arrs(i).read(io.read.bits.idx, io.read.valid && rmask(i)))
}
} else {
val tag_arr = SeqMem(nSets, Vec(nWays, UInt(width = metabits)))
when (rst || io.write.valid) {
tag_arr.write(waddr, Vec.fill(nWays)(wdata), wmask)
}
io.resp := tag_arr.read(io.read.bits.idx, io.read.valid).map(rstVal.fromBits(_))
}
io.read.ready := !rst && !io.write.valid // so really this could be a 6T RAM
io.write.ready := !rst
}