
Massive update containing several months of changes from the now-defunct private chip repo.

* Adds support for an L2 cache with directory bits for tracking L1 coherence (DefaultL2Config), and a new metadata-based coherence API.
* Additional tests.
* New virtual memory implementation, privileged architecture (v1.7), custom CSRs, and an FDivSqrt unit.
* Updated TileLink protocol; NASTI protocol shims.
* Lays groundwork for multiple top-level memory channels, superscalar fetch.
* Bump all submodules.
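
The new L2 option is turned on by config composition, as the PublicConfigs diff below shows. A minimal sketch of defining a custom variant the same way (MyL2Config is a hypothetical name; assumes the rocketchip package imports):

    class MyL2Config extends ChiselConfig(new WithL2Cache ++ new DefaultConfig)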
commit d3ccec1044 (parent 12d8d8c5e3)
Author: Henry Cook
Date:   2015-06-25 23:17:35 -07:00

19 changed files with 697 additions and 398 deletions


@@ -4,88 +4,133 @@ package rocketchip
import Chisel._
import uncore._
import scala.reflect._
import scala.reflect.runtime.universe._
object TileLinkHeaderOverwriter {
def apply[T <: ClientSourcedMessage](in: DecoupledIO[LogicalNetworkIO[T]], clientId: Int, passThrough: Boolean): DecoupledIO[LogicalNetworkIO[T]] = {
val out = in.clone.asDirectionless
out.bits.payload := in.bits.payload
out.bits.header.src := UInt(clientId)
out.bits.header.dst := (if(passThrough) in.bits.header.dst else UInt(0))
out.valid := in.valid
in.ready := out.ready
out
/** RocketChipNetworks combine a TileLink protocol with a particular physical
* network implementation and chip layout.
*
* Specifically, they provide mappings between ClientTileLinkIO/
* ManagerTileLinkIO channels and LogicalNetwork ports (i.e. generic
* TileLinkIO with networking headers). Channels coming into the network have
* appropriate networking headers appended and outgoing channels have their
* headers stripped.
*
* @constructor base class constructor for Rocket NoC
* @param addrToManagerId a mapping from a physical address to the network
* id of a coherence manager
* @param sharerToClientId a mapping from the id of a particular coherent
* client (as determined by e.g. the directory) to the network id
* of that client
* @param clientDepths the depths of the queues used to buffer
* each channel on the client side of the network
* @param managerDepths the depths of the queues used to buffer
* each channel on the manager side of the network
*/
abstract class RocketChipNetwork(
addrToManagerId: UInt => UInt,
sharerToClientId: UInt => UInt,
clientDepths: TileLinkDepths,
managerDepths: TileLinkDepths) extends TLModule {
val nClients = params(TLNClients)
val nManagers = params(TLNManagers)
val io = new Bundle {
val clients = Vec.fill(nClients){new ClientTileLinkIO}.flip
val managers = Vec.fill(nManagers){new ManagerTileLinkIO}.flip
}
def apply[T <: ClientSourcedMessage with HasPhysicalAddress](in: DecoupledIO[LogicalNetworkIO[T]], clientId: Int, nBanks: Int, addrConvert: UInt => UInt): DecoupledIO[LogicalNetworkIO[T]] = {
val out: DecoupledIO[LogicalNetworkIO[T]] = apply(in, clientId, false)
out.bits.header.dst := (if(nBanks > 1) addrConvert(in.bits.payload.addr) else UInt(0))
out
val clients = io.clients.zipWithIndex.map {
case (c, i) => {
val p = Module(new ClientTileLinkNetworkPort(i, addrToManagerId))
val q = Module(new TileLinkEnqueuer(clientDepths))
p.io.client <> c
q.io.client <> p.io.network
q.io.manager
}
}
val managers = io.managers.zipWithIndex.map {
case (m, i) => {
val p = Module(new ManagerTileLinkNetworkPort(i, sharerToClientId))
val q = Module(new TileLinkEnqueuer(managerDepths))
m <> p.io.manager
p.io.network <> q.io.manager
q.io.client
}
}
}
class RocketChipCrossbarNetwork extends LogicalNetwork {
val io = new Bundle {
val clients = Vec.fill(params(LNClients)){(new TileLinkIO).flip}
val masters = Vec.fill(params(LNMasters)){new TileLinkIO}
/** A simple arbiter for each channel that also deals with header-based routing.
* Assumes a single manager agent. */
class RocketChipTileLinkArbiter(
sharerToClientId: UInt => UInt = (u: UInt) => u,
clientDepths: TileLinkDepths = TileLinkDepths(0,0,0,0,0),
managerDepths: TileLinkDepths = TileLinkDepths(0,0,0,0,0))
extends RocketChipNetwork(u => UInt(0), sharerToClientId, clientDepths, managerDepths)
with TileLinkArbiterLike
with PassesId {
val arbN = nClients
require(nManagers == 1)
if(arbN > 1) {
hookupClientSource(clients.map(_.acquire), managers.head.acquire)
hookupClientSource(clients.map(_.release), managers.head.release)
hookupFinish(clients.map(_.finish), managers.head.finish)
hookupManagerSourceWithHeader(clients.map(_.probe), managers.head.probe)
hookupManagerSourceWithHeader(clients.map(_.grant), managers.head.grant)
} else {
managers.head <> clients.head
}
}
/** Provides a separate physical crossbar for each channel. Assumes multiple manager
* agents. Managers are assigned to lower physical network port ids than
* clients, and translations between logical network id and physical crossbar
* port id are done automatically.
*/
class RocketChipTileLinkCrossbar(
addrToManagerId: UInt => UInt = u => UInt(0),
sharerToClientId: UInt => UInt = u => u,
clientDepths: TileLinkDepths = TileLinkDepths(0,0,0,0,0),
managerDepths: TileLinkDepths = TileLinkDepths(0,0,0,0,0))
extends RocketChipNetwork(addrToManagerId, sharerToClientId, clientDepths, managerDepths) {
val n = params(LNEndpoints)
val count = params(TLDataBeats)
// Actually instantiate the particular networks required for TileLink
val acqNet = Module(new BasicCrossbar(n, new Acquire))
val relNet = Module(new BasicCrossbar(n, new Release))
val acqNet = Module(new BasicCrossbar(n, new Acquire, count, Some((a: PhysicalNetworkIO[Acquire]) => a.payload.hasMultibeatData())))
val relNet = Module(new BasicCrossbar(n, new Release, count, Some((r: PhysicalNetworkIO[Release]) => r.payload.hasMultibeatData())))
val prbNet = Module(new BasicCrossbar(n, new Probe))
val gntNet = Module(new BasicCrossbar(n, new Grant))
val gntNet = Module(new BasicCrossbar(n, new Grant, count, Some((g: PhysicalNetworkIO[Grant]) => g.payload.hasMultibeatData())))
val ackNet = Module(new BasicCrossbar(n, new Finish))
// Aliases for the various network IO bundle types
type FBCIO[T <: Data] = DecoupledIO[PhysicalNetworkIO[T]]
type FLNIO[T <: Data] = DecoupledIO[LogicalNetworkIO[T]]
type FromCrossbar[T <: Data] = FBCIO[T] => FLNIO[T]
type ToCrossbar[T <: Data] = FLNIO[T] => FBCIO[T]
type PNIO[T <: Data] = DecoupledIO[PhysicalNetworkIO[T]]
type LNIO[T <: Data] = DecoupledIO[LogicalNetworkIO[T]]
type FromCrossbar[T <: Data] = PNIO[T] => LNIO[T]
type ToCrossbar[T <: Data] = LNIO[T] => PNIO[T]
// Shims for converting between logical network IOs and physical network IOs
//TODO: Could be less verbose if you could override subbundles after a <>
def DefaultFromCrossbarShim[T <: Data](in: FBCIO[T]): FLNIO[T] = {
val out = Decoupled(new LogicalNetworkIO(in.bits.payload)).asDirectionless
out.bits.header := in.bits.header
out.bits.payload := in.bits.payload
out.valid := in.valid
in.ready := out.ready
def crossbarToManagerShim[T <: Data](in: PNIO[T]): LNIO[T] = {
val out = DefaultFromPhysicalShim(in)
out.bits.header.src := in.bits.header.src - UInt(nManagers)
out
}
def CrossbarToMasterShim[T <: Data](in: FBCIO[T]): FLNIO[T] = {
val out = DefaultFromCrossbarShim(in)
out.bits.header.src := in.bits.header.src - UInt(params(LNMasters))
def crossbarToClientShim[T <: Data](in: PNIO[T]): LNIO[T] = {
val out = DefaultFromPhysicalShim(in)
out.bits.header.dst := in.bits.header.dst - UInt(nManagers)
out
}
def CrossbarToClientShim[T <: Data](in: FBCIO[T]): FLNIO[T] = {
val out = DefaultFromCrossbarShim(in)
out.bits.header.dst := in.bits.header.dst - UInt(params(LNMasters))
def managerToCrossbarShim[T <: Data](in: LNIO[T]): PNIO[T] = {
val out = DefaultToPhysicalShim(n, in)
out.bits.header.dst := in.bits.header.dst + UInt(nManagers)
out
}
def DefaultToCrossbarShim[T <: Data](in: FLNIO[T]): FBCIO[T] = {
val out = Decoupled(new PhysicalNetworkIO(n,in.bits.payload)).asDirectionless
out.bits.header := in.bits.header
out.bits.payload := in.bits.payload
out.valid := in.valid
in.ready := out.ready
out
}
def MasterToCrossbarShim[T <: Data](in: FLNIO[T]): FBCIO[T] = {
val out = DefaultToCrossbarShim(in)
out.bits.header.dst := in.bits.header.dst + UInt(params(LNMasters))
out
}
def ClientToCrossbarShim[T <: Data](in: FLNIO[T]): FBCIO[T] = {
val out = DefaultToCrossbarShim(in)
out.bits.header.src := in.bits.header.src + UInt(params(LNMasters))
def clientToCrossbarShim[T <: Data](in: LNIO[T]): PNIO[T] = {
val out = DefaultToPhysicalShim(n, in)
out.bits.header.src := in.bits.header.src + UInt(nManagers)
out
}
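// Hedged illustration of the id translation above: with nManagers = 2 and
// nClients = 3, managers sit on physical crossbar ports 0-1 and clients on
// ports 2-4, so a logical client id c maps to physical port c + nManagers,
// which is exactly the +/- UInt(nManagers) adjustment the four shims make.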
// Make an individual connection between virtual and physical ports using
// a particular shim. Also seal the unused FIFO control signal.
def doFIFOInputHookup[T <: Data](phys_in: FBCIO[T], phys_out: FBCIO[T], log_io: FLNIO[T], shim: ToCrossbar[T]) = {
// a particular shim. Also pin the unused Decoupled control signal low.
def doDecoupledInputHookup[T <: Data](phys_in: PNIO[T], phys_out: PNIO[T], log_io: LNIO[T], shim: ToCrossbar[T]) = {
val s = shim(log_io)
phys_in.valid := s.valid
phys_in.bits := s.bits
@@ -93,7 +138,7 @@ class RocketChipCrossbarNetwork extends LogicalNetwork {
phys_out.ready := Bool(false)
}
def doFIFOOutputHookup[T <: Data](phys_in: FBCIO[T], phys_out: FBCIO[T], log_io: FLNIO[T], shim: FromCrossbar[T]) = {
def doDecoupledOutputHookup[T <: Data](phys_in: PNIO[T], phys_out: PNIO[T], log_io: LNIO[T], shim: FromCrossbar[T]) = {
val s = shim(phys_out)
log_io.valid := s.valid
log_io.bits := s.bits
@@ -101,29 +146,31 @@ class RocketChipCrossbarNetwork extends LogicalNetwork {
phys_in.valid := Bool(false)
}
def doFIFOHookup[T <: Data](isEndpointSourceOfMessage: Boolean, physIn: FBCIO[T], physOut: FBCIO[T], logIO: FLNIO[T], inShim: ToCrossbar[T], outShim: FromCrossbar[T]) = {
if(isEndpointSourceOfMessage) doFIFOInputHookup(physIn, physOut, logIO, inShim)
else doFIFOOutputHookup(physIn, physOut, logIO, outShim)
}
//Hookup all instances of a particular subbundle of TileLink
def doFIFOHookups[T <: Data: TypeTag](physIO: BasicCrossbarIO[T], getLogIO: TileLinkIO => FLNIO[T]) = {
typeTag[T].tpe match{
case t if t <:< typeTag[ClientSourcedMessage].tpe => {
io.masters.zipWithIndex.map{ case (i, id) => doFIFOHookup[T](false, physIO.in(id), physIO.out(id), getLogIO(i), ClientToCrossbarShim, CrossbarToMasterShim) }
io.clients.zipWithIndex.map{ case (i, id) => doFIFOHookup[T](true, physIO.in(id+params(LNMasters)), physIO.out(id+params(LNMasters)), getLogIO(i), ClientToCrossbarShim, CrossbarToMasterShim) }
def doDecoupledHookups[T <: Data](physIO: BasicCrossbarIO[T], getLogIO: TileLinkIO => LNIO[T]) = {
physIO.in.head.bits.payload match {
case c: ClientToManagerChannel => {
managers.zipWithIndex.map { case (i, id) =>
doDecoupledOutputHookup(physIO.in(id), physIO.out(id), getLogIO(i), crossbarToManagerShim[T])
}
clients.zipWithIndex.map { case (i, id) =>
doDecoupledInputHookup(physIO.in(id+nManagers), physIO.out(id+nManagers), getLogIO(i), clientToCrossbarShim[T])
}
}
case t if t <:< typeTag[MasterSourcedMessage].tpe => {
io.masters.zipWithIndex.map{ case (i, id) => doFIFOHookup[T](true, physIO.in(id), physIO.out(id), getLogIO(i), MasterToCrossbarShim, CrossbarToClientShim) }
io.clients.zipWithIndex.map{ case (i, id) => doFIFOHookup[T](false, physIO.in(id+params(LNMasters)), physIO.out(id+params(LNMasters)), getLogIO(i), MasterToCrossbarShim, CrossbarToClientShim) }
case m: ManagerToClientChannel => {
managers.zipWithIndex.map { case (i, id) =>
doDecoupledInputHookup(physIO.in(id), physIO.out(id), getLogIO(i), managerToCrossbarShim[T])
}
clients.zipWithIndex.map { case (i, id) =>
doDecoupledOutputHookup(physIO.in(id+nManagers), physIO.out(id+nManagers), getLogIO(i), crossbarToClientShim[T])
}
}
case _ => require(false, "Unknown message sourcing.")
}
}
doFIFOHookups(acqNet.io, (tl: TileLinkIO) => tl.acquire)
doFIFOHookups(relNet.io, (tl: TileLinkIO) => tl.release)
doFIFOHookups(prbNet.io, (tl: TileLinkIO) => tl.probe)
doFIFOHookups(gntNet.io, (tl: TileLinkIO) => tl.grant)
doFIFOHookups(ackNet.io, (tl: TileLinkIO) => tl.finish)
doDecoupledHookups(acqNet.io, (tl: TileLinkIO) => tl.acquire)
doDecoupledHookups(relNet.io, (tl: TileLinkIO) => tl.release)
doDecoupledHookups(prbNet.io, (tl: TileLinkIO) => tl.probe)
doDecoupledHookups(gntNet.io, (tl: TileLinkIO) => tl.grant)
doDecoupledHookups(ackNet.io, (tl: TileLinkIO) => tl.finish)
}
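
For reference, a hedged summary of the five channels these networks route: Acquire, Release, and Finish flow from client to manager, while Probe and Grant flow from manager to client. TileLinkDepths appears to take one queue depth per channel in the order (acquire, probe, release, grant, finish); only the release slot is confirmed by this diff, via the inner.release comment in the RocketChip.scala hunk below:

    // Assumed slot order (acq, prb, rel, gnt, fin); a sketch, not confirmed
    // by this diff beyond the release slot.
    val preBuffering  = TileLinkDepths(2, 2, 2, 2, 2) // buffer every channel
    val postBuffering = TileLinkDepths(0, 0, 1, 0, 0) // buffer release only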


@@ -6,14 +6,13 @@ import Chisel._
import uncore._
import rocket._
import rocket.Util._
import scala.math.max
class DefaultConfig extends ChiselConfig (
topDefinitions = { (pname,site,here) =>
type PF = PartialFunction[Any,Any]
def findBy(sname:Any):Any = here[PF](site[Any](sname))(pname)
pname match {
//RocketChip Parameters
case BuildTile => (r:Bool) => {new RocketTile(resetSignal = r)}
//HTIF Parameters
case HTIFWidth => Dump("HTIF_WIDTH", 16)
case HTIFNSCR => 64
@@ -21,121 +20,165 @@ class DefaultConfig extends ChiselConfig (
case HTIFNCores => site(NTiles)
//Memory Parameters
case PAddrBits => 32
case VAddrBits => 43
case PgIdxBits => 13
case ASIdBits => 7
case PermBits => 6
case PgIdxBits => 12
case PgLevels => if (site(XLen) == 64) 3 /* Sv39 */ else 2 /* Sv32 */
case PgLevelBits => site(PgIdxBits) - log2Up(site(XLen)/8)
case VPNBits => site(PgLevels) * site(PgLevelBits)
case PPNBits => site(PAddrBits) - site(PgIdxBits)
case VPNBits => site(VAddrBits) - site(PgIdxBits)
case MIFTagBits => Dump("MEM_TAG_BITS", 5)
case VAddrBits => site(VPNBits) + site(PgIdxBits)
case ASIdBits => 7
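// Worked example (hedged) under these defaults: XLen = 64 gives
// PgLevelBits = PgIdxBits - log2Up(64/8) = 12 - 3 = 9 and PgLevels = 3,
// so VPNBits = 3*9 = 27 and VAddrBits = 27 + 12 = 39, i.e. Sv39.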
case MIFTagBits => Dump("MEM_TAG_BITS",
log2Up(site(NAcquireTransactors)+2) +
log2Up(site(NBanksPerMemoryChannel)) +
log2Up(site(NMemoryChannels)) + /* TODO: Remove for multichannel Top */
1)
case MIFDataBits => Dump("MEM_DATA_BITS", 128)
case MIFAddrBits => Dump("MEM_ADDR_BITS", site(PAddrBits) - site(CacheBlockOffsetBits))
case MIFDataBeats => site(TLDataBits)/site(MIFDataBits)
case MIFDataBeats => site(TLDataBits)*site(TLDataBeats)/site(MIFDataBits)
case NASTIDataBits => site(MIFDataBits)
case NASTIAddrBits => site(MIFAddrBits)
case NASTIIdBits => site(MIFTagBits)
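// Worked example (hedged) under the default knobs: NAcquireTransactors = 7,
// NBanksPerMemoryChannel = 1, NMemoryChannels = 1 gives MEM_TAG_BITS =
// log2Up(7+2) + 0 + 0 + 1 = 5, while 64-byte blocks over TLDataBeats = 4
// give TLDataBits = 512/4 = 128 and hence MIFDataBeats = 128*4/128 = 4.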
case UseNASTI => false
//Params used by all caches
case NSets => findBy(CacheName)
case NWays => findBy(CacheName)
case RowBits => findBy(CacheName)
case BlockOffBits => findBy(CacheName)
case NTLBEntries => findBy(CacheName)
case "L1I" => {
case NSets => Knob("L1I_SETS") //128
case NWays => Knob("L1I_WAYS") //2
case NSets => Knob("L1I_SETS") //64
case NWays => Knob("L1I_WAYS") //4
case RowBits => 4*site(CoreInstBits)
case BlockOffBits => log2Up(site(TLDataBits)/8)
case NTLBEntries => 8
}:PF
case "L1D" => {
case NSets => Knob("L1D_SETS") //128
case NSets => Knob("L1D_SETS") //64
case NWays => Knob("L1D_WAYS") //4
case RowBits => 2*site(CoreDataBits)
case BlockOffBits => log2Up(site(TLDataBits)/8)
}:PF
case "L2" => {
case NSets => 512
case NWays => 8
case RowBits => site(TLDataBits)
case BlockOffBits => 0
case NTLBEntries => 8
}:PF
case ECCCode => None
case WordBits => site(XprLen)
case Replacer => () => new RandomReplacement(site(NWays))
case AmoAluOperandBits => site(XLen)
//L1InstCache
case NITLBEntries => 8
case NBTBEntries => 62
case NRAS => 2
//L1DataCache
case NDTLBEntries => 8
case WordBits => site(XLen)
case StoreDataQueueDepth => 17
case ReplayQueueDepth => 16
case NMSHRs => Knob("L1D_MSHRS")
case LRSCCycles => 32
//L2CacheParams
case NReleaseTransactors => Knob("L2_REL_XACTS")
case NAcquireTransactors => Knob("L2_ACQ_XACTS")
case NClients => site(NTiles) + 1
//L2 Memory System Params
case NAcquireTransactors => 7
case L2StoreDataQueueDepth => 1
case L2DirectoryRepresentation => new NullRepresentation(site(TLNCachingClients))
case BuildL2CoherenceManager => () =>
Module(new L2BroadcastHub, { case InnerTLId => "L1ToL2"; case OuterTLId => "L2ToMC" })
//Tile Constants
case BuildTiles =>
List.fill(site(NTiles)){ (r:Bool) => Module(new RocketTile(resetSignal = r), {case TLId => "L1ToL2"}) }
case BuildRoCC => None
case NDCachePorts => 2 + (if(site(BuildRoCC).isEmpty) 0 else 1)
case NTilePorts => 2 + (if(site(BuildRoCC).isEmpty) 0 else 1)
case NPTWPorts => 2 + (if(site(BuildRoCC).isEmpty) 0 else 3)
//Rocket Core Constants
case FetchWidth => 1
case RetireWidth => 1
case UseVM => true
case FastLoadWord => true
case FastLoadByte => false
case FastMulDiv => true
case XprLen => 64
case XLen => 64
case NMultXpr => 32
case BuildFPU => Some(() => Module(new FPU))
case FDivSqrt => true
case SFMALatency => 2
case DFMALatency => 3
case CoreInstBits => 32
case CoreDataBits => site(XprLen)
case CoreDataBits => site(XLen)
case CoreDCacheReqTagBits => 7 + log2Up(here(NDCachePorts))
case NCustomMRWCSRs => 0
//Uncore Parameters
case LNMasters => site(NBanks)
case LNClients => site(NTiles)+1
case LNEndpoints => site(LNMasters) + site(LNClients)
case TLId => "inner"
case TLCoherence => site(Coherence)
case TLAddrBits => site(PAddrBits) - site(CacheBlockOffsetBits)
case TLMasterXactIdBits => site(TLId) match {
case "inner" => log2Up(site(NReleaseTransactors)+site(NAcquireTransactors))
case "outer" => 1
}
case TLClientXactIdBits => site(TLId) match {
case "inner" => log2Up(site(NMSHRs))+log2Up(site(NTilePorts))
case "outer" => log2Up(site(NReleaseTransactors)+site(NAcquireTransactors))
}
case TLDataBits => site(CacheBlockBytes)*8
case TLWriteMaskBits => 6
case TLWordAddrBits => 3
case TLAtomicOpBits => 4
case LNEndpoints => site(TLNManagers) + site(TLNClients)
case LNHeaderBits => log2Ceil(site(TLNManagers)) + log2Up(site(TLNClients))
case TLBlockAddrBits => site(PAddrBits) - site(CacheBlockOffsetBits)
case TLNClients => site(TLNCachingClients) + site(TLNCachelessClients)
case TLDataBits => site(CacheBlockBytes)*8/site(TLDataBeats)
case TLDataBeats => 4
case TLNetworkIsOrderedP2P => false
case TLNManagers => findBy(TLId)
case TLNCachingClients => findBy(TLId)
case TLNCachelessClients => findBy(TLId)
case TLCoherencePolicy => findBy(TLId)
case TLMaxManagerXacts => findBy(TLId)
case TLMaxClientXacts => findBy(TLId)
case TLMaxClientsPerPort => findBy(TLId)
case "L1ToL2" => {
case TLNManagers => site(NBanksPerMemoryChannel)*site(NMemoryChannels)
case TLNCachingClients => site(NTiles)
case TLNCachelessClients => site(NTiles) + 1
case TLCoherencePolicy => new MESICoherence(site(L2DirectoryRepresentation))
case TLMaxManagerXacts => site(NAcquireTransactors) + 2
case TLMaxClientXacts => max(site(NMSHRs),
if(site(BuildRoCC).isEmpty) 1
else site(RoCCMaxTaggedMemXacts))
case TLMaxClientsPerPort => if(site(BuildRoCC).isEmpty) 1 else 3
}:PF
case "L2ToMC" => {
case TLNManagers => 1
case TLNCachingClients => site(NBanksPerMemoryChannel)
case TLNCachelessClients => 0
case TLCoherencePolicy => new MEICoherence(new NullRepresentation(site(NBanksPerMemoryChannel)))
case TLMaxManagerXacts => 1
case TLMaxClientXacts => 1
case TLMaxClientsPerPort => site(NAcquireTransactors) + 2
}:PF
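// Hedged reading of the findBy indirection above: a query like
// site(TLNManagers) first resolves site(TLId) (e.g. "L1ToL2") and then
// re-dispatches into the matching partial function, so the same parameter
// name takes different values on the inner and outer TileLink interfaces.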
case NTiles => Knob("NTILES")
case NBanks => Knob("NBANKS")
case NOutstandingMemReqs => 2*site(NBanks)*(site(NReleaseTransactors)+site(NAcquireTransactors))
case BankIdLSB => 5
case NMemoryChannels => 1
case NBanksPerMemoryChannel => Knob("NBANKS")
case NOutstandingMemReqsPerChannel => site(NBanksPerMemoryChannel)*(site(NAcquireTransactors)+2)
case BankIdLSB => 0
case CacheBlockBytes => 64
case CacheBlockOffsetBits => log2Up(here(CacheBlockBytes))
case UseBackupMemoryPort => true
case BuildCoherenceMaster => (id: Int) => {
Module(new L2CoherenceAgent(id, "inner", "outer"), { case CacheName => "L2" })
}
case Coherence => new MSICoherence(() => new NullRepresentation)
}},
knobValues = {
case "NTILES" => 1
case "NBANKS" => 1
case "L2_REL_XACTS" => 1
case "L2_ACQ_XACTS" => 7
case "L1D_MSHRS" => 2
case "L1D_SETS" => 128
case "L1D_SETS" => 64
case "L1D_WAYS" => 4
case "L1I_SETS" => 128
case "L1I_WAYS" => 2
case "L1I_SETS" => 64
case "L1I_WAYS" => 4
}
)
class DefaultVLSIConfig extends DefaultConfig
class DefaultCPPConfig extends DefaultConfig
class WithL2Cache extends ChiselConfig(
(pname,site,here) => pname match {
case "L2_CAPACITY_IN_KB" => Knob("L2_CAPACITY_IN_KB")
case "L2Bank" => {
case NSets => (((here[Int]("L2_CAPACITY_IN_KB")*1024) /
site(CacheBlockBytes)) /
site(NBanksPerMemoryChannel)*site(NMemoryChannels)) /
site(NWays)
case NWays => Knob("L2_WAYS")
case RowBits => site(TLDataBits)
}: PartialFunction[Any,Any]
case NAcquireTransactors => 2
case NSecondaryMisses => 4
case L2DirectoryRepresentation => new FullRepresentation(site(TLNCachingClients))
case BuildL2CoherenceManager => () =>
Module(new L2HellaCacheBank, {
case CacheName => "L2Bank"
case InnerTLId => "L1ToL2"
case OuterTLId => "L2ToMC"})
},
knobValues = { case "L2_WAYS" => 8; case "L2_CAPACITY_IN_KB" => 2048 }
)
class DefaultL2Config extends ChiselConfig(new WithL2Cache ++ new DefaultConfig)
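// Worked example (hedged) of the WithL2Cache sizing: 2048 KB of capacity,
// 64-byte blocks, one bank on one memory channel, and L2_WAYS = 8 give
// NSets = (2048*1024/64) / (1*1) / 8 = 4096 sets per bank.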
class FPGAConfig extends ChiselConfig (
(pname,site,here) => pname match {
case UseBackupMemoryPort => false
@@ -148,9 +191,8 @@ class SmallConfig extends ChiselConfig (
topDefinitions = { (pname,site,here) => pname match {
case BuildFPU => None
case FastMulDiv => false
case NITLBEntries => 4
case NTLBEntries => 4
case NBTBEntries => 8
case NDTLBEntries => 4
}},
knobValues = {
case "L1D_SETS" => 64


@@ -7,157 +7,200 @@ import uncore._
import rocket._
import rocket.Util._
case object NTiles extends Field[Int]
case object NBanks extends Field[Int]
case object NOutstandingMemReqs extends Field[Int]
case object BankIdLSB extends Field[Int]
case object CacheBlockBytes extends Field[Int]
case object CacheBlockOffsetBits extends Field[Int]
case object UseBackupMemoryPort extends Field[Boolean]
case object Coherence extends Field[CoherencePolicyWithUncached]
case object BuildCoherenceMaster extends Field[(Int) => CoherenceAgent]
case object BuildTile extends Field[(Bool)=>Tile]
/** Top-level parameters of RocketChip, values set in e.g. PublicConfigs.scala */
abstract trait TopLevelParameters extends UsesParameters {
/** Number of tiles */
case object NTiles extends Field[Int]
/** Number of memory channels */
case object NMemoryChannels extends Field[Int]
/** Number of banks per memory channel */
case object NBanksPerMemoryChannel extends Field[Int]
/** Least significant bit of address used for bank partitioning */
case object BankIdLSB extends Field[Int]
/** Number of outstanding memory requests */
case object NOutstandingMemReqsPerChannel extends Field[Int]
/** Whether to use the slow backup memory port [VLSI] */
case object UseBackupMemoryPort extends Field[Boolean]
/** Function for building some kind of coherence manager agent */
case object BuildL2CoherenceManager extends Field[() => CoherenceAgent]
/** Function for building some kind of tile connected to a reset signal */
case object BuildTiles extends Field[Seq[(Bool) => Tile]]
/** Which protocol to use to talk to memory/devices */
case object UseNASTI extends Field[Boolean]
/** Utility trait for quick access to some relevant parameters */
trait TopLevelParameters extends UsesParameters {
val htifW = params(HTIFWidth)
val nTiles = params(NTiles)
val nBanks = params(NBanks)
val nMemChannels = params(NMemoryChannels)
val nBanksPerMemChannel = params(NBanksPerMemoryChannel)
val nBanks = nMemChannels*nBanksPerMemChannel
val lsb = params(BankIdLSB)
val refillCycles = params(MIFDataBeats)
val nMemReqs = params(NOutstandingMemReqsPerChannel)
val mifAddrBits = params(MIFAddrBits)
val mifDataBeats = params(MIFDataBeats)
require(lsb + log2Up(nBanks) < mifAddrBits)
}
class OuterMemorySystem extends Module with TopLevelParameters {
val io = new Bundle {
val tiles = Vec.fill(params(NTiles)){new TileLinkIO}.flip
val htif = (new TileLinkIO).flip
val incoherent = Vec.fill(params(LNClients)){Bool()}.asInput
val mem = new MemIO
val mem_backup = new MemSerializedIO(params(HTIFWidth))
val mem_backup_en = Bool(INPUT)
}
// Create a simple NoC and points of coherence serialization
val net = Module(new RocketChipCrossbarNetwork)
val masterEndpoints = (0 until params(NBanks)).map(params(BuildCoherenceMaster))
net.io.clients zip (io.tiles :+ io.htif) map { case (net, end) => net <> end }
net.io.masters zip (masterEndpoints.map(_.io.inner)) map { case (net, end) => net <> end }
masterEndpoints.map{ _.io.incoherent zip io.incoherent map { case (m, c) => m := c } }
// Create a converter between TileLinkIO and MemIO
val conv = Module(new MemPipeIOUncachedTileLinkIOConverter(
params(NOutstandingMemReqs), refillCycles),
{ case TLId => "outer" })
if(params(NBanks) > 1) {
val arb = Module(new UncachedTileLinkIOArbiterThatAppendsArbiterId(params(NBanks)),
{ case TLId => "outer" })
arb.io.in zip masterEndpoints.map(_.io.outer) map { case (arb, cache) => arb <> cache }
conv.io.uncached <> arb.io.out
} else {
conv.io.uncached <> masterEndpoints.head.io.outer
}
// Create a SerDes for backup memory port
if(params(UseBackupMemoryPort)) {
VLSIUtils.doOuterMemorySystemSerdes(conv.io.mem, io.mem, io.mem_backup,
io.mem_backup_en, htifW)
} else {
io.mem <> conv.io.mem
}
class MemBackupCtrlIO extends Bundle {
val en = Bool(INPUT)
val in_valid = Bool(INPUT)
val out_ready = Bool(INPUT)
val out_valid = Bool(OUTPUT)
}
/** Top-level io for the chip */
class BasicTopIO extends Bundle {
val host = new HostIO
val mem_backup_ctrl = new MemBackupCtrlIO
}
class TopIO extends BasicTopIO {
val mem = new MemIO
}
class MultiChannelTopIO extends BasicTopIO with TopLevelParameters {
val mem = Vec.fill(nMemChannels){ new MemIO }
}
/** Top-level module for the chip */
//TODO: Remove this wrapper once multichannel DRAM controller is provided
class Top extends Module with TopLevelParameters {
val io = new TopIO
val temp = Module(new MultiChannelTop)
val arb = Module(new MemIOArbiter(nMemChannels))
arb.io.inner <> temp.io.mem
io.mem <> arb.io.outer
io.mem_backup_ctrl <> temp.io.mem_backup_ctrl
io.host <> temp.io.host
}
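// Hedged note: with the default NMemoryChannels = 1 the MemIOArbiter above
// is effectively a pass-through, so Top is a thin shim over MultiChannelTop
// until the multichannel DRAM controller mentioned in the TODO arrives.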
class MultiChannelTop extends Module with TopLevelParameters {
val io = new MultiChannelTopIO
// Build an Uncore and a set of Tiles
val uncore = Module(new Uncore, {case TLId => "L1ToL2"})
val tileList = uncore.io.htif zip params(BuildTiles) map { case(hl, bt) => bt(hl.reset) }
// Connect each tile to the HTIF
uncore.io.htif.zip(tileList).zipWithIndex.foreach {
case ((hl, tile), i) =>
tile.io.host.id := UInt(i)
tile.io.host.reset := Reg(next=Reg(next=hl.reset))
tile.io.host.pcr_req <> Queue(hl.pcr_req)
hl.pcr_rep <> Queue(tile.io.host.pcr_rep)
hl.ipi_req <> Queue(tile.io.host.ipi_req)
tile.io.host.ipi_rep <> Queue(hl.ipi_rep)
hl.debug_stats_pcr := tile.io.host.debug_stats_pcr
}
// Connect the uncore to the tile memory ports, HostIO and MemIO
uncore.io.tiles_cached <> tileList.map(_.io.cached)
uncore.io.tiles_uncached <> tileList.map(_.io.uncached)
uncore.io.host <> io.host
uncore.io.mem <> io.mem
if(params(UseBackupMemoryPort)) { uncore.io.mem_backup_ctrl <> io.mem_backup_ctrl }
}
/** Wrapper around everything that isn't a Tile.
*
* Usually this is clocked and/or place-and-routed separately from the Tiles.
* Contains the Host-Target InterFace module (HTIF).
*/
class Uncore extends Module with TopLevelParameters {
val io = new Bundle {
val host = new HostIO
val mem = new MemIO
val tiles = Vec.fill(nTiles){new TileLinkIO}.flip
val mem = Vec.fill(nMemChannels){ new MemIO }
val tiles_cached = Vec.fill(nTiles){new ClientTileLinkIO}.flip
val tiles_uncached = Vec.fill(nTiles){new ClientUncachedTileLinkIO}.flip
val htif = Vec.fill(nTiles){new HTIFIO}.flip
val incoherent = Vec.fill(nTiles){Bool()}.asInput
val mem_backup = new MemSerializedIO(htifW)
val mem_backup_en = Bool(INPUT)
val mem_backup_ctrl = new MemBackupCtrlIO
}
// Used to hash physical addresses to banks
require(params(BankIdLSB) + log2Up(params(NBanks)) < params(MIFAddrBits))
def addrToBank(addr: Bits): UInt = {
if(nBanks > 1) addr( lsb + log2Up(nBanks) - 1, lsb)
else UInt(0)
}
val htif = Module(new HTIF(CSRs.reset)) // One HTIF module per chip
val htif = Module(new HTIF(CSRs.mreset)) // One HTIF module per chip
val outmemsys = Module(new OuterMemorySystem) // NoC, LLC and SerDes
// Wire outer mem system to tiles and htif, adding
// networking headers and endpoint queues
(outmemsys.io.tiles :+ outmemsys.io.htif) // Collect outward-facing TileLink ports
.zip(io.tiles :+ htif.io.mem) // Zip them with matching ports from clients
.zipWithIndex // Index them
.map { case ((outer, client), i) => // Then use the index and bank hash to
// overwrite the networking header
outer.acquire <> Queue(TileLinkHeaderOverwriter(client.acquire, i, nBanks, addrToBank _))
outer.release <> Queue(TileLinkHeaderOverwriter(client.release, i, nBanks, addrToBank _))
outer.finish <> Queue(TileLinkHeaderOverwriter(client.finish, i, true))
client.grant <> Queue(outer.grant, 1, pipe = true)
client.probe <> Queue(outer.probe)
}
outmemsys.io.incoherent := (io.incoherent :+ Bool(true).asInput)
outmemsys.io.incoherent := htif.io.cpu.map(_.reset)
outmemsys.io.htif_uncached <> htif.io.mem
outmemsys.io.tiles_uncached <> io.tiles_uncached
outmemsys.io.tiles_cached <> io.tiles_cached
// Wire the htif to the memory port(s) and host interface
io.host.debug_stats_pcr := htif.io.host.debug_stats_pcr
htif.io.cpu <> io.htif
outmemsys.io.mem <> io.mem
if(params(UseBackupMemoryPort)) {
outmemsys.io.mem_backup_en := io.mem_backup_en
VLSIUtils.padOutHTIFWithDividedClock(htif.io, outmemsys.io.mem_backup,
io.mem_backup, io.host, io.mem_backup_en, htifW)
outmemsys.io.mem_backup_en := io.mem_backup_ctrl.en
VLSIUtils.padOutHTIFWithDividedClock(htif.io, outmemsys.io.mem_backup, io.mem_backup_ctrl, io.host, htifW)
} else {
htif.io.host.out <> io.host.out
htif.io.host.in <> io.host.in
}
}
class TopIO extends Bundle {
val host = new HostIO
val mem = new MemIO
val mem_backup_en = Bool(INPUT)
val in_mem_ready = Bool(OUTPUT)
val in_mem_valid = Bool(INPUT)
val out_mem_ready = Bool(INPUT)
val out_mem_valid = Bool(OUTPUT)
}
class Top extends Module with TopLevelParameters {
val io = new TopIO
val resetSigs = Vec.fill(nTiles){Bool()}
val tileList = (0 until nTiles).map(r => Module(params(BuildTile)(resetSigs(r))))
val uncore = Module(new Uncore)
for (i <- 0 until nTiles) {
val hl = uncore.io.htif(i)
val tl = uncore.io.tiles(i)
val il = uncore.io.incoherent(i)
resetSigs(i) := hl.reset
val tile = tileList(i)
tile.io.tilelink <> tl
il := hl.reset
tile.io.host.id := UInt(i)
tile.io.host.reset := Reg(next=Reg(next=hl.reset))
tile.io.host.pcr_req <> Queue(hl.pcr_req)
hl.pcr_rep <> Queue(tile.io.host.pcr_rep)
hl.ipi_req <> Queue(tile.io.host.ipi_req)
tile.io.host.ipi_rep <> Queue(hl.ipi_rep)
hl.debug_stats_pcr := tile.io.host.debug_stats_pcr
/** The whole outer memory hierarchy, including a NoC, some kind of coherence
* manager agent, and a converter from TileLink to MemIO.
*/
class OuterMemorySystem extends Module with TopLevelParameters {
val io = new Bundle {
val tiles_cached = Vec.fill(nTiles){new ClientTileLinkIO}.flip
val tiles_uncached = Vec.fill(nTiles){new ClientUncachedTileLinkIO}.flip
val htif_uncached = (new ClientUncachedTileLinkIO).flip
val incoherent = Vec.fill(nTiles){Bool()}.asInput
val mem = Vec.fill(nMemChannels){ new MemIO }
val mem_backup = new MemSerializedIO(htifW)
val mem_backup_en = Bool(INPUT)
}
io.host <> uncore.io.host
io.mem <> uncore.io.mem
// Create a simple L1toL2 NoC between the tiles+htif and the banks of outer memory
// Cached ports are first in the client list, making sharerToClientId just an identity function
// addrToBank is used to hash physical addresses (of cache blocks) to banks (and thereby memory channels)
val ordered_clients = (io.tiles_cached ++ (io.tiles_uncached :+ io.htif_uncached).map(TileLinkIOWrapper(_)))
def sharerToClientId(sharerId: UInt) = sharerId
def addrToBank(addr: Bits): UInt = if(nBanks > 1) addr(lsb + log2Up(nBanks) - 1, lsb) else UInt(0)
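// Hedged example: with nBanks = 4 and lsb = 0 this selects addr(1,0), so
// consecutive cache block addresses interleave round-robin across the banks.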
val preBuffering = TileLinkDepths(2,2,2,2,2)
val postBuffering = TileLinkDepths(0,0,1,0,0) //TODO: had EOS24 crit path on inner.release
val l1tol2net = Module(
if(nBanks == 1) new RocketChipTileLinkArbiter(sharerToClientId, preBuffering, postBuffering)
else new RocketChipTileLinkCrossbar(addrToBank, sharerToClientId, preBuffering, postBuffering))
// Create point(s) of coherence serialization
val managerEndpoints = List.fill(nMemChannels) {
List.fill(nBanksPerMemChannel) {
params(BuildL2CoherenceManager)()}}
managerEndpoints.flatten.foreach { _.incoherent := io.incoherent }
// Wire the tiles and htif to the TileLink client ports of the L1toL2 network,
// and coherence manager(s) to the other side
l1tol2net.io.clients <> ordered_clients
l1tol2net.io.managers <> managerEndpoints.flatMap(_.map(_.innerTL))
// Create a converter between TileLinkIO and MemIO for each channel
val outerTLParams = params.alterPartial({ case TLId => "L2ToMC" })
val backendBuffering = TileLinkDepths(0,0,0,0,0)
val mem_channels = managerEndpoints.map { banks =>
if(!params(UseNASTI)) {
val arb = Module(new RocketChipTileLinkArbiter(managerDepths = backendBuffering))(outerTLParams)
val conv = Module(new MemPipeIOTileLinkIOConverter(nMemReqs))(outerTLParams)
arb.io.clients <> banks.map(_.outerTL)
conv.io.tl <> arb.io.managers.head
MemIOMemPipeIOConverter(conv.io.mem)
} else {
val arb = Module(new RocketChipTileLinkArbiter(managerDepths = backendBuffering))(outerTLParams)
val conv1 = Module(new NASTIMasterIOTileLinkIOConverter)(outerTLParams)
val conv2 = Module(new MemIONASTISlaveIOConverter)
val conv3 = Module(new MemPipeIOMemIOConverter(nMemReqs))
arb.io.clients <> banks.map(_.outerTL)
conv1.io.tl <> arb.io.managers.head
conv2.io.nasti <> conv1.io.nasti
conv3.io.cpu.req_cmd <> Queue(conv2.io.mem.req_cmd, 2)
conv3.io.cpu.req_data <> Queue(conv2.io.mem.req_data, mifDataBeats)
conv2.io.mem.resp <> conv3.io.cpu.resp
MemIOMemPipeIOConverter(conv3.io.mem)
}
}
// Create a SerDes for backup memory port
if(params(UseBackupMemoryPort)) {
uncore.io.mem_backup.resp.valid := io.in_mem_valid
io.out_mem_valid := uncore.io.mem_backup.req.valid
uncore.io.mem_backup.req.ready := io.out_mem_ready
io.mem_backup_en <> uncore.io.mem_backup_en
}
VLSIUtils.doOuterMemorySystemSerdes(mem_channels, io.mem, io.mem_backup, io.mem_backup_en, nMemChannels)
} else { io.mem <> mem_channels }
}
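
A hedged summary of the UseNASTI backend assembled above, stage by stage, using only the converter names from the diff:

    // TileLink -> NASTI     via NASTIMasterIOTileLinkIOConverter
    // NASTI    -> MemIO     via MemIONASTISlaveIOConverter
    // MemIO    -> MemPipeIO via MemPipeIOMemIOConverter, with small queues
    //                       on req_cmd and req_data between the last stages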


@@ -13,32 +13,43 @@ class MemDessert extends Module {
}
object VLSIUtils {
def doOuterMemorySystemSerdes(llc: MemPipeIO, mem: MemIO,
backup: MemSerializedIO, en: Bool, w: Int) {
val mem_serdes = Module(new MemSerdes(w))
val wide = mem_serdes.io.wide
llc.req_cmd.ready := Mux(en, wide.req_cmd.ready, mem.req_cmd.ready)
mem.req_cmd.valid := llc.req_cmd.valid && !en
mem.req_cmd.bits := llc.req_cmd.bits
wide.req_cmd.valid := llc.req_cmd.valid && en
wide.req_cmd.bits := llc.req_cmd.bits
def doOuterMemorySystemSerdes(
llcs: Seq[MemIO],
mems: Seq[MemIO],
backup: MemSerializedIO,
en: Bool,
nMemChannels: Int) {
val arb = Module(new MemIOArbiter(nMemChannels))
val mem_serdes = Module(new MemSerdes)
mem_serdes.io.wide <> arb.io.outer
mem_serdes.io.narrow <> backup
llc.req_data.ready := Mux(en, wide.req_data.ready, mem.req_data.ready)
mem.req_data.valid := llc.req_data.valid && !en
mem.req_data.bits := llc.req_data.bits
wide.req_data.valid := llc.req_data.valid && en
wide.req_data.bits := llc.req_data.bits
llcs zip mems zip arb.io.inner foreach { case ((llc, mem), wide) =>
llc.req_cmd.ready := Mux(en, wide.req_cmd.ready, mem.req_cmd.ready)
mem.req_cmd.valid := llc.req_cmd.valid && !en
mem.req_cmd.bits := llc.req_cmd.bits
wide.req_cmd.valid := llc.req_cmd.valid && en
wide.req_cmd.bits := llc.req_cmd.bits
llc.resp.valid := Mux(en, wide.resp.valid, mem.resp.valid)
llc.resp.bits := Mux(en, wide.resp.bits, mem.resp.bits)
mem.resp.ready := Bool(true)
llc.req_data.ready := Mux(en, wide.req_data.ready, mem.req_data.ready)
mem.req_data.valid := llc.req_data.valid && !en
mem.req_data.bits := llc.req_data.bits
wide.req_data.valid := llc.req_data.valid && en
wide.req_data.bits := llc.req_data.bits
backup <> mem_serdes.io.narrow
llc.resp.valid := Mux(en, wide.resp.valid, mem.resp.valid)
llc.resp.bits := Mux(en, wide.resp.bits, mem.resp.bits)
mem.resp.ready := llc.resp.ready && !en
wide.resp.ready := llc.resp.ready && en
}
}
def padOutHTIFWithDividedClock(htif: HTIFModuleIO, child: MemSerializedIO,
parent: MemSerializedIO, host: HostIO,
en: Bool, htifW: Int) {
def padOutHTIFWithDividedClock(
htif: HTIFModuleIO,
child: MemSerializedIO,
parent: MemBackupCtrlIO,
host: HostIO,
htifW: Int) {
val hio = Module((new SlowIO(512)) { Bits(width = htifW+1) })
hio.io.set_divisor.valid := htif.scr.wen && (htif.scr.waddr === UInt(63))
hio.io.set_divisor.bits := htif.scr.wdata
@@ -50,10 +61,10 @@ object VLSIUtils {
child.req.ready := hio.io.out_fast.ready && !htif.host.out.valid
host.out.valid := hio.io.out_slow.valid && hio.io.out_slow.bits(htifW)
host.out.bits := hio.io.out_slow.bits
parent.req.valid := hio.io.out_slow.valid && !hio.io.out_slow.bits(htifW)
hio.io.out_slow.ready := Mux(hio.io.out_slow.bits(htifW), host.out.ready, parent.req.ready)
parent.out_valid := hio.io.out_slow.valid && !hio.io.out_slow.bits(htifW)
hio.io.out_slow.ready := Mux(hio.io.out_slow.bits(htifW), host.out.ready, parent.out_ready)
val mem_backup_resp_valid = en && parent.resp.valid
val mem_backup_resp_valid = parent.en && parent.in_valid
hio.io.in_slow.valid := mem_backup_resp_valid || host.in.valid
hio.io.in_slow.bits := Cat(mem_backup_resp_valid, host.in.bits)
host.in.ready := hio.io.in_slow.ready
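// Hedged note on the framing above: each slow-clock word carries an extra
// top bit (index htifW) that tags its destination, and that bit steers the
// valid/ready muxes between the HTIF host channel and the serialized
// backup memory port.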